ALLCools
ALLCools copied to clipboard
KeyError: 'dmr_motif_da' in scan_motifs
Hello, I have recently started using allcools to analyze sample datasets from Scalebio. I am using a dataset from human PBMCs with 4426 cells. I am currently stuck on the motifscan and motif enrichment step. I am using a conda enviroment created using the provided "allcools_env.yaml" file. I had to add "np.object = object" because the method is deprecated in the current version of numpy.
The code I used:
### Motif scanning Allcools ###
import numpy as np
np.object = object
np.unicode = str
import pandas as pd
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import fisher_exact
from statsmodels.stats.multitest import multipletests
from ALLCools.mcds import RegionDS
#Making regionds file from mcds file (location of 5kb)
region_ds = mcds.to_region_ds(region_dim='5kb')
region_ds.attrs["region_ds_location"] = "~/PBMC_by_lot"
#region_ds.to_zarr("~/PBMC_by_lot_s1.zarr", mode="w")
#Scanning motifs in hg38 #
region_ds.scan_motifs(genome_fasta='~/hg38.fa',
cpu=80,
standardize_length=500,
motif_set_path=None,
chrom_size_path="~/allcools/hg38chrom.sizes",
combine_cluster=True,
chunk_size=10000,
motif_dim='motif')
The output is:
~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/motif/parse_meme.py:128: FutureWarning: The squeeze argument has been deprecated and will be removed in a future version. Append .squeeze("columns") to the call to squeeze.
motif_set.thresholds = pd.read_csv(
Scan 2179 motif in 642098 sequences.
Job 0 returned
Job 1 returned
Job 2 returned
Job 3 returned
Job 4 returned
Job 5 returned
Job 6 returned
Job 7 returned
Job 8 returned
Job 9 returned
Job 10 returned
Job 11 returned
Job 12 returned
Job 13 returned
Job 14 returned
Job 15 returned
Job 16 returned
Job 17 returned
Job 18 returned
Job 19 returned
Job 20 returned
Job 21 returned
Job 22 returned
Job 23 returned
Job 24 returned
Job 25 returned
Job 26 returned
Job 27 returned
Job 28 returned
Job 29 returned
Job 30 returned
Job 31 returned
Job 32 returned
Job 33 returned
Job 34 returned
Job 35 returned
Job 36 returned
Job 37 returned
Job 38 returned
Job 39 returned
Job 40 returned
Job 41 returned
Job 42 returned
Job 43 returned
Job 44 returned
Job 45 returned
Job 46 returned
Job 47 returned
Job 48 returned
Job 49 returned
Job 50 returned
Job 51 returned
Job 52 returned
Job 53 returned
Job 54 returned
Job 55 returned
Job 56 returned
Job 57 returned
Job 58 returned
Job 59 returned
Job 60 returned
Job 61 returned
Job 62 returned
Job 63 returned
Job 64 returned
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py:1348](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py#line=1347), in Dataset._construct_dataarray(self, name)
1347 try:
-> 1348 variable = self._variables[name]
1349 except KeyError:
KeyError: 'dmr_motif_da'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
Cell In[7], line 3
1 #Scanning motifs in hg38 #
2 #This might take a long time
----> 3 region_ds.scan_motifs(genome_fasta='/cluster/data/ck_care/Ensembl_regulatory_region_annotations/hg38.fa',
4 cpu=80,
5 standardize_length=500,
6 motif_set_path=None,
7 chrom_size_path="/cluster/data/ck_care/Methylation/allcools/hg38chrom.sizes",
8 combine_cluster=True,
9 chunk_size=10000,
10 motif_dim='motif')
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/mcds/region_ds.py:781](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/mcds/region_ds.py#line=780), in RegionDS.scan_motifs(self, genome_fasta, cpu, standardize_length, motif_set_path, chrom_size_path, combine_cluster, fnr_fpr_fold, chunk_size, motif_dim, snakemake)
769 self._scan_motifs_snakemake(
770 fasta_path,
771 output_dir=region_ds_path,
(...)
777 chunk_size=chunk_size,
778 )
779 else:
780 # directly scan motif under current process
--> 781 self._scan_motif_local(
782 fasta_path=fasta_path,
783 cpu=cpu,
784 motif_set_path=motif_set_path,
785 combine_cluster=combine_cluster,
786 fnr_fpr_fold=fnr_fpr_fold,
787 chunk_size=chunk_size,
788 motif_dim=motif_dim,
789 )
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/mcds/region_ds.py:817](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/mcds/region_ds.py#line=816), in RegionDS._scan_motif_local(self, fasta_path, cpu, motif_set_path, combine_cluster, fnr_fpr_fold, chunk_size, motif_dim)
814 region_dim = self.region_dim
815 region_ds_path = self.location
--> 817 motif_ds = motif_set.scan_motifs(
818 fasta_path=fasta_path,
819 output_dir=region_ds_path,
820 cpu=cpu,
821 region_dim=region_dim,
822 combine_cluster=combine_cluster,
823 motif_dim=motif_dim,
824 chunk_size=chunk_size,
825 )
826 new_dataset_dim = {f"{region_dim}_{motif_dim}": region_dim}
827 if combine_cluster:
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/motif/motifs.py:225](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/motif/motifs.py#line=224), in MotifSet.scan_motifs(self, fasta_path, output_dir, cpu, region_dim, motif_dim, combine_cluster, dtype, chunk_size)
222 motif_ds = RegionDS.open(f"{output_dir}/{region_dim}_{motif_dim}", region_dim=region_dim)
224 if combine_cluster:
--> 225 self._aggregate_motif_clusters(
226 output_dir=output_dir,
227 motif_dim=motif_dim,
228 region_dim=region_dim,
229 dtype=dtype,
230 chunk_size=chunk_size,
231 )
232 motif_cluster_ds = RegionDS.open(f"{output_dir}/{region_dim}_{motif_dim}-cluster", region_dim=region_dim)
233 motif_ds.update(motif_cluster_ds)
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/motif/motifs.py:192](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/motif/motifs.py#line=191), in MotifSet._aggregate_motif_clusters(self, output_dir, motif_dim, region_dim, dtype, chunk_size)
189 motif_cluster_ds = RegionDS(motif_cluster_ds)
191 cluster_output_dir = f"{output_dir}/{region_dim}_{motif_dim}-cluster"
--> 192 for i, chunk in enumerate(
193 motif_cluster_ds.iter_array(dim=region_dim, chunk_size=chunk_size, da="dmr_motif_da", load=True)
194 ):
195 _ds = xr.Dataset({"dmr_motif-cluster_da": chunk}).load()
196 if i == 0:
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/mcds/region_ds.py:479](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/ALLCools/mcds/region_ds.py#line=478), in RegionDS.iter_array(self, chunk_size, dim, da, load)
477 if da is None:
478 da = f"{dim}_da"
--> 479 _da = self[da]
481 try:
482 assert dim in _da.dims
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py:1439](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py#line=1438), in Dataset.__getitem__(self, key)
1437 return self.isel(**key)
1438 if utils.hashable(key):
-> 1439 return self._construct_dataarray(key)
1440 if utils.iterable_of_hashable(key):
1441 return self._copy_listed(key)
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py:1350](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py#line=1349), in Dataset._construct_dataarray(self, name)
1348 variable = self._variables[name]
1349 except KeyError:
-> 1350 _, name, variable = _get_virtual_variable(self._variables, name, self.dims)
1352 needed_dims = set(variable.dims)
1354 coords: dict[Hashable, Variable] = {}
File [~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py:186](http://localhost:8891/lab/tree/~/miniconda3/envs/allcools/lib/python3.8/site-packages/xarray/core/dataset.py#line=185), in _get_virtual_variable(variables, key, dim_sizes)
184 split_key = key.split(".", 1)
185 if len(split_key) != 2:
--> 186 raise KeyError(key)
188 ref_name, var_name = split_key
189 ref_var = variables[ref_name]
KeyError: 'dmr_motif_da'
Is there a workaround or solution that I can try?