pypath
Unable to download progeny mouse model
Describe the bug Tried downloading the progeny mouse model, but I get a TypeError: 'NoneType' object is not iterable. Human works fine.
To Reproduce
import decoupler as dc

# mouse fails:
model = dc.get_progeny(organism='mouse', top=100)
# human works:
model = dc.get_progeny(organism='human', top=100)
model
source | target | weight | p_value
-- | -- | -- | --
Androgen | TMPRSS2 | 11.490631 | 0.000000e+00
Androgen | NKX3-1 | 10.622551 | 2.242078e-44
Androgen | MBOAT2 | 10.472733 | 4.624285e-44
Androgen | KLK2 | 10.176186 | 1.944414e-40
Androgen | SARG | 11.386852 | 2.790209e-40
... | ... | ... | ...
p53 | CCDC150 | -3.174527 | 7.396252e-13
p53 | LCE1A | 6.154823 | 8.475458e-13
p53 | TREM2 | 4.101937 | 9.739648e-13
p53 | GDF9 | 3.355741 | 1.087433e-12
p53 | NHLH2 | 2.201638 | 1.651582e-12
Expected behavior Expected to download the mouse marker genes.
System
- OS: CentOS 7
- Python 3.9
- decoupler 1.3.1
Additional context
TypeError Traceback (most recent call last)
Cell In[8], line 1
----> 1 model = dc.get_progeny(organism='mouse', top=100)
2 model
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/decoupler/omnip.py:139, in get_progeny(organism, top)
135 p.columns = ['source', 'target', 'weight', 'p_value']
137 if not _is_human(organism):
--> 139 p = translate_net(
140 p,
141 columns='target',
142 source_organism=9606,
143 target_organism=organism,
144 )
146 return p
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/decoupler/omnip.py:453, in translate_net(net, columns, source_organism, target_organism, id_type, unique_by, **kwargs)
450 hom_net = net.copy()
452 # Translate
--> 453 hom_net = homology.translate_df(
454 df=hom_net,
455 target=_target_organism,
456 cols=columns,
457 source=_source_organism,
458 )
460 unique_by = common.to_list(unique_by)
462 if unique_by and all(c in hom_net.columns for c in unique_by):
463
464 # Remove duplicated based on source and target
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:1901, in translate_df(df, target, cols, source, homologene, ensembl, ensembl_hc, ensembl_types, **kwargs)
1898 args.pop('manager')
1899 kwargs = args.pop('kwargs')
-> 1901 return manager.translate_df(**args, **kwargs)
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:480, in HomologyManager.translate_df(self, df, target, cols, source, homologene, ensembl, ensembl_hc, ensembl_types, **kwargs)
477 source = taxonomy.ensure_ncbi_tax_id(source)
478 target = taxonomy.ensure_ncbi_tax_id(target)
--> 480 table = self.which_table(
481 target = target,
482 source = source,
483 )
485 param = self._translation_param(locals())
487 return table.translate_df(
488 df = df,
489 cols = cols,
(...)
492 **kwargs
493 )
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:183, in HomologyManager.which_table(self, target, source, only_swissprot)
179 self.expiry[key] = time.time()
181 if key not in self.tables:
--> 183 self.load(key)
185 if key in self.tables:
187 return self.tables[key]
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:214, in HomologyManager.load(self, key)
207 self._log(
208 'Homology table from taxon %u to %u (only SwissProt: %s) '
209 'has been loaded from `%s`.' % (key + (cachefile,))
210 )
212 else:
--> 214 self.tables[key] = self._load(key)
215 pickle.dump(self.tables[key], open(cachefile, 'wb'))
216 self._log(
217 'Homology table from taxon %u to %u (only SwissProt: %s) '
218 'has been saved to `%s`.' % (key + (cachefile,))
219 )
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:224, in HomologyManager._load(self, key)
222 def _load(self, key):
--> 224 return ProteinHomology(
225 target = key[1],
226 source = key[0],
227 only_swissprot = key[2],
228 homologene = self.homologene,
229 ensembl = self.ensembl,
230 ensembl_hc = self.ensembl_hc,
231 ensembl_types = self.ensembl_types,
232 )
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:715, in ProteinHomology.__init__(self, target, source, only_swissprot, homologene, ensembl, ensembl_hc, ensembl_types)
711 self.load_proteome(self.target, self.only_swissprot)
713 if source is not None:
--> 715 self.load(source)
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:731, in ProteinHomology.load(self, source)
727 def load(self, source = None):
729 if self.homologene:
--> 731 self.load_homologene(source)
733 if self.ensembl:
735 self.load_ensembl(source)
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:1206, in ProteinHomology.load_homologene(self, source)
1200 target_r.update(hgr[r])
1202 for e in target_e:
1204 target_u.update(
1205 set(
-> 1206 mapping.map_name(e, 'entrez', 'uniprot', self.target)
1207 )
1208 )
1210 for r in target_r:
1212 target_u.update(
1213 set(
1214 mapping.map_name(e, 'refseqp', 'uniprot', self.target)
1215 )
1216 )
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:3323, in map_name(name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
3270 """
3271 Translates one instance of one ID type to a different one.
3272 Returns set of the target ID type.
(...)
3318 ID, call the `uniprot_cleanup` function at the end.
3319 """
3321 mapper = get_mapper()
-> 3323 return mapper.map_name(
3324 name = name,
3325 id_type = id_type,
3326 target_id_type = target_id_type,
3327 ncbi_tax_id = ncbi_tax_id,
3328 strict = strict,
3329 expand_complexes = expand_complexes,
3330 uniprot_cleanup = uniprot_cleanup,
3331 )
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2000, in Mapper.map_name(self, name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
1996 # for UniProt IDs we do a few more steps to
1997 # try to find out the primary SwissProt ID
1998 if uniprot_cleanup and target_id_type == 'uniprot':
-> 2000 mapped_names = self.uniprot_cleanup(
2001 uniprots = mapped_names,
2002 ncbi_tax_id = ncbi_tax_id,
2003 )
2005 return mapped_names
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2029, in Mapper.uniprot_cleanup(self, uniprots, ncbi_tax_id)
2026 uniprots = common.to_set(uniprots)
2028 # step 1: translate secondary IDs to primary
-> 2029 uniprots = self.primary_uniprot(uniprots)
2031 # step 2: translate TrEMBL to SwissProt by gene symbols
2032 if self._trembl_swissprot_by_genesymbol:
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2599, in Mapper.primary_uniprot(self, uniprots, ncbi_tax_id)
2595 ncbi_tax_id = ncbi_tax_id or self.ncbi_tax_id
2597 for uniprot in uniprots:
-> 2599 primary = self.map_name(
2600 name = uniprot,
2601 id_type = 'uniprot-sec',
2602 target_id_type = 'uniprot-pri',
2603 ncbi_tax_id = ncbi_tax_id,
2604 )
2606 if primary:
2608 primaries.update(primary)
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:1785, in Mapper.map_name(self, name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
1771 mapped_names = self.chain_map(
1772 name = name,
1773 id_type = id_type,
(...)
1779 uniprot_cleanup = uniprot_cleanup,
1780 )
1782 else:
1783
1784 # all the other ID types
-> 1785 mapped_names = self._map_name(
1786 name = name,
1787 id_type = id_type,
1788 target_id_type = target_id_type,
1789 ncbi_tax_id = ncbi_tax_id,
1790 )
1792 # as ID translation tables for PRO IDs are not organism specific
1793 # we need an extra step to limit the results to the target organism
1794 if id_type == 'pro' and target_id_type == 'uniprot':
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2317, in Mapper._map_name(self, name, id_type, target_id_type, ncbi_tax_id)
2310 """
2311 Once we have defined the name type and the target name type,
2312 this function looks it up in the most suitable dictionary.
2313 """
2315 ncbi_tax_id = ncbi_tax_id or self.ncbi_tax_id
-> 2317 tbl = self.which_table(
2318 id_type,
2319 target_id_type,
2320 ncbi_tax_id = ncbi_tax_id,
2321 )
2323 return tbl[name] if tbl else set()
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:1383, in Mapper.which_table(self, id_type, target_id_type, load, ncbi_tax_id)
1372 if resource:
1374 self._log(
1375 'Chosen built-in defined ID translation table: '
1376 'resource=%s, id_type_a=%s, id_type_b=%s' % (
(...)
1380 )
1381 )
-> 1383 self.load_mapping(
1384 resource = resource,
1385 load_a_to_b = load_a_to_b,
1386 load_b_to_a = load_b_to_a,
1387 ncbi_tax_id = ncbi_tax_id,
1388 )
1390 tbl = check_loaded()
1392 break
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2973, in Mapper.load_mapping(self, resource, **kwargs)
2958 return
2960 self._log(
2961 'Loading mapping table for organism `%s` '
2962 'with identifiers `%s` and `%s`, '
(...)
2970 )
2971 )
-> 2973 reader = MapReader(param = resource, **kwargs)
2975 a_to_b = reader.mapping_table_a_to_b
2976 b_to_a = reader.mapping_table_b_to_a
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:235, in MapReader.__init__(self, param, ncbi_tax_id, entity_type, load_a_to_b, load_b_to_a, uniprots, lifetime)
232 self.b_to_a = None
233 self.uniprots = uniprots
--> 235 self.load()
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:265, in MapReader.load(self)
260 self.read_cache()
262 if not self.tables_loaded():
263
264 # read from the original source
--> 265 self.read()
267 if self.tables_loaded():
268
269 # write cache only at successful loading
270 self.write_cache()
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:423, in MapReader.read(self)
419 method = 'read_mapping_%s' % self.source_type
421 if hasattr(self, method):
--> 423 getattr(self, method)()
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:534, in MapReader.read_mapping_file(self)
531 a_to_b = collections.defaultdict(set)
532 b_to_a = collections.defaultdict(set)
--> 534 for i, line in enumerate(infile):
536 if self.param.header and i < self.param.header:
538 continue
File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/inputs/uniprot.py:481, in get_uniprot_sec(organism)
470 url = urls.urls['uniprot_sec']['url']
471 c = curl.Curl(url, silent = False, large = True, timeout = 2400)
473 for line in filter(
474 lambda line:
475 len(line) == 2 and (organism is None or line[1] in proteome),
476 map(
477 lambda i:
478 i[1].split(),
479 filter(
480 lambda i: i[0] >= 30,
--> 481 enumerate(c.result)
482 )
483 )
484 ):
486 yield line
TypeError: 'NoneType' object is not iterable
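The final frame above iterates over `c.result`. A minimal, self-contained sketch of the failure mode (not pypath code; `FakeCurl` and `parse` are hypothetical stand-ins): when a download fails, the curl object's `result` attribute stays `None`, and the parser then tries to iterate over it.

```python
class FakeCurl:
    """Hypothetical stand-in for pypath's Curl object.

    In pypath, `result` holds the downloaded content; after a failed
    download it can remain None.
    """
    def __init__(self, ok):
        self.result = ["P12345 Q67890"] if ok else None

def parse(c):
    # Mirrors the iteration in get_uniprot_sec: enumerate(c.result)
    return [line for i, line in enumerate(c.result)]

print(parse(FakeCurl(ok=True)))   # parsed lines
try:
    parse(FakeCurl(ok=False))     # download failed -> result is None
except TypeError as e:
    print(e)                      # 'NoneType' object is not iterable
```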
Yes, I also encounter this error when I run the mouse model. It seems to be an error in converting mouse genes to human genes.
Hi @jpfeil and @nierq,
Indeed there seems to be a server side issue with the conversion. @deeenes could you take a look please?
Hello, I also encounter the same error.
>>> import pycurl
>>> import decoupler as dc
>>>
>>>
>>> dc.get_progeny(organism = 'Mus musculus',top =100)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/decoupler/omnip.py", line 139, in get_progeny
p = translate_net(
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/decoupler/omnip.py", line 432, in translate_net
_target_organism = taxonomy.ensure_ncbi_tax_id(target_organism)
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 367, in ensure_ncbi_tax_id
taxid_from_common_name(taxon_id) or
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 283, in taxid_from_common_name
common_to_ncbi = get_db('common')
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 435, in get_db
init_db(key)
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 450, in init_db
ncbi_data = uniprot_input.uniprot_ncbi_taxids_2()
File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/inputs/uniprot.py", line 1301, in uniprot_ncbi_taxids_2
for line in c.result:
TypeError: 'NoneType' object is not iterable
I now have an alternative solution to this problem.

Step 1: use the R package progeny to obtain the model data used by decoupler:

library(progeny)
model_data <- progeny::model_mouse_full
write.table(model_data, './progeny_mouse_geneset.txt')

Step 2: in Python:

import pandas as pd

gene_set = pd.read_csv("./progeny_mouse_geneset.txt", sep='\t')
gene_set.rename(columns={'gene': 'target', 'pathway': 'source', 'p.value': 'p_value'}, inplace=True)
gene_set = gene_set.reindex(columns=['source', 'target', 'weight', 'p_value'])

# select the top n genes per pathway
gene_set = (
    gene_set
    .sort_values('p_value')
    .groupby('source')
    .head(200)
    .sort_values(['source', 'p_value'])
    .reset_index()
)
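A toy demonstration of the top-n-per-pathway selection above, assuming only pandas (the data is made up for illustration; column names mirror the snippet):

```python
import pandas as pd

gene_set = pd.DataFrame({
    'source':  ['p53', 'p53', 'p53', 'Androgen', 'Androgen'],
    'target':  ['A', 'B', 'C', 'D', 'E'],
    'weight':  [1.0, -2.0, 3.0, 0.5, 4.0],
    'p_value': [1e-3, 1e-5, 1e-1, 1e-8, 1e-2],
})

top = (
    gene_set
    .sort_values('p_value')        # most significant first
    .groupby('source')
    .head(2)                       # keep the top 2 targets per pathway
    .sort_values(['source', 'p_value'])
    .reset_index(drop=True)
)
print(list(top['target']))         # ['D', 'E', 'B', 'A']
```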
dc.run_mlm(mat=raw_data, net=gene_set, source='source', target='target', weight='weight', verbose=True, use_raw=False)
Or you can do progeny analysis directly in R:

library(Seurat)
library(anndata)
library(progeny)

data <- read_h5ad('./all_adata_MC38.integrated_final_rawdata.h5ad')
data_seurat <- CreateSeuratObject(counts = t(as.matrix(data$X)), meta.data = data$obs)

Standardized data is required. For the specific steps, please refer to the introduction of progeny: https://saezlab.github.io/progeny/articles/ProgenySingleCell.html
@xiangrong7, yours is not the same error; it happens on a completely different line. The error that you encountered is likely due to a random network failure: your computer failed to download a file. You can get rid of the error by trying again on a good-quality network:
from pypath.share import curl
import decoupler as dc
with curl.cache_off():
dc.get_progeny(organism = 'Mus musculus', top = 100)
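Since these failures are transient, a small retry wrapper can also help. This is a hypothetical helper (`retry` is not part of pypath or decoupler), shown with a stand-in flaky function instead of a real download:

```python
import time

def retry(fn, attempts=3, delay=1.0):
    """Call `fn` until it succeeds or `attempts` runs out.

    pypath surfaces failed downloads as TypeError ('NoneType' object
    is not iterable), so that is the exception retried here.
    """
    last_err = None
    for _ in range(attempts):
        try:
            return fn()
        except TypeError as e:
            last_err = e
            time.sleep(delay)
    raise last_err

# Stand-in for a flaky download: fails twice, then succeeds.
calls = {'n': 0}
def flaky():
    calls['n'] += 1
    if calls['n'] < 3:
        raise TypeError("'NoneType' object is not iterable")
    return 'ok'

print(retry(flaky, attempts=5, delay=0.0))  # prints: ok
```

In practice one would wrap the download itself, e.g. `retry(lambda: dc.get_progeny(organism='Mus musculus', top=100))`, inside the `curl.cache_off()` context above.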
Hello @jpfeil,
Apologies about the slow answer. I have to correct my previous comment: actually your issue is very similar to @xiangrong7's, it is a transient networking error. The traceback is long, but the most downstream element that we can identify is the call at pypath/utils/mapping.py:2599: it wants to translate secondary UniProt IDs to primary ones. Then normally a download happens, which looks like this in the log:
[2023-01-18 15:12:04] [mapping] Requested to load ID translation table from `uniprot-sec` to `uniprot-pri`, organism: 9606.
[2023-01-18 15:12:04] [mapping] Chosen built-in defined ID translation table: resource=basic, id_type_a=uniprot-sec, id_type_b=uniprot-pri
[2023-01-18 15:12:04] [inputs] Selecting input method (step 1): module `uniprot.get_uniprot_sec`, method `None`.
[2023-01-18 15:12:04] [inputs] Selecting input method (step 2): module `pypath.inputs.uniprot`, method `get_uniprot_sec`.
[2023-01-18 15:12:04] [inputs] Importing module `pypath.inputs.uniprot`.
[2023-01-18 15:12:04] [mapping] Loading mapping table for organism `9606` with identifiers `uniprot-sec` and `uniprot-pri`, input type `file`
[2023-01-18 15:12:04] [mapping] Reader created for ID translation table, parameters: `ncbi_tax_id=9606, id_a=uniprot-sec, id_b=uniprot-pri, load_a_to_b=1, load_b_to_a=0, input_type=file (FileMapping)`.
[2023-01-18 15:12:04] [inputs] Selecting input method (step 1): module `uniprot.get_uniprot_sec`, method `None`.
[2023-01-18 15:12:04] [inputs] Selecting input method (step 2): module `pypath.inputs.uniprot`, method `get_uniprot_sec`.
[2023-01-18 15:12:04] [inputs] Importing module `pypath.inputs.uniprot`.
[2023-01-18 15:12:04] [uniprot_input] Loading list of all UniProt IDs for organism `9606` (only SwissProt: None).
[2023-01-18 15:12:04] [curl] Creating Curl object to retrieve data from `https://legacy.uniprot.org/uniprot/`
[2023-01-18 15:12:04] [curl] GET parameters added to the URL: `query=organism%3A9606&format=tab&columns=id`
[2023-01-18 15:12:04] [curl] Cache file path: `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`
[2023-01-18 15:12:04] [curl] Setting up and calling pycurl.
[2023-01-18 15:12:25] [curl] Opening file `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`
[2023-01-18 15:12:25] [curl] Extracting data from file type `plain`
[2023-01-18 15:12:25] [curl] Opening plain text file `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`.
[2023-01-18 15:12:25] [curl] Contents of `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-` has been read and the file has been closed.
[2023-01-18 15:12:25] [curl] File at `https://legacy.uniprot.org/uniprot/?query=organism%3A9606&format=tab&columns=id` successfully retrieved. Resulted file type `plain text, unicode string`. Local file at `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`.
[2023-01-18 15:12:25] [curl] Creating Curl object to retrieve data from `ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/sec_ac.txt`
[2023-01-18 15:12:25] [curl] Cache file path: `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`
[2023-01-18 15:12:25] [curl] Setting up and calling pycurl.
[2023-01-18 15:12:27] [curl] Opening file `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`
[2023-01-18 15:12:27] [curl] Extracting data from file type `plain`
[2023-01-18 15:12:27] [curl] Opening plain text file `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`.
[2023-01-18 15:12:27] [curl] File at `ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/sec_ac.txt` successfully retrieved. Resulted file type `plain text, file object`. Local file at `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`.
The solution is similar to the one suggested above: try again this particular download with the cache turned off:
from pypath.utils import mapping
from pypath.share import curl
with curl.cache_off():
mapping.get_mapper().primary_uniprot(['P00533'])
I hope this helps, please let me know if you're still experiencing any issue.
Best,
Denes
I used the method you mentioned above. I don't know if other people's problems have been solved, but my problem still exists.
<class 'TypeError'>, 'NoneType' object is not iterable, <traceback object at 0x2b65030c4040>
Traceback (most recent call last):
File "
Hi @nierq, I'm sorry to hear this error is still with you. I've just run the above function successfully, suggesting that this is a download issue similar to the others above. These kinds of errors are not uncommon, but having them so often suggests a really bad connection or some other issue. To know more, we need curl debug logs:
from pypath.share import curl
from pypath.inputs import uniprot
import pypath
with curl.cache_off(), curl.debug_on():
taxons = uniprot.uniprot_ncbi_taxids_2()
pypath.log()
If the download is not successful, the above log could give useful information. This file is not too large (2.3 MB); repeated failure to download it suggests some fundamental issue. Can you download it from the CLI?
curl -LOvvv https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/speclist.txt
Can you open the URL above in your browser?