esgf-pyclient icon indicating copy to clipboard operation
esgf-pyclient copied to clipboard

Facets warning appears even when facets are specified

Open onnyyonn opened this issue 10 months ago • 4 comments

I am searching for all the files matching a given list of parameters. Even though I specify `facets` when creating each context with `conn.new_context(...)`, I still get the facets warning. In case it matters, I create a new context on every iteration of a set of nested loops. Here is an example snippet of what I am trying to do:

from pyesgf.search import SearchConnection
import numpy as np
import json, os, operator, itertools, tqdm

# Search constraints that stay the same for every query. The "activity_id"
# and "frequency" entries are also used to build the output file names.
query_params = {
    "latest": True,
    "mip_era": "CMIP6",
    "activity_id": "HighResMIP",
    "realm": "atmos",
    "frequency": "6hr",
}

# Values looped over, one search per (source_id, experiment_id, variable).
variables = [
    "ua",
    "uas",
    "va",
    "vas",
    "psl",
    "zg",
]
experiment_ids = [
    "highres-future",
    "hist-1950",
]
source_ids = [
    "MPI-ESM1-2-XR",
    "MPI-ESM1-2-HR",
]

def search_esgf(source_id, experiment_id, variable, query_params, savefile):
    """Search ESGF for one model/experiment/variable and save the file list.

    Runs a dataset search on the module-level ``conn``, collects one record
    per file of every dataset found, merges records that share a filename
    into a single entry holding all of their download URLs, and writes the
    result to ``savefile`` as JSON.

    Args:
        source_id: Value for the ``source_id`` search constraint.
        experiment_id: Value for the ``experiment_id`` search constraint.
        variable: Value for the ``variable`` search constraint.
        query_params: Mapping of additional search constraints, passed
            through as keyword arguments to ``conn.new_context``.
        savefile: Path of the JSON file to write.

    Returns:
        None. The result is only written to ``savefile``.

    Any ``Exception`` is printed and swallowed so that a caller looping over
    many searches keeps going. ``KeyboardInterrupt`` is not an ``Exception``
    subclass, so Ctrl-C still propagates with its original traceback.
    """
    try:
        ctx = conn.new_context(source_id=source_id, experiment_id=experiment_id, variable=variable, facets='source_id', **query_params)

        # Convert search results into a list of filename and download url
        # NOTE(review): file_context() returns a separate context for each
        # dataset; the facets given to new_context above presumably do not
        # carry over to it -- confirm against the esgf-pyclient docs.
        files = []
        for dataset in ctx.search():
            files.extend(
                {
                    'filename': f.filename,
                    'url': f.download_url,
                    'size': f.size,
                    'checksum': f.checksum,
                    'checksum_type': f.checksum_type,
                }
                for f in dataset.file_context().search()
            )

        # Consolidate all duplicate download links into a single entry.
        # groupby only merges adjacent items, hence the sort on the same key.
        by_filename = operator.itemgetter("filename")
        files.sort(key=by_filename)
        files_gr = []
        for _, group in itertools.groupby(files, key=by_filename):
            duplicates = list(group)
            # The first record supplies size/checksum; every URL is kept.
            entry = duplicates[0].copy()
            entry['url'] = tuple(d['url'] for d in duplicates)
            files_gr.append(entry)

        # Save file list
        with open(savefile, 'w', encoding='utf-8') as f:
            json.dump(files_gr, f, indent=4)

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # move on to the next search.
        print(e)

# Single connection shared by every call to search_esgf below.
conn = SearchConnection('https://esgf-data.dkrz.de/esg-search', distrib=True)

# Three nested progress bars: models (outermost), experiments, variables.
for model in tqdm.tqdm(source_ids, desc="Models", position=0):
    for experiment in tqdm.tqdm(experiment_ids, desc="Experiments", position=1):
        for var_name in tqdm.tqdm(variables, desc="Variables", position=2):
            name_parts = (
                query_params["activity_id"],
                experiment,
                model,
                var_name,
                query_params["frequency"],
            )
            savefile = "_".join(name_parts) + ".json"
            # An existing output file means this combination is already done.
            if os.path.exists(savefile):
                continue
            search_esgf(model, experiment, var_name, query_params, savefile)

Here I call the `search_esgf` function inside nested loops, and each call creates a new context with `facets='source_id'` specified. Why do I still get the facets warning?

onnyyonn avatar Aug 31 '23 04:08 onnyyonn