pangeo-forge-recipes
pangeo-forge-recipes copied to clipboard
workflow with SSL authentication is broken
I just stumbled across an issue when working on a recipe for https://github.com/pangeo-forge/staged-recipes/issues/96. My recipe requires authentication for ESGF to read NetCDF files and I do so using an SSLContext object. My workflow is broken by #359. I can repdroduce this issue using the slightly adapted tutorial code, e.g., I'll add fsspec_open_kwargs={"ssl": ssl.create_default_context()}
to the FilePattern object:
import pandas as pd
import ssl
import xarray as xr
from pangeo_forge_recipes.recipes import setup_logging, XarrayZarrRecipe
from pangeo_forge_recipes.patterns import ConcatDim, FilePattern
dates = pd.date_range('1981-09-01', '2022-02-01', freq='D')
URL_FORMAT = (
"https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/"
"v2.1/access/avhrr/{time:%Y%m}/oisst-avhrr-v02r01.{time:%Y%m%d}.nc"
)
def make_url(time):
return URL_FORMAT.format(time=time)
time_concat_dim = ConcatDim("time", dates, nitems_per_file=1)
pattern = FilePattern(make_url, time_concat_dim, fsspec_open_kwargs={"ssl": ssl.create_default_context()})
and running the second part of the tutorial...
# Create recipe object
recipe = XarrayZarrRecipe(pattern, inputs_per_chunk=2)
# Set up logging
setup_logging()
# Prune the recipe
recipe_pruned = recipe.copy_pruned()
# Run the recipe
run_function = recipe_pruned.to_function()
run_function()
# Check the output
oisst_zarr = xr.open_zarr(recipe.target_mapper, consolidated=True)
oisst_zarr
... is broken because the SSLContext
object can not be deecopied during hashing in the asdict
call in https://github.com/pangeo-forge/pangeo-forge-recipes/blob/16eb6bfd23a26997523b86c0fe5428103ec95b8a/pangeo_forge_recipes/serialization.py#L70
I see that storage_config
is ignored during hashing anyway... https://github.com/pangeo-forge/pangeo-forge-recipes/blob/16eb6bfd23a26997523b86c0fe5428103ec95b8a/pangeo_forge_recipes/recipes/base.py#L18
I guess, ignoring that during the asdict
call might solve the problem already since it is removed afterwards anyway. This might be related to https://github.com/python/cpython/issues/88071.
Here is the full traceback...
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In [2], line 12
10 # Run the recipe
11 run_function = recipe_pruned.to_function()
---> 12 run_function()
14 # Check the output
15 oisst_zarr = xr.open_zarr(recipe.target_mapper, consolidated=True)
File ~/python/packages/pangeo-forge-recipes/pangeo_forge_recipes/executors/python.py:46, in FunctionPipelineExecutor.compile.<locals>.function()
44 stage.function(m, config=pipeline.config)
45 else:
---> 46 stage.function(config=pipeline.config)
File ~/python/packages/pangeo-forge-recipes/pangeo_forge_recipes/recipes/xarray_zarr.py:581, in prepare_target(config)
578 config.storage_config.metadata[_GLOBAL_METADATA_KEY] = recipe_meta
580 zgroup = zarr.open_group(config.target_mapper)
--> 581 for k, v in config.get_execution_context().items():
582 zgroup.attrs[f"pangeo-forge:{k}"] = v
File ~/python/packages/pangeo-forge-recipes/pangeo_forge_recipes/recipes/base.py:59, in BaseRecipe.get_execution_context(self)
55 def get_execution_context(self):
56 return dict(
57 # See https://stackoverflow.com/a/2073599 re: version
58 version=pkg_resources.require("pangeo-forge-recipes")[0].version,
---> 59 recipe_hash=self.sha256().hex(),
60 inputs_hash=self.file_pattern.sha256().hex(),
61 )
File ~/python/packages/pangeo-forge-recipes/pangeo_forge_recipes/recipes/base.py:53, in BaseRecipe.sha256(self)
52 def sha256(self):
---> 53 return dataclass_sha256(self, ignore_keys=self._hash_exclude_)
File ~/python/packages/pangeo-forge-recipes/pangeo_forge_recipes/serialization.py:70, in dataclass_sha256(dclass, ignore_keys)
59 def dataclass_sha256(dclass: type, ignore_keys: List[str]) -> bytes:
60 """Generate a deterministic sha256 hash from a Python ``dataclass``. Fields for which the value
61 is either ``None`` or an empty collection are excluded from the hash calculation automatically.
62 To manually exclude other fields from the calculation, pass their names via ``igonore_keys``.
(...)
67 :param ignore_keys: A list of field names to exclude from the hash calculation.
68 """
---> 70 d = asdict(dclass, dict_factory=dict_drop_empty)
71 for k in ignore_keys:
72 del d[k]
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/dataclasses.py:1239, in asdict(obj, dict_factory)
1237 if not _is_dataclass_instance(obj):
1238 raise TypeError("asdict() should be called on dataclass instances")
-> 1239 return _asdict_inner(obj, dict_factory)
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/dataclasses.py:1246, in _asdict_inner(obj, dict_factory)
1244 result = []
1245 for f in fields(obj):
-> 1246 value = _asdict_inner(getattr(obj, f.name), dict_factory)
1247 result.append((f.name, value))
1248 return dict_factory(result)
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/dataclasses.py:1280, in _asdict_inner(obj, dict_factory)
1276 return type(obj)((_asdict_inner(k, dict_factory),
1277 _asdict_inner(v, dict_factory))
1278 for k, v in obj.items())
1279 else:
-> 1280 return copy.deepcopy(obj)
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:172, in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
174 # If is its own copy, don't memoize.
175 if y is not x:
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:271, in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
269 if state is not None:
270 if deep:
--> 271 state = deepcopy(state, memo)
272 if hasattr(y, '__setstate__'):
273 y.__setstate__(state)
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:146, in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:231, in _deepcopy_dict(x, memo, deepcopy)
229 memo[id(x)] = y
230 for key, value in x.items():
--> 231 y[deepcopy(key, memo)] = deepcopy(value, memo)
232 return y
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:146, in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:231, in _deepcopy_dict(x, memo, deepcopy)
229 memo[id(x)] = y
230 for key, value in x.items():
--> 231 y[deepcopy(key, memo)] = deepcopy(value, memo)
232 return y
File ~/miniconda3/envs/pangeo-forge-esgf/lib/python3.10/copy.py:161, in deepcopy(x, memo, _nil)
159 reductor = getattr(x, "__reduce_ex__", None)
160 if reductor is not None:
--> 161 rv = reductor(4)
162 else:
163 reductor = getattr(x, "__reduce__", None)
TypeError: cannot pickle 'SSLContext' object
Also related to https://github.com/python/cpython/issues/77204