Update the `coiled` and `append` tutorials to the V2 syntax
Towards closing: https://github.com/zarr-developers/VirtualiZarr/issues/645
@abarciauskas-bgse this PR modifies your append tutorial. Feel free to change anything here.
Codecov Report
:white_check_mark: All modified and coverable lines are covered by tests.
:white_check_mark: Project coverage is 87.75%. Comparing base (a2b65c1) to head (357a914).
Additional details and impacted files
@@ Coverage Diff @@
## main #789 +/- ##
=======================================
Coverage 87.75% 87.75%
=======================================
Files 35 35
Lines 1886 1886
=======================================
Hits 1655 1655
Misses 231 231
:rocket: New features to boost your workflow:
- :snowflake: Test Analytics: Detect flaky tests, report on failures, and find test suite problems.
@norlandrhagen when I try to load some data in the append example, e.g.:
ds['sst'][:,0,300,300].load()
I get back:
---------------------------------------------------------------------------
IcechunkError Traceback (most recent call last)
Cell In[23], line 3
1 ### Try loading some data
----> 3 ds['sst'][:,0,300,300].load()
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/dataarray.py:1157](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/dataarray.py#line=1156), in DataArray.load(self, **kwargs)
1137 def load(self, **kwargs) -> Self:
1138 """Manually trigger loading of this array's data from disk or a
1139 remote source into memory and return this array.
1140
(...) 1155 dask.compute
1156 """
-> 1157 ds = self._to_temp_dataset().load(**kwargs)
1158 new = self._from_temp_dataset(ds)
1159 self._variable = new._variable
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/dataset.py:542](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/dataset.py#line=541), in Dataset.load(self, **kwargs)
539 chunkmanager = get_chunked_array_type(*lazy_data.values())
541 # evaluate all the chunked arrays simultaneously
--> 542 evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute(
543 *lazy_data.values(), **kwargs
544 )
546 for k, data in zip(lazy_data, evaluated_data, strict=False):
547 self.variables[k].data = data
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/namedarray/daskmanager.py:85](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/namedarray/daskmanager.py#line=84), in DaskManager.compute(self, *data, **kwargs)
80 def compute(
81 self, *data: Any, **kwargs: Any
82 ) -> tuple[np.ndarray[Any, _DType_co], ...]:
83 from dask.array import compute
---> 85 return compute(*data, **kwargs)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/dask/base.py:681](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/dask/base.py#line=680), in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
678 expr = expr.optimize()
679 keys = list(flatten(expr.__dask_keys__()))
--> 681 results = schedule(expr, keys, **kwargs)
683 return repack(results)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py:573](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py#line=572), in ImplicitToExplicitIndexingAdapter.__array__(self, dtype, copy)
569 def __array__(
570 self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
571 ) -> np.ndarray:
572 if Version(np.__version__) >= Version("2.0.0"):
--> 573 return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
574 else:
575 return np.asarray(self.get_duck_array(), dtype=dtype)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py:578](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py#line=577), in ImplicitToExplicitIndexingAdapter.get_duck_array(self)
577 def get_duck_array(self):
--> 578 return self.array.get_duck_array()
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py:797](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py#line=796), in CopyOnWriteArray.get_duck_array(self)
796 def get_duck_array(self):
--> 797 return self.array.get_duck_array()
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/coding/common.py:80](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/coding/common.py#line=79), in _ElementwiseFunctionArray.get_duck_array(self)
79 def get_duck_array(self):
---> 80 return self.func(self.array.get_duck_array())
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/coding/common.py:80](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/coding/common.py#line=79), in _ElementwiseFunctionArray.get_duck_array(self)
79 def get_duck_array(self):
---> 80 return self.func(self.array.get_duck_array())
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py:652](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py#line=651), in LazilyIndexedArray.get_duck_array(self)
648 array = apply_indexer(self.array, self.key)
649 else:
650 # If the array is not an ExplicitlyIndexedNDArrayMixin,
651 # it may wrap a BackendArray so use its __getitem__
--> 652 array = self.array[self.key]
654 # self.array[self.key] is now a numpy array when
655 # self.array is a BackendArray subclass
656 # and self.key is BasicIndexer((slice(None, None, None),))
657 # so we need the explicit check for ExplicitlyIndexed
658 if isinstance(array, ExplicitlyIndexed):
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/backends/zarr.py:224](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/backends/zarr.py#line=223), in ZarrArrayWrapper.__getitem__(self, key)
222 elif isinstance(key, indexing.OuterIndexer):
223 method = self._oindex
--> 224 return indexing.explicit_indexing_adapter(
225 key, array.shape, indexing.IndexingSupport.VECTORIZED, method
226 )
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py:1021](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/core/indexing.py#line=1020), in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
999 """Support explicit indexing by delegating to a raw indexing method.
1000
1001 Outer and/or vectorized indexers are supported by indexing a second time
(...) 1018 Indexing result, in the form of a duck numpy-array.
1019 """
1020 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
-> 1021 result = raw_indexing_method(raw_key.tuple)
1022 if numpy_indices.tuple:
1023 # index the loaded duck array
1024 indexable = as_indexable(result)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/backends/zarr.py:214](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/xarray/backends/zarr.py#line=213), in ZarrArrayWrapper._getitem(self, key)
213 def _getitem(self, key):
--> 214 return self._array[key]
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/array.py:2503](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/array.py#line=2502), in Array.__getitem__(self, selection)
2501 return self.vindex[cast("CoordinateSelection | MaskSelection", selection)]
2502 elif is_pure_orthogonal_indexing(pure_selection, self.ndim):
-> 2503 return self.get_orthogonal_selection(pure_selection, fields=fields)
2504 else:
2505 return self.get_basic_selection(cast("BasicSelection", pure_selection), fields=fields)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/array.py:2942](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/array.py#line=2941), in Array.get_orthogonal_selection(self, selection, out, fields, prototype)
2940 prototype = default_buffer_prototype()
2941 indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid)
-> 2942 return sync(
2943 self._async_array._get_selection(
2944 indexer=indexer, out=out, fields=fields, prototype=prototype
2945 )
2946 )
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/sync.py:163](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/sync.py#line=162), in sync(coro, loop, timeout)
160 return_result = next(iter(finished)).result()
162 if isinstance(return_result, BaseException):
--> 163 raise return_result
164 else:
165 return return_result
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/sync.py:119](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/sync.py#line=118), in _runner(coro)
114 """
115 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
116 exception, the exception will be returned.
117 """
118 try:
--> 119 return await coro
120 except Exception as ex:
121 return ex
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/array.py:1365](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/array.py#line=1364), in AsyncArray._get_selection(self, indexer, prototype, out, fields)
1362 _config = replace(_config, order=self.order)
1364 # reading chunks and decoding them
-> 1365 await self.codec_pipeline.read(
1366 [
1367 (
1368 self.store_path / self.metadata.encode_chunk_key(chunk_coords),
1369 self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype),
1370 chunk_selection,
1371 out_selection,
1372 is_complete_chunk,
1373 )
1374 for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
1375 ],
1376 out_buffer,
1377 drop_axes=indexer.drop_axes,
1378 )
1379 if isinstance(indexer, BasicIndexer) and indexer.shape == ():
1380 return out_buffer.as_scalar()
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/codec_pipeline.py:466](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/codec_pipeline.py#line=465), in BatchedCodecPipeline.read(self, batch_info, out, drop_axes)
460 async def read(
461 self,
462 batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
463 out: NDBuffer,
464 drop_axes: tuple[int, ...] = (),
465 ) -> None:
--> 466 await concurrent_map(
467 [
468 (single_batch_info, out, drop_axes)
469 for single_batch_info in batched(batch_info, self.batch_size)
470 ],
471 self.read_batch,
472 config.get("async.concurrency"),
473 )
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py:91](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py#line=90), in concurrent_map(items, func, limit)
88 async with sem:
89 return await func(*item)
---> 91 return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py:89](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py#line=88), in concurrent_map.<locals>.run(item)
87 async def run(item: tuple[Any]) -> V:
88 async with sem:
---> 89 return await func(*item)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/codec_pipeline.py:265](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/codec_pipeline.py#line=264), in BatchedCodecPipeline.read_batch(self, batch_info, out, drop_axes)
263 out[out_selection] = fill_value_or_default(chunk_spec)
264 else:
--> 265 chunk_bytes_batch = await concurrent_map(
266 [(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch_info],
267 lambda byte_getter, prototype: byte_getter.get(prototype),
268 config.get("async.concurrency"),
269 )
270 chunk_array_batch = await self.decode_batch(
271 [
272 (chunk_bytes, chunk_spec)
(...) 276 ],
277 )
278 for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip(
279 chunk_array_batch, batch_info, strict=False
280 ):
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py:91](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py#line=90), in concurrent_map(items, func, limit)
88 async with sem:
89 return await func(*item)
---> 91 return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py:89](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/core/common.py#line=88), in concurrent_map.<locals>.run(item)
87 async def run(item: tuple[Any]) -> V:
88 async with sem:
---> 89 return await func(*item)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/storage/_common.py:164](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/zarr/storage/_common.py#line=163), in StorePath.get(self, prototype, byte_range)
162 if prototype is None:
163 prototype = default_buffer_prototype()
--> 164 return await self.store.get(self.path, prototype=prototype, byte_range=byte_range)
File [~/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/icechunk/store.py:162](http://localhost:8888/home/rsignell/miniforge3/envs/pangeo_z3/lib/python3.12/site-packages/icechunk/store.py#line=161), in IcechunkStore.get(self, key, prototype, byte_range)
143 """Retrieve the value associated with a given key.
144
145 Parameters
(...) 158 Buffer
159 """
161 try:
--> 162 result = await self._store.get(key, _byte_request_to_tuple(byte_range))
163 except KeyError as _e:
164 # Zarr python expects None to be returned if the key does not exist
165 # but an IcechunkStore returns an error if the key does not exist
166 return None
IcechunkError: x a virtual chunk in this repository resolves to the url prefix s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/, to be able to fetch the chunk you need to authorize the virtual chunk
| container when you open/create the repository, see https://icechunk.io/en/stable/virtual/
|
| context:
| 0: icechunk::store::get
| with key="sst/c/2/0/0/0" byte_range=From(0)
| at icechunk/src/store.rs:198
|
`-> a virtual chunk in this repository resolves to the url prefix s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/, to be able to fetch the chunk you need to authorize the virtual chunk
container when you open/create the repository, see https://icechunk.io/en/stable/virtual/
I'm guessing we will need something like this to specify anonymous credentials for S3 (example from another workflow):
chunk_bucket = "esip"
chunk_scheme = "s3://"
chunk_endpoint = "https://usgs.osn.mghpcc.org"
storage_name = 'netcdf3'
storage = icechunk.local_filesystem_storage(
path=f'icechunk/{storage_name}'
)
config = icechunk.RepositoryConfig.default()
config.set_virtual_chunk_container(icechunk.VirtualChunkContainer(f"{chunk_scheme}{chunk_bucket}/", icechunk.s3_store(region="us-east-1", endpoint_url=chunk_endpoint)))
credentials = icechunk.containers_credentials({f"{chunk_scheme}{chunk_bucket}/": icechunk.s3_credentials(anonymous=True)})
Thanks @rsignell! Nice catch. I updated the notebook to include `icechunk.containers_credentials`.
@rsignell do you have any interest in updating the two other examples?
Isn't the solution to this error to pass authorize_virtual_chunk_access={prefix: credentials} when opening the icechunk repo?