zarr-python
zarr-python copied to clipboard
Opening Icechunk using Zarr 3.0 in a flask context
I am trying to open a local Icechunk repository (a `.icechunk` directory) via:
import xarray as xr
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
import icechunk
import zarr
def open_icechunk_dataset(file_path: Path, **kwargs) -> xr.Dataset:
    """
    Load an Icechunk dataset using the Icechunk Python API,
    which returns a Zarr v3-compatible store for xarray.

    The repository's "main" branch is opened through a read-only session
    and that session's store is handed to ``xr.open_zarr``.

    Args:
        file_path: Path to .icechunk directory
        **kwargs: Additional kwargs for xr.open_zarr. ``zarr_format``
            defaults to 3 and ``consolidated`` defaults to False; either
            may be overridden here.

    Returns:
        xarray.Dataset
    """
    storage = icechunk.local_filesystem_storage(str(file_path))
    # Icechunk API (as per https://icechunk.io/en/latest/overview/#icechunk-overview)
    repo = icechunk.Repository.open(storage)
    logger.debug("repo: %s", dir(repo))
    session = repo.readonly_session("main")
    logger.debug("session: %s", dir(session))

    # Merge the defaults with the caller's kwargs rather than passing both
    # explicitly: supplying e.g. consolidated=False through **kwargs would
    # otherwise raise TypeError for a duplicated keyword argument.
    open_kwargs = {"zarr_format": 3, "consolidated": False, **kwargs}
    ds = xr.open_zarr(session.store, **open_kwargs)
    logger.debug("DS: %s", ds)
    return ds
Within a Flask context I get the following traceback:
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/pygeoapi/icechunk/icechunk_engine.py", line 33, in open_icechunk_dataset
ds = xr.open_zarr(
^^^^^^^^^^^^^
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 1436, in open_zarr
ds = open_dataset(
^^^^^^^^^^^^^
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/api.py", line 670, in open_dataset
backend_ds = backend.open_dataset(
^^^^^^^^^^^^^^^^^^^^^
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 1508, in open_dataset
store = ZarrStore.open_group(
^^^^^^^^^^^^^^^^^^^^^
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 693, in open_group
) = _get_open_params(
^^^^^^^^^^^^^^^^^
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 1735, in _get_open_params
zarr_group = zarr.open_group(store, **open_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/zarr/api/synchronous.py", line 531, in open_group
sync(
File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/zarr/core/sync.py", line 150, in sync
raise SyncError("Calling sync() from within a running loop")
zarr.core.sync.SyncError: Calling sync() from within a running loop
I can confirm that the code being called (`open_icechunk_dataset`) works as expected in an isolated environment, so I am not sure what is going wrong in this case.
Using the following: "zarr!=3.0.3,>=3"
I've been trying to reproduce this using a Flask app and can't seem to trigger the error. Can you provide a minimal example that triggers the `SyncError`?
Here's my attempt, but it works in all the cases I've tried.
from flask import Flask, jsonify
import zarr
import icechunk
# Flask application object; the route decorators in this script register views on it.
app = Flask(__name__)
@app.route("/")
def index():
    """Build a small in-memory Zarr array and return its first three values as JSON."""
    # A plain dict acts as the in-memory backing store for the group.
    group = zarr.create_group(store={})
    data = group.create_array("data", shape=(5,), dtype="i4", overwrite=True)
    data[:] = [1, 2, 3, 4, 5]

    # Slice first, then convert to a plain list so it can be serialised.
    head = data[:3].tolist()
    return jsonify({"first_three": head})
@app.route("/ic")
def ic():
    """Write a small array into an in-memory Icechunk repository, then read it back as JSON."""
    repo = icechunk.Repository.create(storage=icechunk.in_memory_storage())

    # Populate the array inside a transaction on the "main" branch.
    with repo.transaction(branch="main", message="wrote some data") as writable_store:
        group = zarr.create_group(store=writable_store)
        written = group.create_array("data", shape=(5,), dtype="i4", overwrite=True)
        written[:] = [1, 2, 3, 4, 5]

    # Read the data back through a read-only session on the same branch.
    readonly_store = repo.readonly_session("main").store
    data = zarr.open_array(store=readonly_store, path="data")
    head = data[:3]
    return jsonify({"first_three": head.tolist()})
# Start the app with debug=True only when this file is executed as a script.
if __name__ == "__main__":
app.run(debug=True)
Note that this works for both vanilla Zarr and Icechunk stores.