zarr-python icon indicating copy to clipboard operation
zarr-python copied to clipboard

Opening Icechunk using Zarr 3.0 in a flask context

Open adanb13 opened this issue 3 months ago • 1 comment

I am trying to open a local .icechunk via:

import xarray as xr
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
import icechunk
import zarr


def open_icechunk_dataset(file_path: Path, **kwargs) -> xr.Dataset:
    """
    Load an Icechunk dataset using the Icechunk Python API,
    which returns a Zarr v3-compatible store for xarray.

    The repository is opened from local filesystem storage and read through
    a read-only session on the "main" branch.

    Args:
        file_path: Path to .icechunk directory
        **kwargs: Additional kwargs for xr.open_zarr. ``zarr_format`` defaults
            to 3 and ``consolidated`` defaults to False; either may be
            overridden here (e.g. consolidated=False may be passed explicitly
            without conflicting with the defaults).
    Returns:
        xarray.Dataset
    """
    storage = icechunk.local_filesystem_storage(str(file_path))
    # Icechunk API (as per https://icechunk.io/en/latest/overview/#icechunk-overview)
    repo = icechunk.Repository.open(storage)
    logger.debug("repo: %s", dir(repo))
    session = repo.readonly_session("main")
    logger.debug("session: %s", dir(session))

    # Merge the defaults with the caller's kwargs instead of passing both as
    # separate keyword arguments: supplying `consolidated` or `zarr_format`
    # via **kwargs would otherwise raise "got multiple values for keyword
    # argument". Caller-supplied values win.
    open_kwargs = {"zarr_format": 3, "consolidated": False, **kwargs}
    ds = xr.open_zarr(session.store, **open_kwargs)
    logger.debug("DS: %s", ds)
    return ds

Within a Flask context I get the following traceback:

  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/pygeoapi/icechunk/icechunk_engine.py", line 33, in open_icechunk_dataset
    ds = xr.open_zarr(
         ^^^^^^^^^^^^^
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 1436, in open_zarr
    ds = open_dataset(
         ^^^^^^^^^^^^^
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/api.py", line 670, in open_dataset
    backend_ds = backend.open_dataset(
                 ^^^^^^^^^^^^^^^^^^^^^
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 1508, in open_dataset
    store = ZarrStore.open_group(
            ^^^^^^^^^^^^^^^^^^^^^
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 693, in open_group
    ) = _get_open_params(
        ^^^^^^^^^^^^^^^^^
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/xarray/backends/zarr.py", line 1735, in _get_open_params
    zarr_group = zarr.open_group(store, **open_kwargs)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/zarr/api/synchronous.py", line 531, in open_group
    sync(
  File "/apps/msc-ip-api-nightly/venv/lib/python3.11/site-packages/zarr/core/sync.py", line 150, in sync
    raise SyncError("Calling sync() from within a running loop")
zarr.core.sync.SyncError: Calling sync() from within a running loop

I can confirm that the code being called (open_icechunk_dataset) works as expected in an isolated environment, so I am not sure what is going on in this case.

Using the following: "zarr!=3.0.3,>=3"

adanb13 avatar Sep 17 '25 05:09 adanb13

I've been trying to reproduce using a flask app and can't seem to trigger this error. Can you provide a minimal example that triggers the SyncError?

Here's my attempt, but it works in all the cases I've tried.

from flask import Flask, jsonify
import zarr
import icechunk

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

@app.route("/")
def index():
    """Write five integers to a dict-backed Zarr array and return the first three as JSON."""
    # A plain dict is handed to Zarr as the store for the new root group.
    group = zarr.create_group(store={})
    data = group.create_array("data", shape=(5,), dtype="i4", overwrite=True)
    data[:] = [1, 2, 3, 4, 5]

    # Slice the array, then convert to a plain list so it can be serialized.
    head = data[:3].tolist()
    return jsonify({"first_three": head})

@app.route("/ic")
def ic():
    """Round-trip a small array through an in-memory Icechunk repository.

    The array is written inside a transaction on the "main" branch, read back
    through a separate read-only session, and its first three values are
    returned as JSON.
    """
    repo = icechunk.Repository.create(storage=icechunk.in_memory_storage())

    # Write phase: create the group and array through the transaction's store.
    with repo.transaction(branch="main", message="wrote some data") as writable_store:
        group = zarr.create_group(store=writable_store)
        written = group.create_array("data", shape=(5,), dtype="i4", overwrite=True)
        written[:] = [1, 2, 3, 4, 5]

    # Read phase: reopen the array via a fresh read-only session on "main".
    readonly_store = repo.readonly_session("main").store
    values = zarr.open_array(store=readonly_store, path="data")
    head = values[:3]

    return jsonify({"first_three": head.tolist()})

# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # Runs the app with debug mode switched on.
    app.run(debug=True)

Note that it works for both vanilla Zarr and Icechunk stores.

jhamman avatar Sep 17 '25 05:09 jhamman