zarr-python icon indicating copy to clipboard operation
zarr-python copied to clipboard

[v3] array written by tensorstore returns all nulls

Open joshmoore opened this issue 1 year ago • 4 comments
trafficstars

Arrays written by tensorstore are read back by zarr-python (via open_array, open, AsyncArray.open, etc.) as containing only fill values.

$ ./ts_info.py output.zarr/0
min=3 max=4095

$ ./zr_info.py output.zarr/0/
min=0 max=0
zarr.json
{
  "chunk_grid": {
    "configuration": {
      "chunk_shape": [
        1,
        1,
        275,
        271
      ]
    },
    "name": "regular"
  },
  "chunk_key_encoding": {
    "name": "default"
  },
  "codecs": [
    {
      "configuration": {
        "endian": "little"
      },
      "name": "bytes"
    },
    {
      "configuration": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "shuffle": "shuffle",
        "typesize": 2
      },
      "name": "blosc"
    }
  ],
  "data_type": "uint16",
  "dimension_names": [
    "c",
    "z",
    "y",
    "x"
  ],
  "fill_value": 0,
  "node_type": "array",
  "shape": [
    2,
    236,
    275,
    271
  ],
  "zarr_format": 3
}

joshmoore avatar Jul 11 '24 10:07 joshmoore

that's so bad that I'm hopeful that there's a simple fix!

d-v-b avatar Jul 11 '24 11:07 d-v-b

Let me know if you would be interested in the creation code and/or the dataset itself.

joshmoore avatar Jul 11 '24 11:07 joshmoore

that would be great!

d-v-b avatar Jul 11 '24 12:07 d-v-b

Workflow:

  • Download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr
  • Clone resave.py https://github.com/ome/ome2024-ngff-challenge/pull/3
  • mamba env create -n ex -f environment.yaml
  • ./resave.py 6001240.zarr output.zarr
ts_info.py
#!/usr/bin/env python
import random
import numpy as np
import zarr
import sys
import os

import tensorstore as ts

import argparse
# Command-line interface for ts_info.py. The --input-* options describe where
# the array lives; create_configs() builds an s3 store config when
# --input-bucket is given and a local "file" config otherwise.
parser = argparse.ArgumentParser()
parser.add_argument("--input-bucket")
parser.add_argument("--input-endpoint")
parser.add_argument("--input-anon", action="store_true")
parser.add_argument("--input-region", default="us-east-1")
# Driver name handed to ts.open() in info().
parser.add_argument("--input-driver", default="zarr3")
# Required positional: path of the array inside the store.
parser.add_argument("input_path")
# Parsed at import time; `ns` is read by the module-level code and by info().
ns = parser.parse_args()


def create_configs(ns):
    """Return a list holding one storage-config dict per selection prefix.

    For each prefix (currently only ``"input"``) the attributes
    ``<prefix>_anon``, ``<prefix>_bucket``, ``<prefix>_endpoint`` and
    ``<prefix>_region`` are read from ``ns``.

    * If the bucket is truthy, the dict describes an ``s3`` store with the
      bucket and region, plus ``aws_credentials`` when anonymous access was
      requested and ``endpoint`` when a custom endpoint was given.
    * Otherwise the dict is just ``{'driver': 'file'}``.

    Args:
        ns: Object exposing the four ``input_*`` attributes (e.g. the
            ``argparse`` namespace built by this script).

    Returns:
        list[dict]: One config dict per selection prefix, in order.
    """

    def _config_for(prefix):
        # All four attributes are fetched before branching, so a missing one
        # raises AttributeError even on the local-file path.
        use_anon = getattr(ns, f"{prefix}_anon")
        bucket_name = getattr(ns, f"{prefix}_bucket")
        endpoint_url = getattr(ns, f"{prefix}_endpoint")
        aws_region = getattr(ns, f"{prefix}_region")

        if not bucket_name:
            return {'driver': 'file'}

        spec = {'driver': 's3', 'bucket': bucket_name, 'aws_region': aws_region}
        if use_anon:
            spec['aws_credentials'] = {'anonymous': use_anon}
        if endpoint_url:
            spec["endpoint"] = endpoint_url
        return spec

    return [_config_for(prefix) for prefix in ("input",)]

# Built once at import time from the parsed arguments; info() fills in the
# "path" key of CONFIGS[0] and passes it to tensorstore as the kvstore spec.
CONFIGS = create_configs(ns)

def info(input_path: str):
    """Open the array at ``input_path`` with tensorstore and return the handle.

    The module-level ``CONFIGS[0]`` dict is updated in place with the path and
    then used as the ``kvstore`` spec; the driver name comes from
    ``ns.input_driver``.

    Args:
        input_path: Path of the array inside the configured store.

    Returns:
        Whatever ``ts.open(...).result()`` yields; the caller slices it and
        calls ``.read().result()`` to obtain the data.
    """
    # Deliberately mutates the shared config so the path becomes part of the
    # kvstore spec that is handed to tensorstore.
    CONFIGS[0]["path"] = input_path

    # .result() waits for the open to finish before the handle is returned.
    return ts.open({
        'driver': ns.input_driver,
        'kvstore': CONFIGS[0],
    }).result()

# Script body: open the array, read the whole thing into memory, and print its
# minimum and maximum so they can be compared with zr_info.py's output.
read = info(ns.input_path)
arr = read[:].read().result()
print(np.min(arr), np.max(arr))
zr_info.py
#!/usr/bin/env python
import random
import numpy as np
import zarr
import sys
import os

import logging
# level=0 is logging.NOTSET on the root logger, so every record is emitted;
# this is why the DEBUG:asyncio lines show up in the quoted output.
logging.basicConfig(level=0)

import argparse
# Command-line interface for zr_info.py. Same --input-* options as ts_info.py,
# minus --input-driver.
parser = argparse.ArgumentParser()
parser.add_argument("--input-bucket")
parser.add_argument("--input-endpoint")
parser.add_argument("--input-anon", action="store_true")
parser.add_argument("--input-region", default="us-east-1")
# Required positional: path of the array inside the store.
parser.add_argument("input_path")
# Parsed at import time; `ns` is read by the module-level code below.
ns = parser.parse_args()


def create_configs(ns):
    """Translate parsed CLI options into a list of storage-config dicts.

    One dict is produced per selection prefix (only ``"input"`` here). A
    truthy ``<prefix>_bucket`` yields an ``s3`` config carrying the bucket and
    region, with ``aws_credentials`` added when ``<prefix>_anon`` is truthy
    and ``endpoint`` added when ``<prefix>_endpoint`` is truthy. Without a
    bucket the config is ``{'driver': 'file'}``.

    Args:
        ns: Object exposing ``input_anon``, ``input_bucket``,
            ``input_endpoint`` and ``input_region`` attributes.

    Returns:
        list[dict]: The config dicts, one per selection prefix.
    """
    results = []
    for prefix in ("input",):
        # Fetch every option first (same order for every call), then branch.
        use_anon = getattr(ns, f"{prefix}_anon")
        bucket_name = getattr(ns, f"{prefix}_bucket")
        endpoint_url = getattr(ns, f"{prefix}_endpoint")
        aws_region = getattr(ns, f"{prefix}_region")

        # No bucket means a local store; nothing else is consulted.
        if not bucket_name:
            results.append({'driver': 'file'})
            continue

        entry = {
            'driver': 's3',
            'bucket': bucket_name,
            'aws_region': aws_region,
        }
        if use_anon:
            entry.update({'aws_credentials': {'anonymous': use_anon}})
        if endpoint_url:
            entry.update({"endpoint": endpoint_url})
        results.append(entry)
    return results

# Storage configs derived from the CLI arguments (one entry, for "input").
CONFIGS = create_configs(ns)

# zarr store objects built from CONFIGS; info() opens STORES[0].
# A config containing "bucket" becomes a RemoteStore on an s3:// URL, with the
# anonymous flag and optional endpoint carried over; any other config becomes
# a LocalStore on the given path. The single store is opened read-only ("r").
# NOTE: the `zarr.store` namespace matches the 3.0.0a0 run; the diff later in
# this thread switches LocalStore to `zarr.storage.local.LocalStore` for
# 3.0.0b1.
STORES = []
for config, path, mode in (
        (CONFIGS[0], ns.input_path, "r"),
    ):
    if "bucket" in config:
        store_class = zarr.store.RemoteStore
        anon = config.get("aws_credentials", {}).get("anonymous", False)
        store = store_class(
            url=f's3://{config["bucket"]}/{path}',
            anon=anon,
            endpoint_url=config.get("endpoint", None),
            mode=mode,
        )
    else:
        store_class = zarr.store.LocalStore
        store = store_class(path, mode=mode)

    STORES.append(store)

async def info(input_path: str):
    """Open the array held in ``STORES[0]`` with zarr and return it.

    ``input_path`` is not used in the body; the store was already built from
    ``ns.input_path`` at module level. Only the ``zarr.open`` call runs:
    everything after that ``return`` is unreachable and is kept as a record of
    the other entry points that were tried while reproducing the issue.
    """
    # Alternative 1 (disabled): synchronous `open`.
    # from zarr.api.synchronous import open
    # return open(store=STORES[0], zarr_version=3)

    # Alternative 2 (disabled): synchronous `open_array`.
    # from zarr.api.synchronous import open_array
    # return open_array(store=STORES[0], zarr_version=3)

    # Active path: top-level zarr.open, forcing the v3 format.
    import zarr
    return zarr.open(store=STORES[0], zarr_version=3)

    # --- Unreachable from here on (the return above always fires). ---
    if False:
        # Alternative 3 (disabled): AsyncArray.open followed by getitem.
        from zarr.array import Array, AsyncArray
        from zarr.buffer import default_buffer_prototype, NDBuffer
        arr = await AsyncArray.open(store=STORES[0])

        whole = [
            slice(0, x) for x in arr.shape
        ]
        out = NDBuffer.from_numpy_array(np.empty(arr.shape))
        # return await arr._get_selection([], out=out, prototype=default_buffer_prototype)
        return await arr.getitem(slice(None)) # 0, 0

    elif False:
        # Alternative 4 (disabled): asynchronous `open` + get_basic_selection.
        # NOTE(review): five Ellipsis entries are passed although the zarr.json
        # shown in this thread describes a 4-D array; unverified, since this
        # branch never runs.
        from zarr.api.asynchronous import open
        arr = await open(store=STORES[0], mode="r")
        return await arr.get_basic_selection((..., ..., ..., ..., ...))


import asyncio
# Script body: run the info() coroutine to completion and print the minimum
# and maximum of what it returned.
# NOTE: calling asyncio.get_event_loop() when no loop is running is deprecated;
# it is the source of the "There is no current event loop" DeprecationWarning
# in the output quoted later in this thread. `asyncio.run(info(ns.input_path))`
# is the modern replacement for these three loop statements.
loop = asyncio.get_event_loop()
arr = loop.run_until_complete(info(ns.input_path))
loop.close()
print(np.min(arr), np.max(arr))

joshmoore avatar Jul 11 '24 12:07 joshmoore

Download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr

It seems I cannot download this dataset. Was it deleted, or do I need some kind of authorization? The workflow also did not work well for me. Could you provide the output.zarr? Then I will have a look at this.

brokkoli71 avatar Nov 06 '24 19:11 brokkoli71

@joshmoore - do you have time to look at this again? If not, I suggest we close this.

jhamman avatar Nov 13 '24 23:11 jhamman

  • Download is possible via aws s3 cp --recursive --endpoint-url=https://uk1s3.embassy.ebi.ac.uk --no-sign-request s3://idr/zarr/v0.4/idr0062A/6001240.zarr/ 6001240.zarr/
  • An existing output.zarr is available at https://uk1s3.embassy.ebi.ac.uk/ebi-ngff-challenge-2024/4ffaeed2-fa70-4907-820f-8a96ef683095.zarr (validator)
  • But more importantly (❗): after updating to 3.0.0b1 the error goes away:

Target

$./ts_info.py 4ffaeed2-fa70-4907-820f-8a96ef683095.zarr/0/
0 255

with 3.0.0a0

$./zr_info.py 4ffaeed2-fa70-4907-820f-8a96ef683095.zarr/0/
/private/tmp/issue-2029/./zr_info.py:99: DeprecationWarning: There is no current event loop
  loop = asyncio.get_event_loop()
DEBUG:asyncio:Using selector: KqueueSelector
DEBUG:asyncio:Using selector: KqueueSelector
0 0

Update to 3.0.0b1 and retry

$ diff --git a/zr_info.py b/zr_info.py
index 5caf7f4..a1fdfd9 100755
--- a/zr_info.py
+++ b/zr_info.py
@@ -59,7 +59,7 @@ for config, path, mode in (
             mode=mode,
         )
     else:
-        store_class = zarr.store.LocalStore
+        store_class = zarr.storage.local.LocalStore
         store = store_class(path, mode=mode)

     STORES.append(store)
$ pip install zarr==3.0.0b1
$ ./zr_info.py 4ffaeed2-fa70-4907-820f-8a96ef683095.zarr/0/
/private/tmp/issue-2029/./zr_info.py:96: DeprecationWarning: There is no current event loop
  loop = asyncio.get_event_loop()
DEBUG:asyncio:Using selector: KqueueSelector
DEBUG:asyncio:Using selector: KqueueSelector
0 255

:tada:

joshmoore avatar Nov 14 '24 09:11 joshmoore