zarr-python
zarr-python copied to clipboard
[v3] array written by tensorstore returns all nulls
Arrays written by tensorstore are being returned by open_array, open, AsyncArray.open, etc. as having all fill values.
$ ./ts_info.py output.zarr/0
min=3 max=4095
$ ./zr_info.py output.zarr/0/
min=0 max=0
zarr.json
{
"chunk_grid": {
"configuration": {
"chunk_shape": [
1,
1,
275,
271
]
},
"name": "regular"
},
"chunk_key_encoding": {
"name": "default"
},
"codecs": [
{
"configuration": {
"endian": "little"
},
"name": "bytes"
},
{
"configuration": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"shuffle": "shuffle",
"typesize": 2
},
"name": "blosc"
}
],
"data_type": "uint16",
"dimension_names": [
"c",
"z",
"y",
"x"
],
"fill_value": 0,
"node_type": "array",
"shape": [
2,
236,
275,
271
],
"zarr_format": 3
}
that's so bad that I'm hopeful that there's a simple fix!
Let me know if you would be interested in the creation code and/or the dataset itself.
that would be great!
Workflow:
- Download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr
- Clone resave.py https://github.com/ome/ome2024-ngff-challenge/pull/3
mamba env create -n ex -f environment.yaml
./resave.py 6001240.zarr output.zarr
ts_info.py
#!/usr/bin/env python
import random
import numpy as np
import zarr
import sys
import os
import tensorstore as ts
import argparse
# Command-line interface: optional S3 connection settings, the tensorstore
# driver name, and the path of the array to inspect.
parser = argparse.ArgumentParser()
parser.add_argument("--input-bucket")
parser.add_argument("--input-endpoint")
parser.add_argument("--input-anon", action="store_true")
parser.add_argument("--input-region", default="us-east-1")
parser.add_argument("--input-driver", default="zarr3")
parser.add_argument("input_path")
# Parsed once at module level; create_configs() and info() read this namespace.
ns = parser.parse_args()
def create_configs(ns):
    """Build one key-value store spec per selection from parsed CLI options.

    For each selection prefix (currently only ``"input"``) the attributes
    ``<prefix>_anon``, ``<prefix>_bucket``, ``<prefix>_endpoint`` and
    ``<prefix>_region`` are read from *ns*.

    Args:
        ns: Namespace-like object exposing the attributes listed above.

    Returns:
        A list with one dict per selection. When a bucket is given the dict
        describes an ``'s3'`` store (with optional anonymous credentials and
        endpoint); otherwise it is ``{'driver': 'file'}``.
    """
    configs = []
    for selection in ("input",):
        anon = getattr(ns, f"{selection}_anon")
        bucket = getattr(ns, f"{selection}_bucket")
        endpoint = getattr(ns, f"{selection}_endpoint")
        region = getattr(ns, f"{selection}_region")

        if bucket:
            store = {
                'driver': 's3',
                'bucket': bucket,
                'aws_region': region,
            }
            # Credentials and endpoint only make sense for the S3 branch.
            if anon:
                store['aws_credentials'] = {'anonymous': anon}
            if endpoint:
                store["endpoint"] = endpoint
        else:
            store = {
                'driver': 'file',
            }

        configs.append(store)
    return configs
CONFIGS = create_configs(ns)


def info(input_path: str):
    """Open the array at *input_path* with tensorstore and return the handle.

    Note that this writes *input_path* into the module-level ``CONFIGS[0]``
    dict (as its ``"path"`` entry) before using that dict as the ``kvstore``
    of the open spec. The driver name comes from ``ns.input_driver``.

    Args:
        input_path: Path of the array inside the configured store.

    Returns:
        The result of ``ts.open(...).result()``.
    """
    CONFIGS[0]["path"] = input_path
    return ts.open({
        'driver': ns.input_driver,
        'kvstore': CONFIGS[0],
    }).result()
# Open the array, read every element eagerly, then report the value range.
read = info(ns.input_path)
pending = read[:].read()
arr = pending.result()
smallest, largest = np.min(arr), np.max(arr)
print(smallest, largest)
zr_info.py
#!/usr/bin/env python
import random
import numpy as np
import zarr
import sys
import os
import logging
logging.basicConfig(level=0)
import argparse
# Command-line interface: optional S3 connection settings plus the path of
# the array to inspect.
parser = argparse.ArgumentParser()
parser.add_argument("--input-bucket")
parser.add_argument("--input-endpoint")
parser.add_argument("--input-anon", action="store_true")
parser.add_argument("--input-region", default="us-east-1")
parser.add_argument("input_path")
# Parsed once at module level; the code below reads this namespace.
ns = parser.parse_args()
def create_configs(ns):
    """Build one store description per selection from parsed CLI options.

    For each selection prefix (currently only ``"input"``) the attributes
    ``<prefix>_anon``, ``<prefix>_bucket``, ``<prefix>_endpoint`` and
    ``<prefix>_region`` are read from *ns*.

    Args:
        ns: Namespace-like object exposing the attributes listed above.

    Returns:
        A list with one dict per selection. When a bucket is given the dict
        describes an ``'s3'`` store (with optional anonymous credentials and
        endpoint); otherwise it is ``{'driver': 'file'}``.
    """
    configs = []
    for selection in ("input",):
        anon = getattr(ns, f"{selection}_anon")
        bucket = getattr(ns, f"{selection}_bucket")
        endpoint = getattr(ns, f"{selection}_endpoint")
        region = getattr(ns, f"{selection}_region")

        if bucket:
            store = {
                'driver': 's3',
                'bucket': bucket,
                'aws_region': region,
            }
            # Credentials and endpoint only make sense for the S3 branch.
            if anon:
                store['aws_credentials'] = {'anonymous': anon}
            if endpoint:
                store["endpoint"] = endpoint
        else:
            store = {
                'driver': 'file',
            }

        configs.append(store)
    return configs
CONFIGS = create_configs(ns)

# One zarr store per (config, path, mode) triple; only the input store is
# needed here, opened read-only.
STORES = []
for config, path, mode in (
    (CONFIGS[0], ns.input_path, "r"),
):
    if "bucket" in config:
        # S3-style config: translate the spec into RemoteStore arguments.
        store_class = zarr.store.RemoteStore
        anon = config.get("aws_credentials", {}).get("anonymous", False)
        store = store_class(
            url=f's3://{config["bucket"]}/{path}',
            anon=anon,
            endpoint_url=config.get("endpoint", None),
            mode=mode,
        )
    else:
        # NOTE: with zarr 3.0.0b1 this class is reached as
        # zarr.storage.local.LocalStore instead.
        store_class = zarr.store.LocalStore
        store = store_class(path, mode=mode)
    STORES.append(store)
async def info(input_path: str):
    """Open the array held by ``STORES[0]`` with zarr-python and return it.

    *input_path* is accepted but not used here: ``STORES[0]`` was already
    built from ``ns.input_path`` at module level.

    Other entry points that were tried against the same store while
    investigating this report:

    * ``zarr.api.synchronous.open(store=STORES[0], zarr_version=3)``
    * ``zarr.api.synchronous.open_array(store=STORES[0], zarr_version=3)``
    * ``AsyncArray.open(store=STORES[0])`` followed by
      ``await arr.getitem(slice(None))``
    * ``zarr.api.asynchronous.open(store=STORES[0], mode="r")``

    Returns:
        Whatever ``zarr.open(store=STORES[0], zarr_version=3)`` returns.
    """
    return zarr.open(store=STORES[0], zarr_version=3)
import asyncio

# asyncio.run() creates a fresh event loop, runs the coroutine to completion
# and closes the loop again. It replaces the manual get_event_loop() /
# run_until_complete() / close() sequence, which emits
# "DeprecationWarning: There is no current event loop".
arr = asyncio.run(info(ns.input_path))
print(np.min(arr), np.max(arr))
Download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr
It seems I cannot download this dataset. Was it deleted, or do I need some kind of authorization for that?
Also, the workflow did not work for me that well. Maybe you can provide the output.zarr? Then I will have a look at this
@joshmoore - do you have time to look at this again? If not, I suggest we close this.
- Download is possible via
aws s3 cp --recursive --endpoint-url=https://uk1s3.embassy.ebi.ac.uk --no-sign-request s3://idr/zarr/v0.4/idr0062A/6001240.zarr/ 6001240.zarr/
- An existing output.zarr is available at https://uk1s3.embassy.ebi.ac.uk/ebi-ngff-challenge-2024/4ffaeed2-fa70-4907-820f-8a96ef683095.zarr (validator)
- But more importantly (❗): after updating to 3.0.0b1, the error goes away:
Target
$ ./ts_info.py 4ffaeed2-fa70-4907-820f-8a96ef683095.zarr/0/
0 255
with 3.0.0a0
$ ./zr_info.py 4ffaeed2-fa70-4907-820f-8a96ef683095.zarr/0/
/private/tmp/issue-2029/./zr_info.py:99: DeprecationWarning: There is no current event loop
loop = asyncio.get_event_loop()
DEBUG:asyncio:Using selector: KqueueSelector
DEBUG:asyncio:Using selector: KqueueSelector
0 0
Update to 3.0.0b1 and retry
$ diff --git a/zr_info.py b/zr_info.py
index 5caf7f4..a1fdfd9 100755
--- a/zr_info.py
+++ b/zr_info.py
@@ -59,7 +59,7 @@ for config, path, mode in (
mode=mode,
)
else:
- store_class = zarr.store.LocalStore
+ store_class = zarr.storage.local.LocalStore
store = store_class(path, mode=mode)
STORES.append(store)
$ pip install zarr==3.0.0b1
$ ./zr_info.py 4ffaeed2-fa70-4907-820f-8a96ef683095.zarr/0/
/private/tmp/issue-2029/./zr_info.py:96: DeprecationWarning: There is no current event loop
loop = asyncio.get_event_loop()
DEBUG:asyncio:Using selector: KqueueSelector
DEBUG:asyncio:Using selector: KqueueSelector
0 255
:tada: