stac-rs
stac-rs copied to clipboard
GDAL GTI
I think it would be neat to be able to load the result of a stacrs query as a lazy xarray.DataSet via rioxarray. The new GDAL Raster Tile Index (GTI) enables this
import xarray
ds = xarray.open_dataset("GTI:https://github.com/mdsumner/gti/raw/refs/heads/main/inst/extdata/tmerc_seaice.fgb", engine = "rasterio")
when chunks are accessed from the xarray dataset, reprojection happens behind the scenes for the user
example from https://gist.github.com/mdsumner/724f1db0bd0d211de6e3775486cd3df0
I tried out GTI on a stac-geoparquet query result but I think there's a difference in how stacrs formats the result and what GTI expects.
Query setup, search, save to stac-geoparquet
from datetime import datetime, timedelta
import stacrs
def convert_to_iso_datetime_range(simple_date_range):
start_date_str, end_date_str = simple_date_range.split('/')
start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
start_date_iso = start_date.strftime("%Y-%m-%dT%H:%M:%S.%fZ")[:-3] + "Z"
end_date_iso = (end_date - timedelta(seconds=1)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")[:-3] + "Z"
return f"{start_date_iso}/{end_date_iso}"
max_cloud_cover = 15
max_nodata_percent = 15
query_args = {
"eo:cloud_cover": {"lt": max_cloud_cover},
"s2:nodata_pixel_percentage": {"lt": max_nodata_percent},
}
simple_date_range = "2023-01-01/2024-01-01"
iso_datetime_range = convert_to_iso_datetime_range(simple_date_range)
collection_id = "sentinel-2-c1-l2a"
catalog = "https://earth-search.aws.element84.com/v1"
lon, lat = -111.760826, 34.871002
# stack args
assets=["rededge1", "rededge2", "rededge3", "nir", "swir16", "swir22"]
rescale=False
xy_coords="center"
stack_id = "grid:code"
stack_dim = "time"
stacrs.search_to(
"items.parquet",
catalog,
collections=collection_id,
intersects={"type": "Point", "coordinates": [lon, lat]},
sortby="-properties.datetime",
max_items=20,
datetime=iso_datetime_range,
query=query_args
)
Trying to open as a GTI backed xarray.Dataset.
ds = xarray.open_dataset("GTI:/home/rave/dask_on_ray_poc/items.parquet", engine = "rasterio")
the error is below, I think the uris to the assets are expected in a different column name and possible a different structure
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
210 try:
--> 211 file = self._cache[self._key]
212 except KeyError:
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
55 with self._lock:
---> 56 value = self._cache[key]
57 self._cache.move_to_end(key)
KeyError: [<function open at 0x7f06008f58a0>, ('GT[I:/home/rave/dask_on_ray_poc/items.parquet](file:///I:/home/rave/dask_on_ray_poc/items.parquet)',), 'r', (('sharing', False),), '8d86ba30-5382-4017-9502-74600ad8d83e']
During handling of the above exception, another exception occurred:
CPLE_AppDefinedError Traceback (most recent call last)
File rasterio/_base.pyx:310, in rasterio._base.DatasetBase.__init__()
File rasterio/_base.pyx:221, in rasterio._base.open_dataset()
File rasterio/_err.pyx:359, in rasterio._err.exc_wrap_pointer()
CPLE_AppDefinedError: Cannot find field location
During handling of the above exception, another exception occurred:
RasterioIOError Traceback (most recent call last)
Cell In[6], line 1
----> 1 ds = xarray.open_dataset("GT[I:/home/rave/dask_on_ray_poc/items.parquet](file:///I:/home/rave/dask_on_ray_poc/items.parquet)", engine = "rasterio")
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/api.py:671, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
659 decoders = _resolve_decoders_kwargs(
660 decode_cf,
661 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
667 decode_coords=decode_coords,
668 )
670 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 671 backend_ds = backend.open_dataset(
672 filename_or_obj,
673 drop_variables=drop_variables,
674 **decoders,
675 **kwargs,
676 )
677 ds = _dataset_from_backend_dataset(
678 backend_ds,
679 filename_or_obj,
(...)
689 **kwargs,
690 )
691 return ds
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rioxarray/xarray_plugin.py:58, in RasterioBackend.open_dataset(self, filename_or_obj, drop_variables, parse_coordinates, lock, masked, mask_and_scale, variable, group, default_name, decode_coords, decode_times, decode_timedelta, band_as_variable, open_kwargs)
56 if open_kwargs is None:
57 open_kwargs = {}
---> 58 rds = _io.open_rasterio(
59 filename_or_obj,
60 parse_coordinates=parse_coordinates,
61 cache=False,
62 lock=lock,
63 masked=masked,
64 mask_and_scale=mask_and_scale,
65 variable=variable,
66 group=group,
67 default_name=default_name,
68 decode_times=decode_times,
69 decode_timedelta=decode_timedelta,
70 band_as_variable=band_as_variable,
71 **open_kwargs,
72 )
73 if isinstance(rds, xarray.DataArray):
74 dataset = rds.to_dataset()
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rioxarray/_io.py:1128, in open_rasterio(filename, parse_coordinates, chunks, cache, lock, masked, mask_and_scale, variable, group, default_name, decode_times, decode_timedelta, band_as_variable, **open_kwargs)
1126 else:
1127 manager = URIManager(file_opener, filename, mode="r", kwargs=open_kwargs)
-> 1128 riods = manager.acquire()
1129 captured_warnings = rio_warnings.copy()
1131 # raise the NotGeoreferencedWarning if applicable
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/file_manager.py:193, in CachingFileManager.acquire(self, needs_lock)
178 def acquire(self, needs_lock=True):
179 """Acquire a file object from the manager.
180
181 A new file is only opened if it has expired from the
(...)
191 An open file object, as returned by ``opener(*args, **kwargs)``.
192 """
--> 193 file, _ = self._acquire_with_cache_info(needs_lock)
194 return file
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
215 kwargs = kwargs.copy()
216 kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
218 if self._mode == "w":
219 # ensure file doesn't get overridden when opened again
220 self._mode = "a"
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rasterio/env.py:463, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds)
460 session = DummySession()
462 with env_ctor(session=session):
--> 463 return f(*args, **kwds)
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rasterio/__init__.py:355, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, opener, **kwargs)
352 path = _parse_path(raw_dataset_path)
354 if mode == "r":
--> 355 dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
356 elif mode == "r+":
357 dataset = get_writer_for_path(path, driver=driver)(
358 path, mode, driver=driver, sharing=sharing, **kwargs
359 )
File rasterio/_base.pyx:312, in rasterio._base.DatasetBase.__init__()
RasterioIOError: Cannot find field location
I'm looking into why this field issue is occurring and curious if once this is diagnosed we could get this working in stacrs?