stac-rs icon indicating copy to clipboard operation
stac-rs copied to clipboard

GDAL GTI

Open rbavery opened this issue 1 year ago • 4 comments

I think it would be neat to be able to load the result of a stacrs query as a lazy xarray.DataSet via rioxarray. The new GDAL Raster Tile Index (GTI) enables this

import xarray
ds = xarray.open_dataset("GTI:https://github.com/mdsumner/gti/raw/refs/heads/main/inst/extdata/tmerc_seaice.fgb", engine = "rasterio")

when chunks are accessed from the xarray dataset, reprojection happens behind the scenes for the user

example from https://gist.github.com/mdsumner/724f1db0bd0d211de6e3775486cd3df0

I tried out GTI on a stac-geoparquet query result but I think there's a difference in how stacrs formats the result and what GTI expects.

Query setup, search, save to stac-geoparquet

from datetime import datetime, timedelta
import stacrs
def convert_to_iso_datetime_range(simple_date_range):
    start_date_str, end_date_str = simple_date_range.split('/')
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
    start_date_iso = start_date.strftime("%Y-%m-%dT%H:%M:%S.%fZ")[:-3] + "Z"
    end_date_iso = (end_date - timedelta(seconds=1)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")[:-3] + "Z"
    return f"{start_date_iso}/{end_date_iso}"

max_cloud_cover = 15
max_nodata_percent = 15
query_args = {
            "eo:cloud_cover": {"lt": max_cloud_cover},
            "s2:nodata_pixel_percentage": {"lt": max_nodata_percent},
        }
simple_date_range = "2023-01-01/2024-01-01"
iso_datetime_range = convert_to_iso_datetime_range(simple_date_range)
collection_id = "sentinel-2-c1-l2a"
catalog = "https://earth-search.aws.element84.com/v1"
lon, lat = -111.760826, 34.871002

# stack args
assets=["rededge1", "rededge2", "rededge3", "nir", "swir16", "swir22"]
rescale=False
xy_coords="center"
stack_id = "grid:code"
stack_dim = "time"

stacrs.search_to(
    "items.parquet",
    catalog,
    collections=collection_id,
    intersects={"type": "Point", "coordinates": [lon, lat]},
    sortby="-properties.datetime",
    max_items=20,
    datetime=iso_datetime_range,
    query=query_args
)

Trying to open as a GTI backed xarray.Dataset.

ds = xarray.open_dataset("GTI:/home/rave/dask_on_ray_poc/items.parquet", engine = "rasterio")

the error is below, I think the uris to the assets are expected in a different column name and possible a different structure

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    210 try:
--> 211     file = self._cache[self._key]
    212 except KeyError:

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
     55 with self._lock:
---> 56     value = self._cache[key]
     57     self._cache.move_to_end(key)

KeyError: [<function open at 0x7f06008f58a0>, ('GT[I:/home/rave/dask_on_ray_poc/items.parquet](file:///I:/home/rave/dask_on_ray_poc/items.parquet)',), 'r', (('sharing', False),), '8d86ba30-5382-4017-9502-74600ad8d83e']

During handling of the above exception, another exception occurred:

CPLE_AppDefinedError                      Traceback (most recent call last)
File rasterio/_base.pyx:310, in rasterio._base.DatasetBase.__init__()

File rasterio/_base.pyx:221, in rasterio._base.open_dataset()

File rasterio/_err.pyx:359, in rasterio._err.exc_wrap_pointer()

CPLE_AppDefinedError: Cannot find field location

During handling of the above exception, another exception occurred:

RasterioIOError                           Traceback (most recent call last)
Cell In[6], line 1
----> 1 ds = xarray.open_dataset("GT[I:/home/rave/dask_on_ray_poc/items.parquet](file:///I:/home/rave/dask_on_ray_poc/items.parquet)", engine = "rasterio")

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/api.py:671, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    659 decoders = _resolve_decoders_kwargs(
    660     decode_cf,
    661     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    667     decode_coords=decode_coords,
    668 )
    670 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 671 backend_ds = backend.open_dataset(
    672     filename_or_obj,
    673     drop_variables=drop_variables,
    674     **decoders,
    675     **kwargs,
    676 )
    677 ds = _dataset_from_backend_dataset(
    678     backend_ds,
    679     filename_or_obj,
   (...)
    689     **kwargs,
    690 )
    691 return ds

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rioxarray/xarray_plugin.py:58, in RasterioBackend.open_dataset(self, filename_or_obj, drop_variables, parse_coordinates, lock, masked, mask_and_scale, variable, group, default_name, decode_coords, decode_times, decode_timedelta, band_as_variable, open_kwargs)
     56 if open_kwargs is None:
     57     open_kwargs = {}
---> 58 rds = _io.open_rasterio(
     59     filename_or_obj,
     60     parse_coordinates=parse_coordinates,
     61     cache=False,
     62     lock=lock,
     63     masked=masked,
     64     mask_and_scale=mask_and_scale,
     65     variable=variable,
     66     group=group,
     67     default_name=default_name,
     68     decode_times=decode_times,
     69     decode_timedelta=decode_timedelta,
     70     band_as_variable=band_as_variable,
     71     **open_kwargs,
     72 )
     73 if isinstance(rds, xarray.DataArray):
     74     dataset = rds.to_dataset()

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rioxarray/_io.py:1128, in open_rasterio(filename, parse_coordinates, chunks, cache, lock, masked, mask_and_scale, variable, group, default_name, decode_times, decode_timedelta, band_as_variable, **open_kwargs)
   1126     else:
   1127         manager = URIManager(file_opener, filename, mode="r", kwargs=open_kwargs)
-> 1128     riods = manager.acquire()
   1129     captured_warnings = rio_warnings.copy()
   1131 # raise the NotGeoreferencedWarning if applicable

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/file_manager.py:193, in CachingFileManager.acquire(self, needs_lock)
    178 def acquire(self, needs_lock=True):
    179     """Acquire a file object from the manager.
    180 
    181     A new file is only opened if it has expired from the
   (...)
    191         An open file object, as returned by ``opener(*args, **kwargs)``.
    192     """
--> 193     file, _ = self._acquire_with_cache_info(needs_lock)
    194     return file

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    215     kwargs = kwargs.copy()
    216     kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
    218 if self._mode == "w":
    219     # ensure file doesn't get overridden when opened again
    220     self._mode = "a"

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rasterio/env.py:463, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds)
    460     session = DummySession()
    462 with env_ctor(session=session):
--> 463     return f(*args, **kwds)

File ~/dask_on_ray_poc/.pixi/envs/default/lib/python3.11/site-packages/rasterio/__init__.py:355, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, opener, **kwargs)
    352     path = _parse_path(raw_dataset_path)
    354 if mode == "r":
--> 355     dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
    356 elif mode == "r+":
    357     dataset = get_writer_for_path(path, driver=driver)(
    358         path, mode, driver=driver, sharing=sharing, **kwargs
    359     )

File rasterio/_base.pyx:312, in rasterio._base.DatasetBase.__init__()

RasterioIOError: Cannot find field location

I'm looking into why this field issue is occurring and curious if once this is diagnosed we could get this working in stacrs?

rbavery avatar Nov 21 '24 00:11 rbavery