scikit-downscale icon indicating copy to clipboard operation
scikit-downscale copied to clipboard

add dimension check to PointWiseDownscaler inputs

Open dgergel opened this issue 5 years ago • 1 comment

Currently, if member_id is present as an additional dimension (which can easily happen if you grab multiple ensemble members from CMIP6 when searching the catalog), the PointWiseDownscaler fails with ValueError: dimensions {'member_id'} do not exist. Expected one or more of ('time', 'lat', 'lon'), coming from a dask backend issue. I believe this also occurs with any other extra dims present in the input data (e.g. height). We should probably add a check that raises a more informative error message for extra dims that should be removed. The full traceback is included below for reference.

ValueError                                Traceback (most recent call last)
<ipython-input-81-1210dae294dd> in <module>
----> 1 predicted = model.predict(holdout_subset).load()

/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py in load(self, **kwargs)
    812         dask.array.compute
    813         """
--> 814         ds = self._to_temp_dataset().load(**kwargs)
    815         new = self._from_temp_dataset(ds)
    816         self._variable = new._variable

/opt/conda/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
    656 
    657             # evaluate all the dask arrays simultaneously
--> 658             evaluated_data = da.compute(*lazy_data.values(), **kwargs)
    659 
    660             for k, data in zip(lazy_data, evaluated_data):

/opt/conda/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
    434     keys = [x.__dask_keys__() for x in collections]
    435     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 436     results = schedule(dsk, keys, **kwargs)
    437     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    438 

/opt/conda/lib/python3.7/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   2570                     should_rejoin = False
   2571             try:
-> 2572                 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   2573             finally:
   2574                 for f in futures.values():

/opt/conda/lib/python3.7/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
   1870                 direct=direct,
   1871                 local_worker=local_worker,
-> 1872                 asynchronous=asynchronous,
   1873             )
   1874 

/opt/conda/lib/python3.7/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    765         else:
    766             return sync(
--> 767                 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    768             )
    769 

/opt/conda/lib/python3.7/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
    332     if error[0]:
    333         typ, exc, tb = error[0]
--> 334         raise exc.with_traceback(tb)
    335     else:
    336         return result[0]

/opt/conda/lib/python3.7/site-packages/distributed/utils.py in f()
    316             if callback_timeout is not None:
    317                 future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
--> 318             result[0] = yield future
    319         except Exception as exc:
    320             error[0] = sys.exc_info()

/opt/conda/lib/python3.7/site-packages/tornado/gen.py in run(self)
    733 
    734                     try:
--> 735                         value = future.result()
    736                     except Exception:
    737                         exc_info = sys.exc_info()

/opt/conda/lib/python3.7/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
   1726                             exc = CancelledError(key)
   1727                         else:
-> 1728                             raise exception.with_traceback(traceback)
   1729                         raise exc
   1730                     if errors == "skip":

/opt/conda/lib/python3.7/site-packages/xarray/core/parallel.py in _wrapper()
    285         ]
    286 
--> 287         result = func(*converted_args, **kwargs)
    288 
    289         # check all dims are present

/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/core.py in _fit_wrapper()

/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py in __getitem__()
    641         else:
    642             # xarray-style array indexing
--> 643             return self.isel(indexers=self._item_key_to_dict(key))
    644 
    645     def __setitem__(self, key: Any, value: Any) -> None:

/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py in isel()
   1051         # lists, or zero or one-dimensional np.ndarray's
   1052 
-> 1053         variable = self._variable.isel(indexers, missing_dims=missing_dims)
   1054 
   1055         coords = {}

/opt/conda/lib/python3.7/site-packages/xarray/core/variable.py in isel()
   1069         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
   1070 
-> 1071         indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
   1072 
   1073         key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)

/opt/conda/lib/python3.7/site-packages/xarray/core/utils.py in drop_dims_from_indexers()
    765         if invalid:
    766             raise ValueError(
--> 767                 f"dimensions {invalid} do not exist. Expected one or more of {dims}"
    768             )
    769 

ValueError: dimensions {'member_id'} do not exist. Expected one or more of ('time', 'lat', 'lon')

dgergel avatar Aug 07 '20 23:08 dgergel

+1 for adding a more informative error (and a test that raises such an issue). Thanks for reporting!

jhamman avatar Aug 11 '20 00:08 jhamman