scikit-downscale icon indicating copy to clipboard operation
scikit-downscale copied to clipboard

PointWiseDownscaler BCSD models failing on missing `_check_n_features` attribute

Open dgergel opened this issue 5 years ago • 0 comments

Currently, the PointWiseDownscaler BCSD temperature and precipitation models are failing because the BCSD temperature and precipitation objects lose their `_check_n_features` attribute when they are applied to a DataArray/Dataset through the PointWiseDownscaler wrapper. The supporting stack trace:

<timed exec> in <module>
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/dataarray.py in load(self, **kwargs)
    806         dask.array.compute
    807         """
--> 808         ds = self._to_temp_dataset().load(**kwargs)
    809         new = self._from_temp_dataset(ds)
    810         self._variable = new._variable
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
    652 
    653             # evaluate all the dask arrays simultaneously
--> 654             evaluated_data = da.compute(*lazy_data.values(), **kwargs)
    655 
    656             for k, data in zip(lazy_data, evaluated_data):
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
    434     keys = [x.__dask_keys__() for x in collections]
    435     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 436     results = schedule(dsk, keys, **kwargs)
    437     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    438 
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   2570                     should_rejoin = False
   2571             try:
-> 2572                 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   2573             finally:
   2574                 for f in futures.values():
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
   1870                 direct=direct,
   1871                 local_worker=local_worker,
-> 1872                 asynchronous=asynchronous,
   1873             )
   1874 
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    765         else:
    766             return sync(
--> 767                 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    768             )
    769 
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
    332     if error[0]:
    333         typ, exc, tb = error[0]
--> 334         raise exc.with_traceback(tb)
    335     else:
    336         return result[0]
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/utils.py in f()
    316             if callback_timeout is not None:
    317                 future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
--> 318             result[0] = yield future
    319         except Exception as exc:
    320             error[0] = sys.exc_info()
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/tornado/gen.py in run(self)
    760 
    761                     try:
--> 762                         value = future.result()
    763                     except Exception:
    764                         exc_info = sys.exc_info()
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
   1726                             exc = CancelledError(key)
   1727                         else:
-> 1728                             raise exception.with_traceback(traceback)
   1729                         raise exc
   1730                     if errors == "skip":
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/parallel.py in _wrapper()
    282         ]
    283 
--> 284         result = func(*converted_args, **kwargs)
    285 
    286         # check all dims are present
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/core.py in _fit_wrapper()
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/bcsd.py in fit()
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/base.py in _validate_data()
AttributeError: 'BcsdTemperature' object has no attribute '_check_n_features'

This is also evident when using the dir() function to compare the attributes of a BCSD object with those of the same object wrapped by PointWiseDownscaler. For example:

from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
model = BcsdTemperature(return_anoms=False)
dir(model)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_X_y', '_check_array', '_check_n_features', '_create_groups', '_fit_attributes', '_get_param_names', '_get_tags', '_more_tags', '_pre_fit', '_qm_fit_by_group', '_qm_transform_by_group', '_remove_climatology', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_timestep', '_validate_data', 'climate_trend', 'climate_trend_grouper', 'fit', 'get_params', 'predict', 'qm_kwargs', 'return_anoms', 'set_params', 'time_grouper']

versus

from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
model = PointWiseDownscaler(BcsdTemperature(return_anoms=False))
dir(model) 
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_dim', '_model', '_models', '_to_feature_x', 'fit', 'predict', 'transform']

It appears that this comes directly from how we're passing the BCSD object around in the _fit_wrapper function, e.g. here: https://github.com/jhamman/scikit-downscale/blob/master/skdownscale/pointwise_models/core.py#L68. This should be pretty straightforward to fix; it looks like copy.deepcopy just isn't carrying the methods over. However, I thought it was worth having a conversation about design considerations before opening a PR with a fix.

cc @jhamman

dgergel avatar Dec 03 '20 18:12 dgergel