PointWiseDownscaler BCSD models failing on missing `_check_n_features` attribute
Currently, the BCSD temperature and precip models fail when used through PointWiseDownscaler: the BCSD temp and precip objects appear to lose their `_check_n_features` attribute when the PointWiseDownscaler wrapper applies them to a DataArray/Dataset. The supporting stack trace:
<timed exec> in <module>
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/dataarray.py in load(self, **kwargs)
806 dask.array.compute
807 """
--> 808 ds = self._to_temp_dataset().load(**kwargs)
809 new = self._from_temp_dataset(ds)
810 self._variable = new._variable
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
652
653 # evaluate all the dask arrays simultaneously
--> 654 evaluated_data = da.compute(*lazy_data.values(), **kwargs)
655
656 for k, data in zip(lazy_data, evaluated_data):
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
434 keys = [x.__dask_keys__() for x in collections]
435 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 436 results = schedule(dsk, keys, **kwargs)
437 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
438
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2570 should_rejoin = False
2571 try:
-> 2572 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
2573 finally:
2574 for f in futures.values():
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
1870 direct=direct,
1871 local_worker=local_worker,
-> 1872 asynchronous=asynchronous,
1873 )
1874
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
765 else:
766 return sync(
--> 767 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
768 )
769
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
332 if error[0]:
333 typ, exc, tb = error[0]
--> 334 raise exc.with_traceback(tb)
335 else:
336 return result[0]
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/utils.py in f()
316 if callback_timeout is not None:
317 future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
--> 318 result[0] = yield future
319 except Exception as exc:
320 error[0] = sys.exc_info()
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/tornado/gen.py in run(self)
760
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1726 exc = CancelledError(key)
1727 else:
-> 1728 raise exception.with_traceback(traceback)
1729 raise exc
1730 if errors == "skip":
/opt/conda/envs/downscale_latest_latest/lib/python3.7/site-packages/xarray/core/parallel.py in _wrapper()
282 ]
283
--> 284 result = func(*converted_args, **kwargs)
285
286 # check all dims are present
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/core.py in _fit_wrapper()
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/bcsd.py in fit()
/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/base.py in _validate_data()
AttributeError: 'BcsdTemperature' object has no attribute '_check_n_features'
This is also evident when using dir() to compare the attributes of a bare BCSD object with those of the same object wrapped by PointWiseDownscaler. For example:
from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
model = BcsdTemperature(return_anoms=False)
dir(model)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_X_y', '_check_array', '_check_n_features', '_create_groups', '_fit_attributes', '_get_param_names', '_get_tags', '_more_tags', '_pre_fit', '_qm_fit_by_group', '_qm_transform_by_group', '_remove_climatology', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_timestep', '_validate_data', 'climate_trend', 'climate_trend_grouper', 'fit', 'get_params', 'predict', 'qm_kwargs', 'return_anoms', 'set_params', 'time_grouper']
versus
from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
model = PointWiseDownscaler(BcsdTemperature(return_anoms=False))
dir(model)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_dim', '_model', '_models', '_to_feature_x', 'fit', 'predict', 'transform']
It appears that this comes directly from how we pass the BCSD object around in the _fit_wrapper function, e.g. here: https://github.com/jhamman/scikit-downscale/blob/master/skdownscale/pointwise_models/core.py#L68. This should be pretty straightforward to fix (it looks like copy.deepcopy just isn't copying the methods), but I thought it was worth a design discussion before opening a PR with a fix.
cc @jhamman