ibex
ibex copied to clipboard
Unable to pickle ibex-wrapped sklearn transformers
# Minimal reproduction: fit an ibex-wrapped Imputer, round-trip it through
# pickle, and transform the same frame again.
import pickle

import numpy as np
import pandas as pd
from ibex.sklearn.preprocessing import Imputer
from pandas.testing import assert_frame_equal

df_in = pd.DataFrame({'x': [1, np.nan, 3]})
expected_output = pd.DataFrame({'x': [1., 2., 3.]}, index=df_in.index)

imp = Imputer(strategy='mean')
imp.fit(df_in)
assert_frame_equal(imp.transform(df_in), expected_output)  # all good

# Serialize the fitted transformer, drop the in-memory copy, and reload it.
with open('imp.pkl', 'wb') as f:
    pickle.dump(imp, f)
del imp

with open('imp.pkl', 'rb') as f:
    imp = pickle.load(f)

# Fails here: the unpickled transformer raises NotFittedError.
assert_frame_equal(imp.transform(df_in), expected_output)
This throws:
Traceback (most recent call last):
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_base.py", line 271, in x_columns
return self.__x_cols
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 318, in __getattribute__
base_ret = est.__getattribute__(self, name)
AttributeError: 'Imputer' object has no attribute '_FrameMixin__x_cols'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "example.py", line 18, in <module>
assert_frame_equal(imp.transform(df_in), expected_output)
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 254, in transform
**kwargs)
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 280, in __adapter_run
res = fn(self.__x(inv, X), *args, **kwargs)
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 293, in __x
return X[self.x_columns] if not inv else X
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 318, in __getattribute__
base_ret = est.__getattribute__(self, name)
File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_base.py", line 273, in x_columns
raise NotFittedError()
sklearn.exceptions.NotFittedError
System info:
- Mac OSX 10.11.6
- Python 3.6.5
- ibex 0.1.3
- scikit-learn 0.19.1
- pandas 0.22.0
- numpy 1.14.3
For comparison, using scikit-learn directly works just fine:
# Comparison case: the same fit / pickle / reload / transform round-trip,
# using scikit-learn's Imputer directly on a NumPy array.
import pickle

import numpy as np
from sklearn.preprocessing import Imputer
from numpy.testing import assert_array_equal

arr_in = np.array([[1], [np.nan], [3]], dtype=float)
expected_output = np.array([[1], [2], [3]], dtype=float)

imp = Imputer(strategy='mean')
imp.fit(arr_in)
assert_array_equal(imp.transform(arr_in), expected_output)  # all good

# Serialize the fitted transformer, drop the in-memory copy, and reload it.
with open('imp.pkl', 'wb') as f:
    pickle.dump(imp, f)
del imp

with open('imp.pkl', 'rb') as f:
    imp = pickle.load(f)

# Reported to work: the reloaded estimator transforms without error.
assert_array_equal(imp.transform(arr_in), expected_output)