ibex icon indicating copy to clipboard operation
ibex copied to clipboard

Unable to pickle ibex-wrapped sklearn transformers

Open hacktuarial opened this issue 6 years ago • 1 comment

import pickle
import numpy as np
import pandas as pd
from ibex.sklearn.preprocessing import Imputer
from pandas.testing import assert_frame_equal

df_in = pd.DataFrame({'x': [1, np.nan, 3]})  # single column with one missing value
expected_output = pd.DataFrame({'x': [1., 2., 3.]}, index=df_in.index)  # NaN replaced by 2.0, the mean of 1 and 3

imp = Imputer(strategy='mean')
imp.fit(df_in)
assert_frame_equal(imp.transform(df_in), expected_output)  # all good
with open('imp.pkl', 'wb') as f:  # write the fitted imputer to disk
    pickle.dump(imp, f)
del imp  # drop the in-memory instance so only the pickled copy is used below
with open('imp.pkl', 'rb') as f:  # read the imputer back from the pickle file
    imp = pickle.load(f)
assert_frame_equal(imp.transform(df_in), expected_output)  # fails: transform on the unpickled imputer raises NotFittedError

The final `transform` call, made on the unpickled imputer, throws:

Traceback (most recent call last):
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_base.py", line 271, in x_columns
    return self.__x_cols
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 318, in __getattribute__
    base_ret = est.__getattribute__(self, name)
AttributeError: 'Imputer' object has no attribute '_FrameMixin__x_cols'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "example.py", line 18, in <module>
    assert_frame_equal(imp.transform(df_in), expected_output)
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 254, in transform
    **kwargs)
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 280, in __adapter_run
    res = fn(self.__x(inv, X), *args, **kwargs)
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 293, in __x
    return X[self.x_columns] if not inv else X
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_adapter.py", line 318, in __getattribute__
    base_ret = est.__getattribute__(self, name)
  File "/Users/tsweetser/.pyenv/versions/sulu/lib/python3.6/site-packages/ibex/_base.py", line 273, in x_columns
    raise NotFittedError()
sklearn.exceptions.NotFittedError

System info:

  • Mac OSX 10.11.6
  • Python 3.6.5
  • ibex 0.1.3
  • scikit-learn 0.19.1
  • pandas 0.22.0
  • numpy 1.14.3

hacktuarial avatar May 02 '18 23:05 hacktuarial

For comparison, using scikit-learn directly works just fine:

import pickle
import numpy as np
from sklearn.preprocessing import Imputer
from numpy.testing import assert_array_equal

arr_in = np.array([[1], [np.nan], [3]], dtype=float)  # single-column array with one missing value
expected_output = np.array([[1], [2], [3]], dtype=float)  # NaN replaced by 2.0, the mean of 1 and 3

imp = Imputer(strategy='mean')
imp.fit(arr_in)
assert_array_equal(imp.transform(arr_in), expected_output)  # all good
with open('imp.pkl', 'wb') as f:  # write the fitted imputer to disk
    pickle.dump(imp, f)
del imp  # drop the in-memory instance so only the pickled copy is used below
with open('imp.pkl', 'rb') as f:  # read the imputer back from the pickle file
    imp = pickle.load(f)
assert_array_equal(imp.transform(arr_in), expected_output)  # per the report, this works with plain scikit-learn

hacktuarial avatar May 02 '18 23:05 hacktuarial