dummyPy
dummyPy copied to clipboard
Error when dumping the OneHotEncoder object using pickle (or dill) and loading it again
Partial Python code related to it:
from dummyPy import OneHotEncoder
import pandas as pd
import dill, pickle
# Reproduction script: fit the encoder incrementally on row chunks of X and
# serialize it to disk with dill, both periodically and once at the end.
# NOTE(review): block indentation was lost in the paste; the structure below
# (checkpoint inside the loop, final dump after it) is reconstructed — confirm.
X = pd.DataFrame() ## (71, 3000000)
# NOTE(review): `cat_col` here vs `cat_cols` on the next line — presumably the
# same list of categorical column names; confirm which spelling is defined.
print(len(cat_col)) ## 50
encoder = OneHotEncoder(cat_cols)
chunk = 1000
total = len(X)
print(X.shape) ## (3067907, 71)
for i in range(0, total, chunk):
    encoder.fit(X.iloc[i:i + chunk])
    # Checkpoint whenever the chunk offset is a multiple of 50000. The `with`
    # block closes (and flushes) the file before the same path is reopened.
    if i % 50000 == 0:
        with open("model/one_hot.encoder", "wb") as checkpoint_file:
            dill.dump(encoder, checkpoint_file)
# Final dump of the encoder after all chunks have been fitted.
with open("model/one_hot.encoder", "wb") as encoder_file:
    dill.dump(encoder, encoder_file)  ## same problem occurs with pickle
Error message -
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\scipy\sparse\coo.py in __init__(self, arg1, shape, dtype, copy)
138 try:
--> 139 obj, (row, col) = arg1
140 except (TypeError, ValueError):
ValueError: not enough values to unpack (expected 2, got 0)
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-9-8dae15cd54ca> in <module>()
----> 1 res = encoder.transform(X)
C:\ProgramData\Anaconda3\lib\site-packages\dummyPy\dummyPy.py in transform(self, data, dtype)
156 if column_name in self.categorical_columns
157 else data[column_name].values.reshape(-1, 1)
--> 158 for column_name in data.columns]
159 transformed_np_array = np.array(np.concatenate(transformed_data, axis=1), dtype=object)
160
C:\ProgramData\Anaconda3\lib\site-packages\dummyPy\dummyPy.py in <listcomp>(.0)
156 if column_name in self.categorical_columns
157 else data[column_name].values.reshape(-1, 1)
--> 158 for column_name in data.columns]
159 transformed_np_array = np.array(np.concatenate(transformed_data, axis=1), dtype=object)
160
C:\ProgramData\Anaconda3\lib\site-packages\dummyPy\dummyPy.py in transform(self, column_data)
32
33 return(coo_matrix((data, zip(*row_cols)),
---> 34 shape=(column_data.shape[0], len(self.column_mapper))))
35
36 def __eq__(self, other):
C:\ProgramData\Anaconda3\lib\site-packages\scipy\sparse\coo.py in __init__(self, arg1, shape, dtype, copy)
139 obj, (row, col) = arg1
140 except (TypeError, ValueError):
--> 141 raise TypeError('invalid input format')
142
143 if shape is None:
TypeError: invalid input format
pickling works for me