ValueError in EarlyStoppingShapRFECV when using LGBM as clf
Describe the bug
Whenever I use LGBM as the clf of EarlyStoppingShapRFECV, the following error occurs:
'ValueError: Model type not supported'
Environment (please complete the following information):
- probatus version: 1.8.9
- python version: 3.7
- OS: Windows and Linux
- lightgbm version: 3.2.1
To Reproduce
```python
from lightgbm import LGBMClassifier
import pandas as pd
from probatus.feature_elimination import EarlyStoppingShapRFECV
from sklearn.datasets import make_classification

feature_names = [
    'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7',
    'f8', 'f9', 'f10', 'f11', 'f12', 'f13',
    'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20']

# Prepare two samples
X, y = make_classification(n_samples=200, class_sep=0.05, n_informative=6, n_features=20,
                           random_state=0, n_redundant=10, n_clusters_per_class=1)
X = pd.DataFrame(X, columns=feature_names)

# Prepare model
clf = LGBMClassifier(n_estimators=200, max_depth=3)

# Run feature elimination
shap_elimination = EarlyStoppingShapRFECV(
    clf=clf, step=0.2, cv=10, scoring='roc_auc', early_stopping_rounds=10, n_jobs=3)
report = shap_elimination.fit_compute(X, y)

# Make plots
performance_plot = shap_elimination.plot()

# Get final feature set
final_features_set = shap_elimination.get_reduced_features_set(num_features=3)
```
Error traceback
```
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_25508/1460954107.py in <module>
21 shap_elimination = EarlyStoppingShapRFECV(
22 clf=clf, step=0.2, cv=10, scoring='roc_auc', early_stopping_rounds=10, n_jobs=1)
---> 23 report = shap_elimination.fit_compute(X, y)
24
25 # Make plots
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\probatus\feature_elimination\feature_elimination.py in fit_compute(self, X, y, sample_weight, columns_to_keep, column_names, **shap_kwargs)
655 columns_to_keep=columns_to_keep,
656 column_names=column_names,
--> 657 **shap_kwargs,
658 )
659 return self.compute()
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\probatus\feature_elimination\feature_elimination.py in fit(self, X, y, sample_weight, columns_to_keep, column_names, **shap_kwargs)
547 **shap_kwargs,
548 )
--> 549 for train_index, val_index in self.cv.split(current_X, self.y)
550 )
551
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1041 # remaining jobs.
1042 self._iterating = False
-> 1043 if self.dispatch_one_batch(iterator):
1044 self._iterating = self._original_iterator is not None
1045
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
859 return False
860 else:
--> 861 self._dispatch(tasks)
862 return True
863
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
777 with self._lock:
778 job_idx = len(self._jobs)
--> 779 job = self._backend.apply_async(batch, callback=cb)
780 # A job can complete so quickly than its callback is
781 # called before we get here, causing self._jobs to
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\parallel.py in __call__(self)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\probatus\feature_elimination\feature_elimination.py in _get_feature_shap_values_per_fold(self, X, y, clf, train_index, val_index, sample_weight, **shap_kwargs)
1235 sample_weight=sample_weight,
1236 train_index=train_index,
-> 1237 val_index=val_index,
1238 )
1239
c:\Users\kmiii\.conda\envs\py37\lib\site-packages\probatus\feature_elimination\feature_elimination.py in _get_fit_params(self, clf, X_train, y_train, X_val, y_val, sample_weight, train_index, val_index)
1184 pass
1185
-> 1186 raise ValueError("Model type not supported")
1187
1188 def _get_feature_shap_values_per_fold(self, X, y, clf, train_index, val_index, sample_weight=None, **shap_kwargs):
ValueError: Model type not supported
```
This happens because LightGBM's print_evaluation callback was renamed to log_evaluation as of lightgbm 3.3.0. I have sent a pull request to fix this.
Please see #188
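
For reference, below is a minimal sketch of a version-tolerant callback import that avoids the rename issue. The callback names come from lightgbm itself; how probatus wires them into its early-stopping fit parameters internally is an assumption here, not taken from the library's code.

```python
# Sketch of a version-tolerant import. Assumption: the failing code path
# imports a lightgbm logging callback whose name changed around lightgbm 3.3.0.
try:
    # Newer lightgbm releases expose log_evaluation.
    from lightgbm import log_evaluation
except ImportError:
    # Older lightgbm releases only expose print_evaluation; alias it to the new name.
    from lightgbm import print_evaluation as log_evaluation

# The aliased callback can then be passed to LGBMClassifier.fit(), e.g.:
# clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], callbacks=[log_evaluation(period=0)])
```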