tune-sklearn icon indicating copy to clipboard operation
tune-sklearn copied to clipboard

TuneSearchCV not correctly handling error_score parameter

Open ssiegel95 opened this issue 2 years ago • 0 comments

I run a random hyperparameter search using vanilla RandomizedSearchCV and then the equivalent operation using tune_sklearn. In the first case, the invalid parameter combination is gracefully handled by sklearn via the error_score parameter being set to an int or float, as documented. However, when running the equivalent search using TuneSearchCV, the entire job fails before completion instead of recording error_score for the failed fits.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from tune_sklearn import TuneSearchCV

# Load the handwritten-digits dataset as a plain (features, labels) pair.
X, y = load_digits(return_X_y=True)

# Dimensionality reduction followed by a linear SVM classifier; this
# pipeline is shared by both hyperparameter searches below.
_STEPS = [
    ("pca", PCA(iterated_power=7)),
    ("linearsvc", LinearSVC(dual=False, max_iter=10000)),
]
PIPELINE = Pipeline(steps=_STEPS)

# Search space for the LinearSVC step. Some combinations (e.g.
# penalty='l2', loss='hinge', dual=False) are invalid and make the fit
# raise ValueError — exactly what exercises error_score handling.
GRID = {
    "linearsvc__penalty": ["l1", "l2"],
    "linearsvc__loss": ["hinge", "squared_hinge"],
    "linearsvc__fit_intercept": [True, False],
    "linearsvc__dual": [True, False],
    "linearsvc__tol": [1e-05, 0.0001, 0.001, 0.01, 0.1],
    "linearsvc__C": [
        0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0,
    ],
}

# --- Baseline: plain scikit-learn randomized search ---------------------
# With error_score=np.nan, sklearn records NaN for the failed parameter
# combination and keeps searching instead of aborting the whole run.
random = RandomizedSearchCV(
    PIPELINE,
    param_distributions=GRID,
    error_score=np.nan,
    n_iter=5,
    random_state=100,
)

random.fit(X, y)
# Completes, with a warning wrapping the underlying failure:
#   ValueError: Unsupported set of arguments: The combination of
#   penalty='l2' and loss='hinge' are not supported when dual=False,
#   Parameters: penalty='l2', loss='hinge', dual=False
print(random.best_estimator_)
# Pipeline(steps=[('pca', PCA(iterated_power=7)),
#                ('linearsvc',
#                 LinearSVC(C=25.0, dual=False, max_iter=10000, tol=1e-05))])

# *******SAME THING WITH TUNE_SKLEARN ****************
# Now the equivalent search with tune-sklearn. Expected: error_score=np.nan
# lets failed fits be scored as NaN, as sklearn does above. Actual: the
# whole run terminates before completion (the bug being reported).
random = TuneSearchCV(
    estimator=PIPELINE,
    param_distributions=GRID,
    search_optimization="random",
    n_trials=5,
    error_score=np.nan,
    verbose=True,
    random_state=100,
)

random.fit(X, y)
# Fixed in the repro itself: TuneSearchCV follows the sklearn convention of
# a trailing underscore (see the baseline above); plain `best_estimator`
# would raise AttributeError even if the fit succeeded.
print(random.best_estimator_)
# run terminates before reaching this point

ssiegel95 avatar Aug 13 '22 02:08 ssiegel95