auto-sklearn icon indicating copy to clipboard operation
auto-sklearn copied to clipboard

Custom metrics do not work with AutoSklearn2Classifier

Open viktor-shcherb opened this issue 1 year ago • 0 comments

Describe the bug

I am creating a custom MCC scorer for a binary classification problem and am encountering the following error:

FileNotFoundError: [Errno 2] No such file or directory: '/home/[email protected]/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'

To Reproduce

Replace the dataset loading logic with any other dataset; matthews_corrcoef is imported from scikit-learn.

scorer = autosklearn.metrics.make_scorer(
    name='mcc',
    score_func=matthews_corrcoef,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)

train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)

classifier = AutoSklearn2Classifier(
    time_left_for_this_task=24 * 60 * 60,  # 1d
    per_run_time_limit=15 * 60,
    memory_limit=20 * 1024,
    n_jobs=4, 
    max_models_on_disc=50,
    ensemble_size=50,
    seed=42,
    metric=scorer
)
classifier.fit(train_x, train_y, valid_x, valid_y)

Expected behavior

No error

Actual behavior, stacktrace or logfile

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[21], line 13
     10 for target in all_target_columns:
     11     train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)
---> 13     classifier = AutoSklearn2Classifier(
     14         # tmp_folder=cache_path / 'tnp2',
     15         time_left_for_this_task=24 * 60 * 60,  # 1d
     16         per_run_time_limit=15 * 60,
     17         memory_limit=20 * 1024,
     18         n_jobs=4,
     19         max_models_on_disc=50,
     20         ensemble_size=50,
     21         seed=42,
     22         metric=scorer
     23     )
     24     classifier.fit(train_x, train_y, valid_x, valid_y)
     25     save_model(classifier, 'autosklearn', target)

File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:311, in AutoSklearn2Classifier.__init__(self, time_left_for_this_task, per_run_time_limit, ensemble_size, ensemble_class, ensemble_kwargs, ensemble_nbest, max_models_on_disc, seed, memory_limit, tmp_folder, delete_tmp_folder_after_terminate, n_jobs, dask_client, disable_evaluator_output, smac_scenario_args, logging_config, metric, scoring_functions, load_models, dataset_compression, allow_string_features)
    306 include_preprocessors = ["no_preprocessing"]
    307 include = {
    308     "classifier": include_estimators,
    309     "feature_preprocessor": include_preprocessors,
    310 }
--> 311 self.train_selectors(selected_metric=metric)
    312 super().__init__(
    313     time_left_for_this_task=time_left_for_this_task,
    314     per_run_time_limit=per_run_time_limit,
   (...)
    339     allow_string_features=allow_string_features,
    340 )

File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:356, in AutoSklearn2Classifier.train_selectors(self, selected_metric)
    352 for metric in metric_list:
    353     training_data_file = (
    354         self.this_directory / metric.name / "askl2_training_data.json"
    355     )
--> 356     with open(training_data_file) as fh:
    357         training_data = json.load(fh)
    358         fh.seek(0)

FileNotFoundError: [Errno 2] No such file or directory: '/home/[email protected]/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'

Sorry for the broken encoding. The problem arises when training_data_file is opened at line 356 of askl2.py. The tmp_folder does not exist.

Environment and installation:

  • Red Hat Enterprise Linux 8.8 (Ootpa)
  • venv
  • Python 3.9

pip freeze:

anyio==4.4.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==23.2.0
auto-sklearn==0.15.0
Babel==2.15.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.7.4
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpickle==3.0.0
comm==0.2.2
ConfigSpace==0.4.21
Cython==3.0.10
dask==2024.7.1
debugpy==1.8.2
decorator==5.1.1
defusedxml==0.7.1
distributed==2024.7.1
distro==1.9.0
emcee==3.1.6
exceptiongroup==1.2.2
executing==2.0.1
fastjsonschema==2.20.0
fqdn==1.5.1
fsspec==2024.6.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
importlib_metadata==8.2.0
ipykernel==6.29.5
ipython==8.18.1
ipywidgets==8.1.3
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.4
joblib==1.4.2
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.11
liac-arff==2.5.0
locket==1.0.0
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
mistune==3.0.2
msgpack==1.0.8
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook==7.2.1
notebook_shim==0.2.4
numpy==1.23.3
overrides==7.7.0
packaging==24.1
pandas==1.5.3
pandocfilters==1.5.1
parso==0.8.4
partd==1.4.2
pexpect==4.9.0
platformdirs==4.2.2
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
Pygments==2.18.0
pynisher==0.6.4
pyparsing==3.1.2
pyrfr==0.8.3
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.1
pyzmq==26.0.3
qtconsole==5.5.2
QtPy==2.4.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.19.1
scikit-learn==0.24.2
scipy==1.13.1
Send2Trash==1.8.3
six==1.16.0
smac==1.2
sniffio==1.3.1
sortedcontainers==2.4.0
soupsieve==2.5
stack-data==0.6.3
tblib==3.0.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.3.0
tomli==2.0.1
toolz==0.12.1
tornado==6.4.1
tqdm==4.66.4
traitlets==5.14.3
types-python-dateutil==2.9.0.20240316
typing_extensions==4.12.2
uri-template==1.3.0
urllib3==2.2.2
wcwidth==0.2.13
webcolors==24.6.0
webencodings==0.5.1
websocket-client==1.8.0
widgetsnbextension==4.0.11
zict==3.0.0
zipp==3.19.2

viktor-shcherb avatar Jul 29 '24 08:07 viktor-shcherb