auto-sklearn
auto-sklearn copied to clipboard
Custom metrics do not work with AutoSklearn2Classifier
Describe the bug
I am creating a custom MCC scorer for a binary classification problem and encountering the following error:
FileNotFoundError: [Errno 2] No such file or directory: '/home/[email protected]/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'
To Reproduce
Replace the dataset loading logic with any other dataset; `matthews_corrcoef` is imported from scikit-learn.
scorer = autosklearn.metrics.make_scorer(
name='mcc',
score_func=matthews_corrcoef,
optimum=1,
greater_is_better=True,
needs_proba=False,
needs_threshold=False,
)
train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)
classifier = AutoSklearn2Classifier(
time_left_for_this_task=24 * 60 * 60, # 1d
per_run_time_limit=15 * 60,
memory_limit=20 * 1024,
n_jobs=4,
max_models_on_disc=50,
ensemble_size=50,
seed=42,
metric=scorer
)
classifier.fit(train_x, train_y, valid_x, valid_y)
Expected behavior
No error
Actual behavior, stacktrace or logfile
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[21], line 13
     10 for target in all_target_columns:
     11 train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)
---> 13 classifier = AutoSklearn2Classifier(
     14 # tmp_folder=cache_path / 'tnp2',
     15 time_left_for_this_task=24 * 60 * 60, # 1d
     16 per_run_time_limit=15 * 60,
     17 memory_limit=20 * 1024,
     18 n_jobs=4,
     19 max_models_on_disc=50,
     20 ensemble_size=50,
     21 seed=42,
     22 metric=scorer
     23 )
     24 classifier.fit(train_x, train_y, valid_x, valid_y)
     25 save_model(classifier, 'autosklearn', target)
File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:311, in AutoSklearn2Classifier.__init__(self, time_left_for_this_task, per_run_time_limit, ensemble_size, ensemble_class, ensemble_kwargs, ensemble_nbest, max_models_on_disc, seed, memory_limit, tmp_folder, delete_tmp_folder_after_terminate, n_jobs, dask_client, disable_evaluator_output, smac_scenario_args, logging_config, metric, scoring_functions, load_models, dataset_compression, allow_string_features)
    306 include_preprocessors = ["no_preprocessing"]
    307 include = {
    308 "classifier": include_estimators,
    309 "feature_preprocessor": include_preprocessors,
    310 }
--> 311 self.train_selectors(selected_metric=metric)
    312 super().__init__(
    313 time_left_for_this_task=time_left_for_this_task,
    314 per_run_time_limit=per_run_time_limit,
   (...)
    339 allow_string_features=allow_string_features,
    340 )
File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:356, in AutoSklearn2Classifier.train_selectors(self, selected_metric)
    352 for metric in metric_list:
    353 training_data_file = (
    354 self.this_directory / metric.name / "askl2_training_data.json"
    355 )
--> 356 with open(training_data_file) as fh:
    357 training_data = json.load(fh)
    358 fh.seek(0)
FileNotFoundError: [Errno 2] No such file or directory: '/home/[email protected]/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'
Sorry for any broken encoding in the output above. The problem arises when `training_data_file` is loaded at line 356 of askl2.py. The tmp_folder does not exist.
Environment and installation:
- Red Hat Enterprise Linux 8.8 (Ootpa)
- venv
- Python 3.9
pip freeze:
anyio==4.4.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==23.2.0
auto-sklearn==0.15.0
Babel==2.15.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.7.4
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpickle==3.0.0
comm==0.2.2
ConfigSpace==0.4.21
Cython==3.0.10
dask==2024.7.1
debugpy==1.8.2
decorator==5.1.1
defusedxml==0.7.1
distributed==2024.7.1
distro==1.9.0
emcee==3.1.6
exceptiongroup==1.2.2
executing==2.0.1
fastjsonschema==2.20.0
fqdn==1.5.1
fsspec==2024.6.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
importlib_metadata==8.2.0
ipykernel==6.29.5
ipython==8.18.1
ipywidgets==8.1.3
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.4
joblib==1.4.2
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.11
liac-arff==2.5.0
locket==1.0.0
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
mistune==3.0.2
msgpack==1.0.8
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook==7.2.1
notebook_shim==0.2.4
numpy==1.23.3
overrides==7.7.0
packaging==24.1
pandas==1.5.3
pandocfilters==1.5.1
parso==0.8.4
partd==1.4.2
pexpect==4.9.0
platformdirs==4.2.2
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
Pygments==2.18.0
pynisher==0.6.4
pyparsing==3.1.2
pyrfr==0.8.3
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.1
pyzmq==26.0.3
qtconsole==5.5.2
QtPy==2.4.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.19.1
scikit-learn==0.24.2
scipy==1.13.1
Send2Trash==1.8.3
six==1.16.0
smac==1.2
sniffio==1.3.1
sortedcontainers==2.4.0
soupsieve==2.5
stack-data==0.6.3
tblib==3.0.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.3.0
tomli==2.0.1
toolz==0.12.1
tornado==6.4.1
tqdm==4.66.4
traitlets==5.14.3
types-python-dateutil==2.9.0.20240316
typing_extensions==4.12.2
uri-template==1.3.0
urllib3==2.2.2
wcwidth==0.2.13
webcolors==24.6.0
webencodings==0.5.1
websocket-client==1.8.0
widgetsnbextension==4.0.11
zict==3.0.0
zipp==3.19.2