pyDVL
pyDVL copied to clipboard
Too many processes started
This code uses all cores in my machine, despite setting `n_jobs=1`. Since we are ditching the whole parallel backend abstraction this will probably disappear, but I thought it best to report it anyway.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from pydvl.utils import Dataset, Scorer, Utility
from pydvl.value import (MaxUpdates, RelativeTruncation,
permutation_montecarlo_shapley)
# Minimal reproducer for the report: both the model and the Shapley call
# are given n_jobs=1, yet the run is reported to use every core.
data = Dataset.from_sklearn(
    load_breast_cancer(),
    train_size=10,
    stratify_by_target=True,
    random_state=16,
)

# The estimator itself is also restricted to a single job.
model = LogisticRegression(n_jobs=1)
scorer = Scorer("accuracy", default=0.0)
u = Utility(model, data, scorer)

# Truncation policy and stopping criterion, built in the same order in
# which the original call evaluated them as arguments.
truncation_policy = RelativeTruncation(u, 0.05)
stopping_criterion = MaxUpdates(1000)

values = permutation_montecarlo_shapley(
    u,
    truncation=truncation_policy,
    done=stopping_criterion,
    seed=16,
    n_jobs=1,  # the setting the report says is not honoured
    progress=True,
)
df = values.to_dataframe(column="data_value")