bug: 'XGBClassifier' object has no attribute '__call__'
Describe the bug
I have a model trained with the XGBoost library, following these steps:
Step 1: After training, save the model with joblib.dump(best_model, abspath(config.model.path)).
Step 2: Save the trained model to BentoML with bentoml.picklable_model.save_model.
Step 3: When I load the model to serve an API call via bentoml.picklable_model.get, I get the error 'XGBClassifier' object has no attribute '__call__'.
I don't understand why, because in Step 2 save_model reported a __call__ method in the model signature: "Using the default model signature for pickable model ({'__call__': ModelSignature(batchable=False, batch_dim=(0, 0), input_spec=None, output_spec=None)}) for model 'xgboost'." So the signature does contain __call__, yet at inference time the error still says "no attribute '__call__'".
train code:
import warnings

warnings.filterwarnings(action="ignore")

from functools import partial
from typing import Callable

from bentoml.types import ModelSignature
import hydra
import joblib
import numpy as np
import pandas as pd
from hydra.utils import to_absolute_path as abspath
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from omegaconf import DictConfig
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

import bentoml


def load_data(path: DictConfig):
    X_train = pd.read_csv(abspath(path.X_train.path))
    X_test = pd.read_csv(abspath(path.X_test.path))
    y_train = pd.read_csv(abspath(path.y_train.path))
    y_test = pd.read_csv(abspath(path.y_test.path))
    return X_train, X_test, y_train, y_test


def get_objective(
    X_train: pd.DataFrame,
    y_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_test: pd.DataFrame,
    config: DictConfig,
    space: dict,
):
    model = XGBClassifier(
        use_label_encoder=config.model.use_label_encoder,
        objective=config.model.objective,
        n_estimators=space["n_estimators"],
        max_depth=int(space["max_depth"]),
        gamma=space["gamma"],
        reg_alpha=int(space["reg_alpha"]),
        min_child_weight=int(space["min_child_weight"]),
        colsample_bytree=int(space["colsample_bytree"]),
    )
    evaluation = [(X_train, y_train), (X_test, y_test)]
    model.fit(
        X_train,
        y_train,
        eval_set=evaluation,
        eval_metric=config.model.eval_metric,
        early_stopping_rounds=config.model.early_stopping_rounds,
    )
    prediction = model.predict(X_test.values)
    accuracy = accuracy_score(y_test, prediction)
    print("SCORE:", accuracy)
    return {"loss": -accuracy, "status": STATUS_OK, "model": model}


def optimize(objective: Callable, space: dict):
    trials = Trials()
    best_hyperparams = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=100,
        trials=trials,
    )
    print("The best hyperparameters are : ", "\n")
    print(best_hyperparams)
    best_model = trials.results[
        np.argmin([r["loss"] for r in trials.results])
    ]["model"]
    return best_model


@hydra.main(config_path="../../config", config_name="main")
def train(config: DictConfig):
    """Function to train the model"""
    X_train, X_test, y_train, y_test = load_data(config.processed)

    # Define space
    space = {
        "max_depth": hp.quniform("max_depth", **config.model.max_depth),
        "gamma": hp.uniform("gamma", **config.model.gamma),
        "reg_alpha": hp.quniform("reg_alpha", **config.model.reg_alpha),
        "reg_lambda": hp.uniform("reg_lambda", **config.model.reg_lambda),
        "colsample_bytree": hp.uniform(
            "colsample_bytree", **config.model.colsample_bytree
        ),
        "min_child_weight": hp.quniform(
            "min_child_weight", **config.model.min_child_weight
        ),
        "n_estimators": config.model.n_estimators,
        "seed": config.model.seed,
    }
    objective = partial(
        get_objective, X_train, y_train, X_test, y_test, config
    )

    # Find best model
    best_model = optimize(objective, space)
    bentoml.picklable_model.save_model(
        config.model.name,
        best_model,
        signatures={"__call__": ModelSignature(batchable=False)},
    )

    # Save model
    joblib.dump(best_model, abspath(config.model.path))


if __name__ == "__main__":
    train()
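For reference, here is a minimal, self-contained sketch (model name, toy data, and the batchable flag are assumptions, not taken from the project) of how the signature name passed to bentoml.picklable_model.save_model has to match a method that actually exists on the pickled object. XGBClassifier exposes predict() but does not implement __call__, so registering a "predict" signature and invoking it through the runner avoids the mismatch:

import bentoml
import numpy as np
from xgboost import XGBClassifier

# Toy data purely for illustration
X = np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
y = np.array([0, 1, 0, 1])
clf = XGBClassifier(n_estimators=5).fit(X, y)

# Register a signature whose name is a real method on the object
bentoml.picklable_model.save_model(
    "xgboost_demo",  # hypothetical model name
    clf,
    signatures={"predict": {"batchable": False}},
)

runner = bentoml.picklable_model.get("xgboost_demo:latest").to_runner()
runner.init_local()  # for local debugging only
print(runner.predict.run(np.array([[0.2, 0.8]])))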
save model:
import bentoml
import hydra
import joblib
from hydra.utils import to_absolute_path as abspath
from omegaconf import DictConfig
from bentoml.types import ModelSignature


def load_model(model_path: str):
    return joblib.load(model_path)


@hydra.main(config_path="../../config", config_name="main")
def save_to_bentoml(config: DictConfig):
    model = load_model(abspath(config.model.path))
    bentoml.picklable_model.save_model(config.model.name, model)


if __name__ == "__main__":
    save_to_bentoml()
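Note that this script saves the model again with the default signatures, which for picklable_model is {"__call__": ...}. A hedged variant (the helper name is hypothetical) that registers a "predict" signature instead, so the stored model exposes a method the XGBClassifier actually has:

import bentoml
import joblib


def save_to_bentoml_with_predict(model_path: str, model_name: str):
    # Load the joblib artifact and store it with an explicit "predict" signature
    model = joblib.load(model_path)
    bentoml.picklable_model.save_model(
        model_name,
        model,
        signatures={"predict": {"batchable": False}},
    )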
service:
import bentoml
import numpy as np
import pandas as pd
from bentoml.io import JSON, NumpyNdarray
from hydra import compose, initialize
from patsy import dmatrix
from pydantic import BaseModel

with initialize(config_path="../../config"):
    config = compose(config_name="main")
    FEATURES = config.process.features
    MODEL_NAME = config.model.name


class Employee(BaseModel):
    City: str = "Pune"
    PaymentTier: int = 1
    Age: int = 25
    Gender: str = "Female"
    EverBenched: str = "No"
    ExperienceInCurrentDomain: int = 1


def add_dummy_data(df: pd.DataFrame):
    """Add dummy rows so that patsy can create features similar to the train dataset"""
    rows = {
        "City": ["Bangalore", "New Delhi", "Pune"],
        "Gender": ["Male", "Female", "Female"],
        "EverBenched": ["Yes", "Yes", "No"],
        "PaymentTier": [0, 0, 0],
        "Age": [0, 0, 0],
        "ExperienceInCurrentDomain": [0, 0, 0],
    }
    dummy_df = pd.DataFrame(rows)
    return pd.concat([df, dummy_df])
def rename_columns(X: pd.DataFrame):
    # Literal replacement of the "[" and "]" that patsy puts in column names
    X.columns = X.columns.str.replace("[", "_", regex=False).str.replace(
        "]", "", regex=False
    )
    return X
def transform_data(df: pd.DataFrame):
    """Transform the data"""
    dummy_df = add_dummy_data(df)
    feature_str = " + ".join(FEATURES)
    dummy_X = dmatrix(f"{feature_str} - 1", dummy_df, return_type="dataframe")
    dummy_X = rename_columns(dummy_X)
    return dummy_X.iloc[0, :].values.reshape(1, -1)


model = bentoml.picklable_model.get(f"{MODEL_NAME}:latest").to_runner()

# Create service with the model
service = bentoml.Service("predict_employee", runners=[model])


@service.api(input=JSON(pydantic_model=Employee), output=NumpyNdarray())
def predict(employee: Employee) -> np.ndarray:
    """Transform the data then make predictions"""
    df = pd.DataFrame(employee.dict(), index=[0])
    df = transform_data(df)
    result = model.run(df)[0]
    return np.array(result)
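If the model is stored with a "predict" signature as sketched earlier, the handler would invoke that runner method explicitly instead of model.run() (which dispatches to the __call__ signature). A sketch reusing the names defined in the service above (the endpoint name predict_v2 is hypothetical):

@service.api(input=JSON(pydantic_model=Employee), output=NumpyNdarray())
def predict_v2(employee: Employee) -> np.ndarray:
    """Transform the data, then predict via the registered "predict" signature."""
    df = pd.DataFrame(employee.dict(), index=[0])
    df = transform_data(df)
    result = model.predict.run(df)[0]
    return np.array(result)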
To reproduce
Follow the three steps above (joblib.dump, bentoml.picklable_model.save_model, then bentoml.picklable_model.get + to_runner) with the training, saving, and service code listed in the Describe the bug section.
Expected behavior
The expected behavior when calling the /predict API is to receive the predictions, not an error like "'XGBClassifier' object has no attribute '__call__'".
Environment
These are the library versions I used:
bentoml==1.1.0
dagshub==0.1.8
deepchecks==0.6.1
hydra-core==1.2.0
hyperopt==0.2.7
joblib==1.1.1
mlflow==1.25.1
numpy==1.22.4
pandas==1.4.2
pandera==0.13.4
patsy==0.5.2
pydantic==1.9.1
pytest_steps==1.8.0
requests==2.28.0
scikit_learn==1.2.1
streamlit==1.10.0
xgboost==1.7.6
dvc==2.8.1
fsspec==2022.7.1
I notice that when serving I call model.run(df); it looks like this reloads the model with cloudpickle, but after loading it does not wrap it back into a runner, so the object that gets invoked does not have the __call__ method that was registered with save_model. How can I handle this?
The following simple code snippet also fails, because when model.run executes, the underlying model has no registered __call__ method.
import bentoml
model = bentoml.picklable_model.get("xgboost:latest").to_runner()
model.init_local()
# print(getattr(model, "__call__"))
model.run([[5.9, 3., 5.1, 1.8]])
So the result of this line:
# Find best model
best_model = optimize(objective, space)
Can best_model be called directly as best_model(...)? If not, how is it supposed to be used for prediction?
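(For reference, a quick check with a bare, unfitted classifier shows why calling it directly fails: the sklearn-style wrapper has predict() but is not callable.)

from xgboost import XGBClassifier

clf = XGBClassifier()
print(callable(clf))            # False: the wrapper defines no __call__
print(hasattr(clf, "predict"))  # True: predictions go through .predict()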
Skipping the lengthy code above, I simply ran:
model = bentoml.picklable_model.get("xgboost:latest").to_runner()
model.init_local()
# print(getattr(model, "__call__"))
model.run([[5.9, 3., 5.1, 1.8]])
and an error occurred.
Result when running model.run with print(getattr(model, "__call__")) commented out:
Traceback (most recent call last):
File "a.py", line 7, in <module>
r.run([7])
File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/runner/runner.py", line 52, in run
return self.runner._runner_handle.run_method(self, *args, **kwargs)
File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/runner/runner_handle/local.py", line 48, in run_method
return getattr(self._runnable, __bentoml_method.name)(*args, **kwargs)
File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/runner/runnable.py", line 140, in method
return self.func(obj, *args, **kwargs)
File "/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/bentoml/_internal/frameworks/picklable.py", line 171, in _run
return getattr(self.model, method_name)(
AttributeError: 'XGBClassifier' object has no attribute '__call__'
If I comment out the run call and run the following instead, it works, which shows that the __call__ method does exist. However, I don't understand why invoking run then complains that __call__ is missing. Result when running print(getattr(model, "__call__")) with model.run commented out:
import bentoml
model = bentoml.picklable_model.get("xgboost:latest").to_runner()
model.init_local()
print(getattr(model, "__call__"))
# model.run([7])
/home/nghiamt/PycharmProjects/MLOps/end-to-end-project/venv/lib/python3.8/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.16) or chardet (5.2.0)/charset_normalizer (2.0.12) doesn't match a supported version!
warnings.warn(
'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.
RunnerMethod(runner=Runner(name='xgboost', models=[Model(tag="xgboost:tzvcynrr2kpu5xjw", path="/home/nghiamt/bentoml/models/xgboost/tzvcynrr2kpu5xjw")], resource_config=None, runnable_class=<class 'bentoml._internal.frameworks.picklable.get_runnable.<locals>.PicklableRunnable'>, embedded=False, runner_methods=[...], scheduling_strategy=<class 'bentoml._internal.runner.strategy.DefaultStrategy'>, workers_per_resource=1, runnable_init_params={}, _runner_handle=<bentoml._internal.runner.runner_handle.local.LocalRunnerRef object at 0x7f9297c627f0>), name='__call__', config=RunnableMethodConfig(batchable=True, batch_dim=(0, 0), input_spec=None, output_spec=None), max_batch_size=100, max_latency_ms=60000)
This indicates that there is an issue with the run method.
The error says __call__ is missing on the model object, while what you are inspecting is the runner object; they are different objects with different methods. Please inspect the resulting best_model to see whether __call__ is present.
You can also get the underlying model by:
model = bentoml.picklable_model.load_model("xgboost:latest")
print(getattr(model, "__call__"))  # <-- is it there?
BentoML also has built-in support for the XGBoost framework; try saving and loading the model with bentoml.xgboost.* instead of bentoml.picklable_model. The default entry point for XGBoost model inference is .predict().
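A minimal sketch of that suggestion (the tag name and toy data are assumptions); with the XGBoost framework module the default signature is "predict", so inference goes through runner.predict.run:

import bentoml
import numpy as np
from xgboost import XGBClassifier

# Toy data purely for illustration
X = np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
y = np.array([0, 1, 0, 1])
clf = XGBClassifier(n_estimators=5).fit(X, y)

bentoml.xgboost.save_model("xgboost_clf", clf)  # hypothetical tag

runner = bentoml.xgboost.get("xgboost_clf:latest").to_runner()
runner.init_local()  # local debugging only
print(runner.predict.run(np.array([[0.2, 0.8]])))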
I tried getattr(model, "__call__") to check that the __call__ method exists. If I skip the run method, my code works:
The wrong object is being checked: the model in your screenshot is in fact a runner (it is returned by the to_runner() method). Please check the code given in my last reply; it is different.
You can also just check whether runner.__call__.run exists. But it looks to me like the original model does not have a __call__ function.
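To make the distinction concrete, a small sketch (tag assumed) of the two different lookups discussed above: the runner exposes a RunnerMethod named __call__ because of the registered signature, while the underlying pickled XGBClassifier has no such attribute, which is what runner.run() ultimately tries to call:

import bentoml

runner = bentoml.picklable_model.get("xgboost:latest").to_runner()
print(getattr(runner, "__call__", None))  # RunnerMethod for the registered signature

underlying = bentoml.picklable_model.load_model("xgboost:latest")
print(hasattr(underlying, "__call__"))    # False for XGBClassifier, hence the AttributeError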