BentoML
bug: bentoml does not support GPU with MLflow?
Describe the bug
I save the model with MLflow (sentence-transformers) and then import it into BentoML:
```python
def save_model_to_mlflow(self, version):
    signature = mlflow.models.infer_signature(
        self.input_data, self.output_data
    )
    model_info: mlflow.models.model.ModelInfo = (
        mlflow.sentence_transformers.log_model(
            model=self.model,
            artifact_path=self.model_name,
            signature=signature,
            registered_model_name=self.model_name,
        )
    )
    self.mlflow_helper.update_model_description(
        self.model_name, model_info.run_id, "test sentence bert model"
    )
    self.mlflow_helper.update_model_tag(
        self.model_name,
        model_info.run_id,
        {"ct": "true", "model_version": version},
    )


# then import the registered model into BentoML
bentoml.mlflow.import_model(
    tag,
    model_uri=version.source,
    signatures={"predict": {"batchable": batchable}},
)
```
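The `tag`, `version`, and `batchable` names above come from surrounding code that is not shown. A minimal sketch of plausible definitions (all values here are hypothetical stand-ins, not the reporter's actual code):

```python
from mlflow.tracking import MlflowClient

# hypothetical stand-ins for the names the snippet leaves undefined
tag = "test_sentence_bert"  # BentoML model tag to import under (assumption)
batchable = True            # enable adaptive batching for the "predict" signature (assumption)

# look up the just-registered model version so version.source
# points at the MLflow artifact location
client = MlflowClient()
version = client.get_latest_versions("test_sentence_bert")[0]
```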
And below is service.py:
```python
from typing import List

import bentoml
from bentoml.io import JSON, NumpyNdarray
from constant import BUILD_NAME, MODEL_NAME

sbert_model = bentoml.mlflow.get(MODEL_NAME)
_sbert_runnable = sbert_model.to_runnable()


class TestSentenceBert(_sbert_runnable):
    def __init__(self):
        super().__init__()

    @bentoml.Runnable.method(batchable=True, batch_dim=0)
    def predict(self, sentences: List[str]):
        output = super().predict(sentences)
        return output


sbert_runner = bentoml.Runner(TestSentenceBert)

svc = bentoml.Service(
    BUILD_NAME, runners=[sbert_runner], models=[sbert_model]
)

samples = [
    "안녕",  # Korean: "hi"
    "게임",  # Korean: "game"
]


@svc.api(
    input=JSON.from_sample(samples),
    output=NumpyNdarray(),
    route=BUILD_NAME,
)
async def predict(sentences):
    output = await sbert_runner.predict.async_run(sentences)
    return output
```
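For reference, one way to serve and smoke-test this service locally (the route is an assumption, since `BUILD_NAME` is defined in constant.py and not shown):

```bash
bentoml serve service:svc

# assuming BUILD_NAME = "test_sentence_bert"; use whatever constant.py actually defines
curl -X POST http://127.0.0.1:3000/test_sentence_bert \
  -H "Content-Type: application/json" \
  -d '["안녕", "게임"]'
```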
How can I use the GPU? The runner does not find the GPU, and I want to call model.to("cuda:0").
To reproduce
No response
Expected behavior
No response
Environment
Latest version
Hi, I think this is intentional behavior in BentoML: there is no way for BentoML to tell from the MLflow model metadata whether the model uses a GPU. You can see more detail at the link below:
https://github.com/bentoml/BentoML/blob/main/src/bentoml/_internal/frameworks/mlflow.py#L246
```python
# https://github.com/bentoml/BentoML/blob/main/src/bentoml/_internal/frameworks/mlflow.py#L246
class MLflowPyfuncRunnable(bentoml.Runnable):
    # The only case that multi-threading may not be supported is when user define a
    # custom python_function MLflow model with pure python code, but there's no way
    # of telling that from the MLflow model metadata. It should be a very rare case,
    # because most custom python_function models are likely numpy code or model
    # inference with pre/post-processing code.
    SUPPORTED_RESOURCES = ("cpu",)
    SUPPORTS_CPU_MULTI_THREADING = True
    ...
```
Have you tried this?
```python
class TestSentenceBert(_sbert_runnable):  # override the generated runnable
    # "nvidia.com/gpu" is the resource key BentoML's scheduler checks for GPU placement
    SUPPORTED_RESOURCES = ("nvidia.com/gpu",)  # <-- declare GPU support so a GPU gets assigned
    SUPPORTS_CPU_MULTI_THREADING = True

    def __init__(self):
        super().__init__()

    @bentoml.Runnable.method(batchable=True, batch_dim=0)
    def predict(self, sentences: List[str]):
        output = super().predict(sentences)
        return output
```
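Note that declaring GPU support only makes the runnable eligible for GPU scheduling; the runner still needs a GPU allocated to it. If I recall the configuration schema correctly, that can be done in bentoml_configuration.yaml (the runner name below is an assumption; it defaults to the lower-cased class name, or you can pass `name=` to `bentoml.Runner`):

```yaml
# sketch of bentoml_configuration.yaml; the runner name must match your Runner
runners:
  testsentencebert:
    resources:
      nvidia.com/gpu: [0]  # pin this runner to GPU 0 (assumes one visible GPU)
```

Then start the server with the config applied, e.g. `BENTOML_CONFIG=bentoml_configuration.yaml bentoml serve service:svc`.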