testcontainers-python
testcontainers-python copied to clipboard
New Container: MLflow
What is the new container you'd like to have?
I would like to provide a new testcontainer for mlflow, a tool for managing your machine learning model life cycle.
Why not just use a generic container for this?
I added handy utilities to get the tracking URL and to get a client for interacting with the container directly.
The implementation would look like:
import logging
import requests
from mlflow import MlflowClient
from testcontainers.core.container import DockerContainer
from testcontainers.core.waiting_utils import wait_container_is_ready
# Module-level logger named after this module; not referenced elsewhere in the visible code.
logger = logging.getLogger(__name__)
class MFlowContainer(DockerContainer):
    """Test container that runs an MLflow server.

    Args:
        image: Docker image to start. Override it to test against a different
            MLflow version.
        port: Port used inside the container. The exposed port is assigned
            automatically.
        cmd: Command the container runs. Defaults to "mlflow server"; use
            "mlflow ui" if you want the UI for debugging and testing.
    """

    def __init__(
        self, image: str = "ghcr.io/mlflow/mlflow:v2.14.1", port: int = 5000, cmd: str = "mlflow server"
    ) -> None:
        super().__init__(image=image)
        # Remember the settings; they are applied later by _configure().
        self.cmd = cmd
        self.port = port
        self.with_exposed_ports(self.port)

    def _configure(self) -> None:
        """Set the container's environment variables and command."""
        # NOTE(review): presumably the mlflow CLI reads these variables to pick
        # its bind address and port -- confirm against the MLflow CLI docs.
        environment = {
            "MLFLOW_PORT": str(self.port),
            "MLFLOW_HOST": "0.0.0.0",
        }
        for variable, value in environment.items():
            self.with_env(variable, value)
        self.with_command(self.cmd)

    def get_url(self) -> str:
        """Build the HTTP base URL of the running container.

        Returns:
            The URL, made of the container host IP and the exposed port that
            maps to ``self.port``. Use it as the location to track to.
        """
        host = self.get_container_host_ip()
        mapped_port = self.get_exposed_port(self.port)
        return f"http://{host}:{mapped_port}"

    @wait_container_is_ready(requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout)
    def _readiness_probe(self) -> None:
        """Request the ``/health`` endpoint and raise on an HTTP error status.

        ``ConnectionError`` and ``ReadTimeout`` are handed to
        ``wait_container_is_ready`` as the exceptions to tolerate while the
        server is still coming up.
        """
        # https://mlflow.org/docs/latest/deployment/deploy-model-locally.html?highlight=health
        health_url = self.get_url() + "/health"
        requests.get(health_url, timeout=1).raise_for_status()

    def get_client(self) -> MlflowClient:
        """Create an ``MlflowClient`` that points at this container.

        Can be used for testing.
        """
        tracking_url = self.get_url()
        return MlflowClient(tracking_url)

    def start(self) -> "MFlowContainer":
        """Configure and start the container, then wait for the readiness probe.

        Returns:
            This container instance, so the call can be chained.
        """
        self._configure()
        super().start()
        # Do not hand the container back before the health endpoint answers.
        self._readiness_probe()
        return self
My only nitpick about the implementation is that ideally we wouldn't even return the client - we want to avoid putting other libraries into the public API.
I'm not sure whether MLflow warrants having a container; that's a separate question. If it does, this is pretty close.