Support uv in mlflow.models.predict (mlflow#13824)

Signed-off-by: serena-ruan <[email protected]>
Signed-off-by: k99kurella <[email protected]>
karthikkurella · Jan 30, 2025 · 223ed51
1 parent 6249cd3
commit 223ed51
Show file tree

Hide file tree

Showing 9 changed files with 192 additions and 132 deletions.
diff --git a/conftest.py b/conftest.py
@@ -385,7 +385,9 @@ def serve_wheel(request, tmp_path_factory):
             if existing_url := os.environ.get("PIP_EXTRA_INDEX_URL"):
                 url = f"{existing_url} {url}"
             os.environ["PIP_EXTRA_INDEX_URL"] = url
-
+            # Set the `UV_INDEX` environment variable to allow fetching the wheel from the
+            # url when using `uv` as environment manager
+            os.environ["UV_INDEX"] = f"mlflow={url}"
             yield
         finally:
             prc.terminate()
diff --git a/mlflow/models/container/__init__.py b/mlflow/models/container/__init__.py
@@ -166,7 +166,7 @@ def _install_model_dependencies_to_env(model_path, env_manager) -> list[str]:
         activate_cmd = ["source /miniconda/bin/activate custom_env"]
 
     elif env_manager == em.VIRTUALENV:
-        env_activate_cmd = _get_or_create_virtualenv(model_path)
+        env_activate_cmd = _get_or_create_virtualenv(model_path, env_manager=env_manager)
         path = env_activate_cmd.split(" ")[-1]
         os.symlink(path, "/opt/activate")
         activate_cmd = [env_activate_cmd]

diff --git a/mlflow/models/python_api.py b/mlflow/models/python_api.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import shutil
 from io import StringIO
 from typing import ForwardRef, get_args, get_origin
 
@@ -13,6 +14,10 @@
 from mlflow.utils.file_utils import TempDir
 
 _logger = logging.getLogger(__name__)
+UV_INSTALLATION_INSTRUCTIONS = (
+    "Run `pip install uv` to install uv. See "
+    "https://docs.astral.sh/uv/getting-started/installation for other installation methods."
+)
 
 
 def build_docker(
@@ -128,6 +133,7 @@ def predict(
         env_manager: Specify a way to create an environment for MLmodel inference:
 
             - "virtualenv" (default): use virtualenv (and pyenv for Python version management)
+            - "uv": use uv
             - "local": use the local environment
             - "conda": use conda
 
@@ -149,8 +155,8 @@ def predict(
             current os.environ are passed, and this parameter can be used to override them.
 
             .. note::
-                This parameter is only supported when `env_manager` is set to "virtualenv"
-                or "conda".
+                This parameter is only supported when `env_manager` is set to "virtualenv",
+                "conda" or "uv".
 
     Code example:
 
@@ -166,7 +172,14 @@ def predict(
             content_type="json",
         )
 
-        # Run prediction with additional pip dependencies
+        # Run prediction with "uv" as the environment manager
+        mlflow.models.predict(
+            model_uri=f"runs:/{run_id}/model",
+            input_data={"x": 1, "y": 2},
+            env_manager="uv",
+        )
+
+        # Run prediction with additional pip dependencies and extra environment variables
         mlflow.models.predict(
             model_uri=f"runs:/{run_id}/model",
             input_data={"x": 1, "y": 2},
@@ -183,18 +196,36 @@ def predict(
         raise MlflowException.invalid_parameter_value(
             f"Content type must be one of {_CONTENT_TYPE_JSON} or {_CONTENT_TYPE_CSV}."
         )
-    if extra_envs and env_manager not in (_EnvManager.VIRTUALENV, _EnvManager.CONDA):
+    if extra_envs and env_manager not in (
+        _EnvManager.VIRTUALENV,
+        _EnvManager.CONDA,
+        _EnvManager.UV,
+    ):
         raise MlflowException.invalid_parameter_value(
             "Extra environment variables are only supported when env_manager is "
-            f"set to '{_EnvManager.VIRTUALENV}' or '{_EnvManager.CONDA}'."
+            f"set to '{_EnvManager.VIRTUALENV}', '{_EnvManager.CONDA}' or '{_EnvManager.UV}'."
+        )
+    if env_manager == _EnvManager.UV:
+        if not shutil.which("uv"):
+            raise MlflowException(
+                f"Found '{env_manager}' as env_manager, but the 'uv' command is not found in the "
+                f"PATH. {UV_INSTALLATION_INSTRUCTIONS} Alternatively, you can use 'virtualenv' or "
+                "'conda' as the environment manager, but note their performances are not "
+                "as good as 'uv'."
+            )
+    else:
+        _logger.info(
+            f"It is highly recommended to use `{_EnvManager.UV}` as the environment manager for "
+            "predicting with MLflow models as its performance is significantly better than other "
+            f"environment managers. {UV_INSTALLATION_INSTRUCTIONS}"
         )
 
     is_dbconnect_mode = is_databricks_connect()
     if is_dbconnect_mode:
-        if env_manager != _EnvManager.VIRTUALENV:
+        if env_manager not in (_EnvManager.VIRTUALENV, _EnvManager.UV):
             raise MlflowException(
-                "Databricks Connect only supports virtualenv as the environment manager. "
-                f"Got {env_manager}."
+                f"Databricks Connect only supports '{_EnvManager.VIRTUALENV}' or '{_EnvManager.UV}'"
+                f" as the environment manager. Got {env_manager}."
             )
         pyfunc_backend_env_root_config = {
             "create_env_root_dir": False,

diff --git a/mlflow/projects/backend/local.py b/mlflow/projects/backend/local.py
@@ -52,7 +52,6 @@
     _get_mlflow_virtualenv_root,
     _get_virtualenv_extra_env_vars,
     _get_virtualenv_name,
-    _install_python,
 )
 
 _logger = logging.getLogger(__name__)
@@ -151,19 +150,23 @@ def run(
             if is_in_databricks_runtime():
                 nfs_tmp_dir = get_or_create_nfs_tmp_dir()
                 env_root = Path(nfs_tmp_dir) / "envs"
-                pyenv_root = env_root / _PYENV_ROOT_DIR
+                pyenv_root_dir = str(env_root / _PYENV_ROOT_DIR)
                 virtualenv_root = env_root / _VIRTUALENV_ENVS_DIR
                 env_vars = _get_virtualenv_extra_env_vars(str(env_root))
             else:
-                pyenv_root = None
+                pyenv_root_dir = None
                 virtualenv_root = Path(_get_mlflow_virtualenv_root())
                 env_vars = None
-            python_bin_path = _install_python(python_env.python, pyenv_root=pyenv_root)
             work_dir_path = Path(work_dir)
             env_name = _get_virtualenv_name(python_env, work_dir_path)
             env_dir = virtualenv_root / env_name
             activate_cmd = _create_virtualenv(
-                work_dir_path, python_bin_path, env_dir, python_env, extra_env=env_vars
+                local_model_path=work_dir_path,
+                python_env=python_env,
+                env_dir=env_dir,
+                pyenv_root_dir=pyenv_root_dir,
+                env_manager=env_manager,
+                extra_env=env_vars,
             )
             command_args += [activate_cmd]
         elif env_manager == _EnvManager.CONDA:

diff --git a/mlflow/pyfunc/backend.py b/mlflow/pyfunc/backend.py
@@ -25,7 +25,7 @@
 from mlflow.tracking.artifact_utils import _download_artifact_from_uri
 from mlflow.utils import env_manager as em
 from mlflow.utils.conda import get_conda_bin_executable, get_or_create_conda_env
-from mlflow.utils.environment import Environment, _PythonEnv
+from mlflow.utils.environment import Environment, _get_pip_install_mlflow, _PythonEnv
 from mlflow.utils.file_utils import (
     TempDir,
     get_or_create_nfs_tmp_dir,
@@ -36,10 +36,7 @@
 from mlflow.utils.nfs_on_spark import get_nfs_cache_root_dir
 from mlflow.utils.os import is_windows
 from mlflow.utils.process import ShellCommandException, cache_return_value_per_process
-from mlflow.utils.virtualenv import (
-    _get_or_create_virtualenv,
-    _get_pip_install_mlflow,
-)
+from mlflow.utils.virtualenv import _get_or_create_virtualenv
 from mlflow.version import VERSION
 
 _logger = logging.getLogger(__name__)
@@ -135,13 +132,14 @@ def _get_or_create_env_root_dir(should_use_nfs):
         else:
             env_root_dir = self._env_root_dir
 
-        if self._env_manager == em.VIRTUALENV:
+        if self._env_manager in {em.VIRTUALENV, em.UV}:
             activate_cmd = _get_or_create_virtualenv(
                 local_path,
                 self._env_id,
                 env_root_dir=env_root_dir,
                 capture_output=capture_output,
                 pip_requirements_override=pip_requirements_override,
+                env_manager=self._env_manager,
             )
             self._environment = Environment(activate_cmd, extra_env=extra_envs)
         elif self._env_manager == em.CONDA:

diff --git a/mlflow/utils/env_manager.py b/mlflow/utils/env_manager.py
@@ -4,10 +4,11 @@
 LOCAL = "local"
 CONDA = "conda"
 VIRTUALENV = "virtualenv"
+UV = "uv"
 
 
 def validate(env_manager):
-    allowed_values = [LOCAL, CONDA, VIRTUALENV]
+    allowed_values = [LOCAL, CONDA, VIRTUALENV, UV]
     if env_manager not in allowed_values:
         raise MlflowException(
             f"Invalid value for `env_manager`: {env_manager}. Must be one of {allowed_values}",

diff --git a/mlflow/utils/file_utils.py b/mlflow/utils/file_utils.py
@@ -1037,6 +1037,9 @@ def remove_on_error(path: os.PathLike, onerror=None):
                 os.remove(path)
             elif os.path.isdir(path):
                 shutil.rmtree(path)
+        _logger.warning(
+            f"Failed to remove {path}" if os.path.exists(path) else f"Successfully removed {path}"
+        )
         raise