diff --git a/conftest.py b/conftest.py index 1d14a8d73a9d6..f682b2fc28a1e 100644 --- a/conftest.py +++ b/conftest.py @@ -385,7 +385,9 @@ def serve_wheel(request, tmp_path_factory): if existing_url := os.environ.get("PIP_EXTRA_INDEX_URL"): url = f"{existing_url} {url}" os.environ["PIP_EXTRA_INDEX_URL"] = url - + # Set the `UV_INDEX` environment variable to allow fetching the wheel from the + # url when using `uv` as environment manager + os.environ["UV_INDEX"] = f"mlflow={url}" yield finally: prc.terminate() diff --git a/mlflow/models/container/__init__.py b/mlflow/models/container/__init__.py index d3d908c41d348..c4e072aca1fd0 100644 --- a/mlflow/models/container/__init__.py +++ b/mlflow/models/container/__init__.py @@ -166,7 +166,7 @@ def _install_model_dependencies_to_env(model_path, env_manager) -> list[str]: activate_cmd = ["source /miniconda/bin/activate custom_env"] elif env_manager == em.VIRTUALENV: - env_activate_cmd = _get_or_create_virtualenv(model_path) + env_activate_cmd = _get_or_create_virtualenv(model_path, env_manager=env_manager) path = env_activate_cmd.split(" ")[-1] os.symlink(path, "/opt/activate") activate_cmd = [env_activate_cmd] diff --git a/mlflow/models/python_api.py b/mlflow/models/python_api.py index 3e26fa6e0a59a..750d186571dd8 100644 --- a/mlflow/models/python_api.py +++ b/mlflow/models/python_api.py @@ -1,5 +1,6 @@ import logging import os +import shutil from io import StringIO from typing import ForwardRef, get_args, get_origin @@ -13,6 +14,10 @@ from mlflow.utils.file_utils import TempDir _logger = logging.getLogger(__name__) +UV_INSTALLATION_INSTRUCTIONS = ( + "Run `pip install uv` to install uv. See " + "https://docs.astral.sh/uv/getting-started/installation for other installation methods." +) def build_docker( @@ -128,6 +133,7 @@ def predict( env_manager: Specify a way to create an environment for MLmodel inference: - "virtualenv" (default): use virtualenv (and pyenv for Python version management) + - "uv": use uv - "local": use the local environment - "conda": use conda @@ -149,8 +155,8 @@ def predict( current os.environ are passed, and this parameter can be used to override them. .. note:: - This parameter is only supported when `env_manager` is set to "virtualenv" - or "conda". + This parameter is only supported when `env_manager` is set to "virtualenv", + "conda" or "uv". Code example: @@ -166,7 +172,14 @@ def predict( content_type="json", ) - # Run prediction with additional pip dependencies + # Run prediction with "uv" as the environment manager + mlflow.models.predict( + model_uri=f"runs:/{run_id}/model", + input_data={"x": 1, "y": 2}, + env_manager="uv", + ) + + # Run prediction with additional pip dependencies and extra environment variables mlflow.models.predict( model_uri=f"runs:/{run_id}/model", input_data={"x": 1, "y": 2}, @@ -183,18 +196,36 @@ def predict( raise MlflowException.invalid_parameter_value( f"Content type must be one of {_CONTENT_TYPE_JSON} or {_CONTENT_TYPE_CSV}." ) - if extra_envs and env_manager not in (_EnvManager.VIRTUALENV, _EnvManager.CONDA): + if extra_envs and env_manager not in ( + _EnvManager.VIRTUALENV, + _EnvManager.CONDA, + _EnvManager.UV, + ): raise MlflowException.invalid_parameter_value( "Extra environment variables are only supported when env_manager is " - f"set to '{_EnvManager.VIRTUALENV}' or '{_EnvManager.CONDA}'." + f"set to '{_EnvManager.VIRTUALENV}', '{_EnvManager.CONDA}' or '{_EnvManager.UV}'." + ) + if env_manager == _EnvManager.UV: + if not shutil.which("uv"): + raise MlflowException( + f"Found '{env_manager}' as env_manager, but the 'uv' command is not found in the " + f"PATH. {UV_INSTALLATION_INSTRUCTIONS} Alternatively, you can use 'virtualenv' or " + "'conda' as the environment manager, but note their performances are not " + "as good as 'uv'." + ) + else: + _logger.info( + f"It is highly recommended to use `{_EnvManager.UV}` as the environment manager for " + "predicting with MLflow models as its performance is significantly better than other " + f"environment managers. {UV_INSTALLATION_INSTRUCTIONS}" ) is_dbconnect_mode = is_databricks_connect() if is_dbconnect_mode: - if env_manager != _EnvManager.VIRTUALENV: + if env_manager not in (_EnvManager.VIRTUALENV, _EnvManager.UV): raise MlflowException( - "Databricks Connect only supports virtualenv as the environment manager. " - f"Got {env_manager}." + f"Databricks Connect only supports '{_EnvManager.VIRTUALENV}' or '{_EnvManager.UV}'" + f" as the environment manager. Got {env_manager}." ) pyfunc_backend_env_root_config = { "create_env_root_dir": False, diff --git a/mlflow/projects/backend/local.py b/mlflow/projects/backend/local.py index ef2e9e199c6a6..d81d234bd6faf 100644 --- a/mlflow/projects/backend/local.py +++ b/mlflow/projects/backend/local.py @@ -52,7 +52,6 @@ _get_mlflow_virtualenv_root, _get_virtualenv_extra_env_vars, _get_virtualenv_name, - _install_python, ) _logger = logging.getLogger(__name__) @@ -151,19 +150,23 @@ def run( if is_in_databricks_runtime(): nfs_tmp_dir = get_or_create_nfs_tmp_dir() env_root = Path(nfs_tmp_dir) / "envs" - pyenv_root = env_root / _PYENV_ROOT_DIR + pyenv_root_dir = str(env_root / _PYENV_ROOT_DIR) virtualenv_root = env_root / _VIRTUALENV_ENVS_DIR env_vars = _get_virtualenv_extra_env_vars(str(env_root)) else: - pyenv_root = None + pyenv_root_dir = None virtualenv_root = Path(_get_mlflow_virtualenv_root()) env_vars = None - python_bin_path = _install_python(python_env.python, pyenv_root=pyenv_root) work_dir_path = Path(work_dir) env_name = _get_virtualenv_name(python_env, work_dir_path) env_dir = virtualenv_root / env_name activate_cmd = _create_virtualenv( - work_dir_path, python_bin_path, env_dir, python_env, extra_env=env_vars + local_model_path=work_dir_path, + python_env=python_env, + env_dir=env_dir, + pyenv_root_dir=pyenv_root_dir, + env_manager=env_manager, + extra_env=env_vars, ) command_args += [activate_cmd] elif env_manager == _EnvManager.CONDA: diff --git a/mlflow/pyfunc/backend.py b/mlflow/pyfunc/backend.py index cda715c544bfa..a42ec5382f2af 100644 --- a/mlflow/pyfunc/backend.py +++ b/mlflow/pyfunc/backend.py @@ -25,7 +25,7 @@ from mlflow.tracking.artifact_utils import _download_artifact_from_uri from mlflow.utils import env_manager as em from mlflow.utils.conda import get_conda_bin_executable, get_or_create_conda_env -from mlflow.utils.environment import Environment, _PythonEnv +from mlflow.utils.environment import Environment, _get_pip_install_mlflow, _PythonEnv from mlflow.utils.file_utils import ( TempDir, get_or_create_nfs_tmp_dir, @@ -36,10 +36,7 @@ from mlflow.utils.nfs_on_spark import get_nfs_cache_root_dir from mlflow.utils.os import is_windows from mlflow.utils.process import ShellCommandException, cache_return_value_per_process -from mlflow.utils.virtualenv import ( - _get_or_create_virtualenv, - _get_pip_install_mlflow, -) +from mlflow.utils.virtualenv import _get_or_create_virtualenv from mlflow.version import VERSION _logger = logging.getLogger(__name__) @@ -135,13 +132,14 @@ def _get_or_create_env_root_dir(should_use_nfs): else: env_root_dir = self._env_root_dir - if self._env_manager == em.VIRTUALENV: + if self._env_manager in {em.VIRTUALENV, em.UV}: activate_cmd = _get_or_create_virtualenv( local_path, self._env_id, env_root_dir=env_root_dir, capture_output=capture_output, pip_requirements_override=pip_requirements_override, + env_manager=self._env_manager, ) self._environment = Environment(activate_cmd, extra_env=extra_envs) elif self._env_manager == em.CONDA: diff --git a/mlflow/utils/env_manager.py b/mlflow/utils/env_manager.py index d2b5716411147..017fea334c0d1 100644 --- a/mlflow/utils/env_manager.py +++ b/mlflow/utils/env_manager.py @@ -4,10 +4,11 @@ LOCAL = "local" CONDA = "conda" VIRTUALENV = "virtualenv" +UV = "uv" def validate(env_manager): - allowed_values = [LOCAL, CONDA, VIRTUALENV] + allowed_values = [LOCAL, CONDA, VIRTUALENV, UV] if env_manager not in allowed_values: raise MlflowException( f"Invalid value for `env_manager`: {env_manager}. Must be one of {allowed_values}", diff --git a/mlflow/utils/file_utils.py b/mlflow/utils/file_utils.py index 45b4fdef409b6..f6cc20936a3e5 100644 --- a/mlflow/utils/file_utils.py +++ b/mlflow/utils/file_utils.py @@ -1037,6 +1037,9 @@ def remove_on_error(path: os.PathLike, onerror=None): os.remove(path) elif os.path.isdir(path): shutil.rmtree(path) + _logger.warning( + f"Failed to remove {path}" if os.path.exists(path) else f"Successfully removed {path}" + ) raise diff --git a/mlflow/utils/virtualenv.py b/mlflow/utils/virtualenv.py index 3ae2860ebc19b..e58994f256d66 100644 --- a/mlflow/utils/virtualenv.py +++ b/mlflow/utils/virtualenv.py @@ -6,20 +6,21 @@ import tempfile import uuid from pathlib import Path +from typing import Literal, Optional from packaging.version import Version import mlflow -from mlflow.environment_variables import MLFLOW_ENV_ROOT +from mlflow.environment_variables import _MLFLOW_TESTING, MLFLOW_ENV_ROOT from mlflow.exceptions import MlflowException from mlflow.models.model import MLMODEL_FILE_NAME, Model +from mlflow.utils import env_manager as em from mlflow.utils.conda import _PIP_CACHE_DIR from mlflow.utils.environment import ( _CONDA_ENV_FILE_NAME, _PYTHON_ENV_FILE_NAME, _REQUIREMENTS_FILE_NAME, _get_mlflow_env_name, - _get_pip_install_mlflow, _PythonEnv, ) from mlflow.utils.file_utils import remove_on_error @@ -231,18 +232,57 @@ def _get_virtualenv_name(python_env, work_dir_path, env_id=None): ) -def _create_virtualenv( - local_model_path, python_bin_path, env_dir, python_env, extra_env=None, capture_output=False -): +def _get_virtualenv_activate_cmd(env_dir: Path) -> str: # Created a command to activate the environment paths = ("bin", "activate") if not is_windows() else ("Scripts", "activate.bat") activate_cmd = env_dir.joinpath(*paths) - activate_cmd = f"source {activate_cmd}" if not is_windows() else str(activate_cmd) + return f"source {activate_cmd}" if not is_windows() else str(activate_cmd) + +def _create_virtualenv( + local_model_path: Path, + python_env: _PythonEnv, + env_dir: Path, + pyenv_root_dir: Optional[str] = None, + env_manager: Literal["virtualenv", "uv"] = em.UV, + extra_env: Optional[dict[str, str]] = None, + capture_output: bool = False, + pip_requirements_override: Optional[list[str]] = None, +): + if env_manager not in {em.VIRTUALENV, em.UV}: + raise MlflowException.invalid_parameter_value( + f"Invalid value for `env_manager`: {env_manager}. " + f"Must be one of `{em.VIRTUALENV}, {em.UV}`" + ) + + activate_cmd = _get_virtualenv_activate_cmd(env_dir) if env_dir.exists(): - _logger.info("Environment %s already exists", env_dir) + _logger.info(f"Environment {env_dir} already exists") return activate_cmd + if env_manager == em.VIRTUALENV: + python_bin_path = _install_python( + python_env.python, pyenv_root=pyenv_root_dir, capture_output=capture_output + ) + _logger.info(f"Creating a new environment in {env_dir} with {python_bin_path}") + env_creation_cmd = [ + sys.executable, + "-m", + "virtualenv", + "--python", + python_bin_path, + env_dir, + ] + install_deps_cmd_prefix = "python -m pip install" + elif env_manager == em.UV: + _logger.info( + f"Creating a new environment in {env_dir} with python " + f"version {python_env.python} using uv" + ) + env_creation_cmd = ["uv", "venv", env_dir, f"--python={python_env.python}"] + install_deps_cmd_prefix = "uv pip install --prerelease=allow" + if _MLFLOW_TESTING: + os.environ["RUST_LOG"] = "uv=debug" with remove_on_error( env_dir, onerror=lambda e: _logger.warning( @@ -252,9 +292,8 @@ def _create_virtualenv( env_dir, ), ): - _logger.info("Creating a new environment in %s with %s", env_dir, python_bin_path) _exec_cmd( - [sys.executable, "-m", "virtualenv", "--python", python_bin_path, env_dir], + env_creation_cmd, capture_output=capture_output, ) @@ -281,10 +320,21 @@ def _create_virtualenv( tmp_req_file = f"requirements.{uuid.uuid4().hex}.txt" Path(tmpdir).joinpath(tmp_req_file).write_text("\n".join(deps)) - cmd = _join_commands(activate_cmd, f"python -m pip install -r {tmp_req_file}") + cmd = _join_commands(activate_cmd, f"{install_deps_cmd_prefix} -r {tmp_req_file}") _exec_cmd(cmd, capture_output=capture_output, cwd=tmpdir, extra_env=extra_env) - return activate_cmd + if pip_requirements_override: + _logger.info( + "Installing additional dependencies specified by " + f"pip_requirements_override: {pip_requirements_override}" + ) + cmd = _join_commands( + activate_cmd, + f"{install_deps_cmd_prefix} --quiet {' '.join(pip_requirements_override)}", + ) + _exec_cmd(cmd, capture_output=capture_output, extra_env=extra_env) + + return activate_cmd def _copy_model_to_writeable_destination(model_src, dst): @@ -326,9 +376,10 @@ def _get_or_create_virtualenv( # noqa: D417 env_id=None, env_root_dir=None, capture_output=False, - pip_requirements_override=None, + pip_requirements_override: Optional[list[str]] = None, + env_manager: Literal["virtualenv", "uv"] = em.UV, ): - """Restores an MLflow model's environment with pyenv and virtualenv and returns a command + """Restores an MLflow model's environment in a virtual environment and returns a command to activate it. Args: @@ -340,111 +391,53 @@ def _get_or_create_virtualenv( # noqa: D417 environment after the environment has been activated. pip_requirements_override: If specified, install the specified python dependencies to the environment (upgrade if already installed). + env_manager: Specifies the environment manager to use to create the environment. + Defaults to "uv". + + .. tip:: + It is highly recommended to use "uv" as it has significant performance improvements + over "virtualenv". Returns: - Command to activate the created virtualenv environment + Command to activate the created virtual environment (e.g. "source /path/to/bin/activate"). """ - _validate_pyenv_is_available() - _validate_virtualenv_is_available() + if env_manager == em.VIRTUALENV: + _validate_pyenv_is_available() + _validate_virtualenv_is_available() - # Read environment information local_model_path = Path(local_model_path) python_env = _get_python_env(local_model_path) - extra_env = _get_virtualenv_extra_env_vars(env_root_dir) - if env_root_dir is not None: - virtual_envs_root_path = Path(env_root_dir) / _VIRTUALENV_ENVS_DIR - pyenv_root_path = Path(env_root_dir) / _PYENV_ROOT_DIR - pyenv_root_path.mkdir(parents=True, exist_ok=True) - pyenv_root_dir = str(pyenv_root_path) - else: + pyenv_root_dir = None + if env_root_dir is None: virtual_envs_root_path = Path(_get_mlflow_virtualenv_root()) - pyenv_root_dir = None - + else: + virtual_envs_root_path = Path(env_root_dir) / _VIRTUALENV_ENVS_DIR + if env_manager == em.VIRTUALENV: + pyenv_root_path = Path(env_root_dir) / _PYENV_ROOT_DIR + pyenv_root_path.mkdir(parents=True, exist_ok=True) + pyenv_root_dir = str(pyenv_root_path) + + virtual_envs_root_path = ( + Path(env_root_dir) / _VIRTUALENV_ENVS_DIR + if env_root_dir is not None + else Path(_get_mlflow_virtualenv_root()) + ) virtual_envs_root_path.mkdir(parents=True, exist_ok=True) env_name = _get_virtualenv_name(python_env, local_model_path, env_id) env_dir = virtual_envs_root_path / env_name - if env_dir.exists(): - paths = ("bin", "activate") if not is_windows() else ("Scripts", "activate.bat") - activate_cmd = env_dir.joinpath(*paths) - return f"source {activate_cmd}" if not is_windows() else str(activate_cmd) + extra_env = _get_virtualenv_extra_env_vars(env_root_dir) # Create an environment - python_bin_path = _install_python( - python_env.python, pyenv_root=pyenv_root_dir, capture_output=capture_output - ) - try: - activate_cmd = _create_virtualenv( - local_model_path, - python_bin_path, - env_dir, - python_env, - extra_env=extra_env, - capture_output=capture_output, - ) - - # Install additional dependencies specified by `requirements_override` - if pip_requirements_override: - _logger.info( - "Installing additional dependencies specified by " - f"pip_requirements_override: {pip_requirements_override}" - ) - cmd = _join_commands( - activate_cmd, - f"python -m pip install --quiet -U {' '.join(pip_requirements_override)}", - ) - _exec_cmd(cmd, capture_output=capture_output, extra_env=extra_env) - - return activate_cmd - - except: - _logger.warning("Encountered unexpected error while creating %s", env_dir) - if env_dir.exists(): - _logger.warning("Attempting to remove %s", env_dir) - shutil.rmtree(env_dir, ignore_errors=True) - msg = "Failed to remove %s" if env_dir.exists() else "Successfully removed %s" - _logger.warning(msg, env_dir) - raise - - -def _execute_in_virtualenv( - activate_cmd, - command, - install_mlflow, - command_env=None, - synchronous=True, - capture_output=False, - env_root_dir=None, - **kwargs, -): - """Runs a command in a specified virtualenv environment. - - Args: - activate_cmd: Command to activate the virtualenv environment. - command: Command to run in the virtualenv environment. - install_mlflow: Flag to determine whether to install mlflow in the virtualenv - environment. - command_env: Environment variables passed to a process running the command. - synchronous: Set the `synchronous` argument when calling `_exec_cmd`. - capture_output: Set the `capture_output` argument when calling `_exec_cmd`. - env_root_dir: See doc of PyFuncBackend constructor argument `env_root_dir`. - kwargs: Set the `kwargs` argument when calling `_exec_cmd`. - - """ - if command_env is None: - command_env = os.environ.copy() - - if env_root_dir is not None: - command_env = {**command_env, **_get_virtualenv_extra_env_vars(env_root_dir)} - - pre_command = [activate_cmd] - if install_mlflow: - pre_command.append(_get_pip_install_mlflow()) - - cmd = _join_commands(*pre_command, command) - _logger.info("Running command: %s", " ".join(cmd)) - return _exec_cmd( - cmd, capture_output=capture_output, env=command_env, synchronous=synchronous, **kwargs + return _create_virtualenv( + local_model_path=local_model_path, + python_env=python_env, + env_dir=env_dir, + pyenv_root_dir=pyenv_root_dir, + env_manager=env_manager, + extra_env=extra_env, + capture_output=capture_output, + pip_requirements_override=pip_requirements_override, ) diff --git a/tests/models/test_python_api.py b/tests/models/test_python_api.py index 711dd00654913..c213b9f33c86d 100644 --- a/tests/models/test_python_api.py +++ b/tests/models/test_python_api.py @@ -16,7 +16,7 @@ _CONTENT_TYPE_JSON, _serialize_input_data, ) -from mlflow.utils.env_manager import CONDA, LOCAL, VIRTUALENV +from mlflow.utils.env_manager import CONDA, LOCAL, UV, VIRTUALENV @pytest.mark.parametrize( @@ -55,7 +55,11 @@ ), ], ) -def test_predict(input_data, expected_data, content_type): +@pytest.mark.parametrize( + "env_manager", + [VIRTUALENV, UV], +) +def test_predict(input_data, expected_data, content_type, env_manager): class TestModel(mlflow.pyfunc.PythonModel): def predict(self, context, model_input): if isinstance(model_input, pd.DataFrame): @@ -77,12 +81,13 @@ def predict(self, context, model_input): model_uri=model_info.model_uri, input_data=input_data, content_type=content_type, + env_manager=env_manager, ) @pytest.mark.parametrize( "env_manager", - [VIRTUALENV, CONDA], + [VIRTUALENV, CONDA, UV], ) def test_predict_with_pip_requirements_override(env_manager): if env_manager == CONDA: @@ -130,7 +135,31 @@ def predict(self, context, model_input): ) -@pytest.mark.parametrize("env_manager", [VIRTUALENV, CONDA]) +@pytest.mark.parametrize("env_manager", [VIRTUALENV, CONDA, UV]) +def test_predict_with_model_alias(env_manager): + class TestModel(mlflow.pyfunc.PythonModel): + def predict(self, context, model_input): + assert os.environ["TEST"] == "test" + return model_input + + with mlflow.start_run(): + mlflow.pyfunc.log_model( + "model", + python_model=TestModel(), + registered_model_name="model_name", + ) + client = mlflow.MlflowClient() + client.set_registered_model_alias("model_name", "test_alias", 1) + + mlflow.models.predict( + model_uri="models:/model_name@test_alias", + input_data="abc", + env_manager=env_manager, + extra_envs={"TEST": "test"}, + ) + + +@pytest.mark.parametrize("env_manager", [VIRTUALENV, CONDA, UV]) def test_predict_with_extra_envs(env_manager): class TestModel(mlflow.pyfunc.PythonModel): def predict(self, context, model_input): @@ -167,7 +196,7 @@ def predict(self, context, model_input): with pytest.raises( MlflowException, match=r"Extra environment variables are only " - r"supported when env_manager is set to 'virtualenv' or 'conda'", + r"supported when env_manager is set to 'virtualenv', 'conda' or 'uv'", ): mlflow.models.predict( model_uri=model_info.model_uri,