[LLM Serving] add skeleton public api to ray.serve.llm (#50297)
## Why are these changes needed?

Adds the skeleton public API for LLM serving to `ray.serve.llm` and links it to the docs.

## Related issue number

---------

Signed-off-by: Gene Su <[email protected]>
Showing 3 changed files with 125 additions and 0 deletions.
@@ -0,0 +1,77 @@
try:
    from ray.llm._internal.serve import (
        LLMConfig as _LLMConfig,
        VLLMDeployment as _VLLMDeployment,
        LLMModelRouterDeployment as _LLMModelRouterDeployment,
        LLMServingArgs,
    )
except ImportError:
    # Fall back to plain `object` bases so this module still imports
    # when the LLM extras are not installed.
    _LLMConfig = object
    _VLLMDeployment = object
    _LLMModelRouterDeployment = object
    LLMServingArgs = object

from ray.serve.deployment import Application
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class LLMConfig(_LLMConfig):
    """The configuration for starting an LLM deployment."""

    ...


@PublicAPI(stability="alpha")
class VLLMDeployment(_VLLMDeployment):
    """The LLM deployment implementation that uses vLLM as the inference engine."""

    ...


@PublicAPI(stability="alpha")
class LLMModelRouterDeployment(_LLMModelRouterDeployment):
    """The router deployment that creates OpenAI-compatible endpoints and routes
    between LLM deployments.

    This deployment creates the following endpoints:
      - /v1/chat/completions: Chat interface (ChatGPT-style)
      - /v1/completions: Text completion
      - /v1/models: List available models
      - /v1/models/{model}: Model information
    """

    ...
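As a client-side illustration of the endpoints listed above, here is a minimal sketch using the `openai` Python client; the address `localhost:8000` and the model id `my-model` are assumptions for illustration, not part of this change:

```python
# Minimal client sketch (assumed address and model id, see note above).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake-key")

# Hits the router's /v1/chat/completions endpoint.
response = client.chat.completions.create(
    model="my-model",  # hypothetical model id
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```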
@PublicAPI(stability="alpha")
def build_vllm_deployment(llm_config: LLMConfig) -> Application:
    """Helper to build a single vLLM deployment from the given LLM config.

    Args:
        llm_config: The LLM config to build the vLLM deployment from.

    Returns:
        The configured Ray Serve Application for the vLLM deployment.
    """
    from ray.llm._internal.serve import build_vllm_deployment

    return build_vllm_deployment(llm_config=llm_config)
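A rough usage sketch for this helper. Note that the `LLMConfig` fields shown (`model_loading_config` and its keys) are assumptions for illustration and are not defined in this diff:

```python
# Illustrative sketch; the LLMConfig fields are assumed, not part of this diff.
from ray import serve
from ray.serve.llm import LLMConfig, build_vllm_deployment

llm_config = LLMConfig(
    model_loading_config=dict(  # hypothetical field name
        model_id="qwen-0.5b",
        model_source="Qwen/Qwen2.5-0.5B-Instruct",
    ),
)

# Build the single vLLM deployment and run it as a Serve application.
app = build_vllm_deployment(llm_config)
serve.run(app)
```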
@PublicAPI(stability="alpha")
def build_openai_app(llm_serving_args: LLMServingArgs) -> Application:
    """Helper to build an OpenAI-compatible app with the LLM deployments set up
    from the given LLM serving args. This is the main entry point for users to
    create a Serve application serving LLMs.

    Args:
        llm_serving_args: The list of LLM configs or the paths to the LLM configs
            to build the app from.

    Returns:
        The configured Ray Serve Application router.
    """
    from ray.llm._internal.serve import build_openai_app

    return build_openai_app(llm_serving_args=llm_serving_args)
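And a sketch of the main entry point; `LLMServingArgs(llm_configs=[...])` is an assumed constructor signature inferred from the docstring, and the config fields are hypothetical as in the previous example:

```python
# Illustrative sketch; the LLMServingArgs signature is inferred from the
# docstring above, and the LLMConfig fields are assumed.
from ray import serve
from ray.serve.llm import LLMConfig, LLMServingArgs, build_openai_app

llm_config = LLMConfig(
    model_loading_config=dict(  # hypothetical field name
        model_id="qwen-0.5b",
        model_source="Qwen/Qwen2.5-0.5B-Instruct",
    ),
)

# Builds the router app exposing /v1/chat/completions, /v1/completions,
# /v1/models, and /v1/models/{model}.
app = build_openai_app(LLMServingArgs(llm_configs=[llm_config]))
serve.run(app)
```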

@@ -0,0 +1,17 @@
import sys

import pytest


def test_serve_llm_import_does_not_error():
    import ray.serve.llm  # noqa: F401
    from ray.serve.llm import (
        LLMConfig,  # noqa: F401
        VLLMDeployment,  # noqa: F401
        LLMModelRouterDeployment,  # noqa: F401
        build_vllm_deployment,  # noqa: F401
        build_openai_app,  # noqa: F401
    )


if __name__ == "__main__":
    sys.exit(pytest.main(["-v", __file__]))