Skip to content

Commit

Permalink
[LLM Serving] add skeleton public api to ray.serve.llm (#50297)
Browse files Browse the repository at this point in the history
## Why are these changes needed?

Add the skeleton public LLM serving API to `ray.serve.llm` and link it from the documentation.

## Related issue number

<!-- For example: "Closes #1234" -->

---------

Signed-off-by: Gene Su <[email protected]>
  • Loading branch information
GeneDer authored Feb 7, 2025
1 parent ac045c7 commit 9e37334
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 0 deletions.
31 changes: 31 additions & 0 deletions doc/source/serve/api/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,37 @@ See the [model composition guide](serve-model-composition) for how to update cod
serve.exceptions.DeploymentUnavailableError
```

#### Large Language Model (LLM) Serving APIs

##### Configs
```{eval-rst}
.. autosummary::
:nosignatures:
:toctree: doc/
serve.llm.LLMConfig
```

##### Deployments
```{eval-rst}
.. autosummary::
:nosignatures:
:toctree: doc/
serve.llm.VLLMDeployment
serve.llm.LLMModelRouterDeployment
```

##### Builders
```{eval-rst}
.. autosummary::
:nosignatures:
:toctree: doc/
serve.llm.build_vllm_deployment
serve.llm.build_openai_app
```

(serve-cli)=

## Command Line Interface (CLI)
Expand Down
77 changes: 77 additions & 0 deletions python/ray/serve/llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Pull the internal LLM serving implementations in under private aliases so
# the public wrapper classes below can subclass them.
try:
    from ray.llm._internal.serve import (
        LLMConfig as _LLMConfig,
        VLLMDeployment as _VLLMDeployment,
        LLMModelRouterDeployment as _LLMModelRouterDeployment,
        LLMServingArgs,
    )
except ImportError:
    # ``ray.llm`` is not importable — presumably the optional LLM
    # dependencies are not installed (TODO confirm the exact extra).
    # Fall back to ``object`` placeholders so this module, and the public
    # class definitions below, still import cleanly; the resulting classes
    # are non-functional stand-ins.
    _LLMConfig = object
    _VLLMDeployment = object
    _LLMModelRouterDeployment = object
    LLMServingArgs = object

from ray.serve.deployment import Application
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class LLMConfig(_LLMConfig):
    """Configuration used to start an LLM deployment.

    Public (alpha) alias of the internal config class from
    ``ray.llm._internal.serve``; it adds no behavior of its own.
    """

    pass


@PublicAPI(stability="alpha")
class VLLMDeployment(_VLLMDeployment):
    """LLM deployment implementation that uses vLLM as the inference engine.

    Public (alpha) alias of the internal deployment class; it adds no
    behavior of its own.
    """

    ...


@PublicAPI(stability="alpha")
class LLMModelRouterDeployment(_LLMModelRouterDeployment):
    """Router deployment that exposes OpenAI-compatible endpoints and routes
    requests between the configured LLM deployments.

    This deployment creates the following endpoints:

    - ``/v1/chat/completions``: Chat interface (ChatGPT-style)
    - ``/v1/completions``: Text completion
    - ``/v1/models``: List available models
    - ``/v1/models/{model}``: Model information
    """

    ...


@PublicAPI(stability="alpha")
def build_vllm_deployment(llm_config: LLMConfig) -> Application:
    """Build a single vLLM deployment from the given LLM config.

    Args:
        llm_config: The LLM config to build the vLLM deployment from.

    Returns:
        The configured Ray Serve Application for the vLLM deployment.
    """
    # Imported lazily so this module stays importable even when the
    # internal LLM serving package cannot be loaded at import time.
    from ray.llm._internal.serve import build_vllm_deployment as _build

    return _build(llm_config=llm_config)


@PublicAPI(stability="alpha")
def build_openai_app(llm_serving_args: LLMServingArgs) -> Application:
    """Build an OpenAI-compatible app with the LLM deployment setup from the
    given LLM serving args.

    This is the main entry point for users to create a Serve application
    serving LLMs.

    Args:
        llm_serving_args: The list of llm configs or the paths to the llm
            config to build the app.

    Returns:
        The configured Ray Serve Application router.
    """
    # Imported lazily so this module stays importable even when the
    # internal LLM serving package cannot be loaded at import time.
    from ray.llm._internal.serve import build_openai_app as _build

    return _build(llm_serving_args=llm_serving_args)
17 changes: 17 additions & 0 deletions python/ray/serve/tests/unit/test_llm_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import sys
import pytest


def test_serve_llm_import_does_not_error():
    """Smoke test: the public ``ray.serve.llm`` surface imports cleanly."""
    import ray.serve.llm  # noqa: F401
    from ray.serve.llm import LLMConfig  # noqa: F401
    from ray.serve.llm import VLLMDeployment  # noqa: F401
    from ray.serve.llm import LLMModelRouterDeployment  # noqa: F401
    from ray.serve.llm import build_vllm_deployment  # noqa: F401
    from ray.serve.llm import build_openai_app  # noqa: F401


# Allow running this file directly (``python test_llm_imports.py``) and
# propagate pytest's exit code to the shell.
if __name__ == "__main__":
    sys.exit(pytest.main(["-v", __file__]))

0 comments on commit 9e37334

Please sign in to comment.