💄CLI interface improvements (#18)
# Summary

- The `click` CLI interface is now covered by a set of unit tests
- Validation is added for the `main` function
- `OpenAIBackend` initializer parameters are streamlined
    - usage of the `target`, `host`, and `port` parameters is simplified
- The `openai.NotFoundError` raised when listing **_available models_** is now handled
- `SerializableFileType` renamed to `SerializableFileExtension`
- `SerializableFileExtension` now inherits from `str` to simplify usage, since this Enum class is mostly used to work with strings (see the sketch after this list)
- `rate_type_to_load_gen_mode` renamed to
`RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER`
- `rate_type_to_profile_mode` renamed to
`RATE_TYPE_TO_PROFILE_MODE_MAPPER`
- CLI parameters are renamed:
    - `--num-seconds` -> `--max-seconds`
    - `--num-requests` -> `--max-requests`
- The unused `path` CLI argument is removed
- The `GUIDELLM` prefix for `.env` variables is fixed (the prefix is now `GUIDELLM__`)
- Unused comments, settings, and code are removed
- The default logger unit test now uses the injected logging settings object
- In the `backend.openai` module, `_base_url` is renamed to `base_url`
- In `OpenAIBackend.make_request`, the `GenerativeResponse` now always counts `output_tokens` with `self._token_count`
- `SerializableFileExtension` is ultimately replaced with plain Python strings
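
For illustration, here is a minimal sketch of the `str`-mixin Enum pattern described above; the member names come from the diff, but the snippet itself is illustrative (and, per the last bullet, the enum is ultimately replaced by plain strings):

```py
from enum import Enum


class SerializableFileExtension(str, Enum):
    """Str-mixin enum: every member is also a plain string."""

    YAML = "yaml"
    JSON = "json"


# Because each member is also a str, it compares and concatenates directly:
assert SerializableFileExtension.YAML == "yaml"
assert "report." + SerializableFileExtension.JSON == "report.json"
```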

---------

Co-authored-by: Dmytro Parfeniuk <[email protected]>
Co-authored-by: Mark Kurtz <[email protected]>
3 people authored Aug 2, 2024
1 parent 996de81 commit 8494b7a
Showing 25 changed files with 335 additions and 141 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -164,3 +164,8 @@ cython_debug/

# MacOS files
.DS_Store


# Project specific files
*.json
*.yaml
31 changes: 31 additions & 0 deletions DEVELOPING.md
@@ -228,6 +228,37 @@ The project is fully configurable with environment variables. With that configur
| `GUIDELLM__OPENAI__BASE_URL` | `http://localhost:8080` | The address to the **OpenAI-compatible** server.<br><br>OpenAI live base url is `https://api.openai.com/v1` |
| `GUIDELLM__OPENAI__API_KEY` | `invalid` | Corresponds to the **OpenAI-compatible** server API key.<br><br>If you look for the live key - check [this link](https://platform.openai.com/api-keys). |

<br>

## Project configuration

The project configuration is powered by _[`🔗 pydantic-settings`](https://docs.pydantic.dev/latest/concepts/pydantic_settings/)_.

The project configuration entrypoint is the lazily loaded `settings` singleton object (`src/config/__init__`).

The project is fully configurable with environment variables. All the default values and the nested structure are defined in the settings classes, for example:

```py
from pydantic import BaseModel
from pydantic_settings import BaseSettings


class NestedIntoLogging(BaseModel):
    nested: str = "default value"


class LoggingSettings(BaseModel):
    # ...
    disabled: bool = False


class Settings(BaseSettings):
    """The entrypoint to settings."""

    # ...
    logging: LoggingSettings = LoggingSettings()


settings = Settings()
```

With that configuration in place, you can set `LoggingSettings()` parameters via environment variables. Just run `export GUIDELLM__LOGGING__DISABLED=true` or `export GUIDELLM__LOGGING__NESTED=another_value`, respectively. The nesting delimiter is `__`.
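
As a quick sanity check, here is a minimal, self-contained sketch (assuming `pydantic-settings` v2, which the project's `SettingsConfigDict` usage implies) showing the prefix and nested delimiter in action:

```py
import os

from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class LoggingSettings(BaseModel):
    disabled: bool = False


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="GUIDELLM__", env_nested_delimiter="__")

    logging: LoggingSettings = LoggingSettings()


# The GUIDELLM__ prefix selects the Settings class; LOGGING__DISABLED walks into the nested model.
os.environ["GUIDELLM__LOGGING__DISABLED"] = "true"
assert Settings().logging.disabled is True
```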

## Contact and Support

If you need help or have any questions, please open an issue on GitHub or contact us at [email protected].
7 changes: 5 additions & 2 deletions src/config/__init__.py
@@ -20,9 +20,12 @@ class OpenAISettings(BaseModel):
api_key: str = "invalid"

# OpenAI-compatible server URL
# NOTE: The default value is default address of llama.cpp web server
# NOTE: The default value is default address of llama.cpp http server
base_url: str = "http://localhost:8080"

# The max value of generated tokens
max_gen_tokens: int = 4096


class Settings(BaseSettings):
"""
@@ -39,7 +39,7 @@ class Settings(BaseSettings):
"""

model_config = SettingsConfigDict(
env_prefix="GUIDELLM",
env_prefix="GUIDELLM__",
env_nested_delimiter="__",
env_file=".env",
extra="ignore",
4 changes: 1 addition & 3 deletions src/guidellm/__init__.py
@@ -3,6 +3,4 @@
evaluating and benchmarking large language models (LLMs).
"""

from .logger import configure_logger, logger

__all__ = ["logger", "configure_logger"]
from .logger import configure_logger, logger # noqa: F401
57 changes: 32 additions & 25 deletions src/guidellm/backend/openai.py
@@ -1,6 +1,6 @@
import functools
from typing import Any, Dict, Generator, List, Optional

import openai
from loguru import logger
from openai import OpenAI, Stream
from openai.types import Completion
@@ -37,9 +37,11 @@ class OpenAIBackend(Backend):
def __init__(
self,
openai_api_key: Optional[str] = None,
internal_callback_url: Optional[str] = None,
target: Optional[str] = None,
host: Optional[str] = None,
port: Optional[int] = None,
model: Optional[str] = None,
**request_args: Any,
**request_args,
):
"""
Initialize an OpenAI Client
@@ -54,19 +56,22 @@ def __init__(
"must be specify for the OpenAI backend"
)

if not (_base_url := (internal_callback_url or settings.openai.base_url)):
if target is not None:
base_url = target
elif host and port:
base_url = f"{host}:{port}"
elif settings.openai.base_url is not None:
base_url = settings.openai.base_url
else:
raise ValueError(
"`GUIDELLM__OPENAI__BASE_URL` environment variable "
"or --openai-base-url CLI parameter "
"must be specify for the OpenAI backend"
"or --target CLI parameter must be specify for the OpenAI backend."
)
self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url)

self.openai_client = OpenAI(api_key=_api_key, base_url=base_url)
self.model = model or self.default_model

logger.info(
f"Initialized OpenAIBackend with callback url: {internal_callback_url} "
f"and model: {self.model}"
)
logger.info(f"OpenAI {self.model} Backend listening on {target}")

def make_request(
self, request: TextGenerationRequest
@@ -85,8 +90,11 @@ def make_request(
# How many completions to generate for each prompt
request_args: Dict = {"n": 1}

if (num_gen_tokens := request.params.get("generated_tokens", None)) is not None:
request_args.update(max_tokens=num_gen_tokens, stop=None)
num_gen_tokens: int = (
request.params.get("generated_tokens", None)
or settings.openai.max_gen_tokens
)
request_args.update({"max_tokens": num_gen_tokens, "stop": None})

if self.request_args:
request_args.update(self.request_args)
@@ -110,11 +118,7 @@ def make_request(
prompt_token_count=(
request.prompt_token_count or self._token_count(request.prompt)
),
output_token_count=(
num_gen_tokens
if num_gen_tokens
else self._token_count(chunk_content)
),
output_token_count=(self._token_count(chunk_content)),
)
else:
logger.debug("Received token from OpenAI backend")
@@ -128,15 +132,18 @@ def available_models(self) -> List[str]:
:rtype: List[str]
"""

models: List[str] = [
model.id for model in self.openai_client.models.list().data
]
logger.info(f"Available models: {models}")

return models
try:
models: List[str] = [
model.id for model in self.openai_client.models.list().data
]
except openai.NotFoundError as error:
logger.error("No available models for OpenAI Backend")
raise error
else:
logger.info(f"Available models: {models}")
return models

@property
@functools.lru_cache(maxsize=1)
def default_model(self) -> str:
"""
Get the default model for the backend.
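To make the new address-resolution precedence explicit, here is a small standalone sketch of the same logic shown in `OpenAIBackend.__init__` above; the helper function is illustrative and not part of the module:

```py
from typing import Optional


def resolve_base_url(
    target: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    settings_base_url: Optional[str] = "http://localhost:8080",
) -> str:
    """Restates the precedence used by OpenAIBackend.__init__ for the server address."""
    if target is not None:
        return target  # explicit --target wins
    if host and port:
        return f"{host}:{port}"  # otherwise fall back to the host/port pair
    if settings_base_url is not None:
        return settings_base_url  # finally, GUIDELLM__OPENAI__BASE_URL
    raise ValueError(
        "`GUIDELLM__OPENAI__BASE_URL` environment variable or --target CLI "
        "parameter must be specified for the OpenAI backend."
    )


assert resolve_base_url(target="http://localhost:9000") == "http://localhost:9000"
assert resolve_base_url(host="localhost", port=8000) == "localhost:8000"
```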
3 changes: 1 addition & 2 deletions src/guidellm/core/__init__.py
@@ -8,7 +8,7 @@
TextGenerationError,
TextGenerationResult,
)
from .serializable import Serializable, SerializableFileType
from .serializable import Serializable

__all__ = [
"Distribution",
@@ -19,6 +19,5 @@
"TextGenerationBenchmarkReport",
"RequestConcurrencyMeasurement",
"Serializable",
"SerializableFileType",
"GuidanceReport",
]
2 changes: 1 addition & 1 deletion src/guidellm/core/report.py
@@ -2,8 +2,8 @@

from pydantic import Field

from guidellm.core.serializable import Serializable
from guidellm.core.result import TextGenerationBenchmarkReport
from guidellm.core.serializable import Serializable

__all__ = [
"GuidanceReport",
76 changes: 26 additions & 50 deletions src/guidellm/core/serializable.py
@@ -1,24 +1,18 @@
from typing import Any, Optional

import os
from typing import Any, Literal, Tuple, Union

import yaml
from loguru import logger
from pydantic import BaseModel, ConfigDict
from enum import Enum

from guidellm.utils import is_file_name
from guidellm.utils import is_directory_name, is_file_name

__all__ = ["Serializable", "_Extension"]

__all__ = ["Serializable", "SerializableFileType"]

_Extension = Union[Literal["yaml"], Literal["json"]]

class SerializableFileType(Enum):
"""
Enum class for file types supported by Serializable.
"""

YAML = "yaml"
JSON = "json"
AVAILABLE_FILE_EXTENSIONS: Tuple[_Extension, ...] = ("yaml", "json")


class Serializable(BaseModel):
@@ -90,7 +84,7 @@ def from_json(cls, data: str):

return obj

def save_file(self, path: str, type_: Optional[SerializableFileType] = None) -> str:
def save_file(self, path: str, extension: _Extension = "yaml") -> str:
"""
Save the model to a file in either YAML or JSON format.
@@ -103,44 +97,28 @@ def save_file(self, path: str, type_: Optional[SerializableFileType] = None) ->
it will save in YAML format.
:return: The path to the saved file.
"""
logger.debug("Saving to file... {} with format: {}", path, type_)

if not is_file_name(path):
file_name = f"{self.__class__.__name__.lower()}"
if type_:
file_name += f".{type_.value.lower()}"
else:
file_name += ".yaml"
type_ = SerializableFileType.YAML
path = os.path.join(path, file_name)

if not type_:
extension = path.split(".")[-1].upper()

if extension not in SerializableFileType.__members__:
if is_file_name(path):
requested_extension = path.split(".")[-1].lower()
if requested_extension not in AVAILABLE_FILE_EXTENSIONS:
raise ValueError(
f"Unsupported file extension: {extension}. "
f"Expected one of {', '.join(SerializableFileType.__members__)}) "
f"for {path}"
f"Unsupported file extension: .{extension}. "
f"Expected one of {', '.join(AVAILABLE_FILE_EXTENSIONS)})."
)

type_ = SerializableFileType[extension]

if type_.name not in SerializableFileType.__members__:
raise ValueError(
f"Unsupported file format: {type_} "
f"(expected 'yaml' or 'json') for {path}"
)

os.makedirs(os.path.dirname(path), exist_ok=True)
elif is_directory_name(path):
file_name = f"{self.__class__.__name__.lower()}.{extension}"
path = os.path.join(path, file_name)
else:
raise ValueError("Output path must be a either directory or file path")

with open(path, "w") as file:
if type_ == SerializableFileType.YAML:
if extension == "yaml":
file.write(self.to_yaml())
elif type_ == SerializableFileType.JSON:
elif extension == "json":
file.write(self.to_json())
else:
raise ValueError(f"Unsupported file format: {type_}")
raise ValueError(f"Unsupported file format: {extension}")

logger.info("Successfully saved {} to {}", self.__class__.__name__, path)

@@ -161,25 +139,23 @@ def load_file(cls, path: str):
elif not os.path.isfile(path):
raise ValueError(f"Path is not a file: {path}")

extension = path.split(".")[-1].upper()
extension = path.split(".")[-1].lower()

if extension not in SerializableFileType.__members__:
if extension not in AVAILABLE_FILE_EXTENSIONS:
raise ValueError(
f"Unsupported file extension: {extension}. "
f"Expected one of {', '.join(SerializableFileType.__members__)}) "
f"Expected one of {AVAILABLE_FILE_EXTENSIONS}) "
f"for {path}"
)

type_ = SerializableFileType[extension]

with open(path, "r") as file:
data = file.read()

if type_ == SerializableFileType.YAML:
if extension == "yaml":
obj = cls.from_yaml(data)
elif type_ == SerializableFileType.JSON:
elif extension == "json":
obj = cls.from_json(data)
else:
raise ValueError(f"Unsupported file format: {type_}")
raise ValueError(f"Unsupported file format: {extension}")

return obj
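
A hedged usage sketch of the plain-string extension API shown above (the subclass and its fields are hypothetical; it also assumes `is_file_name` accepts a bare file name and that `save_file` returns the written path, as its signature suggests):

```py
from guidellm.core import Serializable


class BenchmarkConfig(Serializable):
    """Hypothetical model used only to illustrate the save/load API."""

    name: str = "demo"
    max_requests: int = 100


config = BenchmarkConfig()
# An explicit ".json" file name selects the JSON branch; "yaml" is the other supported extension.
saved_path = config.save_file("benchmark_config.json", extension="json")
loaded = BenchmarkConfig.load_file(saved_path)
assert loaded.name == "demo"
```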
8 changes: 4 additions & 4 deletions src/guidellm/executor/__init__.py
@@ -1,17 +1,17 @@
from .executor import Executor
from .profile_generator import (
RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER,
RATE_TYPE_TO_PROFILE_MODE_MAPPER,
FixedRateProfileGenerator,
Profile,
ProfileGenerationMode,
ProfileGenerator,
SweepProfileGenerator,
rate_type_to_load_gen_mode,
rate_type_to_profile_mode,
)

__all__ = [
"rate_type_to_load_gen_mode",
"rate_type_to_profile_mode",
"RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER",
"RATE_TYPE_TO_PROFILE_MODE_MAPPER",
"Executor",
"ProfileGenerationMode",
"Profile",
5 changes: 3 additions & 2 deletions src/guidellm/executor/profile_generator.py
@@ -16,7 +16,7 @@
"SweepProfileGenerator",
]

rate_type_to_load_gen_mode = {
RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER = {
"synchronous": LoadGenerationMode.SYNCHRONOUS,
"constant": LoadGenerationMode.CONSTANT,
"poisson": LoadGenerationMode.POISSON,
@@ -28,7 +28,7 @@ class ProfileGenerationMode(Enum):
SWEEP = "sweep"


rate_type_to_profile_mode = {
RATE_TYPE_TO_PROFILE_MODE_MAPPER = {
"synchronous": ProfileGenerationMode.FIXED_RATE,
"constant": ProfileGenerationMode.FIXED_RATE,
"poisson": ProfileGenerationMode.FIXED_RATE,
@@ -80,6 +80,7 @@ def __init__(
super().__init__(ProfileGenerationMode.FIXED_RATE)
if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0:
raise ValueError("custom rates are not supported in synchronous mode")

self._rates: Optional[List[float]] = rates
self._load_gen_mode = load_gen_mode
self._generated: bool = False