
[V1][Metrics] Add initial Prometheus logger #12416

Merged · 3 commits · Jan 27, 2025
41 changes: 35 additions & 6 deletions tests/entrypoints/openai/test_metrics.py
@@ -16,6 +16,24 @@
 MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 
+@pytest.fixture(scope="module", params=[True, False])
+def use_v1(request):
+    # Module-scoped variant of run_with_both_engines
+    #
+    # Use this fixture to run a test with both v0 and v1, and
+    # also to conditionalize the test logic e.g.
+    #
+    #   def test_metrics_exist(use_v1, server, client):
+    #       ...
+    #       expected = EXPECTED_V1_METRICS if use_v1 else EXPECTED_METRICS
+    #       for metric in expected:
+    #           assert metric in response.text
+    #
+    # @skip_v1 wouldn't work here because this is a module-level
+    # fixture - per-function decorators would have no effect
+    yield request.param
+
+
 @pytest.fixture(scope="module")
 def default_server_args():
     return [
@@ -36,10 +36,12 @@ def default_server_args():
                     "--enable-chunked-prefill",
                     "--disable-frontend-multiprocessing",
                 ])
-def server(default_server_args, request):
+def server(use_v1, default_server_args, request):
     if request.param:
         default_server_args.append(request.param)
-    with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
+    env_dict = dict(VLLM_USE_V1='1' if use_v1 else '0')
+    with RemoteOpenAIServer(MODEL_NAME, default_server_args,
+                            env_dict=env_dict) as remote_server:
         yield remote_server
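For context on the fixture above: the engine choice is passed to the server process purely through its environment. The sketch below shows that general pattern; the helper name and CLI invocation are illustrative assumptions, not RemoteOpenAIServer's actual implementation.

# Illustrative sketch only: how an env_dict such as {"VLLM_USE_V1": "1"}
# typically reaches a server subprocess.
import os
import subprocess
from typing import Dict, List


def launch_openai_server(model: str, args: List[str],
                         env_dict: Dict[str, str]) -> subprocess.Popen:
    env = os.environ.copy()
    env.update(env_dict)  # e.g. VLLM_USE_V1=1 selects the V1 engine
    return subprocess.Popen(["vllm", "serve", model, *args], env=env)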


@@ -84,7 +104,9 @@ async def client(server):
 
 @pytest.mark.asyncio
 async def test_metrics_counts(server: RemoteOpenAIServer,
-                              client: openai.AsyncClient):
+                              client: openai.AsyncClient, use_v1: bool):
+    if use_v1:
+        pytest.skip("Skipping test on vllm V1")
     for _ in range(_NUM_REQUESTS):
         # sending a request triggers the metrics to be logged.
         await client.completions.create(
@@ -174,10 +196,15 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
     "swap_space_bytes",
 ]
 
+EXPECTED_METRICS_V1 = [
+    "vllm:num_requests_running",
+    "vllm:num_requests_waiting",
+]
+
 
 @pytest.mark.asyncio
 async def test_metrics_exist(server: RemoteOpenAIServer,
-                             client: openai.AsyncClient):
+                             client: openai.AsyncClient, use_v1: bool):
     # sending a request triggers the metrics to be logged.
     await client.completions.create(model=MODEL_NAME,
                                     prompt="Hello, my name is",
@@ -187,11 +214,13 @@ async def test_metrics_exist(server: RemoteOpenAIServer,
     response = requests.get(server.url_for("metrics"))
     assert response.status_code == HTTPStatus.OK
 
-    for metric in EXPECTED_METRICS:
+    for metric in (EXPECTED_METRICS_V1 if use_v1 else EXPECTED_METRICS):
         assert metric in response.text
 
 
-def test_metrics_exist_run_batch():
+def test_metrics_exist_run_batch(use_v1: bool):
+    if use_v1:
+        pytest.skip("Skipping test on vllm V1")
     input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}""" # noqa: E501
 
     base_url = "0.0.0.0"
11 changes: 7 additions & 4 deletions vllm/v1/engine/async_llm.py
@@ -24,7 +24,8 @@
 from vllm.v1.engine.output_processor import OutputProcessor
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.metrics.loggers import LoggingStatLogger, StatLoggerBase
+from vllm.v1.metrics.loggers import (LoggingStatLogger, PrometheusStatLogger,
+                                     StatLoggerBase)
 from vllm.v1.metrics.stats import IterationStats, SchedulerStats
 
 logger = init_logger(__name__)
@@ -46,13 +47,15 @@ def __init__(
 
         assert start_engine_loop
 
+        self.model_config = vllm_config.model_config
+
         self.log_requests = log_requests
         self.log_stats = log_stats
         self.stat_loggers: List[StatLoggerBase] = [
             LoggingStatLogger(),
-            # TODO(rob): PrometheusStatLogger(),
+            PrometheusStatLogger(labels=dict(
+                model_name=self.model_config.served_model_name)),
         ]
-        self.model_config = vllm_config.model_config
 
         # Tokenizer (+ ensure liveness if running in another process).
         self.tokenizer = init_tokenizer_from_configs(
@@ -272,7 +275,7 @@ async def _run_output_handler(self):
 
                 # 4) Logging.
                 # TODO(rob): make into a coroutine and launch it in
-                # background thread once we add Prometheus.
+                # background thread once Prometheus overhead is non-trivial.
                 assert iteration_stats is not None
                 self._log_stats(
                     scheduler_stats=outputs.scheduler_stats,
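The body of _log_stats is not part of this diff; presumably it just fans the per-step stats out to every configured logger, which is why appending PrometheusStatLogger to self.stat_loggers is the only engine-side change needed. A rough, self-contained sketch of that dispatch, under that assumption (the class name here is a stand-in, not vLLM code):

# Hedged sketch of the stats fan-out assumed by this PR.
from typing import List

from vllm.v1.metrics.loggers import StatLoggerBase
from vllm.v1.metrics.stats import IterationStats, SchedulerStats


class _StatsFanout:
    """Toy stand-in for the AsyncLLM side of the logging path."""

    def __init__(self, stat_loggers: List[StatLoggerBase], log_stats: bool):
        self.stat_loggers = stat_loggers
        self.log_stats = log_stats

    def _log_stats(self, scheduler_stats: SchedulerStats,
                   iteration_stats: IterationStats):
        if not self.log_stats:
            return
        # Every logger sees the same SchedulerStats each engine step.
        for stat_logger in self.stat_loggers:
            stat_logger.log(scheduler_stats)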
36 changes: 36 additions & 0 deletions vllm/v1/metrics/loggers.py
@@ -1,5 +1,8 @@
 import time
 from abc import ABC, abstractmethod
+from typing import Dict
+
+import prometheus_client
 
 from vllm.logger import init_logger
 from vllm.v1.metrics.stats import SchedulerStats
@@ -36,3 +39,36 @@ def log(self, scheduler_stats: SchedulerStats):
             scheduler_stats.num_running_reqs,
             scheduler_stats.num_waiting_reqs,
         )
+
+
+class PrometheusStatLogger(StatLoggerBase):
+
+    def __init__(self, labels: Dict[str, str]):
+        self.labels = labels
+
+        labelnames = self.labels.keys()
+        labelvalues = self.labels.values()
+
+        self._unregister_vllm_metrics()
+
+        self.gauge_scheduler_running = prometheus_client.Gauge(
+            name="vllm:num_requests_running",
+            documentation="Number of requests in model execution batches.",
+            labelnames=labelnames).labels(*labelvalues)
+
+        self.gauge_scheduler_waiting = prometheus_client.Gauge(
+            name="vllm:num_requests_waiting",
+            documentation="Number of requests waiting to be processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+    def log(self, scheduler_stats: SchedulerStats):
+        """Log to prometheus."""
+        self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
+        self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
+
+    @staticmethod
+    def _unregister_vllm_metrics():
+        # Unregister any existing vLLM collectors (for CI/CD)
+        for collector in list(prometheus_client.REGISTRY._collector_to_names):
+            if hasattr(collector, "_name") and "vllm" in collector._name:
+                prometheus_client.REGISTRY.unregister(collector)
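A minimal standalone usage sketch of the new logger follows. It assumes SchedulerStats can be constructed with the two counter fields used above; the commented output shows the standard prometheus_client exposition format that the server's /metrics endpoint would render.

# Sketch: exercise PrometheusStatLogger outside the engine.
import prometheus_client

from vllm.v1.metrics.loggers import PrometheusStatLogger
from vllm.v1.metrics.stats import SchedulerStats

stat_logger = PrometheusStatLogger(
    labels={"model_name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"})
stat_logger.log(SchedulerStats(num_running_reqs=2, num_waiting_reqs=5))

# The default registry then reports lines like:
#   vllm:num_requests_running{model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"} 2.0
#   vllm:num_requests_waiting{model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"} 5.0
print(prometheus_client.generate_latest().decode())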