diff --git a/src/vllm_tgis_adapter/grpc/grpc_server.py b/src/vllm_tgis_adapter/grpc/grpc_server.py index efbb213..302c6de 100644 --- a/src/vllm_tgis_adapter/grpc/grpc_server.py +++ b/src/vllm_tgis_adapter/grpc/grpc_server.py @@ -191,10 +191,10 @@ async def post_init(self) -> None: assert self.tokenizer is not None # Swap in the special TGIS stats logger - assert hasattr(self.engine.engine, "stat_logger") - assert self.engine.engine.stat_logger + assert hasattr(self.engine.engine, "stat_loggers") + assert self.engine.engine.stat_loggers - vllm_stat_logger = self.engine.engine.stat_logger + vllm_stat_logger = self.engine.engine.stat_loggers["prometheus"] tgis_stats_logger = TGISStatLogger( vllm_stat_logger=vllm_stat_logger, max_sequence_len=self.config.max_model_len, diff --git a/src/vllm_tgis_adapter/tgis_utils/metrics.py b/src/vllm_tgis_adapter/tgis_utils/metrics.py index ccaa6f2..665533f 100644 --- a/src/vllm_tgis_adapter/tgis_utils/metrics.py +++ b/src/vllm_tgis_adapter/tgis_utils/metrics.py @@ -5,7 +5,7 @@ from prometheus_client import Counter, Gauge, Histogram from vllm import RequestOutput -from vllm.engine.metrics import StatLogger, Stats +from vllm.engine.metrics import StatLoggerBase, Stats from vllm_tgis_adapter.grpc.pb.generation_pb2 import ( BatchedTokenizeRequest, @@ -102,10 +102,10 @@ def observe_generation_success(self, start_time: float) -> None: self.tgi_request_duration.observe(duration) -class TGISStatLogger(StatLogger): - """Wraps the vLLM StatLogger to report TGIS metric names for compatibility.""" +class TGISStatLogger(StatLoggerBase): + """Wraps the vLLM StatLoggerBase to report TGIS metric names for compatibility.""" - def __init__(self, vllm_stat_logger: StatLogger, max_sequence_len: int): + def __init__(self, vllm_stat_logger: StatLoggerBase, max_sequence_len: int): # Not calling super-init because we're wrapping and delegating to # vllm_stat_logger self._vllm_stat_logger = vllm_stat_logger