From 56edd2755e342f78c7ea482ef43d4b53149398e5 Mon Sep 17 00:00:00 2001
From: Bill Nell
Date: Mon, 10 Feb 2025 20:20:11 +0000
Subject: [PATCH] wip

---
 vllm/v1/core/scheduler.py          | 4 +---
 vllm/v1/engine/__init__.py         | 1 -
 vllm/v1/engine/core.py             | 1 -
 vllm/v1/engine/output_processor.py | 2 +-
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py
index 2e87393517cdd..7ec59644fe346 100644
--- a/vllm/v1/core/scheduler.py
+++ b/vllm/v1/core/scheduler.py
@@ -446,7 +446,6 @@ def update_from_output(
                 new_token_ids=[],
                 new_logprobs = [],
                 new_prompt_logprobs_tensors = [],
-                finished=[],
                 finish_reason={},
                 scheduler_stats=None
             )
@@ -522,11 +521,10 @@ def update_from_output(
                 new_ids = request.output_token_ids[-num_new_tokens:]
                 output.new_token_ids += new_ids
 
-                # XXXXXXXXX list/None
+                # TODO: This is not right
                 output.new_logprobs += new_logprobs
                 output.new_prompt_logprobs_tensors += (prompt_logprobs_tensors
                     if prompt_logprobs_tensors is not None else [])
-                output.finished.append(request.is_finished())
                 if request.get_finished_reason() is not None:
                     output.finish_reason[req_id] = request.get_finished_reason()
                 #print(f"req stop = {request.stop_reason}, {request.status}")
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 6c4cc62f6aafc..9e6af34158b5c 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -96,7 +96,6 @@ class EngineCoreOutputs(
     # TODO: need offsets for logprobs?
     new_logprobs: List[Optional[LogprobsLists]]
     new_prompt_logprobs_tensors: List[Optional[LogprobsTensors]]
-    finished: List[bool]
     finish_reason: Dict[str, FinishReason]  # Union[List, Dict]?
     scheduler_stats: SchedulerStats
 
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 7e4f93e03da10..42ad11633984a 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -127,7 +127,6 @@ def step(self) -> EngineCoreOutputs:
                 new_token_ids=[],
                 new_logprobs = [],
                 new_prompt_logprobs_tensors = [],
-                finished=[],
                 finish_reason={},
                 scheduler_stats=self.scheduler.make_stats()
             )
diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py
index e1fe72785c585..0b81459d60ea8 100644
--- a/vllm/v1/engine/output_processor.py
+++ b/vllm/v1/engine/output_processor.py
@@ -210,7 +210,7 @@ def process_outputs(
             # Free completed requests.
             if request_output.finished:
                 self.request_states.pop(req_id)
-                if not engine_core_outputs.finished[i]:
+                if finish_reason is not None:
                     # If req not finished in EngineCore, but Detokenizer
                     # detected stop string, abort needed in EngineCore.
                     reqs_to_abort.append(req_id)
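
Aside (not part of the patch): the diff drops the positional finished: List[bool] field from EngineCoreOutputs, so a request's finished state is now implied by whether the core recorded an entry for it in the finish_reason dict, rather than by index alignment with a parallel list. Below is a minimal sketch of that lookup; the types are simplified stand-ins for illustration only, not the real msgspec structs from vllm/v1/engine/__init__.py.

    # Minimal sketch, assuming finish_reason is keyed by request id as in the
    # diff above. FakeEngineCoreOutputs is a hypothetical stand-in type.
    from dataclasses import dataclass, field
    from typing import Dict


    @dataclass
    class FakeEngineCoreOutputs:
        # Simplified stand-in for the EngineCoreOutputs struct after this patch.
        finish_reason: Dict[str, str] = field(default_factory=dict)


    def is_finished(outputs: FakeEngineCoreOutputs, req_id: str) -> bool:
        # Equivalent of the removed finished[i] lookup: a request is finished
        # in the EngineCore iff the core recorded a finish reason for it.
        return outputs.finish_reason.get(req_id) is not None


    outputs = FakeEngineCoreOutputs(finish_reason={"req-0": "stop"})
    assert is_finished(outputs, "req-0")
    assert not is_finished(outputs, "req-1")

Keying by request id avoids the old requirement that finished, new_token_ids, and the other per-request lists stay index-aligned.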