From 56edd2755e342f78c7ea482ef43d4b53149398e5 Mon Sep 17 00:00:00 2001
From: Bill Nell
Date: Mon, 10 Feb 2025 20:20:11 +0000
Subject: [PATCH] wip

---
 vllm/v1/core/scheduler.py          | 4 +---
 vllm/v1/engine/__init__.py         | 1 -
 vllm/v1/engine/core.py             | 1 -
 vllm/v1/engine/output_processor.py | 2 +-
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py
index 2e87393517cdd..7ec59644fe346 100644
--- a/vllm/v1/core/scheduler.py
+++ b/vllm/v1/core/scheduler.py
@@ -446,7 +446,6 @@ def update_from_output(
                 new_token_ids=[],
                 new_logprobs = [],
                 new_prompt_logprobs_tensors = [],
-                finished=[],
                 finish_reason={},
                 scheduler_stats=None
             )
@@ -522,11 +521,10 @@ def update_from_output(
                 new_ids = request.output_token_ids[-num_new_tokens:]
                 output.new_token_ids += new_ids
 
-                # XXXXXXXXX list/None
+                # TODO: This is not right
                 output.new_logprobs += new_logprobs
                 output.new_prompt_logprobs_tensors += (prompt_logprobs_tensors
                     if prompt_logprobs_tensors is not None else [])
-                output.finished.append(request.is_finished())
                 if request.get_finished_reason() is not None:
                     output.finish_reason[req_id] = request.get_finished_reason()
                 #print(f"req stop = {request.stop_reason}, {request.status}")
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 6c4cc62f6aafc..9e6af34158b5c 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -96,7 +96,6 @@ class EngineCoreOutputs(
     # TODO: need offsets for logprobs?
     new_logprobs: List[Optional[LogprobsLists]]
     new_prompt_logprobs_tensors: List[Optional[LogprobsTensors]]
-    finished: List[bool]
     finish_reason: Dict[str, FinishReason]  # Union[List, Dict]?
     scheduler_stats: SchedulerStats
 
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 7e4f93e03da10..42ad11633984a 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -127,7 +127,6 @@ def step(self) -> EngineCoreOutputs:
                 new_token_ids=[],
                 new_logprobs = [],
                 new_prompt_logprobs_tensors = [],
-                finished=[],
                 finish_reason={},
                 scheduler_stats=self.scheduler.make_stats()
             )
diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py
index e1fe72785c585..0b81459d60ea8 100644
--- a/vllm/v1/engine/output_processor.py
+++ b/vllm/v1/engine/output_processor.py
@@ -210,7 +210,7 @@ def process_outputs(
             # Free completed requests.
             if request_output.finished:
                 self.request_states.pop(req_id)
-                if not engine_core_outputs.finished[i]:
+                if finish_reason is not None:
                     # If req not finished in EngineCore, but Detokenizer
                     # detected stop string, abort needed in EngineCore.
                     reqs_to_abort.append(req_id)
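
Aside (not part of the patch): the diff drops the positional finished: List[bool] field from EngineCoreOutputs, so a request's finished state is now implied by whether the core recorded an entry for it in the finish_reason dict, rather than by index alignment with a parallel list. Below is a minimal sketch of that lookup; the types are simplified stand-ins for illustration only, not the real msgspec structs from vllm/v1/engine/__init__.py.

    # Minimal sketch, assuming finish_reason is keyed by request id as in the
    # diff above. FakeEngineCoreOutputs is a hypothetical stand-in type.
    from dataclasses import dataclass, field
    from typing import Dict


    @dataclass
    class FakeEngineCoreOutputs:
        # Simplified stand-in for the EngineCoreOutputs struct after this patch.
        finish_reason: Dict[str, str] = field(default_factory=dict)


    def is_finished(outputs: FakeEngineCoreOutputs, req_id: str) -> bool:
        # Equivalent of the removed finished[i] lookup: a request is finished
        # in the EngineCore iff the core recorded a finish reason for it.
        return outputs.finish_reason.get(req_id) is not None


    outputs = FakeEngineCoreOutputs(finish_reason={"req-0": "stop"})
    assert is_finished(outputs, "req-0")
    assert not is_finished(outputs, "req-1")

Keying by request id avoids the old requirement that finished, new_token_ids, and the other per-request lists stay index-aligned.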