From 9f4b82661d1c9c94a6a1233754a6acf3c13b542e Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Wed, 19 Feb 2025 01:53:57 +0000 Subject: [PATCH] fix merge Signed-off-by: Bill Nell --- vllm/v1/core/scheduler.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py index 9ed304f6f7edd..90264566be40d 100644 --- a/vllm/v1/core/scheduler.py +++ b/vllm/v1/core/scheduler.py @@ -549,7 +549,7 @@ def update_from_output( stopped = False new_logprobs = None - new_token_ids = None + new_token_ids: List[int] = [] if request.num_computed_tokens >= request.num_tokens: for output_token_id in generated_token_ids: @@ -570,7 +570,6 @@ def update_from_output( # the outer lists can be of length > 1. new_logprobs = logprobs.slice(req_index, req_index + 1) - new_token_ids = request.output_token_ids[-num_new_tokens:] num_new_tokens = 1 else: num_new_tokens = 0 @@ -586,8 +585,7 @@ def update_from_output( output.new_token_id_offsets = [1] * i output.new_token_id_offsets.append(offset) - new_ids = request.output_token_ids[-num_new_tokens:] - output.new_token_ids += new_ids + output.new_token_ids += new_token_ids if new_logprobs is not None: output.new_logprobs[req_id] = new_logprobs @@ -609,9 +607,9 @@ def update_from_output( new_running.append(request) # Add sentinel to make output processing simpler. - num_new_tokens = len(output.new_token_ids) - if num_new_tokens > 0 and output.new_token_id_offsets is not None: - output.new_token_id_offsets.append(num_new_tokens) + total_new_tokens = len(output.new_token_ids) + if total_new_tokens > 0 and output.new_token_id_offsets is not None: + output.new_token_id_offsets.append(total_new_tokens) self.running = new_running output.new_prompt_logprobs_tensors = prompt_logprobs_dict