From 8b23565dcc9606e63a859a96afc81201b0135929 Mon Sep 17 00:00:00 2001 From: Allen Wang Date: Wed, 9 Oct 2024 18:25:09 +0000 Subject: [PATCH 1/2] Sets for vLLM TPU --- vllm/worker/tpu_model_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py index 12e4215038d74..ca2c84081e36a 100644 --- a/vllm/worker/tpu_model_runner.py +++ b/vllm/worker/tpu_model_runner.py @@ -543,7 +543,8 @@ def execute_model( seq_group_metadata_list=ctx.seq_group_metadata_list, scheduler_outputs=ctx.scheduler_outputs, is_async=False, - is_last_step=False) + is_last_step=False, + is_first_step_output=i==0) model_input.async_callback() if use_async_out_proc: return [sampler_outputs[-1]] From 557a91c3c65871f27c26d618ae6da68573b6de92 Mon Sep 17 00:00:00 2001 From: Allen Wang Date: Wed, 9 Oct 2024 18:48:48 +0000 Subject: [PATCH 2/2] run formatter --- vllm/worker/tpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py index ca2c84081e36a..dd122f9f1272b 100644 --- a/vllm/worker/tpu_model_runner.py +++ b/vllm/worker/tpu_model_runner.py @@ -544,7 +544,7 @@ def execute_model( scheduler_outputs=ctx.scheduler_outputs, is_async=False, is_last_step=False, - is_first_step_output=i==0) + is_first_step_output=i == 0) model_input.async_callback() if use_async_out_proc: return [sampler_outputs[-1]]