From 8b23565dcc9606e63a859a96afc81201b0135929 Mon Sep 17 00:00:00 2001
From: Allen Wang <allencwang@google.com>
Date: Wed, 9 Oct 2024 18:25:09 +0000
Subject: [PATCH 1/2] Sets is_first_step_output for vLLM TPU

---
 vllm/worker/tpu_model_runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py
index 12e4215038d74..ca2c84081e36a 100644
--- a/vllm/worker/tpu_model_runner.py
+++ b/vllm/worker/tpu_model_runner.py
@@ -543,7 +543,8 @@ def execute_model(
                         seq_group_metadata_list=ctx.seq_group_metadata_list,
                         scheduler_outputs=ctx.scheduler_outputs,
                         is_async=False,
-                        is_last_step=False)
+                        is_last_step=False,
+                        is_first_step_output=i==0)
                     model_input.async_callback()
             if use_async_out_proc:
                 return [sampler_outputs[-1]]

From 557a91c3c65871f27c26d618ae6da68573b6de92 Mon Sep 17 00:00:00 2001
From: Allen Wang <allencwang@google.com>
Date: Wed, 9 Oct 2024 18:48:48 +0000
Subject: [PATCH 2/2] run formatter

---
 vllm/worker/tpu_model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py
index ca2c84081e36a..dd122f9f1272b 100644
--- a/vllm/worker/tpu_model_runner.py
+++ b/vllm/worker/tpu_model_runner.py
@@ -544,7 +544,7 @@ def execute_model(
                         scheduler_outputs=ctx.scheduler_outputs,
                         is_async=False,
                         is_last_step=False,
-                        is_first_step_output=i==0)
+                        is_first_step_output=i == 0)
                     model_input.async_callback()
             if use_async_out_proc:
                 return [sampler_outputs[-1]]