From 8523f400b2c508ee2c9a182619151c744f73073d Mon Sep 17 00:00:00 2001
From: Roger Wang
Date: Fri, 24 May 2024 16:26:19 -0700
Subject: [PATCH 1/2] add test run

---
 benchmarks/benchmark_serving.py | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 9c3fed4817de2..5890748e83464 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -215,6 +215,11 @@ def calculate_metrics(
         else:
             actual_output_lens.append(0)
 
+    if completed == 0:
+        warnings.warn(
+            "All requests failed. This is likely due to a misconfiguration "
+            "on the benchmark arguments.",
+            stacklevel=2)
     metrics = BenchmarkMetrics(
         completed=completed,
         total_input=total_input,
@@ -226,9 +231,9 @@ def calculate_metrics(
         1000,  # ttfts is empty if streaming is not supported by backend
         median_ttft_ms=np.median(ttfts or 0) * 1000,
         p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
-        mean_tpot_ms=np.mean(tpots) * 1000,
-        median_tpot_ms=np.median(tpots) * 1000,
-        p99_tpot_ms=np.percentile(tpots, 99) * 1000,
+        mean_tpot_ms=np.mean(tpots or 0) * 1000,
+        median_tpot_ms=np.median(tpots or 0) * 1000,
+        p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
     )
 
     return metrics, actual_output_lens
@@ -250,6 +255,26 @@ async def benchmark(
     else:
         raise ValueError(f"Unknown backend: {backend}")
 
+    print("{s:{c}^{n}}".format(s=' Serving Benchmark ', n=50, c='-'))
+    print("Starting initial single prompt test run...")
+    test_prompt, test_prompt_len, test_output_len = input_requests[0]
+    test_request_func_input = RequestFuncInput(
+        model=model_id,
+        prompt=test_prompt,
+        api_url=api_url,
+        prompt_len=test_prompt_len,
+        output_len=test_output_len,
+        best_of=best_of,
+        use_beam_search=use_beam_search,
+    )
+    test_output = await request_func(request_func_input=test_request_func_input
+                                     )
+    if not test_output.success:
+        raise ValueError(
+            "Initial test run failed - Please make sure benchmark arguments "
+            f"are correctly specified. Error: {test_output.error}")
+    else:
+        print("Initial test run completed. Starting main benchmark run...")
     print(f"Traffic request rate: {request_rate}")
 
     pbar = None if disable_tqdm else tqdm(total=len(input_requests))

From ca19c7e82416797791664ef484b26f7c4ef497d2 Mon Sep 17 00:00:00 2001
From: Roger Wang
Date: Fri, 24 May 2024 16:56:20 -0700
Subject: [PATCH 2/2] iterate

---
 benchmarks/backend_request_func.py | 6 ++++++
 benchmarks/benchmark_serving.py    | 6 ++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index f9d167590fe47..58dcc6167efa6 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -89,6 +89,9 @@ async def async_request_tgi(
                     output.latency = most_recent_timestamp - st
                     output.success = True
                     output.generated_text = data["generated_text"]
+                else:
+                    output.error = response.reason or ""
+                    output.success = False
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
@@ -276,6 +279,9 @@ async def async_request_openai_completions(
                     output.generated_text = generated_text
                     output.success = True
                     output.latency = latency
+                else:
+                    output.error = response.reason or ""
+                    output.success = False
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 5890748e83464..f3d71de775f82 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -255,10 +255,9 @@ async def benchmark(
     else:
         raise ValueError(f"Unknown backend: {backend}")
 
-    print("{s:{c}^{n}}".format(s=' Serving Benchmark ', n=50, c='-'))
     print("Starting initial single prompt test run...")
     test_prompt, test_prompt_len, test_output_len = input_requests[0]
-    test_request_func_input = RequestFuncInput(
+    test_input = RequestFuncInput(
         model=model_id,
         prompt=test_prompt,
         api_url=api_url,
         prompt_len=test_prompt_len,
         output_len=test_output_len,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
-    test_output = await request_func(request_func_input=test_request_func_input
-                                     )
+    test_output = await request_func(request_func_input=test_input)
     if not test_output.success:
         raise ValueError(
             "Initial test run failed - Please make sure benchmark arguments "