From 8523f400b2c508ee2c9a182619151c744f73073d Mon Sep 17 00:00:00 2001
From: Roger Wang
Date: Fri, 24 May 2024 16:26:19 -0700
Subject: [PATCH 1/2] add test run

---
 benchmarks/benchmark_serving.py | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 9c3fed4817de2..5890748e83464 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -215,6 +215,11 @@ def calculate_metrics(
         else:
             actual_output_lens.append(0)
 
+    if completed == 0:
+        warnings.warn(
+            "All requests failed. This is likely due to a misconfiguration "
+            "on the benchmark arguments.",
+            stacklevel=2)
     metrics = BenchmarkMetrics(
         completed=completed,
         total_input=total_input,
@@ -226,9 +231,9 @@ def calculate_metrics(
         1000,  # ttfts is empty if streaming is not supported by backend
         median_ttft_ms=np.median(ttfts or 0) * 1000,
         p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
-        mean_tpot_ms=np.mean(tpots) * 1000,
-        median_tpot_ms=np.median(tpots) * 1000,
-        p99_tpot_ms=np.percentile(tpots, 99) * 1000,
+        mean_tpot_ms=np.mean(tpots or 0) * 1000,
+        median_tpot_ms=np.median(tpots or 0) * 1000,
+        p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
     )
 
     return metrics, actual_output_lens
@@ -250,6 +255,26 @@ async def benchmark(
     else:
         raise ValueError(f"Unknown backend: {backend}")
 
+    print("{s:{c}^{n}}".format(s=' Serving Benchmark ', n=50, c='-'))
+    print("Starting initial single prompt test run...")
+    test_prompt, test_prompt_len, test_output_len = input_requests[0]
+    test_request_func_input = RequestFuncInput(
+        model=model_id,
+        prompt=test_prompt,
+        api_url=api_url,
+        prompt_len=test_prompt_len,
+        output_len=test_output_len,
+        best_of=best_of,
+        use_beam_search=use_beam_search,
+    )
+    test_output = await request_func(request_func_input=test_request_func_input
+                                     )
+    if not test_output.success:
+        raise ValueError(
+            "Initial test run failed - Please make sure benchmark arguments "
+            f"are correctly specified. Error: {test_output.error}")
+    else:
+        print("Initial test run completed. Starting main benchmark run...")
     print(f"Traffic request rate: {request_rate}")
 
     pbar = None if disable_tqdm else tqdm(total=len(input_requests))

From ca19c7e82416797791664ef484b26f7c4ef497d2 Mon Sep 17 00:00:00 2001
From: Roger Wang
Date: Fri, 24 May 2024 16:56:20 -0700
Subject: [PATCH 2/2] iterate

---
 benchmarks/backend_request_func.py | 6 ++++++
 benchmarks/benchmark_serving.py    | 6 ++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index f9d167590fe47..58dcc6167efa6 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -89,6 +89,9 @@ async def async_request_tgi(
                     output.latency = most_recent_timestamp - st
                     output.success = True
                     output.generated_text = data["generated_text"]
+                else:
+                    output.error = response.reason or ""
+                    output.success = False
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
@@ -276,6 +279,9 @@ async def async_request_openai_completions(
                     output.generated_text = generated_text
                     output.success = True
                     output.latency = latency
+                else:
+                    output.error = response.reason or ""
+                    output.success = False
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 5890748e83464..f3d71de775f82 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -255,10 +255,9 @@ async def benchmark(
     else:
         raise ValueError(f"Unknown backend: {backend}")
 
-    print("{s:{c}^{n}}".format(s=' Serving Benchmark ', n=50, c='-'))
     print("Starting initial single prompt test run...")
     test_prompt, test_prompt_len, test_output_len = input_requests[0]
-    test_request_func_input = RequestFuncInput(
+    test_input = RequestFuncInput(
         model=model_id,
         prompt=test_prompt,
         api_url=api_url,
         prompt_len=test_prompt_len,
         output_len=test_output_len,
         best_of=best_of,
         use_beam_search=use_beam_search,
     )
-    test_output = await request_func(request_func_input=test_request_func_input
-                                     )
+    test_output = await request_func(request_func_input=test_input)
     if not test_output.success:
         raise ValueError(
             "Initial test run failed - Please make sure benchmark arguments "