From 5f46b90c63b6d6db8d08a79fc7a67d6dda5128dc Mon Sep 17 00:00:00 2001 From: Cade Daniel Date: Tue, 30 Jul 2024 17:53:21 -0700 Subject: [PATCH] [Speculative decoding] Add serving benchmark for llama3 70b + speculative decoding (#6964) Signed-off-by: Alvant --- .../tests/serving-tests.json | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests.json b/.buildkite/nightly-benchmarks/tests/serving-tests.json index 86a0fefa339f7..300af0524d7c0 100644 --- a/.buildkite/nightly-benchmarks/tests/serving-tests.json +++ b/.buildkite/nightly-benchmarks/tests/serving-tests.json @@ -55,5 +55,26 @@ "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200 } + }, + { + "test_name": "serving_llama70B_tp4_sharegpt_specdecode", + "qps_list": [2], + "server_parameters": { + "model": "meta-llama/Meta-Llama-3-70B-Instruct", + "disable_log_requests": "", + "tensor_parallel_size": 4, + "swap_space": 16, + "speculative_model": "turboderp/Qwama-0.5B-Instruct", + "num_speculative_tokens": 4, + "speculative_draft_tensor_parallel_size": 1, + "use_v2_block_manager": "" + }, + "client_parameters": { + "model": "meta-llama/Meta-Llama-3-70B-Instruct", + "backend": "vllm", + "dataset_name": "sharegpt", + "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", + "num_prompts": 200 + } } -] \ No newline at end of file +]