From 85a658a799cc88907711b095ae3d440ea33cff07 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Thu, 29 Feb 2024 14:12:57 -0800
Subject: [PATCH] Add test for max queue delay timeout prompt response

---
 qa/L0_batcher/queue_timeout_test.py | 88 +++++++++++++++++++++++++++++
 qa/L0_batcher/test.sh               | 40 ++++++++++++-
 2 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100755 qa/L0_batcher/queue_timeout_test.py

diff --git a/qa/L0_batcher/queue_timeout_test.py b/qa/L0_batcher/queue_timeout_test.py
new file mode 100755
index 00000000000..cbe9fc2ca63
--- /dev/null
+++ b/qa/L0_batcher/queue_timeout_test.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import concurrent.futures
+import time
+import unittest
+
+import numpy as np
+import tritonclient.grpc as grpcclient
+from tritonclient.utils import InferenceServerException
+
+
+class TestMaxQueueDelayTimeout(unittest.TestCase):
+    def setUp(self):
+        # Initialize client
+        self._triton = grpcclient.InferenceServerClient("localhost:8001")
+
+    def _get_inputs(self, batch_size):
+        self.assertIsInstance(batch_size, int)
+        self.assertGreater(batch_size, 0)
+        shape = [batch_size, 8]
+        inputs = [grpcclient.InferInput("INPUT0", shape, "FP32")]
+        inputs[0].set_data_from_numpy(np.ones(shape, dtype=np.float32))
+        return inputs
+
+    def _generate_callback_and_response_pair(self):
+        response = {"responded": False, "result": None, "error": None}
+
+        def callback(result, error):
+            response["responded"] = True
+            response["result"] = result
+            response["error"] = error
+
+        return callback, response
+
+    # Test queued requests are rejected promptly once their queue timeout expires
+    def test_default_queue_policy_timeout_prompt_response(self):
+        model_name = "dynamic_batch"
+        with concurrent.futures.ThreadPoolExecutor() as pool:
+            # Saturate the slots on the model
+            saturate_thread = pool.submit(
+                self._triton.infer, model_name, self._get_inputs(batch_size=1)
+            )
+            time.sleep(2)  # ensure the slots are filled
+            # The next request should be queued
+            callback, response = self._generate_callback_and_response_pair()
+            queue_future = self._triton.async_infer(
+                model_name, self._get_inputs(batch_size=1), callback
+            )
+            time.sleep(2)  # ensure the request is queued
+            # Check if the request has timed-out
+            time.sleep(2)  # ensure the timeout period has expired
+            self.assertTrue(response["responded"])
+            self.assertEqual(response["result"], None)
+            self.assertIsInstance(response["error"], InferenceServerException)
+            self.assertEqual(response["error"].status(), "StatusCode.UNAVAILABLE")
+            self.assertEqual(response["error"].message(), "Request timeout expired")
+            # Join saturating thread
+            saturate_thread.result()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/qa/L0_batcher/test.sh b/qa/L0_batcher/test.sh
index c5f8819276e..dae674552d4 100755
--- a/qa/L0_batcher/test.sh
+++ b/qa/L0_batcher/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -736,6 +736,44 @@ if [[ "$(< /proc/sys/kernel/osrelease)" != *microsoft* ]]; then
     unset TRITONSERVER_DELAY_SCHEDULER
 fi
 
+# Test requests should be returned immediately upon timeout, without waiting for
+# the next slot to be available and then returned.
+rm -rf models && mkdir models
+mkdir -p models/dynamic_batch/1 && (cd models/dynamic_batch && \
+    echo 'backend: "identity"' >> config.pbtxt && \
+    echo 'max_batch_size: 1' >> config.pbtxt && \
+    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
+    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_FP32 \n dims: [ -1 ] }]' >> config.pbtxt && \
+    echo -e 'instance_group [{ count: 1 \n kind: KIND_CPU }]' >> config.pbtxt && \
+    echo -e 'dynamic_batching {' >> config.pbtxt && \
+    echo -e '  preferred_batch_size: [ 1 ]' >> config.pbtxt && \
+    echo -e '  default_queue_policy { timeout_action: REJECT \n default_timeout_microseconds: 1000000 \n max_queue_size: 8 }' >> config.pbtxt && \
+    echo -e '}' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "8000" } }]' >> config.pbtxt)
+
+TEST_LOG="queue_timeout_test.log"
+SERVER_LOG="./queue_timeout_test.server.log"
+
+SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=2"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+set +e
+python queue_timeout_test.py > $TEST_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** Scheduler Tests Failed\n***"
+    cat $TEST_LOG
+    RET=1
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 else