From 005c6502b9d9272a83d987a9917e2b21e3f2a6e9 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Tue, 7 May 2024 17:46:37 -0700 Subject: [PATCH 1/7] add test for shape validation --- .../input_shape_validation_test.py | 147 ++++++++++++++++++ qa/L0_input_validation/test.sh | 11 +- 2 files changed, 157 insertions(+), 1 deletion(-) create mode 100755 qa/L0_input_validation/input_shape_validation_test.py diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py new file mode 100755 index 0000000000..c780cea5d8 --- /dev/null +++ b/qa/L0_input_validation/input_shape_validation_test.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import asyncio +from pathlib import Path +from subprocess import Popen +from tempfile import TemporaryDirectory +from typing import Optional + +import numpy as np +import pytest +import torch +from tritonclient.grpc.aio import InferenceServerClient, InferInput +from tritonclient.utils import np_to_triton_dtype + +GRPC_PORT = 9653 +FIXED_LAST_DIM = 8 + + +@pytest.fixture +def repo_dir(): + with TemporaryDirectory() as model_repo: + (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True) + + torch.jit.save( + torch.jit.script(torch.nn.Identity()), + model_repo + "/pt_identity/1/model.pt", + ) + + pbtxt = f""" + name: "pt_identity" + backend: "pytorch" + max_batch_size: 8 + + input [ + {{ + name: "INPUT0" + data_type: TYPE_FP32 + dims: [ {FIXED_LAST_DIM} ] + }} + ] + output [ + {{ + name: "OUTPUT0" + data_type: TYPE_FP32 + dims: [ {FIXED_LAST_DIM} ] + }} + ] + # ensure we batch requests together + dynamic_batching {{ + max_queue_delay_microseconds: {int(5e6)} + }} + """ + with open(model_repo + "/pt_identity/config.pbtxt", "w") as f: + f.write(pbtxt) + + yield model_repo + + +async def poll_readiness(client: InferenceServerClient, server_proc): + while True: + if server_proc is not None and (ret_code := server_proc.poll()) is not None: + _, stderr = server_proc.communicate() + print(stderr) + raise Exception(f"Tritonserver died with return code {ret_code}") + try: + if await client.is_server_ready(): + break + except: # noqa: E722 + pass + await asyncio.sleep(0.5) + + +@pytest.mark.asyncio +async def test_shape_overlapped(repo_dir: str): + with Popen( + [ + "/opt/tritonserver/bin/tritonserver", + "--model-repository", + repo_dir, + "--grpc-port", + str(GRPC_PORT), + ] + ) as server: + await poll_readiness( + InferenceServerClient("localhost:" + str(GRPC_PORT)), server + ) + + alice = InferenceServerClient("localhost:" + str(GRPC_PORT)) + bob = InferenceServerClient("localhost:" + str(GRPC_PORT)) + + input_data_1 = np.arange(FIXED_LAST_DIM + 2)[None].astype(np.float32) + print(f"{input_data_1=}") + inputs_1 = [ + InferInput( + "INPUT0", input_data_1.shape, np_to_triton_dtype(input_data_1.dtype) + ), + ] + inputs_1[0].set_data_from_numpy(input_data_1) + # Compromised input shape + inputs_1[0].set_shape((1, FIXED_LAST_DIM)) + + input_data_2 = 100 + np.arange(FIXED_LAST_DIM)[None].astype(np.float32) + print(f"{input_data_2=}") + inputs_2 = [ + InferInput( + "INPUT0", + shape=input_data_2.shape, + datatype=np_to_triton_dtype(input_data_2.dtype), + ) + ] + inputs_2[0].set_data_from_numpy(input_data_2) + + t1 = asyncio.create_task(alice.infer("pt_identity", inputs_1)) + t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2)) + + alice_result, bob_result = await asyncio.gather(t1, t2) + print(f"{alice_result.as_numpy('OUTPUT0')=}") + print(f"{bob_result.as_numpy('OUTPUT0')=}") + server.terminate() + assert np.allclose( + bob_result.as_numpy("OUTPUT0"), input_data_2 + ), "Bob's result should be the same as input" diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh index 1c66c2bbaa..9e4021580f 100755 --- a/qa/L0_input_validation/test.sh +++ b/qa/L0_input_validation/test.sh @@ -44,6 +44,7 @@ RET=0 CLIENT_LOG="./input_validation_client.log" TEST_PY=./input_validation_test.py +SHAPE_TEST_PY=./input_shape_validation_test.py TEST_RESULT_FILE='./test_results.txt' export CUDA_VISIBLE_DEVICES=0 @@ -64,7 +65,7 @@ set +e python3 -m pytest --junitxml="input_validation.report.xml" $TEST_PY >> $CLIENT_LOG 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** python_unittest.py FAILED. \n***" + echo -e "\n***\n*** input_validation_test.py FAILED. \n***" RET=1 fi set -e @@ -72,6 +73,14 @@ set -e kill $SERVER_PID wait $SERVER_PID +pip install torch +python3 -m pytest $SHAPE_TEST_PY >> $CLIENT_LOG 2>&1 +if [ $? -ne 0 ]; then + echo -e "\n***\n*** input_shape_validation_test.py FAILED. \n***" + RET=1 + +fi + if [ $RET -eq 0 ]; then echo -e "\n***\n*** Input Validation Test Passed\n***" else From c820036abf67ec2dbaa303e027b3d7351c11d317 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Wed, 8 May 2024 18:43:08 -0700 Subject: [PATCH 2/7] updated test to pass when changes exist --- .../input_shape_validation_test.py | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py index c780cea5d8..dce4a29fcf 100755 --- a/qa/L0_input_validation/input_shape_validation_test.py +++ b/qa/L0_input_validation/input_shape_validation_test.py @@ -41,7 +41,6 @@ FIXED_LAST_DIM = 8 -@pytest.fixture def repo_dir(): with TemporaryDirectory() as model_repo: (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True) @@ -95,6 +94,13 @@ async def poll_readiness(client: InferenceServerClient, server_proc): await asyncio.sleep(0.5) +async def server_terminated(client: InferenceServerClient, server_proc): + if server_proc is not None and (ret_code := server_proc.poll()) is not None: + _, stderr = server_proc.communicate() + print(stderr) + raise Exception(f"Tritonserver died with return code {ret_code}") + + @pytest.mark.asyncio async def test_shape_overlapped(repo_dir: str): with Popen( @@ -134,14 +140,19 @@ async def test_shape_overlapped(repo_dir: str): ) ] inputs_2[0].set_data_from_numpy(input_data_2) - - t1 = asyncio.create_task(alice.infer("pt_identity", inputs_1)) - t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2)) - - alice_result, bob_result = await asyncio.gather(t1, t2) - print(f"{alice_result.as_numpy('OUTPUT0')=}") - print(f"{bob_result.as_numpy('OUTPUT0')=}") - server.terminate() - assert np.allclose( - bob_result.as_numpy("OUTPUT0"), input_data_2 - ), "Bob's result should be the same as input" + with pytest.raises(Exception) as e_info: + server_terminated( + InferenceServerClient("localhost:" + str(GRPC_PORT)), server + ) + t1 = asyncio.create_task( + alice.infer("pt_identity", inputs_1) + ) # should fail here + t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2)) + + # alice_result, bob_result = await asyncio.gather(t1, t2) + # print(f"{alice_result.as_numpy('OUTPUT0')=}") + # print(f"{bob_result.as_numpy('OUTPUT0')=}") + # server.terminate() + # assert np.allclose( + # bob_result.as_numpy("OUTPUT0"), input_data_2 + # ), "Bob's result should be the same as input" From 5dd1161bb8185b34e3e89e93f81f3e72075a8ffd Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 17 May 2024 12:20:27 -0700 Subject: [PATCH 3/7] Update test error message --- qa/L0_cuda_shared_memory/cuda_shared_memory_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py b/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py index ce4f72aec7..07f9c05a88 100755 --- a/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py +++ b/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py @@ -283,7 +283,7 @@ def test_too_big_shm(self): ) if len(error_msg) > 0: self.assertIn( - "unexpected total byte size 128 for input 'INPUT1', expecting 64", + "input byte size mismatch for input 'INPUT1' for model 'simple'. Expected 64, got 128", error_msg[-1], ) shm_handles.append(shm_ip2_handle) From 6d6b9f2b3b894c14db59a05b12df2dcfcd7308c6 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 17 May 2024 12:29:20 -0700 Subject: [PATCH 4/7] Update L0_shared_memory test error message --- qa/L0_shared_memory/shared_memory_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qa/L0_shared_memory/shared_memory_test.py b/qa/L0_shared_memory/shared_memory_test.py index e162f6b296..c38ecb4814 100755 --- a/qa/L0_shared_memory/shared_memory_test.py +++ b/qa/L0_shared_memory/shared_memory_test.py @@ -118,8 +118,8 @@ def test_reregister_after_register(self): "dummy_data", "/dummy_data", 8 ) except Exception as ex: - self.assertTrue( - "shared memory region 'dummy_data' already in manager" in str(ex) + self.assertIn( + "shared memory region 'dummy_data' already in manager", str(ex) ) shm_status = self.triton_client.get_system_shared_memory_status() if self.protocol == "http": @@ -271,9 +271,9 @@ def test_too_big_shm(self): use_system_shared_memory=True, ) if len(error_msg) > 0: - self.assertTrue( - "unexpected total byte size 128 for input 'INPUT1', expecting 64" - in error_msg[-1] + self.assertIn( + "input byte size mismatch for input 'INPUT1' for model 'simple'. Expected 64, got 128", + error_msg[-1], ) shm_handles.append(shm_ip2_handle) self._cleanup_server(shm_handles) From 9178092b6c10381ede8d45bb179670134a663378 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 21 May 2024 11:27:21 -0700 Subject: [PATCH 5/7] Update name and comments --- .../input_shape_validation_test.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py index dce4a29fcf..51a233b49c 100755 --- a/qa/L0_input_validation/input_shape_validation_test.py +++ b/qa/L0_input_validation/input_shape_validation_test.py @@ -41,7 +41,9 @@ FIXED_LAST_DIM = 8 -def repo_dir(): +# This helper function creates a temporary model repository which contains +# pt_identity model and yields the path to the model repository. +def tmp_repo_path(): with TemporaryDirectory() as model_repo: (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True) @@ -94,7 +96,7 @@ async def poll_readiness(client: InferenceServerClient, server_proc): await asyncio.sleep(0.5) -async def server_terminated(client: InferenceServerClient, server_proc): +async def server_terminated(server_proc): if server_proc is not None and (ret_code := server_proc.poll()) is not None: _, stderr = server_proc.communicate() print(stderr) @@ -102,16 +104,17 @@ async def server_terminated(client: InferenceServerClient, server_proc): @pytest.mark.asyncio -async def test_shape_overlapped(repo_dir: str): +async def test_shape_overlapped(tmp_repo_path: str): with Popen( [ "/opt/tritonserver/bin/tritonserver", "--model-repository", - repo_dir, + tmp_repo_path, "--grpc-port", str(GRPC_PORT), ] ) as server: + # wait until server is ready await poll_readiness( InferenceServerClient("localhost:" + str(GRPC_PORT)), server ) @@ -119,6 +122,7 @@ async def test_shape_overlapped(repo_dir: str): alice = InferenceServerClient("localhost:" + str(GRPC_PORT)) bob = InferenceServerClient("localhost:" + str(GRPC_PORT)) + # wrong input shape input_data_1 = np.arange(FIXED_LAST_DIM + 2)[None].astype(np.float32) print(f"{input_data_1=}") inputs_1 = [ @@ -130,6 +134,7 @@ async def test_shape_overlapped(repo_dir: str): # Compromised input shape inputs_1[0].set_shape((1, FIXED_LAST_DIM)) + # correct input shape input_data_2 = 100 + np.arange(FIXED_LAST_DIM)[None].astype(np.float32) print(f"{input_data_2=}") inputs_2 = [ @@ -141,9 +146,7 @@ async def test_shape_overlapped(repo_dir: str): ] inputs_2[0].set_data_from_numpy(input_data_2) with pytest.raises(Exception) as e_info: - server_terminated( - InferenceServerClient("localhost:" + str(GRPC_PORT)), server - ) + server_terminated(server) t1 = asyncio.create_task( alice.infer("pt_identity", inputs_1) ) # should fail here From 01c7301f3b7e0eef5865675ef0b69a3d407bcbca Mon Sep 17 00:00:00 2001 From: Yingge He Date: Thu, 23 May 2024 13:36:13 -0700 Subject: [PATCH 6/7] Rewrite tests --- .../input_shape_validation_test.py | 161 ------------------ .../input_validation_test.py | 41 ++++- qa/L0_input_validation/test.sh | 40 ++++- 3 files changed, 76 insertions(+), 166 deletions(-) delete mode 100755 qa/L0_input_validation/input_shape_validation_test.py diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py deleted file mode 100755 index 51a233b49c..0000000000 --- a/qa/L0_input_validation/input_shape_validation_test.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import asyncio -from pathlib import Path -from subprocess import Popen -from tempfile import TemporaryDirectory -from typing import Optional - -import numpy as np -import pytest -import torch -from tritonclient.grpc.aio import InferenceServerClient, InferInput -from tritonclient.utils import np_to_triton_dtype - -GRPC_PORT = 9653 -FIXED_LAST_DIM = 8 - - -# This helper function creates a temporary model repository which contains -# pt_identity model and yields the path to the model repository. -def tmp_repo_path(): - with TemporaryDirectory() as model_repo: - (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True) - - torch.jit.save( - torch.jit.script(torch.nn.Identity()), - model_repo + "/pt_identity/1/model.pt", - ) - - pbtxt = f""" - name: "pt_identity" - backend: "pytorch" - max_batch_size: 8 - - input [ - {{ - name: "INPUT0" - data_type: TYPE_FP32 - dims: [ {FIXED_LAST_DIM} ] - }} - ] - output [ - {{ - name: "OUTPUT0" - data_type: TYPE_FP32 - dims: [ {FIXED_LAST_DIM} ] - }} - ] - # ensure we batch requests together - dynamic_batching {{ - max_queue_delay_microseconds: {int(5e6)} - }} - """ - with open(model_repo + "/pt_identity/config.pbtxt", "w") as f: - f.write(pbtxt) - - yield model_repo - - -async def poll_readiness(client: InferenceServerClient, server_proc): - while True: - if server_proc is not None and (ret_code := server_proc.poll()) is not None: - _, stderr = server_proc.communicate() - print(stderr) - raise Exception(f"Tritonserver died with return code {ret_code}") - try: - if await client.is_server_ready(): - break - except: # noqa: E722 - pass - await asyncio.sleep(0.5) - - -async def server_terminated(server_proc): - if server_proc is not None and (ret_code := server_proc.poll()) is not None: - _, stderr = server_proc.communicate() - print(stderr) - raise Exception(f"Tritonserver died with return code {ret_code}") - - -@pytest.mark.asyncio -async def test_shape_overlapped(tmp_repo_path: str): - with Popen( - [ - "/opt/tritonserver/bin/tritonserver", - "--model-repository", - tmp_repo_path, - "--grpc-port", - str(GRPC_PORT), - ] - ) as server: - # wait until server is ready - await poll_readiness( - InferenceServerClient("localhost:" + str(GRPC_PORT)), server - ) - - alice = InferenceServerClient("localhost:" + str(GRPC_PORT)) - bob = InferenceServerClient("localhost:" + str(GRPC_PORT)) - - # wrong input shape - input_data_1 = np.arange(FIXED_LAST_DIM + 2)[None].astype(np.float32) - print(f"{input_data_1=}") - inputs_1 = [ - InferInput( - "INPUT0", input_data_1.shape, np_to_triton_dtype(input_data_1.dtype) - ), - ] - inputs_1[0].set_data_from_numpy(input_data_1) - # Compromised input shape - inputs_1[0].set_shape((1, FIXED_LAST_DIM)) - - # correct input shape - input_data_2 = 100 + np.arange(FIXED_LAST_DIM)[None].astype(np.float32) - print(f"{input_data_2=}") - inputs_2 = [ - InferInput( - "INPUT0", - shape=input_data_2.shape, - datatype=np_to_triton_dtype(input_data_2.dtype), - ) - ] - inputs_2[0].set_data_from_numpy(input_data_2) - with pytest.raises(Exception) as e_info: - server_terminated(server) - t1 = asyncio.create_task( - alice.infer("pt_identity", inputs_1) - ) # should fail here - t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2)) - - # alice_result, bob_result = await asyncio.gather(t1, t2) - # print(f"{alice_result.as_numpy('OUTPUT0')=}") - # print(f"{bob_result.as_numpy('OUTPUT0')=}") - # server.terminate() - # assert np.allclose( - # bob_result.as_numpy("OUTPUT0"), input_data_2 - # ), "Bob's result should be the same as input" diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py index afd791b527..843a4447f5 100755 --- a/qa/L0_input_validation/input_validation_test.py +++ b/qa/L0_input_validation/input_validation_test.py @@ -33,7 +33,7 @@ import numpy as np import tritonclient.grpc as tritongrpcclient -from tritonclient.utils import InferenceServerException +from tritonclient.utils import InferenceServerException, np_to_triton_dtype class InputValTest(unittest.TestCase): @@ -113,5 +113,44 @@ def test_input_validation_all_optional(self): self.assertIn(str(response.outputs[0].name), "OUTPUT0") +class InputShapeTest(unittest.TestCase): + def test_input_shape_validation(self): + expected_dim = 8 + model_name = "pt_identity" + triton_client = tritongrpcclient.InferenceServerClient("localhost:8001") + + # Pass + input_data = np.arange(expected_dim)[None].astype(np.float32) + inputs = [ + tritongrpcclient.InferInput( + "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype) + ) + ] + inputs[0].set_data_from_numpy(input_data) + triton_client.infer(model_name=model_name, inputs=inputs) + + # Larger input byte size than expected + input_data = np.arange(expected_dim + 2)[None].astype(np.float32) + inputs = [ + tritongrpcclient.InferInput( + "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype) + ) + ] + inputs[0].set_data_from_numpy(input_data) + # Compromised input shape + inputs[0].set_shape((1, expected_dim)) + + with self.assertRaises(InferenceServerException) as e: + triton_client.infer( + model_name=model_name, + inputs=inputs, + ) + err_str = str(e.exception) + self.assertIn( + "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40", + err_str, + ) + + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh index 9e4021580f..fc1d6fd9a1 100755 --- a/qa/L0_input_validation/test.sh +++ b/qa/L0_input_validation/test.sh @@ -42,18 +42,19 @@ source ../common/util.sh RET=0 +SERVER=/opt/tritonserver/bin/tritonserver CLIENT_LOG="./input_validation_client.log" TEST_PY=./input_validation_test.py SHAPE_TEST_PY=./input_shape_validation_test.py TEST_RESULT_FILE='./test_results.txt' +SERVER_LOG="./inference_server.log" export CUDA_VISIBLE_DEVICES=0 rm -fr *.log -SERVER=/opt/tritonserver/bin/tritonserver +# input_validation_test SERVER_ARGS="--model-repository=`pwd`/models" -SERVER_LOG="./inference_server.log" run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" @@ -73,13 +74,44 @@ set -e kill $SERVER_PID wait $SERVER_PID +# input_shape_validation_test pip install torch -python3 -m pytest $SHAPE_TEST_PY >> $CLIENT_LOG 2>&1 +pip install pytest-asyncio + +mkdir -p models/pt_identity/1 +PYTHON_CODE=$(cat <> $CLIENT_LOG 2>&1 + if [ $? -ne 0 ]; then echo -e "\n***\n*** input_shape_validation_test.py FAILED. \n***" RET=1 - fi +set -e if [ $RET -eq 0 ]; then echo -e "\n***\n*** Input Validation Test Passed\n***" From 9d72426e6db0c8fe0ab0af9354ad5a2aaeb4c87f Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 24 May 2024 02:56:45 -0700 Subject: [PATCH 7/7] Add input string shape validation test --- .../input_validation_test.py | 71 +++++++++++++++++-- qa/L0_input_validation/test.sh | 11 ++- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py index 843a4447f5..e683723711 100755 --- a/qa/L0_input_validation/input_validation_test.py +++ b/qa/L0_input_validation/input_validation_test.py @@ -31,6 +31,7 @@ import unittest +import infer_util as iu import numpy as np import tritonclient.grpc as tritongrpcclient from tritonclient.utils import InferenceServerException, np_to_triton_dtype @@ -115,12 +116,12 @@ def test_input_validation_all_optional(self): class InputShapeTest(unittest.TestCase): def test_input_shape_validation(self): - expected_dim = 8 + input_size = 8 model_name = "pt_identity" triton_client = tritongrpcclient.InferenceServerClient("localhost:8001") # Pass - input_data = np.arange(expected_dim)[None].astype(np.float32) + input_data = np.arange(input_size)[None].astype(np.float32) inputs = [ tritongrpcclient.InferInput( "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype) @@ -130,7 +131,7 @@ def test_input_shape_validation(self): triton_client.infer(model_name=model_name, inputs=inputs) # Larger input byte size than expected - input_data = np.arange(expected_dim + 2)[None].astype(np.float32) + input_data = np.arange(input_size + 2)[None].astype(np.float32) inputs = [ tritongrpcclient.InferInput( "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype) @@ -138,8 +139,7 @@ def test_input_shape_validation(self): ] inputs[0].set_data_from_numpy(input_data) # Compromised input shape - inputs[0].set_shape((1, expected_dim)) - + inputs[0].set_shape((1, input_size)) with self.assertRaises(InferenceServerException) as e: triton_client.infer( model_name=model_name, @@ -151,6 +151,67 @@ def test_input_shape_validation(self): err_str, ) + def test_input_string_shape_validation(self): + input_size = 16 + model_name = "graphdef_object_int32_int32" + np_dtype_string = np.dtype(object) + triton_client = tritongrpcclient.InferenceServerClient("localhost:8001") + + def get_input_array(input_size, np_dtype): + rinput_dtype = iu._range_repr_dtype(np_dtype) + input_array = np.random.randint( + low=0, high=127, size=(1, input_size), dtype=rinput_dtype + ) + + # Convert to string type + inn = np.array( + [str(x) for x in input_array.reshape(input_array.size)], dtype=object + ) + input_array = inn.reshape(input_array.shape) + + inputs = [] + inputs.append( + tritongrpcclient.InferInput( + "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype) + ) + ) + inputs.append( + tritongrpcclient.InferInput( + "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype) + ) + ) + + inputs[0].set_data_from_numpy(input_array) + inputs[1].set_data_from_numpy(input_array) + return inputs + + # Input size is less than expected + inputs = get_input_array(input_size - 2, np_dtype_string) + # Compromised input shape + inputs[0].set_shape((1, input_size)) + inputs[1].set_shape((1, input_size)) + with self.assertRaises(InferenceServerException) as e: + triton_client.infer(model_name=model_name, inputs=inputs) + err_str = str(e.exception) + self.assertIn( + f"expected {input_size} strings for inference input 'INPUT1', got {input_size-2}", + err_str, + ) + + # Input size is greater than expected + inputs = get_input_array(input_size + 2, np_dtype_string) + # Compromised input shape + inputs[0].set_shape((1, input_size)) + inputs[1].set_shape((1, input_size)) + with self.assertRaises(InferenceServerException) as e: + triton_client.infer(model_name=model_name, inputs=inputs) + err_str = str(e.exception) + self.assertIn( + # Core will throw exception as soon as reading the "input_size+1"th byte. + f"unexpected number of string elements {input_size+1} for inference input 'INPUT1', expecting {input_size}", + err_str, + ) + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh index fc1d6fd9a1..ef4a1a6d65 100755 --- a/qa/L0_input_validation/test.sh +++ b/qa/L0_input_validation/test.sh @@ -42,6 +42,7 @@ source ../common/util.sh RET=0 +DATADIR=/data/inferenceserver/${REPO_VERSION} SERVER=/opt/tritonserver/bin/tritonserver CLIENT_LOG="./input_validation_client.log" TEST_PY=./input_validation_test.py @@ -95,7 +96,8 @@ if [ $? -ne 0 ]; then exit 1 fi -# input_validation_test +cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/. + SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1" run_server if [ "$SERVER_PID" == "0" ]; then @@ -105,14 +107,17 @@ if [ "$SERVER_PID" == "0" ]; then fi set +e -python3 $TEST_PY InputShapeTest.test_input_shape_validation >> $CLIENT_LOG 2>&1 +python3 $TEST_PY InputShapeTest >> $CLIENT_LOG 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** input_shape_validation_test.py FAILED. \n***" + echo -e "\n***\n*** input_validation_test.py FAILED. \n***" RET=1 fi set -e +kill $SERVER_PID +wait $SERVER_PID + if [ $RET -eq 0 ]; then echo -e "\n***\n*** Input Validation Test Passed\n***" else