From 005c6502b9d9272a83d987a9917e2b21e3f2a6e9 Mon Sep 17 00:00:00 2001
From: Katherine Yang <katheriney@nvidia.com>
Date: Tue, 7 May 2024 17:46:37 -0700
Subject: [PATCH 1/7] add test for shape validation

---
 .../input_shape_validation_test.py            | 147 ++++++++++++++++++
 qa/L0_input_validation/test.sh                |  11 +-
 2 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100755 qa/L0_input_validation/input_shape_validation_test.py

diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py
new file mode 100755
index 0000000000..c780cea5d8
--- /dev/null
+++ b/qa/L0_input_validation/input_shape_validation_test.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import asyncio
+from pathlib import Path
+from subprocess import Popen
+from tempfile import TemporaryDirectory
+from typing import Optional
+
+import numpy as np
+import pytest
+import torch
+from tritonclient.grpc.aio import InferenceServerClient, InferInput
+from tritonclient.utils import np_to_triton_dtype
+
+GRPC_PORT = 9653
+FIXED_LAST_DIM = 8
+
+
+@pytest.fixture
+def repo_dir():
+    with TemporaryDirectory() as model_repo:
+        (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True)
+
+        torch.jit.save(
+            torch.jit.script(torch.nn.Identity()),
+            model_repo + "/pt_identity/1/model.pt",
+        )
+
+        pbtxt = f"""
+        name: "pt_identity"
+        backend: "pytorch"
+        max_batch_size: 8
+
+        input [
+          {{
+            name: "INPUT0"
+            data_type: TYPE_FP32
+            dims: [ {FIXED_LAST_DIM} ]
+          }}
+        ]
+        output [
+          {{
+            name: "OUTPUT0"
+            data_type: TYPE_FP32
+            dims: [ {FIXED_LAST_DIM} ]
+          }}
+        ]
+        # ensure we batch requests together
+        dynamic_batching {{
+            max_queue_delay_microseconds: {int(5e6)}
+        }}
+        """
+        with open(model_repo + "/pt_identity/config.pbtxt", "w") as f:
+            f.write(pbtxt)
+
+        yield model_repo
+
+
+async def poll_readiness(client: InferenceServerClient, server_proc):
+    while True:
+        if server_proc is not None and (ret_code := server_proc.poll()) is not None:
+            _, stderr = server_proc.communicate()
+            print(stderr)
+            raise Exception(f"Tritonserver died with return code {ret_code}")
+        try:
+            if await client.is_server_ready():
+                break
+        except:  # noqa: E722
+            pass
+        await asyncio.sleep(0.5)
+
+
+@pytest.mark.asyncio
+async def test_shape_overlapped(repo_dir: str):
+    with Popen(
+        [
+            "/opt/tritonserver/bin/tritonserver",
+            "--model-repository",
+            repo_dir,
+            "--grpc-port",
+            str(GRPC_PORT),
+        ]
+    ) as server:
+        await poll_readiness(
+            InferenceServerClient("localhost:" + str(GRPC_PORT)), server
+        )
+
+        alice = InferenceServerClient("localhost:" + str(GRPC_PORT))
+        bob = InferenceServerClient("localhost:" + str(GRPC_PORT))
+
+        input_data_1 = np.arange(FIXED_LAST_DIM + 2)[None].astype(np.float32)
+        print(f"{input_data_1=}")
+        inputs_1 = [
+            InferInput(
+                "INPUT0", input_data_1.shape, np_to_triton_dtype(input_data_1.dtype)
+            ),
+        ]
+        inputs_1[0].set_data_from_numpy(input_data_1)
+        # Compromised input shape
+        inputs_1[0].set_shape((1, FIXED_LAST_DIM))
+
+        input_data_2 = 100 + np.arange(FIXED_LAST_DIM)[None].astype(np.float32)
+        print(f"{input_data_2=}")
+        inputs_2 = [
+            InferInput(
+                "INPUT0",
+                shape=input_data_2.shape,
+                datatype=np_to_triton_dtype(input_data_2.dtype),
+            )
+        ]
+        inputs_2[0].set_data_from_numpy(input_data_2)
+
+        t1 = asyncio.create_task(alice.infer("pt_identity", inputs_1))
+        t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2))
+
+        alice_result, bob_result = await asyncio.gather(t1, t2)
+        print(f"{alice_result.as_numpy('OUTPUT0')=}")
+        print(f"{bob_result.as_numpy('OUTPUT0')=}")
+        server.terminate()
+        assert np.allclose(
+            bob_result.as_numpy("OUTPUT0"), input_data_2
+        ), "Bob's result should be the same as input"
diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh
index 1c66c2bbaa..9e4021580f 100755
--- a/qa/L0_input_validation/test.sh
+++ b/qa/L0_input_validation/test.sh
@@ -44,6 +44,7 @@ RET=0
 
 CLIENT_LOG="./input_validation_client.log"
 TEST_PY=./input_validation_test.py
+SHAPE_TEST_PY=./input_shape_validation_test.py
 TEST_RESULT_FILE='./test_results.txt'
 
 export CUDA_VISIBLE_DEVICES=0
@@ -64,7 +65,7 @@ set +e
 python3 -m pytest --junitxml="input_validation.report.xml" $TEST_PY >> $CLIENT_LOG 2>&1
 
 if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** python_unittest.py FAILED. \n***"
+    echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
     RET=1
 fi
 set -e
@@ -72,6 +73,14 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+pip install torch
+python3 -m pytest $SHAPE_TEST_PY >> $CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** input_shape_validation_test.py FAILED. \n***"
+    RET=1
+
+fi
+
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Input Validation Test Passed\n***"
 else

From c820036abf67ec2dbaa303e027b3d7351c11d317 Mon Sep 17 00:00:00 2001
From: Katherine Yang <katheriney@nvidia.com>
Date: Wed, 8 May 2024 18:43:08 -0700
Subject: [PATCH 2/7] updated test to pass when changes exist

---
 .../input_shape_validation_test.py            | 35 ++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py
index c780cea5d8..dce4a29fcf 100755
--- a/qa/L0_input_validation/input_shape_validation_test.py
+++ b/qa/L0_input_validation/input_shape_validation_test.py
@@ -41,7 +41,6 @@
 FIXED_LAST_DIM = 8
 
 
-@pytest.fixture
 def repo_dir():
     with TemporaryDirectory() as model_repo:
         (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True)
@@ -95,6 +94,13 @@ async def poll_readiness(client: InferenceServerClient, server_proc):
         await asyncio.sleep(0.5)
 
 
+async def server_terminated(client: InferenceServerClient, server_proc):
+    if server_proc is not None and (ret_code := server_proc.poll()) is not None:
+        _, stderr = server_proc.communicate()
+        print(stderr)
+        raise Exception(f"Tritonserver died with return code {ret_code}")
+
+
 @pytest.mark.asyncio
 async def test_shape_overlapped(repo_dir: str):
     with Popen(
@@ -134,14 +140,19 @@ async def test_shape_overlapped(repo_dir: str):
             )
         ]
         inputs_2[0].set_data_from_numpy(input_data_2)
-
-        t1 = asyncio.create_task(alice.infer("pt_identity", inputs_1))
-        t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2))
-
-        alice_result, bob_result = await asyncio.gather(t1, t2)
-        print(f"{alice_result.as_numpy('OUTPUT0')=}")
-        print(f"{bob_result.as_numpy('OUTPUT0')=}")
-        server.terminate()
-        assert np.allclose(
-            bob_result.as_numpy("OUTPUT0"), input_data_2
-        ), "Bob's result should be the same as input"
+        with pytest.raises(Exception) as e_info:
+            server_terminated(
+                InferenceServerClient("localhost:" + str(GRPC_PORT)), server
+            )
+            t1 = asyncio.create_task(
+                alice.infer("pt_identity", inputs_1)
+            )  # should fail here
+            t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2))
+
+        # alice_result, bob_result = await asyncio.gather(t1, t2)
+        # print(f"{alice_result.as_numpy('OUTPUT0')=}")
+        # print(f"{bob_result.as_numpy('OUTPUT0')=}")
+        # server.terminate()
+        # assert np.allclose(
+        #     bob_result.as_numpy("OUTPUT0"), input_data_2
+        # ), "Bob's result should be the same as input"

From 5dd1161bb8185b34e3e89e93f81f3e72075a8ffd Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 17 May 2024 12:20:27 -0700
Subject: [PATCH 3/7] Update test error message

---
 qa/L0_cuda_shared_memory/cuda_shared_memory_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py b/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
index ce4f72aec7..07f9c05a88 100755
--- a/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
+++ b/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
@@ -283,7 +283,7 @@ def test_too_big_shm(self):
         )
         if len(error_msg) > 0:
             self.assertIn(
-                "unexpected total byte size 128 for input 'INPUT1', expecting 64",
+                "input byte size mismatch for input 'INPUT1' for model 'simple'. Expected 64, got 128",
                 error_msg[-1],
             )
         shm_handles.append(shm_ip2_handle)

From 6d6b9f2b3b894c14db59a05b12df2dcfcd7308c6 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 17 May 2024 12:29:20 -0700
Subject: [PATCH 4/7] Update L0_shared_memory test error message

---
 qa/L0_shared_memory/shared_memory_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/qa/L0_shared_memory/shared_memory_test.py b/qa/L0_shared_memory/shared_memory_test.py
index e162f6b296..c38ecb4814 100755
--- a/qa/L0_shared_memory/shared_memory_test.py
+++ b/qa/L0_shared_memory/shared_memory_test.py
@@ -118,8 +118,8 @@ def test_reregister_after_register(self):
                 "dummy_data", "/dummy_data", 8
             )
         except Exception as ex:
-            self.assertTrue(
-                "shared memory region 'dummy_data' already in manager" in str(ex)
+            self.assertIn(
+                "shared memory region 'dummy_data' already in manager", str(ex)
             )
         shm_status = self.triton_client.get_system_shared_memory_status()
         if self.protocol == "http":
@@ -271,9 +271,9 @@ def test_too_big_shm(self):
             use_system_shared_memory=True,
         )
         if len(error_msg) > 0:
-            self.assertTrue(
-                "unexpected total byte size 128 for input 'INPUT1', expecting 64"
-                in error_msg[-1]
+            self.assertIn(
+                "input byte size mismatch for input 'INPUT1' for model 'simple'. Expected 64, got 128",
+                error_msg[-1],
             )
         shm_handles.append(shm_ip2_handle)
         self._cleanup_server(shm_handles)

From 9178092b6c10381ede8d45bb179670134a663378 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Tue, 21 May 2024 11:27:21 -0700
Subject: [PATCH 5/7] Update name and comments

---
 .../input_shape_validation_test.py              | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py
index dce4a29fcf..51a233b49c 100755
--- a/qa/L0_input_validation/input_shape_validation_test.py
+++ b/qa/L0_input_validation/input_shape_validation_test.py
@@ -41,7 +41,9 @@
 FIXED_LAST_DIM = 8
 
 
-def repo_dir():
+# This helper function creates a temporary model repository which contains
+# pt_identity model and yields the path to the model repository.
+def tmp_repo_path():
     with TemporaryDirectory() as model_repo:
         (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True)
 
@@ -94,7 +96,7 @@ async def poll_readiness(client: InferenceServerClient, server_proc):
         await asyncio.sleep(0.5)
 
 
-async def server_terminated(client: InferenceServerClient, server_proc):
+async def server_terminated(server_proc):
     if server_proc is not None and (ret_code := server_proc.poll()) is not None:
         _, stderr = server_proc.communicate()
         print(stderr)
@@ -102,16 +104,17 @@ async def server_terminated(client: InferenceServerClient, server_proc):
 
 
 @pytest.mark.asyncio
-async def test_shape_overlapped(repo_dir: str):
+async def test_shape_overlapped(tmp_repo_path: str):
     with Popen(
         [
             "/opt/tritonserver/bin/tritonserver",
             "--model-repository",
-            repo_dir,
+            tmp_repo_path,
             "--grpc-port",
             str(GRPC_PORT),
         ]
     ) as server:
+        # wait until server is ready
         await poll_readiness(
             InferenceServerClient("localhost:" + str(GRPC_PORT)), server
         )
@@ -119,6 +122,7 @@ async def test_shape_overlapped(repo_dir: str):
         alice = InferenceServerClient("localhost:" + str(GRPC_PORT))
         bob = InferenceServerClient("localhost:" + str(GRPC_PORT))
 
+        # wrong input shape
         input_data_1 = np.arange(FIXED_LAST_DIM + 2)[None].astype(np.float32)
         print(f"{input_data_1=}")
         inputs_1 = [
@@ -130,6 +134,7 @@ async def test_shape_overlapped(repo_dir: str):
         # Compromised input shape
         inputs_1[0].set_shape((1, FIXED_LAST_DIM))
 
+        # correct input shape
         input_data_2 = 100 + np.arange(FIXED_LAST_DIM)[None].astype(np.float32)
         print(f"{input_data_2=}")
         inputs_2 = [
@@ -141,9 +146,7 @@ async def test_shape_overlapped(repo_dir: str):
         ]
         inputs_2[0].set_data_from_numpy(input_data_2)
         with pytest.raises(Exception) as e_info:
-            server_terminated(
-                InferenceServerClient("localhost:" + str(GRPC_PORT)), server
-            )
+            server_terminated(server)
             t1 = asyncio.create_task(
                 alice.infer("pt_identity", inputs_1)
             )  # should fail here

From 01c7301f3b7e0eef5865675ef0b69a3d407bcbca Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Thu, 23 May 2024 13:36:13 -0700
Subject: [PATCH 6/7] Rewrite tests

---
 .../input_shape_validation_test.py            | 161 ------------------
 .../input_validation_test.py                  |  41 ++++-
 qa/L0_input_validation/test.sh                |  40 ++++-
 3 files changed, 76 insertions(+), 166 deletions(-)
 delete mode 100755 qa/L0_input_validation/input_shape_validation_test.py

diff --git a/qa/L0_input_validation/input_shape_validation_test.py b/qa/L0_input_validation/input_shape_validation_test.py
deleted file mode 100755
index 51a233b49c..0000000000
--- a/qa/L0_input_validation/input_shape_validation_test.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import asyncio
-from pathlib import Path
-from subprocess import Popen
-from tempfile import TemporaryDirectory
-from typing import Optional
-
-import numpy as np
-import pytest
-import torch
-from tritonclient.grpc.aio import InferenceServerClient, InferInput
-from tritonclient.utils import np_to_triton_dtype
-
-GRPC_PORT = 9653
-FIXED_LAST_DIM = 8
-
-
-# This helper function creates a temporary model repository which contains
-# pt_identity model and yields the path to the model repository.
-def tmp_repo_path():
-    with TemporaryDirectory() as model_repo:
-        (Path(model_repo) / "pt_identity" / "1").mkdir(parents=True, exist_ok=True)
-
-        torch.jit.save(
-            torch.jit.script(torch.nn.Identity()),
-            model_repo + "/pt_identity/1/model.pt",
-        )
-
-        pbtxt = f"""
-        name: "pt_identity"
-        backend: "pytorch"
-        max_batch_size: 8
-
-        input [
-          {{
-            name: "INPUT0"
-            data_type: TYPE_FP32
-            dims: [ {FIXED_LAST_DIM} ]
-          }}
-        ]
-        output [
-          {{
-            name: "OUTPUT0"
-            data_type: TYPE_FP32
-            dims: [ {FIXED_LAST_DIM} ]
-          }}
-        ]
-        # ensure we batch requests together
-        dynamic_batching {{
-            max_queue_delay_microseconds: {int(5e6)}
-        }}
-        """
-        with open(model_repo + "/pt_identity/config.pbtxt", "w") as f:
-            f.write(pbtxt)
-
-        yield model_repo
-
-
-async def poll_readiness(client: InferenceServerClient, server_proc):
-    while True:
-        if server_proc is not None and (ret_code := server_proc.poll()) is not None:
-            _, stderr = server_proc.communicate()
-            print(stderr)
-            raise Exception(f"Tritonserver died with return code {ret_code}")
-        try:
-            if await client.is_server_ready():
-                break
-        except:  # noqa: E722
-            pass
-        await asyncio.sleep(0.5)
-
-
-async def server_terminated(server_proc):
-    if server_proc is not None and (ret_code := server_proc.poll()) is not None:
-        _, stderr = server_proc.communicate()
-        print(stderr)
-        raise Exception(f"Tritonserver died with return code {ret_code}")
-
-
-@pytest.mark.asyncio
-async def test_shape_overlapped(tmp_repo_path: str):
-    with Popen(
-        [
-            "/opt/tritonserver/bin/tritonserver",
-            "--model-repository",
-            tmp_repo_path,
-            "--grpc-port",
-            str(GRPC_PORT),
-        ]
-    ) as server:
-        # wait until server is ready
-        await poll_readiness(
-            InferenceServerClient("localhost:" + str(GRPC_PORT)), server
-        )
-
-        alice = InferenceServerClient("localhost:" + str(GRPC_PORT))
-        bob = InferenceServerClient("localhost:" + str(GRPC_PORT))
-
-        # wrong input shape
-        input_data_1 = np.arange(FIXED_LAST_DIM + 2)[None].astype(np.float32)
-        print(f"{input_data_1=}")
-        inputs_1 = [
-            InferInput(
-                "INPUT0", input_data_1.shape, np_to_triton_dtype(input_data_1.dtype)
-            ),
-        ]
-        inputs_1[0].set_data_from_numpy(input_data_1)
-        # Compromised input shape
-        inputs_1[0].set_shape((1, FIXED_LAST_DIM))
-
-        # correct input shape
-        input_data_2 = 100 + np.arange(FIXED_LAST_DIM)[None].astype(np.float32)
-        print(f"{input_data_2=}")
-        inputs_2 = [
-            InferInput(
-                "INPUT0",
-                shape=input_data_2.shape,
-                datatype=np_to_triton_dtype(input_data_2.dtype),
-            )
-        ]
-        inputs_2[0].set_data_from_numpy(input_data_2)
-        with pytest.raises(Exception) as e_info:
-            server_terminated(server)
-            t1 = asyncio.create_task(
-                alice.infer("pt_identity", inputs_1)
-            )  # should fail here
-            t2 = asyncio.create_task(bob.infer("pt_identity", inputs_2))
-
-        # alice_result, bob_result = await asyncio.gather(t1, t2)
-        # print(f"{alice_result.as_numpy('OUTPUT0')=}")
-        # print(f"{bob_result.as_numpy('OUTPUT0')=}")
-        # server.terminate()
-        # assert np.allclose(
-        #     bob_result.as_numpy("OUTPUT0"), input_data_2
-        # ), "Bob's result should be the same as input"
diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py
index afd791b527..843a4447f5 100755
--- a/qa/L0_input_validation/input_validation_test.py
+++ b/qa/L0_input_validation/input_validation_test.py
@@ -33,7 +33,7 @@
 
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
-from tritonclient.utils import InferenceServerException
+from tritonclient.utils import InferenceServerException, np_to_triton_dtype
 
 
 class InputValTest(unittest.TestCase):
@@ -113,5 +113,44 @@ def test_input_validation_all_optional(self):
         self.assertIn(str(response.outputs[0].name), "OUTPUT0")
 
 
+class InputShapeTest(unittest.TestCase):
+    def test_input_shape_validation(self):
+        expected_dim = 8
+        model_name = "pt_identity"
+        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+        # Pass
+        input_data = np.arange(expected_dim)[None].astype(np.float32)
+        inputs = [
+            tritongrpcclient.InferInput(
+                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
+            )
+        ]
+        inputs[0].set_data_from_numpy(input_data)
+        triton_client.infer(model_name=model_name, inputs=inputs)
+
+        # Larger input byte size than expected
+        input_data = np.arange(expected_dim + 2)[None].astype(np.float32)
+        inputs = [
+            tritongrpcclient.InferInput(
+                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
+            )
+        ]
+        inputs[0].set_data_from_numpy(input_data)
+        # Compromised input shape
+        inputs[0].set_shape((1, expected_dim))
+
+        with self.assertRaises(InferenceServerException) as e:
+            triton_client.infer(
+                model_name=model_name,
+                inputs=inputs,
+            )
+        err_str = str(e.exception)
+        self.assertIn(
+            "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40",
+            err_str,
+        )
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh
index 9e4021580f..fc1d6fd9a1 100755
--- a/qa/L0_input_validation/test.sh
+++ b/qa/L0_input_validation/test.sh
@@ -42,18 +42,19 @@ source ../common/util.sh
 
 RET=0
 
+SERVER=/opt/tritonserver/bin/tritonserver
 CLIENT_LOG="./input_validation_client.log"
 TEST_PY=./input_validation_test.py
 SHAPE_TEST_PY=./input_shape_validation_test.py
 TEST_RESULT_FILE='./test_results.txt'
+SERVER_LOG="./inference_server.log"
 
 export CUDA_VISIBLE_DEVICES=0
 
 rm -fr *.log
 
-SERVER=/opt/tritonserver/bin/tritonserver
+# input_validation_test
 SERVER_ARGS="--model-repository=`pwd`/models"
-SERVER_LOG="./inference_server.log"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"
@@ -73,13 +74,44 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# input shape validation (InputShapeTest in input_validation_test.py)
 pip install torch
-python3 -m pytest $SHAPE_TEST_PY >> $CLIENT_LOG 2>&1
+pip install pytest-asyncio
+
+mkdir -p models/pt_identity/1
+PYTHON_CODE=$(cat <<END
+import torch
+torch.jit.save(
+    torch.jit.script(torch.nn.Identity()),
+    "`pwd`/models/pt_identity/1/model.pt",
+)
+END
+)
+# Build the TorchScript identity model. python3 runs as the `if` condition because a
+# bare failing assignment would trip `set -e` and exit before the error is reported.
+if ! res="$(python3 -c "$PYTHON_CODE" 2>&1)"; then
+    echo -e "\n***\n*** model \"pt_identity\" initialization FAILED. \n***"
+    echo "$res"
+    exit 1
+fi
+
+# input_validation_test
+SERVER_ARGS="--model-repository=`pwd`/models  --log-verbose=1"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+set +e
+python3 $TEST_PY InputShapeTest.test_input_shape_validation >> $CLIENT_LOG 2>&1
+
 if [ $? -ne 0 ]; then
     echo -e "\n***\n*** input_shape_validation_test.py FAILED. \n***"
     RET=1
-
 fi
+set -e
 
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Input Validation Test Passed\n***"

From 9d72426e6db0c8fe0ab0af9354ad5a2aaeb4c87f Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 24 May 2024 02:56:45 -0700
Subject: [PATCH 7/7] Add input string shape validation test

---
 .../input_validation_test.py                  | 71 +++++++++++++++++--
 qa/L0_input_validation/test.sh                | 11 ++-
 2 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py
index 843a4447f5..e683723711 100755
--- a/qa/L0_input_validation/input_validation_test.py
+++ b/qa/L0_input_validation/input_validation_test.py
@@ -31,6 +31,7 @@
 
 import unittest
 
+import infer_util as iu
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
 from tritonclient.utils import InferenceServerException, np_to_triton_dtype
@@ -115,12 +116,12 @@ def test_input_validation_all_optional(self):
 
 class InputShapeTest(unittest.TestCase):
     def test_input_shape_validation(self):
-        expected_dim = 8
+        input_size = 8
         model_name = "pt_identity"
         triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
 
         # Pass
-        input_data = np.arange(expected_dim)[None].astype(np.float32)
+        input_data = np.arange(input_size)[None].astype(np.float32)
         inputs = [
             tritongrpcclient.InferInput(
                 "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
@@ -130,7 +131,7 @@ def test_input_shape_validation(self):
         triton_client.infer(model_name=model_name, inputs=inputs)
 
         # Larger input byte size than expected
-        input_data = np.arange(expected_dim + 2)[None].astype(np.float32)
+        input_data = np.arange(input_size + 2)[None].astype(np.float32)
         inputs = [
             tritongrpcclient.InferInput(
                 "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
@@ -138,8 +139,7 @@ def test_input_shape_validation(self):
         ]
         inputs[0].set_data_from_numpy(input_data)
         # Compromised input shape
-        inputs[0].set_shape((1, expected_dim))
-
+        inputs[0].set_shape((1, input_size))
         with self.assertRaises(InferenceServerException) as e:
             triton_client.infer(
                 model_name=model_name,
@@ -151,6 +151,67 @@ def test_input_shape_validation(self):
             err_str,
         )
 
+    def test_input_string_shape_validation(self):
+        input_size = 16
+        model_name = "graphdef_object_int32_int32"
+        np_dtype_string = np.dtype(object)
+        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+        def get_input_array(input_size, np_dtype):
+            rinput_dtype = iu._range_repr_dtype(np_dtype)
+            input_array = np.random.randint(
+                low=0, high=127, size=(1, input_size), dtype=rinput_dtype
+            )
+
+            # Convert every integer to its str() form, stored in an object array
+            inn = np.array(
+                [str(x) for x in input_array.reshape(input_array.size)], dtype=object
+            )
+            input_array = inn.reshape(input_array.shape)
+
+            inputs = []
+            inputs.append(
+                tritongrpcclient.InferInput(
+                    "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype)
+                )
+            )
+            inputs.append(
+                tritongrpcclient.InferInput(
+                    "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype)
+                )
+            )
+
+            inputs[0].set_data_from_numpy(input_array)
+            inputs[1].set_data_from_numpy(input_array)
+            return inputs
+
+        # Fewer string elements are sent than the declared shape holds
+        inputs = get_input_array(input_size - 2, np_dtype_string)
+        # Compromised input shape: declare input_size elements for input_size-2 strings
+        inputs[0].set_shape((1, input_size))
+        inputs[1].set_shape((1, input_size))
+        with self.assertRaises(InferenceServerException) as e:
+            triton_client.infer(model_name=model_name, inputs=inputs)
+        err_str = str(e.exception)
+        self.assertIn(
+            f"expected {input_size} strings for inference input 'INPUT1', got {input_size-2}",
+            err_str,
+        )
+
+        # More string elements are sent than the declared shape holds
+        inputs = get_input_array(input_size + 2, np_dtype_string)
+        # Compromised input shape: declare input_size elements for input_size+2 strings
+        inputs[0].set_shape((1, input_size))
+        inputs[1].set_shape((1, input_size))
+        with self.assertRaises(InferenceServerException) as e:
+            triton_client.infer(model_name=model_name, inputs=inputs)
+        err_str = str(e.exception)
+        self.assertIn(
+            # Core will throw exception as soon as it reads the "input_size+1"th string element.
+            f"unexpected number of string elements {input_size+1} for inference input 'INPUT1', expecting {input_size}",
+            err_str,
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh
index fc1d6fd9a1..ef4a1a6d65 100755
--- a/qa/L0_input_validation/test.sh
+++ b/qa/L0_input_validation/test.sh
@@ -42,6 +42,7 @@ source ../common/util.sh
 
 RET=0
 
+DATADIR=/data/inferenceserver/${REPO_VERSION}
 SERVER=/opt/tritonserver/bin/tritonserver
 CLIENT_LOG="./input_validation_client.log"
 TEST_PY=./input_validation_test.py
@@ -95,7 +96,8 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-# input_validation_test
+cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
+
 SERVER_ARGS="--model-repository=`pwd`/models  --log-verbose=1"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
@@ -105,14 +107,17 @@ if [ "$SERVER_PID" == "0" ]; then
 fi
 
 set +e
-python3 $TEST_PY InputShapeTest.test_input_shape_validation >> $CLIENT_LOG 2>&1
+python3 $TEST_PY InputShapeTest >> $CLIENT_LOG 2>&1
 
 if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** input_shape_validation_test.py FAILED. \n***"
+    echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
     RET=1
 fi
 set -e
 
+kill $SERVER_PID
+wait $SERVER_PID
+
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Input Validation Test Passed\n***"
 else