From 0a1b2a3e639b12773aa3e3f73cc1afe64c1018a7 Mon Sep 17 00:00:00 2001
From: Iman Tabrizian
Date: Mon, 29 May 2023 18:39:42 -0400
Subject: [PATCH] Add testing for GPU tensor error handling

---
 qa/L0_backend_python/python_test.py | 66 +++++++++++++++++++++++++++++
 qa/L0_backend_python/test.sh        |  4 ++
 2 files changed, 70 insertions(+)

diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index 49413bce55..2520b8cb4b 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -38,6 +38,7 @@
 import os
 
 from tritonclient.utils import *
+import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
 import tritonclient.http as httpclient
 
 
@@ -59,6 +60,13 @@ def _infer_help(self, model_name, shape, data_type):
             output0 = result.as_numpy('OUTPUT0')
             self.assertTrue(np.all(input_data_0 == output0))
 
+    def _create_cuda_region(self, client, size, name):
+        shm0_handle = cuda_shared_memory.create_shared_memory_region(
+            name, byte_size=size, device_id=0)
+        client.register_cuda_shared_memory(
+            name, cuda_shared_memory.get_raw_handle(shm0_handle), 0, size)
+        return shm0_handle
+
     def _optional_input_infer(self, model_name, has_input0, has_input1):
         with httpclient.InferenceServerClient("localhost:8000") as client:
             shape = (1,)
@@ -144,6 +152,64 @@ def test_growth_error(self):
             with self._shm_leak_detector.Probe() as shm_probe:
                 self._infer_help(model_name, shape, dtype)
 
+    # CUDA shared memory is not supported on Jetson.
+    def test_gpu_tensor_error(self):
+        model_name = 'identity_bool'
+        with httpclient.InferenceServerClient("localhost:8000") as client:
+            input_data = np.array([[True] * 1000], dtype=bool)
+            inputs = [
+                httpclient.InferInput("INPUT0", input_data.shape,
+                                      np_to_triton_dtype(input_data.dtype))
+            ]
+            inputs[0].set_data_from_numpy(input_data)
+
+            requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+
+            # Intentionally create a shared memory region that is too small.
+            client.unregister_cuda_shared_memory()
+            shm0_handle = self._create_cuda_region(client, 1,
+                                                   'output0_data')
+
+            requested_outputs[0].set_shared_memory('output0_data', 1)
+            with self.assertRaises(InferenceServerException) as ex:
+                client.infer(model_name, inputs, outputs=requested_outputs)
+            self.assertIn(
+                "should be at least 1000 bytes to hold the results",
+                str(ex.exception))
+            client.unregister_cuda_shared_memory()
+            cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
+    def test_dlpack_tensor_error(self):
+        model_name = 'dlpack_identity'
+        with httpclient.InferenceServerClient("localhost:8000") as client:
+            input_data = np.array([[1] * 1000], dtype=np.float32)
+            inputs = [
+                httpclient.InferInput("INPUT0", input_data.shape,
+                                      np_to_triton_dtype(input_data.dtype))
+            ]
+
+            requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+            input_data_size = input_data.itemsize * input_data.size
+            client.unregister_cuda_shared_memory()
+            input_region = self._create_cuda_region(client, input_data_size,
+                                                    'input0_data')
+            inputs[0].set_shared_memory('input0_data', input_data_size)
+            cuda_shared_memory.set_shared_memory_region(
+                input_region, [input_data])
+
+            # Intentionally create a small region to trigger an error
+            shm0_handle = self._create_cuda_region(client, 1,
+                                                   'output0_data')
+            requested_outputs[0].set_shared_memory('output0_data', 1)
+
+            with self.assertRaises(InferenceServerException) as ex:
+                client.infer(model_name, inputs, outputs=requested_outputs)
+            self.assertIn(
+                "should be at least 4000 bytes to hold the results",
+                str(ex.exception))
+            client.unregister_cuda_shared_memory()
+            cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
     def test_async_infer(self):
         model_name = "identity_uint8"
         request_parallelism = 4
diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh
index 587d1b8e13..659ddf18d2 100755
--- a/qa/L0_backend_python/test.sh
+++ b/qa/L0_backend_python/test.sh
@@ -128,6 +128,10 @@ mkdir -p models/string_fixed/1/
 cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
 cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
 
+mkdir -p models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity
+
 # Skip torch install on Jetson since it is already installed.
 if [ "$TEST_JETSON" == "0" ]; then
     pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html