Update Jetson tests in Docker container #5734

Merged: 11 commits, May 31, 2023
78 changes: 36 additions & 42 deletions qa/L0_backend_python/python_test.py
@@ -1,6 +1,6 @@
#!/usr/bin/python

# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -40,8 +40,6 @@
from tritonclient.utils import *
import tritonclient.http as httpclient

TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))


class PythonTest(tu.TestResultCollector):

@@ -113,43 +111,38 @@ def _optional_input_infer(self, model_name, has_input0, has_input1):
np.testing.assert_equal(output1, expected_output1,
"OUTPUT1 doesn't match expected OUTPUT1")

# We do not use Docker on Jetson, so it does not impose a shared memory
# allocation limit of 1GB. This means the test will pass without the expected
# error on Jetson and is hence unnecessary.
if not TEST_JETSON:

def test_growth_error(self):
# 2 MiBs
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
model_name = 'identity_uint8_nobatch'
dtype = np.uint8
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

# 1 GiB payload leads to error in the main Python backend process.
# Total shared memory available is 1GiB.
total_byte_size = 1024 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 512 MiBs payload leads to error in the Python stub process.
total_byte_size = 512 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 2 MiBs
# Send a small payload to make sure it is still working properly
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)
def test_growth_error(self):
# 2 MiBs
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
model_name = 'identity_uint8_nobatch'
dtype = np.uint8
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

# 1 GiB payload leads to error in the main Python backend process.
# Total shared memory available is 1GiB.
total_byte_size = 1024 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 512 MiBs payload leads to error in the Python stub process.
total_byte_size = 512 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 2 MiBs
# Send a small payload to make sure it is still working properly
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

def test_async_infer(self):
model_name = "identity_uint8"
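The updated `test_growth_error` relies on `self._infer_help`, which is defined earlier in `python_test.py` and is not visible in this hunk. For reference, here is a minimal sketch of what such a helper typically looks like with the `tritonclient.http` API; the tensor names `INPUT0`/`OUTPUT0` and the `localhost:8000` endpoint are assumptions for illustration, not taken from this diff:

```python
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype


def _infer_help(model_name, shape, dtype):
    # Hypothetical sketch of the helper used by test_growth_error.
    # Assumes an identity model that echoes INPUT0 back as OUTPUT0.
    client = httpclient.InferenceServerClient(url="localhost:8000")
    input_data = (255 * np.random.rand(*shape)).astype(dtype)
    input0 = httpclient.InferInput("INPUT0", list(shape),
                                   np_to_triton_dtype(dtype))
    input0.set_data_from_numpy(input_data)
    result = client.infer(model_name, [input0])
    # The identity model should return the payload unchanged.
    np.testing.assert_equal(result.as_numpy("OUTPUT0"), input_data)
    client.close()
```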
@@ -189,8 +182,9 @@ def test_async_infer(self):

# Make sure the requests ran in parallel.
stats = client.get_inference_statistics(model_name)
test_cond = (len(stats['model_stats']) != 1) or (
stats['model_stats'][0]['name'] != model_name)
test_cond = (len(stats['model_stats'])
!= 1) or (stats['model_stats'][0]['name']
!= model_name)
self.assertFalse(
test_cond,
"error: expected statistics for {}".format(model_name))
5 changes: 1 addition & 4 deletions qa/L0_backend_python/test.sh
@@ -131,9 +131,6 @@ cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
# Skip torch install on Jetson since it is already installed.
if [ "$TEST_JETSON" == "0" ]; then
pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
else
# test_growth_error is skipped on jetson
EXPECTED_NUM_TESTS=8
fi

prev_num_pages=`get_shm_pages`
@@ -371,7 +368,7 @@ and shared memory pages after starting triton equals to $current_num_pages \n***
exit 1
fi

# Disable env test for Jetson since build is non-dockerized and cloud storage repos are not supported
# Disable env test for Jetson since cloud storage repos are not supported
# Disable ensemble, unittest, io and bls tests for Jetson since GPU Tensors are not supported
# Disable variants test for Jetson since already built without GPU Tensor support
# Disable decoupled test because it uses GPU tensors
14 changes: 6 additions & 8 deletions qa/L0_perf_nomodel/run_test.sh
@@ -47,19 +47,13 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
ARCH=${ARCH:="x86_64"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
MODEL_REPO="${PWD}/models"
PERF_CLIENT=../clients/perf_client
TF_VERSION=${TF_VERSION:=2}
SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} --backend-config=tensorflow,version=${TF_VERSION}"
source ../common/util.sh

# DATADIR is already set as an environment variable for aarch64
if [ "$ARCH" == "aarch64" ]; then
PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
else
PERF_CLIENT=../clients/perf_client
DATADIR=/data/inferenceserver/${REPO_VERSION}
fi

# Select the single GPU that will be available to the inference server
export CUDA_VISIBLE_DEVICES=0

@@ -76,6 +70,10 @@ if [[ $BACKENDS == *"python"* ]]; then
sed -i "s/^name:.*/name: \"python_zero_1_float32\"/" config.pbtxt)
fi

if [[ $BACKENDS == *"custom"* ]]; then
mkdir -p "custom_models/custom_zero_1_float32/1"
fi

PERF_CLIENT_PERCENTILE_ARGS="" &&
(( ${PERF_CLIENT_PERCENTILE} != 0 )) &&
PERF_CLIENT_PERCENTILE_ARGS="--percentile=${PERF_CLIENT_PERCENTILE}"
6 changes: 2 additions & 4 deletions qa/L0_perf_resnet/run_test.sh
@@ -54,15 +54,13 @@ rm -fr models && mkdir -p models && \
sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
echo "instance_group [ { count: ${INSTANCE_CNT} }]")

# Onnx and onnx-trt models are very slow on Jetson.
MEASUREMENT_WINDOW=5000
PERF_CLIENT=../clients/perf_client
# Onnx and onnx-trt models are very slow on Jetson.
if [ "$ARCH" == "aarch64" ]; then
PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
if [ "$MODEL_FRAMEWORK" == "onnx" ] || [ "$MODEL_FRAMEWORK" == "onnx_trt" ]; then
MEASUREMENT_WINDOW=20000
fi
else
PERF_CLIENT=../clients/perf_client
fi

# Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and