From f9d3f48ca4dc62ea46b8caceb30b64637fd1a029 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Mon, 20 Mar 2023 18:02:41 -0700
Subject: [PATCH 01/11] Add flags for ORT build

---
 build.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/build.py b/build.py
index a6bb111520..ce65a8aa61 100755
--- a/build.py
+++ b/build.py
@@ -670,8 +670,13 @@ def onnxruntime_cmake_args(images, library_paths):
                               None, ort_include_path),
             cmake_backend_arg('onnxruntime', 'TRITON_ONNXRUNTIME_LIB_PATHS',
                               None, ort_lib_path),
+            cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM',
+                              None, 'jetpack')
+            # Jetson build needs the CUDA compiler to be found
+            cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER',
+                              None, '$(which nvcc)')
             cmake_backend_enable('onnxruntime',
-                                 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False)
+                                 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
         ]
     else:
         if target_platform() == 'windows':

From 9f0054e7476c6cece4b2852e55daf6ce84d13997 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Mon, 20 Mar 2023 19:47:08 -0700
Subject: [PATCH 02/11] Separate list with commas

---
 build.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/build.py b/build.py
index ce65a8aa61..e84a53dc50 100755
--- a/build.py
+++ b/build.py
@@ -670,11 +670,12 @@ def onnxruntime_cmake_args(images, library_paths):
                               None, ort_include_path),
             cmake_backend_arg('onnxruntime', 'TRITON_ONNXRUNTIME_LIB_PATHS',
                               None, ort_lib_path),
-            cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM',
-                              None, 'jetpack')
+            cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None,
+                              'jetpack'),
+            # TODO: Remove if not needed, else put back if arch_90 error
             # Jetson build needs the CUDA compiler to be found
-            cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER',
-                              None, '$(which nvcc)')
+            # cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER', None,
+            #                   '$(which nvcc)'),
             cmake_backend_enable('onnxruntime',
                                  'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
         ]

From e54f0c2023ddfcc43a6f5b0abbd0220b05a3649d Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Fri, 31 Mar 2023 15:22:21 -0700
Subject: [PATCH 03/11] Remove unnecessary detection of nvcc compiler

---
 build.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/build.py b/build.py
index e84a53dc50..c50baab03f 100755
--- a/build.py
+++ b/build.py
@@ -672,10 +672,6 @@ def onnxruntime_cmake_args(images, library_paths):
                               None, ort_lib_path),
             cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None,
                               'jetpack'),
-            # TODO: Remove if not needed, else put back if arch_90 error
-            # Jetson build needs the CUDA compiler to be found
-            # cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER', None,
-            #                   '$(which nvcc)'),
             cmake_backend_enable('onnxruntime',
                                  'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
         ]

From bc5999c3bd2f08c111bafed0d34aacd01a3d1a62 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Tue, 11 Apr 2023 15:25:20 -0700
Subject: [PATCH 04/11] Fixed Jetson path for perf_client, datadir

---
 qa/L0_perf_nomodel/run_test.sh | 10 ++--------
 qa/L0_perf_resnet/run_test.sh  |  6 ++----
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/qa/L0_perf_nomodel/run_test.sh b/qa/L0_perf_nomodel/run_test.sh
index b427c33750..1c5944291a 100755
--- a/qa/L0_perf_nomodel/run_test.sh
+++ b/qa/L0_perf_nomodel/run_test.sh
@@ -47,19 +47,13 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
 ARCH=${ARCH:="x86_64"}
 SERVER=${TRITON_DIR}/bin/tritonserver
 BACKEND_DIR=${TRITON_DIR}/backends
+DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
 MODEL_REPO="${PWD}/models"
+PERF_CLIENT=../clients/perf_client
 TF_VERSION=${TF_VERSION:=2}
 SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} --backend-config=tensorflow,version=${TF_VERSION}"
 source ../common/util.sh
 
-# DATADIR is already set in environment variable for aarch64
-if [ "$ARCH" == "aarch64" ]; then
-    PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
-else
-    PERF_CLIENT=../clients/perf_client
-    DATADIR=/data/inferenceserver/${REPO_VERSION}
-fi
-
 # Select the single GPU that will be available to the inference server
 export CUDA_VISIBLE_DEVICES=0
 
diff --git a/qa/L0_perf_resnet/run_test.sh b/qa/L0_perf_resnet/run_test.sh
index c3ab67d964..bbd9b33c42 100755
--- a/qa/L0_perf_resnet/run_test.sh
+++ b/qa/L0_perf_resnet/run_test.sh
@@ -54,15 +54,13 @@ rm -fr models && mkdir -p models && \
     sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
     echo "instance_group [ { count: ${INSTANCE_CNT} }]")
 
-# Onnx and onnx-trt models are very slow on Jetson.
 MEASUREMENT_WINDOW=5000
+PERF_CLIENT=../clients/perf_client
+# Onnx and onnx-trt models are very slow on Jetson.
 if [ "$ARCH" == "aarch64" ]; then
-    PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
     if [ "$MODEL_FRAMEWORK" == "onnx" ] || [ "$MODEL_FRAMEWORK" == "onnx_trt" ]; then
         MEASUREMENT_WINDOW=20000
     fi
-else
-    PERF_CLIENT=../clients/perf_client
 fi
 
 # Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and

From 4ab6398aa3c4855fa056ce69313f3eea79e02ff0 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Wed, 12 Apr 2023 09:35:55 -0700
Subject: [PATCH 05/11] Create version directory for custom model

---
 qa/L0_perf_nomodel/run_test.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/qa/L0_perf_nomodel/run_test.sh b/qa/L0_perf_nomodel/run_test.sh
index 1c5944291a..f901777d25 100755
--- a/qa/L0_perf_nomodel/run_test.sh
+++ b/qa/L0_perf_nomodel/run_test.sh
@@ -70,6 +70,10 @@ if [[ $BACKENDS == *"python"* ]]; then
          sed -i "s/^name:.*/name: \"python_zero_1_float32\"/" config.pbtxt)
 fi
 
+if [[ $BACKENDS == *"custom"* ]]; then
+    mkdir -p "custom_models/custom_zero_1_float32/1"
+fi
+
 PERF_CLIENT_PERCENTILE_ARGS="" &&
     (( ${PERF_CLIENT_PERCENTILE} != 0 )) &&
     PERF_CLIENT_PERCENTILE_ARGS="--percentile=${PERF_CLIENT_PERCENTILE}"

From 3a1571743a7b6ed2ab52121dafc479ecfb9e9545 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Tue, 2 May 2023 09:19:24 -0700
Subject: [PATCH 06/11] Remove probe check for shm, add shm exceed error for
 Jetson

---
 qa/L0_backend_python/python_test.py | 76 +++++++++++++----------------
 1 file changed, 35 insertions(+), 41 deletions(-)

diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index 2c7d4f8722..7943fadfbb 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -40,8 +40,6 @@
 from tritonclient.utils import *
 import tritonclient.http as httpclient
 
-TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))
-
 
 class PythonTest(tu.TestResultCollector):
 
@@ -113,43 +111,38 @@ def _optional_input_infer(self, model_name, has_input0, has_input1):
         np.testing.assert_equal(output1, expected_output1,
                                 "OUTPUT1 doesn't match expected OUTPUT1")
 
-    # We do not use a docker on Jetson so it does not impose a shared memory
-    # allocation limit of 1GB. This means test will pass without the expected
-    # error on jetson and is hence unnecessary.
-    if not TEST_JETSON:
-
-        def test_growth_error(self):
-            # 2 MiBs
-            total_byte_size = 2 * 1024 * 1024
-            shape = [total_byte_size]
-            model_name = 'identity_uint8_nobatch'
-            dtype = np.uint8
-            with self._shm_leak_detector.Probe() as shm_probe:
-                self._infer_help(model_name, shape, dtype)
-
-            # 1 GiB payload leads to error in the main Python backned process.
-            # Total shared memory available is 1GiB.
-            total_byte_size = 1024 * 1024 * 1024
-            shape = [total_byte_size]
-            with self.assertRaises(InferenceServerException) as ex:
-                self._infer_help(model_name, shape, dtype)
-            self.assertIn("Failed to increase the shared memory pool size",
-                          str(ex.exception))
-
-            # 512 MiBs payload leads to error in the Python stub process.
-            total_byte_size = 512 * 1024 * 1024
-            shape = [total_byte_size]
-            with self.assertRaises(InferenceServerException) as ex:
-                self._infer_help(model_name, shape, dtype)
-            self.assertIn("Failed to increase the shared memory pool size",
-                          str(ex.exception))
-
-            # 2 MiBs
-            # Send a small paylaod to make sure it is still working properly
-            total_byte_size = 2 * 1024 * 1024
-            shape = [total_byte_size]
-            with self._shm_leak_detector.Probe() as shm_probe:
-                self._infer_help(model_name, shape, dtype)
+    def test_growth_error(self):
+        # 2 MiBs
+        total_byte_size = 2 * 1024 * 1024
+        shape = [total_byte_size]
+        model_name = 'identity_uint8_nobatch'
+        dtype = np.uint8
+        with self._shm_leak_detector.Probe() as shm_probe:
+            self._infer_help(model_name, shape, dtype)
+
+        # 1 GiB payload leads to error in the main Python backend process.
+        # Total shared memory available is 1GiB.
+        total_byte_size = 1024 * 1024 * 1024
+        shape = [total_byte_size]
+        with self.assertRaises(InferenceServerException) as ex:
+            self._infer_help(model_name, shape, dtype)
+        self.assertIn("Failed to increase the shared memory pool size",
+                      str(ex.exception))
+
+        # 512 MiBs payload leads to error in the Python stub process.
+        total_byte_size = 512 * 1024 * 1024
+        shape = [total_byte_size]
+        with self.assertRaises(InferenceServerException) as ex:
+            self._infer_help(model_name, shape, dtype)
+        self.assertIn("Failed to increase the shared memory pool size",
+                      str(ex.exception))
+
+        # 2 MiBs
+        # Send a small payload to make sure it is still working properly
+        total_byte_size = 2 * 1024 * 1024
+        shape = [total_byte_size]
+        with self._shm_leak_detector.Probe() as shm_probe:
+            self._infer_help(model_name, shape, dtype)
 
     def test_async_infer(self):
         model_name = "identity_uint8"
@@ -189,8 +182,9 @@ def test_async_infer(self):
 
         # Make sure the requests ran in parallel.
         stats = client.get_inference_statistics(model_name)
-        test_cond = (len(stats['model_stats']) != 1) or (
-            stats['model_stats'][0]['name'] != model_name)
+        test_cond = (len(stats['model_stats'])
+                     != 1) or (stats['model_stats'][0]['name']
+                               != model_name)
         self.assertFalse(
             test_cond,
             "error: expected statistics for {}".format(model_name))

From 4aae0307391fe86ef96cdbf08ddefd4e294071b8 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Tue, 2 May 2023 10:20:33 -0700
Subject: [PATCH 07/11] Copyright updates, fix Jetson Probe

---
 qa/L0_backend_python/python_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index 7943fadfbb..49413bce55 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions From b929dccccdef9d7bc85c1b525f20fb76ba4ba522 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Tue, 2 May 2023 11:34:31 -0700 Subject: [PATCH 08/11] Fix be_python test num on Jetson --- qa/L0_backend_python/test.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 4af090f411..3b4cf75eab 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -131,9 +131,6 @@ cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed # Skip torch install on Jetson since it is already installed. if [ "$TEST_JETSON" == "0" ]; then pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html -else - # test_growth_error is skipped on jetson - EXPECTED_NUM_TESTS=8 fi prev_num_pages=`get_shm_pages` From f790956051550476a7bf6f52d419d1d187b3f418 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 3 May 2023 13:39:06 -0700 Subject: [PATCH 09/11] Remove extra comma, non-Dockerized Jetson comment --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index c50baab03f..81a8aebd55 100755 --- a/build.py +++ b/build.py @@ -673,7 +673,7 @@ def onnxruntime_cmake_args(images, library_paths): cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None, 'jetpack'), cmake_backend_enable('onnxruntime', - 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False), + 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False) ] else: if target_platform() == 'windows': From f95fae10730a7cf218a50af68226fdf814694b8d Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Fri, 19 May 2023 11:34:44 -0700 Subject: [PATCH 10/11] Remove comment about Jetson being non-dockerized --- qa/L0_backend_python/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 3b4cf75eab..587d1b8e13 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -368,7 +368,7 @@ and shared memory pages after starting triton equals to $current_num_pages \n*** exit 1 fi -# Disable env test for Jetson since build is non-dockerized and cloud storage repos are not supported +# Disable env test for Jetson since cloud storage repos are not supported # Disable ensemble, unittest, io and bls tests for Jetson since GPU Tensors are not supported # Disable variants test for Jetson since already built without GPU Tensor support # Disable decoupled test because it uses GPU tensors From 20ce47ac5fa01590838ae468841be7eb95cc6636 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Thu, 25 May 2023 15:48:42 -0700 Subject: [PATCH 11/11] Remove no longer needed flag --- build.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/build.py b/build.py index 81a8aebd55..a6bb111520 100755 --- a/build.py +++ b/build.py @@ -670,8 +670,6 @@ def onnxruntime_cmake_args(images, library_paths): None, ort_include_path), cmake_backend_arg('onnxruntime', 'TRITON_ONNXRUNTIME_LIB_PATHS', None, ort_lib_path), - cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None, - 'jetpack'), cmake_backend_enable('onnxruntime', 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False) ]
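
Note on the build.py hunks above: cmake_backend_arg and cmake_backend_enable
assemble the per-backend CMake definitions that end up on the backend's cmake
command line (the hunk in patch 01 is missing commas between the new list
elements, which is what patch 02 corrects). The snippet below is a minimal,
self-contained sketch of that pattern; the helper bodies and exact flag
formatting are illustrative assumptions, not the actual build.py
implementation.

def cmake_backend_arg(backend, name, arg_type, value):
    # Render a CMake cache definition such as -DTRITON_BUILD_PLATFORM=jetpack.
    # 'backend' is kept only to mirror the call sites in build.py; a non-None
    # arg_type would become a CMake type suffix such as :STRING or :PATH.
    suffix = ':{}'.format(arg_type) if arg_type is not None else ''
    return '-D{}{}={}'.format(name, suffix, value)


def cmake_backend_enable(backend, name, enabled):
    # Boolean switches are rendered in the usual CMake ON/OFF form.
    return '-D{}:BOOL={}'.format(name, 'ON' if enabled else 'OFF')


if __name__ == '__main__':
    # Mirrors the onnxruntime arguments from patches 01-03; patch 11 later
    # drops the TRITON_BUILD_PLATFORM flag again once it is no longer needed.
    cargs = [
        cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None,
                          'jetpack'),
        cmake_backend_enable('onnxruntime',
                             'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
    ]
    # Expected output (a single line):
    # -DTRITON_BUILD_PLATFORM=jetpack -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO:BOOL=OFF
    print(' '.join(cargs))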