From f9d3f48ca4dc62ea46b8caceb30b64637fd1a029 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Mon, 20 Mar 2023 18:02:41 -0700
Subject: [PATCH 01/11] Add flags for ORT build

---
 build.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/build.py b/build.py
index a6bb111520..ce65a8aa61 100755
--- a/build.py
+++ b/build.py
@@ -670,8 +670,13 @@ def onnxruntime_cmake_args(images, library_paths):
                               None, ort_include_path),
             cmake_backend_arg('onnxruntime', 'TRITON_ONNXRUNTIME_LIB_PATHS',
                               None, ort_lib_path),
+            cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM',
+                              None, 'jetpack')
+            # Jetson build needs the CUDA compiler to be found
+            cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER',
+                              None, '$(which nvcc)')
             cmake_backend_enable('onnxruntime',
-                                 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False)
+                                 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
         ]
     else:
         if target_platform() == 'windows':

From 9f0054e7476c6cece4b2852e55daf6ce84d13997 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Mon, 20 Mar 2023 19:47:08 -0700
Subject: [PATCH 02/11] Separate list with commas

---
 build.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/build.py b/build.py
index ce65a8aa61..e84a53dc50 100755
--- a/build.py
+++ b/build.py
@@ -670,11 +670,12 @@ def onnxruntime_cmake_args(images, library_paths):
                               None, ort_include_path),
             cmake_backend_arg('onnxruntime', 'TRITON_ONNXRUNTIME_LIB_PATHS',
                               None, ort_lib_path),
-            cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM',
-                              None, 'jetpack')
+            cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None,
+                              'jetpack'),
+            # TODO: Remove if not needed, else put back if arch_90 error
             # Jetson build needs the CUDA compiler to be found
-            cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER',
-                              None, '$(which nvcc)')
+            # cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER', None,
+            #                   '$(which nvcc)'),
             cmake_backend_enable('onnxruntime',
                                  'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
         ]

From e54f0c2023ddfcc43a6f5b0abbd0220b05a3649d Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Fri, 31 Mar 2023 15:22:21 -0700
Subject: [PATCH 03/11] Remove unnecessary detection of nvcc compiler

---
 build.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/build.py b/build.py
index e84a53dc50..c50baab03f 100755
--- a/build.py
+++ b/build.py
@@ -672,10 +672,6 @@ def onnxruntime_cmake_args(images, library_paths):
                               None, ort_lib_path),
             cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None,
                               'jetpack'),
-            # TODO: Remove if not needed, else put back if arch_90 error
-            # Jetson build needs the CUDA compiler to be found
-            # cmake_backend_arg('onnxruntime', 'CMAKE_CUDA_COMPILER', None,
-            #                   '$(which nvcc)'),
             cmake_backend_enable('onnxruntime',
                                  'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
         ]

From bc5999c3bd2f08c111bafed0d34aacd01a3d1a62 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Tue, 11 Apr 2023 15:25:20 -0700
Subject: [PATCH 04/11] Fixed Jetson path for perf_client, datadir

---
 qa/L0_perf_nomodel/run_test.sh | 10 ++--------
 qa/L0_perf_resnet/run_test.sh  |  6 ++----
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/qa/L0_perf_nomodel/run_test.sh b/qa/L0_perf_nomodel/run_test.sh
index b427c33750..1c5944291a 100755
--- a/qa/L0_perf_nomodel/run_test.sh
+++ b/qa/L0_perf_nomodel/run_test.sh
@@ -47,19 +47,13 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
 ARCH=${ARCH:="x86_64"}
 SERVER=${TRITON_DIR}/bin/tritonserver
 BACKEND_DIR=${TRITON_DIR}/backends
+DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
 MODEL_REPO="${PWD}/models"
+PERF_CLIENT=../clients/perf_client
 TF_VERSION=${TF_VERSION:=2}
 SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} --backend-config=tensorflow,version=${TF_VERSION}"
 source ../common/util.sh
 
-# DATADIR is already set in environment variable for aarch64
-if [ "$ARCH" == "aarch64" ]; then
-    PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
-else
-    PERF_CLIENT=../clients/perf_client
-    DATADIR=/data/inferenceserver/${REPO_VERSION}
-fi
-
 # Select the single GPU that will be available to the inference server
 export CUDA_VISIBLE_DEVICES=0
 
diff --git a/qa/L0_perf_resnet/run_test.sh b/qa/L0_perf_resnet/run_test.sh
index c3ab67d964..bbd9b33c42 100755
--- a/qa/L0_perf_resnet/run_test.sh
+++ b/qa/L0_perf_resnet/run_test.sh
@@ -54,15 +54,13 @@ rm -fr models && mkdir -p models && \
     sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
     echo "instance_group [ { count: ${INSTANCE_CNT} }]")
 
-# Onnx and onnx-trt models are very slow on Jetson.
 MEASUREMENT_WINDOW=5000
+PERF_CLIENT=../clients/perf_client
+# Onnx and onnx-trt models are very slow on Jetson.
 if [ "$ARCH" == "aarch64" ]; then
-    PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
     if [ "$MODEL_FRAMEWORK" == "onnx" ] || [ "$MODEL_FRAMEWORK" == "onnx_trt" ]; then
         MEASUREMENT_WINDOW=20000
     fi
-else
-    PERF_CLIENT=../clients/perf_client
 fi
 
 # Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and

From 4ab6398aa3c4855fa056ce69313f3eea79e02ff0 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Wed, 12 Apr 2023 09:35:55 -0700
Subject: [PATCH 05/11] Create version directory for custom model

---
 qa/L0_perf_nomodel/run_test.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/qa/L0_perf_nomodel/run_test.sh b/qa/L0_perf_nomodel/run_test.sh
index 1c5944291a..f901777d25 100755
--- a/qa/L0_perf_nomodel/run_test.sh
+++ b/qa/L0_perf_nomodel/run_test.sh
@@ -70,6 +70,10 @@ if [[ $BACKENDS == *"python"* ]]; then
          sed -i "s/^name:.*/name: \"python_zero_1_float32\"/" config.pbtxt)
 fi
 
+if [[ $BACKENDS == *"custom"* ]]; then
+    mkdir -p "custom_models/custom_zero_1_float32/1"
+fi
+
 PERF_CLIENT_PERCENTILE_ARGS="" &&
     (( ${PERF_CLIENT_PERCENTILE} != 0 )) &&
     PERF_CLIENT_PERCENTILE_ARGS="--percentile=${PERF_CLIENT_PERCENTILE}"

From 3a1571743a7b6ed2ab52121dafc479ecfb9e9545 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Tue, 2 May 2023 09:19:24 -0700
Subject: [PATCH 06/11] Remove probe check for shm, add shm exceed error for
 Jetson

---
 qa/L0_backend_python/python_test.py | 76 +++++++++++++----------------
 1 file changed, 35 insertions(+), 41 deletions(-)

diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index 2c7d4f8722..7943fadfbb 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -40,8 +40,6 @@
 from tritonclient.utils import *
 import tritonclient.http as httpclient
 
-TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))
-
 
 class PythonTest(tu.TestResultCollector):
 
@@ -113,43 +111,38 @@ def _optional_input_infer(self, model_name, has_input0, has_input1):
         np.testing.assert_equal(output1, expected_output1,
                                 "OUTPUT1 doesn't match expected OUTPUT1")
 
-    # We do not use a docker on Jetson so it does not impose a shared memory
-    # allocation limit of 1GB. This means test will pass without the expected
-    # error on jetson and is hence unnecessary.
-    if not TEST_JETSON:
-
-        def test_growth_error(self):
-            # 2 MiBs
-            total_byte_size = 2 * 1024 * 1024
-            shape = [total_byte_size]
-            model_name = 'identity_uint8_nobatch'
-            dtype = np.uint8
-            with self._shm_leak_detector.Probe() as shm_probe:
-                self._infer_help(model_name, shape, dtype)
-
-            # 1 GiB payload leads to error in the main Python backned process.
-            # Total shared memory available is 1GiB.
-            total_byte_size = 1024 * 1024 * 1024
-            shape = [total_byte_size]
-            with self.assertRaises(InferenceServerException) as ex:
-                self._infer_help(model_name, shape, dtype)
-            self.assertIn("Failed to increase the shared memory pool size",
-                          str(ex.exception))
-
-            # 512 MiBs payload leads to error in the Python stub process.
-            total_byte_size = 512 * 1024 * 1024
-            shape = [total_byte_size]
-            with self.assertRaises(InferenceServerException) as ex:
-                self._infer_help(model_name, shape, dtype)
-            self.assertIn("Failed to increase the shared memory pool size",
-                          str(ex.exception))
-
-            # 2 MiBs
-            # Send a small paylaod to make sure it is still working properly
-            total_byte_size = 2 * 1024 * 1024
-            shape = [total_byte_size]
-            with self._shm_leak_detector.Probe() as shm_probe:
-                self._infer_help(model_name, shape, dtype)
+    def test_growth_error(self):
+        # 2 MiBs
+        total_byte_size = 2 * 1024 * 1024
+        shape = [total_byte_size]
+        model_name = 'identity_uint8_nobatch'
+        dtype = np.uint8
+        with self._shm_leak_detector.Probe() as shm_probe:
+            self._infer_help(model_name, shape, dtype)
+
+        # 1 GiB payload leads to error in the main Python backend process.
+        # Total shared memory available is 1GiB.
+        total_byte_size = 1024 * 1024 * 1024
+        shape = [total_byte_size]
+        with self.assertRaises(InferenceServerException) as ex:
+            self._infer_help(model_name, shape, dtype)
+        self.assertIn("Failed to increase the shared memory pool size",
+                      str(ex.exception))
+
+        # 512 MiBs payload leads to error in the Python stub process.
+        total_byte_size = 512 * 1024 * 1024
+        shape = [total_byte_size]
+        with self.assertRaises(InferenceServerException) as ex:
+            self._infer_help(model_name, shape, dtype)
+        self.assertIn("Failed to increase the shared memory pool size",
+                      str(ex.exception))
+
+        # 2 MiBs
+        # Send a small payload to make sure it is still working properly
+        total_byte_size = 2 * 1024 * 1024
+        shape = [total_byte_size]
+        with self._shm_leak_detector.Probe() as shm_probe:
+            self._infer_help(model_name, shape, dtype)
 
     def test_async_infer(self):
         model_name = "identity_uint8"
@@ -189,8 +182,9 @@ def test_async_infer(self):
 
         # Make sure the requests ran in parallel.
         stats = client.get_inference_statistics(model_name)
-        test_cond = (len(stats['model_stats']) != 1) or (
-            stats['model_stats'][0]['name'] != model_name)
+        test_cond = (len(stats['model_stats'])
+                     != 1) or (stats['model_stats'][0]['name']
+                               != model_name)
         self.assertFalse(
             test_cond,
             "error: expected statistics for {}".format(model_name))

From 4aae0307391fe86ef96cdbf08ddefd4e294071b8 Mon Sep 17 00:00:00 2001
From: David Yastremsky
Date: Tue, 2 May 2023 10:20:33 -0700
Subject: [PATCH 07/11] Copyright updates, fix Jetson Probe

---
 qa/L0_backend_python/python_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index 7943fadfbb..49413bce55 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions From b929dccccdef9d7bc85c1b525f20fb76ba4ba522 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Tue, 2 May 2023 11:34:31 -0700 Subject: [PATCH 08/11] Fix be_python test num on Jetson --- qa/L0_backend_python/test.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 4af090f411..3b4cf75eab 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -131,9 +131,6 @@ cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed # Skip torch install on Jetson since it is already installed. if [ "$TEST_JETSON" == "0" ]; then pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html -else - # test_growth_error is skipped on jetson - EXPECTED_NUM_TESTS=8 fi prev_num_pages=`get_shm_pages` From f790956051550476a7bf6f52d419d1d187b3f418 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Wed, 3 May 2023 13:39:06 -0700 Subject: [PATCH 09/11] Remove extra comma, non-Dockerized Jetson comment --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index c50baab03f..81a8aebd55 100755 --- a/build.py +++ b/build.py @@ -673,7 +673,7 @@ def onnxruntime_cmake_args(images, library_paths): cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None, 'jetpack'), cmake_backend_enable('onnxruntime', - 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False), + 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False) ] else: if target_platform() == 'windows': From f95fae10730a7cf218a50af68226fdf814694b8d Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Fri, 19 May 2023 11:34:44 -0700 Subject: [PATCH 10/11] Remove comment about Jetson being non-dockerized --- qa/L0_backend_python/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 3b4cf75eab..587d1b8e13 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -368,7 +368,7 @@ and shared memory pages after starting triton equals to $current_num_pages \n*** exit 1 fi -# Disable env test for Jetson since build is non-dockerized and cloud storage repos are not supported +# Disable env test for Jetson since cloud storage repos are not supported # Disable ensemble, unittest, io and bls tests for Jetson since GPU Tensors are not supported # Disable variants test for Jetson since already built without GPU Tensor support # Disable decoupled test because it uses GPU tensors From 20ce47ac5fa01590838ae468841be7eb95cc6636 Mon Sep 17 00:00:00 2001 From: David Yastremsky Date: Thu, 25 May 2023 15:48:42 -0700 Subject: [PATCH 11/11] Remove no longer needed flag --- build.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/build.py b/build.py index 81a8aebd55..a6bb111520 100755 --- a/build.py +++ b/build.py @@ -670,8 +670,6 @@ def onnxruntime_cmake_args(images, library_paths): None, ort_include_path), cmake_backend_arg('onnxruntime', 'TRITON_ONNXRUNTIME_LIB_PATHS', None, ort_lib_path), - cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None, - 'jetpack'), cmake_backend_enable('onnxruntime', 'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False) ]
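
Note on the build.py hunks above: cmake_backend_arg and cmake_backend_enable
assemble the per-backend CMake definitions that end up on the backend's cmake
command line (the hunk in patch 01 is missing commas between the new list
elements, which is what patch 02 corrects). The snippet below is a minimal,
self-contained sketch of that pattern; the helper bodies and exact flag
formatting are illustrative assumptions, not the actual build.py
implementation.

def cmake_backend_arg(backend, name, arg_type, value):
    # Render a CMake cache definition such as -DTRITON_BUILD_PLATFORM=jetpack.
    # 'backend' is kept only to mirror the call sites in build.py; a non-None
    # arg_type would become a CMake type suffix such as :STRING or :PATH.
    suffix = ':{}'.format(arg_type) if arg_type is not None else ''
    return '-D{}{}={}'.format(name, suffix, value)


def cmake_backend_enable(backend, name, enabled):
    # Boolean switches are rendered in the usual CMake ON/OFF form.
    return '-D{}:BOOL={}'.format(name, 'ON' if enabled else 'OFF')


if __name__ == '__main__':
    # Mirrors the onnxruntime arguments from patches 01-03; patch 11 later
    # drops the TRITON_BUILD_PLATFORM flag again once it is no longer needed.
    cargs = [
        cmake_backend_arg('onnxruntime', 'TRITON_BUILD_PLATFORM', None,
                          'jetpack'),
        cmake_backend_enable('onnxruntime',
                             'TRITON_ENABLE_ONNXRUNTIME_OPENVINO', False),
    ]
    # Expected output (a single line):
    # -DTRITON_BUILD_PLATFORM=jetpack -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO:BOOL=OFF
    print(' '.join(cargs))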