diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index 2c7d4f8722..49413bce55 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -40,8 +40,6 @@
 from tritonclient.utils import *
 import tritonclient.http as httpclient
 
-TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))
-
 
 class PythonTest(tu.TestResultCollector):
 
@@ -113,43 +111,38 @@ def _optional_input_infer(self, model_name, has_input0, has_input1):
         np.testing.assert_equal(output1, expected_output1,
                                 "OUTPUT1 doesn't match expected OUTPUT1")
 
-    # We do not use a docker on Jetson so it does not impose a shared memory
-    # allocation limit of 1GB. This means test will pass without the expected
-    # error on jetson and is hence unnecessary.
-    if not TEST_JETSON:
-
-        def test_growth_error(self):
-            # 2 MiBs
-            total_byte_size = 2 * 1024 * 1024
-            shape = [total_byte_size]
-            model_name = 'identity_uint8_nobatch'
-            dtype = np.uint8
-            with self._shm_leak_detector.Probe() as shm_probe:
-                self._infer_help(model_name, shape, dtype)
-
-            # 1 GiB payload leads to error in the main Python backned process.
-            # Total shared memory available is 1GiB.
-            total_byte_size = 1024 * 1024 * 1024
-            shape = [total_byte_size]
-            with self.assertRaises(InferenceServerException) as ex:
-                self._infer_help(model_name, shape, dtype)
-            self.assertIn("Failed to increase the shared memory pool size",
-                          str(ex.exception))
-
-            # 512 MiBs payload leads to error in the Python stub process.
-            total_byte_size = 512 * 1024 * 1024
-            shape = [total_byte_size]
-            with self.assertRaises(InferenceServerException) as ex:
-                self._infer_help(model_name, shape, dtype)
-            self.assertIn("Failed to increase the shared memory pool size",
-                          str(ex.exception))
-
-            # 2 MiBs
-            # Send a small paylaod to make sure it is still working properly
-            total_byte_size = 2 * 1024 * 1024
-            shape = [total_byte_size]
-            with self._shm_leak_detector.Probe() as shm_probe:
-                self._infer_help(model_name, shape, dtype)
+    def test_growth_error(self):
+        # 2 MiBs
+        total_byte_size = 2 * 1024 * 1024
+        shape = [total_byte_size]
+        model_name = 'identity_uint8_nobatch'
+        dtype = np.uint8
+        with self._shm_leak_detector.Probe() as shm_probe:
+            self._infer_help(model_name, shape, dtype)
+
+        # 1 GiB payload leads to an error in the main Python backend process.
+        # Total shared memory available is 1GiB.
+        total_byte_size = 1024 * 1024 * 1024
+        shape = [total_byte_size]
+        with self.assertRaises(InferenceServerException) as ex:
+            self._infer_help(model_name, shape, dtype)
+        self.assertIn("Failed to increase the shared memory pool size",
+                      str(ex.exception))
+
+        # 512 MiBs payload leads to an error in the Python stub process.
+        total_byte_size = 512 * 1024 * 1024
+        shape = [total_byte_size]
+        with self.assertRaises(InferenceServerException) as ex:
+            self._infer_help(model_name, shape, dtype)
+        self.assertIn("Failed to increase the shared memory pool size",
+                      str(ex.exception))
+
+        # 2 MiBs
+        # Send a small payload to make sure it is still working properly
+        total_byte_size = 2 * 1024 * 1024
+        shape = [total_byte_size]
+        with self._shm_leak_detector.Probe() as shm_probe:
+            self._infer_help(model_name, shape, dtype)
 
     def test_async_infer(self):
         model_name = "identity_uint8"
@@ -189,8 +182,9 @@ def test_async_infer(self):
 
         # Make sure the requests ran in parallel.
         stats = client.get_inference_statistics(model_name)
-        test_cond = (len(stats['model_stats']) != 1) or (
-            stats['model_stats'][0]['name'] != model_name)
+        test_cond = (len(stats['model_stats'])
+                     != 1) or (stats['model_stats'][0]['name']
+                               != model_name)
         self.assertFalse(
             test_cond,
             "error: expected statistics for {}".format(model_name))
diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh
index 4af090f411..587d1b8e13 100755
--- a/qa/L0_backend_python/test.sh
+++ b/qa/L0_backend_python/test.sh
@@ -131,9 +131,6 @@ cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
 # Skip torch install on Jetson since it is already installed.
 if [ "$TEST_JETSON" == "0" ]; then
     pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
-else
-    # test_growth_error is skipped on jetson
-    EXPECTED_NUM_TESTS=8
 fi
 
 prev_num_pages=`get_shm_pages`
@@ -371,7 +368,7 @@ and shared memory pages after starting triton equals to $current_num_pages \n***"
     exit 1
 fi
 
-# Disable env test for Jetson since build is non-dockerized and cloud storage repos are not supported
+# Disable env test for Jetson since cloud storage repos are not supported
 # Disable ensemble, unittest, io and bls tests for Jetson since GPU Tensors are not supported
 # Disable variants test for Jetson since already built without GPU Tensor support
 # Disable decoupled test because it uses GPU tensors
diff --git a/qa/L0_perf_nomodel/run_test.sh b/qa/L0_perf_nomodel/run_test.sh
index b427c33750..f901777d25 100755
--- a/qa/L0_perf_nomodel/run_test.sh
+++ b/qa/L0_perf_nomodel/run_test.sh
@@ -47,19 +47,13 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
 ARCH=${ARCH:="x86_64"}
 SERVER=${TRITON_DIR}/bin/tritonserver
 BACKEND_DIR=${TRITON_DIR}/backends
+DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
 MODEL_REPO="${PWD}/models"
+PERF_CLIENT=../clients/perf_client
 TF_VERSION=${TF_VERSION:=2}
 SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} --backend-config=tensorflow,version=${TF_VERSION}"
 source ../common/util.sh
 
-# DATADIR is already set in environment variable for aarch64
-if [ "$ARCH" == "aarch64" ]; then
-    PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
-else
-    PERF_CLIENT=../clients/perf_client
-    DATADIR=/data/inferenceserver/${REPO_VERSION}
-fi
-
 # Select the single GPU that will be available to the inference server
 export CUDA_VISIBLE_DEVICES=0
 
@@ -76,6 +70,10 @@ if [[ $BACKENDS == *"python"* ]]; then
             sed -i "s/^name:.*/name: \"python_zero_1_float32\"/" config.pbtxt)
 fi
 
+if [[ $BACKENDS == *"custom"* ]]; then
+    mkdir -p "custom_models/custom_zero_1_float32/1"
+fi
+
 PERF_CLIENT_PERCENTILE_ARGS="" &&
     (( ${PERF_CLIENT_PERCENTILE} != 0 )) &&
     PERF_CLIENT_PERCENTILE_ARGS="--percentile=${PERF_CLIENT_PERCENTILE}"
diff --git a/qa/L0_perf_resnet/run_test.sh b/qa/L0_perf_resnet/run_test.sh
index c3ab67d964..bbd9b33c42 100755
--- a/qa/L0_perf_resnet/run_test.sh
+++ b/qa/L0_perf_resnet/run_test.sh
@@ -54,15 +54,13 @@ rm -fr models && mkdir -p models && \
     sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
     echo "instance_group [ { count: ${INSTANCE_CNT} }]")
 
-# Onnx and onnx-trt models are very slow on Jetson.
 MEASUREMENT_WINDOW=5000
+PERF_CLIENT=../clients/perf_client
+# Onnx and onnx-trt models are very slow on Jetson.
 if [ "$ARCH" == "aarch64" ]; then
-    PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
     if [ "$MODEL_FRAMEWORK" == "onnx" ] || [ "$MODEL_FRAMEWORK" == "onnx_trt" ]; then
         MEASUREMENT_WINDOW=20000
     fi
-else
-    PERF_CLIENT=../clients/perf_client
 fi
 
 # Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and