Update Jetson tests in Docker container #5734

Merged: 11 commits, May 31, 2023
78 changes: 36 additions & 42 deletions qa/L0_backend_python/python_test.py
@@ -1,6 +1,6 @@
#!/usr/bin/python

# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -40,8 +40,6 @@
from tritonclient.utils import *
import tritonclient.http as httpclient

TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))


class PythonTest(tu.TestResultCollector):

@@ -113,43 +111,38 @@ def _optional_input_infer(self, model_name, has_input0, has_input1):
np.testing.assert_equal(output1, expected_output1,
"OUTPUT1 doesn't match expected OUTPUT1")

# We do not use Docker on Jetson, so it does not impose a shared memory
# allocation limit of 1GB. This means the test will pass without the expected
# error on Jetson and is hence unnecessary.
if not TEST_JETSON:

def test_growth_error(self):
# 2 MiBs
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
model_name = 'identity_uint8_nobatch'
dtype = np.uint8
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

# 1 GiB payload leads to error in the main Python backend process.
# Total shared memory available is 1GiB.
total_byte_size = 1024 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 512 MiBs payload leads to error in the Python stub process.
total_byte_size = 512 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 2 MiBs
# Send a small payload to make sure it is still working properly
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)
def test_growth_error(self):
# 2 MiBs
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
model_name = 'identity_uint8_nobatch'
dtype = np.uint8
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

# 1 GiB payload leads to error in the main Python backend process.
# Total shared memory available is 1GiB.
total_byte_size = 1024 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 512 MiBs payload leads to error in the Python stub process.
total_byte_size = 512 * 1024 * 1024
shape = [total_byte_size]
with self.assertRaises(InferenceServerException) as ex:
self._infer_help(model_name, shape, dtype)
self.assertIn("Failed to increase the shared memory pool size",
str(ex.exception))

# 2 MiBs
# Send a small payload to make sure it is still working properly
total_byte_size = 2 * 1024 * 1024
shape = [total_byte_size]
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

def test_async_infer(self):
model_name = "identity_uint8"
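The updated `test_growth_error` relies on `self._infer_help`, which is defined earlier in `python_test.py` and is not visible in this hunk. For reference, here is a minimal sketch of what such a helper typically looks like with the `tritonclient.http` API; the tensor names `INPUT0`/`OUTPUT0` and the `localhost:8000` endpoint are assumptions for illustration, not taken from this diff:

```python
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype


def _infer_help(model_name, shape, dtype):
    # Hypothetical sketch of the helper used by test_growth_error.
    # Assumes an identity model that echoes INPUT0 back as OUTPUT0.
    client = httpclient.InferenceServerClient(url="localhost:8000")
    input_data = (255 * np.random.rand(*shape)).astype(dtype)
    input0 = httpclient.InferInput("INPUT0", list(shape),
                                   np_to_triton_dtype(dtype))
    input0.set_data_from_numpy(input_data)
    result = client.infer(model_name, [input0])
    # The identity model should return the payload unchanged.
    np.testing.assert_equal(result.as_numpy("OUTPUT0"), input_data)
    client.close()
```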
@@ -189,8 +182,9 @@ def test_async_infer(self):

# Make sure the requests ran in parallel.
stats = client.get_inference_statistics(model_name)
test_cond = (len(stats['model_stats']) != 1) or (
stats['model_stats'][0]['name'] != model_name)
test_cond = (len(stats['model_stats'])
!= 1) or (stats['model_stats'][0]['name']
!= model_name)
self.assertFalse(
test_cond,
"error: expected statistics for {}".format(model_name))
5 changes: 1 addition & 4 deletions qa/L0_backend_python/test.sh
@@ -131,9 +131,6 @@ cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
# Skip torch install on Jetson since it is already installed.
if [ "$TEST_JETSON" == "0" ]; then
pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
else
# test_growth_error is skipped on jetson
EXPECTED_NUM_TESTS=8
fi

prev_num_pages=`get_shm_pages`
@@ -371,7 +368,7 @@ and shared memory pages after starting triton equals to $current_num_pages \n***
exit 1
fi

# Disable env test for Jetson since build is non-dockerized and cloud storage repos are not supported
# Disable env test for Jetson since cloud storage repos are not supported
# Disable ensemble, unittest, io and bls tests for Jetson since GPU Tensors are not supported
# Disable variants test for Jetson since already built without GPU Tensor support
# Disable decoupled test because it uses GPU tensors
14 changes: 6 additions & 8 deletions qa/L0_perf_nomodel/run_test.sh
@@ -47,19 +47,13 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
ARCH=${ARCH:="x86_64"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
MODEL_REPO="${PWD}/models"
PERF_CLIENT=../clients/perf_client
TF_VERSION=${TF_VERSION:=2}
SERVER_ARGS="--model-repository=${MODEL_REPO} --backend-directory=${BACKEND_DIR} --backend-config=tensorflow,version=${TF_VERSION}"
source ../common/util.sh

# DATADIR is already set as an environment variable for aarch64
if [ "$ARCH" == "aarch64" ]; then
PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
else
PERF_CLIENT=../clients/perf_client
DATADIR=/data/inferenceserver/${REPO_VERSION}
fi

# Select the single GPU that will be available to the inference server
export CUDA_VISIBLE_DEVICES=0

@@ -76,6 +70,10 @@ if [[ $BACKENDS == *"python"* ]]; then
sed -i "s/^name:.*/name: \"python_zero_1_float32\"/" config.pbtxt)
fi

if [[ $BACKENDS == *"custom"* ]]; then
mkdir -p "custom_models/custom_zero_1_float32/1"
fi

PERF_CLIENT_PERCENTILE_ARGS="" &&
(( ${PERF_CLIENT_PERCENTILE} != 0 )) &&
PERF_CLIENT_PERCENTILE_ARGS="--percentile=${PERF_CLIENT_PERCENTILE}"
6 changes: 2 additions & 4 deletions qa/L0_perf_resnet/run_test.sh
@@ -54,15 +54,13 @@ rm -fr models && mkdir -p models && \
sed -i "s/^max_batch_size:.*/max_batch_size: ${MAX_BATCH}/" config.pbtxt && \
echo "instance_group [ { count: ${INSTANCE_CNT} }]")

# Onnx and onnx-trt models are very slow on Jetson.
MEASUREMENT_WINDOW=5000
PERF_CLIENT=../clients/perf_client
# Onnx and onnx-trt models are very slow on Jetson.
if [ "$ARCH" == "aarch64" ]; then
PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
if [ "$MODEL_FRAMEWORK" == "onnx" ] || [ "$MODEL_FRAMEWORK" == "onnx_trt" ]; then
MEASUREMENT_WINDOW=20000
fi
else
PERF_CLIENT=../clients/perf_client
fi

# Overload use of PERF_CLIENT_PROTOCOL for convenience with existing test and