From 648b8e2e29a824ac821fd855b6524a7238fecc4f Mon Sep 17 00:00:00 2001 From: krishung5 Date: Wed, 17 Jul 2024 19:42:08 -0700 Subject: [PATCH 01/13] Reture custom code if known bls shm memory leak is reported --- qa/L0_backend_python/bls/test.sh | 13 ++++---- qa/L0_backend_python/test.sh | 23 ++++++++++++-- ...hon_unittest.py => test_infer_shm_leak.py} | 30 +++++++++++-------- qa/common/shm_util.py | 5 +++- 4 files changed, 48 insertions(+), 23 deletions(-) rename qa/L0_backend_python/{python_unittest.py => test_infer_shm_leak.py} (75%) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index 204af7e2ba..827cdce710 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -25,7 +25,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -CLIENT_PY=../python_unittest.py +CLIENT_PY=../test_infer_shm_leak.py CLIENT_LOG="./bls_client.log" TEST_RESULT_FILE='test_results.txt' source ../../common/util.sh @@ -33,7 +33,7 @@ source ../../common/util.sh TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:=http://github.com/triton-inference-server} RET=0 -rm -fr *.log ./models *.txt +rm -fr *.log ./models *.txt *.xml # FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU if [[ ${TEST_WINDOWS} == 0 ]]; then @@ -119,13 +119,14 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then for MODEL_NAME in bls bls_memory bls_memory_async bls_async; do export MODEL_NAME=${MODEL_NAME} - - python3 -m pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then + # Run with pytest to capture the return code correctly + pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1 + RET=$? + if [ $RET -ne 0 ]; then + echo $RET echo -e "\n***\n*** ${MODEL_NAME} ${BLS_KIND} test FAILED. \n***" cat $SERVER_LOG cat $CLIENT_LOG - RET=1 fi done diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 0e0240cd95..e91494f79d 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -423,9 +423,16 @@ if [ "$TEST_JETSON" == "0" ]; then setup_virtualenv (cd ${TEST} && bash -ex test.sh) - if [ $? -ne 0 ]; then + EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then echo "Subtest ${TEST} FAILED" - RET=1 + RET=$EXIT_CODE + + # In bls test, it is allowed to fail with a strict memory leak of 480 bytes with exit code '123'. + # Propagate the exit code to make sure it's not overwritten by other tests. + if [[ ${TEST} == "bls" ]] && [[ $EXIT_CODE -ne 1 ]] ; then + BLS_RET=$RET + fi fi deactivate_virtualenv @@ -472,4 +479,14 @@ else echo -e "\n***\n*** Test FAILED\n***" fi -exit $RET +# Exit with RET if it is 1, meaning that the test failed. +# Otherwise, exit with BLS_RET if it is set, meaning that the known memory leak is captured. +if [ $RET -eq 1 ]; then + exit $RET +else + if [ -z "$BLS_RET" ]; then + exit $RET + else + exit $BLS_RET + fi +fi diff --git a/qa/L0_backend_python/python_unittest.py b/qa/L0_backend_python/test_infer_shm_leak.py similarity index 75% rename from qa/L0_backend_python/python_unittest.py rename to qa/L0_backend_python/test_infer_shm_leak.py index 4b94996976..e449ed4ef5 100755 --- a/qa/L0_backend_python/python_unittest.py +++ b/qa/L0_backend_python/test_infer_shm_leak.py @@ -33,6 +33,7 @@ import os import unittest +import pytest import shm_util import tritonclient.grpc as grpcclient from tritonclient.utils import * @@ -41,11 +42,12 @@ # we overwrite the IP address with the TRITONSERVER_IPADDR envvar _tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") +# The exit code 123 is used to indicate that the shm leak probe detected a 480 bytes leak. +# Any leak other than 480 bytes will cause the test to fail with the default exit code 1. +ALLOWED_FAILURE_EXIT_CODE = 123 -class PythonUnittest(unittest.TestCase): - def setUp(self): - self._shm_leak_detector = shm_util.ShmLeakDetector() +class TestInferShmLeak: def _run_unittest(self, model_name): with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client: # No input is required @@ -54,15 +56,17 @@ def _run_unittest(self, model_name): # The model returns 1 if the tests were successfully passed. # Otherwise, it will return 0. - self.assertEqual( - output0, [1], f"python_unittest failed for model {model_name}" - ) - - def test_python_unittest(self): - model_name = os.environ["MODEL_NAME"] - with self._shm_leak_detector.Probe() as shm_probe: - self._run_unittest(model_name) + assert output0 == [1], f"python_unittest failed for model {model_name}" + def test_shm_leak(self): + self._shm_leak_detector = shm_util.ShmLeakDetector() + model_name = os.environ.get("MODEL_NAME", "default_model") -if __name__ == "__main__": - unittest.main() + try: + with self._shm_leak_detector.Probe() as shm_probe: + self._run_unittest(model_name) + except AssertionError as e: + if "Known shared memory leak of 480 bytes detected" in str(e): + pytest.exit(str(e), returncode=ALLOWED_FAILURE_EXIT_CODE) + else: + raise e diff --git a/qa/common/shm_util.py b/qa/common/shm_util.py index 16e5ce4e45..28728be43b 100755 --- a/qa/common/shm_util.py +++ b/qa/common/shm_util.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -441,6 +441,9 @@ def __exit__(self, type, value, traceback): print( f"Shared memory leak detected [{shm_region}]: {curr_shm_free_size} (curr free) < {prev_shm_free_size} (prev free)." ) + # Known shared memory leak of 480 bytes in BLS test. + if curr_shm_free_size == 1006576 and prev_shm_free_size == 1007056: + assert False, f"Known shared memory leak of 480 bytes detected." assert not shm_leak_detected, f"Shared memory leak detected." def _get_shm_free_sizes(self, delay_sec=0): From d2aef79e3c0a41aa3744a32034aaa1e7cef2254e Mon Sep 17 00:00:00 2001 From: krishung5 Date: Wed, 17 Jul 2024 19:42:29 -0700 Subject: [PATCH 02/13] Update pytest script naming --- qa/L0_backend_python/argument_validation/test.sh | 2 +- qa/L0_backend_python/custom_metrics/test.sh | 2 +- qa/L0_backend_python/request_rescheduling/test.sh | 2 +- qa/L0_dlpack_multi_gpu/test.sh | 6 +++--- qa/L0_warmup/test.sh | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/qa/L0_backend_python/argument_validation/test.sh b/qa/L0_backend_python/argument_validation/test.sh index b14ba4abb3..90cbef89b5 100755 --- a/qa/L0_backend_python/argument_validation/test.sh +++ b/qa/L0_backend_python/argument_validation/test.sh @@ -25,7 +25,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -CLIENT_PY=../python_unittest.py +CLIENT_PY=../test_infer_shm_leak.py CLIENT_LOG="./arg_validation_client.log" TEST_RESULT_FILE='test_results.txt' SERVER_ARGS="--model-repository=${MODELDIR}/argument_validation/models --backend-directory=${BACKEND_DIR} --log-verbose=1" diff --git a/qa/L0_backend_python/custom_metrics/test.sh b/qa/L0_backend_python/custom_metrics/test.sh index 4491d9e030..9020c7ebfd 100755 --- a/qa/L0_backend_python/custom_metrics/test.sh +++ b/qa/L0_backend_python/custom_metrics/test.sh @@ -25,7 +25,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -CLIENT_PY=../python_unittest.py +CLIENT_PY=../test_infer_shm_leak.py CLIENT_LOG="./custom_metrics_client.log" TEST_RESULT_FILE='test_results.txt' source ../../common/util.sh diff --git a/qa/L0_backend_python/request_rescheduling/test.sh b/qa/L0_backend_python/request_rescheduling/test.sh index 6fd6fe09e5..31ba6692d9 100755 --- a/qa/L0_backend_python/request_rescheduling/test.sh +++ b/qa/L0_backend_python/request_rescheduling/test.sh @@ -25,7 +25,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -CLIENT_PY="../python_unittest.py" +CLIENT_PY="../test_infer_shm_leak.py" CLIENT_LOG="./request_rescheduling_client.log" TEST_RESULT_FILE='test_results.txt' source ../../common/util.sh diff --git a/qa/L0_dlpack_multi_gpu/test.sh b/qa/L0_dlpack_multi_gpu/test.sh index 996f062f42..ae72daa7d0 100755 --- a/qa/L0_dlpack_multi_gpu/test.sh +++ b/qa/L0_dlpack_multi_gpu/test.sh @@ -27,7 +27,7 @@ SERVER=/opt/tritonserver/bin/tritonserver SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1" -CLIENT_PY=./python_unittest.py +CLIENT_PY=./test_infer_shm_leak.py CLIENT_LOG="./client.log" EXPECTED_NUM_TESTS="1" TEST_RESULT_FILE='test_results.txt' @@ -52,8 +52,8 @@ rm -fr *.log ./models mkdir -p models/dlpack_test/1/ cp ../python_models/dlpack_test/model.py models/dlpack_test/1/ cp ../python_models/dlpack_test/config.pbtxt models/dlpack_test -cp ../L0_backend_python/python_unittest.py . -sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' python_unittest.py +cp ../L0_backend_python/test_infer_shm_leak.py . +sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' test_infer_shm_leak.py run_server if [ "$SERVER_PID" == "0" ]; then diff --git a/qa/L0_warmup/test.sh b/qa/L0_warmup/test.sh index aeed873b25..a535aed25b 100755 --- a/qa/L0_warmup/test.sh +++ b/qa/L0_warmup/test.sh @@ -42,7 +42,7 @@ export CUDA_VISIBLE_DEVICES=0 CLIENT=../clients/image_client CLIENT_LOG="./client.log" -CLIENT_PY=./python_unittest.py +CLIENT_PY=./test_infer_shm_leak.py EXPECTED_NUM_TESTS="1" TEST_RESULT_FILE='test_results.txt' @@ -449,8 +449,8 @@ mkdir -p models/bls_onnx_warmup/1/ cp ../python_models/bls_onnx_warmup/model.py models/bls_onnx_warmup/1/ cp ../python_models/bls_onnx_warmup/config.pbtxt models/bls_onnx_warmup/. -cp ../L0_backend_python/python_unittest.py . -sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' python_unittest.py +cp ../L0_backend_python/test_infer_shm_leak.py . +sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' test_infer_shm_leak.py run_server if [ "$SERVER_PID" == "0" ]; then From fc3d0ec47e917122abfbe7d80d8716e68c050f76 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Wed, 17 Jul 2024 19:45:15 -0700 Subject: [PATCH 03/13] Remove debug logging --- qa/L0_backend_python/bls/test.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index 827cdce710..c2146ebaca 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -123,7 +123,6 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1 RET=$? if [ $RET -ne 0 ]; then - echo $RET echo -e "\n***\n*** ${MODEL_NAME} ${BLS_KIND} test FAILED. \n***" cat $SERVER_LOG cat $CLIENT_LOG From c30fcaa15513b3b1400c9e437597e9e0c6dabb7f Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 18 Jul 2024 00:45:13 -0700 Subject: [PATCH 04/13] Not overwrite RET --- qa/L0_backend_python/bls/test.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index c2146ebaca..883e56305b 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -121,9 +121,10 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then export MODEL_NAME=${MODEL_NAME} # Run with pytest to capture the return code correctly pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1 - RET=$? - if [ $RET -ne 0 ]; then + EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then echo -e "\n***\n*** ${MODEL_NAME} ${BLS_KIND} test FAILED. \n***" + RET=$EXIT_CODE cat $SERVER_LOG cat $CLIENT_LOG fi From b4a88810d70f177647c49cd074801b27a5aa32c2 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 18 Jul 2024 00:49:08 -0700 Subject: [PATCH 05/13] Move set -e after kill_server --- qa/L0_backend_python/bls/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index 883e56305b..a5a6ece30b 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -130,10 +130,10 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then fi done - set -e - kill_server + set -e + # Check for bls 'test_timeout' to ensure timeout value is being correctly passed if [ `grep -c "Request timeout: 11000000000" $SERVER_LOG` == "0" ]; then echo -e "\n***\n*** BLS timeout value not correctly passed to model: line ${LINENO}\n***" From 47e7b8059a0d5274fb9d442cea442323fb755210 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 18 Jul 2024 00:57:21 -0700 Subject: [PATCH 06/13] Remove the install package --- qa/L0_backend_python/setup_python_enviroment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/setup_python_enviroment.sh b/qa/L0_backend_python/setup_python_enviroment.sh index 88baccc4f6..a2171e02da 100755 --- a/qa/L0_backend_python/setup_python_enviroment.sh +++ b/qa/L0_backend_python/setup_python_enviroment.sh @@ -151,7 +151,7 @@ apt-get update && apt-get -y install \ libboost-dev rm -f /usr/bin/python3 && \ ln -s "/usr/bin/python3.${PYTHON_ENV_VERSION}" /usr/bin/python3 -pip3 install --upgrade install requests numpy virtualenv protobuf +pip3 install --upgrade requests numpy virtualenv protobuf find /opt/tritonserver/qa/pkgs/ -maxdepth 1 -type f -name \ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ xargs pip3 install --upgrade From ee7e7035773eef01113a6132b0f26bd3b21b40b8 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 18 Jul 2024 10:44:54 -0700 Subject: [PATCH 07/13] Only check bls timeout value if the test passed --- qa/L0_backend_python/bls/test.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index a5a6ece30b..c2cdb11e04 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -134,11 +134,15 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then set -e - # Check for bls 'test_timeout' to ensure timeout value is being correctly passed - if [ `grep -c "Request timeout: 11000000000" $SERVER_LOG` == "0" ]; then - echo -e "\n***\n*** BLS timeout value not correctly passed to model: line ${LINENO}\n***" - cat $SERVER_LOG - RET=1 + # Only check the timeout value if there is no error since the test + # may fail before the test_timeout case gets run. + if [ $RET -eq 0 ]; then + # Check for bls 'test_timeout' to ensure timeout value is being correctly passed + if [ `grep -c "Request timeout: 11000000000" $SERVER_LOG` == "0" ]; then + echo -e "\n***\n*** BLS timeout value not correctly passed to model: line ${LINENO}\n***" + cat $SERVER_LOG + RET=1 + fi fi if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 128 ]]; then From f5f2bd6b8cdb7b0906e17c6d673c1733507b78e6 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 18 Jul 2024 23:57:24 -0700 Subject: [PATCH 08/13] Set +e to not failing when subtest returns non-zero code --- qa/L0_backend_python/test.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index e91494f79d..764a79b29d 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -422,6 +422,7 @@ if [ "$TEST_JETSON" == "0" ]; then # between dependencies. setup_virtualenv + set +e (cd ${TEST} && bash -ex test.sh) EXIT_CODE=$? if [ $EXIT_CODE -ne 0 ]; then @@ -434,6 +435,7 @@ if [ "$TEST_JETSON" == "0" ]; then BLS_RET=$RET fi fi + set -e deactivate_virtualenv done @@ -442,11 +444,13 @@ if [ "$TEST_JETSON" == "0" ]; then if [[ ${PYTHON_ENV_VERSION} = "10" ]] && [[ ${TEST_WINDOWS} == 0 ]]; then # In 'env' test we use miniconda for dependency management. No need to run # the test in a virtual environment. + set +e (cd env && bash -ex test.sh) if [ $? -ne 0 ]; then echo "Subtest env FAILED" RET=1 fi + set -e fi fi @@ -463,12 +467,14 @@ for TEST in ${SUBTESTS}; do # between dependencies. setup_virtualenv + set +e (cd ${TEST} && bash -ex test.sh) if [ $? -ne 0 ]; then echo "Subtest ${TEST} FAILED" RET=1 fi + set -e deactivate_virtualenv done From ef5d373b54a778467d37707cb89dff2382b54434 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Thu, 25 Jul 2024 17:36:12 -0700 Subject: [PATCH 09/13] Check cuda memory pool for 256mb --- qa/L0_backend_python/bls/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index c2cdb11e04..51d03da348 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -145,9 +145,9 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then fi fi - if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 128 ]]; then + if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 256 ]]; then if [ `grep -c "Failed to allocate memory from CUDA memory pool" $SERVER_LOG` != "0" ]; then - echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMOY_POOL_SIZE_MB is 128 MB for 'bls' $BLS_KIND test\n***" + echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMOY_POOL_SIZE_MB is 256 MB for 'bls' $BLS_KIND test\n***" cat $SERVER_LOG RET=1 fi From 42f081d02c79cb520db748f2b4f7b180cfec4319 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Mon, 29 Jul 2024 14:22:19 -0700 Subject: [PATCH 10/13] Update log --- qa/L0_backend_python/bls/test.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index 51d03da348..f57b70fe7f 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -347,10 +347,10 @@ set -e kill_server -if [ $RET -eq 1 ]; then - echo -e "\n***\n*** BLS test FAILED. \n***" -else +if [ $RET -eq 0 ]; then echo -e "\n***\n*** BLS test PASSED. \n***" +else + echo -e "\n***\n*** BLS test FAILED. \n***" fi exit $RET From 3ec800091b727e80217b15f4b3b105e5478eb938 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 30 Jul 2024 11:03:40 -0700 Subject: [PATCH 11/13] Update comment --- qa/L0_backend_python/test_infer_shm_leak.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qa/L0_backend_python/test_infer_shm_leak.py b/qa/L0_backend_python/test_infer_shm_leak.py index e449ed4ef5..966243e86e 100755 --- a/qa/L0_backend_python/test_infer_shm_leak.py +++ b/qa/L0_backend_python/test_infer_shm_leak.py @@ -42,8 +42,9 @@ # we overwrite the IP address with the TRITONSERVER_IPADDR envvar _tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") -# The exit code 123 is used to indicate that the shm leak probe detected a 480 bytes leak. -# Any leak other than 480 bytes will cause the test to fail with the default exit code 1. +# The exit code 123 is used to indicate that the shm leak probe detected a 480 +# bytes leak in the bls sub-test. Any leak other than 480 bytes will cause the +# test to fail with the default exit code 1. ALLOWED_FAILURE_EXIT_CODE = 123 From 948d031c5d9107f9e78ca91ec106434579381855 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 30 Jul 2024 15:08:25 -0700 Subject: [PATCH 12/13] Typo --- qa/L0_backend_python/bls/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index f57b70fe7f..46d1f40818 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -147,7 +147,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 256 ]]; then if [ `grep -c "Failed to allocate memory from CUDA memory pool" $SERVER_LOG` != "0" ]; then - echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMOY_POOL_SIZE_MB is 256 MB for 'bls' $BLS_KIND test\n***" + echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMORY_POOL_SIZE_MB is 256 MB for 'bls' $BLS_KIND test\n***" cat $SERVER_LOG RET=1 fi From 8717160ec9145632283ec9dfd09ee602ea58ce88 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 30 Jul 2024 16:43:42 -0700 Subject: [PATCH 13/13] Add ticket reference --- qa/common/shm_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/common/shm_util.py b/qa/common/shm_util.py index 28728be43b..0e533bcdbb 100755 --- a/qa/common/shm_util.py +++ b/qa/common/shm_util.py @@ -441,7 +441,7 @@ def __exit__(self, type, value, traceback): print( f"Shared memory leak detected [{shm_region}]: {curr_shm_free_size} (curr free) < {prev_shm_free_size} (prev free)." ) - # Known shared memory leak of 480 bytes in BLS test. + # FIXME DLIS-7122: Known shared memory leak of 480 bytes in BLS test. if curr_shm_free_size == 1006576 and prev_shm_free_size == 1007056: assert False, f"Known shared memory leak of 480 bytes detected." assert not shm_leak_detected, f"Shared memory leak detected."