Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve L0_backend_python debugging #6157

Merged
merged 2 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions qa/L0_backend_python/argument_validation/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand All @@ -26,14 +26,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=../python_unittest.py
CLIENT_LOG="./client.log"
CLIENT_LOG="./arg_validation_client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"
SERVER_LOG="./arg_validation_server.log"

RET=0
source ../../common/util.sh
Expand Down Expand Up @@ -75,4 +75,6 @@ else
echo -e "\n***\n*** Argument validation test PASSED. \n***"
fi

collect_artifacts_from_subdir

exit $RET
52 changes: 49 additions & 3 deletions qa/L0_backend_python/bls/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=../python_unittest.py
CLIENT_LOG="./client.log"
CLIENT_LOG="./bls_client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh
Expand All @@ -35,9 +35,10 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"

RET=0
# Tracks whether the current sub-test failed, so that the client log and that
# sub-test's server log are printed only when the sub-test itself failed.
SUB_TEST_RET=0
rm -fr *.log ./models *.txt

pip3 uninstall -y torch
Expand Down Expand Up @@ -104,6 +105,7 @@ cp -r ${DATADIR}/qa_model_repository/libtorch_nobatch_float32_float32_float32/ .

for TRIAL in non_decoupled decoupled ; do
export BLS_KIND=$TRIAL
SERVER_LOG="./bls_$TRIAL.inference_server.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
Expand All @@ -120,12 +122,14 @@ for TRIAL in non_decoupled decoupled ; do
echo -e "\n***\n*** 'bls' $BLS_KIND test FAILED. \n***"
cat $CLIENT_LOG
RET=1
SUB_TEST_RET=1
else
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
SUB_TEST_RET=1
fi
fi

Expand All @@ -135,12 +139,14 @@ for TRIAL in non_decoupled decoupled ; do
echo -e "\n***\n*** 'bls_memory' $BLS_KIND test FAILED. \n***"
cat $CLIENT_LOG
RET=1
SUB_TEST_RET=1
else
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
SUB_TEST_RET=1
fi
fi

Expand All @@ -150,12 +156,14 @@ for TRIAL in non_decoupled decoupled ; do
echo -e "\n***\n*** 'bls_async_memory' $BLS_KIND test FAILED. \n***"
cat $CLIENT_LOG
RET=1
SUB_TEST_RET=1
else
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
SUB_TEST_RET=1
fi
fi

Expand All @@ -165,19 +173,26 @@ for TRIAL in non_decoupled decoupled ; do
echo -e "\n***\n*** 'bls_async' $BLS_KIND test FAILED. \n***"
cat $CLIENT_LOG
RET=1
SUB_TEST_RET=1
else
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
SUB_TEST_RET=1
fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Dump the logs only when this trial recorded a failure: the client log first,
# then the server log for this trial (SERVER_LOG is set per TRIAL at the start
# of the loop body).
if [ "${SUB_TEST_RET:-0}" -eq 1 ]; then
    cat "$CLIENT_LOG"
    cat "$SERVER_LOG"
fi
# Clear the flag before the next trial; otherwise a failure in an earlier
# trial would make every later trial print its logs even when it passed.
SUB_TEST_RET=0
done

# Test error handling when BLS is used in "initialize" or "finalize" function
Expand All @@ -188,11 +203,13 @@ mkdir -p models/bls_init_error/1/
cp ../../python_models/bls_init_error/model.py models/bls_init_error/1/
cp ../../python_models/bls_init_error/config.pbtxt models/bls_init_error
SERVER_LOG="./bls_init_error_server.log"
SUB_TEST_RET=0

run_server
if [ "$SERVER_PID" != "0" ]; then
echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
RET=1
SUB_TEST_RET=1
kill $SERVER_PID
wait $SERVER_PID
else
Expand All @@ -201,14 +218,21 @@ else
else
echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
RET=1
SUB_TEST_RET=1
fi
fi

# If the bls_init_error sub-test recorded a failure (SUB_TEST_RET was set to 1
# above), print the client log followed by this sub-test's server log.
if [ $SUB_TEST_RET -eq 1 ]; then
cat $CLIENT_LOG
cat $SERVER_LOG
fi

rm -fr ./models
mkdir -p models/bls_finalize_error/1/
cp ../../python_models/bls_finalize_error/model.py models/bls_finalize_error/1/
cp ../../python_models/bls_finalize_error/config.pbtxt models/bls_finalize_error/
SERVER_LOG="./bls_finalize_error_server.log"
SUB_TEST_RET=0

run_server
if [ "$SERVER_PID" == "0" ]; then
Expand All @@ -225,9 +249,16 @@ if grep "$ERROR_MESSAGE" $SERVER_LOG; then
else
echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
RET=1
SUB_TEST_RET=1
fi

# If the bls_finalize_error sub-test recorded a failure (SUB_TEST_RET was set
# to 1 above), print the client log followed by this sub-test's server log.
if [ $SUB_TEST_RET -eq 1 ]; then
cat $CLIENT_LOG
cat $SERVER_LOG
fi

# Test model loading API with BLS
SUB_TEST_RET=0
rm -fr ./models
mkdir -p models/bls_model_loading/1/
cp ../../python_models/bls_model_loading/model.py models/bls_model_loading/1/
Expand All @@ -254,6 +285,7 @@ set -e
if [ "$code" == "400" ]; then
echo -e "\n***\n*** Failed to load model '${MODEL_NAME}'\n***"
RET=1
SUB_TEST_RET=1
fi

set +e
Expand All @@ -263,12 +295,14 @@ if [ $? -ne 0 ]; then
echo -e "\n***\n*** 'bls_model_loading' test FAILED. \n***"
cat $CLIENT_LOG
RET=1
SUB_TEST_RET=1
else
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
SUB_TEST_RET=1
fi
fi

Expand All @@ -277,6 +311,11 @@ set -e
kill $SERVER_PID
wait $SERVER_PID

# The server has been stopped at this point. If the bls_model_loading sub-test
# recorded a failure (SUB_TEST_RET was set to 1 above), print the client log
# followed by the file currently named by SERVER_LOG.
if [ $SUB_TEST_RET -eq 1 ]; then
cat $CLIENT_LOG
cat $SERVER_LOG
fi

# Test model loading API with BLS warmup
(cd models/bls_model_loading && \
echo "model_warmup [{" >> config.pbtxt && \
Expand All @@ -300,6 +339,7 @@ wait $SERVER_PID
echo " }" >> config.pbtxt && \
echo "}]" >> config.pbtxt )

SUB_TEST_RET=0
SERVER_LOG="./bls_model_loading_server_warmup.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
Expand All @@ -314,17 +354,23 @@ set -e
if [ "$code" == "400" ]; then
echo -e "\n***\n*** Failed to load model '${MODEL_NAME}'\n***"
RET=1
SUB_TEST_RET=1
fi

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 1 ]; then
if [ $SUB_TEST_RET -eq 1 ]; then
cat $CLIENT_LOG
cat $SERVER_LOG
fi

if [ $RET -eq 1 ]; then
echo -e "\n***\n*** BLS test FAILED. \n***"
else
echo -e "\n***\n*** BLS test PASSED. \n***"
fi

collect_artifacts_from_subdir

exit $RET
6 changes: 4 additions & 2 deletions qa/L0_backend_python/custom_metrics/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=../python_unittest.py
CLIENT_LOG="./client.log"
CLIENT_LOG="./custom_metrics_client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh
Expand All @@ -35,7 +35,7 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"
SERVER_LOG="./custom_metrics_server.log"

RET=0
rm -fr *.log ./models *.txt
Expand Down Expand Up @@ -82,4 +82,6 @@ else
echo -e "\n***\n*** Custom Metrics test PASSED. \n***"
fi

collect_artifacts_from_subdir

exit $RET
6 changes: 4 additions & 2 deletions qa/L0_backend_python/decoupled/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=./decoupled_test.py
CLIENT_LOG="./client.log"
CLIENT_LOG="./decoupled_client.log"
EXPECTED_NUM_TESTS="5"
TEST_RESULT_FILE='test_results.txt'
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"
SERVER_LOG="./decoupled_server.log"

pip3 uninstall -y torch
pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch_stable.html
Expand Down Expand Up @@ -118,4 +118,6 @@ else
echo -e "\n***\n*** Decoupled test PASSED. \n***"
fi

collect_artifacts_from_subdir

exit $RET
8 changes: 5 additions & 3 deletions qa/L0_backend_python/ensemble/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand All @@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=./lifecycle_test.py
CLIENT_LOG="./client.log"
CLIENT_LOG="./ensemble_client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
source ../common.sh
Expand All @@ -36,7 +36,7 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"
SERVER_LOG="./ensemble_server.log"

RET=0
rm -rf models/ $CLIENT_LOG
Expand Down Expand Up @@ -114,4 +114,6 @@ else
echo -e "\n***\n*** Ensemble test PASSED. \n***"
fi

collect_artifacts_from_subdir

exit $RET
6 changes: 4 additions & 2 deletions qa/L0_backend_python/env/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_LOG="./client.log"
CLIENT_LOG="./env_client.log"
source ../common.sh
source ../../common/util.sh

SERVER=/opt/tritonserver/bin/tritonserver
BASE_SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --disable-auto-complete-config"
PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG
SERVER_ARGS=$BASE_SERVER_ARGS
SERVER_LOG="./inference_server.log"
SERVER_LOG="./env_server.log"

RET=0

Expand Down Expand Up @@ -315,4 +315,6 @@ else
echo -e "\n***\n*** Env Manager Test FAILED.\n***"
fi

collect_artifacts_from_subdir

exit $RET
Loading