Nuke onnx models from test #6890

Merged: 19 commits, Feb 22, 2024
12 changes: 7 additions & 5 deletions qa/L0_backend_config/test.sh
@@ -236,6 +236,8 @@ else

fi

: '
# Disabling onnxruntime tests for r24.02 release
# Onnxruntime: Batching ON
rm -rf ./models/
mkdir -p ./models/no_config
@@ -306,17 +308,18 @@ else
wait $SERVER_PID

fi
'

#
# General backend tests
#

# We want to make sure that backend configurations
# are not lost. For this purpose we are using only onnx backend
# are not lost. For this purpose we are using only tensorflow backend

rm -rf ./models/
mkdir -p ./models/no_config/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/savedmodel_float32_float32_float32/1 ./models/no_config/

# First getting a baseline for the number of default configs
# added during a server set up
@@ -351,7 +354,7 @@ fi
# One of the default configs is `min-compute-capability`. This test
# checks if it is properly overridden.
MIN_COMPUTE_CAPABILITY=XX
SERVER_ARGS="--backend-config=onnxruntime,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS"
SERVER_ARGS="--backend-config=tensorflow,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.global_configs.log
run_server

@@ -374,7 +377,7 @@ else

fi
# Now make sure that specific backend configs are not lost.
SERVER_ARGS="--backend-config=onnxruntime,a=0 --backend-config=onnxruntime,y=0 --backend-config=onnxruntime,z=0 $COMMON_ARGS"
SERVER_ARGS="--backend-config=tensorflow,a=0 --backend-config=tensorflow,y=0 --backend-config=tensorflow,z=0 $COMMON_ARGS"
SERVER_LOG=$SERVER_LOG_BASE.specific_configs.log
EXPECTED_CONFIG_COUNT=$(($DEFAULT_CONFIG_COUNT+3))
run_server
@@ -398,7 +401,6 @@ else

fi


# Print test outcome
if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
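
A quick note on the `: '` ... `'` pattern used above to disable the onnxruntime section: `:` is the shell no-op builtin, and the single-quoted string handed to it is parsed but never executed. A minimal, self-contained sketch follows; the echo and rm lines are illustrative, not taken from the test.

    #!/bin/bash
    # `:` is the no-op builtin; the single-quoted block passed to it is never
    # executed, so everything between : ' and the closing ' is effectively
    # commented out. The disabled block must not contain an unescaped single
    # quote, or the quoting breaks.
    : '
    echo "this line never runs"
    rm -rf ./models/    # neither does this one
    '
    echo "execution resumes here"

This keeps the disabled onnxruntime block intact in the script so it can be re-enabled later by deleting the two quoting lines.
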
7 changes: 4 additions & 3 deletions qa/L0_backend_python/bls/test.sh
@@ -41,6 +41,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then
mkdir -p models/bls/1/
cp ../../python_models/bls/model.py models/bls/1/
cp ../../python_models/bls/config.pbtxt models/bls
sed -i 's/onnx_nobatch_sequence_int32/plan_nobatch_sequence_int32/g' models/bls/1/model.py

mkdir -p models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/
@@ -74,7 +75,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then
cp ../../python_models/dlpack_identity/model.py models/dlpack_identity/1/
cp ../../python_models/dlpack_identity/config.pbtxt models/dlpack_identity

cp -r ${DATADIR}/qa_sequence_implicit_model_repository/onnx_nobatch_sequence_int32/ ./models
cp -r ${DATADIR}/qa_sequence_implicit_model_repository/plan_nobatch_sequence_int32/ ./models

git clone https://github.com/triton-inference-server/python_backend -b $PYTHON_BACKEND_REPO_TAG
mkdir -p models/square_int32/1/
@@ -219,9 +220,9 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then
mkdir -p models/bls_model_loading/1/
cp ../../python_models/bls_model_loading/model.py models/bls_model_loading/1/
cp ../../python_models/bls_model_loading/config.pbtxt models/bls_model_loading/
cp -fr ${DATADIR}/qa_model_repository/onnx_int32_int32_int32 models/.
cp -fr ${DATADIR}/qa_model_repository/plan_int32_int32_int32 models/.
# Make it so only versions 2 and 3 are valid version directories
rm -rf models/onnx_int32_int32_int32/1
rm -rf models/plan_int32_int32_int32/1

SERVER_LOG="./bls_model_loading_server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1"
2 changes: 1 addition & 1 deletion qa/L0_batch_custom/batch_custom_test.py
@@ -232,7 +232,7 @@ def test_volume_batching(self):
# Send 12 requests with batch size 1. The max_queue_delay is set
# to non-zero. Depending on the timing of the requests' arrival,
# there can be anywhere from 4 to 6 model executions.
model_base = "onnx"
model_base = "savedmodel"
dtype = np.float16
shapes = (
[
4 changes: 2 additions & 2 deletions qa/L0_batch_custom/test.sh
@@ -46,7 +46,7 @@ BATCH_CUSTOM_TEST=batch_custom_test.py
CLIENT_LOG_BASE="./client.log"
DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository
EXPECTED_NUM_TESTS="1"
MODEL_NAME="onnx_zero_1_float16"
MODEL_NAME="savedmodel_zero_1_float16"
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=models --log-verbose 1"
SERVER_LOG_BASE="./inference_server.log"
@@ -101,7 +101,7 @@ cp -r backend/examples/batching_strategies/single_batching/build/libtriton_singl

# Run a test to validate the single batching strategy example.
# Then, run tests to validate the volume batching example being passed in via the backend dir, model dir, version dir, and model config.
BACKEND_DIR="/opt/tritonserver/backends/onnxruntime"
BACKEND_DIR="/opt/tritonserver/backends/tensorflow"
MODEL_DIR="models/$MODEL_NAME"
VERSION_DIR="$MODEL_DIR/1/"

Expand Down
3 changes: 3 additions & 0 deletions qa/L0_batch_input/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ rm -f $SERVER_LOG $CLIENT_LOG

RET=0
for BACKEND in $BACKENDS; do
if [[ "$BACKEND" == 'onnx' ]]; then
continue
fi
rm -rf models && mkdir models
cp -r $DATADIR/${BACKEND}_batch_input models/ragged_element_count_acc_zero
(cd models/ragged_element_count_acc_zero && \
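
The `continue` guard added to the backend loop above is repeated in several of the suites in this PR; a minimal sketch of the pattern follows (the default backend list matches the tests, the echo line is illustrative).

    #!/bin/bash
    # Iterate over the configured backends but skip onnx while it is disabled.
    BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
    for BACKEND in $BACKENDS; do
        if [[ "$BACKEND" == 'onnx' ]]; then
            continue    # onnx temporarily excluded from this test
        fi
        echo "running ${BACKEND} test case"
    done
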
4 changes: 3 additions & 1 deletion qa/L0_batcher/test.sh
@@ -107,7 +107,7 @@ source ../common/util.sh
RET=0

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan python"}
BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan python"}
export BACKENDS

# Basic batcher tests
@@ -237,6 +237,7 @@ if [[ $BACKENDS == *"plan"* ]]; then
dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt)
fi

: '
if [[ $BACKENDS == *"onnx"* ]]; then
# Use nobatch model to match the ragged test requirement
cp -r $DATADIR/qa_identity_model_repository/onnx_nobatch_zero_1_float32 var_models/onnx_zero_1_float32 && \
@@ -249,6 +250,7 @@ if [[ $BACKENDS == *"onnx"* ]]; then
source_input: \"INPUT0\" }] \
dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt)
fi
'

if [[ $BACKENDS == *"libtorch"* ]]; then
# Use nobatch model to match the ragged test requirement
7 changes: 6 additions & 1 deletion qa/L0_client_nobatch/test.sh
@@ -47,8 +47,13 @@ EXPECTED_NUM_TESTS="4"

DATADIR=/data/inferenceserver/${REPO_VERSION}

rm -fr models && mkdir models
cp -r $DATADIR/qa_model_repository/* models
rm `find ./models/ -name '*onnx*'` -rf


SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR/qa_model_repository"
SERVER_ARGS="--model-repository=models"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

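
The copy-then-prune step above builds a local model repository and strips the onnx models out of it. An equivalent sketch is shown below; the paths match the test, but the find -prune form is an assumption, not the script's exact command.

    #!/bin/bash
    # Build a throwaway model repository from the shared qa models, then
    # delete every entry whose name contains "onnx".
    rm -rf models && mkdir models
    cp -r "$DATADIR"/qa_model_repository/* models/
    find ./models/ -name '*onnx*' -prune -exec rm -rf {} +
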
4 changes: 3 additions & 1 deletion qa/L0_cmdline_trace/test.sh
@@ -58,7 +58,7 @@ export CUDA_VISIBLE_DEVICES=0

DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
MODELBASE=onnx_int32_int32_int32
MODELBASE=savedmodel_int32_int32_int32

MODELSDIR=`pwd`/trace_models

@@ -78,6 +78,8 @@ rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \

RET=0

ls $MODELSDIR

# trace-level=OFF make sure no tracing
SERVER_ARGS="--trace-file=trace_off.log --trace-level=OFF --trace-rate=1 --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_off.log"
3 changes: 3 additions & 0 deletions qa/L0_compute_capability/test.sh
@@ -53,6 +53,9 @@ RET=0
BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}

for BACKEND in $BACKENDS; do
if [[ "$BACKEND" == 'onnx' ]]; then
continue
fi
# Need just one model for the backend...
rm -fr models && mkdir models
cp -r ${DATADIR}/qa_model_repository/${BACKEND}_float32_float32_float32 \
5 changes: 4 additions & 1 deletion qa/L0_custom_ops/test.sh
@@ -45,7 +45,7 @@ ZERO_OUT_TEST=zero_out_test.py
CUDA_OP_TEST=cuda_op_test.py
MOD_OP_TEST=mod_op_test.py
VISION_OP_TEST=vision_op_test.py
ONNX_OP_TEST=onnx_op_test.py
#ONNX_OP_TEST=onnx_op_test.py

SERVER=/opt/tritonserver/bin/tritonserver
SERVER_LOG="./inference_server.log"
@@ -204,6 +204,7 @@ fi
kill $SERVER_PID
wait $SERVER_PID

: '
# ONNX
rm -rf onnx_custom_ops && \
mkdir -p onnx_custom_ops/custom_op/1 && \
@@ -233,6 +234,7 @@ if [ $? -ne 0 ]; then
RET=1
fi


set -e

if [ $RET -eq 0 ]; then
@@ -241,5 +243,6 @@ fi

kill $SERVER_PID
wait $SERVER_PID
'

exit $RET
8 changes: 4 additions & 4 deletions qa/L0_device_memory_tracker/test.sh
@@ -57,9 +57,9 @@ RET=0
# prepare the model repository; it only contains ONNX and TRT models, as the
# corresponding backends are known to support memory usage tracking.
rm -rf models && mkdir models
# ONNX
cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/.
rm -r models/*cpu
## ONNX
##cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/.
#rm -r models/*cpu

# Convert to get TRT models against the system
CAFFE2PLAN=../common/caffe2plan
@@ -92,7 +92,7 @@ set -e
# Set multiple instances on selected model to test instance-wise collection
# and accumulation.
echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/resnet152_plan/config.pbtxt
echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/densenet/config.pbtxt
#echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/densenet/config.pbtxt

# testing use nvidia-smi for Python to validate the reported usage
pip install nvidia-ml-py3
2 changes: 1 addition & 1 deletion qa/L0_dyna_implicit_state/test.sh
@@ -39,7 +39,7 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then
fi

export ENSEMBLES=0
BACKENDS=${BACKENDS:="onnx plan"}
BACKENDS=${BACKENDS:="plan"}
export BACKENDS
export IMPLICIT_STATE=1

9 changes: 8 additions & 1 deletion qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py
@@ -47,12 +47,19 @@
BACKENDS = os.environ.get(
"BACKENDS", "graphdef savedmodel libtorch onnx plan custom custom_string"
)

IMPLICIT_STATE = int(os.environ["IMPLICIT_STATE"]) == 1

_trials = BACKENDS.split(" ")
if "onnx" in _trials:
_trials.remove("onnx")
for backend in BACKENDS.split(" "):
if NO_BATCHING:
if (backend != "custom") and (backend != "custom_string"):
if (
(backend != "custom")
and (backend != "custom_string")
and (backend != "onnx")
):
_trials += (backend + "_nobatch",)

_ragged_batch_supported_trials = []
4 changes: 3 additions & 1 deletion qa/L0_dyna_sequence_batcher/test.sh
@@ -53,7 +53,7 @@ IMPLICIT_STATE=${IMPLICIT_STATE:="0"}
export IMPLICIT_STATE

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="graphdef savedmodel libtorch onnx plan custom custom_string"}
BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan custom custom_string"}
export BACKENDS

MODEL_REPOSITORY=''
@@ -95,6 +95,8 @@ if [ $IMPLICIT_STATE == "0" ]; then
sed -i "s/name:.*\"INPUT\"/name: \"INPUT\"\\nallow_ragged_batch: true/" config.pbtxt)
fi

rm `find ./models/ -name '*onnx*'` -rf

# Need to launch the server for each test so that the model status is
# reset (which is used to make sure the correct batch size was used
# for execution). Test everything with fixed-tensor-size models and
14 changes: 8 additions & 6 deletions qa/L0_grpc/test.sh
@@ -489,7 +489,8 @@ wait $SERVER_PID

# Run cpp client unit test
rm -rf unit_test_models && mkdir unit_test_models
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/.
cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple
sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt
cp -r ${MODELDIR}/simple unit_test_models/.

SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models
@@ -517,22 +518,23 @@ wait $SERVER_PID

# Run cpp client load API unit test
rm -rf unit_test_models && mkdir unit_test_models
cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/.
cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple/
sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt
# Make it so only versions 2 and 3 are valid version directories while the config requests 1 and 3
rm -rf unit_test_models/onnx_int32_int32_int32/1
rm -rf unit_test_models/client_test_simple/1

# Start with EXPLICIT mode and load onnx_float32_float32_float32
# Start with EXPLICIT mode and load client_test_simple
SERVER_ARGS="--model-repository=`pwd`/unit_test_models \
--model-control-mode=explicit \
--load-model=onnx_int32_int32_int32 \
--load-model=client_test_simple \
--strict-model-config=false"
SERVER_LOG="./inference_server_cc_unit_test.load.log"
CLIENT_LOG="./cc_unit_test.load.log"

for i in \
"LoadWithFileOverride" \
"LoadWithConfigOverride" \
; do
SERVER_LOG="./inference_server_cc_unit_test.load."$i".log"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
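
The unit-test model setup above reuses an existing qa model under a new name; a minimal sketch of the copy-and-rename pattern follows (the SRC/DST variable names are illustrative, not from the script).

    #!/bin/bash
    # Copy an existing model directory under a new name and rewrite the model
    # name inside its config.pbtxt so the server registers it as the new model.
    SRC="$DATADIR/qa_model_repository/plan_int32_int32_int32"
    DST="unit_test_models/client_test_simple"
    cp -r "$SRC" "$DST"
    sed -i "s/plan_int32_int32_int32/client_test_simple/g" "$DST/config.pbtxt"
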