diff --git a/qa/L0_backend_config/test.sh b/qa/L0_backend_config/test.sh index b898735798..8ee90babaf 100755 --- a/qa/L0_backend_config/test.sh +++ b/qa/L0_backend_config/test.sh @@ -236,6 +236,8 @@ else fi +: ' +# Disabling onnxruntime tests for r24.02 release # Onnxruntime: Batching ON rm -rf ./models/ mkdir -p ./models/no_config @@ -306,17 +308,18 @@ else wait $SERVER_PID fi +' # # General backend tests # # We want to make sure that backend configurations -# are not lost. For this purpose we are using only onnx backend +# are not lost. For this purpose we are using only tensorflow backend rm -rf ./models/ mkdir -p ./models/no_config/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/ +cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/savedmodel_float32_float32_float32/1 ./models/no_config/ # First getting a baseline for the number of default configs # added during a server set up @@ -351,7 +354,7 @@ fi # One of defaultconfigs is `min-compute-capability`. This test # checks if it is properlly overridden. MIN_COMPUTE_CAPABILITY=XX -SERVER_ARGS="--backend-config=onnxruntime,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS" +SERVER_ARGS="--backend-config=tensorflow,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS" SERVER_LOG=$SERVER_LOG_BASE.global_configs.log run_server @@ -374,7 +377,7 @@ else fi # Now make sure that specific backend configs are not lost. -SERVER_ARGS="--backend-config=onnxruntime,a=0 --backend-config=onnxruntime,y=0 --backend-config=onnxruntime,z=0 $COMMON_ARGS" +SERVER_ARGS="--backend-config=tensorflow,a=0 --backend-config=tensorflow,y=0 --backend-config=tensorflow,z=0 $COMMON_ARGS" SERVER_LOG=$SERVER_LOG_BASE.specific_configs.log EXPECTED_CONFIG_COUNT=$(($DEFAULT_CONFIG_COUNT+3)) run_server @@ -398,7 +401,6 @@ else fi - # Print test outcome if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index 1848af125c..429a6622d7 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -41,6 +41,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then mkdir -p models/bls/1/ cp ../../python_models/bls/model.py models/bls/1/ cp ../../python_models/bls/config.pbtxt models/bls + sed -i 's/onnx_nobatch_sequence_int32/plan_nobatch_sequence_int32/g' models/bls/1/model.py mkdir -p models/dlpack_add_sub/1/ cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/ @@ -74,7 +75,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then cp ../../python_models/dlpack_identity/model.py models/dlpack_identity/1/ cp ../../python_models/dlpack_identity/config.pbtxt models/dlpack_identity - cp -r ${DATADIR}/qa_sequence_implicit_model_repository/onnx_nobatch_sequence_int32/ ./models + cp -r ${DATADIR}/qa_sequence_implicit_model_repository/plan_nobatch_sequence_int32/ ./models git clone https://github.com/triton-inference-server/python_backend -b $PYTHON_BACKEND_REPO_TAG mkdir -p models/square_int32/1/ @@ -219,9 +220,9 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then mkdir -p models/bls_model_loading/1/ cp ../../python_models/bls_model_loading/model.py models/bls_model_loading/1/ cp ../../python_models/bls_model_loading/config.pbtxt models/bls_model_loading/ - cp -fr ${DATADIR}/qa_model_repository/onnx_int32_int32_int32 models/. + cp -fr ${DATADIR}/qa_model_repository/plan_int32_int32_int32 models/. 
# Make only version 2, 3 is valid version directory - rm -rf models/onnx_int32_int32_int32/1 + rm -rf models/plan_int32_int32_int32/1 SERVER_LOG="./bls_model_loading_server.log" SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1" diff --git a/qa/L0_batch_custom/batch_custom_test.py b/qa/L0_batch_custom/batch_custom_test.py index 6cd6346ad3..1585cf2848 100755 --- a/qa/L0_batch_custom/batch_custom_test.py +++ b/qa/L0_batch_custom/batch_custom_test.py @@ -232,7 +232,7 @@ def test_volume_batching(self): # Send 12 requests with batch size 1. The max_queue_delay is set # to non-zero. Depending upon the timing of the requests arrival # there can be either 4-6 model executions. - model_base = "onnx" + model_base = "savedmodel" dtype = np.float16 shapes = ( [ diff --git a/qa/L0_batch_custom/test.sh b/qa/L0_batch_custom/test.sh index 01701df661..64b8665d23 100755 --- a/qa/L0_batch_custom/test.sh +++ b/qa/L0_batch_custom/test.sh @@ -46,7 +46,7 @@ BATCH_CUSTOM_TEST=batch_custom_test.py CLIENT_LOG_BASE="./client.log" DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository EXPECTED_NUM_TESTS="1" -MODEL_NAME="onnx_zero_1_float16" +MODEL_NAME="savedmodel_zero_1_float16" SERVER=/opt/tritonserver/bin/tritonserver SERVER_ARGS="--model-repository=models --log-verbose 1" SERVER_LOG_BASE="./inference_server.log" @@ -101,7 +101,7 @@ cp -r backend/examples/batching_strategies/single_batching/build/libtriton_singl # Run a test to validate the single batching strategy example. # Then, run tests to validate the volume batching example being passed in via the backend dir, model dir, version dir, and model config. -BACKEND_DIR="/opt/tritonserver/backends/onnxruntime" +BACKEND_DIR="/opt/tritonserver/backends/tensorflow" MODEL_DIR="models/$MODEL_NAME" VERSION_DIR="$MODEL_DIR/1/" diff --git a/qa/L0_batch_input/test.sh b/qa/L0_batch_input/test.sh index e780516ec4..ac00e4f6b4 100755 --- a/qa/L0_batch_input/test.sh +++ b/qa/L0_batch_input/test.sh @@ -60,6 +60,9 @@ rm -f $SERVER_LOG $CLIENT_LOG RET=0 for BACKEND in $BACKENDS; do + if [[ "$BACKEND" == 'onnx' ]]; then + continue + fi rm -rf models && mkdir models cp -r $DATADIR/${BACKEND}_batch_input models/ragged_element_count_acc_zero (cd models/ragged_element_count_acc_zero && \ diff --git a/qa/L0_batcher/test.sh b/qa/L0_batcher/test.sh index c5f8819276..2b48819823 100755 --- a/qa/L0_batcher/test.sh +++ b/qa/L0_batcher/test.sh @@ -107,7 +107,7 @@ source ../common/util.sh RET=0 # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan python"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan python"} export BACKENDS # Basic batcher tests @@ -237,6 +237,7 @@ if [[ $BACKENDS == *"plan"* ]]; then dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt) fi +: ' if [[ $BACKENDS == *"onnx"* ]]; then # Use nobatch model to match the ragged test requirement cp -r $DATADIR/qa_identity_model_repository/onnx_nobatch_zero_1_float32 var_models/onnx_zero_1_float32 && \ @@ -249,6 +250,7 @@ if [[ $BACKENDS == *"onnx"* ]]; then source_input: \"INPUT0\" }] \ dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt) fi +' if [[ $BACKENDS == *"libtorch"* ]]; then # Use nobatch model to match the ragged test requirement diff --git a/qa/L0_client_nobatch/test.sh b/qa/L0_client_nobatch/test.sh index 58b1b3dc58..78eafc80c6 100755 --- 
a/qa/L0_client_nobatch/test.sh +++ b/qa/L0_client_nobatch/test.sh @@ -47,8 +47,13 @@ EXPECTED_NUM_TESTS="4" DATADIR=/data/inferenceserver/${REPO_VERSION} +rm -fr models && mkdir models +cp -r $DATADIR/qa_model_repository/* models +rm `find ./models/ -name '*onnx*'` -rf + + SERVER=/opt/tritonserver/bin/tritonserver -SERVER_ARGS="--model-repository=$DATADIR/qa_model_repository" +SERVER_ARGS="--model-repository=models" SERVER_LOG="./inference_server.log" source ../common/util.sh diff --git a/qa/L0_cmdline_trace/test.sh b/qa/L0_cmdline_trace/test.sh index d0f86dc2a9..5bb4788eab 100755 --- a/qa/L0_cmdline_trace/test.sh +++ b/qa/L0_cmdline_trace/test.sh @@ -58,7 +58,7 @@ export CUDA_VISIBLE_DEVICES=0 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/ -MODELBASE=onnx_int32_int32_int32 +MODELBASE=savedmodel_int32_int32_int32 MODELSDIR=`pwd`/trace_models @@ -78,6 +78,8 @@ rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \ RET=0 +ls $MODELSDIR + # trace-level=OFF make sure no tracing SERVER_ARGS="--trace-file=trace_off.log --trace-level=OFF --trace-rate=1 --model-repository=$MODELSDIR" SERVER_LOG="./inference_server_off.log" diff --git a/qa/L0_compute_capability/test.sh b/qa/L0_compute_capability/test.sh index d85acb1b6e..d4c4fca424 100755 --- a/qa/L0_compute_capability/test.sh +++ b/qa/L0_compute_capability/test.sh @@ -53,6 +53,9 @@ RET=0 BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"} for BACKEND in $BACKENDS; do + if [[ "$BACKEND" == 'onnx' ]]; then + continue + fi # Need just one model for the backend... rm -fr models && mkdir models cp -r ${DATADIR}/qa_model_repository/${BACKEND}_float32_float32_float32 \ diff --git a/qa/L0_custom_ops/test.sh b/qa/L0_custom_ops/test.sh index a12c1d67a4..33daa16921 100755 --- a/qa/L0_custom_ops/test.sh +++ b/qa/L0_custom_ops/test.sh @@ -45,7 +45,7 @@ ZERO_OUT_TEST=zero_out_test.py CUDA_OP_TEST=cuda_op_test.py MOD_OP_TEST=mod_op_test.py VISION_OP_TEST=vision_op_test.py -ONNX_OP_TEST=onnx_op_test.py +#ONNX_OP_TEST=onnx_op_test.py SERVER=/opt/tritonserver/bin/tritonserver SERVER_LOG="./inference_server.log" @@ -204,6 +204,7 @@ fi kill $SERVER_PID wait $SERVER_PID +: ' # ONNX rm -rf onnx_custom_ops && \ mkdir -p onnx_custom_ops/custom_op/1 && \ @@ -233,6 +234,7 @@ if [ $? -ne 0 ]; then RET=1 fi + set -e if [ $RET -eq 0 ]; then @@ -241,5 +243,6 @@ fi kill $SERVER_PID wait $SERVER_PID +' exit $RET diff --git a/qa/L0_device_memory_tracker/test.sh b/qa/L0_device_memory_tracker/test.sh index 7eb0d745da..fa2edd9315 100755 --- a/qa/L0_device_memory_tracker/test.sh +++ b/qa/L0_device_memory_tracker/test.sh @@ -57,9 +57,9 @@ RET=0 # prepare model repository, only contains ONNX and TRT models as the # corresponding backend are known to be memory. rm -rf models && mkdir models -# ONNX -cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/. -rm -r models/*cpu +## ONNX +##cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/. +#rm -r models/*cpu # Convert to get TRT models against the system CAFFE2PLAN=../common/caffe2plan @@ -92,7 +92,7 @@ set -e # Set multiple instances on selected model to test instance-wise collection # and accumulation. 
echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/resnet152_plan/config.pbtxt -echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/densenet/config.pbtxt +#echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/densenet/config.pbtxt # testing use nvidia-smi for Python to validate the reported usage pip install nvidia-ml-py3 diff --git a/qa/L0_dyna_implicit_state/test.sh b/qa/L0_dyna_implicit_state/test.sh index 0721d5cd32..81eab8f7d9 100755 --- a/qa/L0_dyna_implicit_state/test.sh +++ b/qa/L0_dyna_implicit_state/test.sh @@ -39,7 +39,7 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then fi export ENSEMBLES=0 -BACKENDS=${BACKENDS:="onnx plan"} +BACKENDS=${BACKENDS:="plan"} export BACKENDS export IMPLICIT_STATE=1 diff --git a/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py b/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py index f2c709469b..02ef9e7f39 100755 --- a/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py +++ b/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py @@ -47,12 +47,19 @@ BACKENDS = os.environ.get( "BACKENDS", "graphdef savedmodel libtorch onnx plan custom custom_string" ) + IMPLICIT_STATE = int(os.environ["IMPLICIT_STATE"]) == 1 _trials = BACKENDS.split(" ") +if "onnx" in _trials: + _trials.remove("onnx") for backend in BACKENDS.split(" "): if NO_BATCHING: - if (backend != "custom") and (backend != "custom_string"): + if ( + (backend != "custom") + and (backend != "custom_string") + and (backend != "onnx") + ): _trials += (backend + "_nobatch",) _ragged_batch_supported_trials = [] diff --git a/qa/L0_dyna_sequence_batcher/test.sh b/qa/L0_dyna_sequence_batcher/test.sh index acac8399af..4ded6dd3fa 100755 --- a/qa/L0_dyna_sequence_batcher/test.sh +++ b/qa/L0_dyna_sequence_batcher/test.sh @@ -53,7 +53,7 @@ IMPLICIT_STATE=${IMPLICIT_STATE:="0"} export IMPLICIT_STATE # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel libtorch onnx plan custom custom_string"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan custom custom_string"} export BACKENDS MODEL_REPOSITORY='' @@ -95,6 +95,8 @@ if [ $IMPLICIT_STATE == "0" ]; then sed -i "s/name:.*\"INPUT\"/name: \"INPUT\"\\nallow_ragged_batch: true/" config.pbtxt) fi +rm `find ./models/ -name '*onnx*'` -rf + # Need to launch the server for each test so that the model status is # reset (which is used to make sure the correct batch size was used # for execution). Test everything with fixed-tensor-size models and diff --git a/qa/L0_grpc/test.sh b/qa/L0_grpc/test.sh index 73b9710a71..2f1a17fdcc 100755 --- a/qa/L0_grpc/test.sh +++ b/qa/L0_grpc/test.sh @@ -489,7 +489,8 @@ wait $SERVER_PID # Run cpp client unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt cp -r ${MODELDIR}/simple unit_test_models/. SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models @@ -517,22 +518,23 @@ wait $SERVER_PID # Run cpp client load API unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. 
+cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple/ +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt # Make only version 2, 3 is valid version directory while config requests 1, 3 -rm -rf unit_test_models/onnx_int32_int32_int32/1 +rm -rf unit_test_models/client_test_simple/1 -# Start with EXPLICIT mode and load onnx_float32_float32_float32 +# Start with EXPLICIT mode and load client_test_simple SERVER_ARGS="--model-repository=`pwd`/unit_test_models \ --model-control-mode=explicit \ - --load-model=onnx_int32_int32_int32 \ + --load-model=client_test_simple \ --strict-model-config=false" -SERVER_LOG="./inference_server_cc_unit_test.load.log" CLIENT_LOG="./cc_unit_test.load.log" for i in \ "LoadWithFileOverride" \ "LoadWithConfigOverride" \ ; do + SERVER_LOG="./inference_server_cc_unit_test.load."$i".log" run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" diff --git a/qa/L0_http/http_test.py b/qa/L0_http/http_test.py index 4432fe9186..769cbc605c 100755 --- a/qa/L0_http/http_test.py +++ b/qa/L0_http/http_test.py @@ -66,20 +66,20 @@ def _raw_binary_helper( ) def test_raw_binary(self): - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" input_bytes = np.arange(8, dtype=np.float32).tobytes() self._raw_binary_helper(model, input_bytes, input_bytes) def test_raw_binary_longer(self): # Similar to test_raw_binary but test with different data size - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" input_bytes = np.arange(32, dtype=np.float32).tobytes() self._raw_binary_helper(model, input_bytes, input_bytes) def test_byte(self): # Select model that satisfies constraints for raw binary request # i.e. BYTE type the element count must be 1 - model = "onnx_zero_1_object_1_element" + model = "savedmodel_zero_1_object_1_element" input = "427" headers = {"Inference-Header-Content-Length": "0"} r = requests.post(self._get_infer_url(model), data=input, headers=headers) @@ -100,7 +100,7 @@ def test_byte(self): def test_byte_too_many_elements(self): # Select model that doesn't satisfy constraints for raw binary request # i.e. BYTE type the element count must be 1 - model = "onnx_zero_1_object" + model = "savedmodel_zero_1_object" input = "427" headers = {"Inference-Header-Content-Length": "0"} r = requests.post(self._get_infer_url(model), data=input, headers=headers) @@ -119,7 +119,7 @@ def test_byte_too_many_elements(self): def test_multi_variable_dimensions(self): # Select model that doesn't satisfy constraints for raw binary request # i.e. this model has multiple variable-sized dimensions - model = "onnx_zero_1_float16" + model = "savedmodel_zero_1_float16" input = np.ones([2, 2], dtype=np.float16) headers = {"Inference-Header-Content-Length": "0"} r = requests.post( @@ -140,7 +140,7 @@ def test_multi_variable_dimensions(self): def test_multi_inputs(self): # Select model that doesn't satisfy constraints for raw binary request # i.e. 
input count must be 1 - model = "onnx_zero_3_float32" + model = "savedmodel_zero_3_float32" # Use one numpy array, after tobytes() it can be seen as three inputs # each with 8 elements (this ambiguity is why this is not allowed) input = np.arange(24, dtype=np.float32) @@ -167,7 +167,7 @@ def test_multi_inputs(self): def test_content_encoding_chunked_manually(self): # Similar to test_raw_binary but test with extra headers extra_headers = {"Transfer-Encoding": "chunked"} - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" input_bytes = np.arange(8, dtype=np.float32).tobytes() # Encode input into a single chunk (for simplicity) following chunked # encoding format: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding @@ -189,7 +189,7 @@ def test_content_encoding_unsupported_client(self): with self.subTest(encoding=encoding): headers = {"Transfer-Encoding": encoding} np_input = np.arange(8, dtype=np.float32).reshape(1, -1) - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" # Setup inputs inputs = [] inputs.append( @@ -208,7 +208,7 @@ def test_content_encoding_unsupported_client(self): client.infer(model_name=model, inputs=inputs, headers=headers) def test_descriptive_status_code(self): - model = "onnx_zero_1_float32_queue" + model = "savedmodel_zero_1_float32_queue" input_bytes = np.arange(8, dtype=np.float32).tobytes() # Send two requests to model that only queues 1 request at the maximum, diff --git a/qa/L0_http/test.sh b/qa/L0_http/test.sh index 2b78305452..a247a9c94f 100755 --- a/qa/L0_http/test.sh +++ b/qa/L0_http/test.sh @@ -504,7 +504,8 @@ wait $SERVER_PID # Run cpp client unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt cp -r ${MODELDIR}/simple unit_test_models/. SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models @@ -532,14 +533,15 @@ wait $SERVER_PID # Run cpp client load API unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple/ +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt # Make only version 2, 3 is valid version directory while config requests 1, 3 -rm -rf unit_test_models/onnx_int32_int32_int32/1 +rm -rf unit_test_models/client_test_simple/1 -# Start with EXPLICIT mode and load onnx_float32_float32_float32 +# Start with EXPLICIT mode and load client_test_simple SERVER_ARGS="--model-repository=`pwd`/unit_test_models \ --model-control-mode=explicit \ - --load-model=onnx_int32_int32_int32 \ + --load-model=client_test_simple \ --strict-model-config=false" SERVER_LOG="./inference_server_cc_unit_test.load.log" CLIENT_LOG="./cc_unit_test.load.log" @@ -592,18 +594,18 @@ wait $SERVER_PID MODELDIR=python_unit_test_models mkdir -p $MODELDIR rm -rf ${MODELDIR}/* -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/. -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_object ${MODELDIR}/. -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float16 ${MODELDIR}/. -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_3_float32 ${MODELDIR}/. 
-cp -r ${MODELDIR}/onnx_zero_1_object ${MODELDIR}/onnx_zero_1_object_1_element && \ - (cd $MODELDIR/onnx_zero_1_object_1_element && \ - sed -i "s/onnx_zero_1_object/onnx_zero_1_object_1_element/" config.pbtxt && \ +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_1_float32 ${MODELDIR}/. +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_1_object ${MODELDIR}/. +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_1_float16 ${MODELDIR}/. +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_3_float32 ${MODELDIR}/. +cp -r ${MODELDIR}/savedmodel_zero_1_object ${MODELDIR}/savedmodel_zero_1_object_1_element && \ + (cd $MODELDIR/savedmodel_zero_1_object_1_element && \ + sed -i "s/savedmodel_zero_1_object/savedmodel_zero_1_object_1_element/" config.pbtxt && \ sed -i "0,/-1/{s/-1/1/}" config.pbtxt) # Model for error code test -cp -r ${MODELDIR}/onnx_zero_1_float32 ${MODELDIR}/onnx_zero_1_float32_queue && \ - (cd $MODELDIR/onnx_zero_1_float32_queue && \ - sed -i "s/onnx_zero_1_float32/onnx_zero_1_float32_queue/" config.pbtxt && \ +cp -r ${MODELDIR}/savedmodel_zero_1_float32 ${MODELDIR}/savedmodel_zero_1_float32_queue && \ + (cd $MODELDIR/savedmodel_zero_1_float32_queue && \ + sed -i "s/savedmodel_zero_1_float32/savedmodel_zero_1_float32_queue/" config.pbtxt && \ echo "dynamic_batching { " >> config.pbtxt && \ echo " max_queue_delay_microseconds: 1000000" >> config.pbtxt && \ echo " preferred_batch_size: [ 8 ]" >> config.pbtxt && \ diff --git a/qa/L0_implicit_state/implicit_state.py b/qa/L0_implicit_state/implicit_state.py index 2cdf7ff2e0..ed9d641d46 100755 --- a/qa/L0_implicit_state/implicit_state.py +++ b/qa/L0_implicit_state/implicit_state.py @@ -193,6 +193,9 @@ def test_request_output_not_allowed(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") for backend in BACKENDS.split(" "): + if backend.strip() == "onnx": + continue + inputs = [] if backend.strip() == "libtorch": inputs.append(tritonhttpclient.InferInput("INPUT__0", [1], "INT32")) @@ -229,6 +232,9 @@ def test_request_output_not_allowed(self): def test_request_output(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") for backend in BACKENDS.split(" "): + if backend.strip() == "onnx": + continue + inputs = [] if backend.strip() == "libtorch": inputs.append(tritonhttpclient.InferInput("INPUT__0", [1], "INT32")) diff --git a/qa/L0_implicit_state/test.sh b/qa/L0_implicit_state/test.sh index 0722d29be1..524aed921f 100755 --- a/qa/L0_implicit_state/test.sh +++ b/qa/L0_implicit_state/test.sh @@ -72,6 +72,9 @@ mkdir -p models/single_state_buffer/1/ mkdir -p models/growable_memory/1/ for BACKEND in $BACKENDS; do + if [[ "$BACKEND" == 'onnx' ]]; then + continue + fi dtype="int32" model_name=${BACKEND}_nobatch_sequence_${dtype} rm -rf models/$model_name @@ -132,6 +135,7 @@ wait $SERVER_PID (cd ../L0_sequence_batcher/ && bash -ex test.sh) RET=$? 
+ if [ $RET == 0 ]; then echo -e "\n***\n*** Implicit State Passed\n***" else diff --git a/qa/L0_infer/test.sh b/qa/L0_infer/test.sh index 34a669f874..cfebd8dec3 100755 --- a/qa/L0_infer/test.sh +++ b/qa/L0_infer/test.sh @@ -129,7 +129,7 @@ if [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then fi # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan python python_dlpack openvino"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan python python_dlpack openvino"} export BACKENDS # If ENSEMBLES not specified, set to 1 @@ -210,6 +210,8 @@ function generate_model_repository() { elif [ "$BACKEND" == "plan" ] && [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then # skip plan_tensorrt models since they don't run on CPU only containers continue + elif [ "$BACKEND" == "onnx" ]; then + continue else cp -r ${DATADIR}/qa_model_repository/${BACKEND}* \ models/. @@ -251,12 +253,7 @@ function generate_model_repository() { KIND="KIND_GPU" && [[ "$TARGET" == "cpu" ]] && KIND="KIND_CPU" for FW in $BACKENDS; do - if [ "$FW" == "onnx" ] && [ "$TEST_VALGRIND" -eq 1 ]; then - # Reduce the instance count to make loading onnx models faster - for MC in `ls models/${FW}*/config.pbtxt`; do - echo "instance_group [ { kind: ${KIND} count: 1 }]" >> $MC - done - elif [ "$FW" != "plan" ] && [ "$FW" != "python" ] && [ "$FW" != "python_dlpack" ] && [ "$FW" != "openvino" ];then + if [ "$FW" != "plan" ] && [ "$FW" != "python" ] && [ "$FW" != "python_dlpack" ] && [ "$FW" != "openvino" ];then for MC in `ls models/${FW}*/config.pbtxt`; do echo "instance_group [ { kind: ${KIND} }]" >> $MC done @@ -348,7 +345,7 @@ done # Loading all the onnx models at once requires more than 12 hours. Loading them # separately to reduce the loading time. if [ "$TEST_VALGRIND" -eq 1 ]; then - TESTING_BACKENDS="python python_dlpack onnx" + TESTING_BACKENDS="python python_dlpack" EXPECTED_NUM_TESTS=42 if [[ "aarch64" != $(uname -m) ]] ; then pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html diff --git a/qa/L0_infer_reshape/infer_reshape_test.py b/qa/L0_infer_reshape/infer_reshape_test.py index e77dcbecaf..a277ffdb97 100755 --- a/qa/L0_infer_reshape/infer_reshape_test.py +++ b/qa/L0_infer_reshape/infer_reshape_test.py @@ -112,48 +112,6 @@ def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True): use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY, ) - if tu.validate_for_onnx_model( - dtype, dtype, dtype, input_shapes[0], input_shapes[0], input_shapes[0] - ): - # model that supports batching - for bs in (1, 8): - full_shapes = [ - [ - bs, - ] - + input_shape - for input_shape in input_shapes - ] - full_output_shapes = [ - [ - bs, - ] - + output_shape - for output_shape in output_shapes - ] - iu.infer_zero( - self, - "onnx", - bs, - dtype, - full_shapes, - full_output_shapes, - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY, - ) - # model that does not support batching - if no_batch: - iu.infer_zero( - self, - "onnx_nobatch", - 1, - dtype, - input_shapes, - output_shapes, - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY, - ) - if tu.validate_for_libtorch_model( dtype, dtype, diff --git a/qa/L0_infer_reshape/test.sh b/qa/L0_infer_reshape/test.sh index 218be954d9..cc80d407e6 100755 --- a/qa/L0_infer_reshape/test.sh +++ b/qa/L0_infer_reshape/test.sh @@ -76,6 +76,7 @@ for i in \ done create_nop_version_dir `pwd`/models +rm `find ./models/ -name '*onnx*'` -rf 
RET=0 diff --git a/qa/L0_infer_variable/infer_variable_test.py b/qa/L0_infer_variable/infer_variable_test.py index e5e6470a3c..54c1559d53 100755 --- a/qa/L0_infer_variable/infer_variable_test.py +++ b/qa/L0_infer_variable/infer_variable_test.py @@ -205,28 +205,6 @@ def _infer_exact_helper( swap=swap, ) - if tu.validate_for_onnx_model( - input_dtype, - output0_dtype, - output1_dtype, - input_shape, - output0_shape, - output1_shape, - ): - # No basic ensemble models are created against custom models [TODO] - _infer_exact_helper( - self, - "onnx", - input_shape, - 8, - input_dtype, - output0_dtype, - output1_dtype, - output0_raw=output0_raw, - output1_raw=output1_raw, - swap=swap, - ) - if tu.validate_for_libtorch_model( input_dtype, output0_dtype, diff --git a/qa/L0_infer_variable/test.sh b/qa/L0_infer_variable/test.sh index 9760583b94..22e25fcc03 100755 --- a/qa/L0_infer_variable/test.sh +++ b/qa/L0_infer_variable/test.sh @@ -74,6 +74,7 @@ for TARGET in cpu gpu; do done done + rm `find ./models/ -name '*onnx*'` -rf run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" diff --git a/qa/L0_infer_zero/test.sh b/qa/L0_infer_zero/test.sh index 02676b2f85..b3ae9dc247 100755 --- a/qa/L0_infer_zero/test.sh +++ b/qa/L0_infer_zero/test.sh @@ -54,6 +54,7 @@ rm -f $SERVER_LOG $CLIENT_LOG rm -fr models && mkdir models cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/* models/. && \ cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_identity_model_repository/* models/. +rm `find ./models/ -name '*onnx*'` -rf # Remove version-compatible TensorRT models, as they require version-compatibility # mode to be turned on when starting the server. diff --git a/qa/L0_io/test.sh b/qa/L0_io/test.sh index 84ab4fb0c0..6da32f9bac 100755 --- a/qa/L0_io/test.sh +++ b/qa/L0_io/test.sh @@ -60,7 +60,7 @@ RET=0 # Prepare float32 models with basic config rm -rf $MODELSDIR -for trial in graphdef savedmodel onnx libtorch plan python python_dlpack; do +for trial in graphdef savedmodel libtorch plan python python_dlpack; do full=${trial}_float32_float32_float32 if [ "$trial" == "python" ]; then mkdir -p $MODELSDIR/${full}/1 && \ @@ -126,7 +126,7 @@ for trial in graphdef savedmodel onnx libtorch plan python python_dlpack; do done # Prepare string models with basic config -for trial in graphdef savedmodel onnx ; do +for trial in graphdef savedmodel ; do full=${trial}_object_object_object mkdir -p $MODELSDIR/${full}/1 && \ cp -r $DATADIR/${full}/1/* $MODELSDIR/${full}/1/. && \ @@ -163,7 +163,7 @@ if [ $? 
-ne 0 ]; then fi set -e -TRIALS="graphdef savedmodel onnx libtorch plan python python_dlpack libtorch_multi_gpu libtorch_multi_device" +TRIALS="graphdef savedmodel libtorch plan python python_dlpack libtorch_multi_gpu libtorch_multi_device" for input_device in -1 0 1; do for output_device in -1 0 1; do for trial in ${TRIALS}; do @@ -230,7 +230,7 @@ for input_device in -1 0 1; do done done - for trial in graphdef savedmodel onnx; do + for trial in graphdef savedmodel; do model_devices="-1 0 1" for model_device in $model_devices; do full=${trial}_object_object_object diff --git a/qa/L0_java_resnet/test.sh b/qa/L0_java_resnet/test.sh index 1ca08b4c65..2aa2319824 100755 --- a/qa/L0_java_resnet/test.sh +++ b/qa/L0_java_resnet/test.sh @@ -47,7 +47,7 @@ JAVACPP_BRANCH_TAG=${JAVACPP_BRANCH_TAG:="master"} # Create local model repository mkdir -p ${MODEL_REPO} # TODO: fix build to support GPU only resnet50v1.5_fp16_savedmodel -for BACKEND in _fp32_libtorch _fp32_onnx; do +for BACKEND in _fp32_libtorch ; do cp -r $DATADIR/perf_model_store/resnet50${BACKEND} ${MODEL_REPO}/ echo ${MODEL_REPO}/resnet50${BACKEND}/config.pbtxt sed -i "s/kind: KIND_GPU/kind: KIND_CPU/" ${MODEL_REPO}/resnet50${BACKEND}/config.pbtxt @@ -78,7 +78,7 @@ if [ $? -ne 0 ]; then fi # TODO: fix build to support GPU only resnet so can test TF as well -for BACKEND in ONNX TORCH; do +for BACKEND in TORCH; do if [ `grep -c "${BACKEND} test PASSED" ${CLIENT_LOG}` != "1" ]; then echo -e "\n***\n*** ${BACKEND} backend test FAILED. Expected '${BACKEND} test PASSED'\n***" RET=1 diff --git a/qa/L0_java_sequence_batcher/test.sh b/qa/L0_java_sequence_batcher/test.sh index 2f988322d9..243951a81a 100755 --- a/qa/L0_java_sequence_batcher/test.sh +++ b/qa/L0_java_sequence_batcher/test.sh @@ -62,7 +62,7 @@ sed -i 's/Simple/SequenceTest/g' $SAMPLES_REPO/pom.xml rm -f *.log RET=0 -for BACKEND in graphdef libtorch onnx savedmodel; do +for BACKEND in graphdef libtorch savedmodel; do # Create local model repository mkdir -p ${MODEL_REPO} MODEL=${BACKEND}_nobatch_sequence_int32 diff --git a/qa/L0_large_payload/large_payload_test.py b/qa/L0_large_payload/large_payload_test.py index fff57290ef..6dc2fc890b 100755 --- a/qa/L0_large_payload/large_payload_test.py +++ b/qa/L0_large_payload/large_payload_test.py @@ -148,7 +148,7 @@ def test_savedmodel(self): ) self._test_helper(client, model_name) - def test_onnx(self): + def _test_onnx(self): # onnx_nobatch_zero_1_float32 is identity model with input shape [-1] for client in self._clients: model_name = tu.get_zero_model_name("onnx_nobatch", 1, self._data_type) diff --git a/qa/L0_large_payload/test.sh b/qa/L0_large_payload/test.sh index 325cab4ed5..89a0448c41 100755 --- a/qa/L0_large_payload/test.sh +++ b/qa/L0_large_payload/test.sh @@ -56,7 +56,7 @@ RET=0 MODEL_SUFFIX=nobatch_zero_1_float32 rm -fr all_models && mkdir all_models -for TARGET in graphdef savedmodel onnx libtorch plan; do +for TARGET in graphdef savedmodel libtorch plan; do cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/${TARGET}_$MODEL_SUFFIX \ all_models/. done @@ -71,7 +71,7 @@ cp ../python_models/identity_fp32/model.py all_models/python_$MODEL_SUFFIX/1/mod # Restart server before every test to make sure server state # is invariant to previous test -for TARGET in graphdef savedmodel onnx libtorch plan python; do +for TARGET in graphdef savedmodel libtorch plan python; do rm -fr models && mkdir models && \ cp -r all_models/${TARGET}_$MODEL_SUFFIX models/. 
diff --git a/qa/L0_lifecycle/lifecycle_test.py b/qa/L0_lifecycle/lifecycle_test.py index 9130d2ee02..1342158823 100755 --- a/qa/L0_lifecycle/lifecycle_test.py +++ b/qa/L0_lifecycle/lifecycle_test.py @@ -205,7 +205,7 @@ def test_parse_error_modelfail(self): # And other models should be loaded successfully try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), grpcclient.InferenceServerClient("localhost:8001", verbose=True), @@ -268,7 +268,7 @@ def test_parse_error_modelfail_nostrict(self): # And other models should be loaded successfully try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), grpcclient.InferenceServerClient("localhost:8001", verbose=True), @@ -324,7 +324,7 @@ def test_parse_error_no_model_config(self): # And other models should be loaded successfully try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: model_name = tu.get_model_name( base_name, np.float32, np.float32, np.float32 ) @@ -356,7 +356,7 @@ def test_init_error_modelfail(self): self.assertFalse(triton_client.is_server_ready()) # one model uses sequence batcher while the other uses dynamic batcher - model_names = ["onnx_sequence_int32", "onnx_int32_int32_int32"] + model_names = ["libtorch_sequence_int32", "libtorch_int32_int32_int32"] for model_name in model_names: self.assertFalse(triton_client.is_model_ready(model_name)) @@ -365,7 +365,7 @@ def test_init_error_modelfail(self): # And other models should be loaded successfully try: - for base_name in ["graphdef", "savedmodel", "onnx"]: + for base_name in ["graphdef", "savedmodel", "libtorch"]: model_name = tu.get_model_name( base_name, np.float32, np.float32, np.float32 ) @@ -375,7 +375,7 @@ def test_init_error_modelfail(self): try: tensor_shape = (1, 16) - for base_name in ["graphdef", "savedmodel", "onnx"]: + for base_name in ["graphdef", "savedmodel", "libtorch"]: iu.infer_exact( self, base_name, @@ -411,7 +411,7 @@ def test_parse_error_model_no_version(self): # Sanity check that other models are loaded properly try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: model_name = tu.get_model_name( base_name, np.float32, np.float32, np.float32 ) @@ -425,7 +425,7 @@ def test_parse_error_model_no_version(self): self.assertTrue(False, "unexpected error {}".format(ex)) try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: iu.infer_exact( self, base_name, @@ -535,7 +535,9 @@ def test_dynamic_model_load_unload(self): savedmodel_name = tu.get_model_name( "savedmodel", np.float32, np.float32, np.float32 ) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name( + "libtorch", np.float32, np.float32, np.float32 + ) # Make sure savedmodel model is not in the status (because # initially it is not in the model repository) @@ -548,8 +550,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + 
self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -566,8 +568,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "1")) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -642,8 +644,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -682,8 +684,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "1")) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True @@ -712,10 +714,10 @@ def test_dynamic_model_load_unload(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - # Remove onnx model from the model repository and give it + # Remove libtorch model from the model repository and give it # time to unload. Make sure that it is unavailable. 
try: - shutil.rmtree("models/" + onnx_name) + shutil.rmtree("models/" + libtorch_name) time.sleep(5) # wait for model to unload for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -725,8 +727,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "1")) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertFalse(triton_client.is_model_ready(onnx_name, "1")) - self.assertFalse(triton_client.is_model_ready(onnx_name, "3")) + self.assertFalse(triton_client.is_model_ready(libtorch_name, "1")) + self.assertFalse(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -734,7 +736,7 @@ def test_dynamic_model_load_unload(self): try: iu.infer_exact( self, - "onnx", + "libtorch", tensor_shape, 1, np.float32, @@ -742,10 +744,12 @@ def test_dynamic_model_load_unload(self): np.float32, swap=True, ) - self.assertTrue(False, "expected error for unavailable model " + onnx_name) + self.assertTrue( + False, "expected error for unavailable model " + libtorch_name + ) except Exception as ex: self.assertIn( - "Request for unknown model: 'onnx_float32_float32_float32' has no available versions", + "Request for unknown model: 'libtorch_float32_float32_float32' has no available versions", ex.message(), ) @@ -754,7 +758,9 @@ def test_dynamic_model_load_unload_disabled(self): savedmodel_name = tu.get_model_name( "savedmodel", np.float32, np.float32, np.float32 ) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name( + "libtorch", np.float32, np.float32, np.float32 + ) # Make sure savedmodel model is not in the status (because # initially it is not in the model repository) @@ -767,8 +773,8 @@ def test_dynamic_model_load_unload_disabled(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -785,8 +791,8 @@ def test_dynamic_model_load_unload_disabled(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -814,7 +820,7 @@ def test_dynamic_model_load_unload_disabled(self): # Remove one of the original models from the model repository. # Unloading is disabled so it should remain available in the status. 
try: - shutil.rmtree("models/" + onnx_name) + shutil.rmtree("models/" + libtorch_name) time.sleep(5) # wait for model to unload (but it shouldn't) for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -824,8 +830,8 @@ def test_dynamic_model_load_unload_disabled(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -834,7 +840,7 @@ def test_dynamic_model_load_unload_disabled(self): try: iu.infer_exact( self, - "onnx", + "libtorch", tensor_shape, 1, np.float32, @@ -1279,7 +1285,7 @@ def test_multiple_model_repository_polling(self): (1,), model_shape, ) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Add the savedmodel to the second model repository, should cause # it to be unloaded due to duplication @@ -1297,7 +1303,7 @@ def test_multiple_model_repository_polling(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Remove the savedmodel from the first model repository, the # model from the second model repository should be loaded @@ -1306,7 +1312,7 @@ def test_multiple_model_repository_polling(self): shutil.rmtree("models/" + savedmodel_name) time.sleep(5) # wait for model to unload self._infer_success_models( - ["savedmodel", "graphdef", "onnx"], (1, 3), model_shape + ["savedmodel", "graphdef", "libtorch"], (1, 3), model_shape ) def test_multiple_model_repository_control(self): @@ -1316,7 +1322,7 @@ def test_multiple_model_repository_control(self): savedmodel_name = tu.get_model_name( "savedmodel", np.float32, np.float32, np.float32 ) - model_bases = ["savedmodel", "graphdef", "onnx"] + model_bases = ["savedmodel", "graphdef", "libtorch"] # Initially models are not loaded for base in model_bases: @@ -1353,7 +1359,7 @@ def test_multiple_model_repository_control(self): (1,), model_shape, ) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Add the savedmodel to the second model repository. Because # not polling this doesn't change any model state, all models @@ -1366,7 +1372,7 @@ def test_multiple_model_repository_control(self): (1,), model_shape, ) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Load savedmodel again which should fail because it is now duplicated # in 2 model repositories. Use HTTP here. @@ -1394,7 +1400,7 @@ def test_multiple_model_repository_control(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Remove the savedmodel from the first model repository and # explicitly load savedmodel. 
The savedmodel from the second @@ -1413,18 +1419,18 @@ def test_multiple_model_repository_control(self): self.assertIn("failed to load '{}'".format(savedmodel_name), ex.message()) self._infer_success_models( - ["savedmodel", "graphdef", "onnx"], (1, 3), model_shape + ["savedmodel", "graphdef", "libtorch"], (1, 3), model_shape ) def test_model_control(self): model_shape = (1, 16) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name("plan", np.float32, np.float32, np.float32) ensemble_prefix = "simple_" - ensemble_name = ensemble_prefix + onnx_name + ensemble_name = ensemble_prefix + libtorch_name # Make sure no models are loaded - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1462,44 +1468,44 @@ def test_model_control(self): self._infer_success_models( [ - "onnx", + "plan", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, swap=True, ) - # Delete model configuration for onnx, which will cause + # Delete model configuration for libtorch, which will cause # the autofiller to use the latest version policy so that only # version 3 will be available if the models are re-loaded - for model_name in (onnx_name,): + for model_name in (libtorch_name,): os.remove("models/" + model_name + "/config.pbtxt") self._infer_success_models( [ - "onnx", + "plan", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, swap=True, ) - # Reload models, only version 3 should be available for onnx - for model_name in (onnx_name, ensemble_name): + # Reload models, only version 3 should be available for libtorch + for model_name in (libtorch_name, ensemble_name): try: triton_client = grpcclient.InferenceServerClient( "localhost:8001", verbose=True @@ -1510,21 +1516,21 @@ def test_model_control(self): self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, swap=True, ) - for model_name in (onnx_name,): + for model_name in (libtorch_name,): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1552,11 +1558,11 @@ def test_model_control(self): triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True ) - triton_client.unload_model(onnx_name) + triton_client.unload_model(libtorch_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1577,13 +1583,13 @@ def test_model_control(self): "localhost:8000", verbose=True ) triton_client.unload_model(ensemble_name) - triton_client.load_model(onnx_name) + triton_client.load_model(libtorch_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, @@ -1602,7 +1608,7 @@ def test_model_control(self): self.assertTrue(False, "unexpected error {}".format(ex)) def test_model_control_fail(self): - model_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + model_name = tu.get_model_name("plan", np.float32, np.float32, np.float32) # Make sure no models are 
loaded try: @@ -1639,13 +1645,15 @@ def test_model_control_fail(self): def test_model_control_ensemble(self): model_shape = (1, 16) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name( + "savedmodel", np.float32, np.float32, np.float32 + ) ensemble_prefix = "simple_" - ensemble_name = ensemble_prefix + onnx_name + ensemble_name = ensemble_prefix + libtorch_name # Make sure no models are loaded - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1669,14 +1677,14 @@ def test_model_control_ensemble(self): self._infer_success_models( [ - "onnx", + "savedmodel", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_savedmodel", ], (1, 3), model_shape, @@ -1691,7 +1699,7 @@ def test_model_control_ensemble(self): triton_client.unload_model(ensemble_name, unload_dependents=True) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1717,7 +1725,7 @@ def test_model_control_ensemble(self): self._infer_success_models( [ - "onnx", + "savedmodel", ], (1, 3), model_shape, @@ -1732,8 +1740,8 @@ def test_model_control_ensemble(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(ensemble_name, "1")) self.assertFalse(triton_client.is_model_ready(ensemble_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2030,11 +2038,13 @@ def test_model_reload_fail(self): def test_multiple_model_repository_control_startup_models(self): model_shape = (1, 16) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + graphdef_name = tu.get_model_name( + "graphdef", np.float32, np.float32, np.float32 + ) plan_name = tu.get_model_name("plan", np.float32, np.float32, np.float32) ensemble_prefix = "simple_" - onnx_ensemble_name = ensemble_prefix + onnx_name + graphdef_ensemble_name = ensemble_prefix + graphdef_name plan_ensemble_name = ensemble_prefix + plan_name # Make sure unloaded models are not in the status @@ -2055,14 +2065,14 @@ def test_multiple_model_repository_control_startup_models(self): # And loaded models work properly self._infer_success_models( [ - "onnx", + "graphdef", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_graphdef", ], (1, 3), model_shape, @@ -2119,43 +2129,43 @@ def test_multiple_model_repository_control_startup_models(self): # Delete model configuration, which will cause the autofiller # to use the latest version policy so that only version 3 will # be available if the models are re-loaded - os.remove("models/" + onnx_name + "/config.pbtxt") + os.remove("models/" + plan_name + "/config.pbtxt") self._infer_success_models( [ - "plan", + "graphdef", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_plan", + "simple_graphdef", ], (1, 3), model_shape, swap=True, ) - # Reload onnx, only version 3 should be available + # Reload plan 
model, only version 3 should be available try: triton_client = grpcclient.InferenceServerClient( "localhost:8001", verbose=True ) - triton_client.load_model(onnx_name) + triton_client.load_model(plan_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, @@ -2169,7 +2179,7 @@ def test_multiple_model_repository_control_startup_models(self): ): self.assertTrue(triton_client.is_server_live()) self.assertTrue(triton_client.is_server_ready()) - self.assertFalse(triton_client.is_model_ready(onnx_name, "1")) + self.assertFalse(triton_client.is_model_ready(plan_name, "1")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2183,17 +2193,17 @@ def test_multiple_model_repository_control_startup_models(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - # Unload the onnx, as side effect, the ensemble model + # Unload the plan, as side effect, the ensemble model # will be forced to be unloaded try: triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True ) - triton_client.unload_model(onnx_name) + triton_client.unload_model(plan_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - for model_name in [onnx_name, onnx_ensemble_name]: + for model_name in [plan_name, plan_ensemble_name]: try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -2206,35 +2216,35 @@ def test_multiple_model_repository_control_startup_models(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - # Explicitly unload the onnx ensemble and load the + # Explicitly unload the plan ensemble and load the # depending model. The ensemble model should not be reloaded # because it was explicitly unloaded. try: triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True ) - triton_client.unload_model(onnx_ensemble_name) - triton_client.load_model(onnx_name) + triton_client.unload_model(plan_ensemble_name) + triton_client.load_model(plan_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, ) self._infer_success_models( [ - "plan", + "graphdef", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_plan", + "simple_graphdef", ], (1, 3), model_shape, @@ -2248,8 +2258,8 @@ def test_multiple_model_repository_control_startup_models(self): ): self.assertTrue(triton_client.is_server_live()) self.assertTrue(triton_client.is_server_ready()) - self.assertFalse(triton_client.is_model_ready(onnx_ensemble_name, "1")) - self.assertFalse(triton_client.is_model_ready(onnx_ensemble_name, "3")) + self.assertFalse(triton_client.is_model_ready(plan_ensemble_name, "1")) + self.assertFalse(triton_client.is_model_ready(plan_ensemble_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2280,7 +2290,7 @@ def test_model_repository_index(self): self.assertTrue(False, "unexpected error {}".format(ex)) # Check model repository index - # All models should be in ready state except onnx_float32_float32_float32 + # All models should be in ready state except libtorch_float32_float32_float32 # which appears in two repositories. 
model_bases.append("simple_graphdef") try: @@ -2292,7 +2302,7 @@ def test_model_repository_index(self): self.assertEqual(len(index), 8) for i in index: indexed.append(i["name"]) - if i["name"] == "onnx_float32_float32_float32": + if i["name"] == "libtorch_float32_float32_float32": self.assertEqual(i["state"], "UNAVAILABLE") self.assertEqual( i["reason"], "model appears in two or more repositories" @@ -2311,7 +2321,7 @@ def test_model_repository_index(self): self.assertEqual(len(index.models), 8) for i in index.models: indexed.append(i.name) - if i.name == "onnx_float32_float32_float32": + if i.name == "libtorch_float32_float32_float32": self.assertEqual(i.state, "UNAVAILABLE") self.assertEqual( i.reason, "model appears in two or more repositories" @@ -2332,7 +2342,7 @@ def test_config_override(self): httpclient.InferenceServerClient("localhost:8000", verbose=True), grpcclient.InferenceServerClient("localhost:8001", verbose=True), ): - for base in (("onnx", "onnxruntime"),): + for base in (("plan", "tensorrt"),): model_name = tu.get_model_name( base[0], np.float32, np.float32, np.float32 ) @@ -2404,7 +2414,7 @@ def test_file_override(self): model_shape = (1, 16) override_base = "override_model" - for base in (("onnx", "onnxruntime"),): + for base in (("plan", "tensorrt"),): model_name = tu.get_model_name(base[0], np.float32, np.float32, np.float32) override_model_name = tu.get_model_name( override_base, np.float32, np.float32, np.float32 @@ -2432,7 +2442,7 @@ def test_file_override(self): # not be used. try: triton_client.load_model( - model_name, files={"file:1/model.onnx": file_content} + model_name, files={"file:1/model.plan": file_content} ) self.assertTrue(False, "expected error on missing override config") except InferenceServerException as ex: @@ -2464,7 +2474,7 @@ def test_file_override(self): triton_client.load_model( override_model_name, config="""{{"backend":"{backend}" }}""".format(backend=base[1]), - files={"file:1/model.onnx": file_content}, + files={"file:1/model.plan": file_content}, ) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2501,7 +2511,7 @@ def test_file_override(self): triton_client.load_model( model_name, config="""{{"backend":"{backend}" }}""".format(backend=base[1]), - files={"file:1/model.onnx": file_content}, + files={"file:1/model.plan": file_content}, ) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) diff --git a/qa/L0_lifecycle/test.sh b/qa/L0_lifecycle/test.sh index 4476a5db32..564f89fad1 100755 --- a/qa/L0_lifecycle/test.sh +++ b/qa/L0_lifecycle/test.sh @@ -96,7 +96,6 @@ kill $SERVER_PID wait $SERVER_PID LOG_IDX=$((LOG_IDX+1)) - # LifeCycleTest.test_parse_error_noexit SERVER_ARGS="--model-repository=/tmp/xyzx --strict-readiness=false \ --exit-on-error=false" @@ -336,7 +335,7 @@ mkdir models models_0 for i in graphdef savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx plan ; do +for i in plan libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done # Change the model files so that multiple versions will be loaded, and one of @@ -398,7 +397,7 @@ mkdir models models_0 for i in graphdef savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx plan ; do +for i in plan libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. 
done rm models/graphdef_float32_float32_float32/config.pbtxt @@ -439,14 +438,14 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_init_error_modelfail rm -fr models models_0 mkdir models models_0 -cp -r $DATADIR/qa_sequence_model_repository/onnx_sequence_int32 models/. -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 models_0/. -sed -i "s/OUTPUT/_OUTPUT/" models/onnx_sequence_int32/config.pbtxt -sed -i "s/OUTPUT/_OUTPUT/" models_0/onnx_int32_int32_int32/config.pbtxt -for i in graphdef savedmodel; do +cp -r $DATADIR/qa_sequence_model_repository/savedmodel_sequence_int32 models/. +cp -r $DATADIR/qa_model_repository/savedmodel_int32_int32_int32 models_0/. +sed -i "s/OUTPUT/_OUTPUT/" models/savedmodel_sequence_int32/config.pbtxt +sed -i "s/OUTPUT/_OUTPUT/" models_0/savedmodel_int32_int32_int32/config.pbtxt +for i in graphdef libtorch; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx ; do +for i in savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done @@ -477,7 +476,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_parse_error_model_no_version rm -fr models mkdir models -for i in savedmodel onnx plan ; do +for i in savedmodel libtorch plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done mkdir -p models/graphdef_float32_float32_float32 @@ -581,7 +580,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_dynamic_model_load_unload rm -fr models savedmodel_float32_float32_float32 mkdir models -for i in graphdef onnx plan ; do +for i in graphdef libtorch plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -610,7 +609,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_dynamic_model_load_unload_disabled rm -fr models savedmodel_float32_float32_float32 mkdir models -for i in graphdef onnx plan; do +for i in graphdef libtorch plan; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -762,7 +761,7 @@ mkdir models models_0 for i in graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx ; do +for i in libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -796,7 +795,7 @@ mkdir models models_0 for i in graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx ; do +for i in libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -829,7 +828,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_model_control rm -fr models config.pbtxt.* mkdir models -for i in onnx ; do +for i in plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -861,7 +860,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_model_control_fail rm -fr models config.pbtxt.* mkdir models -for i in onnx ; do +for i in plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. 
# Remove all model files so the model will fail to load rm models/${i}_float32_float32_float32/*/* @@ -893,7 +892,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_model_control_ensemble rm -fr models config.pbtxt.* mkdir models -for i in onnx ; do +for i in savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -926,7 +925,7 @@ LOG_IDX=$((LOG_IDX+1)) rm -fr models models_0 config.pbtxt.* mkdir models models_0 # Ensemble models in the second repository -for i in plan onnx ; do +for i in plan graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -945,7 +944,7 @@ SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \ --strict-model-config=false --exit-on-error=false \ --load-model=savedmodel_float32_float32_float32 \ --load-model=plan_float32_float32_float32 \ - --load-model=simple_onnx_float32_float32_float32" + --load-model=simple_graphdef_float32_float32_float32" SERVER_LOG="./inference_server_$LOG_IDX.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -971,7 +970,7 @@ LOG_IDX=$((LOG_IDX+1)) rm -fr models models_0 config.pbtxt.* mkdir models models_0 # Ensemble models in the second repository -for i in plan onnx ; do +for i in plan graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -1012,7 +1011,7 @@ LOG_IDX=$((LOG_IDX+1)) # an additional --load-model argument, it should fail rm -fr models mkdir models -for i in onnx ; do +for i in plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt done @@ -1024,7 +1023,7 @@ SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \ --strict-readiness=true \ --exit-on-error=true \ --load-model=* \ - --load-model=onnx_float32_float32_float32" + --load-model=plan_float32_float32_float32" SERVER_LOG="./inference_server_$LOG_IDX.log" run_server if [ "$SERVER_PID" != "0" ]; then @@ -1057,6 +1056,7 @@ if [ "$SERVER_PID" != "0" ]; then kill $SERVER_PID wait $SERVER_PID fi + # check server log for the error messages to make sure they're printed if [ `grep -c "model not found in any model repository" $SERVER_LOG` == "0" ]; then echo -e "\n***\n*** Server log ${SERVER_LOG} did not print model load failure for non-existent model\n***" @@ -1075,8 +1075,8 @@ for i in graphdef savedmodel ; do cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/. done -# onnx doesn't load because it is duplicated in 2 repositories -for i in onnx ; do +# libtorch doesn't load because it is duplicated in 2 repositories +for i in libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. 
done @@ -1085,7 +1085,7 @@ SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \ --model-control-mode=explicit \ --strict-readiness=false \ --strict-model-config=false --exit-on-error=false \ - --load-model=onnx_float32_float32_float32 \ + --load-model=libtorch_float32_float32_float32 \ --load-model=graphdef_float32_float32_float32 \ --load-model=simple_savedmodel_float32_float32_float32" SERVER_LOG="./inference_server_$LOG_IDX.log" @@ -1369,7 +1369,7 @@ done # Send HTTP request to control endpoint rm -fr models config.pbtxt.* mkdir models -for i in graphdef savedmodel onnx plan ; do +for i in graphdef savedmodel libtorch plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done @@ -1480,10 +1480,10 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_config_override rm -fr models config.pbtxt.* mkdir models -cp -r $DATADIR/qa_model_repository/onnx_float32_float32_float32 models/. +cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 models/. # Make only version 2 is valid version directory while config requests 1, 3 -rm models/onnx_float32_float32_float32/1/* -rm models/onnx_float32_float32_float32/3/* +rm models/plan_float32_float32_float32/1/* +rm models/plan_float32_float32_float32/3/* SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models \ --model-control-mode=explicit \ @@ -1512,14 +1512,14 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_file_override rm -fr models config.pbtxt.* mkdir models -cp -r $DATADIR/qa_model_repository/onnx_float32_float32_float32 models/. +cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 models/. # Make only version 2, 3 is valid version directory while config requests 1, 3 -rm -rf models/onnx_float32_float32_float32/1 +rm -rf models/plan_float32_float32_float32/1 -# Start with EXPLICIT mode and load onnx_float32_float32_float32 +# Start with EXPLICIT mode and load plan_float32_float32_float32 SERVER_ARGS="--model-repository=`pwd`/models \ --model-control-mode=explicit \ - --load-model=onnx_float32_float32_float32 \ + --load-model=plan_float32_float32_float32 \ --strict-model-config=false" SERVER_LOG="./inference_server_$LOG_IDX.log" run_server @@ -1905,7 +1905,7 @@ LOG_IDX=$((LOG_IDX+1)) rm -rf models mkdir models # Sanity check loading multiple instances in parallel for each supported backend -PARALLEL_BACKENDS="python onnx" +PARALLEL_BACKENDS="python" for backend in ${PARALLEL_BACKENDS} ; do model="${backend}_float32_float32_float32" model_dir="models/${model}" diff --git a/qa/L0_logging/test.sh b/qa/L0_logging/test.sh index 160bffe3dd..0f12166ecd 100755 --- a/qa/L0_logging/test.sh +++ b/qa/L0_logging/test.sh @@ -50,7 +50,7 @@ fi export CUDA_VISIBLE_DEVICES=0 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository -MODELBASE=onnx_int32_int32_int32 +MODELBASE=savedmodel_int32_int32_int32 MODELSDIR=`pwd`/log_models diff --git a/qa/L0_long_running_stress/test.sh b/qa/L0_long_running_stress/test.sh index b98a89f955..83d8b0bf3a 100755 --- a/qa/L0_long_running_stress/test.sh +++ b/qa/L0_long_running_stress/test.sh @@ -63,7 +63,7 @@ fi RET=0 # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch"} export BACKENDS export CI_JOB_ID=${CI_JOB_ID} @@ -137,6 +137,8 @@ cp -r $DATADIR/tf_model_store/resnet_v1_50_graphdef $MODEL_DIR/resnet_v1_50_grap sed -i 's/^name: "resnet_v1_50_graphdef"/name: "resnet_v1_50_graphdef_def"/' config.pbtxt && \ echo "optimization { }" 
>> config.pbtxt) +rm `find $MODEL_DIR/ -name '*onnx*'` -rf + SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR" SERVER_LOG="./server.log" run_server diff --git a/qa/L0_memory_growth/test.sh b/qa/L0_memory_growth/test.sh index 64277e6b6e..f53db17d8b 100755 --- a/qa/L0_memory_growth/test.sh +++ b/qa/L0_memory_growth/test.sh @@ -102,6 +102,7 @@ export MAX_ALLOWED_ALLOC="100" # Create local model repository mkdir -p models/ cp -r $DATADIR/perf_model_store/resnet50* models/ +rm -rf models/resnet50_fp32_onnx # Copy and prepare trt model cp -r $DATADIR/caffe_models/trt_model_store/resnet50_plan models/resnet50_fp16_plan diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt deleted file mode 100644 index b393fb4e00..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt +++ /dev/null @@ -1,25 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16, 1 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected b/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected deleted file mode 100644 index 52d579417e..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected +++ /dev/null @@ -1 +0,0 @@ -model 'bad_input_dims', tensor 'INPUT0': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 3 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,16,1\]) \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx deleted file mode 100644 index c9f6a92bc7..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -triton:¸ - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_nobatch_int32_int8_int8Z -INPUT0 - - -Z -INPUT1 - - -b -OUTPUT0 - - -b -OUTPUT1 - - -B \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt deleted file mode 100644 index 7d4be73dbb..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt +++ /dev/null @@ -1,13 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected b/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected deleted file mode 100644 index 07ebf4b459..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected +++ /dev/null @@ -1 +0,0 @@ -autofill failed for model 'bad_max_batch_size': model does not support batching while non-zero max_batch_size is specified \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt deleted file mode 100644 index 004ed9a54f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt +++ /dev/null @@ -1,25 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 1 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected b/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected deleted file mode 100644 index 5a11d49e68..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected +++ /dev/null @@ -1 +0,0 @@ -model 'bad_output_dims', tensor 'OUTPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,1\]) diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt deleted file mode 100644 index 2814fb7e5c..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt +++ /dev/null @@ -1,20 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected b/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected deleted file mode 100644 index f6639e85ae..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected +++ /dev/null @@ -1 +0,0 @@ -unable to load model 'too_few_inputs', configuration expects 1 inputs, model provides 2 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt deleted file mode 100644 index 6ba2274876..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt +++ /dev/null @@ -1,30 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT_EXTRA" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected b/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected deleted file mode 100644 index e88e97dcfb..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected +++ /dev/null @@ -1 +0,0 @@ -unable to load model 'too_many_inputs', configuration expects 3 inputs, model provides 2 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt deleted file mode 100644 index 0df318caa8..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt +++ /dev/null @@ -1,25 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT_UNKNOWN" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected b/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected deleted file mode 100644 index e2a2abbf09..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected +++ /dev/null @@ -1 +0,0 @@ -unexpected inference input 'INPUT_UNKNOWN', allowed inputs are: INPUT0, INPUT1 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt deleted file mode 100644 index 979b05c4ee..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt +++ /dev/null @@ -1,20 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT_UNKNOWN" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected b/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected deleted file mode 100644 index 38fd5e2785..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected +++ /dev/null @@ -1 +0,0 @@ -unexpected inference output 'OUTPUT_UNKNOWN', allowed outputs are: OUTPUT0, OUTPUT1 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt b/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt deleted file mode 100644 index 137ad375c8..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt +++ /dev/null @@ -1,23 +0,0 @@ - -name: "cpu_instance" -platform: "onnxruntime_onnx" -max_batch_size: 8 -version_policy: { latest { num_versions: 1 }} -input [ - { - name: "INPUT0" - data_type: TYPE_FP16 - dims: [ -1,-1 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP16 - dims: [ -1,-1 ] - } -] -instance_group { - name: "cpu_instance" - kind: KIND_CPU -} diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected 
b/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected deleted file mode 100644 index 008a7a0b7f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected +++ /dev/null @@ -1,36 +0,0 @@ -name: "cpu_instance" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 8 -input { - name: "INPUT0" - data_type: TYPE_FP16 - dims: -1 - dims: -1 -} -output { - name: "OUTPUT0" - data_type: TYPE_FP16 - dims: -1 - dims: -1 -} -instance_group { - name: "cpu_instance" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/config.pbtxt b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/config.pbtxt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected deleted file mode 100644 index bedc4e44fa..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1 b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1 deleted file mode 100644 index 7e2a45c522..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1 +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" 
-optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2 b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2 deleted file mode 100644 index 56def5c317..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2 +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3 b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3 deleted file mode 100644 index 35a82c5be1..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3 +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected deleted file mode 100644 index f2a7d4e43e..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1 deleted file mode 100644 index ca6269959f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1 +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2 deleted file mode 100644 index 51d73ebdfe..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2 +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3 deleted file mode 100644 index c5121d60b5..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3 +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy 
{ - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx deleted file mode 100644 index ebe41ef108..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:¸ - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_nobatch_int32_int8_int8Z -INPUT0 - - -Z -INPUT1 - - -b -OUTPUT0 - - -b -OUTPUT1 - - -B diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt deleted file mode 100644 index 5913902a76..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt +++ /dev/null @@ -1,5 +0,0 @@ -instance_group [ - { - kind: KIND_CPU - } -] diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected deleted file mode 100644 index 9adc820017..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1 deleted file mode 100644 index 5ba1985bd6..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1 +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - 
enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2 deleted file mode 100644 index fa82234e53..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2 +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3 deleted file mode 100644 index e5e92cb9be..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3 +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/test.sh b/qa/L0_model_config/test.sh index 5b8cf6cf26..ef75c59f17 100755 --- a/qa/L0_model_config/test.sh +++ b/qa/L0_model_config/test.sh @@ -48,7 +48,7 @@ source ../common/util.sh export CUDA_VISIBLE_DEVICES=0 -TRIALS="tensorflow_savedmodel tensorflow_graphdef tensorrt_plan onnxruntime_onnx pytorch_libtorch" +TRIALS="tensorflow_savedmodel tensorflow_graphdef tensorrt_plan pytorch_libtorch" # Copy fixed TensorRT plans into the test model repositories. 
for modelpath in \ @@ -275,9 +275,9 @@ cp /data/inferenceserver/${REPO_VERSION}/qa_reshape_model_repository/plan_zero_4 autofill_noplatform_success/tensorrt/reshape_config_provided/1 # Copy identity model into onnx test directories -mkdir -p autofill_noplatform_success/onnx/cpu_instance/1 -cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_float16/1/model.onnx \ - autofill_noplatform_success/onnx/cpu_instance/1 +#mkdir -p autofill_noplatform_success/onnx/cpu_instance/1 +#cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_float16/1/model.onnx \ +# autofill_noplatform_success/onnx/cpu_instance/1 # Copy openvino models into test directories for modelpath in \ diff --git a/qa/L0_multi_server/test.sh b/qa/L0_multi_server/test.sh index cd5ff3d407..2ba0a76c6c 100755 --- a/qa/L0_multi_server/test.sh +++ b/qa/L0_multi_server/test.sh @@ -54,13 +54,13 @@ RET=0 MULTI_SERVER=multi_server CLIENT_LOG=$MULTI_SERVER MULTI_SERVER=./$MULTI_SERVER -BACKENDS=(graphdef onnx plan) +BACKENDS=(graphdef plan) THREAD_COUNT=32 LOOPS=32 EXTRA_ARGS=" -t ${THREAD_COUNT} -l ${LOOPS}" for (( I=1; I<${THREAD_COUNT}+2; I++ )); do - BACKEND_INDEX=$(((I % 3) - 1)) + BACKEND_INDEX=$(((I % 2) - 1)) full=${BACKENDS[$BACKEND_INDEX]}_float32_float32_float32 mkdir -p ${MODELSDIR}${I}/simple${I}/1 && \ cp -r $DATADIR/${full}/1/* ${MODELSDIR}${I}/simple${I}/1/. && \ diff --git a/qa/L0_output_name/output_name_test.py b/qa/L0_output_name/output_name_test.py index 905174640c..19636aed56 100755 --- a/qa/L0_output_name/output_name_test.py +++ b/qa/L0_output_name/output_name_test.py @@ -36,7 +36,7 @@ import grpc -_trials = ("graphdef", "libtorch", "onnx", "plan", "savedmodel") +_trials = ("graphdef", "libtorch", "plan", "savedmodel") class OutputNameValidationTest(tu.TestResultCollector): diff --git a/qa/L0_output_name/test.sh b/qa/L0_output_name/test.sh index 7c1a5664a0..d29ceb7c6e 100755 --- a/qa/L0_output_name/test.sh +++ b/qa/L0_output_name/test.sh @@ -50,6 +50,7 @@ rm -rf $DATADIR mkdir $DATADIR cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/*_nobatch_zero_1_float32 $DATADIR +rm `find ./models/ -name '*onnx*'` -rf SERVER=/opt/tritonserver/bin/tritonserver SERVER_ARGS="--model-repository=$DATADIR" diff --git a/qa/L0_perf_analyzer_report/test.sh b/qa/L0_perf_analyzer_report/test.sh index 7a04905842..2e35a1e2a6 100755 --- a/qa/L0_perf_analyzer_report/test.sh +++ b/qa/L0_perf_analyzer_report/test.sh @@ -98,8 +98,8 @@ SERVER_LOG="./inference_server.log" rm -f $SERVER_LOG $CLIENT_LOG MODEL_DIR="./models" rm -fr ${MODEL_DIR} && mkdir ${MODEL_DIR} -ENSEMBLE_MODEL="simple_onnx_float32_float32_float32" -COMPOSING_MODEL="onnx_float32_float32_float32" +ENSEMBLE_MODEL="simple_libtorch_float32_float32_float32" +COMPOSING_MODEL="libtorch_float32_float32_float32" ENSEMBLE_MODEL_CACHE_ENABLED="${ENSEMBLE_MODEL}_cache_enabled" ENSEMBLE_MODEL_CACHE_DISABLED="${ENSEMBLE_MODEL}_cache_disabled" COMPOSING_MODEL_CACHE_ENABLED="${COMPOSING_MODEL}_cache_enabled" diff --git a/qa/L0_sagemaker/test.sh b/qa/L0_sagemaker/test.sh index b5bd07c519..94b2a25af5 100755 --- a/qa/L0_sagemaker/test.sh +++ b/qa/L0_sagemaker/test.sh @@ -65,14 +65,15 @@ ENSEMBLEDIR=/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/q SERVER=/opt/tritonserver/bin/tritonserver SERVER_LOG="./server.log" # Link model repository to "/opt/ml/model" +rm -rf /opt/ml mkdir /opt/ml/ ln -s `pwd`/models /opt/ml/model source ../common/util.sh mkdir models && \ - cp -r 
$DATADIR/qa_model_repository/onnx_int32_int32_int32 models/sm_model && \ + cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 models/sm_model && \ rm -r models/sm_model/2 && rm -r models/sm_model/3 && \ - sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt + sed -i "s/plan_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt # Use SageMaker's ping endpoint to check server status # Wait until server health endpoint shows ready. Sets WAIT_RET to 0 on @@ -376,12 +377,12 @@ MODEL2_PATH="models/987654321ihgfedcba/model" mkdir -p "${MODEL1_PATH}" mkdir -p "${MODEL2_PATH}" -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32/* ${MODEL1_PATH} && \ +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32/* ${MODEL1_PATH} && \ rm -r ${MODEL1_PATH}/2 && rm -r ${MODEL1_PATH}/3 && \ - sed -i "s/onnx_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt + sed -i "s/plan_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32/* ${MODEL2_PATH} && \ - sed -i "s/onnx_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt +cp -r $DATADIR/qa_identity_model_repository/plan_zero_1_float32/* ${MODEL2_PATH} && \ + sed -i "s/plan_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt # Ensemble model ENSEMBLE_MODEL_PATH="models/123456789ensemble/model" diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py index 3e6cfc032a..bd3bcff4b8 100755 --- a/qa/L0_sequence_batcher/sequence_batcher_test.py +++ b/qa/L0_sequence_batcher/sequence_batcher_test.py @@ -57,7 +57,7 @@ else: _protocols = ("http",) -BACKENDS = os.environ.get("BACKENDS", "graphdef savedmodel onnx plan custom python") +BACKENDS = os.environ.get("BACKENDS", "graphdef savedmodel plan custom python") ENSEMBLES = bool(int(os.environ.get("ENSEMBLES", 1))) NO_BATCHING = int(os.environ["NO_BATCHING"]) == 1 @@ -70,14 +70,16 @@ _trials = () if NO_BATCHING: for backend in BACKENDS.split(" "): - if backend != "custom": + if backend != "custom" and backend != "onnx": _trials += (backend + "_nobatch",) elif os.environ["BATCHER_TYPE"] == "VARIABLE": for backend in BACKENDS.split(" "): - if (backend != "libtorch") and (backend != "custom"): + if (backend != "libtorch") and (backend != "custom") and (backend != "onnx"): _trials += (backend,) else: - _trials = BACKENDS.split(" ") + for backend in BACKENDS.split(" "): + if backend != "onnx": + _trials += (backend,) # Add ensemble to the _trials ENSEMBLE_PREFIXES = ["simple_", "sequence_", "fan_"] @@ -174,7 +176,9 @@ def get_expected_result_implicit( def test_simple_sequence(self): # Send one sequence and check for correct accumulator # result. The result should be returned immediately. + print(_trials) for trial in _trials: + print("---------------------------" + str(trial)) # Run on different protocols. 
for idx, protocol in enumerate(_protocols): dtypes = self.get_datatype(trial) diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh index d91b433966..98634b0363 100755 --- a/qa/L0_sequence_batcher/test.sh +++ b/qa/L0_sequence_batcher/test.sh @@ -123,7 +123,7 @@ source ../common/util.sh RET=0 # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx plan libtorch custom python"} +BACKENDS=${BACKENDS:="graphdef savedmodel plan libtorch custom python"} export BACKENDS # If MODEL_TRIALS not specified set to 0 1 2 4 v @@ -521,6 +521,8 @@ for model_trial in $MODEL_TRIALS; do done fi + rm `find ./$MODEL_PATH/ -name '*onnx*'` -rf + # Need to launch the server for each test so that the model status # is reset (which is used to make sure the correct batch size was # used for execution). Test everything with fixed-tensor-size @@ -713,6 +715,7 @@ fi MODEL_PATH=queue_delay_models # remove ensemble models from the test model repo rm -rf queue_delay_models/simple_* queue_delay_models/fan_* queue_delay_models/sequence_* +rm `find ./queue_delay_models/ -name '*onnx*'` -rf for i in $QUEUE_DELAY_TESTS ; do export NO_BATCHING=0 export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0 diff --git a/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py b/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py index 15f16da352..a472ed8885 100755 --- a/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py +++ b/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py @@ -46,9 +46,9 @@ _model_instances = int(os.environ["MODEL_INSTANCES"]) if _no_batching: - _trials = ("savedmodel_nobatch", "graphdef_nobatch", "plan_nobatch", "onnx_nobatch") + _trials = ("savedmodel_nobatch", "graphdef_nobatch", "plan_nobatch") else: - _trials = ("savedmodel", "graphdef", "plan", "onnx") + _trials = ("savedmodel", "graphdef", "plan") _protocols = ("http", "grpc") _max_sequence_idle_ms = 5000 @@ -67,7 +67,6 @@ def get_expected_result(self, expected_result, corrid, value, trial, flag_str=No (("nobatch" not in trial) and ("custom" not in trial)) or ("graphdef" in trial) or ("plan" in trial) - or ("onnx" in trial) ) or ("libtorch" in trial): expected_result = value if flag_str is not None: diff --git a/qa/L0_sequence_corrid_batcher/test.sh b/qa/L0_sequence_corrid_batcher/test.sh index 8d114a395a..164470d353 100755 --- a/qa/L0_sequence_corrid_batcher/test.sh +++ b/qa/L0_sequence_corrid_batcher/test.sh @@ -62,7 +62,6 @@ for m in \ $DATADIR/qa_dyna_sequence_model_repository/graphdef_dyna_sequence_int32 \ $DATADIR/qa_dyna_sequence_model_repository/savedmodel_dyna_sequence_int32 \ $DATADIR/qa_dyna_sequence_model_repository/plan_dyna_sequence_int32 \ - $DATADIR/qa_dyna_sequence_model_repository/onnx_dyna_sequence_int32 \ $DATADIR/qa_dyna_sequence_model_repository/libtorch_dyna_sequence_int32; do cp -r $m models4/. && \ (cd models4/$(basename $m) && \ diff --git a/qa/L0_server_status/server_status_test.py b/qa/L0_server_status/server_status_test.py index 7ab04708f0..3a6996693a 100755 --- a/qa/L0_server_status/server_status_test.py +++ b/qa/L0_server_status/server_status_test.py @@ -156,11 +156,11 @@ def test_unknown_model_version(self): def test_model_latest_infer(self): input_size = 16 tensor_shape = (1, input_size) - platform_name = {"graphdef": "tensorflow_graphdef", "onnx": "onnxruntime_onnx"} + platform_name = {"graphdef": "tensorflow_graphdef"} # There are 3 versions of *_int32_int32_int32 and all # should be available. 
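Note on the cleanup lines added to the test.sh scripts above and below (rm `find ... -name '*onnx*'` -rf): if the intent is simply to drop every *onnx* entry from a copied repository, an equivalent form that does not rely on word-splitting the command substitution and that is a clean no-op when nothing matches is the find -prune/-exec idiom sketched here (illustrative only, assuming the same "delete anything with onnx in the name" goal):

    # Purge onnx models/artifacts from the local repository copy; -prune keeps find
    # from descending into directories it is about to remove, and -exec ... {} +
    # runs nothing (and exits 0) when there are no matches.
    find ./models/ -name '*onnx*' -prune -exec rm -rf {} +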
- for platform in ("graphdef", "onnx"): + for platform in ("graphdef",): model_name = platform + "_int32_int32_int32" # Initially there should be no version stats.. @@ -316,7 +316,7 @@ def test_model_specific_infer(self): # There are 3 versions of *_float32_float32_float32 but only # versions 1 and 3 should be available. - for platform in ("graphdef", "onnx", "plan"): + for platform in ("graphdef", "plan"): tensor_shape = (1, input_size) model_name = platform + "_float32_float32_float32" @@ -439,7 +439,7 @@ def test_model_versions_deleted(self): # version 3 was executed once. Version 2 and 3 models were # deleted from the model repository so now only expect version 1 to # be ready and show stats. - for platform in ("graphdef", "onnx"): + for platform in ("graphdef",): model_name = platform + "_int32_int32_int32" try: @@ -615,7 +615,7 @@ def test_infer_stats_no_model_version(self): # version 3 was executed once. Version 2 and 3 models were # deleted from the model repository so now only expect version 1 to # be ready and show infer stats. - for platform in ("graphdef", "onnx"): + for platform in ("graphdef",): model_name = platform + "_int32_int32_int32" try: @@ -723,8 +723,8 @@ def test_infer_stats_no_model(self): stats = infer_stats.model_stats self.assertEqual( len(stats), - 219, - "expected 219 infer stats for all ready versions of all model", + 173, + "expected 173 infer stats for all ready versions of all model", ) except InferenceServerException as ex: diff --git a/qa/L0_server_status/test.sh b/qa/L0_server_status/test.sh index 1e27339a38..c162c6969f 100755 --- a/qa/L0_server_status/test.sh +++ b/qa/L0_server_status/test.sh @@ -55,6 +55,7 @@ source ../common/util.sh rm -fr models cp -r $DATADIR/qa_model_repository models +rm `find ./models/ -name '*onnx*'` -rf run_server if [ "$SERVER_PID" == "0" ]; then @@ -85,7 +86,7 @@ fi set -e rm -fr models/graphdef_int32_int32_int32/2 models/graphdef_int32_int32_int32/3 -rm -fr models/onnx_int32_int32_int32/2 models/onnx_int32_int32_int32/3 +#rm -fr models/onnx_int32_int32_int32/2 models/onnx_int32_int32_int32/3 cp -r models/graphdef_float16_float32_float32/1 models/graphdef_float16_float32_float32/7 sleep 3 diff --git a/qa/L0_simple_lib/test.sh b/qa/L0_simple_lib/test.sh index 7045f512ef..36975de5b8 100755 --- a/qa/L0_simple_lib/test.sh +++ b/qa/L0_simple_lib/test.sh @@ -55,7 +55,7 @@ for SIMPLE_CLIENT in simple ; do CLIENT_LOG=$SIMPLE_CLIENT SIMPLE_CLIENT=./$SIMPLE_CLIENT - for trial in graphdef savedmodel onnx libtorch plan; do + for trial in graphdef savedmodel libtorch plan; do full=${trial}_float32_float32_float32 rm -rf $MODELSDIR mkdir -p $MODELSDIR/simple/1 && \ diff --git a/qa/L0_storage_S3/test.sh b/qa/L0_storage_S3/test.sh index f16dc81e83..830352725e 100755 --- a/qa/L0_storage_S3/test.sh +++ b/qa/L0_storage_S3/test.sh @@ -164,15 +164,16 @@ for ENV_VAR in "env" "env_dummy" "config"; do # Now start model tests - for FW in graphdef savedmodel onnx libtorch plan; do + for FW in graphdef savedmodel libtorch plan; do cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/ done # Copy models with string inputs and remove nobatch (bs=1) models cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/*_object_object_object/ models/ rm -rf models/*nobatch* + rm `find ./models/ -name '*onnx*'` -rf - for FW in graphdef savedmodel onnx libtorch plan; do + for FW in graphdef savedmodel libtorch plan; do for MC in `ls models/${FW}*/config.pbtxt`; do echo "instance_group [ { kind: ${KIND} }]" 
>> $MC
     done
diff --git a/qa/L0_storage_S3_local/test.sh b/qa/L0_storage_S3_local/test.sh
index e60b106b31..07bc5793ae 100755
--- a/qa/L0_storage_S3_local/test.sh
+++ b/qa/L0_storage_S3_local/test.sh
@@ -47,7 +47,7 @@ EXPECTED_NUM_TESTS="3"
 DATADIR="/data/inferenceserver/${REPO_VERSION}/qa_model_repository"
 # Used to control which backends are run in infer_test.py
-BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
+BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan"}
 function run_unit_tests() {
     echo "Running unit tests: ${INFER_TEST}"
@@ -276,7 +276,7 @@ awslocal $ENDPOINT_FLAG s3 rm s3://demo-bucket1.0 --recursive --include "*" && \
 # Test for multiple model repositories using S3 cloud storage
 echo "=== Running multiple-model-repository tests ==="
 BACKENDS1="graphdef libtorch"
-BACKENDS2="onnx plan savedmodel"
+BACKENDS2="plan savedmodel"
 export BACKENDS="$BACKENDS1 $BACKENDS2"
 set +e
diff --git a/qa/L0_storage_azure/test.sh b/qa/L0_storage_azure/test.sh
index 15f9c78bcc..6d136b542a 100755
--- a/qa/L0_storage_azure/test.sh
+++ b/qa/L0_storage_azure/test.sh
@@ -82,7 +82,7 @@ rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*
 RET=0
 # Used to control which backends are run in infer_test.py
-BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
+BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan"}
 function run_unit_tests() {
     BACKENDS=$BACKENDS python $INFER_TEST >$CLIENT_LOG 2>&1
@@ -110,6 +110,7 @@ function setup_model_repo() {
     # Copy models with string inputs and remove nobatch (bs=1) models
     cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/*_object_object_object models/
     rm -rf models/*nobatch*
+    rm `find ./models/ -name '*onnx*'` -rf
 }
 setup_model_repo
diff --git a/qa/L0_storage_swiftstack/infer_test.py b/qa/L0_storage_swiftstack/infer_test.py
index f8a65a01a4..4f62e0ae30 100755
--- a/qa/L0_storage_swiftstack/infer_test.py
+++ b/qa/L0_storage_swiftstack/infer_test.py
@@ -148,6 +148,8 @@ def _infer_exact_helper(
             (input_size,),
             (input_size,),
         ):
+            pass
+            """
             _infer_exact_helper(
                 self,
                 "onnx",
@@ -160,6 +162,7 @@ def _infer_exact_helper(
                 output1_raw=output1_raw,
                 swap=swap,
             )
+            """
         if tu.validate_for_libtorch_model(
             input_dtype,
diff --git a/qa/L0_storage_swiftstack/test.sh b/qa/L0_storage_swiftstack/test.sh
index 99fb5610d6..75e0d14719 100755
--- a/qa/L0_storage_swiftstack/test.sh
+++ b/qa/L0_storage_swiftstack/test.sh
@@ -104,11 +104,11 @@ aws s3 rm $BUCKET_URL/ --recursive --include "*"
 # Now start model tests
-for FW in graphdef savedmodel onnx libtorch plan; do
+for FW in graphdef savedmodel libtorch plan; do
     cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/
 done
-for FW in graphdef savedmodel onnx libtorch plan; do
+for FW in graphdef savedmodel libtorch plan; do
     for MC in `ls models/${FW}*/config.pbtxt`; do
         echo "instance_group [ { kind: KIND_GPU }]" >> $MC
     done
diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh
index b4a17bcd95..83f755b55b 100755
--- a/qa/L0_trace/test.sh
+++ b/qa/L0_trace/test.sh
@@ -52,7 +52,7 @@ export CUDA_VISIBLE_DEVICES=0
 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
 ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
 BLSDIR=../python_models/bls_simple
-MODELBASE=onnx_int32_int32_int32
+MODELBASE=savedmodel_int32_int32_int32
 MODELSDIR=`pwd`/trace_models
@@ -70,7 +70,7 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
     cp -r $MODELSDIR/simple $MODELSDIR/global_simple && \
     (cd $MODELSDIR/global_simple && \
         sed -i "s/^name:.*/name: \"global_simple\"/" config.pbtxt) && \
-    cp -r $ENSEMBLEDIR/simple_onnx_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
+    cp -r $ENSEMBLEDIR/simple_savedmodel_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
     rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/2 && \
     rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \
     (cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
@@ -720,7 +720,7 @@ rm -r ${MODEL_PATH}
 mkdir -p "${MODEL_PATH}"
 cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \
     rm -r ${MODEL_PATH}/2 && rm -r ${MODEL_PATH}/3 && \
-    sed -i "s/onnx_int32_int32_int32/simple/" ${MODEL_PATH}/config.pbtxt
+    sed -i "s/savedmodel_int32_int32_int32/simple/" ${MODEL_PATH}/config.pbtxt
 SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
diff --git a/qa/L0_vertex_ai/test.sh b/qa/L0_vertex_ai/test.sh
index 7403bf14cf..79b3eed78c 100755
--- a/qa/L0_vertex_ai/test.sh
+++ b/qa/L0_vertex_ai/test.sh
@@ -59,12 +59,12 @@ source ../common/util.sh
 # Set up the multi model repository with the swap and non-swap versions
 mkdir multi_models && \
-    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 multi_models/addsub && \
+    cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 multi_models/addsub && \
     rm -r multi_models/addsub/2 && rm -r multi_models/addsub/3 && \
-    sed -i "s/onnx_int32_int32_int32/addsub/" multi_models/addsub/config.pbtxt && \
-    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 multi_models/subadd && \
+    sed -i "s/plan_int32_int32_int32/addsub/" multi_models/addsub/config.pbtxt && \
+    cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 multi_models/subadd && \
     rm -r multi_models/subadd/1 && rm -r multi_models/subadd/2 && \
-    sed -i "s/onnx_int32_int32_int32/subadd/" multi_models/subadd/config.pbtxt
+    sed -i "s/plan_int32_int32_int32/subadd/" multi_models/subadd/config.pbtxt
 mkdir single_model && \
     cp -r multi_models/addsub single_model/.
diff --git a/qa/L0_warmup/test.sh b/qa/L0_warmup/test.sh
index aeed873b25..437dd108ad 100755
--- a/qa/L0_warmup/test.sh
+++ b/qa/L0_warmup/test.sh
@@ -51,7 +51,7 @@ IMAGE="../images/vulture.jpeg"
 DATADIR=`pwd`/models
 # If BACKENDS not specified, set to all
-BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
+BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan"}
 SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_ARGS="--model-repository=$DATADIR --log-verbose=1 --exit-timeout-secs=120"
@@ -177,7 +177,7 @@ for BACKEND in ${BACKENDS}; do
     # Test for variable-size data type (string)
     rm -fr models && mkdir models
-    SUPPORT_STRING=0 && ([[ $BACKEND == "savedmodel" ]] || [[ $BACKEND == "onnx" ]] || [[ $BACKEND == "savedmodel" ]]) && SUPPORT_STRING=1
+    SUPPORT_STRING=0 && ([[ $BACKEND == "savedmodel" ]] || [[ $BACKEND == "savedmodel" ]]) && SUPPORT_STRING=1
     if [ "$SUPPORT_STRING" == "1" ] ; then
         cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/${BACKEND}_sequence_object models/.
         cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/${BACKEND}_zero_1_object models/.
@@ -412,14 +412,14 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
-# Test the onnx model to verify that the memory type of the output tensor
+# Test the tensorrt model to verify that the memory type of the output tensor
 # remains unchanged with the warmup setting
 pip3 uninstall -y torch
 pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch_stable.html
 rm -fr models && mkdir models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_nobatch_float32_float32_float32 models/.
-(cd models/onnx_nobatch_float32_float32_float32 && \
+cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_nobatch_float32_float32_float32 models/.
+(cd models/plan_nobatch_float32_float32_float32 && \
     echo "" >> config.pbtxt && \
     echo 'instance_group [{' >> config.pbtxt && \
     echo ' kind : KIND_GPU' >> config.pbtxt && \
@@ -445,9 +445,10 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_nobatch_flo
     echo ' }' >> config.pbtxt && \
     echo '}]' >> config.pbtxt )
-mkdir -p models/bls_onnx_warmup/1/
-cp ../python_models/bls_onnx_warmup/model.py models/bls_onnx_warmup/1/
-cp ../python_models/bls_onnx_warmup/config.pbtxt models/bls_onnx_warmup/.
+mkdir -p models/bls_plan_warmup/1/
+cp ../python_models/bls_onnx_warmup/model.py models/bls_plan_warmup/1/
+cp ../python_models/bls_onnx_warmup/config.pbtxt models/bls_plan_warmup/.
+sed -i -e 's/onnx/plan/g' models/bls_plan_warmup/1/model.py
 cp ../L0_backend_python/python_unittest.py .
 sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' python_unittest.py
@@ -461,10 +462,10 @@ fi
 set +e
-export MODEL_NAME='bls_onnx_warmup'
+export MODEL_NAME='bls_plan_warmup'
 python3 -m pytest --junitxml=warmup.report.xml $CLIENT_PY >> $CLIENT_LOG 2>&1
 if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** 'bls_onnx_warmup' test FAILED. \n***"
+    echo -e "\n***\n*** 'bls_plan_warmup' test FAILED. \n***"
     cat $CLIENT_LOG
     RET=1
 fi
diff --git a/qa/common/check_copyright.py b/qa/common/check_copyright.py
index ff18ca8e39..aa597b51d6 100755
--- a/qa/common/check_copyright.py
+++ b/qa/common/check_copyright.py
@@ -54,6 +54,7 @@
     "deploy/gke-marketplace-app/server-deployer/chart/.helmignore",
     "deploy/gcp/.helmignore",
     "deploy/aws/.helmignore",
+    "deploy/oci/.helmignore",
     "deploy/fleetcommand/.helmignore",
     "docs/.gitignore",
     "docs/_static/.gitattributes",
diff --git a/qa/python_models/bls_model_loading/model.py b/qa/python_models/bls_model_loading/model.py
index 84162e2fac..40f30989aa 100644
--- a/qa/python_models/bls_model_loading/model.py
+++ b/qa/python_models/bls_model_loading/model.py
@@ -33,7 +33,7 @@ class PBBLSModelLoadingTest(unittest.TestCase):
     def setUp(self):
-        self.model_name = "onnx_int32_int32_int32"
+        self.model_name = "plan_int32_int32_int32"
     def tearDown(self):
         # The unload call does not wait for the requested model to be fully
@@ -57,7 +57,7 @@ def test_load_with_config_override(self):
         self.assertTrue(pb_utils.is_model_ready(self.model_name))
         # Send the config with the wrong format
-        wrong_config = '"parameters": {"config": {{"backend":"onnxruntime", "version_policy":{"specific":{"versions":[2]}}}}}'
+        wrong_config = '"parameters": {"config": {{"backend":"tensorrt", "version_policy":{"specific":{"versions":[2]}}}}}'
         with self.assertRaises(pb_utils.TritonModelException):
             pb_utils.load_model(model_name=self.model_name, config=wrong_config)
         # The model should not be changed after a failed load model request
@@ -70,7 +70,7 @@ def test_load_with_config_override(self):
         # Send the config with the correct format
         config = (
-            '{"backend":"onnxruntime", "version_policy":{"specific":{"versions":[2]}}}'
+            '{"backend":"tensorrt", "version_policy":{"specific":{"versions":[2]}}}'
         )
         pb_utils.load_model(self.model_name, config=config)
         # The model should be changed after a successful load model request
@@ -83,10 +83,10 @@ def test_load_with_file_override(self):
         self.assertTrue(pb_utils.is_model_ready(self.model_name))
         override_name = "override_model"
-        config = '{"backend":"onnxruntime"}'
-        with open("models/onnx_int32_int32_int32/3/model.onnx", "rb") as file:
+        config = '{"backend":"tensorrt"}'
+        with open("models/plan_int32_int32_int32/3/model.plan", "rb") as file:
             data = file.read()
-        files = {"file:1/model.onnx": data}
+        files = {"file:1/model.plan": data}
         # Request to load the model with override file, should fail without
         # providing override config.
diff --git a/qa/python_models/bls_onnx_warmup/config.pbtxt b/qa/python_models/bls_onnx_warmup/config.pbtxt
index 879f85ca81..a549cc39e5 100644
--- a/qa/python_models/bls_onnx_warmup/config.pbtxt
+++ b/qa/python_models/bls_onnx_warmup/config.pbtxt
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-name: "bls_onnx_warmup"
 backend: "python"
 output [