diff --git a/qa/L0_backend_config/test.sh b/qa/L0_backend_config/test.sh index b898735798..8ee90babaf 100755 --- a/qa/L0_backend_config/test.sh +++ b/qa/L0_backend_config/test.sh @@ -236,6 +236,8 @@ else fi +: ' +# Disabling onnxruntime tests for r24.02 release # Onnxruntime: Batching ON rm -rf ./models/ mkdir -p ./models/no_config @@ -306,17 +308,18 @@ else wait $SERVER_PID fi +' # # General backend tests # # We want to make sure that backend configurations -# are not lost. For this purpose we are using only onnx backend +# are not lost. For this purpose we are using only tensorflow backend rm -rf ./models/ mkdir -p ./models/no_config/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_float32_float32_float32/1 ./models/no_config/ +cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/savedmodel_float32_float32_float32/1 ./models/no_config/ # First getting a baseline for the number of default configs # added during a server set up @@ -351,7 +354,7 @@ fi # One of defaultconfigs is `min-compute-capability`. This test # checks if it is properlly overridden. MIN_COMPUTE_CAPABILITY=XX -SERVER_ARGS="--backend-config=onnxruntime,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS" +SERVER_ARGS="--backend-config=tensorflow,min-compute-capability=$MIN_COMPUTE_CAPABILITY $COMMON_ARGS" SERVER_LOG=$SERVER_LOG_BASE.global_configs.log run_server @@ -374,7 +377,7 @@ else fi # Now make sure that specific backend configs are not lost. -SERVER_ARGS="--backend-config=onnxruntime,a=0 --backend-config=onnxruntime,y=0 --backend-config=onnxruntime,z=0 $COMMON_ARGS" +SERVER_ARGS="--backend-config=tensorflow,a=0 --backend-config=tensorflow,y=0 --backend-config=tensorflow,z=0 $COMMON_ARGS" SERVER_LOG=$SERVER_LOG_BASE.specific_configs.log EXPECTED_CONFIG_COUNT=$(($DEFAULT_CONFIG_COUNT+3)) run_server @@ -398,7 +401,6 @@ else fi - # Print test outcome if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index 1848af125c..429a6622d7 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -41,6 +41,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then mkdir -p models/bls/1/ cp ../../python_models/bls/model.py models/bls/1/ cp ../../python_models/bls/config.pbtxt models/bls + sed -i 's/onnx_nobatch_sequence_int32/plan_nobatch_sequence_int32/g' models/bls/1/model.py mkdir -p models/dlpack_add_sub/1/ cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/ @@ -74,7 +75,7 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then cp ../../python_models/dlpack_identity/model.py models/dlpack_identity/1/ cp ../../python_models/dlpack_identity/config.pbtxt models/dlpack_identity - cp -r ${DATADIR}/qa_sequence_implicit_model_repository/onnx_nobatch_sequence_int32/ ./models + cp -r ${DATADIR}/qa_sequence_implicit_model_repository/plan_nobatch_sequence_int32/ ./models git clone https://github.com/triton-inference-server/python_backend -b $PYTHON_BACKEND_REPO_TAG mkdir -p models/square_int32/1/ @@ -219,9 +220,9 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then mkdir -p models/bls_model_loading/1/ cp ../../python_models/bls_model_loading/model.py models/bls_model_loading/1/ cp ../../python_models/bls_model_loading/config.pbtxt models/bls_model_loading/ - cp -fr ${DATADIR}/qa_model_repository/onnx_int32_int32_int32 models/. + cp -fr ${DATADIR}/qa_model_repository/plan_int32_int32_int32 models/. 
# Make only version 2, 3 is valid version directory - rm -rf models/onnx_int32_int32_int32/1 + rm -rf models/plan_int32_int32_int32/1 SERVER_LOG="./bls_model_loading_server.log" SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1" diff --git a/qa/L0_batch_custom/batch_custom_test.py b/qa/L0_batch_custom/batch_custom_test.py index 6cd6346ad3..1585cf2848 100755 --- a/qa/L0_batch_custom/batch_custom_test.py +++ b/qa/L0_batch_custom/batch_custom_test.py @@ -232,7 +232,7 @@ def test_volume_batching(self): # Send 12 requests with batch size 1. The max_queue_delay is set # to non-zero. Depending upon the timing of the requests arrival # there can be either 4-6 model executions. - model_base = "onnx" + model_base = "savedmodel" dtype = np.float16 shapes = ( [ diff --git a/qa/L0_batch_custom/test.sh b/qa/L0_batch_custom/test.sh index 01701df661..64b8665d23 100755 --- a/qa/L0_batch_custom/test.sh +++ b/qa/L0_batch_custom/test.sh @@ -46,7 +46,7 @@ BATCH_CUSTOM_TEST=batch_custom_test.py CLIENT_LOG_BASE="./client.log" DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository EXPECTED_NUM_TESTS="1" -MODEL_NAME="onnx_zero_1_float16" +MODEL_NAME="savedmodel_zero_1_float16" SERVER=/opt/tritonserver/bin/tritonserver SERVER_ARGS="--model-repository=models --log-verbose 1" SERVER_LOG_BASE="./inference_server.log" @@ -101,7 +101,7 @@ cp -r backend/examples/batching_strategies/single_batching/build/libtriton_singl # Run a test to validate the single batching strategy example. # Then, run tests to validate the volume batching example being passed in via the backend dir, model dir, version dir, and model config. -BACKEND_DIR="/opt/tritonserver/backends/onnxruntime" +BACKEND_DIR="/opt/tritonserver/backends/tensorflow" MODEL_DIR="models/$MODEL_NAME" VERSION_DIR="$MODEL_DIR/1/" diff --git a/qa/L0_batch_input/test.sh b/qa/L0_batch_input/test.sh index e780516ec4..ac00e4f6b4 100755 --- a/qa/L0_batch_input/test.sh +++ b/qa/L0_batch_input/test.sh @@ -60,6 +60,9 @@ rm -f $SERVER_LOG $CLIENT_LOG RET=0 for BACKEND in $BACKENDS; do + if [[ "$BACKEND" == 'onnx' ]]; then + continue + fi rm -rf models && mkdir models cp -r $DATADIR/${BACKEND}_batch_input models/ragged_element_count_acc_zero (cd models/ragged_element_count_acc_zero && \ diff --git a/qa/L0_batcher/test.sh b/qa/L0_batcher/test.sh index c5f8819276..2b48819823 100755 --- a/qa/L0_batcher/test.sh +++ b/qa/L0_batcher/test.sh @@ -107,7 +107,7 @@ source ../common/util.sh RET=0 # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan python"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan python"} export BACKENDS # Basic batcher tests @@ -237,6 +237,7 @@ if [[ $BACKENDS == *"plan"* ]]; then dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt) fi +: ' if [[ $BACKENDS == *"onnx"* ]]; then # Use nobatch model to match the ragged test requirement cp -r $DATADIR/qa_identity_model_repository/onnx_nobatch_zero_1_float32 var_models/onnx_zero_1_float32 && \ @@ -249,6 +250,7 @@ if [[ $BACKENDS == *"onnx"* ]]; then source_input: \"INPUT0\" }] \ dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> config.pbtxt) fi +' if [[ $BACKENDS == *"libtorch"* ]]; then # Use nobatch model to match the ragged test requirement diff --git a/qa/L0_client_nobatch/test.sh b/qa/L0_client_nobatch/test.sh index 58b1b3dc58..78eafc80c6 100755 --- 
a/qa/L0_client_nobatch/test.sh +++ b/qa/L0_client_nobatch/test.sh @@ -47,8 +47,13 @@ EXPECTED_NUM_TESTS="4" DATADIR=/data/inferenceserver/${REPO_VERSION} +rm -fr models && mkdir models +cp -r $DATADIR/qa_model_repository/* models +rm `find ./models/ -name '*onnx*'` -rf + + SERVER=/opt/tritonserver/bin/tritonserver -SERVER_ARGS="--model-repository=$DATADIR/qa_model_repository" +SERVER_ARGS="--model-repository=models" SERVER_LOG="./inference_server.log" source ../common/util.sh diff --git a/qa/L0_cmdline_trace/test.sh b/qa/L0_cmdline_trace/test.sh index d0f86dc2a9..5bb4788eab 100755 --- a/qa/L0_cmdline_trace/test.sh +++ b/qa/L0_cmdline_trace/test.sh @@ -58,7 +58,7 @@ export CUDA_VISIBLE_DEVICES=0 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/ -MODELBASE=onnx_int32_int32_int32 +MODELBASE=savedmodel_int32_int32_int32 MODELSDIR=`pwd`/trace_models @@ -78,6 +78,8 @@ rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \ RET=0 +ls $MODELSDIR + # trace-level=OFF make sure no tracing SERVER_ARGS="--trace-file=trace_off.log --trace-level=OFF --trace-rate=1 --model-repository=$MODELSDIR" SERVER_LOG="./inference_server_off.log" diff --git a/qa/L0_compute_capability/test.sh b/qa/L0_compute_capability/test.sh index d85acb1b6e..d4c4fca424 100755 --- a/qa/L0_compute_capability/test.sh +++ b/qa/L0_compute_capability/test.sh @@ -53,6 +53,9 @@ RET=0 BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"} for BACKEND in $BACKENDS; do + if [[ "$BACKEND" == 'onnx' ]]; then + continue + fi # Need just one model for the backend... rm -fr models && mkdir models cp -r ${DATADIR}/qa_model_repository/${BACKEND}_float32_float32_float32 \ diff --git a/qa/L0_custom_ops/test.sh b/qa/L0_custom_ops/test.sh index a12c1d67a4..33daa16921 100755 --- a/qa/L0_custom_ops/test.sh +++ b/qa/L0_custom_ops/test.sh @@ -45,7 +45,7 @@ ZERO_OUT_TEST=zero_out_test.py CUDA_OP_TEST=cuda_op_test.py MOD_OP_TEST=mod_op_test.py VISION_OP_TEST=vision_op_test.py -ONNX_OP_TEST=onnx_op_test.py +#ONNX_OP_TEST=onnx_op_test.py SERVER=/opt/tritonserver/bin/tritonserver SERVER_LOG="./inference_server.log" @@ -204,6 +204,7 @@ fi kill $SERVER_PID wait $SERVER_PID +: ' # ONNX rm -rf onnx_custom_ops && \ mkdir -p onnx_custom_ops/custom_op/1 && \ @@ -233,6 +234,7 @@ if [ $? -ne 0 ]; then RET=1 fi + set -e if [ $RET -eq 0 ]; then @@ -241,5 +243,6 @@ fi kill $SERVER_PID wait $SERVER_PID +' exit $RET diff --git a/qa/L0_device_memory_tracker/test.sh b/qa/L0_device_memory_tracker/test.sh index 7eb0d745da..fa2edd9315 100755 --- a/qa/L0_device_memory_tracker/test.sh +++ b/qa/L0_device_memory_tracker/test.sh @@ -57,9 +57,9 @@ RET=0 # prepare model repository, only contains ONNX and TRT models as the # corresponding backend are known to be memory. rm -rf models && mkdir models -# ONNX -cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/. -rm -r models/*cpu +## ONNX +##cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/. +#rm -r models/*cpu # Convert to get TRT models against the system CAFFE2PLAN=../common/caffe2plan @@ -92,7 +92,7 @@ set -e # Set multiple instances on selected model to test instance-wise collection # and accumulation. 
echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/resnet152_plan/config.pbtxt -echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/densenet/config.pbtxt +#echo "instance_group [{ count: 2; kind: KIND_GPU }]" >> models/densenet/config.pbtxt # testing use nvidia-smi for Python to validate the reported usage pip install nvidia-ml-py3 diff --git a/qa/L0_dyna_implicit_state/test.sh b/qa/L0_dyna_implicit_state/test.sh index 0721d5cd32..81eab8f7d9 100755 --- a/qa/L0_dyna_implicit_state/test.sh +++ b/qa/L0_dyna_implicit_state/test.sh @@ -39,7 +39,7 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then fi export ENSEMBLES=0 -BACKENDS=${BACKENDS:="onnx plan"} +BACKENDS=${BACKENDS:="plan"} export BACKENDS export IMPLICIT_STATE=1 diff --git a/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py b/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py index f2c709469b..02ef9e7f39 100755 --- a/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py +++ b/qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py @@ -47,12 +47,19 @@ BACKENDS = os.environ.get( "BACKENDS", "graphdef savedmodel libtorch onnx plan custom custom_string" ) + IMPLICIT_STATE = int(os.environ["IMPLICIT_STATE"]) == 1 _trials = BACKENDS.split(" ") +if "onnx" in _trials: + _trials.remove("onnx") for backend in BACKENDS.split(" "): if NO_BATCHING: - if (backend != "custom") and (backend != "custom_string"): + if ( + (backend != "custom") + and (backend != "custom_string") + and (backend != "onnx") + ): _trials += (backend + "_nobatch",) _ragged_batch_supported_trials = [] diff --git a/qa/L0_dyna_sequence_batcher/test.sh b/qa/L0_dyna_sequence_batcher/test.sh index acac8399af..4ded6dd3fa 100755 --- a/qa/L0_dyna_sequence_batcher/test.sh +++ b/qa/L0_dyna_sequence_batcher/test.sh @@ -53,7 +53,7 @@ IMPLICIT_STATE=${IMPLICIT_STATE:="0"} export IMPLICIT_STATE # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel libtorch onnx plan custom custom_string"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan custom custom_string"} export BACKENDS MODEL_REPOSITORY='' @@ -95,6 +95,8 @@ if [ $IMPLICIT_STATE == "0" ]; then sed -i "s/name:.*\"INPUT\"/name: \"INPUT\"\\nallow_ragged_batch: true/" config.pbtxt) fi +rm `find ./models/ -name '*onnx*'` -rf + # Need to launch the server for each test so that the model status is # reset (which is used to make sure the correct batch size was used # for execution). Test everything with fixed-tensor-size models and diff --git a/qa/L0_grpc/test.sh b/qa/L0_grpc/test.sh index 73b9710a71..2f1a17fdcc 100755 --- a/qa/L0_grpc/test.sh +++ b/qa/L0_grpc/test.sh @@ -489,7 +489,8 @@ wait $SERVER_PID # Run cpp client unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt cp -r ${MODELDIR}/simple unit_test_models/. SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models @@ -517,22 +518,23 @@ wait $SERVER_PID # Run cpp client load API unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. 
+cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple/ +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt # Make only version 2, 3 is valid version directory while config requests 1, 3 -rm -rf unit_test_models/onnx_int32_int32_int32/1 +rm -rf unit_test_models/client_test_simple/1 -# Start with EXPLICIT mode and load onnx_float32_float32_float32 +# Start with EXPLICIT mode and load client_test_simple SERVER_ARGS="--model-repository=`pwd`/unit_test_models \ --model-control-mode=explicit \ - --load-model=onnx_int32_int32_int32 \ + --load-model=client_test_simple \ --strict-model-config=false" -SERVER_LOG="./inference_server_cc_unit_test.load.log" CLIENT_LOG="./cc_unit_test.load.log" for i in \ "LoadWithFileOverride" \ "LoadWithConfigOverride" \ ; do + SERVER_LOG="./inference_server_cc_unit_test.load."$i".log" run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" diff --git a/qa/L0_http/http_test.py b/qa/L0_http/http_test.py index 4432fe9186..769cbc605c 100755 --- a/qa/L0_http/http_test.py +++ b/qa/L0_http/http_test.py @@ -66,20 +66,20 @@ def _raw_binary_helper( ) def test_raw_binary(self): - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" input_bytes = np.arange(8, dtype=np.float32).tobytes() self._raw_binary_helper(model, input_bytes, input_bytes) def test_raw_binary_longer(self): # Similar to test_raw_binary but test with different data size - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" input_bytes = np.arange(32, dtype=np.float32).tobytes() self._raw_binary_helper(model, input_bytes, input_bytes) def test_byte(self): # Select model that satisfies constraints for raw binary request # i.e. BYTE type the element count must be 1 - model = "onnx_zero_1_object_1_element" + model = "savedmodel_zero_1_object_1_element" input = "427" headers = {"Inference-Header-Content-Length": "0"} r = requests.post(self._get_infer_url(model), data=input, headers=headers) @@ -100,7 +100,7 @@ def test_byte(self): def test_byte_too_many_elements(self): # Select model that doesn't satisfy constraints for raw binary request # i.e. BYTE type the element count must be 1 - model = "onnx_zero_1_object" + model = "savedmodel_zero_1_object" input = "427" headers = {"Inference-Header-Content-Length": "0"} r = requests.post(self._get_infer_url(model), data=input, headers=headers) @@ -119,7 +119,7 @@ def test_byte_too_many_elements(self): def test_multi_variable_dimensions(self): # Select model that doesn't satisfy constraints for raw binary request # i.e. this model has multiple variable-sized dimensions - model = "onnx_zero_1_float16" + model = "savedmodel_zero_1_float16" input = np.ones([2, 2], dtype=np.float16) headers = {"Inference-Header-Content-Length": "0"} r = requests.post( @@ -140,7 +140,7 @@ def test_multi_variable_dimensions(self): def test_multi_inputs(self): # Select model that doesn't satisfy constraints for raw binary request # i.e. 
input count must be 1 - model = "onnx_zero_3_float32" + model = "savedmodel_zero_3_float32" # Use one numpy array, after tobytes() it can be seen as three inputs # each with 8 elements (this ambiguity is why this is not allowed) input = np.arange(24, dtype=np.float32) @@ -167,7 +167,7 @@ def test_multi_inputs(self): def test_content_encoding_chunked_manually(self): # Similar to test_raw_binary but test with extra headers extra_headers = {"Transfer-Encoding": "chunked"} - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" input_bytes = np.arange(8, dtype=np.float32).tobytes() # Encode input into a single chunk (for simplicity) following chunked # encoding format: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding @@ -189,7 +189,7 @@ def test_content_encoding_unsupported_client(self): with self.subTest(encoding=encoding): headers = {"Transfer-Encoding": encoding} np_input = np.arange(8, dtype=np.float32).reshape(1, -1) - model = "onnx_zero_1_float32" + model = "savedmodel_zero_1_float32" # Setup inputs inputs = [] inputs.append( @@ -208,7 +208,7 @@ def test_content_encoding_unsupported_client(self): client.infer(model_name=model, inputs=inputs, headers=headers) def test_descriptive_status_code(self): - model = "onnx_zero_1_float32_queue" + model = "savedmodel_zero_1_float32_queue" input_bytes = np.arange(8, dtype=np.float32).tobytes() # Send two requests to model that only queues 1 request at the maximum, diff --git a/qa/L0_http/test.sh b/qa/L0_http/test.sh index 2b78305452..a247a9c94f 100755 --- a/qa/L0_http/test.sh +++ b/qa/L0_http/test.sh @@ -504,7 +504,8 @@ wait $SERVER_PID # Run cpp client unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt cp -r ${MODELDIR}/simple unit_test_models/. SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=unit_test_models @@ -532,14 +533,15 @@ wait $SERVER_PID # Run cpp client load API unit test rm -rf unit_test_models && mkdir unit_test_models -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 unit_test_models/. +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 unit_test_models/client_test_simple/ +sed -i "s/plan_int32_int32_int32/client_test_simple/g" unit_test_models/client_test_simple/config.pbtxt # Make only version 2, 3 is valid version directory while config requests 1, 3 -rm -rf unit_test_models/onnx_int32_int32_int32/1 +rm -rf unit_test_models/client_test_simple/1 -# Start with EXPLICIT mode and load onnx_float32_float32_float32 +# Start with EXPLICIT mode and load client_test_simple SERVER_ARGS="--model-repository=`pwd`/unit_test_models \ --model-control-mode=explicit \ - --load-model=onnx_int32_int32_int32 \ + --load-model=client_test_simple \ --strict-model-config=false" SERVER_LOG="./inference_server_cc_unit_test.load.log" CLIENT_LOG="./cc_unit_test.load.log" @@ -592,18 +594,18 @@ wait $SERVER_PID MODELDIR=python_unit_test_models mkdir -p $MODELDIR rm -rf ${MODELDIR}/* -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/. -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_object ${MODELDIR}/. -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float16 ${MODELDIR}/. -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_3_float32 ${MODELDIR}/. 
-cp -r ${MODELDIR}/onnx_zero_1_object ${MODELDIR}/onnx_zero_1_object_1_element && \ - (cd $MODELDIR/onnx_zero_1_object_1_element && \ - sed -i "s/onnx_zero_1_object/onnx_zero_1_object_1_element/" config.pbtxt && \ +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_1_float32 ${MODELDIR}/. +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_1_object ${MODELDIR}/. +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_1_float16 ${MODELDIR}/. +cp -r $DATADIR/qa_identity_model_repository/savedmodel_zero_3_float32 ${MODELDIR}/. +cp -r ${MODELDIR}/savedmodel_zero_1_object ${MODELDIR}/savedmodel_zero_1_object_1_element && \ + (cd $MODELDIR/savedmodel_zero_1_object_1_element && \ + sed -i "s/savedmodel_zero_1_object/savedmodel_zero_1_object_1_element/" config.pbtxt && \ sed -i "0,/-1/{s/-1/1/}" config.pbtxt) # Model for error code test -cp -r ${MODELDIR}/onnx_zero_1_float32 ${MODELDIR}/onnx_zero_1_float32_queue && \ - (cd $MODELDIR/onnx_zero_1_float32_queue && \ - sed -i "s/onnx_zero_1_float32/onnx_zero_1_float32_queue/" config.pbtxt && \ +cp -r ${MODELDIR}/savedmodel_zero_1_float32 ${MODELDIR}/savedmodel_zero_1_float32_queue && \ + (cd $MODELDIR/savedmodel_zero_1_float32_queue && \ + sed -i "s/savedmodel_zero_1_float32/savedmodel_zero_1_float32_queue/" config.pbtxt && \ echo "dynamic_batching { " >> config.pbtxt && \ echo " max_queue_delay_microseconds: 1000000" >> config.pbtxt && \ echo " preferred_batch_size: [ 8 ]" >> config.pbtxt && \ diff --git a/qa/L0_implicit_state/implicit_state.py b/qa/L0_implicit_state/implicit_state.py index 2cdf7ff2e0..ed9d641d46 100755 --- a/qa/L0_implicit_state/implicit_state.py +++ b/qa/L0_implicit_state/implicit_state.py @@ -193,6 +193,9 @@ def test_request_output_not_allowed(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") for backend in BACKENDS.split(" "): + if backend.strip() == "onnx": + continue + inputs = [] if backend.strip() == "libtorch": inputs.append(tritonhttpclient.InferInput("INPUT__0", [1], "INT32")) @@ -229,6 +232,9 @@ def test_request_output_not_allowed(self): def test_request_output(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") for backend in BACKENDS.split(" "): + if backend.strip() == "onnx": + continue + inputs = [] if backend.strip() == "libtorch": inputs.append(tritonhttpclient.InferInput("INPUT__0", [1], "INT32")) diff --git a/qa/L0_implicit_state/test.sh b/qa/L0_implicit_state/test.sh index 0722d29be1..524aed921f 100755 --- a/qa/L0_implicit_state/test.sh +++ b/qa/L0_implicit_state/test.sh @@ -72,6 +72,9 @@ mkdir -p models/single_state_buffer/1/ mkdir -p models/growable_memory/1/ for BACKEND in $BACKENDS; do + if [[ "$BACKEND" == 'onnx' ]]; then + continue + fi dtype="int32" model_name=${BACKEND}_nobatch_sequence_${dtype} rm -rf models/$model_name @@ -132,6 +135,7 @@ wait $SERVER_PID (cd ../L0_sequence_batcher/ && bash -ex test.sh) RET=$? 
+ if [ $RET == 0 ]; then echo -e "\n***\n*** Implicit State Passed\n***" else diff --git a/qa/L0_infer/test.sh b/qa/L0_infer/test.sh index 34a669f874..cfebd8dec3 100755 --- a/qa/L0_infer/test.sh +++ b/qa/L0_infer/test.sh @@ -129,7 +129,7 @@ if [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then fi # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan python python_dlpack openvino"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan python python_dlpack openvino"} export BACKENDS # If ENSEMBLES not specified, set to 1 @@ -210,6 +210,8 @@ function generate_model_repository() { elif [ "$BACKEND" == "plan" ] && [ "$TRITON_SERVER_CPU_ONLY" == "1" ]; then # skip plan_tensorrt models since they don't run on CPU only containers continue + elif [ "$BACKEND" == "onnx" ]; then + continue else cp -r ${DATADIR}/qa_model_repository/${BACKEND}* \ models/. @@ -251,12 +253,7 @@ function generate_model_repository() { KIND="KIND_GPU" && [[ "$TARGET" == "cpu" ]] && KIND="KIND_CPU" for FW in $BACKENDS; do - if [ "$FW" == "onnx" ] && [ "$TEST_VALGRIND" -eq 1 ]; then - # Reduce the instance count to make loading onnx models faster - for MC in `ls models/${FW}*/config.pbtxt`; do - echo "instance_group [ { kind: ${KIND} count: 1 }]" >> $MC - done - elif [ "$FW" != "plan" ] && [ "$FW" != "python" ] && [ "$FW" != "python_dlpack" ] && [ "$FW" != "openvino" ];then + if [ "$FW" != "plan" ] && [ "$FW" != "python" ] && [ "$FW" != "python_dlpack" ] && [ "$FW" != "openvino" ];then for MC in `ls models/${FW}*/config.pbtxt`; do echo "instance_group [ { kind: ${KIND} }]" >> $MC done @@ -348,7 +345,7 @@ done # Loading all the onnx models at once requires more than 12 hours. Loading them # separately to reduce the loading time. if [ "$TEST_VALGRIND" -eq 1 ]; then - TESTING_BACKENDS="python python_dlpack onnx" + TESTING_BACKENDS="python python_dlpack" EXPECTED_NUM_TESTS=42 if [[ "aarch64" != $(uname -m) ]] ; then pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html diff --git a/qa/L0_infer_reshape/infer_reshape_test.py b/qa/L0_infer_reshape/infer_reshape_test.py index e77dcbecaf..a277ffdb97 100755 --- a/qa/L0_infer_reshape/infer_reshape_test.py +++ b/qa/L0_infer_reshape/infer_reshape_test.py @@ -112,48 +112,6 @@ def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True): use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY, ) - if tu.validate_for_onnx_model( - dtype, dtype, dtype, input_shapes[0], input_shapes[0], input_shapes[0] - ): - # model that supports batching - for bs in (1, 8): - full_shapes = [ - [ - bs, - ] - + input_shape - for input_shape in input_shapes - ] - full_output_shapes = [ - [ - bs, - ] - + output_shape - for output_shape in output_shapes - ] - iu.infer_zero( - self, - "onnx", - bs, - dtype, - full_shapes, - full_output_shapes, - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY, - ) - # model that does not support batching - if no_batch: - iu.infer_zero( - self, - "onnx_nobatch", - 1, - dtype, - input_shapes, - output_shapes, - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY, - ) - if tu.validate_for_libtorch_model( dtype, dtype, diff --git a/qa/L0_infer_reshape/test.sh b/qa/L0_infer_reshape/test.sh index 218be954d9..cc80d407e6 100755 --- a/qa/L0_infer_reshape/test.sh +++ b/qa/L0_infer_reshape/test.sh @@ -76,6 +76,7 @@ for i in \ done create_nop_version_dir `pwd`/models +rm `find ./models/ -name '*onnx*'` -rf 
RET=0 diff --git a/qa/L0_infer_variable/infer_variable_test.py b/qa/L0_infer_variable/infer_variable_test.py index e5e6470a3c..54c1559d53 100755 --- a/qa/L0_infer_variable/infer_variable_test.py +++ b/qa/L0_infer_variable/infer_variable_test.py @@ -205,28 +205,6 @@ def _infer_exact_helper( swap=swap, ) - if tu.validate_for_onnx_model( - input_dtype, - output0_dtype, - output1_dtype, - input_shape, - output0_shape, - output1_shape, - ): - # No basic ensemble models are created against custom models [TODO] - _infer_exact_helper( - self, - "onnx", - input_shape, - 8, - input_dtype, - output0_dtype, - output1_dtype, - output0_raw=output0_raw, - output1_raw=output1_raw, - swap=swap, - ) - if tu.validate_for_libtorch_model( input_dtype, output0_dtype, diff --git a/qa/L0_infer_variable/test.sh b/qa/L0_infer_variable/test.sh index 9760583b94..22e25fcc03 100755 --- a/qa/L0_infer_variable/test.sh +++ b/qa/L0_infer_variable/test.sh @@ -74,6 +74,7 @@ for TARGET in cpu gpu; do done done + rm `find ./models/ -name '*onnx*'` -rf run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" diff --git a/qa/L0_infer_zero/test.sh b/qa/L0_infer_zero/test.sh index 02676b2f85..b3ae9dc247 100755 --- a/qa/L0_infer_zero/test.sh +++ b/qa/L0_infer_zero/test.sh @@ -54,6 +54,7 @@ rm -f $SERVER_LOG $CLIENT_LOG rm -fr models && mkdir models cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/* models/. && \ cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_identity_model_repository/* models/. +rm `find ./models/ -name '*onnx*'` -rf # Remove version-compatible TensorRT models, as they require version-compatibility # mode to be turned on when starting the server. diff --git a/qa/L0_io/test.sh b/qa/L0_io/test.sh index 84ab4fb0c0..6da32f9bac 100755 --- a/qa/L0_io/test.sh +++ b/qa/L0_io/test.sh @@ -60,7 +60,7 @@ RET=0 # Prepare float32 models with basic config rm -rf $MODELSDIR -for trial in graphdef savedmodel onnx libtorch plan python python_dlpack; do +for trial in graphdef savedmodel libtorch plan python python_dlpack; do full=${trial}_float32_float32_float32 if [ "$trial" == "python" ]; then mkdir -p $MODELSDIR/${full}/1 && \ @@ -126,7 +126,7 @@ for trial in graphdef savedmodel onnx libtorch plan python python_dlpack; do done # Prepare string models with basic config -for trial in graphdef savedmodel onnx ; do +for trial in graphdef savedmodel ; do full=${trial}_object_object_object mkdir -p $MODELSDIR/${full}/1 && \ cp -r $DATADIR/${full}/1/* $MODELSDIR/${full}/1/. && \ @@ -163,7 +163,7 @@ if [ $? 
-ne 0 ]; then fi set -e -TRIALS="graphdef savedmodel onnx libtorch plan python python_dlpack libtorch_multi_gpu libtorch_multi_device" +TRIALS="graphdef savedmodel libtorch plan python python_dlpack libtorch_multi_gpu libtorch_multi_device" for input_device in -1 0 1; do for output_device in -1 0 1; do for trial in ${TRIALS}; do @@ -230,7 +230,7 @@ for input_device in -1 0 1; do done done - for trial in graphdef savedmodel onnx; do + for trial in graphdef savedmodel; do model_devices="-1 0 1" for model_device in $model_devices; do full=${trial}_object_object_object diff --git a/qa/L0_java_resnet/test.sh b/qa/L0_java_resnet/test.sh index 1ca08b4c65..2aa2319824 100755 --- a/qa/L0_java_resnet/test.sh +++ b/qa/L0_java_resnet/test.sh @@ -47,7 +47,7 @@ JAVACPP_BRANCH_TAG=${JAVACPP_BRANCH_TAG:="master"} # Create local model repository mkdir -p ${MODEL_REPO} # TODO: fix build to support GPU only resnet50v1.5_fp16_savedmodel -for BACKEND in _fp32_libtorch _fp32_onnx; do +for BACKEND in _fp32_libtorch ; do cp -r $DATADIR/perf_model_store/resnet50${BACKEND} ${MODEL_REPO}/ echo ${MODEL_REPO}/resnet50${BACKEND}/config.pbtxt sed -i "s/kind: KIND_GPU/kind: KIND_CPU/" ${MODEL_REPO}/resnet50${BACKEND}/config.pbtxt @@ -78,7 +78,7 @@ if [ $? -ne 0 ]; then fi # TODO: fix build to support GPU only resnet so can test TF as well -for BACKEND in ONNX TORCH; do +for BACKEND in TORCH; do if [ `grep -c "${BACKEND} test PASSED" ${CLIENT_LOG}` != "1" ]; then echo -e "\n***\n*** ${BACKEND} backend test FAILED. Expected '${BACKEND} test PASSED'\n***" RET=1 diff --git a/qa/L0_java_sequence_batcher/test.sh b/qa/L0_java_sequence_batcher/test.sh index 2f988322d9..243951a81a 100755 --- a/qa/L0_java_sequence_batcher/test.sh +++ b/qa/L0_java_sequence_batcher/test.sh @@ -62,7 +62,7 @@ sed -i 's/Simple/SequenceTest/g' $SAMPLES_REPO/pom.xml rm -f *.log RET=0 -for BACKEND in graphdef libtorch onnx savedmodel; do +for BACKEND in graphdef libtorch savedmodel; do # Create local model repository mkdir -p ${MODEL_REPO} MODEL=${BACKEND}_nobatch_sequence_int32 diff --git a/qa/L0_large_payload/large_payload_test.py b/qa/L0_large_payload/large_payload_test.py index fff57290ef..6dc2fc890b 100755 --- a/qa/L0_large_payload/large_payload_test.py +++ b/qa/L0_large_payload/large_payload_test.py @@ -148,7 +148,7 @@ def test_savedmodel(self): ) self._test_helper(client, model_name) - def test_onnx(self): + def _test_onnx(self): # onnx_nobatch_zero_1_float32 is identity model with input shape [-1] for client in self._clients: model_name = tu.get_zero_model_name("onnx_nobatch", 1, self._data_type) diff --git a/qa/L0_large_payload/test.sh b/qa/L0_large_payload/test.sh index 325cab4ed5..89a0448c41 100755 --- a/qa/L0_large_payload/test.sh +++ b/qa/L0_large_payload/test.sh @@ -56,7 +56,7 @@ RET=0 MODEL_SUFFIX=nobatch_zero_1_float32 rm -fr all_models && mkdir all_models -for TARGET in graphdef savedmodel onnx libtorch plan; do +for TARGET in graphdef savedmodel libtorch plan; do cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/${TARGET}_$MODEL_SUFFIX \ all_models/. done @@ -71,7 +71,7 @@ cp ../python_models/identity_fp32/model.py all_models/python_$MODEL_SUFFIX/1/mod # Restart server before every test to make sure server state # is invariant to previous test -for TARGET in graphdef savedmodel onnx libtorch plan python; do +for TARGET in graphdef savedmodel libtorch plan python; do rm -fr models && mkdir models && \ cp -r all_models/${TARGET}_$MODEL_SUFFIX models/. 
diff --git a/qa/L0_lifecycle/lifecycle_test.py b/qa/L0_lifecycle/lifecycle_test.py index 9130d2ee02..1342158823 100755 --- a/qa/L0_lifecycle/lifecycle_test.py +++ b/qa/L0_lifecycle/lifecycle_test.py @@ -205,7 +205,7 @@ def test_parse_error_modelfail(self): # And other models should be loaded successfully try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), grpcclient.InferenceServerClient("localhost:8001", verbose=True), @@ -268,7 +268,7 @@ def test_parse_error_modelfail_nostrict(self): # And other models should be loaded successfully try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), grpcclient.InferenceServerClient("localhost:8001", verbose=True), @@ -324,7 +324,7 @@ def test_parse_error_no_model_config(self): # And other models should be loaded successfully try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: model_name = tu.get_model_name( base_name, np.float32, np.float32, np.float32 ) @@ -356,7 +356,7 @@ def test_init_error_modelfail(self): self.assertFalse(triton_client.is_server_ready()) # one model uses sequence batcher while the other uses dynamic batcher - model_names = ["onnx_sequence_int32", "onnx_int32_int32_int32"] + model_names = ["libtorch_sequence_int32", "libtorch_int32_int32_int32"] for model_name in model_names: self.assertFalse(triton_client.is_model_ready(model_name)) @@ -365,7 +365,7 @@ def test_init_error_modelfail(self): # And other models should be loaded successfully try: - for base_name in ["graphdef", "savedmodel", "onnx"]: + for base_name in ["graphdef", "savedmodel", "libtorch"]: model_name = tu.get_model_name( base_name, np.float32, np.float32, np.float32 ) @@ -375,7 +375,7 @@ def test_init_error_modelfail(self): try: tensor_shape = (1, 16) - for base_name in ["graphdef", "savedmodel", "onnx"]: + for base_name in ["graphdef", "savedmodel", "libtorch"]: iu.infer_exact( self, base_name, @@ -411,7 +411,7 @@ def test_parse_error_model_no_version(self): # Sanity check that other models are loaded properly try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: model_name = tu.get_model_name( base_name, np.float32, np.float32, np.float32 ) @@ -425,7 +425,7 @@ def test_parse_error_model_no_version(self): self.assertTrue(False, "unexpected error {}".format(ex)) try: - for base_name in ["savedmodel", "onnx"]: + for base_name in ["savedmodel", "libtorch"]: iu.infer_exact( self, base_name, @@ -535,7 +535,9 @@ def test_dynamic_model_load_unload(self): savedmodel_name = tu.get_model_name( "savedmodel", np.float32, np.float32, np.float32 ) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name( + "libtorch", np.float32, np.float32, np.float32 + ) # Make sure savedmodel model is not in the status (because # initially it is not in the model repository) @@ -548,8 +550,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + 
self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -566,8 +568,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "1")) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -642,8 +644,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -682,8 +684,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "1")) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True @@ -712,10 +714,10 @@ def test_dynamic_model_load_unload(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - # Remove onnx model from the model repository and give it + # Remove libtorch model from the model repository and give it # time to unload. Make sure that it is unavailable. 
try: - shutil.rmtree("models/" + onnx_name) + shutil.rmtree("models/" + libtorch_name) time.sleep(5) # wait for model to unload for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -725,8 +727,8 @@ def test_dynamic_model_load_unload(self): self.assertTrue(triton_client.is_server_ready()) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "1")) self.assertTrue(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertFalse(triton_client.is_model_ready(onnx_name, "1")) - self.assertFalse(triton_client.is_model_ready(onnx_name, "3")) + self.assertFalse(triton_client.is_model_ready(libtorch_name, "1")) + self.assertFalse(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -734,7 +736,7 @@ def test_dynamic_model_load_unload(self): try: iu.infer_exact( self, - "onnx", + "libtorch", tensor_shape, 1, np.float32, @@ -742,10 +744,12 @@ def test_dynamic_model_load_unload(self): np.float32, swap=True, ) - self.assertTrue(False, "expected error for unavailable model " + onnx_name) + self.assertTrue( + False, "expected error for unavailable model " + libtorch_name + ) except Exception as ex: self.assertIn( - "Request for unknown model: 'onnx_float32_float32_float32' has no available versions", + "Request for unknown model: 'libtorch_float32_float32_float32' has no available versions", ex.message(), ) @@ -754,7 +758,9 @@ def test_dynamic_model_load_unload_disabled(self): savedmodel_name = tu.get_model_name( "savedmodel", np.float32, np.float32, np.float32 ) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name( + "libtorch", np.float32, np.float32, np.float32 + ) # Make sure savedmodel model is not in the status (because # initially it is not in the model repository) @@ -767,8 +773,8 @@ def test_dynamic_model_load_unload_disabled(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -785,8 +791,8 @@ def test_dynamic_model_load_unload_disabled(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -814,7 +820,7 @@ def test_dynamic_model_load_unload_disabled(self): # Remove one of the original models from the model repository. # Unloading is disabled so it should remain available in the status. 
try: - shutil.rmtree("models/" + onnx_name) + shutil.rmtree("models/" + libtorch_name) time.sleep(5) # wait for model to unload (but it shouldn't) for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -824,8 +830,8 @@ def test_dynamic_model_load_unload_disabled(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "1")) self.assertFalse(triton_client.is_model_ready(savedmodel_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -834,7 +840,7 @@ def test_dynamic_model_load_unload_disabled(self): try: iu.infer_exact( self, - "onnx", + "libtorch", tensor_shape, 1, np.float32, @@ -1279,7 +1285,7 @@ def test_multiple_model_repository_polling(self): (1,), model_shape, ) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Add the savedmodel to the second model repository, should cause # it to be unloaded due to duplication @@ -1297,7 +1303,7 @@ def test_multiple_model_repository_polling(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Remove the savedmodel from the first model repository, the # model from the second model repository should be loaded @@ -1306,7 +1312,7 @@ def test_multiple_model_repository_polling(self): shutil.rmtree("models/" + savedmodel_name) time.sleep(5) # wait for model to unload self._infer_success_models( - ["savedmodel", "graphdef", "onnx"], (1, 3), model_shape + ["savedmodel", "graphdef", "libtorch"], (1, 3), model_shape ) def test_multiple_model_repository_control(self): @@ -1316,7 +1322,7 @@ def test_multiple_model_repository_control(self): savedmodel_name = tu.get_model_name( "savedmodel", np.float32, np.float32, np.float32 ) - model_bases = ["savedmodel", "graphdef", "onnx"] + model_bases = ["savedmodel", "graphdef", "libtorch"] # Initially models are not loaded for base in model_bases: @@ -1353,7 +1359,7 @@ def test_multiple_model_repository_control(self): (1,), model_shape, ) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Add the savedmodel to the second model repository. Because # not polling this doesn't change any model state, all models @@ -1366,7 +1372,7 @@ def test_multiple_model_repository_control(self): (1,), model_shape, ) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Load savedmodel again which should fail because it is now duplicated # in 2 model repositories. Use HTTP here. @@ -1394,7 +1400,7 @@ def test_multiple_model_repository_control(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - self._infer_success_models(["graphdef", "onnx"], (1, 3), model_shape) + self._infer_success_models(["graphdef", "libtorch"], (1, 3), model_shape) # Remove the savedmodel from the first model repository and # explicitly load savedmodel. 
The savedmodel from the second @@ -1413,18 +1419,18 @@ def test_multiple_model_repository_control(self): self.assertIn("failed to load '{}'".format(savedmodel_name), ex.message()) self._infer_success_models( - ["savedmodel", "graphdef", "onnx"], (1, 3), model_shape + ["savedmodel", "graphdef", "libtorch"], (1, 3), model_shape ) def test_model_control(self): model_shape = (1, 16) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name("plan", np.float32, np.float32, np.float32) ensemble_prefix = "simple_" - ensemble_name = ensemble_prefix + onnx_name + ensemble_name = ensemble_prefix + libtorch_name # Make sure no models are loaded - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1462,44 +1468,44 @@ def test_model_control(self): self._infer_success_models( [ - "onnx", + "plan", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, swap=True, ) - # Delete model configuration for onnx, which will cause + # Delete model configuration for libtorch, which will cause # the autofiller to use the latest version policy so that only # version 3 will be available if the models are re-loaded - for model_name in (onnx_name,): + for model_name in (libtorch_name,): os.remove("models/" + model_name + "/config.pbtxt") self._infer_success_models( [ - "onnx", + "plan", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, swap=True, ) - # Reload models, only version 3 should be available for onnx - for model_name in (onnx_name, ensemble_name): + # Reload models, only version 3 should be available for libtorch + for model_name in (libtorch_name, ensemble_name): try: triton_client = grpcclient.InferenceServerClient( "localhost:8001", verbose=True @@ -1510,21 +1516,21 @@ def test_model_control(self): self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, swap=True, ) - for model_name in (onnx_name,): + for model_name in (libtorch_name,): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1552,11 +1558,11 @@ def test_model_control(self): triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True ) - triton_client.unload_model(onnx_name) + triton_client.unload_model(libtorch_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1577,13 +1583,13 @@ def test_model_control(self): "localhost:8000", verbose=True ) triton_client.unload_model(ensemble_name) - triton_client.load_model(onnx_name) + triton_client.load_model(libtorch_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, @@ -1602,7 +1608,7 @@ def test_model_control(self): self.assertTrue(False, "unexpected error {}".format(ex)) def test_model_control_fail(self): - model_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + model_name = tu.get_model_name("plan", np.float32, np.float32, np.float32) # Make sure no models are 
loaded try: @@ -1639,13 +1645,15 @@ def test_model_control_fail(self): def test_model_control_ensemble(self): model_shape = (1, 16) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + libtorch_name = tu.get_model_name( + "savedmodel", np.float32, np.float32, np.float32 + ) ensemble_prefix = "simple_" - ensemble_name = ensemble_prefix + onnx_name + ensemble_name = ensemble_prefix + libtorch_name # Make sure no models are loaded - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1669,14 +1677,14 @@ def test_model_control_ensemble(self): self._infer_success_models( [ - "onnx", + "savedmodel", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_savedmodel", ], (1, 3), model_shape, @@ -1691,7 +1699,7 @@ def test_model_control_ensemble(self): triton_client.unload_model(ensemble_name, unload_dependents=True) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - for model_name in (onnx_name, ensemble_name): + for model_name in (libtorch_name, ensemble_name): try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -1717,7 +1725,7 @@ def test_model_control_ensemble(self): self._infer_success_models( [ - "onnx", + "savedmodel", ], (1, 3), model_shape, @@ -1732,8 +1740,8 @@ def test_model_control_ensemble(self): self.assertTrue(triton_client.is_server_ready()) self.assertFalse(triton_client.is_model_ready(ensemble_name, "1")) self.assertFalse(triton_client.is_model_ready(ensemble_name, "3")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "1")) - self.assertTrue(triton_client.is_model_ready(onnx_name, "3")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "1")) + self.assertTrue(triton_client.is_model_ready(libtorch_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2030,11 +2038,13 @@ def test_model_reload_fail(self): def test_multiple_model_repository_control_startup_models(self): model_shape = (1, 16) - onnx_name = tu.get_model_name("onnx", np.float32, np.float32, np.float32) + graphdef_name = tu.get_model_name( + "graphdef", np.float32, np.float32, np.float32 + ) plan_name = tu.get_model_name("plan", np.float32, np.float32, np.float32) ensemble_prefix = "simple_" - onnx_ensemble_name = ensemble_prefix + onnx_name + graphdef_ensemble_name = ensemble_prefix + graphdef_name plan_ensemble_name = ensemble_prefix + plan_name # Make sure unloaded models are not in the status @@ -2055,14 +2065,14 @@ def test_multiple_model_repository_control_startup_models(self): # And loaded models work properly self._infer_success_models( [ - "onnx", + "graphdef", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_graphdef", ], (1, 3), model_shape, @@ -2119,43 +2129,43 @@ def test_multiple_model_repository_control_startup_models(self): # Delete model configuration, which will cause the autofiller # to use the latest version policy so that only version 3 will # be available if the models are re-loaded - os.remove("models/" + onnx_name + "/config.pbtxt") + os.remove("models/" + plan_name + "/config.pbtxt") self._infer_success_models( [ - "plan", + "graphdef", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_plan", + "simple_graphdef", ], (1, 3), model_shape, swap=True, ) - # Reload onnx, only version 3 should be available + # Reload plan 
model, only version 3 should be available try: triton_client = grpcclient.InferenceServerClient( "localhost:8001", verbose=True ) - triton_client.load_model(onnx_name) + triton_client.load_model(plan_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, ) self._infer_success_models( [ - "simple_onnx", + "simple_plan", ], (1, 3), model_shape, @@ -2169,7 +2179,7 @@ def test_multiple_model_repository_control_startup_models(self): ): self.assertTrue(triton_client.is_server_live()) self.assertTrue(triton_client.is_server_ready()) - self.assertFalse(triton_client.is_model_ready(onnx_name, "1")) + self.assertFalse(triton_client.is_model_ready(plan_name, "1")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2183,17 +2193,17 @@ def test_multiple_model_repository_control_startup_models(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - # Unload the onnx, as side effect, the ensemble model + # Unload the plan, as side effect, the ensemble model # will be forced to be unloaded try: triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True ) - triton_client.unload_model(onnx_name) + triton_client.unload_model(plan_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - for model_name in [onnx_name, onnx_ensemble_name]: + for model_name in [plan_name, plan_ensemble_name]: try: for triton_client in ( httpclient.InferenceServerClient("localhost:8000", verbose=True), @@ -2206,35 +2216,35 @@ def test_multiple_model_repository_control_startup_models(self): except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) - # Explicitly unload the onnx ensemble and load the + # Explicitly unload the plan ensemble and load the # depending model. The ensemble model should not be reloaded # because it was explicitly unloaded. try: triton_client = httpclient.InferenceServerClient( "localhost:8000", verbose=True ) - triton_client.unload_model(onnx_ensemble_name) - triton_client.load_model(onnx_name) + triton_client.unload_model(plan_ensemble_name) + triton_client.load_model(plan_name) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) self._infer_success_models( [ - "onnx", + "plan", ], (3,), model_shape, ) self._infer_success_models( [ - "plan", + "graphdef", ], (1, 3), model_shape, ) self._infer_success_models( [ - "simple_plan", + "simple_graphdef", ], (1, 3), model_shape, @@ -2248,8 +2258,8 @@ def test_multiple_model_repository_control_startup_models(self): ): self.assertTrue(triton_client.is_server_live()) self.assertTrue(triton_client.is_server_ready()) - self.assertFalse(triton_client.is_model_ready(onnx_ensemble_name, "1")) - self.assertFalse(triton_client.is_model_ready(onnx_ensemble_name, "3")) + self.assertFalse(triton_client.is_model_ready(plan_ensemble_name, "1")) + self.assertFalse(triton_client.is_model_ready(plan_ensemble_name, "3")) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2280,7 +2290,7 @@ def test_model_repository_index(self): self.assertTrue(False, "unexpected error {}".format(ex)) # Check model repository index - # All models should be in ready state except onnx_float32_float32_float32 + # All models should be in ready state except libtorch_float32_float32_float32 # which appears in two repositories. 
model_bases.append("simple_graphdef") try: @@ -2292,7 +2302,7 @@ def test_model_repository_index(self): self.assertEqual(len(index), 8) for i in index: indexed.append(i["name"]) - if i["name"] == "onnx_float32_float32_float32": + if i["name"] == "libtorch_float32_float32_float32": self.assertEqual(i["state"], "UNAVAILABLE") self.assertEqual( i["reason"], "model appears in two or more repositories" @@ -2311,7 +2321,7 @@ def test_model_repository_index(self): self.assertEqual(len(index.models), 8) for i in index.models: indexed.append(i.name) - if i.name == "onnx_float32_float32_float32": + if i.name == "libtorch_float32_float32_float32": self.assertEqual(i.state, "UNAVAILABLE") self.assertEqual( i.reason, "model appears in two or more repositories" @@ -2332,7 +2342,7 @@ def test_config_override(self): httpclient.InferenceServerClient("localhost:8000", verbose=True), grpcclient.InferenceServerClient("localhost:8001", verbose=True), ): - for base in (("onnx", "onnxruntime"),): + for base in (("plan", "tensorrt"),): model_name = tu.get_model_name( base[0], np.float32, np.float32, np.float32 ) @@ -2404,7 +2414,7 @@ def test_file_override(self): model_shape = (1, 16) override_base = "override_model" - for base in (("onnx", "onnxruntime"),): + for base in (("plan", "tensorrt"),): model_name = tu.get_model_name(base[0], np.float32, np.float32, np.float32) override_model_name = tu.get_model_name( override_base, np.float32, np.float32, np.float32 @@ -2432,7 +2442,7 @@ def test_file_override(self): # not be used. try: triton_client.load_model( - model_name, files={"file:1/model.onnx": file_content} + model_name, files={"file:1/model.plan": file_content} ) self.assertTrue(False, "expected error on missing override config") except InferenceServerException as ex: @@ -2464,7 +2474,7 @@ def test_file_override(self): triton_client.load_model( override_model_name, config="""{{"backend":"{backend}" }}""".format(backend=base[1]), - files={"file:1/model.onnx": file_content}, + files={"file:1/model.plan": file_content}, ) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) @@ -2501,7 +2511,7 @@ def test_file_override(self): triton_client.load_model( model_name, config="""{{"backend":"{backend}" }}""".format(backend=base[1]), - files={"file:1/model.onnx": file_content}, + files={"file:1/model.plan": file_content}, ) except Exception as ex: self.assertTrue(False, "unexpected error {}".format(ex)) diff --git a/qa/L0_lifecycle/test.sh b/qa/L0_lifecycle/test.sh index 4476a5db32..564f89fad1 100755 --- a/qa/L0_lifecycle/test.sh +++ b/qa/L0_lifecycle/test.sh @@ -96,7 +96,6 @@ kill $SERVER_PID wait $SERVER_PID LOG_IDX=$((LOG_IDX+1)) - # LifeCycleTest.test_parse_error_noexit SERVER_ARGS="--model-repository=/tmp/xyzx --strict-readiness=false \ --exit-on-error=false" @@ -336,7 +335,7 @@ mkdir models models_0 for i in graphdef savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx plan ; do +for i in plan libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done # Change the model files so that multiple versions will be loaded, and one of @@ -398,7 +397,7 @@ mkdir models models_0 for i in graphdef savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx plan ; do +for i in plan libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. 
done rm models/graphdef_float32_float32_float32/config.pbtxt @@ -439,14 +438,14 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_init_error_modelfail rm -fr models models_0 mkdir models models_0 -cp -r $DATADIR/qa_sequence_model_repository/onnx_sequence_int32 models/. -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 models_0/. -sed -i "s/OUTPUT/_OUTPUT/" models/onnx_sequence_int32/config.pbtxt -sed -i "s/OUTPUT/_OUTPUT/" models_0/onnx_int32_int32_int32/config.pbtxt -for i in graphdef savedmodel; do +cp -r $DATADIR/qa_sequence_model_repository/savedmodel_sequence_int32 models/. +cp -r $DATADIR/qa_model_repository/savedmodel_int32_int32_int32 models_0/. +sed -i "s/OUTPUT/_OUTPUT/" models/savedmodel_sequence_int32/config.pbtxt +sed -i "s/OUTPUT/_OUTPUT/" models_0/savedmodel_int32_int32_int32/config.pbtxt +for i in graphdef libtorch; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx ; do +for i in savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done @@ -477,7 +476,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_parse_error_model_no_version rm -fr models mkdir models -for i in savedmodel onnx plan ; do +for i in savedmodel libtorch plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done mkdir -p models/graphdef_float32_float32_float32 @@ -581,7 +580,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_dynamic_model_load_unload rm -fr models savedmodel_float32_float32_float32 mkdir models -for i in graphdef onnx plan ; do +for i in graphdef libtorch plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -610,7 +609,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_dynamic_model_load_unload_disabled rm -fr models savedmodel_float32_float32_float32 mkdir models -for i in graphdef onnx plan; do +for i in graphdef libtorch plan; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -762,7 +761,7 @@ mkdir models models_0 for i in graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx ; do +for i in libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -796,7 +795,7 @@ mkdir models models_0 for i in graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done -for i in onnx ; do +for i in libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. done cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 . @@ -829,7 +828,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_model_control rm -fr models config.pbtxt.* mkdir models -for i in onnx ; do +for i in plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -861,7 +860,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_model_control_fail rm -fr models config.pbtxt.* mkdir models -for i in onnx ; do +for i in plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. 
# Remove all model files so the model will fail to load rm models/${i}_float32_float32_float32/*/* @@ -893,7 +892,7 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_model_control_ensemble rm -fr models config.pbtxt.* mkdir models -for i in onnx ; do +for i in savedmodel ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -926,7 +925,7 @@ LOG_IDX=$((LOG_IDX+1)) rm -fr models models_0 config.pbtxt.* mkdir models models_0 # Ensemble models in the second repository -for i in plan onnx ; do +for i in plan graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -945,7 +944,7 @@ SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \ --strict-model-config=false --exit-on-error=false \ --load-model=savedmodel_float32_float32_float32 \ --load-model=plan_float32_float32_float32 \ - --load-model=simple_onnx_float32_float32_float32" + --load-model=simple_graphdef_float32_float32_float32" SERVER_LOG="./inference_server_$LOG_IDX.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -971,7 +970,7 @@ LOG_IDX=$((LOG_IDX+1)) rm -fr models models_0 config.pbtxt.* mkdir models models_0 # Ensemble models in the second repository -for i in plan onnx ; do +for i in plan graphdef ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt @@ -1012,7 +1011,7 @@ LOG_IDX=$((LOG_IDX+1)) # an additional --load-model argument, it should fail rm -fr models mkdir models -for i in onnx ; do +for i in plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. sed -i "s/max_batch_size:.*/max_batch_size: 1/" models/${i}_float32_float32_float32/config.pbtxt done @@ -1024,7 +1023,7 @@ SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \ --strict-readiness=true \ --exit-on-error=true \ --load-model=* \ - --load-model=onnx_float32_float32_float32" + --load-model=plan_float32_float32_float32" SERVER_LOG="./inference_server_$LOG_IDX.log" run_server if [ "$SERVER_PID" != "0" ]; then @@ -1057,6 +1056,7 @@ if [ "$SERVER_PID" != "0" ]; then kill $SERVER_PID wait $SERVER_PID fi + # check server log for the error messages to make sure they're printed if [ `grep -c "model not found in any model repository" $SERVER_LOG` == "0" ]; then echo -e "\n***\n*** Server log ${SERVER_LOG} did not print model load failure for non-existent model\n***" @@ -1075,8 +1075,8 @@ for i in graphdef savedmodel ; do cp -r $DATADIR/qa_ensemble_model_repository/qa_model_repository/simple_${i}_float32_float32_float32 models_0/. done -# onnx doesn't load because it is duplicated in 2 repositories -for i in onnx ; do +# libtorch doesn't load because it is duplicated in 2 repositories +for i in libtorch ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models_0/. 
done @@ -1085,7 +1085,7 @@ SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models_0 \ --model-control-mode=explicit \ --strict-readiness=false \ --strict-model-config=false --exit-on-error=false \ - --load-model=onnx_float32_float32_float32 \ + --load-model=libtorch_float32_float32_float32 \ --load-model=graphdef_float32_float32_float32 \ --load-model=simple_savedmodel_float32_float32_float32" SERVER_LOG="./inference_server_$LOG_IDX.log" @@ -1369,7 +1369,7 @@ done # Send HTTP request to control endpoint rm -fr models config.pbtxt.* mkdir models -for i in graphdef savedmodel onnx plan ; do +for i in graphdef savedmodel libtorch plan ; do cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/. done @@ -1480,10 +1480,10 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_config_override rm -fr models config.pbtxt.* mkdir models -cp -r $DATADIR/qa_model_repository/onnx_float32_float32_float32 models/. +cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 models/. # Make only version 2 is valid version directory while config requests 1, 3 -rm models/onnx_float32_float32_float32/1/* -rm models/onnx_float32_float32_float32/3/* +rm models/plan_float32_float32_float32/1/* +rm models/plan_float32_float32_float32/3/* SERVER_ARGS="--model-repository=`pwd`/models --model-repository=`pwd`/models \ --model-control-mode=explicit \ @@ -1512,14 +1512,14 @@ LOG_IDX=$((LOG_IDX+1)) # LifeCycleTest.test_file_override rm -fr models config.pbtxt.* mkdir models -cp -r $DATADIR/qa_model_repository/onnx_float32_float32_float32 models/. +cp -r $DATADIR/qa_model_repository/plan_float32_float32_float32 models/. # Make only version 2, 3 is valid version directory while config requests 1, 3 -rm -rf models/onnx_float32_float32_float32/1 +rm -rf models/plan_float32_float32_float32/1 -# Start with EXPLICIT mode and load onnx_float32_float32_float32 +# Start with EXPLICIT mode and load plan_float32_float32_float32 SERVER_ARGS="--model-repository=`pwd`/models \ --model-control-mode=explicit \ - --load-model=onnx_float32_float32_float32 \ + --load-model=plan_float32_float32_float32 \ --strict-model-config=false" SERVER_LOG="./inference_server_$LOG_IDX.log" run_server @@ -1905,7 +1905,7 @@ LOG_IDX=$((LOG_IDX+1)) rm -rf models mkdir models # Sanity check loading multiple instances in parallel for each supported backend -PARALLEL_BACKENDS="python onnx" +PARALLEL_BACKENDS="python" for backend in ${PARALLEL_BACKENDS} ; do model="${backend}_float32_float32_float32" model_dir="models/${model}" diff --git a/qa/L0_logging/test.sh b/qa/L0_logging/test.sh index 160bffe3dd..0f12166ecd 100755 --- a/qa/L0_logging/test.sh +++ b/qa/L0_logging/test.sh @@ -50,7 +50,7 @@ fi export CUDA_VISIBLE_DEVICES=0 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository -MODELBASE=onnx_int32_int32_int32 +MODELBASE=savedmodel_int32_int32_int32 MODELSDIR=`pwd`/log_models diff --git a/qa/L0_long_running_stress/test.sh b/qa/L0_long_running_stress/test.sh index b98a89f955..83d8b0bf3a 100755 --- a/qa/L0_long_running_stress/test.sh +++ b/qa/L0_long_running_stress/test.sh @@ -63,7 +63,7 @@ fi RET=0 # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch"} +BACKENDS=${BACKENDS:="graphdef savedmodel libtorch"} export BACKENDS export CI_JOB_ID=${CI_JOB_ID} @@ -137,6 +137,8 @@ cp -r $DATADIR/tf_model_store/resnet_v1_50_graphdef $MODEL_DIR/resnet_v1_50_grap sed -i 's/^name: "resnet_v1_50_graphdef"/name: "resnet_v1_50_graphdef_def"/' config.pbtxt && \ echo "optimization { }" 
>> config.pbtxt) +rm `find $MODEL_DIR/ -name '*onnx*'` -rf + SERVER_ARGS="--model-repository=`pwd`/$MODEL_DIR" SERVER_LOG="./server.log" run_server diff --git a/qa/L0_memory_growth/test.sh b/qa/L0_memory_growth/test.sh index 64277e6b6e..f53db17d8b 100755 --- a/qa/L0_memory_growth/test.sh +++ b/qa/L0_memory_growth/test.sh @@ -102,6 +102,7 @@ export MAX_ALLOWED_ALLOC="100" # Create local model repository mkdir -p models/ cp -r $DATADIR/perf_model_store/resnet50* models/ +rm -rf models/resnet50_fp32_onnx # Copy and prepare trt model cp -r $DATADIR/caffe_models/trt_model_store/resnet50_plan models/resnet50_fp16_plan diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt deleted file mode 100644 index b393fb4e00..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/config.pbtxt +++ /dev/null @@ -1,25 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16, 1 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected b/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected deleted file mode 100644 index 52d579417e..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_input_dims/expected +++ /dev/null @@ -1 +0,0 @@ -model 'bad_input_dims', tensor 'INPUT0': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 3 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,16,1\]) \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx deleted file mode 100644 index c9f6a92bc7..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -triton:¸ - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_nobatch_int32_int8_int8Z -INPUT0 - - -Z -INPUT1 - - -b -OUTPUT0 - - -b -OUTPUT1 - - -B \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt deleted file mode 100644 index 7d4be73dbb..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/config.pbtxt +++ /dev/null @@ -1,13 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected b/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected deleted file mode 100644 index 07ebf4b459..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_max_batch_size/expected +++ /dev/null @@ -1 +0,0 @@ -autofill failed for model 'bad_max_batch_size': model does not support batching while non-zero max_batch_size is specified \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt deleted file mode 100644 index 004ed9a54f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/config.pbtxt +++ /dev/null @@ -1,25 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 1 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected b/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected deleted file mode 100644 index 5a11d49e68..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/bad_output_dims/expected +++ /dev/null @@ -1 +0,0 @@ -model 'bad_output_dims', tensor 'OUTPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,1\]) diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt deleted file mode 100644 index 2814fb7e5c..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/config.pbtxt +++ /dev/null @@ -1,20 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected b/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected deleted file mode 100644 index f6639e85ae..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_few_inputs/expected +++ /dev/null @@ -1 +0,0 @@ -unable to load model 'too_few_inputs', configuration expects 1 inputs, model provides 2 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt deleted file mode 100644 index 6ba2274876..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/config.pbtxt +++ /dev/null @@ -1,30 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT_EXTRA" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected b/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected deleted file mode 100644 index e88e97dcfb..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/too_many_inputs/expected +++ /dev/null @@ -1 +0,0 @@ -unable to load model 'too_many_inputs', configuration expects 3 inputs, model provides 2 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt deleted file mode 100644 index 0df318caa8..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/config.pbtxt +++ /dev/null @@ -1,25 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT_UNKNOWN" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected b/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected deleted file mode 100644 index e2a2abbf09..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_input/expected +++ /dev/null @@ -1 +0,0 @@ -unexpected inference input 'INPUT_UNKNOWN', allowed inputs are: INPUT0, INPUT1 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx b/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt b/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt deleted file mode 100644 index 979b05c4ee..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/config.pbtxt +++ /dev/null @@ -1,20 +0,0 @@ -max_batch_size: 1 -input [ - { - name: "INPUT0" - data_type: TYPE_INT32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_INT32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT_UNKNOWN" - data_type: TYPE_INT8 - dims: [ 16 ] - } -] diff --git a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected b/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected deleted file mode 100644 index 38fd5e2785..0000000000 --- a/qa/L0_model_config/autofill_noplatform/onnx/unknown_output/expected +++ /dev/null @@ -1 +0,0 @@ -unexpected inference output 'OUTPUT_UNKNOWN', allowed outputs are: OUTPUT0, OUTPUT1 \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt b/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt deleted file mode 100644 index 137ad375c8..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/config.pbtxt +++ /dev/null @@ -1,23 +0,0 @@ - -name: "cpu_instance" -platform: "onnxruntime_onnx" -max_batch_size: 8 -version_policy: { latest { num_versions: 1 }} -input [ - { - name: "INPUT0" - data_type: TYPE_FP16 - dims: [ -1,-1 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP16 - dims: [ -1,-1 ] - } -] -instance_group { - name: "cpu_instance" - kind: KIND_CPU -} diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected 
b/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected deleted file mode 100644 index 008a7a0b7f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/cpu_instance/expected +++ /dev/null @@ -1,36 +0,0 @@ -name: "cpu_instance" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 8 -input { - name: "INPUT0" - data_type: TYPE_FP16 - dims: -1 - dims: -1 -} -output { - name: "OUTPUT0" - data_type: TYPE_FP16 - dims: -1 - dims: -1 -} -instance_group { - name: "cpu_instance" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" \ No newline at end of file diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/config.pbtxt b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/config.pbtxt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected deleted file mode 100644 index bedc4e44fa..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1 b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1 deleted file mode 100644 index 7e2a45c522..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.1 +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" 
-optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2 b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2 deleted file mode 100644 index 56def5c317..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.2 +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3 b/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3 deleted file mode 100644 index 35a82c5be1..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/empty_config/expected.3 +++ /dev/null @@ -1,48 +0,0 @@ -name: "empty_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "empty_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx deleted file mode 100644 index b352d3225f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:Ô - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! 
-CAST1OUTPUT1"Cast* -to onnx_int32_int8_int8Z -INPUT0 - -var_0 -Z -INPUT1 - -var_0 -b -OUTPUT0 - -var_1 -b -OUTPUT1 - -var_2 -B diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected deleted file mode 100644 index f2a7d4e43e..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1 deleted file mode 100644 index ca6269959f..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.1 +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2 deleted file mode 100644 index 51d73ebdfe..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.2 +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3 deleted file mode 100644 index c5121d60b5..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config/expected.3 +++ /dev/null @@ -1,48 +0,0 @@ -name: "no_config" -platform: "onnxruntime_onnx" -version_policy 
{ - latest { - num_versions: 1 - } -} -max_batch_size: 4 -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config" - count: 1 - gpus: 0 - kind: KIND_GPU -} -dynamic_batching { - preferred_batch_size: 4 -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx deleted file mode 100644 index ebe41ef108..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/1/model.onnx +++ /dev/null @@ -1,33 +0,0 @@ -TRTIS:¸ - -INPUT0_INPUT0"Identity - -INPUT1_INPUT1"Identity - -_INPUT0 -_INPUT1CAST0"Add - -_INPUT0 -_INPUT1CAST1"Sub -! -CAST0OUTPUT0"Cast* -to  -! -CAST1OUTPUT1"Cast* -to onnx_nobatch_int32_int8_int8Z -INPUT0 - - -Z -INPUT1 - - -b -OUTPUT0 - - -b -OUTPUT1 - - -B diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt deleted file mode 100644 index 5913902a76..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/config.pbtxt +++ /dev/null @@ -1,5 +0,0 @@ -instance_group [ - { - kind: KIND_CPU - } -] diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected deleted file mode 100644 index 9adc820017..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1 deleted file mode 100644 index 5ba1985bd6..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.1 +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - 
enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2 deleted file mode 100644 index fa82234e53..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.2 +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3 b/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3 deleted file mode 100644 index e5e92cb9be..0000000000 --- a/qa/L0_model_config/autofill_noplatform_success/onnx/no_config_no_batch/expected.3 +++ /dev/null @@ -1,43 +0,0 @@ -name: "no_config_no_batch" -platform: "onnxruntime_onnx" -version_policy { - latest { - num_versions: 1 - } -} -input { - name: "INPUT1" - data_type: TYPE_INT32 - dims: 16 -} -input { - name: "INPUT0" - data_type: TYPE_INT32 - dims: 16 -} -output { - name: "OUTPUT1" - data_type: TYPE_INT8 - dims: 16 -} -output { - name: "OUTPUT0" - data_type: TYPE_INT8 - dims: 16 -} -instance_group { - name: "no_config_no_batch_0" - count: 2 - kind: KIND_CPU -} -default_model_filename: "model.onnx" -optimization { - input_pinned_memory { - enable: true - } - output_pinned_memory { - enable: true - } -} -backend: "onnxruntime" -runtime: "" diff --git a/qa/L0_model_config/test.sh b/qa/L0_model_config/test.sh index 5b8cf6cf26..ef75c59f17 100755 --- a/qa/L0_model_config/test.sh +++ b/qa/L0_model_config/test.sh @@ -48,7 +48,7 @@ source ../common/util.sh export CUDA_VISIBLE_DEVICES=0 -TRIALS="tensorflow_savedmodel tensorflow_graphdef tensorrt_plan onnxruntime_onnx pytorch_libtorch" +TRIALS="tensorflow_savedmodel tensorflow_graphdef tensorrt_plan pytorch_libtorch" # Copy fixed TensorRT plans into the test model repositories. 
for modelpath in \ @@ -275,9 +275,9 @@ cp /data/inferenceserver/${REPO_VERSION}/qa_reshape_model_repository/plan_zero_4 autofill_noplatform_success/tensorrt/reshape_config_provided/1 # Copy identity model into onnx test directories -mkdir -p autofill_noplatform_success/onnx/cpu_instance/1 -cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_float16/1/model.onnx \ - autofill_noplatform_success/onnx/cpu_instance/1 +#mkdir -p autofill_noplatform_success/onnx/cpu_instance/1 +#cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/onnx_zero_1_float16/1/model.onnx \ +# autofill_noplatform_success/onnx/cpu_instance/1 # Copy openvino models into test directories for modelpath in \ diff --git a/qa/L0_multi_server/test.sh b/qa/L0_multi_server/test.sh index cd5ff3d407..2ba0a76c6c 100755 --- a/qa/L0_multi_server/test.sh +++ b/qa/L0_multi_server/test.sh @@ -54,13 +54,13 @@ RET=0 MULTI_SERVER=multi_server CLIENT_LOG=$MULTI_SERVER MULTI_SERVER=./$MULTI_SERVER -BACKENDS=(graphdef onnx plan) +BACKENDS=(graphdef plan) THREAD_COUNT=32 LOOPS=32 EXTRA_ARGS=" -t ${THREAD_COUNT} -l ${LOOPS}" for (( I=1; I<${THREAD_COUNT}+2; I++ )); do - BACKEND_INDEX=$(((I % 3) - 1)) + BACKEND_INDEX=$(((I % 2) - 1)) full=${BACKENDS[$BACKEND_INDEX]}_float32_float32_float32 mkdir -p ${MODELSDIR}${I}/simple${I}/1 && \ cp -r $DATADIR/${full}/1/* ${MODELSDIR}${I}/simple${I}/1/. && \ diff --git a/qa/L0_output_name/output_name_test.py b/qa/L0_output_name/output_name_test.py index 905174640c..19636aed56 100755 --- a/qa/L0_output_name/output_name_test.py +++ b/qa/L0_output_name/output_name_test.py @@ -36,7 +36,7 @@ import grpc -_trials = ("graphdef", "libtorch", "onnx", "plan", "savedmodel") +_trials = ("graphdef", "libtorch", "plan", "savedmodel") class OutputNameValidationTest(tu.TestResultCollector): diff --git a/qa/L0_output_name/test.sh b/qa/L0_output_name/test.sh index 7c1a5664a0..d29ceb7c6e 100755 --- a/qa/L0_output_name/test.sh +++ b/qa/L0_output_name/test.sh @@ -50,6 +50,7 @@ rm -rf $DATADIR mkdir $DATADIR cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/*_nobatch_zero_1_float32 $DATADIR +rm `find ./models/ -name '*onnx*'` -rf SERVER=/opt/tritonserver/bin/tritonserver SERVER_ARGS="--model-repository=$DATADIR" diff --git a/qa/L0_perf_analyzer_report/test.sh b/qa/L0_perf_analyzer_report/test.sh index 7a04905842..2e35a1e2a6 100755 --- a/qa/L0_perf_analyzer_report/test.sh +++ b/qa/L0_perf_analyzer_report/test.sh @@ -98,8 +98,8 @@ SERVER_LOG="./inference_server.log" rm -f $SERVER_LOG $CLIENT_LOG MODEL_DIR="./models" rm -fr ${MODEL_DIR} && mkdir ${MODEL_DIR} -ENSEMBLE_MODEL="simple_onnx_float32_float32_float32" -COMPOSING_MODEL="onnx_float32_float32_float32" +ENSEMBLE_MODEL="simple_libtorch_float32_float32_float32" +COMPOSING_MODEL="libtorch_float32_float32_float32" ENSEMBLE_MODEL_CACHE_ENABLED="${ENSEMBLE_MODEL}_cache_enabled" ENSEMBLE_MODEL_CACHE_DISABLED="${ENSEMBLE_MODEL}_cache_disabled" COMPOSING_MODEL_CACHE_ENABLED="${COMPOSING_MODEL}_cache_enabled" diff --git a/qa/L0_sagemaker/test.sh b/qa/L0_sagemaker/test.sh index b5bd07c519..94b2a25af5 100755 --- a/qa/L0_sagemaker/test.sh +++ b/qa/L0_sagemaker/test.sh @@ -65,14 +65,15 @@ ENSEMBLEDIR=/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/q SERVER=/opt/tritonserver/bin/tritonserver SERVER_LOG="./server.log" # Link model repository to "/opt/ml/model" +rm -rf /opt/ml mkdir /opt/ml/ ln -s `pwd`/models /opt/ml/model source ../common/util.sh mkdir models && \ - cp -r 
$DATADIR/qa_model_repository/onnx_int32_int32_int32 models/sm_model && \ + cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 models/sm_model && \ rm -r models/sm_model/2 && rm -r models/sm_model/3 && \ - sed -i "s/onnx_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt + sed -i "s/plan_int32_int32_int32/sm_model/" models/sm_model/config.pbtxt # Use SageMaker's ping endpoint to check server status # Wait until server health endpoint shows ready. Sets WAIT_RET to 0 on @@ -376,12 +377,12 @@ MODEL2_PATH="models/987654321ihgfedcba/model" mkdir -p "${MODEL1_PATH}" mkdir -p "${MODEL2_PATH}" -cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32/* ${MODEL1_PATH} && \ +cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32/* ${MODEL1_PATH} && \ rm -r ${MODEL1_PATH}/2 && rm -r ${MODEL1_PATH}/3 && \ - sed -i "s/onnx_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt + sed -i "s/plan_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt -cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32/* ${MODEL2_PATH} && \ - sed -i "s/onnx_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt +cp -r $DATADIR/qa_identity_model_repository/plan_zero_1_float32/* ${MODEL2_PATH} && \ + sed -i "s/plan_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt # Ensemble model ENSEMBLE_MODEL_PATH="models/123456789ensemble/model" diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py index 3e6cfc032a..bd3bcff4b8 100755 --- a/qa/L0_sequence_batcher/sequence_batcher_test.py +++ b/qa/L0_sequence_batcher/sequence_batcher_test.py @@ -57,7 +57,7 @@ else: _protocols = ("http",) -BACKENDS = os.environ.get("BACKENDS", "graphdef savedmodel onnx plan custom python") +BACKENDS = os.environ.get("BACKENDS", "graphdef savedmodel plan custom python") ENSEMBLES = bool(int(os.environ.get("ENSEMBLES", 1))) NO_BATCHING = int(os.environ["NO_BATCHING"]) == 1 @@ -70,14 +70,16 @@ _trials = () if NO_BATCHING: for backend in BACKENDS.split(" "): - if backend != "custom": + if backend != "custom" and backend != "onnx": _trials += (backend + "_nobatch",) elif os.environ["BATCHER_TYPE"] == "VARIABLE": for backend in BACKENDS.split(" "): - if (backend != "libtorch") and (backend != "custom"): + if (backend != "libtorch") and (backend != "custom") and (backend != "onnx"): _trials += (backend,) else: - _trials = BACKENDS.split(" ") + for backend in BACKENDS.split(" "): + if backend != "onnx": + _trials += (backend,) # Add ensemble to the _trials ENSEMBLE_PREFIXES = ["simple_", "sequence_", "fan_"] @@ -174,7 +176,9 @@ def get_expected_result_implicit( def test_simple_sequence(self): # Send one sequence and check for correct accumulator # result. The result should be returned immediately. + print(_trials) for trial in _trials: + print("---------------------------" + str(trial)) # Run on different protocols. 
for idx, protocol in enumerate(_protocols): dtypes = self.get_datatype(trial) diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh index d91b433966..98634b0363 100755 --- a/qa/L0_sequence_batcher/test.sh +++ b/qa/L0_sequence_batcher/test.sh @@ -123,7 +123,7 @@ source ../common/util.sh RET=0 # If BACKENDS not specified, set to all -BACKENDS=${BACKENDS:="graphdef savedmodel onnx plan libtorch custom python"} +BACKENDS=${BACKENDS:="graphdef savedmodel plan libtorch custom python"} export BACKENDS # If MODEL_TRIALS not specified set to 0 1 2 4 v @@ -521,6 +521,8 @@ for model_trial in $MODEL_TRIALS; do done fi + rm `find ./$MODEL_PATH/ -name '*onnx*'` -rf + # Need to launch the server for each test so that the model status # is reset (which is used to make sure the correct batch size was # used for execution). Test everything with fixed-tensor-size @@ -713,6 +715,7 @@ fi MODEL_PATH=queue_delay_models # remove ensemble models from the test model repo rm -rf queue_delay_models/simple_* queue_delay_models/fan_* queue_delay_models/sequence_* +rm `find ./queue_delay_models/ -name '*onnx*'` -rf for i in $QUEUE_DELAY_TESTS ; do export NO_BATCHING=0 export TRITONSERVER_BACKLOG_DELAY_SCHEDULER=0 diff --git a/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py b/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py index 15f16da352..a472ed8885 100755 --- a/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py +++ b/qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py @@ -46,9 +46,9 @@ _model_instances = int(os.environ["MODEL_INSTANCES"]) if _no_batching: - _trials = ("savedmodel_nobatch", "graphdef_nobatch", "plan_nobatch", "onnx_nobatch") + _trials = ("savedmodel_nobatch", "graphdef_nobatch", "plan_nobatch") else: - _trials = ("savedmodel", "graphdef", "plan", "onnx") + _trials = ("savedmodel", "graphdef", "plan") _protocols = ("http", "grpc") _max_sequence_idle_ms = 5000 @@ -67,7 +67,6 @@ def get_expected_result(self, expected_result, corrid, value, trial, flag_str=No (("nobatch" not in trial) and ("custom" not in trial)) or ("graphdef" in trial) or ("plan" in trial) - or ("onnx" in trial) ) or ("libtorch" in trial): expected_result = value if flag_str is not None: diff --git a/qa/L0_sequence_corrid_batcher/test.sh b/qa/L0_sequence_corrid_batcher/test.sh index 8d114a395a..164470d353 100755 --- a/qa/L0_sequence_corrid_batcher/test.sh +++ b/qa/L0_sequence_corrid_batcher/test.sh @@ -62,7 +62,6 @@ for m in \ $DATADIR/qa_dyna_sequence_model_repository/graphdef_dyna_sequence_int32 \ $DATADIR/qa_dyna_sequence_model_repository/savedmodel_dyna_sequence_int32 \ $DATADIR/qa_dyna_sequence_model_repository/plan_dyna_sequence_int32 \ - $DATADIR/qa_dyna_sequence_model_repository/onnx_dyna_sequence_int32 \ $DATADIR/qa_dyna_sequence_model_repository/libtorch_dyna_sequence_int32; do cp -r $m models4/. && \ (cd models4/$(basename $m) && \ diff --git a/qa/L0_server_status/server_status_test.py b/qa/L0_server_status/server_status_test.py index 7ab04708f0..3a6996693a 100755 --- a/qa/L0_server_status/server_status_test.py +++ b/qa/L0_server_status/server_status_test.py @@ -156,11 +156,11 @@ def test_unknown_model_version(self): def test_model_latest_infer(self): input_size = 16 tensor_shape = (1, input_size) - platform_name = {"graphdef": "tensorflow_graphdef", "onnx": "onnxruntime_onnx"} + platform_name = {"graphdef": "tensorflow_graphdef"} # There are 3 versions of *_int32_int32_int32 and all # should be available. 
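Note on the cleanup lines added to the test.sh scripts above and below (rm `find ... -name '*onnx*'` -rf): if the intent is simply to drop every *onnx* entry from a copied repository, an equivalent form that does not rely on word-splitting the command substitution and that is a clean no-op when nothing matches is the find -prune/-exec idiom sketched here (illustrative only, assuming the same "delete anything with onnx in the name" goal):

    # Purge onnx models/artifacts from the local repository copy; -prune keeps find
    # from descending into directories it is about to remove, and -exec ... {} +
    # runs nothing (and exits 0) when there are no matches.
    find ./models/ -name '*onnx*' -prune -exec rm -rf {} +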
- for platform in ("graphdef", "onnx"): + for platform in ("graphdef",): model_name = platform + "_int32_int32_int32" # Initially there should be no version stats.. @@ -316,7 +316,7 @@ def test_model_specific_infer(self): # There are 3 versions of *_float32_float32_float32 but only # versions 1 and 3 should be available. - for platform in ("graphdef", "onnx", "plan"): + for platform in ("graphdef", "plan"): tensor_shape = (1, input_size) model_name = platform + "_float32_float32_float32" @@ -439,7 +439,7 @@ def test_model_versions_deleted(self): # version 3 was executed once. Version 2 and 3 models were # deleted from the model repository so now only expect version 1 to # be ready and show stats. - for platform in ("graphdef", "onnx"): + for platform in ("graphdef",): model_name = platform + "_int32_int32_int32" try: @@ -615,7 +615,7 @@ def test_infer_stats_no_model_version(self): # version 3 was executed once. Version 2 and 3 models were # deleted from the model repository so now only expect version 1 to # be ready and show infer stats. - for platform in ("graphdef", "onnx"): + for platform in ("graphdef",): model_name = platform + "_int32_int32_int32" try: @@ -723,8 +723,8 @@ def test_infer_stats_no_model(self): stats = infer_stats.model_stats self.assertEqual( len(stats), - 219, - "expected 219 infer stats for all ready versions of all model", + 173, + "expected 173 infer stats for all ready versions of all model", ) except InferenceServerException as ex: diff --git a/qa/L0_server_status/test.sh b/qa/L0_server_status/test.sh index 1e27339a38..c162c6969f 100755 --- a/qa/L0_server_status/test.sh +++ b/qa/L0_server_status/test.sh @@ -55,6 +55,7 @@ source ../common/util.sh rm -fr models cp -r $DATADIR/qa_model_repository models +rm `find ./models/ -name '*onnx*'` -rf run_server if [ "$SERVER_PID" == "0" ]; then @@ -85,7 +86,7 @@ fi set -e rm -fr models/graphdef_int32_int32_int32/2 models/graphdef_int32_int32_int32/3 -rm -fr models/onnx_int32_int32_int32/2 models/onnx_int32_int32_int32/3 +#rm -fr models/onnx_int32_int32_int32/2 models/onnx_int32_int32_int32/3 cp -r models/graphdef_float16_float32_float32/1 models/graphdef_float16_float32_float32/7 sleep 3 diff --git a/qa/L0_simple_lib/test.sh b/qa/L0_simple_lib/test.sh index 7045f512ef..36975de5b8 100755 --- a/qa/L0_simple_lib/test.sh +++ b/qa/L0_simple_lib/test.sh @@ -55,7 +55,7 @@ for SIMPLE_CLIENT in simple ; do CLIENT_LOG=$SIMPLE_CLIENT SIMPLE_CLIENT=./$SIMPLE_CLIENT - for trial in graphdef savedmodel onnx libtorch plan; do + for trial in graphdef savedmodel libtorch plan; do full=${trial}_float32_float32_float32 rm -rf $MODELSDIR mkdir -p $MODELSDIR/simple/1 && \ diff --git a/qa/L0_storage_S3/test.sh b/qa/L0_storage_S3/test.sh index f16dc81e83..830352725e 100755 --- a/qa/L0_storage_S3/test.sh +++ b/qa/L0_storage_S3/test.sh @@ -164,15 +164,16 @@ for ENV_VAR in "env" "env_dummy" "config"; do # Now start model tests - for FW in graphdef savedmodel onnx libtorch plan; do + for FW in graphdef savedmodel libtorch plan; do cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/ done # Copy models with string inputs and remove nobatch (bs=1) models cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/*_object_object_object/ models/ rm -rf models/*nobatch* + rm `find ./models/ -name '*onnx*'` -rf - for FW in graphdef savedmodel onnx libtorch plan; do + for FW in graphdef savedmodel libtorch plan; do for MC in `ls models/${FW}*/config.pbtxt`; do echo "instance_group [ { kind: ${KIND} }]" 
>> $MC
     done
diff --git a/qa/L0_storage_S3_local/test.sh b/qa/L0_storage_S3_local/test.sh
index e60b106b31..07bc5793ae 100755
--- a/qa/L0_storage_S3_local/test.sh
+++ b/qa/L0_storage_S3_local/test.sh
@@ -47,7 +47,7 @@ EXPECTED_NUM_TESTS="3"
 DATADIR="/data/inferenceserver/${REPO_VERSION}/qa_model_repository"
 # Used to control which backends are run in infer_test.py
-BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
+BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan"}
 function run_unit_tests() {
     echo "Running unit tests: ${INFER_TEST}"
@@ -276,7 +276,7 @@ awslocal $ENDPOINT_FLAG s3 rm s3://demo-bucket1.0 --recursive --include "*" && \
 # Test for multiple model repositories using S3 cloud storage
 echo "=== Running multiple-model-repository tests ==="
 BACKENDS1="graphdef libtorch"
-BACKENDS2="onnx plan savedmodel"
+BACKENDS2="plan savedmodel"
 export BACKENDS="$BACKENDS1 $BACKENDS2"
 set +e
diff --git a/qa/L0_storage_azure/test.sh b/qa/L0_storage_azure/test.sh
index 15f9c78bcc..6d136b542a 100755
--- a/qa/L0_storage_azure/test.sh
+++ b/qa/L0_storage_azure/test.sh
@@ -82,7 +82,7 @@ rm -f $SERVER_LOG_BASE* $CLIENT_LOG_BASE*
 RET=0
 # Used to control which backends are run in infer_test.py
-BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
+BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan"}
 function run_unit_tests() {
     BACKENDS=$BACKENDS python $INFER_TEST >$CLIENT_LOG 2>&1
@@ -110,6 +110,7 @@ function setup_model_repo() {
     # Copy models with string inputs and remove nobatch (bs=1) models
     cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/*_object_object_object models/
     rm -rf models/*nobatch*
+    rm `find ./models/ -name '*onnx*'` -rf
 }
 setup_model_repo
diff --git a/qa/L0_storage_swiftstack/infer_test.py b/qa/L0_storage_swiftstack/infer_test.py
index f8a65a01a4..4f62e0ae30 100755
--- a/qa/L0_storage_swiftstack/infer_test.py
+++ b/qa/L0_storage_swiftstack/infer_test.py
@@ -148,6 +148,8 @@ def _infer_exact_helper(
             (input_size,),
             (input_size,),
         ):
+            pass
+            """
             _infer_exact_helper(
                 self,
                 "onnx",
@@ -160,6 +162,7 @@ def _infer_exact_helper(
                 output1_raw=output1_raw,
                 swap=swap,
             )
+            """
         if tu.validate_for_libtorch_model(
             input_dtype,
diff --git a/qa/L0_storage_swiftstack/test.sh b/qa/L0_storage_swiftstack/test.sh
index 99fb5610d6..75e0d14719 100755
--- a/qa/L0_storage_swiftstack/test.sh
+++ b/qa/L0_storage_swiftstack/test.sh
@@ -104,11 +104,11 @@ aws s3 rm $BUCKET_URL/ --recursive --include "*"
 # Now start model tests
-for FW in graphdef savedmodel onnx libtorch plan; do
+for FW in graphdef savedmodel libtorch plan; do
     cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/
 done
-for FW in graphdef savedmodel onnx libtorch plan; do
+for FW in graphdef savedmodel libtorch plan; do
     for MC in `ls models/${FW}*/config.pbtxt`; do
         echo "instance_group [ { kind: KIND_GPU }]" >> $MC
     done
diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh
index b4a17bcd95..83f755b55b 100755
--- a/qa/L0_trace/test.sh
+++ b/qa/L0_trace/test.sh
@@ -52,7 +52,7 @@ export CUDA_VISIBLE_DEVICES=0
 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
 ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
 BLSDIR=../python_models/bls_simple
-MODELBASE=onnx_int32_int32_int32
+MODELBASE=savedmodel_int32_int32_int32
 MODELSDIR=`pwd`/trace_models
@@ -70,7 +70,7 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
     cp -r $MODELSDIR/simple $MODELSDIR/global_simple && \
     (cd $MODELSDIR/global_simple && \
         sed -i "s/^name:.*/name: \"global_simple\"/" config.pbtxt) && \
-    cp -r $ENSEMBLEDIR/simple_onnx_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
+    cp -r $ENSEMBLEDIR/simple_savedmodel_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
     rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/2 && \
     rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \
     (cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
@@ -720,7 +720,7 @@ rm -r ${MODEL_PATH}
 mkdir -p "${MODEL_PATH}"
 cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \
     rm -r ${MODEL_PATH}/2 && rm -r ${MODEL_PATH}/3 && \
-    sed -i "s/onnx_int32_int32_int32/simple/" ${MODEL_PATH}/config.pbtxt
+    sed -i "s/savedmodel_int32_int32_int32/simple/" ${MODEL_PATH}/config.pbtxt
 SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
diff --git a/qa/L0_vertex_ai/test.sh b/qa/L0_vertex_ai/test.sh
index 7403bf14cf..79b3eed78c 100755
--- a/qa/L0_vertex_ai/test.sh
+++ b/qa/L0_vertex_ai/test.sh
@@ -59,12 +59,12 @@ source ../common/util.sh
 # Set up the multi model repository with the swap and non-swap versions
 mkdir multi_models && \
-    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 multi_models/addsub && \
+    cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 multi_models/addsub && \
     rm -r multi_models/addsub/2 && rm -r multi_models/addsub/3 && \
-    sed -i "s/onnx_int32_int32_int32/addsub/" multi_models/addsub/config.pbtxt && \
-    cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32 multi_models/subadd && \
+    sed -i "s/plan_int32_int32_int32/addsub/" multi_models/addsub/config.pbtxt && \
+    cp -r $DATADIR/qa_model_repository/plan_int32_int32_int32 multi_models/subadd && \
     rm -r multi_models/subadd/1 && rm -r multi_models/subadd/2 && \
-    sed -i "s/onnx_int32_int32_int32/subadd/" multi_models/subadd/config.pbtxt
+    sed -i "s/plan_int32_int32_int32/subadd/" multi_models/subadd/config.pbtxt
 mkdir single_model && \
     cp -r multi_models/addsub single_model/.
diff --git a/qa/L0_warmup/test.sh b/qa/L0_warmup/test.sh
index aeed873b25..437dd108ad 100755
--- a/qa/L0_warmup/test.sh
+++ b/qa/L0_warmup/test.sh
@@ -51,7 +51,7 @@ IMAGE="../images/vulture.jpeg"
 DATADIR=`pwd`/models
 # If BACKENDS not specified, set to all
-BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch plan"}
+BACKENDS=${BACKENDS:="graphdef savedmodel libtorch plan"}
 SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_ARGS="--model-repository=$DATADIR --log-verbose=1 --exit-timeout-secs=120"
@@ -177,7 +177,7 @@ for BACKEND in ${BACKENDS}; do
     # Test for variable-size data type (string)
     rm -fr models && mkdir models
-    SUPPORT_STRING=0 && ([[ $BACKEND == "savedmodel" ]] || [[ $BACKEND == "onnx" ]] || [[ $BACKEND == "savedmodel" ]]) && SUPPORT_STRING=1
+    SUPPORT_STRING=0 && ([[ $BACKEND == "savedmodel" ]] || [[ $BACKEND == "savedmodel" ]]) && SUPPORT_STRING=1
     if [ "$SUPPORT_STRING" == "1" ] ; then
         cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/${BACKEND}_sequence_object models/.
         cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/${BACKEND}_zero_1_object models/.
@@ -412,14 +412,14 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
-# Test the onnx model to verify that the memory type of the output tensor
+# Test the tensorrt model to verify that the memory type of the output tensor
 # remains unchanged with the warmup setting
 pip3 uninstall -y torch
 pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch_stable.html
 rm -fr models && mkdir models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_nobatch_float32_float32_float32 models/.
-(cd models/onnx_nobatch_float32_float32_float32 && \
+cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/plan_nobatch_float32_float32_float32 models/.
+(cd models/plan_nobatch_float32_float32_float32 && \
     echo "" >> config.pbtxt && \
     echo 'instance_group [{' >> config.pbtxt && \
     echo ' kind : KIND_GPU' >> config.pbtxt && \
@@ -445,9 +445,10 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/onnx_nobatch_flo
     echo ' }' >> config.pbtxt && \
     echo '}]' >> config.pbtxt )
-mkdir -p models/bls_onnx_warmup/1/
-cp ../python_models/bls_onnx_warmup/model.py models/bls_onnx_warmup/1/
-cp ../python_models/bls_onnx_warmup/config.pbtxt models/bls_onnx_warmup/.
+mkdir -p models/bls_plan_warmup/1/
+cp ../python_models/bls_onnx_warmup/model.py models/bls_plan_warmup/1/
+cp ../python_models/bls_onnx_warmup/config.pbtxt models/bls_plan_warmup/.
+sed -i -e 's/onnx/plan/g' models/bls_plan_warmup/1/model.py
 cp ../L0_backend_python/python_unittest.py .
 sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' python_unittest.py
@@ -461,10 +462,10 @@ fi
 set +e
-export MODEL_NAME='bls_onnx_warmup'
+export MODEL_NAME='bls_plan_warmup'
 python3 -m pytest --junitxml=warmup.report.xml $CLIENT_PY >> $CLIENT_LOG 2>&1
 if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** 'bls_onnx_warmup' test FAILED. \n***"
+    echo -e "\n***\n*** 'bls_plan_warmup' test FAILED. \n***"
     cat $CLIENT_LOG
     RET=1
 fi
diff --git a/qa/common/check_copyright.py b/qa/common/check_copyright.py
index ff18ca8e39..aa597b51d6 100755
--- a/qa/common/check_copyright.py
+++ b/qa/common/check_copyright.py
@@ -54,6 +54,7 @@
     "deploy/gke-marketplace-app/server-deployer/chart/.helmignore",
     "deploy/gcp/.helmignore",
     "deploy/aws/.helmignore",
+    "deploy/oci/.helmignore",
     "deploy/fleetcommand/.helmignore",
     "docs/.gitignore",
     "docs/_static/.gitattributes",
diff --git a/qa/python_models/bls_model_loading/model.py b/qa/python_models/bls_model_loading/model.py
index 84162e2fac..40f30989aa 100644
--- a/qa/python_models/bls_model_loading/model.py
+++ b/qa/python_models/bls_model_loading/model.py
@@ -33,7 +33,7 @@ class PBBLSModelLoadingTest(unittest.TestCase):
     def setUp(self):
-        self.model_name = "onnx_int32_int32_int32"
+        self.model_name = "plan_int32_int32_int32"
     def tearDown(self):
         # The unload call does not wait for the requested model to be fully
@@ -57,7 +57,7 @@ def test_load_with_config_override(self):
         self.assertTrue(pb_utils.is_model_ready(self.model_name))
         # Send the config with the wrong format
-        wrong_config = '"parameters": {"config": {{"backend":"onnxruntime", "version_policy":{"specific":{"versions":[2]}}}}}'
+        wrong_config = '"parameters": {"config": {{"backend":"tensorrt", "version_policy":{"specific":{"versions":[2]}}}}}'
         with self.assertRaises(pb_utils.TritonModelException):
             pb_utils.load_model(model_name=self.model_name, config=wrong_config)
         # The model should not be changed after a failed load model request
@@ -70,7 +70,7 @@ def test_load_with_config_override(self):
         # Send the config with the correct format
         config = (
-            '{"backend":"onnxruntime", "version_policy":{"specific":{"versions":[2]}}}'
+            '{"backend":"tensorrt", "version_policy":{"specific":{"versions":[2]}}}'
         )
         pb_utils.load_model(self.model_name, config=config)
         # The model should be changed after a successful load model request
@@ -83,10 +83,10 @@ def test_load_with_file_override(self):
         self.assertTrue(pb_utils.is_model_ready(self.model_name))
         override_name = "override_model"
-        config = '{"backend":"onnxruntime"}'
-        with open("models/onnx_int32_int32_int32/3/model.onnx", "rb") as file:
+        config = '{"backend":"tensorrt"}'
+        with open("models/plan_int32_int32_int32/3/model.plan", "rb") as file:
             data = file.read()
-        files = {"file:1/model.onnx": data}
+        files = {"file:1/model.plan": data}
         # Request to load the model with override file, should fail without
         # providing override config.
diff --git a/qa/python_models/bls_onnx_warmup/config.pbtxt b/qa/python_models/bls_onnx_warmup/config.pbtxt
index 879f85ca81..a549cc39e5 100644
--- a/qa/python_models/bls_onnx_warmup/config.pbtxt
+++ b/qa/python_models/bls_onnx_warmup/config.pbtxt
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-name: "bls_onnx_warmup"
 backend: "python"
 output [