Update CI - Remove deprecated APIs from TensorRT backend (#7193)

* Update CI
* Temporary TENSORRT_IMAGE
* Plugin
* Test
* Update L0_cuda_graph
* [FIXME] Modify later INT32 to INT64
* Testing
* Undo datatype changes int64
* Skip caffe2plan
* Undo TRT image name
* Update CI
* Update CI
* Update CI
* Update plugin CI
* Update gen_qa_model_repository
* Removing DIRECT_IO flag from reformat-free I/O
* Update
* Update copyright
* Update CI
* Remove caffe2plan.cc
* Undo copyright
* Fix pre-commit errors
* Support INT64 datatype from shape tensors in test cases
* Update comments

---------

Co-authored-by: tanmayv25 <[email protected]>
triton-inference-server · May 29, 2024 · 42bc242 · 42bc242
1 parent facd1b5
commit 42bc242
Show file tree

Hide file tree

Showing 40 changed files with 1,009 additions and 1,574 deletions.
diff --git a/Dockerfile.QA b/Dockerfile.QA
@@ -154,11 +154,6 @@ RUN mkdir -p qa/pkgs && \
     cp python/triton*.whl qa/pkgs/. && \
     cp -rf python/test/. qa/L0_python_api/.
 
-# caffe2plan will not exist if the build was done without TensorRT enabled
-RUN if [ -f bin/caffe2plan ]; then \
-       cp bin/caffe2plan qa/common/.; \
-    fi
-
 RUN mkdir -p qa/L0_simple_ensemble/models/simple/1 && \
     cp docs/examples/model_repository/simple/1/model.graphdef \
         qa/L0_simple_ensemble/models/simple/1/. && \

diff --git a/qa/L0_cuda_graph/test.sh b/qa/L0_cuda_graph/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -205,8 +205,8 @@ fi
 set -e
 
 set +e
-if [ `grep -c "Context with profile default \[0\] is being executed for " $SERVER_LOG` != "2" ]; then
-    echo -e "\n***\n*** Failed. Expected only 2 execution without CUDA graph\n***"
+if [ `grep -c "Context with profile default \[0\] is being executed for " $SERVER_LOG` != "3" ]; then
+    echo -e "\n***\n*** Failed. Expected only 3 execution without CUDA graph\n***"
     RET=1
 fi
 
@@ -321,8 +321,8 @@ fi
 set -e
 
 set +e
-if [ `grep -c "Context with profile default \[0\] is launching CUDA graph " $SERVER_LOG` != "1" ]; then
-    echo -e "\n***\n*** Failed. Expected only one execution with CUDA graph\n***"
+if [ `grep -c "Context with profile default \[0\] is launching CUDA graph " $SERVER_LOG` != "0" ]; then
+    echo -e "\n***\n*** Failed. Expected 0 execution with CUDA graph\n***"
     RET=1
 fi
 

diff --git a/qa/L0_cuda_graph/trt_cuda_graph_test.py b/qa/L0_cuda_graph/trt_cuda_graph_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -35,7 +35,7 @@
 import infer_util as iu
 import numpy as np
 import test_util as tu
-from tritonclientutils import *
+from tritonclient.utils import *
 
 
 class TrtCudaGraphTest(tu.TestResultCollector):

diff --git a/qa/L0_device_memory_tracker/test.sh b/qa/L0_device_memory_tracker/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -46,6 +46,7 @@ TEST_PY=test.py
 DATADIR=/data/inferenceserver/${REPO_VERSION}
 rm -f *.log
 
+TRTEXEC=/usr/src/tensorrt/bin/trtexec
 TEST_RESULT_FILE='test_results.txt'
 SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_LOG="./server.log"
@@ -61,32 +62,66 @@ rm -rf models && mkdir models
 cp -r /data/inferenceserver/${REPO_VERSION}/onnx_model_store/* models/.
 rm -r models/*cpu
 
-# Convert to get TRT models against the system
-CAFFE2PLAN=../common/caffe2plan
 set +e
-mkdir -p models/vgg19_plan/1 && rm -f models/vgg19_plan/1/model.plan && \
-    $CAFFE2PLAN -b32 -n prob -o models/vgg19_plan/1/model.plan \
-                $DATADIR/caffe_models/vgg19.prototxt $DATADIR/caffe_models/vgg19.caffemodel
+
+# VGG19 plan
+rm -fr models/vgg19_plan && mkdir -p models/vgg19_plan/1 && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/vgg19_onnx/1/model.onnx models/vgg19_plan/ && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/vgg19_onnx/labels.txt models/vgg19_plan/
+
+$TRTEXEC --onnx=models/vgg19_plan/model.onnx --saveEngine=models/vgg19_plan/1/model.plan \
+         --minShapes=input:1x3x224x224 --optShapes=input:32x3x224x224 \
+         --maxShapes=input:32x3x224x224
+
 if [ $? -ne 0 ]; then
     echo -e "\n***\n*** Failed to generate vgg19 PLAN\n***"
     exit 1
 fi
 
-mkdir -p models/resnet50_plan/1 && rm -f models/resnet50_plan/1/model.plan && \
-    $CAFFE2PLAN -b32 -n prob -o models/resnet50_plan/1/model.plan \
-                $DATADIR/caffe_models/resnet50.prototxt $DATADIR/caffe_models/resnet50.caffemodel
+rm models/vgg19_plan/model.onnx
+cp $DATADIR/qa_dynamic_batch_image_model_repository/vgg19_onnx/config.pbtxt models/vgg19_plan/ && \
+sed -i "s/^name: .*/name: \"vgg19_plan\"/g" models/vgg19_plan/config.pbtxt && \
+sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' models/vgg19_plan/config.pbtxt
+
+# Resnet50 plan
+rm -fr models/resnet50_plan && mkdir -p models/resnet50_plan/1 && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/1/model.onnx models/resnet50_plan/ && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/labels.txt models/resnet50_plan/
+
+$TRTEXEC --onnx=models/resnet50_plan/model.onnx --saveEngine=models/resnet50_plan/1/model.plan \
+         --minShapes=input:1x3x224x224 --optShapes=input:32x3x224x224 \
+         --maxShapes=input:32x3x224x224
+
 if [ $? -ne 0 ]; then
     echo -e "\n***\n*** Failed to generate resnet50 PLAN\n***"
     exit 1
 fi
 
-mkdir -p models/resnet152_plan/1 && rm -f models/resnet152_plan/1/model.plan && \
-    $CAFFE2PLAN -h -b32 -n prob -o models/resnet152_plan/1/model.plan \
-                $DATADIR/caffe_models/resnet152.prototxt $DATADIR/caffe_models/resnet152.caffemodel
+rm models/resnet50_plan/model.onnx
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/config.pbtxt models/resnet50_plan/ && \
+sed -i "s/^name: .*/name: \"resnet50_plan\"/g" models/resnet50_plan/config.pbtxt && \
+sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' models/resnet50_plan/config.pbtxt
+
+
+# Resnet152 plan
+rm -fr models/resnet152_plan && mkdir -p models/resnet152_plan/1 && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet152_onnx/1/model.onnx models/resnet152_plan/ && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet152_onnx/labels.txt models/resnet152_plan/
+
+$TRTEXEC --onnx=models/resnet152_plan/model.onnx --saveEngine=models/resnet152_plan/1/model.plan \
+         --minShapes=input:1x3x224x224 --optShapes=input:32x3x224x224 \
+         --maxShapes=input:32x3x224x224
+
 if [ $? -ne 0 ]; then
     echo -e "\n***\n*** Failed to generate resnet152 PLAN\n***"
     exit 1
 fi
+
+rm models/resnet152_plan/model.onnx
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet152_onnx/config.pbtxt models/resnet152_plan/ && \
+sed -i "s/^name: .*/name: \"resnet152_plan\"/g" models/resnet152_plan/config.pbtxt && \
+sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' models/resnet152_plan/config.pbtxt
+
 set -e
 
 # Set multiple instances on selected model to test instance-wise collection

diff --git a/qa/L0_memory_growth/test.sh b/qa/L0_memory_growth/test.sh
@@ -46,7 +46,7 @@ PERF_ANALYZER=../clients/perf_analyzer
 IMAGE=../images/vulture.jpeg
 
 # Models
-CAFFE2PLAN=../common/caffe2plan
+TRTEXEC=/usr/src/tensorrt/bin/trtexec
 DATADIR=/data/inferenceserver/${REPO_VERSION}
 
 # Server
@@ -103,24 +103,29 @@ export MAX_ALLOWED_ALLOC="100"
 mkdir -p models/
 cp -r $DATADIR/perf_model_store/resnet50* models/
 
-# Copy and prepare trt model
-cp -r $DATADIR/caffe_models/trt_model_store/resnet50_plan models/resnet50_fp16_plan
-mkdir -p models/resnet50_fp16_plan/1
-sed -i "s/^name:.*/name: \"resnet50_fp16_plan\"/" models/resnet50_fp16_plan/config.pbtxt
+# Create the TensorRT plan from ONNX model
+rm -fr models/resnet50_fp32_plan && mkdir -p models/resnet50_fp32_plan/1 && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/1/model.onnx models/resnet50_fp32_plan/ && \
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/labels.txt models/resnet50_fp32_plan/
 
 set +e
+# Build TRT engine
+$TRTEXEC --onnx=models/resnet50_fp32_plan/model.onnx --saveEngine=models/resnet50_fp32_plan/1/model.plan \
+         --minShapes=input:1x3x224x224 --optShapes=input:${STATIC_BATCH}x3x224x224 \
+         --maxShapes=input:${STATIC_BATCH}x3x224x224
 
-# Create the PLAN
-$CAFFE2PLAN -h -b ${STATIC_BATCH} \
-    -n prob -o models/resnet50_fp16_plan/1/model.plan \
-    $DATADIR/caffe_models/resnet50.prototxt $DATADIR/caffe_models/resnet50.caffemodel
 if [ $? -ne 0 ]; then
     echo -e "\n***\n*** Failed to generate resnet50 PLAN\n***"
     exit 1
 fi
 
 set -e
 
+rm models/resnet50_fp32_plan/model.onnx
+cp $DATADIR/qa_dynamic_batch_image_model_repository/resnet50_onnx/config.pbtxt models/resnet50_fp32_plan/ && \
+sed -i "s/^name: .*/name: \"resnet50_fp32_plan\"/g" models/resnet50_fp32_plan/config.pbtxt && \
+sed -i 's/^platform: .*/platform: "tensorrt_plan"/g' models/resnet50_fp32_plan/config.pbtxt
+
 RET=0
 
 for MODEL in $(ls models); do

diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_dims/config.pbtxt b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_dims/config.pbtxt
@@ -8,7 +8,7 @@ input [
   {
     name: "INPUT1"
     data_type: TYPE_FP32
-    dims: [ 16, 1 ]
+    dims: [ 7 ]
   }
 ]
 output [

diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_dims/expected b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_dims/expected
@@ -1 +1 @@
-model 'bad_input_dims', tensor 'INPUT1': the model expects 1 dimensions (shape \[16\]) but the model configuration specifies 2 dimensions (shape \[16,1\])
+model 'bad_input_dims', tensor 'INPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,7\])
diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape/config.pbtxt b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape/config.pbtxt
@@ -0,0 +1,26 @@
+
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16]
+  },
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16, 1 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  },
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape/expected b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_shape/expected
@@ -0,0 +1 @@
+unable to autofill for 'bad_input_shape', model tensor configurations are contradicting each other in terms of whether batching is supported
diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_dims/expected b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_dims/expected
@@ -1 +1 @@
-model 'bad_output_dims', tensor 'OUTPUT1': the model expects 1 dimensions (shape \[16\]) but the model configuration specifies 1 dimensions (shape \[7\])
+model 'bad_output_dims', tensor 'OUTPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,7\])
diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape/config.pbtxt b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape/config.pbtxt
@@ -0,0 +1,25 @@
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  },
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  },
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16, 1]
+  }
+]
diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape/expected b/qa/L0_model_config/autofill_noplatform/tensorrt/bad_output_shape/expected
@@ -0,0 +1 @@
+unable to autofill for 'bad_output_shape', model tensor configurations are contradicting each other in terms of whether batching is supported
diff --git a/qa/L0_model_config/autofill_noplatform/tensorrt/too_few_inputs/expected b/qa/L0_model_config/autofill_noplatform/tensorrt/too_few_inputs/expected
@@ -1 +1 @@
-expected configuration for input 'INPUT0' for too_few_inputs
+failed to specify the dimensions of all input tensors or values of all input shape tensors
diff --git a/qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config_shape_tensor/expected b/qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config_shape_tensor/expected
@@ -26,7 +26,7 @@ output {
 }
 output {
   name: "OUTPUT0"
-  data_type: TYPE_INT32
+  data_type: TYPE_INT64
   dims: 2
   is_shape_tensor: true
 }

diff --git a/qa/L0_model_config/test.sh b/qa/L0_model_config/test.sh
@@ -53,9 +53,11 @@ TRIALS="tensorflow_savedmodel tensorflow_graphdef tensorrt_plan onnxruntime_onnx
 # Copy fixed TensorRT plans into the test model repositories.
 for modelpath in \
         autofill_noplatform/tensorrt/bad_input_dims/1 \
+        autofill_noplatform/tensorrt/bad_input_shape/1 \
         autofill_noplatform/tensorrt/bad_input_type/1 \
         autofill_noplatform/tensorrt/bad_input_shape_tensor/1 \
         autofill_noplatform/tensorrt/bad_output_dims/1 \
+        autofill_noplatform/tensorrt/bad_output_shape/1 \
         autofill_noplatform/tensorrt/bad_output_type/1 \
         autofill_noplatform/tensorrt/bad_output_shape_tensor/1 \
         autofill_noplatform/tensorrt/too_few_inputs/1 \

diff --git a/qa/L0_perf_deeprecommender/run_test.sh b/qa/L0_perf_deeprecommender/run_test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -143,9 +143,9 @@ for STATIC_BATCH in $STATIC_BATCH_SIZES; do
 done
 
 if (( $RET == 0 )); then
-    echo -e "\n***\n*** Test Passed\n***"
+    echo -e "\n***\n*** $FRAMEWORK Test Passed\n***"
 else
-    echo -e "\n***\n*** Test FAILED\n***"
+    echo -e "\n***\n*** $FRAMEWORK Test FAILED\n***"
 fi
 
 exit $RET
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		model 'bad_input_dims', tensor 'INPUT1': the model expects 1 dimensions (shape \[16\]) but the model configuration specifies 2 dimensions (shape \[16,1\])
		model 'bad_input_dims', tensor 'INPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,7\])
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		unable to autofill for 'bad_input_shape', model tensor configurations are contradicting each other in terms of whether batching is supported
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		model 'bad_output_dims', tensor 'OUTPUT1': the model expects 1 dimensions (shape \[16\]) but the model configuration specifies 1 dimensions (shape \[7\])
		model 'bad_output_dims', tensor 'OUTPUT1': the model expects 2 dimensions (shape \[-1,16\]) but the model configuration specifies 2 dimensions (an initial batch dimension because max_batch_size > 0 followed by the explicit tensor shape, making complete shape \[-1,7\])
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		unable to autofill for 'bad_output_shape', model tensor configurations are contradicting each other in terms of whether batching is supported
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		expected configuration for input 'INPUT0' for too_few_inputs
		failed to specify the dimensions of all input tensors or values of all input shape tensors