[TensorRT EP] support TensorRT 8.5 #13867

Merged
merged 52 commits into main from chi_trt85 on Dec 14, 2022
Changes shown below are from 43 of the 52 commits.

Commits (52)
a5971f0
test TRT 8.5 GA
jywu-msft Nov 4, 2022
4d11ee8
update onnx-tensorrt submodule to 8.5-GA
jywu-msft Nov 9, 2022
fa2a58a
test builtin parser
jywu-msft Nov 14, 2022
ce021a2
try OSS parser again
jywu-msft Nov 15, 2022
b82948d
add back --gpus all
chilo-ms Nov 18, 2022
8c1bb7f
Revert to the state where build and test are running in container
chilo-ms Nov 21, 2022
0dd1129
Revert to the state where build and test are running in container (co…
chilo-ms Nov 21, 2022
1514ff9
Revert to the state where build and test are running in container (co…
chilo-ms Nov 21, 2022
42666d3
Update linux-gpu-tensorrt-ci-pipeline.yml
chilo-ms Nov 21, 2022
de0f435
skip tests for known issues
chilo-ms Nov 22, 2022
5a217de
skip tests for known issues
chilo-ms Nov 22, 2022
4f5ef22
Update TRT Windows CI ymal
chilo-ms Dec 6, 2022
0677f5e
Merge branch 'main' into chi_trt85
chilo-ms Dec 6, 2022
9288869
update CI ymals
chilo-ms Dec 6, 2022
100b934
Merge branch 'chi_trt85' of https://github.com/microsoft/onnxruntime …
chilo-ms Dec 6, 2022
331a947
use original pool
chilo-ms Dec 7, 2022
f59bd59
add placeholder flag for package pipelines
chilo-ms Dec 7, 2022
26d9c84
increase timeout for TRT EP
chilo-ms Dec 7, 2022
50d583a
revert increase timeout
chilo-ms Dec 7, 2022
7494080
add back timeout
chilo-ms Dec 7, 2022
c59a421
remove place holder since it still causes application deadlock
chilo-ms Dec 7, 2022
cade3ab
increase timeout to 10 hours
chilo-ms Dec 8, 2022
48d66ff
Merge branch 'main' into chi_trt85
chilo-ms Dec 8, 2022
10611cb
update deps.txt
chilo-ms Dec 8, 2022
948279a
remove increased time since merging the main
chilo-ms Dec 8, 2022
a21b306
fix bug
chilo-ms Dec 8, 2022
ff83678
include https://github.com/microsoft/onnxruntime/pull/13918 to fix co…
chilo-ms Dec 9, 2022
ea0c763
add comment to deps.txt
chilo-ms Dec 9, 2022
01741c1
fix bug
chilo-ms Dec 9, 2022
c32458c
increase timeout
chilo-ms Dec 9, 2022
2bcdef4
fix python format
chilo-ms Dec 9, 2022
9c199c1
format compliance
chilo-ms Dec 9, 2022
e1d6aaf
increase timeout for package pipeline
chilo-ms Dec 10, 2022
af7d169
fix bug for increasing timeout for package pipeline
chilo-ms Dec 10, 2022
35ab897
fix bug for increasing timeout for package pipeline
chilo-ms Dec 10, 2022
a1fded4
test CUDA_MODULE_LOADING=LAZY
chilo-ms Dec 10, 2022
a62f6e1
skip unnecessay and time-consuming unittests for TRT EP
chilo-ms Dec 11, 2022
d7c7ba7
only run TRT related tests
chilo-ms Dec 11, 2022
0e89046
fix format bug
chilo-ms Dec 11, 2022
63d664b
rename flag
chilo-ms Dec 12, 2022
f390402
remove timeout for TRT EP unittests
chilo-ms Dec 12, 2022
5fc64ca
remove timeout for TRT EP unittests
chilo-ms Dec 12, 2022
ed16a9b
add --skip_and_perform_filtered_tensorrt_tests to package pipeline
chilo-ms Dec 12, 2022
61fdf47
make timeout configurable
chilo-ms Dec 12, 2022
11c9d29
make timeout configurable (cont.)
chilo-ms Dec 12, 2022
c3376d9
make timeout configurable
chilo-ms Dec 12, 2022
ba4b59e
make timeout configurable
chilo-ms Dec 12, 2022
af57f18
make timeout configurable
chilo-ms Dec 12, 2022
68c185d
make timeout configurable (fix bug)
chilo-ms Dec 12, 2022
f3ccdd6
refactor
chilo-ms Dec 13, 2022
8b63162
refactor
chilo-ms Dec 13, 2022
ffce45c
fix for flake8 error
chilo-ms Dec 13, 2022
3 changes: 2 additions & 1 deletion cgmanifests/generate_cgmanifest.py
@@ -59,6 +59,7 @@ def add_github_dep(name, parsed_url):
if dep not in git_deps:
git_deps[dep] = name
else:
# TODO: support urls like: https://github.com/onnx/onnx-tensorrt/archive/refs/tags/release/7.1.zip
if len(segments) == 5:
tag = PurePosixPath(segments[4]).stem
if tag.endswith(".tar"):
@@ -72,6 +73,7 @@ def add_github_dep(name, parsed_url):
return
# Make a REST call to convert to tag to a git commit
url = "https://api.github.com/repos/%s/%s/git/refs/tags/%s" % (org_name, repo_name, tag)
print("requesting %s ..." % url)
res = requests.get(url, auth=(args.username, args.token))
response_json = res.json()
tag_object = response_json["object"]
@@ -148,7 +150,6 @@ def normalize_path_separators(path):
"submodule",
"foreach",
"--quiet",
"--recursive",
"'{}' '{}' $toplevel/$sm_path".format(
normalize_path_separators(sys.executable),
normalize_path_separators(os.path.join(SCRIPT_DIR, "print_submodule_info.py")),
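For reference, the REST call added above turns a dependency's git tag into a commit SHA. A minimal standalone sketch of that lookup, using only the public GitHub API endpoint already visible in the script (the helper name and the optional auth argument are illustrative, not part of generate_cgmanifest.py):

import requests

def resolve_tag_to_commit(org: str, repo: str, tag: str, auth=None) -> str:
    # Same endpoint as generate_cgmanifest.py: GET /repos/{org}/{repo}/git/refs/tags/{tag}
    url = "https://api.github.com/repos/%s/%s/git/refs/tags/%s" % (org, repo, tag)
    print("requesting %s ..." % url)
    res = requests.get(url, auth=auth)
    res.raise_for_status()
    obj = res.json()["object"]
    # Lightweight tags point straight at a commit; annotated tags point at a tag object
    # whose SHA would need one more lookup to reach the commit.
    return obj["sha"]

# Example (tag taken from cmake/deps.txt): resolve_tag_to_commit("pybind", "pybind11", "v2.10.1")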
6 changes: 3 additions & 3 deletions cmake/deps.txt
@@ -24,13 +24,13 @@ microsoft_wil;https://github.com/microsoft/wil/archive/5f4caba4e7a9017816e47becd
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.0.3.zip;e4f37b93b2da78a5816c2495603a4188d316214b
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.79.0.zip;c8f04e378535ededbe5af52c8f969d2dedbe73d5
onnx;https://github.com/onnx/onnx/archive/5a5f8a5935762397aa68429b5493084ff970f774.zip;edc8e1338c02f3ab222f3d803a24e17608c13895
#Branch name: 8.4-GA
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/87c7a70688fd98fb355b8976f41425b40e4fe52f.zip;b97d112d9d6efa180c9b94e05268f2ff3294a534
# Use a commit that is several commits after the 8.5-GA branch (https://github.com/onnx/onnx-tensorrt/commit/369d6676423c2a6dbf4a5665c4b5010240d99d3c)
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/369d6676423c2a6dbf4a5665c4b5010240d99d3c.zip;62119892edfb78689061790140c439b111491275
Review comment (Member): leave a comment indicating which branch it's from; previously there was a comment for 8.4-GA.
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v3.18.3.zip;b95bf7e9de9c2249b6c1f2ca556ace49999e90bd
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/1787867f6183f056420e532eec640cba25efafea.zip;e43e80781560c5ab404a4da20f34d846f5f5d101
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6aa67a77f17a770960f604b727645b6f6a13
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.zip;2be4d2ae321fada97cb39eaf4eeba5f8c85597cf
re2;https://github.com/google/re2/archive/refs/tags/2022-06-01.zip;aa77313b76e91b531ee7f3e45f004c6a502a5374
safeint;https://github.com/dcleblanc/SafeInt/archive/ff15c6ada150a5018c5ef2172401cb4529eac9c0.zip;913a4046e5274d329af2806cb53194f617d8c0ab
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
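For context on the review comment above: each non-comment line in cmake/deps.txt is a simple name;url;sha1 record. A minimal sketch of reading that format (the parse_deps helper below is illustrative and not part of the repository):

def parse_deps(path: str) -> dict:
    """Parse deps.txt lines of the form "name;url;sha1", skipping blanks and # comments."""
    deps = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            name, url, sha1 = line.split(";")
            deps[name] = {"url": url, "sha1": sha1}
    return deps

# Example: parse_deps("cmake/deps.txt")["onnx_tensorrt"]["sha1"]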
10 changes: 6 additions & 4 deletions cmake/onnxruntime_providers.cmake
@@ -635,10 +635,12 @@ if (onnxruntime_USE_TENSORRT)
FetchContent_Declare(
onnx_tensorrt
URL ${DEP_URL_onnx_tensorrt}
URL_HASH SHA1=${DEP_SHA1_onnx_tensorrt}
URL_HASH SHA1=${DEP_SHA1_onnx_tensorrt}
)
FetchContent_MakeAvailable(onnx_tensorrt)
include_directories(${onnx_tensorrt_SOURCE_DIR})
# The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't build on Windows because it uses
# unistd.h, so we must exclude it from our build; onnxruntime_fetchcontent_makeavailable serves that purpose.
onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
include_directories(${onnx_tensorrt_SOURCE_DIR})
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
if ( CMAKE_COMPILER_IS_GNUCC )
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
@@ -1479,7 +1481,7 @@ if (onnxruntime_USE_TVM)
# wd4100: identifier' : unreferenced formal parameter
# wd4127: conditional expression is constant
# wd4244: conversion from 'int' to 'char', possible loss of data
# TODO: 4244 should not be disabled
# TODO: 4244 should not be disabled
target_compile_options(onnxruntime_providers_tvm PRIVATE "/wd4100" "/wd4127" "/wd4244")
else()
target_compile_options(onnxruntime_providers_tvm PRIVATE "-Wno-error=type-limits")
22 changes: 15 additions & 7 deletions cmake/onnxruntime_unittests.cmake
@@ -708,13 +708,21 @@ endif()

set(test_all_args)
if (onnxruntime_USE_TENSORRT)
# TRT EP CI takes much longer time when updating to TRT 8.2
# So, we only run trt ep and exclude other eps to reduce CI test time.
#
# The test names of model tests were using sequential number in the past.
# This PR https://github.com/microsoft/onnxruntime/pull/10220 (Please see ExpandModelName function in model_tests.cc for more details)
# made test name contain the "ep" and "model path" information, so we can easily filter the tests using cuda ep or other ep with *cpu__* or *xxx__*.
list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
if (onnxruntime_SKIP_AND_PERFORM_FILTERED_TENSORRT_TESTS)
# TRT EP package pipelines take much longer to run tests with TRT 8.5. We can't use the placeholder to reduce testing time because it causes an application deadlock during tests.
Review discussion on this change:

A reviewer (Member): what's the impact of this? How much did the test time increase, and what test coverage do we lose?

jywu-msft (Member, Dec 12, 2022): can we make the timeout configurable and schedule a daily run which runs through all the tests?

chilo-ms (Contributor, Author, Dec 12, 2022), replying to the first question: The test time was 2.5 hours for TRT 8.4, but with TRT 8.5 it increases to more than 9 hours and still had not finished (I think it needs several more hours). With this change we won't run any unit tests other than TensorrtExecutionProviderTest, but we will still run the model tests.

chilo-ms (Contributor, Author, Dec 12, 2022), replying to the second question: Yes, we can.
# Therefore we only run filtered TRT EP tests.
list(APPEND test_all_args "--gtest_filter=*tensorrt_*:*TensorrtExecutionProviderTest*" )
#list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*:*ContribOpTest*:*QuantGemmTest*:*QLinearConvTest*:*MurmurHash3OpTest*:*PadOpTest*:*QLinearConvTest*" )
else()
# TRT EP CI takes much longer time when updating to TRT 8.2
# So, we only run trt ep and exclude other eps to reduce CI test time.
#
# The test names of model tests were using sequential number in the past.
# This PR https://github.com/microsoft/onnxruntime/pull/10220 (Please see ExpandModelName function in model_tests.cc for more details)
# made test name contain the "ep" and "model path" information, so we can easily filter the tests using cuda ep or other ep with *cpu_* or *xxx_*.
list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*" )
endif()

endif ()

AddTest(
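As a usage sketch (not part of the PR): the filter assembled above is passed to the onnxruntime_test_all gtest binary, so the same filtered selection can be reproduced by hand. The binary path below is assumed from a local build output directory:

import subprocess
import sys

def run_filtered_trt_tests(build_dir: str) -> int:
    # Mirrors the --gtest_filter value set when onnxruntime_SKIP_AND_PERFORM_FILTERED_TENSORRT_TESTS is ON.
    cmd = [
        f"{build_dir}/onnxruntime_test_all",
        "--gtest_filter=*tensorrt_*:*TensorrtExecutionProviderTest*",
    ]
    return subprocess.call(cmd)

if __name__ == "__main__":
    sys.exit(run_filtered_trt_tests(sys.argv[1] if len(sys.argv) > 1 else "."))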
8 changes: 4 additions & 4 deletions onnxruntime/test/contrib_ops/quantize_ops_test.cc
@@ -203,7 +203,7 @@ void TestQuantizeLinearPerTensorFloatUint8(bool use_initializer_except_x) {
255, 0,
255, 0,
255, 0});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_uint8) {
@@ -270,7 +270,7 @@ TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_half_uint8) {
255, 0,
255, 0,
255, 0});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_half_int8) {
@@ -317,7 +317,7 @@ TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_channel) {
{0, 2, 3, 255,
0, 1, 2, 255,
0, 0, 1, 250});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

// quantize with broadcasting and negative axis (-2 resolves to axis 0)
@@ -335,7 +335,7 @@ TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_channel_negative_axis) {
{0, 2, 3, 255,
0, 1, 2, 255,
0, 0, 1, 250});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}
} // namespace test
} // namespace onnxruntime
4 changes: 2 additions & 2 deletions onnxruntime/test/contrib_ops/tensor_op_test.cc
@@ -120,7 +120,7 @@ void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normaliz
test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {N, C, H, W}, result);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); //OpenVINO doesn't support MVN operator below opset 9
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); //OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
}

void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) {
@@ -187,7 +187,7 @@ void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_va
test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {N, C, H, W}, result);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); //OpenVINO doesn't support MVN operator below opset 9
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); //OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
}

TEST(MVNContribOpTest, MeanVarianceNormalizationCPUTest_Version1_TO_8) {
3 changes: 2 additions & 1 deletion onnxruntime/test/providers/cpu/generator/random_test.cc
@@ -71,7 +71,8 @@ void RunRandomNormalLike3DFloat(bool infer_dtype = false) {

test.AddOutput<float>("Y", dims, expected_output);

test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider});
// TensorRT does not support manual seed overrides, so results will not match
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kRocmExecutionProvider, kTensorrtExecutionProvider});
}

TEST(Random, RandomNormalLike3DDouble) {
4 changes: 2 additions & 2 deletions onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
@@ -2883,7 +2883,7 @@ TEST(ModOpTest, Int8_mixed_sign) {
test.AddInput<int8_t>("Y", {6}, {2, -3, 8, -2, 3, 5});
test.AddOutput<int8_t>("Z", {6}, {0, -2, 5, 0, 2, 3});

test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT needs quantization scales to run these in INT8, so skip for now
}

TEST(ModOpTest, Int8_mixed_sign_fmod) {
@@ -2894,7 +2894,7 @@ TEST(ModOpTest, Int8_mixed_sign_fmod) {
test.AddInput<int8_t>("Y", {6}, {2, -3, 8, -2, 3, 5});
test.AddOutput<int8_t>("Z", {6}, {0, 1, 5, 0, -1, 3});

test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT needs quantization scales to run these in INT8, so skip for now
}

TEST(ModOpTest, UInt8_mod) {
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/cpu/nn/shrink_test.cc
@@ -97,7 +97,7 @@ const std::vector<MLFloat16> ConvertFloatToMLFloat16(const std::vector<float>& f

TEST(MathOpTest, ShrinkInt8Type) {
const auto& test_cases = GenerateSignedTestCases<int8_t>();
RunShrinkTest<int8_t>(test_cases);
RunShrinkTest<int8_t>(test_cases, {kTensorrtExecutionProvider}); // TensorRT needs quantization scales to run these in INT8, so skip for now
}

TEST(MathOpTest, ShrinkUint8Type) {
14 changes: 7 additions & 7 deletions onnxruntime/test/providers/cpu/tensor/quantize_linear_test.cc
@@ -228,7 +228,7 @@ TEST(QuantizeLinearOpTest, Uint8) {
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddInput<uint8_t>("y_zero_point", {}, {128});
test.AddOutput<uint8_t>("y", dims, {128, 129, 130, 255, 1, 0});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

// quantize with scalar zero point and scale
@@ -296,7 +296,7 @@ TEST(QuantizeLinearOpTest, 2D) {
{0, 0, 1, 250,
0, 0, 1, 250,
0, 0, 1, 250});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

// quantize with scalar data
@@ -306,7 +306,7 @@ TEST(QuantizeLinearOpTest, Scalar) {
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddInput<uint8_t>("y_zero_point", {}, {128});
test.AddOutput<uint8_t>("y", {}, {130});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

// quantize with scalar data
@@ -315,7 +315,7 @@ TEST(QuantizeLinearOpTest, DISABLED_QuantizeLinear_Without_Zero_Point) {
test.AddInput<float>("x", {}, {3});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddOutput<uint8_t>("y", {}, {2});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

TEST(QuantizeLinearOpTest, Per_Channel_Axis_Default) {
@@ -331,7 +331,7 @@ TEST(QuantizeLinearOpTest, Per_Channel_Axis_Default) {
{64, 101, 127, 177,
65, 100, 128, 182,
66, 102, 128, 187});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

TEST(QuantizeLinearOpTest, Per_Channel_Axis_0) {
@@ -348,7 +348,7 @@ TEST(QuantizeLinearOpTest, Per_Channel_Axis_0) {
{0, 2, 3, 255,
0, 1, 2, 255,
0, 0, 1, 250});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

// quantize with per-channel and negative axis (-2 resolves to axis 0)
@@ -366,7 +366,7 @@ TEST(QuantizeLinearOpTest, Per_Channel_Axis_neg) {
{0, 2, 3, 255,
0, 1, 2, 255,
0, 0, 1, 250});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support UINT8 for quantization
}

} // namespace test
7 changes: 7 additions & 0 deletions tools/ci_build/build.py
@@ -491,6 +491,11 @@ def convert_arg_line_to_args(self, arg_line):
"--tensorrt_placeholder_builder", action="store_true", help="Instantiate Placeholder TensorRT Builder"
)
parser.add_argument("--tensorrt_home", help="Path to TensorRT installation dir")
parser.add_argument(
"--skip_and_perform_filtered_tensorrt_tests",
action="store_true",
help="Skip time-consuming and only perform filtered tests for TensorRT EP",
)
parser.add_argument("--use_migraphx", action="store_true", help="Build with MIGraphX")
parser.add_argument("--migraphx_home", help="Path to MIGraphX installation dir")
parser.add_argument("--use_full_protobuf", action="store_true", help="Use the full protobuf library")
@@ -876,6 +881,8 @@ def generate_build_tree(
"-Donnxruntime_ENABLE_MICROSOFT_INTERNAL=" + ("ON" if args.enable_msinternal else "OFF"),
"-Donnxruntime_USE_VITISAI=" + ("ON" if args.use_vitisai else "OFF"),
"-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
"-Donnxruntime_SKIP_AND_PERFORM_FILTERED_TENSORRT_TESTS="
+ ("ON" if args.skip_and_perform_filtered_tensorrt_tests else "OFF"),
"-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser else "OFF"),
"-Donnxruntime_TENSORRT_PLACEHOLDER_BUILDER=" + ("ON" if args.tensorrt_placeholder_builder else "OFF"),
# set vars for TVM
@@ -116,7 +116,7 @@ jobs:
buildArch: x64
msbuildPlatform: x64
packageName: x64-tensorrt
buildparameter: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.4.1.5.Windows10.x86_64.cuda-11.6.cudnn8.4" --cuda_version=11.6 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6" --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;52;60;61;70;75;80"
buildparameter: --use_tensorrt --skip_and_perform_filtered_tensorrt_tests --tensorrt_home="C:\local\TensorRT-8.5.1.7.Windows10.x86_64.cuda-11.8.cudnn8.6" --cuda_version=11.6 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6" --enable_onnx_tests --enable_wcos --build_java --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;52;60;61;70;75;80"
runTests: ${{ parameters.RunOnnxRuntimeTests }}
buildJava: true
java_artifact_id: onnxruntime_gpu
@@ -294,11 +294,11 @@ jobs:
Steps:
- script: |
tools/ci_build/get_docker_image.py \
--dockerfile tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_6_tensorrt8_4 \
--dockerfile tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_6_tensorrt8_5 \
--context tools/ci_build/github/linux/docker \
--docker-build-args "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-11/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-11/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-11/root/usr/lib64:/opt/rh/devtoolset-11/root/usr/lib:/opt/rh/devtoolset-11/root/usr/lib64/dyninst:/opt/rh/devtoolset-11/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )" \
--container-registry onnxruntimebuildcache \
--repository onnxruntimecuda116xtrt84build
--repository onnxruntimecuda116xtrt85build
displayName: "Getonnxruntimecuda116xtrt84build image for tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11_6_tensorrt8_4"
workingDirectory: $(Build.SourcesDirectory)/onnxruntime
ContainerRegistry: onnxruntimebuildcache
@@ -351,7 +351,7 @@ jobs:
inputs:
script: |
docker run --gpus all -e CC=/opt/rh/devtoolset-11/root/usr/bin/cc -e CXX=/opt/rh/devtoolset-11/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e NVIDIA_VISIBLE_DEVICES=all --rm --volume $(Build.SourcesDirectory):/src_dir \
--volume $(Build.ArtifactStagingDirectory):/artifact_src -e NIGHTLY_BUILD onnxruntimecuda116xtrt84build \
--volume $(Build.ArtifactStagingDirectory):/artifact_src -e NIGHTLY_BUILD onnxruntimecuda116xtrt85build \
/src_dir/onnxruntime-inference-examples/c_cxx/squeezenet/run_capi_application.sh -o /src_dir/onnxruntime -p /artifact_src/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz -w /src_dir/onnxruntime-inference-examples/c_cxx/squeezenet
workingDirectory: '$(Build.ArtifactStagingDirectory)'
