Skip to content

Commit

Permalink
Improve TensorRT GetCapability to Enable More Models (#1012)
Browse files Browse the repository at this point in the history
* Improve TensorRT GetCapability Accuracy

* Update onnxruntime_providers.cmake

* made changes based on feedback

* update unit tests for TensorRT

* update onnx-tensorrt submodule to v5.0 branch

* remove unnecessary comments

* convert int32 to int64 at inference output

* add more data types in compute

* change returns in compute

* use StatusCode as return in compute
  • Loading branch information
stevenlix authored May 24, 2019
1 parent b44a30b commit 723d5c7
Show file tree
Hide file tree
Showing 18 changed files with 462 additions and 291 deletions.
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
[submodule "cmake/external/onnx-tensorrt"]
path = cmake/external/onnx-tensorrt
url = https://github.com/onnx/onnx-tensorrt.git
branch = v5.0
[submodule "cmake/external/eigen"]
path = cmake/external/eigen
url = https://github.com/eigenteam/eigen-git-mirror.git
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ if (onnxruntime_USE_TENSORRT)
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
if (WIN32)
set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd2220")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4701 /wd4805")
endif()
Expand Down
367 changes: 256 additions & 111 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

Large diffs are not rendered by default.

59 changes: 35 additions & 24 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,22 @@
namespace onnxruntime {

class TensorrtLogger : public nvinfer1::ILogger {
nvinfer1::ILogger::Severity verbosity_;
public:
TensorrtLogger(Severity verbosity=Severity::kWARNING)
: verbosity_(verbosity) {}
void log(Severity severity, const char* msg) override {
if( severity <= verbosity_ ) {
time_t rawtime = std::time(0);
char buf[256];
strftime(&buf[0], 256,
"%Y-%m-%d %H:%M:%S",
std::gmtime(&rawtime));
const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" :
severity == Severity::kERROR ? " ERROR" :
severity == Severity::kWARNING ? "WARNING" :
severity == Severity::kINFO ? " INFO" :
"UNKNOWN");
LOGS_DEFAULT(WARNING) << "[" << buf << " " << sevstr << "] " << msg;
}
nvinfer1::ILogger::Severity verbosity_;

public:
TensorrtLogger(Severity verbosity = Severity::kWARNING)
: verbosity_(verbosity) {}
void log(Severity severity, const char* msg) override {
if (severity <= verbosity_) {
time_t rawtime = std::time(0);
char buf[256];
strftime(&buf[0], 256,
"%Y-%m-%d %H:%M:%S",
std::gmtime(&rawtime));
const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : severity == Severity::kERROR ? " ERROR" : severity == Severity::kWARNING ? "WARNING" : severity == Severity::kINFO ? " INFO" : "UNKNOWN");
LOGS_DEFAULT(WARNING) << "[" << buf << " " << sevstr << "] " << msg;
}
}
};

// Information needed to construct trt execution providers.
Expand Down Expand Up @@ -74,16 +71,17 @@ class TensorrtExecutionProvider : public IExecutionProvider {
std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;

void SetMaxBatchSize(const int batch_size) {
max_batch_size_ = batch_size;
max_batch_size_ = batch_size;
}

void SetMaxWorkspaceSize(const size_t workspace_size) {
max_workspace_size_ = workspace_size;
max_workspace_size_ = workspace_size;
}

private:
int max_batch_size_ = 1;
size_t max_workspace_size_ = 1 << 30; // 1GB
int max_batch_size_ = 1;
size_t max_workspace_size_ = 1 << 30; // 1GB
int max_parser_iterations_ = 6;

struct InferDeleter {
template <typename T>
Expand All @@ -105,7 +103,20 @@ class TensorrtExecutionProvider : public IExecutionProvider {
std::unordered_map<std::string, std::vector<std::vector<int>>> input_info_;
std::unordered_map<std::string, std::vector<std::vector<int>>> output_info_;
std::unordered_map<std::string, std::vector<std::vector<int64_t>>> output_shapes_;

/**Get IndexedSubGraph based on node list of the subgraph*/
std::unique_ptr<IndexedSubGraph> GetSubGraph(SubGraph_t graph_nodes_index, int& kernels_index,
const onnxruntime::GraphViewer& graph) const;

/**
Get TensorRT supported node lists by calling Onnx-TensorRT parser recursively. Since each time the parser
can only detect first unsupported node failure, it needs to wait for Onnxruntime to partition the graph
and then detect next failure again. If there are too many iterations, which means many nodes in the graph
are not supported by TensorRT, the process will be terminated and the whole graph is simply assigned to
other execution provider.
*/
SubGraphCollection_t GetSupportedList(SubGraphCollection_t supported_nodes_list, int iterations, const int max_iterations,
const onnxruntime::GraphViewer& graph, bool* early_termination) const;
};

} // namespace onnxruntime

40 changes: 20 additions & 20 deletions onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ TEST(MathOpTest, Add_int64) {
test.AddInput<int64_t>("A", {3}, {1, 2, 3});
test.AddInput<int64_t>("B", {3}, {4, 5, 6});
test.AddOutput<int64_t>("C", {3}, {5, 7, 9});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Add) {
Expand Down Expand Up @@ -69,7 +69,7 @@ TEST(MathOpTest, Add_Broadcast_0x0) {
test.AddInput<float>("A", {}, {10.0f});
test.AddInput<float>("B", {}, {2.0f});
test.AddOutput<float>("C", {}, {12.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Add_Broadcast_0x1) {
Expand All @@ -78,7 +78,7 @@ TEST(MathOpTest, Add_Broadcast_0x1) {
test.AddInput<float>("A", {}, {10.0f});
test.AddInput<float>("B", {1}, {2.0f});
test.AddOutput<float>("C", {1}, {12.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Add_Broadcast_1x0) {
Expand All @@ -87,7 +87,7 @@ TEST(MathOpTest, Add_Broadcast_1x0) {
test.AddInput<float>("A", {1}, {10.0f});
test.AddInput<float>("B", {}, {2.0f});
test.AddOutput<float>("C", {1}, {12.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Add_Broadcast_1x1) {
Expand Down Expand Up @@ -134,7 +134,7 @@ TEST(MathOpTest, Add_Broadcast_2x1x4_1x3x1) {
211.0f, 212.0f, 213.0f, 214.0f,
221.0f, 222.0f, 223.0f, 224.0f,
231.0f, 232.0f, 233.0f, 234.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) {
Expand All @@ -154,7 +154,7 @@ TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) {
211.0f, 212.0f, 213.0f, 214.0f,
221.0f, 222.0f, 223.0f, 224.0f,
231.0f, 232.0f, 233.0f, 234.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Sub_int32) {
Expand All @@ -170,7 +170,7 @@ TEST(MathOpTest, Sub_int64) {
test.AddInput<int64_t>("A", {3}, {1, 5, 6});
test.AddInput<int64_t>("B", {3}, {4, 5, 3});
test.AddOutput<int64_t>("C", {3}, {-3, 0, 3});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Sub) {
Expand Down Expand Up @@ -203,7 +203,7 @@ TEST(MathOpTest, Sub_Broadcast_Scalar) {
{-4.0f, -3.0f, -6.0f,
-5.0f, -3.5f, -105.0f,
-10.4f, 4.3f, -10'005.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Mul_int32) {
Expand All @@ -219,7 +219,7 @@ TEST(MathOpTest, Mul_int64) {
test.AddInput<int64_t>("A", {3}, {3, 6, -3});
test.AddInput<int64_t>("B", {3}, {4, -3, -2});
test.AddOutput<int64_t>("C", {3}, {12, -18, 6});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Mul) {
Expand Down Expand Up @@ -253,7 +253,7 @@ TEST(MathOpTest, Div_int64) {
test.AddInput<int64_t>("A", {3}, {4, 8, 8});
test.AddInput<int64_t>("B", {3}, {2, 3, 4});
test.AddOutput<int64_t>("C", {3}, {2, 2, 2});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Div) {
Expand Down Expand Up @@ -284,7 +284,7 @@ TEST(MathOpTest, Abs_int8) {
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("X", dims, {1, 2, -1, -5});
test.AddOutput<int8_t>("Y", dims, {1, 2, 1, 5});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(MathOpTest, Abs_int32) {
Expand Down Expand Up @@ -312,7 +312,7 @@ TEST(MathOpTest, Neg_int8) {
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("X", dims, {1, -2, 0, -10});
test.AddOutput<int8_t>("Y", dims, {-1, 2, 0, 10});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(MathOpTest, Neg_int32) {
Expand Down Expand Up @@ -393,7 +393,7 @@ TEST(MathOpTest, Pow_Broadcast_Scalar0) {
test.AddInput<float>("X", {}, {2.0f});
test.AddInput<float>("Y", dims, {1.0f, 2.0f, 3.0f});
test.AddOutput<float>("Z", dims, {2.0f, 4.0f, 8.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Pow_Broadcast_Scalar1) {
Expand All @@ -403,7 +403,7 @@ TEST(MathOpTest, Pow_Broadcast_Scalar1) {
test.AddInput<float>("X", dims, {1.0f, 2.0f, 3.0f});
test.AddInput<float>("Y", {}, {2.0f});
test.AddOutput<float>("Z", dims, {1.0f, 4.0f, 9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Exp) {
Expand All @@ -416,7 +416,7 @@ TEST(MathOpTest, Exp) {
{1.0f, std::exp(1.0f),
std::exp(2.0f), std::exp(10.0f)});
test.SetOutputRelErr("Y", 1e-7f);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
}

TEST(MathOpTest, Log) {
Expand Down Expand Up @@ -470,7 +470,7 @@ TEST(MathOpTest, Sum_8_Test1) {
311.0f, 312.0f, 313.0f,
321.0f, 322.0f, 323.0f,
331.0f, 332.0f, 333.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT parser failed on this test
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
}

TEST(MathOpTest, Sum_8_Test2) {
Expand Down Expand Up @@ -499,7 +499,7 @@ TEST(MathOpTest, Sum_8_Test2) {
3.3f, 4.4f, -94.7f,
59.6f, 64.01f, -8.0f});

test.Run(OpTester::ExpectResult::kExpectSuccess, "Sum is not correct", {kTensorrtExecutionProvider});
test.Run(OpTester::ExpectResult::kExpectSuccess, "Sum is not correct", {kTensorrtExecutionProvider}); //TensorRT: result differs
}

TEST(MathOpTest, Min_6) {
Expand Down Expand Up @@ -582,7 +582,7 @@ TEST(MathOpTest, Max_8) {
{10.0f, 20.0f, 30.0f,
40.0f, 50.0f, 60.0f,
300.0f, 300.0f, 300.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Max_8_2inputbroadcast) {
Expand All @@ -597,7 +597,7 @@ TEST(MathOpTest, Max_8_2inputbroadcast) {
{10.0f, 20.0f, 30.0f,
40.0f, 50.0f, 60.0f,
70.0f, 80.0f, 90.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Not) {
Expand Down Expand Up @@ -773,7 +773,7 @@ TEST(MathOpTest, Mean_8) {
{12.0f / 3.0f, 22.0f / 3.0f, 32.0f / 3.0f,
43.0f / 3.0f, 53.0f / 3.0f, 63.0f / 3.0f,
74.0f / 3.0f, 84.0f / 3.0f, 94.0f / 3.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

#ifndef DISABLE_CONTRIB_OPS
Expand Down
22 changes: 10 additions & 12 deletions onnxruntime/test/providers/cpu/math/gemm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
namespace onnxruntime {
namespace test {

// Disable TensorRT on some of the tests because TensorRT only support FLOAT, INT8, FLOAT16 and INT32 for now

TEST(GemmOpTest, GemmNoTrans) {
OpTester test("Gemm");

Expand All @@ -25,7 +23,7 @@ TEST(GemmOpTest, GemmNoTrans) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

// Only CUDA kernel has float 16 support
Expand Down Expand Up @@ -58,7 +56,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {4, 3}, f_B);
test.AddInput<MLFloat16>("C", {2, 3}, f_C);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}
#endif

Expand All @@ -78,7 +76,7 @@ TEST(GemmOpTest, GemmBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 12.0f, 13.0f,
-9.0f, -8.0f, -7.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmTrans) {
Expand All @@ -99,7 +97,7 @@ TEST(GemmOpTest, GemmTrans) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmAlphaBeta) {
Expand All @@ -118,7 +116,7 @@ TEST(GemmOpTest, GemmAlphaBeta) {
test.AddOutput<float>("Y", {2, 3},
{7.0f, 7.0f, 7.0f,
-3.0f, -3.0f, -3.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmNaN) {
Expand All @@ -137,7 +135,7 @@ TEST(GemmOpTest, GemmNaN) {
test.AddOutput<float>("Y", {2, 3},
{10.0f, 10.0f, 10.0f,
-10.0f, -10.0f, -10.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmScalarBroadcast) {
Expand All @@ -156,7 +154,7 @@ TEST(GemmOpTest, GemmScalarBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(MathOpTest, Gemm2DBroadcast) {
Expand All @@ -175,7 +173,7 @@ TEST(MathOpTest, Gemm2DBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-8.0f, -8.0f, -8.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmFalseBroadcast) {
Expand All @@ -194,7 +192,7 @@ TEST(GemmOpTest, GemmFalseBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-8.0f, -8.0f, -8.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmEmptyTensor) {
Expand All @@ -211,7 +209,7 @@ TEST(GemmOpTest, GemmEmptyTensor) {
test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
test.AddOutput<float>("Y", {0, 3},
{});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

} // namespace test
Expand Down
Loading

0 comments on commit 723d5c7

Please sign in to comment.