Skip to content

Commit

Permalink
Improve TensorRT GetCapability to Enable More Models (#1012)
Browse files Browse the repository at this point in the history
* Improve TensorRT GetCapability Accuracy

* Update onnxruntime_providers.cmake

* made changes based on feedback

* update unit tests for TensorRT

* update onnx-tensorrt submodule to v5.0 branch

* remove unnecessary comments

* convert int32 to int64 at inference output

* add more data types in compute

* change returns in compute

* use StatusCode as return in compute
  • Loading branch information
stevenlix authored May 24, 2019
1 parent b44a30b commit 723d5c7
Show file tree
Hide file tree
Showing 18 changed files with 462 additions and 291 deletions.
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
[submodule "cmake/external/onnx-tensorrt"]
path = cmake/external/onnx-tensorrt
url = https://github.com/onnx/onnx-tensorrt.git
branch = v5.0
[submodule "cmake/external/eigen"]
path = cmake/external/eigen
url = https://github.com/eigenteam/eigen-git-mirror.git
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ if (onnxruntime_USE_TENSORRT)
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
if (WIN32)
set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd2220")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4701 /wd4805")
endif()
Expand Down
367 changes: 256 additions & 111 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

Large diffs are not rendered by default.

59 changes: 35 additions & 24 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,22 @@
namespace onnxruntime {

class TensorrtLogger : public nvinfer1::ILogger {
nvinfer1::ILogger::Severity verbosity_;
public:
TensorrtLogger(Severity verbosity=Severity::kWARNING)
: verbosity_(verbosity) {}
void log(Severity severity, const char* msg) override {
if( severity <= verbosity_ ) {
time_t rawtime = std::time(0);
char buf[256];
strftime(&buf[0], 256,
"%Y-%m-%d %H:%M:%S",
std::gmtime(&rawtime));
const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" :
severity == Severity::kERROR ? " ERROR" :
severity == Severity::kWARNING ? "WARNING" :
severity == Severity::kINFO ? " INFO" :
"UNKNOWN");
LOGS_DEFAULT(WARNING) << "[" << buf << " " << sevstr << "] " << msg;
}
nvinfer1::ILogger::Severity verbosity_;

public:
TensorrtLogger(Severity verbosity = Severity::kWARNING)
: verbosity_(verbosity) {}
void log(Severity severity, const char* msg) override {
if (severity <= verbosity_) {
time_t rawtime = std::time(0);
char buf[256];
strftime(&buf[0], 256,
"%Y-%m-%d %H:%M:%S",
std::gmtime(&rawtime));
const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : severity == Severity::kERROR ? " ERROR" : severity == Severity::kWARNING ? "WARNING" : severity == Severity::kINFO ? " INFO" : "UNKNOWN");
LOGS_DEFAULT(WARNING) << "[" << buf << " " << sevstr << "] " << msg;
}
}
};

// Information needed to construct trt execution providers.
Expand Down Expand Up @@ -74,16 +71,17 @@ class TensorrtExecutionProvider : public IExecutionProvider {
std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;

void SetMaxBatchSize(const int batch_size) {
max_batch_size_ = batch_size;
max_batch_size_ = batch_size;
}

void SetMaxWorkspaceSize(const size_t workspace_size) {
max_workspace_size_ = workspace_size;
max_workspace_size_ = workspace_size;
}

private:
int max_batch_size_ = 1;
size_t max_workspace_size_ = 1 << 30; // 1GB
int max_batch_size_ = 1;
size_t max_workspace_size_ = 1 << 30; // 1GB
int max_parser_iterations_ = 6;

struct InferDeleter {
template <typename T>
Expand All @@ -105,7 +103,20 @@ class TensorrtExecutionProvider : public IExecutionProvider {
std::unordered_map<std::string, std::vector<std::vector<int>>> input_info_;
std::unordered_map<std::string, std::vector<std::vector<int>>> output_info_;
std::unordered_map<std::string, std::vector<std::vector<int64_t>>> output_shapes_;

/**Get IndexedSubGraph based on node list of the subgraph*/
std::unique_ptr<IndexedSubGraph> GetSubGraph(SubGraph_t graph_nodes_index, int& kernels_index,
const onnxruntime::GraphViewer& graph) const;

/**
Get TensorRT supported node lists by calling Onnx-TensorRT parser recursively. Since each time the parser
can only detect first unsupported node failure, it needs to wait for Onnxruntime to partition the graph
and then detect next failure again. If there are too many iterations, which means many nodes in the graph
are not supported by TensorRT, the process will be terminated and the whole graph is simply assigned to
other execution provider.
*/
SubGraphCollection_t GetSupportedList(SubGraphCollection_t supported_nodes_list, int iterations, const int max_iterations,
const onnxruntime::GraphViewer& graph, bool* early_termination) const;
};

} // namespace onnxruntime

40 changes: 20 additions & 20 deletions onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ TEST(MathOpTest, Add_int64) {
test.AddInput<int64_t>("A", {3}, {1, 2, 3});
test.AddInput<int64_t>("B", {3}, {4, 5, 6});
test.AddOutput<int64_t>("C", {3}, {5, 7, 9});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Add) {
Expand Down Expand Up @@ -69,7 +69,7 @@ TEST(MathOpTest, Add_Broadcast_0x0) {
test.AddInput<float>("A", {}, {10.0f});
test.AddInput<float>("B", {}, {2.0f});
test.AddOutput<float>("C", {}, {12.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Add_Broadcast_0x1) {
Expand All @@ -78,7 +78,7 @@ TEST(MathOpTest, Add_Broadcast_0x1) {
test.AddInput<float>("A", {}, {10.0f});
test.AddInput<float>("B", {1}, {2.0f});
test.AddOutput<float>("C", {1}, {12.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Add_Broadcast_1x0) {
Expand All @@ -87,7 +87,7 @@ TEST(MathOpTest, Add_Broadcast_1x0) {
test.AddInput<float>("A", {1}, {10.0f});
test.AddInput<float>("B", {}, {2.0f});
test.AddOutput<float>("C", {1}, {12.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Add_Broadcast_1x1) {
Expand Down Expand Up @@ -134,7 +134,7 @@ TEST(MathOpTest, Add_Broadcast_2x1x4_1x3x1) {
211.0f, 212.0f, 213.0f, 214.0f,
221.0f, 222.0f, 223.0f, 224.0f,
231.0f, 232.0f, 233.0f, 234.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) {
Expand All @@ -154,7 +154,7 @@ TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) {
211.0f, 212.0f, 213.0f, 214.0f,
221.0f, 222.0f, 223.0f, 224.0f,
231.0f, 232.0f, 233.0f, 234.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Sub_int32) {
Expand All @@ -170,7 +170,7 @@ TEST(MathOpTest, Sub_int64) {
test.AddInput<int64_t>("A", {3}, {1, 5, 6});
test.AddInput<int64_t>("B", {3}, {4, 5, 3});
test.AddOutput<int64_t>("C", {3}, {-3, 0, 3});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Sub) {
Expand Down Expand Up @@ -203,7 +203,7 @@ TEST(MathOpTest, Sub_Broadcast_Scalar) {
{-4.0f, -3.0f, -6.0f,
-5.0f, -3.5f, -105.0f,
-10.4f, 4.3f, -10'005.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Mul_int32) {
Expand All @@ -219,7 +219,7 @@ TEST(MathOpTest, Mul_int64) {
test.AddInput<int64_t>("A", {3}, {3, 6, -3});
test.AddInput<int64_t>("B", {3}, {4, -3, -2});
test.AddOutput<int64_t>("C", {3}, {12, -18, 6});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Mul) {
Expand Down Expand Up @@ -253,7 +253,7 @@ TEST(MathOpTest, Div_int64) {
test.AddInput<int64_t>("A", {3}, {4, 8, 8});
test.AddInput<int64_t>("B", {3}, {2, 3, 4});
test.AddOutput<int64_t>("C", {3}, {2, 2, 2});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
test.Run();
}

TEST(MathOpTest, Div) {
Expand Down Expand Up @@ -284,7 +284,7 @@ TEST(MathOpTest, Abs_int8) {
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("X", dims, {1, 2, -1, -5});
test.AddOutput<int8_t>("Y", dims, {1, 2, 1, 5});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(MathOpTest, Abs_int32) {
Expand Down Expand Up @@ -312,7 +312,7 @@ TEST(MathOpTest, Neg_int8) {
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("X", dims, {1, -2, 0, -10});
test.AddOutput<int8_t>("Y", dims, {-1, 2, 0, 10});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(MathOpTest, Neg_int32) {
Expand Down Expand Up @@ -393,7 +393,7 @@ TEST(MathOpTest, Pow_Broadcast_Scalar0) {
test.AddInput<float>("X", {}, {2.0f});
test.AddInput<float>("Y", dims, {1.0f, 2.0f, 3.0f});
test.AddOutput<float>("Z", dims, {2.0f, 4.0f, 8.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Pow_Broadcast_Scalar1) {
Expand All @@ -403,7 +403,7 @@ TEST(MathOpTest, Pow_Broadcast_Scalar1) {
test.AddInput<float>("X", dims, {1.0f, 2.0f, 3.0f});
test.AddInput<float>("Y", {}, {2.0f});
test.AddOutput<float>("Z", dims, {1.0f, 4.0f, 9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
test.Run();
}

TEST(MathOpTest, Exp) {
Expand All @@ -416,7 +416,7 @@ TEST(MathOpTest, Exp) {
{1.0f, std::exp(1.0f),
std::exp(2.0f), std::exp(10.0f)});
test.SetOutputRelErr("Y", 1e-7f);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
}

TEST(MathOpTest, Log) {
Expand Down Expand Up @@ -470,7 +470,7 @@ TEST(MathOpTest, Sum_8_Test1) {
311.0f, 312.0f, 313.0f,
321.0f, 322.0f, 323.0f,
331.0f, 332.0f, 333.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT parser failed on this test
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
}

TEST(MathOpTest, Sum_8_Test2) {
Expand Down Expand Up @@ -499,7 +499,7 @@ TEST(MathOpTest, Sum_8_Test2) {
3.3f, 4.4f, -94.7f,
59.6f, 64.01f, -8.0f});

test.Run(OpTester::ExpectResult::kExpectSuccess, "Sum is not correct", {kTensorrtExecutionProvider});
test.Run(OpTester::ExpectResult::kExpectSuccess, "Sum is not correct", {kTensorrtExecutionProvider}); //TensorRT: result differs
}

TEST(MathOpTest, Min_6) {
Expand Down Expand Up @@ -582,7 +582,7 @@ TEST(MathOpTest, Max_8) {
{10.0f, 20.0f, 30.0f,
40.0f, 50.0f, 60.0f,
300.0f, 300.0f, 300.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Max_8_2inputbroadcast) {
Expand All @@ -597,7 +597,7 @@ TEST(MathOpTest, Max_8_2inputbroadcast) {
{10.0f, 20.0f, 30.0f,
40.0f, 50.0f, 60.0f,
70.0f, 80.0f, 90.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Not) {
Expand Down Expand Up @@ -773,7 +773,7 @@ TEST(MathOpTest, Mean_8) {
{12.0f / 3.0f, 22.0f / 3.0f, 32.0f / 3.0f,
43.0f / 3.0f, 53.0f / 3.0f, 63.0f / 3.0f,
74.0f / 3.0f, 84.0f / 3.0f, 94.0f / 3.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

#ifndef DISABLE_CONTRIB_OPS
Expand Down
22 changes: 10 additions & 12 deletions onnxruntime/test/providers/cpu/math/gemm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
namespace onnxruntime {
namespace test {

// Disable TensorRT on some of the tests because TensorRT only support FLOAT, INT8, FLOAT16 and INT32 for now

TEST(GemmOpTest, GemmNoTrans) {
OpTester test("Gemm");

Expand All @@ -25,7 +23,7 @@ TEST(GemmOpTest, GemmNoTrans) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

// Only CUDA kernel has float 16 support
Expand Down Expand Up @@ -58,7 +56,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {4, 3}, f_B);
test.AddInput<MLFloat16>("C", {2, 3}, f_C);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}
#endif

Expand All @@ -78,7 +76,7 @@ TEST(GemmOpTest, GemmBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 12.0f, 13.0f,
-9.0f, -8.0f, -7.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmTrans) {
Expand All @@ -99,7 +97,7 @@ TEST(GemmOpTest, GemmTrans) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmAlphaBeta) {
Expand All @@ -118,7 +116,7 @@ TEST(GemmOpTest, GemmAlphaBeta) {
test.AddOutput<float>("Y", {2, 3},
{7.0f, 7.0f, 7.0f,
-3.0f, -3.0f, -3.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmNaN) {
Expand All @@ -137,7 +135,7 @@ TEST(GemmOpTest, GemmNaN) {
test.AddOutput<float>("Y", {2, 3},
{10.0f, 10.0f, 10.0f,
-10.0f, -10.0f, -10.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmScalarBroadcast) {
Expand All @@ -156,7 +154,7 @@ TEST(GemmOpTest, GemmScalarBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(MathOpTest, Gemm2DBroadcast) {
Expand All @@ -175,7 +173,7 @@ TEST(MathOpTest, Gemm2DBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-8.0f, -8.0f, -8.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmFalseBroadcast) {
Expand All @@ -194,7 +192,7 @@ TEST(GemmOpTest, GemmFalseBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-8.0f, -8.0f, -8.0f});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

TEST(GemmOpTest, GemmEmptyTensor) {
Expand All @@ -211,7 +209,7 @@ TEST(GemmOpTest, GemmEmptyTensor) {
test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
test.AddOutput<float>("Y", {0, 3},
{});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
test.Run();
}

} // namespace test
Expand Down
Loading

0 comments on commit 723d5c7

Please sign in to comment.