Skip to content

Commit

Permalink
Implement Nuphar execution provider
Browse files Browse the repository at this point in the history
Nuphar execution provider is a TVM-based compilation provider. It has shown great speedups for RNN models using Scan.
This PR is mainly for a preview of the shared codegen library for other TVM-based providers.
  • Loading branch information
KeDengMS committed May 25, 2019
1 parent 723d5c7 commit 3dcf4d6
Show file tree
Hide file tree
Showing 277 changed files with 19,490 additions and 488 deletions.
37 changes: 37 additions & 0 deletions BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,43 @@ You can build from source on Linux by using the following `cmd` from the onnxrun

```
### Nuphar
ONNX Runtime supports the Nuphar execution provider (released as preview). It is an execution provider built on top of [TVM](https://github.com/dmlc/tvm) and [LLVM](https://llvm.org). Currently it targets x64 CPUs.
The Nuphar execution provider for ONNX Runtime is built and tested with LLVM 6.0.1. Because of TVM's requirement when building with LLVM, you need to build LLVM from source:
Windows with Visual Studio 2017: (Note: this builds the Release flavor of LLVM. A Debug build of LLVM is needed to build the Debug flavor of ONNX Runtime.)
```
REM download llvm source code 6.0.1 and unzip to \llvm\source\path, then install to \llvm\install\path
cd \llvm\source\path
mkdir build
cd build
cmake .. -G "Visual Studio 15 2017 Win64" -DLLVM_TARGETS_TO_BUILD=X86
msbuild llvm.sln /maxcpucount /p:Configuration=Release /p:Platform=x64
cmake -DCMAKE_INSTALL_PREFIX=\llvm\install\path -DBUILD_TYPE=Release -P cmake_install.cmake
```
Linux:
```
# download llvm source code 6.0.1 and unzip to /llvm/source/path, then install to /llvm/install/path
cd /llvm/source/path
mkdir build
cd build
cmake .. -DLLVM_TARGETS_TO_BUILD=X86 -DCMAKE_BUILD_TYPE=Release
cmake --build .
cmake -DCMAKE_INSTALL_PREFIX=/llvm/install/path -DBUILD_TYPE=Release -P cmake_install.cmake
```
Then you can build from source by using the following command from the onnxruntime directory:
Windows:
```
build.bat --use_tvm --use_llvm --llvm_path=\llvm\install\path\lib\cmake\llvm --use_mklml --use_nuphar --config=Release
```
Linux:
```
./build.sh --use_tvm --use_llvm --llvm_path=/llvm/install/path/lib/cmake/llvm --use_mklml --use_nuphar --config=Release
```
### OpenBLAS
#### Windows
Expand Down
31 changes: 28 additions & 3 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ option(onnxruntime_USE_LLVM "Build tvm with LLVM" OFF)
option(onnxruntime_USE_OPENMP "Build with OpenMP support" OFF)
option(onnxruntime_BUILD_SHARED_LIB "Build a shared library" OFF)
option(onnxruntime_ENABLE_MICROSOFT_INTERNAL "Use this option to enable/disable microsoft internal only code" OFF)
option(onnxruntime_USE_NUPHAR "Build with Nupha" OFF)
option(onnxruntime_USE_NUPHAR "Build with Nuphar" OFF)
option(onnxruntime_USE_BRAINSLICE "Build with BrainSlice" OFF)
option(onnxruntime_USE_TENSORRT "Build with TensorRT support" OFF)
option(onnxruntime_ENABLE_LTO "Enable link time optimization" ON)
Expand Down Expand Up @@ -315,6 +315,26 @@ if (onnxruntime_USE_TVM)
# When TVM is built with LLVM, turn on the LLVM backend and apply the local
# patches under patches/tvm to the TVM (and HalideIR) sources at configure time.
# NOTE(review): the result of each execute_process() call is not captured, so a
# patch that fails to apply (for example because it was already applied by a
# previous configure run) does not stop configuration - confirm this is intended.
if (onnxruntime_USE_LLVM)
# presumably read by the TVM build that is added as a subdirectory - TODO confirm
set(USE_LLVM ON)
add_definitions(-DUSE_TVM_WITH_LLVM)

# Patch for the LLVM FCmp assert.
# It can be removed once a fix is available.
set(TVM_PATCH_COMMAND1 git apply ${CMAKE_SOURCE_DIR}/patches/tvm/FCmp.patch)
execute_process(COMMAND ${TVM_PATCH_COMMAND1} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/external/tvm)
# Patch for disabling the generation of TVM assert statements.
set(TVM_PATCH_COMMAND2 git apply ${CMAKE_SOURCE_DIR}/patches/tvm/no_assert_stmt.patch)
execute_process(COMMAND ${TVM_PATCH_COMMAND2} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/external/tvm)
# Patch for preventing TVM's Halide from applying the distributive rule to floating-point values.
# Applied inside the HalideIR checkout, not the TVM root.
set(TVM_PATCH_COMMAND3 git apply ${CMAKE_SOURCE_DIR}/patches/tvm/halide_fp_simplify.patch)
execute_process(COMMAND ${TVM_PATCH_COMMAND3} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/external/tvm/3rdparty/HalideIR)
# Tensorization patch for exporting LookupLLVMIntrinsic in TVM.
set(TVM_PATCH_COMMAND4 git apply ${CMAKE_SOURCE_DIR}/patches/tvm/export_llvm_intrin.patch)
execute_process(COMMAND ${TVM_PATCH_COMMAND4} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/external/tvm/)
# Tensorization patch for exporting functions in TVM's Halide.
# Applied inside the HalideIR checkout, not the TVM root.
set(TVM_PATCH_COMMAND5 git apply ${CMAKE_SOURCE_DIR}/patches/tvm/halideIR.patch)
execute_process(COMMAND ${TVM_PATCH_COMMAND5} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/external/tvm/3rdparty/HalideIR)
# Tensorization patch for enabling LLVM IR codegen.
set(TVM_PATCH_COMMAND6 git apply ${CMAKE_SOURCE_DIR}/patches/tvm/llvm_codegen.patch)
execute_process(COMMAND ${TVM_PATCH_COMMAND6} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/external/tvm)
endif()
add_subdirectory(${PROJECT_SOURCE_DIR}/external/tvm EXCLUDE_FROM_ALL)
set_target_properties(tvm PROPERTIES FOLDER "External/tvm")
Expand All @@ -331,8 +351,13 @@ if (onnxruntime_USE_TVM)
add_definitions(-DUSE_TVM)

set(onnxruntime_tvm_libs onnxruntime_codegen_tvm)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES tvm nnvm_compiler)

# Linking against stdc++fs is required on Linux; Apple platforms are excluded,
# so FS_STDLIB is left unset there and on non-UNIX systems.
if(UNIX AND NOT APPLE)
  set(FS_STDLIB stdc++fs)
endif()
list(APPEND onnxruntime_EXTERNAL_LIBRARIES tvm nnvm_compiler ${FS_STDLIB})
list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES tvm nnvm_compiler)
endif()

Expand Down
2 changes: 1 addition & 1 deletion cmake/external/tvm
Submodule tvm updated from c2b361 to 3a75b1
17 changes: 13 additions & 4 deletions cmake/onnxruntime_codegen.cmake
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

file(GLOB_RECURSE onnxruntime_codegen_common_srcs
"${ONNXRUNTIME_ROOT}/core/codegen/common/*.h"
"${ONNXRUNTIME_ROOT}/core/codegen/common/*.cc"
)

file(GLOB_RECURSE onnxruntime_codegen_tvm_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/codegen/tvm/*.h"
"${ONNXRUNTIME_ROOT}/core/codegen/tvm/*.cc"
"${ONNXRUNTIME_ROOT}/core/codegen/mti/*.h"
"${ONNXRUNTIME_ROOT}/core/codegen/mti/*.cc"
"${ONNXRUNTIME_ROOT}/core/codegen/target/*.h"
"${ONNXRUNTIME_ROOT}/core/codegen/target/*.cc"
)

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_codegen_common_srcs} ${onnxruntime_codegen_tvm_srcs})

#onnxruntime_codegen_tvm depends on onnxruntime framework
add_library(onnxruntime_codegen_tvm ${onnxruntime_codegen_tvm_srcs})
add_library(onnxruntime_codegen_tvm ${onnxruntime_codegen_common_srcs} ${onnxruntime_codegen_tvm_srcs})
set_target_properties(onnxruntime_codegen_tvm PROPERTIES FOLDER "ONNXRuntime")
target_include_directories(onnxruntime_codegen_tvm PRIVATE ${ONNXRUNTIME_ROOT} ${TVM_INCLUDES})
target_include_directories(onnxruntime_codegen_tvm PRIVATE ${ONNXRUNTIME_ROOT} ${TVM_INCLUDES} ${MKLML_INCLUDE_DIR} ${eigen_INCLUDE_DIRS})
onnxruntime_add_include_to_target(onnxruntime_codegen_tvm onnxruntime_common onnxruntime_framework gsl onnx onnx_proto protobuf::libprotobuf)
target_compile_options(onnxruntime_codegen_tvm PRIVATE ${DISABLED_WARNINGS_FOR_TVM})
# need onnx to build to create headers that this project includes
Expand Down
41 changes: 41 additions & 0 deletions cmake/onnxruntime_nblas.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Builds the onnxruntime_nblas library from the Nuphar nblas sources
# (an AVX2 variant and an MKL variant of igemv) and registers it with the
# onnxruntime external library/dependency lists.

# Both the AVX2 and the MKL code paths are enabled unconditionally.
add_definitions(-DNUPHAR_USE_AVX2=1)
add_definitions(-DNUPHAR_USE_MKL=1)

# The MKL sources below need MKLML, so fail early when it is not enabled.
if (NOT onnxruntime_USE_MKLML)
message(FATAL_ERROR "onnxruntime_USE_MKLML required for onnxruntime_USE_NUPHAR")
endif()

# Sources that are compiled with AVX2 code generation enabled.
set(nblas_avx2_srcs
${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_avx2.cc
${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_avx2.h
)

# Sources built with the default flags; they use the MKLML include directory added below.
set(nblas_mkl_srcs
${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_mkl.cc
${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_mkl.h
)

# The architecture flag is set per source file on the AVX2 sources only;
# the global CMAKE_CXX_FLAGS alternative is left commented out.
if (MSVC)
# string(APPEND CMAKE_CXX_FLAGS " /arch:AVX2")
set_source_files_properties(${nblas_avx2_srcs} PROPERTIES COMPILE_FLAGS "/arch:AVX2")
else()
# string(APPEND CMAKE_CXX_FLAGS " -march=broadwell")
set_source_files_properties(${nblas_avx2_srcs} PROPERTIES COMPILE_FLAGS "-march=broadwell")
endif()

set(nuphar_blas_srcs
${nblas_avx2_srcs}
${nblas_mkl_srcs}
)

add_library(onnxruntime_nblas ${nuphar_blas_srcs})
target_include_directories(onnxruntime_nblas PRIVATE ${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas ${MKLML_INCLUDE_DIR})
set_target_properties(onnxruntime_nblas PROPERTIES FOLDER "ONNXRuntime")
# project_mklml must be built before this library.
add_dependencies(onnxruntime_nblas project_mklml)

# Make the library visible to targets that consume the external library/dependency lists.
list(APPEND onnxruntime_EXTERNAL_LIBRARIES onnxruntime_nblas)
list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES onnxruntime_nblas)
# NOTE(review): presumably lets the linker find onnxruntime_nblas in the
# per-configuration output directory - confirm; CMAKE_BUILD_TYPE may be empty
# with multi-config generators.
link_directories(${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE})
39 changes: 39 additions & 0 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,45 @@ if (onnxruntime_USE_NGRAPH)
endif()
endif()

# Nuphar execution provider: builds onnxruntime_providers_nuphar from the
# sources under core/providers/nuphar and registers it as a provider library.
if (onnxruntime_USE_NUPHAR)
  add_definitions(-DUSE_NUPHAR=1)

  # Nuphar requires both TVM and LLVM to be enabled.
  if (NOT onnxruntime_USE_TVM)
    message(FATAL_ERROR "onnxruntime_USE_TVM required for onnxruntime_USE_NUPHAR")
  endif()
  if (NOT onnxruntime_USE_LLVM)
    message(FATAL_ERROR "onnxruntime_USE_LLVM required for onnxruntime_USE_NUPHAR")
  endif()

  # The nblas sources are compiled by their own cmake file.
  include(onnxruntime_nblas.cmake)

  file(GLOB_RECURSE onnxruntime_providers_nuphar_cc_srcs
    "${ONNXRUNTIME_ROOT}/core/providers/nuphar/*.h"
    "${ONNXRUNTIME_ROOT}/core/providers/nuphar/*.cc"
  )

  # The following files are built by the separate nblas cmake file (the AVX2
  # ones need a different build flag), so drop them from this target's sources.
  list(REMOVE_ITEM onnxruntime_providers_nuphar_cc_srcs
    "${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_avx2.cc"
    "${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_avx2.h"
    "${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_mkl.cc"
    "${ONNXRUNTIME_ROOT}/core/providers/nuphar/nblas/nblas_igemv_mkl.h"
  )

  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nuphar_cc_srcs})
  add_library(onnxruntime_providers_nuphar ${onnxruntime_providers_nuphar_cc_srcs})
  set_target_properties(onnxruntime_providers_nuphar PROPERTIES
    FOLDER "ONNXRuntime"
    LINKER_LANGUAGE CXX
  )
  onnxruntime_add_include_to_target(onnxruntime_providers_nuphar onnxruntime_common onnxruntime_framework gsl onnx onnx_proto protobuf::libprotobuf)
  target_include_directories(onnxruntime_providers_nuphar PRIVATE ${ONNXRUNTIME_ROOT} ${TVM_INCLUDES} ${eigen_INCLUDE_DIRS})
  target_compile_options(onnxruntime_providers_nuphar PRIVATE ${DISABLED_WARNINGS_FOR_TVM})
  add_dependencies(onnxruntime_providers_nuphar ${onnxruntime_EXTERNAL_DEPENDENCIES})
  install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/nuphar DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)

  # Use this if you want this provider to be included in the onnxruntime shared library.
  list(APPEND onnxruntime_libs onnxruntime_providers_nuphar)
  list(APPEND ONNXRUNTIME_PROVIDER_NAMES nuphar)
endif()

if (onnxruntime_ENABLE_MICROSOFT_INTERNAL)
include(onnxruntime_providers_internal.cmake)
endif()
Expand Down
14 changes: 14 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ file(GLOB_RECURSE onnxruntime_test_tvm_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/tvm/*.cc"
)

# When Nuphar is enabled, add its framework test sources and make the Nuphar
# provider library available to the test targets that consume these lists.
if(onnxruntime_USE_NUPHAR)
# Glob pattern for the Nuphar-specific framework tests.
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/framework/nuphar/*)
# The same provider target is appended to each library/dependency list below.
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_nuphar)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nuphar)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_nuphar)
list(APPEND onnx_test_libs onnxruntime_providers_nuphar)
endif()

if (onnxruntime_ENABLE_MICROSOFT_INTERNAL)
include(onnxruntime_unittests_internal.cmake)
endif()
Expand Down Expand Up @@ -387,6 +395,12 @@ if(WIN32)
$<TARGET_FILE_DIR:${test_data_target}>
)
endif()
# When TVM is enabled, copy the built tvm target file into the directory that
# holds ${test_data_target}'s output, as a post-build step of that target.
# presumably so the tests can load the tvm library at run time - TODO confirm
if (onnxruntime_USE_TVM)
add_custom_command(
TARGET ${test_data_target} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:tvm> $<TARGET_FILE_DIR:${test_data_target}>
)
endif()
endif()

add_library(onnx_test_data_proto ${TEST_SRC_DIR}/proto/tml.proto)
Expand Down
19 changes: 19 additions & 0 deletions cmake/patches/tvm/FCmp.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
diff --git a/src/pass/arg_binder.cc b/src/pass/arg_binder.cc
index 623886c3..8abc6846 100644
--- a/src/pass/arg_binder.cc
+++ b/src/pass/arg_binder.cc
@@ -46,7 +46,12 @@ bool ArgBinder::Bind_(const Expr& arg,
}
return true;
} else {
- BinderAddAssert(it->second == value, arg_name, &asserts_);
+ if (arg.type().is_handle()) {
+ BinderAddAssert(reinterpret(UInt(64), it->second) == reinterpret(UInt(64), value),
+ arg_name, &asserts_);
+ } else {
+ BinderAddAssert(it->second == value, arg_name, &asserts_);
+ }
}
} else {
BinderAddAssert(arg == value, arg_name, &asserts_);

20 changes: 20 additions & 0 deletions cmake/patches/tvm/export_llvm_intrin.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
diff --git a/include/tvm/codegen.h b/include/tvm/codegen.h
index fca88de6..d959477b 100644
--- a/include/tvm/codegen.h
+++ b/include/tvm/codegen.h
@@ -42,6 +42,15 @@ runtime::Module Build(const Array<LoweredFunc>& funcs,
* \return cstr The C string representation of the file.
*/
std::string PackImportsToC(const runtime::Module& m, bool system_lib);
+
+
+/*!
+ * \brief Export LookupLLVMIntrinsic to enable direct call
+ * to llvm intrinsic (e.g. AVX2/AVX512) in tvm tensorization
+ */
+TVM_DLL unsigned LookupLLVMIntrinsic(const std::string& name);
+
+
} // namespace codegen
} // namespace tvm

33 changes: 33 additions & 0 deletions cmake/patches/tvm/halideIR.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
diff --git a/src/ir/IR.cpp b/src/ir/IR.cpp
index 8966fc3..e5e441d 100644
--- a/src/ir/IR.cpp
+++ b/src/ir/IR.cpp
@@ -724,5 +724,10 @@ Call::ConstString Call::cast_mask = "cast_mask";
Call::ConstString Call::select_mask = "select_mask";
Call::ConstString Call::extract_mask_element = "extract_mask_element";
Call::ConstString Call::size_of_halideir_buffer_t = "size_of_halideir_buffer_t";
+// Tensorize exports
+Call::ConstString Call::extract_element = "extract_element";
+Call::ConstString Call::insert_element = "insert_element";
+Call::ConstString Call::vectorlow = "vectorlow";
+Call::ConstString Call::vectorhigh = "vectorhigh";
}
}
diff --git a/src/ir/IR.h b/src/ir/IR.h
index 15e7013..933b774 100644
--- a/src/ir/IR.h
+++ b/src/ir/IR.h
@@ -720,7 +720,12 @@ struct Call : public ExprNode<Call> {
cast_mask,
select_mask,
extract_mask_element,
- size_of_halideir_buffer_t;
+ size_of_halideir_buffer_t,
+ // Tensorize exports
+ extract_element,
+ insert_element,
+ vectorlow,
+ vectorhigh;
// If it's a call to another halide function, this call node holds
// onto a pointer to that function for the purposes of reference
// counting only. Self-references in update definitions do not
21 changes: 21 additions & 0 deletions cmake/patches/tvm/halide_fp_simplify.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
diff --git a/src/arithmetic/Simplify.cpp b/src/arithmetic/Simplify.cpp
index e053831..8a3c841 100644
--- a/src/arithmetic/Simplify.cpp
+++ b/src/arithmetic/Simplify.cpp
@@ -555,6 +555,16 @@ private:

Expr a = mutate(op->a);
Expr b = mutate(op->b);
+
+ if (op->type.is_float()) {
+ if (a.same_as(op->a) && b.same_as(op->b)) {
+ expr = self;
+ } else {
+ expr = Add::make(a, b);
+ }
+ return;
+ }
+
if (propagate_indeterminate_expression(a, b, op->type, &expr)) {
return;
}
34 changes: 34 additions & 0 deletions cmake/patches/tvm/llvm_codegen.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc
index f80bd9e8..2dcde670 100644
--- a/src/codegen/llvm/codegen_llvm.cc
+++ b/src/codegen/llvm/codegen_llvm.cc
@@ -677,9 +677,11 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) {
value->addIncoming(then_value, then_value_block);
value->addIncoming(else_value, else_value_block);
return value;
- } else if (op->is_intrinsic(Call::reinterpret)) {
+ }
+ // Tensorize exports
+ else if (op->is_intrinsic(Call::reinterpret)) {
llvm::Type * target = LLVMType(op->type);
- return builder_->CreateBitCast(MakeValue(op->args[0]), target);
+ return builder_->CreateBitOrPointerCast(MakeValue(op->args[0]), target);
} else if (op->is_intrinsic("vectorlow")) {
llvm::Value *v = MakeValue(op->args[0]);
int l = v->getType()->getVectorNumElements();
@@ -688,6 +690,15 @@ llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) {
llvm::Value *v = MakeValue(op->args[0]);
int l = v->getType()->getVectorNumElements();
return CreateVecSlice(v, l/2, l/2);
+ } else if (op->is_intrinsic("extract_element")) {
+ llvm::Value* v = MakeValue(op->args[0]);
+ uint64_t id = op->args[1].as<UIntImm>()->value;
+ return builder_->CreateExtractElement(v, id);
+ } else if (op->is_intrinsic("insert_element")) {
+ llvm::Value* v0 = MakeValue(op->args[0]);
+ llvm::Value* v1 = MakeValue(op->args[1]);
+ uint64_t id = op->args[2].as<UIntImm>()->value;
+ return builder_->CreateInsertElement(v0, v1, id);
} else if (op->is_intrinsic("vectorcombine")) {
llvm::Value *v0 = MakeValue(op->args[0]);
llvm::Value *v1 = MakeValue(op->args[1]);
Loading

0 comments on commit 3dcf4d6

Please sign in to comment.