Add flags to allow platform-specific build #179

Merged 11 commits on May 8, 2023
CMakeLists.txt: 7 additions, 0 deletions
@@ -81,6 +81,9 @@ project(tritononnxruntimebackend LANGUAGES C CXX)
# - If you want to disable GPU usage, set TRITON_ENABLE_GPU=OFF.
# This will cause builds with CUDA and TensorRT flags to fail.
#
# - If you want to optionally set the platform rather than rely on it being detected,
# set TRITON_BUILD_PLATFORM equal to Ubuntu, Windows, or Jetpack.
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
option(TRITON_ENABLE_ONNXRUNTIME_TENSORRT
@@ -94,6 +97,7 @@
set(TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION "" CACHE STRING "ONNXRuntime OpenVINO version")
set(TRITON_BUILD_CUDA_VERSION "" CACHE STRING "Version of CUDA install")
set(TRITON_BUILD_CUDA_HOME "" CACHE PATH "Path to CUDA install")
set(TRITON_BUILD_CUDNN_HOME "" CACHE PATH "Path to CUDNN install")
set(TRITON_BUILD_PLATFORM "" CACHE STRING "Platform of build")
set(TRITON_BUILD_TENSORRT_HOME "" CACHE PATH "Path to TensorRT install")
set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "" CACHE PATH "Paths to ONNXRuntime includes")
set(TRITON_ONNX_TENSORRT_REPO_TAG "" CACHE STRING "Tag for onnx-tensorrt repo")
@@ -334,6 +338,9 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD)
  if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO})
    set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}")
  endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO
  if(NOT ${TRITON_BUILD_PLATFORM} STREQUAL "")
    set(_GEN_FLAGS ${_GEN_FLAGS} "--target-platform=${TRITON_BUILD_PLATFORM}")
  endif() # TRITON_BUILD_PLATFORM

  set(ENABLE_GPU_EXTRA_ARGS "")
  if(${TRITON_ENABLE_GPU})
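The --target-platform value forwarded by _GEN_FLAGS above is consumed by tools/gen_ort_dockerfile.py. For reference, its argparse definition would look roughly like the following sketch; the exact help text and default are assumptions, not quoted from the PR:

    # Hypothetical sketch; the real definition lives in tools/gen_ort_dockerfile.py.
    parser.add_argument(
        '--target-platform',
        type=str,
        required=False,
        default=None,
        help='Target platform for the build: "ubuntu", "windows" or "jetpack". '
        'If unset, the host platform is detected.')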
tools/gen_ort_dockerfile.py: 39 additions, 25 deletions
@@ -34,15 +34,16 @@

ORT_TO_TRTPARSER_VERSION_MAP = {
    '1.9.0': (
        '8.2',  # TensorRT version
        'release/8.2-GA'  # ONNX-Tensorrt parser version
    ),
    '1.10.0': (
        '8.2',  # TensorRT version
        'release/8.2-GA'  # ONNX-Tensorrt parser version
    )
}


def target_platform():
    if FLAGS.target_platform is not None:
        return FLAGS.target_platform
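The diff view truncates the function here. A minimal sketch of the whole function, assuming the fallback branch simply detects the host OS via platform.system() (the fallback is an assumption, not shown in the diff):

    import platform

    def target_platform():
        if FLAGS.target_platform is not None:
            return FLAGS.target_platform
        # Assumed fallback when no --target-platform override is given:
        return platform.system().lower()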
@@ -147,10 +148,12 @@ def dockerfile_for_linux(output_file):
wget ${INTEL_COMPUTE_RUNTIME_URL}/intel-ocloc_19.41.14441_amd64.deb && \
dpkg -i *.deb && rm -rf *.deb
'''


    ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support.
    # For ORT versions 1.8.0 and below the behavior will remain the same. For ORT version 1.8.1 we will
    # use the tensorrt-8.0 branch instead of rel-1.8.1.
    # From ORT 1.9 onwards we will switch back to using rel-* branches.
    if FLAGS.ort_version == "1.8.1":
        df += '''
#
@@ -215,10 +218,16 @@ def dockerfile_for_linux(output_file):
    if FLAGS.ort_openvino is not None:
        ep_flags += ' --use_openvino CPU_FP32'

    # DLIS-4658: Once Jetson build supports CUDA 11.8+, include compute_90 for Jetson.
    cuda_archs = "52;60;61;70;75;80;86"
    if target_platform() != 'jetpack':
        cuda_archs += ";90"

    df += '''
WORKDIR /workspace/onnxruntime
ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \
--build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='{}' "
'''.format(cuda_archs)
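As a quick illustration of the str.format escaping above: the doubled braces survive as literal braces so Docker expands ${ONNXRUNTIME_BUILD_CONFIG} at build time, while the single {} receives the arch list. A standalone sketch, not part of the PR:

    # Sketch: how the ARG fragment is rendered for each platform.
    for plat in ('jetpack', 'ubuntu'):
        archs = "52;60;61;70;75;80;86"
        if plat != 'jetpack':
            archs += ";90"
        line = "CMAKE_CUDA_ARCHITECTURES='{}' --config ${{ONNXRUNTIME_BUILD_CONFIG}}".format(archs)
        print(plat, '->', line)
    # jetpack -> CMAKE_CUDA_ARCHITECTURES='52;60;61;70;75;80;86' --config ${ONNXRUNTIME_BUILD_CONFIG}
    # ubuntu  -> CMAKE_CUDA_ARCHITECTURES='52;60;61;70;75;80;86;90' --config ${ONNXRUNTIME_BUILD_CONFIG}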

    # Remove version info from libonnxruntime.so
    # This makes it possible to replace ort binaries in released triton containers
@@ -270,7 +279,6 @@ def dockerfile_for_linux(output_file):
/opt/onnxruntime/lib
'''

    if FLAGS.ort_tensorrt:
        df += '''
# TensorRT specific headers and libraries
@@ -438,10 +446,10 @@ def dockerfile_for_windows(output_file):


def preprocess_gpu_flags():
    if target_platform() == 'windows':
        # Default to CUDA based on CUDA_PATH envvar and TensorRT in
        # C:/tensorrt
        if 'CUDA_PATH' in os.environ:
            if FLAGS.cuda_home is None:
                FLAGS.cuda_home = os.environ['CUDA_PATH']
            elif FLAGS.cuda_home != os.environ['CUDA_PATH']:
@@ -461,14 +469,16 @@ def preprocess_gpu_flags():
print("warning: --cuda-version does not match CUDA_PATH envvar")

if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None):
print("error: windows build requires --cuda-version and --cuda-home")
print(
"error: windows build requires --cuda-version and --cuda-home")

if FLAGS.tensorrt_home is None:
FLAGS.tensorrt_home = '/tensorrt'
else:
if 'CUDNN_VERSION'in os.environ:
if 'CUDNN_VERSION' in os.environ:
version = None
m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]', os.environ['CUDNN_VERSION'])
m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]',
os.environ['CUDNN_VERSION'])
if m:
version = m.group(1)
if FLAGS.cudnn_home is None:
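A quick check of the CUDNN_VERSION pattern above, with a made-up version string; note the pattern only captures the major.minor prefix:

    import re
    m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]', "8.2.4.15")
    print(m.group(1))  # -> "8.2"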
@@ -505,7 +515,7 @@ def preprocess_gpu_flags():
                        help='Enable GPU support')
    parser.add_argument('--ort-build-config',
                        type=str,
                        default="Release",
                        choices=["Debug", "Release", "RelWithDebInfo"],
                        help='ORT build configuration.')
    parser.add_argument(
@@ -528,10 +538,12 @@ def preprocess_gpu_flags():
                        type=str,
                        required=False,
                        help='Home directory for CUDNN.')
    parser.add_argument(
        '--ort-openvino',
        type=str,
        required=False,
        help=
        'Enable OpenVino execution provider using specified OpenVINO version.')
    parser.add_argument('--ort-tensorrt',
                        action="store_true",
                        required=False,
@@ -557,11 +569,13 @@ def preprocess_gpu_flags():
    # If the tag is empty, check whether there is an entry in
    # ORT_TO_TRTPARSER_VERSION_MAP corresponding to the ORT version + TRT
    # version combo. If yes, use it; otherwise leave it empty and use the
    # defaults from ORT.
    if FLAGS.onnx_tensorrt_tag == "" and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys(
    ):
        trt_version = re.match(r'^[0-9]+\.[0-9]+', FLAGS.trt_version)
        if trt_version and trt_version.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[
                FLAGS.ort_version][0]:
            FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[
                FLAGS.ort_version][1]

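To make the lookup concrete, here is a self-contained sketch of the same resolution logic; the helper name and sample versions are invented for illustration:

    import re

    ORT_TO_TRTPARSER_VERSION_MAP = {
        '1.9.0': ('8.2', 'release/8.2-GA'),
        '1.10.0': ('8.2', 'release/8.2-GA'),
    }

    def resolve_onnx_tensorrt_tag(ort_version, trt_version, tag=""):
        # Mirror of the logic above: only fill in the tag when it was left
        # empty and the ORT version maps to this TRT major.minor.
        if tag == "" and ort_version in ORT_TO_TRTPARSER_VERSION_MAP:
            m = re.match(r'^[0-9]+\.[0-9]+', trt_version)
            if m and m.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[ort_version][0]:
                return ORT_TO_TRTPARSER_VERSION_MAP[ort_version][1]
        return tag

    print(resolve_onnx_tensorrt_tag('1.9.0', '8.2.3'))  # -> release/8.2-GA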
    if target_platform() == 'windows':
        # OpenVINO EP not yet supported for windows build