diff --git a/CMakeLists.txt b/CMakeLists.txt
index 72d01a1..ba12eb6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,6 +81,9 @@ project(tritononnxruntimebackend LANGUAGES C CXX)
 # - If you want to disable GPU usage, set TRITON_ENABLE_GPU=OFF.
 #   This will make builds with CUDA and TensorRT flags to fail.
 #
+# - If you want to set the platform explicitly rather than rely on it being detected,
+#   set TRITON_BUILD_PLATFORM to ubuntu, windows, or jetpack (lowercase).
+#
 option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
 option(TRITON_ENABLE_ONNXRUNTIME_TENSORRT
@@ -94,6 +97,7 @@ set(TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION "" CACHE STRING "ONNXRuntime OpenV
 set(TRITON_BUILD_CUDA_VERSION "" CACHE STRING "Version of CUDA install")
 set(TRITON_BUILD_CUDA_HOME "" CACHE PATH "Path to CUDA install")
 set(TRITON_BUILD_CUDNN_HOME "" CACHE PATH "Path to CUDNN install")
+set(TRITON_BUILD_PLATFORM "" CACHE STRING "Target platform of the build")
 set(TRITON_BUILD_TENSORRT_HOME "" CACHE PATH "Path to TensorRT install")
 set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "" CACHE PATH "Paths to ONNXRuntime includes")
 set(TRITON_ONNX_TENSORRT_REPO_TAG "" CACHE STRING "Tag for onnx-tensorrt repo")
@@ -334,6 +338,9 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD)
   if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO})
     set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}")
   endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO
+  if(NOT TRITON_BUILD_PLATFORM STREQUAL "")
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--target-platform=${TRITON_BUILD_PLATFORM}")
+  endif() # TRITON_BUILD_PLATFORM
 
   set(ENABLE_GPU_EXTRA_ARGS "")
   if(${TRITON_ENABLE_GPU})
diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py
index dd90249..5aabbee 100755
--- a/tools/gen_ort_dockerfile.py
+++ b/tools/gen_ort_dockerfile.py
@@ -34,15 +34,16 @@
 ORT_TO_TRTPARSER_VERSION_MAP = {
     '1.9.0': (
-        '8.2', # TensorRT version
-        'release/8.2-GA' # ONNX-Tensorrt parser version
+        '8.2',  # TensorRT version
+        'release/8.2-GA'  # ONNX-Tensorrt parser version
     ),
     '1.10.0': (
-        '8.2', # TensorRT version
-        'release/8.2-GA' # ONNX-Tensorrt parser version
+        '8.2',  # TensorRT version
+        'release/8.2-GA'  # ONNX-Tensorrt parser version
     )
 }
 
+
 def target_platform():
     if FLAGS.target_platform is not None:
         return FLAGS.target_platform
@@ -147,10 +148,12 @@ def dockerfile_for_linux(output_file):
     wget ${INTEL_COMPUTE_RUNTIME_URL}/intel-ocloc_19.41.14441_amd64.deb && \
     dpkg -i *.deb && rm -rf *.deb
 '''
-    ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support.
-    # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will
-    # use tensorrt-8.0 branch instead of using rel-1.8.1
-    # From ORT 1.9 onwards we will switch back to using rel-* branches
+
+
+## TEMPORARY: Using the tensorrt-8.0 branch until the ORT 1.9 release to enable the ORT backend with TRT 8.0 support.
+# For ORT versions 1.8.0 and below the behavior will remain the same. For ORT version 1.8.1 we will
+# use the tensorrt-8.0 branch instead of rel-1.8.1.
+# From ORT 1.9 onwards we will switch back to using rel-* branches.
     if FLAGS.ort_version == "1.8.1":
         df += '''
 #
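Note on the plumbing above: the new TRITON_BUILD_PLATFORM cache variable is forwarded to this script as --target-platform, and target_platform() prefers that flag over host detection. Below is a minimal standalone sketch of that resolution order. The platform.system().lower() fallback is an assumption (the hunk above truncates target_platform() after its first branch), and the argparse setup here is illustrative, not copied from the script:

    # Hypothetical standalone sketch of the flag-before-detection order.
    import argparse
    import platform

    parser = argparse.ArgumentParser()
    parser.add_argument('--target-platform',
                        type=str,
                        required=False,
                        default=None,
                        help='Override platform detection: ubuntu, windows, or jetpack.')
    FLAGS = parser.parse_args(['--target-platform', 'jetpack'])

    def target_platform():
        if FLAGS.target_platform is not None:
            return FLAGS.target_platform  # value forwarded from TRITON_BUILD_PLATFORM
        return platform.system().lower()  # assumed fallback, e.g. 'linux' or 'windows'

    print(target_platform())  # 'jetpack' - detection alone could never return this

This is also why the CMakeLists comment suggests lowercase values: the comparisons later in the script ('windows', 'jetpack') are case-sensitive, so a value like "Jetpack" would silently fail to match.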
@@ -215,10 +218,16 @@ def dockerfile_for_linux(output_file):
     if FLAGS.ort_openvino is not None:
         ep_flags += ' --use_openvino CPU_FP32'
 
+    # DLIS-4658: Once Jetson build supports CUDA 11.8+, include compute_90 for Jetson.
+    cuda_archs = "52;60;61;70;75;80;86"
+    if target_platform() != 'jetpack':
+        cuda_archs += ";90"
+
     df += '''
 WORKDIR /workspace/onnxruntime
-ARG COMMON_BUILD_ARGS="--config ${ONNXRUNTIME_BUILD_CONFIG} --skip_submodule_sync --parallel --build_shared_lib --build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='52;60;61;70;75;80;86;90' "
-'''
+ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \
+    --build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='{}' "
+'''.format(cuda_archs)
 
     # Remove version info from libonnxruntime.so
     # This makes it possible to replace ort binaries in released triton containers
@@ -270,7 +279,6 @@ def dockerfile_for_linux(output_file):
    /opt/onnxruntime/lib
 '''
 
-
     if FLAGS.ort_tensorrt:
         df += '''
 # TensorRT specific headers and libraries
@@ -438,10 +446,10 @@ def dockerfile_for_windows(output_file):
 
 
 def preprocess_gpu_flags():
-    if target_platform() == 'windows':
+    if target_platform() == 'windows':
         # Default to CUDA based on CUDA_PATH envvar and TensorRT in
         # C:/tensorrt
-        if 'CUDA_PATH'in os.environ:
+        if 'CUDA_PATH' in os.environ:
             if FLAGS.cuda_home is None:
                 FLAGS.cuda_home = os.environ['CUDA_PATH']
             elif FLAGS.cuda_home != os.environ['CUDA_PATH']:
@@ -461,14 +469,16 @@ def preprocess_gpu_flags():
             print("warning: --cuda-version does not match CUDA_PATH envvar")
 
         if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None):
-            print("error: windows build requires --cuda-version and --cuda-home")
+            print(
+                "error: windows build requires --cuda-version and --cuda-home")
         if FLAGS.tensorrt_home is None:
             FLAGS.tensorrt_home = '/tensorrt'
     else:
-        if 'CUDNN_VERSION'in os.environ:
+        if 'CUDNN_VERSION' in os.environ:
             version = None
-            m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]', os.environ['CUDNN_VERSION'])
+            m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]',
+                         os.environ['CUDNN_VERSION'])
             if m:
                 version = m.group(1)
             if FLAGS.cudnn_home is None:
@@ -505,7 +515,7 @@
                         help='Enable GPU support')
     parser.add_argument('--ort-build-config',
                         type=str,
-                        default ="Release",
+                        default="Release",
                         choices=["Debug", "Release", "RelWithDebInfo"],
                         help='ORT build configuration.')
     parser.add_argument(
@@ -528,10 +538,12 @@
                         type=str,
                         required=False,
                         help='Home directory for CUDNN.')
-    parser.add_argument('--ort-openvino',
-                        type=str,
-                        required=False,
-                        help='Enable OpenVino execution provider using specified OpenVINO version.')
+    parser.add_argument(
+        '--ort-openvino',
+        type=str,
+        required=False,
+        help=
+        'Enable OpenVino execution provider using specified OpenVINO version.')
     parser.add_argument('--ort-tensorrt',
                         action="store_true",
                         required=False,
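Note on the COMMON_BUILD_ARGS hunk above: because the Dockerfile fragment is now built with str.format(), every literal ${...} expansion that must survive into the Dockerfile has to be written with doubled braces, while the single {} placeholder receives the computed architecture list. A self-contained sketch of that escaping (the function name is illustrative, not part of the script):

    def cuda_arch_build_args(target):
        # compute_90 needs CUDA 11.8+, which the Jetson build does not yet have.
        cuda_archs = "52;60;61;70;75;80;86"
        if target != 'jetpack':
            cuda_archs += ";90"
        # ${{...}} survives .format() as a literal ${...} for Docker to expand;
        # {} is the slot that receives cuda_archs.
        template = 'ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} ' \
                   '--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=\'{}\' "'
        return template.format(cuda_archs)

    print(cuda_arch_build_args('jetpack'))  # ...CMAKE_CUDA_ARCHITECTURES='52;60;61;70;75;80;86' "
    print(cuda_arch_build_args('ubuntu'))   # ...CMAKE_CUDA_ARCHITECTURES='52;60;61;70;75;80;86;90' "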
@@ -557,11 +569,13 @@
     # if the tag is empty - check whether there is an entry in the ORT_TO_TRTPARSER_VERSION_MAP
     # map corresponding to ort version + trt version combo. If yes then use it
     # otherwise we leave it empty and use the defaults from ort
-    if FLAGS.onnx_tensorrt_tag == "" and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys():
+    if FLAGS.onnx_tensorrt_tag == "" and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys(
+    ):
         trt_version = re.match(r'^[0-9]+\.[0-9]+', FLAGS.trt_version)
-        if trt_version and trt_version.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][0]:
-            FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][1]
-
+        if trt_version and trt_version.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[
+                FLAGS.ort_version][0]:
+            FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[
+                FLAGS.ort_version][1]
 
     if target_platform() == 'windows':
         # OpenVINO EP not yet supported for windows build
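For reference, the parser-tag selection that the last hunk reformats can be read as the following standalone function. pick_onnx_tensorrt_tag is a hypothetical name for illustration only; the script itself mutates FLAGS.onnx_tensorrt_tag in place rather than returning a value:

    # Hypothetical restatement of the onnx-tensorrt tag lookup above.
    import re

    ORT_TO_TRTPARSER_VERSION_MAP = {
        '1.9.0': ('8.2', 'release/8.2-GA'),   # (TensorRT version, parser tag)
        '1.10.0': ('8.2', 'release/8.2-GA'),
    }

    def pick_onnx_tensorrt_tag(ort_version, trt_version, tag=""):
        # Only fill in the tag when the user left it empty and the ORT version
        # has a map entry whose TensorRT major.minor prefix matches.
        if tag == "" and ort_version in ORT_TO_TRTPARSER_VERSION_MAP:
            m = re.match(r'^[0-9]+\.[0-9]+', trt_version)
            if m and m.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[ort_version][0]:
                return ORT_TO_TRTPARSER_VERSION_MAP[ort_version][1]
        return tag  # an empty tag means ORT's own parser defaults are used

    print(pick_onnx_tensorrt_tag('1.10.0', '8.2.5.1'))  # release/8.2-GA
    print(pick_onnx_tensorrt_tag('1.10.0', '8.4.1.5'))  # '' -> ORT defaults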