Skip to content

Commit

Permalink
Add flags to allow platform-specific build (#179)
Browse files Browse the repository at this point in the history
* For Jetson, do not build compute_90 arch

* Remove unnecessary statement

* Fix quote

* Move comment outside of quote

* Auto-format

* Escape brace for gen Bash script

* Add platform variable use to ORT build

* Fix if statement

* Format

* Fix flag check

* Specify CUDA version required
  • Loading branch information
dyastremsky authored May 8, 2023
1 parent 0607d73 commit d815e2c
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 25 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ project(tritononnxruntimebackend LANGUAGES C CXX)
# - If you want to disable GPU usage, set TRITON_ENABLE_GPU=OFF.
# This will make builds with CUDA and TensorRT flags to fail.
#
# - If you want to set the platform explicitly rather than rely on it being detected,
# set TRITON_BUILD_PLATFORM equal to Ubuntu, Windows, or Jetpack.
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
option(TRITON_ENABLE_ONNXRUNTIME_TENSORRT
Expand All @@ -94,6 +97,7 @@ set(TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION "" CACHE STRING "ONNXRuntime OpenV
set(TRITON_BUILD_CUDA_VERSION "" CACHE STRING "Version of CUDA install")
set(TRITON_BUILD_CUDA_HOME "" CACHE PATH "Path to CUDA install")
set(TRITON_BUILD_CUDNN_HOME "" CACHE PATH "Path to CUDNN install")
set(TRITON_BUILD_PLATFORM "" CACHE STRING "Platform of build")
set(TRITON_BUILD_TENSORRT_HOME "" CACHE PATH "Path to TensorRT install")
set(TRITON_ONNXRUNTIME_INCLUDE_PATHS "" CACHE PATH "Paths to ONNXRuntime includes")
set(TRITON_ONNX_TENSORRT_REPO_TAG "" CACHE STRING "Tag for onnx-tensorrt repo")
Expand Down Expand Up @@ -334,6 +338,9 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD)
if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO})
set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}")
endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO
if(NOT ${TRITON_BUILD_PLATFORM} STREQUAL "")
set(_GEN_FLAGS ${_GEN_FLAGS} "--target-platform=${TRITON_BUILD_PLATFORM}")
endif() # TRITON_BUILD_PLATFORM

set(ENABLE_GPU_EXTRA_ARGS "")
if(${TRITON_ENABLE_GPU})
Expand Down
64 changes: 39 additions & 25 deletions tools/gen_ort_dockerfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,16 @@

ORT_TO_TRTPARSER_VERSION_MAP = {
'1.9.0': (
'8.2', # TensorRT version
'release/8.2-GA' # ONNX-Tensorrt parser version
'8.2', # TensorRT version
'release/8.2-GA' # ONNX-Tensorrt parser version
),
'1.10.0': (
'8.2', # TensorRT version
'release/8.2-GA' # ONNX-Tensorrt parser version
'8.2', # TensorRT version
'release/8.2-GA' # ONNX-Tensorrt parser version
)
}


def target_platform():
if FLAGS.target_platform is not None:
return FLAGS.target_platform
Expand Down Expand Up @@ -147,10 +148,12 @@ def dockerfile_for_linux(output_file):
wget ${INTEL_COMPUTE_RUNTIME_URL}/intel-ocloc_19.41.14441_amd64.deb && \
dpkg -i *.deb && rm -rf *.deb
'''
## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support.
# For ORT versions 1.8.0 and below the behavior will remain the same. For ORT version 1.8.1 we will
# use tensorrt-8.0 branch instead of using rel-1.8.1
# From ORT 1.9 onwards we will switch back to using rel-* branches


## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support.
# For ORT versions 1.8.0 and below the behavior will remain the same. For ORT version 1.8.1 we will
# use tensorrt-8.0 branch instead of using rel-1.8.1
# From ORT 1.9 onwards we will switch back to using rel-* branches
if FLAGS.ort_version == "1.8.1":
df += '''
#
Expand Down Expand Up @@ -215,10 +218,16 @@ def dockerfile_for_linux(output_file):
if FLAGS.ort_openvino is not None:
ep_flags += ' --use_openvino CPU_FP32'

# DLIS-4658: Once Jetson build supports CUDA 11.8+, include compute_90 for Jetson.
cuda_archs = "52;60;61;70;75;80;86"
if target_platform() != 'jetpack':
cuda_archs += ";90"

df += '''
WORKDIR /workspace/onnxruntime
ARG COMMON_BUILD_ARGS="--config ${ONNXRUNTIME_BUILD_CONFIG} --skip_submodule_sync --parallel --build_shared_lib --build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='52;60;61;70;75;80;86;90' "
'''
ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \
--build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='{}' "
'''.format(cuda_archs)

# Remove version info from libonnxruntime.so
# This makes it possible to replace ort binaries in released triton containers
Expand Down Expand Up @@ -270,7 +279,6 @@ def dockerfile_for_linux(output_file):
/opt/onnxruntime/lib
'''


if FLAGS.ort_tensorrt:
df += '''
# TensorRT specific headers and libraries
Expand Down Expand Up @@ -438,10 +446,10 @@ def dockerfile_for_windows(output_file):


def preprocess_gpu_flags():
if target_platform() == 'windows':
if target_platform() == 'windows':
# Default to CUDA based on CUDA_PATH envvar and TensorRT in
# C:/tensorrt
if 'CUDA_PATH'in os.environ:
if 'CUDA_PATH' in os.environ:
if FLAGS.cuda_home is None:
FLAGS.cuda_home = os.environ['CUDA_PATH']
elif FLAGS.cuda_home != os.environ['CUDA_PATH']:
Expand All @@ -461,14 +469,16 @@ def preprocess_gpu_flags():
print("warning: --cuda-version does not match CUDA_PATH envvar")

if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None):
print("error: windows build requires --cuda-version and --cuda-home")
print(
"error: windows build requires --cuda-version and --cuda-home")

if FLAGS.tensorrt_home is None:
FLAGS.tensorrt_home = '/tensorrt'
else:
if 'CUDNN_VERSION'in os.environ:
if 'CUDNN_VERSION' in os.environ:
version = None
m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]', os.environ['CUDNN_VERSION'])
m = re.match(r'([0-9]\.[0-9])\.[0-9]\.[0-9]',
os.environ['CUDNN_VERSION'])
if m:
version = m.group(1)
if FLAGS.cudnn_home is None:
Expand Down Expand Up @@ -505,7 +515,7 @@ def preprocess_gpu_flags():
help='Enable GPU support')
parser.add_argument('--ort-build-config',
type=str,
default ="Release",
default="Release",
choices=["Debug", "Release", "RelWithDebInfo"],
help='ORT build configuration.')
parser.add_argument(
Expand All @@ -528,10 +538,12 @@ def preprocess_gpu_flags():
type=str,
required=False,
help='Home directory for CUDNN.')
parser.add_argument('--ort-openvino',
type=str,
required=False,
help='Enable OpenVino execution provider using specified OpenVINO version.')
parser.add_argument(
'--ort-openvino',
type=str,
required=False,
help=
'Enable OpenVino execution provider using specified OpenVINO version.')
parser.add_argument('--ort-tensorrt',
action="store_true",
required=False,
Expand All @@ -557,11 +569,13 @@ def preprocess_gpu_flags():
# if the tag is empty - check whether there is an entry in the ORT_TO_TRTPARSER_VERSION_MAP
# map corresponding to ort version + trt version combo. If yes then use it
# otherwise we leave it empty and use the defaults from ort
if FLAGS.onnx_tensorrt_tag == "" and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys():
if FLAGS.onnx_tensorrt_tag == "" and FLAGS.ort_version in ORT_TO_TRTPARSER_VERSION_MAP.keys(
):
trt_version = re.match(r'^[0-9]+\.[0-9]+', FLAGS.trt_version)
if trt_version and trt_version.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][0]:
FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[FLAGS.ort_version][1]

if trt_version and trt_version.group(0) == ORT_TO_TRTPARSER_VERSION_MAP[
FLAGS.ort_version][0]:
FLAGS.onnx_tensorrt_tag = ORT_TO_TRTPARSER_VERSION_MAP[
FLAGS.ort_version][1]

if target_platform() == 'windows':
# OpenVINO EP not yet supported for windows build
Expand Down

0 comments on commit d815e2c

Please sign in to comment.