Update README and versions for 2.43.0 / 24.02 (#6886)
* Update README and versions for 2.43.0 / 24.02

* Update Dockerfile to reduce image size.

* Update path in patch file for model generation

Update README.md post-24.02
mc-nv committed Mar 2, 2024
1 parent ad25365 commit 3bc6863
Showing 28 changed files with 106 additions and 101 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.sdk
@@ -29,7 +29,7 @@
#

# Base image on the minimum Triton container
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.01-py3-min
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.02-py3-min

ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_COMMON_REPO_TAG=main
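A minimal build sketch for this file, assuming the repository root as the build context and an arbitrary local image tag; the `--build-arg` is only needed when overriding the default `BASE_IMAGE` set above:

```bash
# Sketch only: build the SDK image against the 24.02 minimal base (tag name is illustrative)
docker build -f Dockerfile.sdk \
  --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.02-py3-min \
  -t tritonserver_sdk .
```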
115 changes: 60 additions & 55 deletions Dockerfile.win10.min
@@ -28,10 +28,55 @@

ARG BASE_IMAGE=mcr.microsoft.com/windows:10.0.19042.1889

FROM ${BASE_IMAGE}
FROM ${BASE_IMAGE} as dependency_base

RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
RUN choco install unzip -y

#
# Installing TensorRT
#
ARG TENSORRT_VERSION
ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip"
ARG TENSORRT_SOURCE=${TENSORRT_ZIP}
# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
RUN unzip /tmp/%TENSORRT_ZIP%
RUN move TensorRT-* TensorRT

LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"


#
# Installing cuDNN
#
ARG CUDNN_VERSION
ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ARG CUDNN_SOURCE=${CUDNN_ZIP}
ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
RUN unzip /tmp/%CUDNN_ZIP%
RUN move cudnn-* cudnn

LABEL CUDNN_VERSION="${CUDNN_VERSION}"


FROM ${BASE_IMAGE} as build_base

SHELL ["cmd", "/S", "/C"]

ARG CUDNN_VERSION
ENV CUDNN_VERSION ${CUDNN_VERSION}
COPY --from=dependency_base /cudnn /cudnn
RUN setx PATH "c:\cudnn\bin;c:\cudnn\lib\x64;c:\cudnn\include;%PATH%"
LABEL CUDNN_VERSION="${CUDNN_VERSION}"

ARG TENSORRT_VERSION
ENV TRT_VERSION ${TENSORRT_VERSION}
COPY --from=dependency_base /TensorRT /TensorRT
RUN setx PATH "c:\TensorRT\lib;%PATH%"
LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"

RUN mkdir c:\tmp
WORKDIR /tmp

@@ -40,33 +85,30 @@ RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.Servi
RUN choco install git docker unzip -y

#
# Installing CMake
# Installing python
#
ARG CMAKE_VERSION=3.27.1
ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
ARG PYTHON_VERSION=3.10.11
ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
RUN pip install --upgrade wheel setuptools docker
RUN pip install grpcio-tools

ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
RUN unzip %CMAKE_FILE%.zip
RUN move %CMAKE_FILE% "c:\CMake"
RUN setx PATH "c:\CMake\bin;%PATH%"
LABEL PYTHON_VERSION=${PYTHON_VERSION}

#
# Installing CMake
#
ARG CMAKE_VERSION=3.27.1
RUN pip install cmake==%CMAKE_VERSION%
ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
ENV VCPKG_TARGET_TRIPLET x64-windows

LABEL CMAKE_VERSION=${CMAKE_VERSION}

# Be aware that pip can interact badly with VS cmd shell so need to pip install before
# vsdevcmd.bat (see https://bugs.python.org/issue38989)
ARG PYTHON_VERSION=3.8.10
ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
RUN pip install --upgrade wheel setuptools docker
RUN pip install grpcio-tools

LABEL PYTHON_VERSION=${PYTHON_VERSION}

#
# Installing Visual Studio BuildTools: VS17 2022
@@ -149,43 +191,6 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi
RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"

LABEL CUDA_VERSION="${CUDA_VERSION}"

#
# Installing TensorRT
#
ARG TENSORRT_VERSION=8.6.1.6
ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip"
ARG TENSORRT_SOURCE=${TENSORRT_ZIP}
# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}

RUN unzip /tmp/%TENSORRT_ZIP%
RUN move TensorRT-* TensorRT
ENV TRT_VERSION ${TENSORRT_VERSION}

RUN setx PATH "c:\TensorRT\lib;%PATH%"

LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"


#
# Installing cuDNN
#
ARG CUDNN_VERSION=8.9.7.29
ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ARG CUDNN_SOURCE=${CUDNN_ZIP}

ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}

RUN unzip /tmp/%CUDNN_ZIP%
RUN move cudnn-* cudnn
RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."

ENV CUDNN_VERSION ${CUDNN_VERSION}

LABEL CUDNN_VERSION="${CUDNN_VERSION}"
# It is important that the entrypoint initialize VisualStudio
# environment otherwise the build will fail. Also set
# CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so
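As shown in this hunk, `TENSORRT_VERSION` and `CUDNN_VERSION` no longer carry defaults in the new `dependency_base` stage, so they have to be supplied at build time. A hedged sketch, reusing the version numbers that were previously hard-coded and assuming the TensorRT and cuDNN zip archives are present in the build context (the `*_SOURCE` args default to those zip names):

```bash
# Sketch only: build the Windows min image, passing the dependency versions explicitly
docker build -f Dockerfile.win10.min \
  --build-arg TENSORRT_VERSION=8.6.1.6 \
  --build-arg CUDNN_VERSION=8.9.7.29 \
  -t tritonserver_win10_min .
```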
8 changes: 4 additions & 4 deletions README.md
@@ -33,7 +33,7 @@
> [!WARNING]
> ##### LATEST RELEASE
> You are currently on the `main` branch which tracks under-development progress towards the next release.
> The current release is version [2.42.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.01 container release on NVIDIA GPU Cloud (NGC).
> The current release is version [2.43.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.02 container release on NVIDIA GPU Cloud (NGC).
Triton Inference Server is an open source inference serving software that
streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -91,16 +91,16 @@ Inference Server with the

```bash
# Step 1: Create the example model repository
git clone -b r24.01 https://github.com/triton-inference-server/server.git
git clone -b r24.02 https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh

# Step 2: Launch triton from the NGC Triton container
docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.01-py3 tritonserver --model-repository=/models
docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.02-py3 tritonserver --model-repository=/models

# Step 3: Sending an Inference Request
# In a separate console, launch the image_client example from the NGC Triton SDK container
docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.01-py3-sdk
docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.02-py3-sdk
/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

# Inference should return the following
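Not part of the change, but a quick way to confirm the server from Step 2 is up before running Step 3 is to poll Triton's readiness endpoint (assuming the default HTTP port 8000 is reachable on the host):

```bash
# Optional readiness check; returns HTTP 200 once the server and models are ready
curl -v localhost:8000/v2/health/ready
```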
4 changes: 2 additions & 2 deletions build.py
@@ -72,7 +72,7 @@
TRITON_VERSION_MAP = {
"2.44.0dev": (
"24.03dev", # triton container
"24.01", # upstream container
"24.02", # upstream container
"1.16.3", # ORT
"2023.3.0", # ORT OpenVINO
"2023.3.0", # Standalone OpenVINO
@@ -1337,7 +1337,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
COPY --from=min_container /opt/hpcx/ucx/lib/libucs.so.0 /opt/hpcx/ucx/lib/libucs.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libuct.so.0 /opt/hpcx/ucx/lib/libuct.so.0
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9
# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
RUN apt-get update && \
2 changes: 1 addition & 1 deletion deploy/aws/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.01-py3
imageName: nvcr.io/nvidia/tritonserver:24.02-py3
pullPolicy: IfNotPresent
modelRepositoryPath: s3://triton-inference-server-repository/model_repository
numGpus: 1
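For reference, the image tag can also be overridden at install time instead of editing `values.yaml`; a hypothetical sketch, assuming the chart is installed from the `deploy/aws` directory with release name `triton`:

```bash
# Hypothetical usage: pin the 24.02 image via --set rather than editing values.yaml
helm install triton ./deploy/aws \
  --set image.imageName=nvcr.io/nvidia/tritonserver:24.02-py3
```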
2 changes: 1 addition & 1 deletion deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@

apiVersion: v1
# appVersion is the Triton version; update when changing release
appVersion: "2.42.0"
appVersion: "2.43.0"
description: Triton Inference Server (Fleet Command)
name: triton-inference-server
# version is the Chart version; update when changing anything in the chart
6 changes: 3 additions & 3 deletions deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.01-py3
imageName: nvcr.io/nvidia/tritonserver:24.02-py3
pullPolicy: IfNotPresent
numGpus: 1
serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
# Model Control Mode (Optional, default: none)
#
# To set model control mode, uncomment and configure below
# See https://github.com/triton-inference-server/server/blob/r24.01/docs/model_management.md
# See https://github.com/triton-inference-server/server/blob/r24.02/docs/model_management.md
# for more details
#- --model-control-mode=explicit|poll|none
#
# Additional server args
#
# see https://github.com/triton-inference-server/server/blob/r24.01/README.md
# see https://github.com/triton-inference-server/server/blob/r24.02/README.md
# for more details

service:
2 changes: 1 addition & 1 deletion deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.01-py3
imageName: nvcr.io/nvidia/tritonserver:24.02-py3
pullPolicy: IfNotPresent
modelRepositoryPath: gs://triton-inference-server-repository/model_repository
numGpus: 1
@@ -33,7 +33,7 @@ metadata:
namespace: default
spec:
containers:
- image: nvcr.io/nvidia/tritonserver:24.01-py3-sdk
- image: nvcr.io/nvidia/tritonserver:24.02-py3-sdk
imagePullPolicy: Always
name: nv-triton-client
securityContext:
4 changes: 2 additions & 2 deletions deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -28,8 +28,8 @@
export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
export APP_NAME=tritonserver
export MAJOR_VERSION=2.41
export MINOR_VERSION=2.42.0
export NGC_VERSION=24.01-py3
export MINOR_VERSION=2.43.0
export NGC_VERSION=24.02-py3

docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION

@@ -28,4 +28,4 @@ apiVersion: v1
appVersion: "2.41"
description: Triton Inference Server
name: triton-inference-server
version: 2.42.0
version: 2.43.0
@@ -32,13 +32,13 @@ tritonProtocol: HTTP
# HPA GPU utilization autoscaling target
HPATargetAverageValue: 85
modelRepositoryPath: gs://triton_sample_models/23_12
publishedVersion: '2.42.0'
publishedVersion: '2.43.0'
gcpMarketplace: true

image:
registry: gcr.io
repository: nvidia-ngc-public/tritonserver
tag: 24.01-py3
tag: 24.02-py3
pullPolicy: IfNotPresent
# modify the model repository here to match your GCP storage bucket
numGpus: 1
@@ -27,7 +27,7 @@
x-google-marketplace:
schemaVersion: v2
applicationApiVersion: v1beta1
publishedVersion: '2.42.0'
publishedVersion: '2.43.0'
publishedVersionMetadata:
releaseNote: >-
Initial release.
2 changes: 1 addition & 1 deletion deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
x-google-marketplace:
schemaVersion: v2
applicationApiVersion: v1beta1
publishedVersion: '2.42.0'
publishedVersion: '2.43.0'
publishedVersionMetadata:
releaseNote: >-
Initial release.
2 changes: 1 addition & 1 deletion deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
```
docker run --gpus all -it --network host \
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-v ~:/scripts nvcr.io/nvidia/tensorrt:24.01-py3
-v ~:/scripts nvcr.io/nvidia/tensorrt:24.02-py3
pip install onnx six torch tf2onnx tensorflow
2 changes: 1 addition & 1 deletion deploy/k8s-onprem/values.yaml
@@ -29,7 +29,7 @@ tags:
loadBalancing: true

image:
imageName: nvcr.io/nvidia/tritonserver:24.01-py3
imageName: nvcr.io/nvidia/tritonserver:24.02-py3
pullPolicy: IfNotPresent
modelRepositoryServer: < Replace with the IP Address of your file server >
modelRepositoryPath: /srv/models
2 changes: 1 addition & 1 deletion deploy/oci/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.01-py3
imageName: nvcr.io/nvidia/tritonserver:24.02-py3
pullPolicy: IfNotPresent
modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
numGpus: 1
6 changes: 3 additions & 3 deletions docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container ta

If you are building on a release branch then `<container tag>` will
default to the branch name. For example, if you are building on the
r24.01 branch, `<container tag>` will default to r24.01. If you are
r24.02 branch, `<container tag>` will default to r24.02. If you are
building on any other branch (including the *main* branch) then
`<container tag>` will default to "main". Therefore, you typically do
not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild
If you are building on *main* branch then '<container tag>' will
default to "main". If you are building on a release branch then
'<container tag>' will default to the branch name. For example, if you
are building on the r24.01 branch, '<container tag>' will default to
r24.01. Therefore, you typically do not need to provide '<container
are building on the r24.02 branch, '<container tag>' will default to
r24.02. Therefore, you typically do not need to provide '<container
tag>' at all (nor the preceding colon). You can use a different
'<container tag>' for a component to instead use the corresponding
branch/tag in the build. For example, if you have a branch called
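As the paragraphs above describe, `<container tag>` defaults to the branch name, so on `r24.02` the defaults are normally fine. A hedged sketch of pinning the component repos explicitly instead (only the `common` and `core` flags appear in the excerpt above; the `r24.02` tags in those repos are assumed to exist):

```bash
# Sketch only: override the default <container tag> for specific component repos
./build.py ... --repo-tag=common:r24.02 --repo-tag=core:r24.02
```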
14 changes: 7 additions & 7 deletions docs/customization_guide/compose.md
@@ -44,8 +44,8 @@ from source to get more exact customization.
The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
Simply clone the repository and run `compose.py` to create a custom container.
Note: Created container version will depend on the branch that was cloned.
For example branch [r24.01](https://github.com/triton-inference-server/server/tree/r24.01)
should be used to create an image based on the NGC 24.01 Triton release.
For example branch [r24.02](https://github.com/triton-inference-server/server/tree/r24.02)
should be used to create an image based on the NGC 24.02 Triton release.

`compose.py` provides `--backend`, `--repoagent` options that allow you to
specify which backends and repository agents to include in the custom image.
@@ -76,19 +76,19 @@ For example, running
```
python3 compose.py --backend tensorflow1 --repoagent checksum
```
on branch [r24.01](https://github.com/triton-inference-server/server/tree/r24.01) pulls:
- `min` container `nvcr.io/nvidia/tritonserver:24.01-py3-min`
- `full` container `nvcr.io/nvidia/tritonserver:24.01-py3`
on branch [r24.02](https://github.com/triton-inference-server/server/tree/r24.02) pulls:
- `min` container `nvcr.io/nvidia/tritonserver:24.02-py3-min`
- `full` container `nvcr.io/nvidia/tritonserver:24.02-py3`

Alternatively, users can specify the version of Triton container to pull from any branch by either:
1. Adding flag `--container-version <container version>` to branch
```
python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 24.01
python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 24.02
```
2. Specifying `--image min,<min container image name> --image full,<full container image name>`.
The user is responsible for specifying compatible `min` and `full` containers.
```
python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.01-py3-min --image full,nvcr.io/nvidia/tritonserver:24.01-py3
python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.02-py3-min --image full,nvcr.io/nvidia/tritonserver:24.02-py3
```
Method 1 and 2 will result in the same composed container. Furthermore, `--image` flag overrides the `--container-version` flag when both are specified.
