diff --git a/Dockerfile.sdk b/Dockerfile.sdk index d30a0ac5ff..e1dd354889 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -29,7 +29,7 @@ # # Base image on the minimum Triton container -ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.01-py3-min +ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.02-py3-min ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo ARG TRITON_COMMON_REPO_TAG=main diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min index dfa3706280..27d7f7c00f 100644 --- a/Dockerfile.win10.min +++ b/Dockerfile.win10.min @@ -28,10 +28,55 @@ ARG BASE_IMAGE=mcr.microsoft.com/windows:10.0.19042.1889 -FROM ${BASE_IMAGE} +FROM ${BASE_IMAGE} as dependency_base + +RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine +RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1') +RUN choco install unzip -y + +# +# Installing TensorRT +# +ARG TENSORRT_VERSION +ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip" +ARG TENSORRT_SOURCE=${TENSORRT_ZIP} +# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP} +ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP} +RUN unzip /tmp/%TENSORRT_ZIP% +RUN move TensorRT-* TensorRT + +LABEL TENSORRT_VERSION="${TENSORRT_VERSION}" + + +# +# Installing cuDNN +# +ARG CUDNN_VERSION +ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip +ARG CUDNN_SOURCE=${CUDNN_ZIP} +ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP} +RUN unzip /tmp/%CUDNN_ZIP% +RUN move cudnn-* cudnn + +LABEL CUDNN_VERSION="${CUDNN_VERSION}" + + +FROM ${BASE_IMAGE} as build_base SHELL ["cmd", "/S", "/C"] +ARG CUDNN_VERSION +ENV CUDNN_VERSION ${CUDNN_VERSION} +COPY --from=dependency_base /cudnn /cudnn +RUN setx PATH "c:\cudnn\bin;c:\cudnn\lib\x64;c:\cudnn\include;%PATH%" +LABEL CUDNN_VERSION="${CUDNN_VERSION}" + +ARG TENSORRT_VERSION +ENV TRT_VERSION ${TENSORRT_VERSION} +COPY --from=dependency_base /TensorRT /TensorRT +RUN setx PATH "c:\TensorRT\lib;%PATH%" +LABEL TENSORRT_VERSION="${TENSORRT_VERSION}" + RUN mkdir c:\tmp WORKDIR /tmp @@ -40,33 +85,30 @@ RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.Servi RUN choco install git docker unzip -y # -# Installing CMake +# Installing python # -ARG CMAKE_VERSION=3.27.1 -ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64 -ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip +ARG PYTHON_VERSION=3.10.11 +ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe +ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe +RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%" +RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe" +RUN pip install --upgrade wheel setuptools docker +RUN pip install grpcio-tools -ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip -RUN unzip %CMAKE_FILE%.zip -RUN move %CMAKE_FILE% "c:\CMake" -RUN setx PATH "c:\CMake\bin;%PATH%" +LABEL PYTHON_VERSION=${PYTHON_VERSION} +# +# Installing CMake +# +ARG CMAKE_VERSION=3.27.1 +RUN pip install cmake==%CMAKE_VERSION% ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake ENV VCPKG_TARGET_TRIPLET x64-windows - LABEL 
CMAKE_VERSION=${CMAKE_VERSION} # Be aware that pip can interact badly with VS cmd shell so need to pip install before # vsdevcmd.bat (see https://bugs.python.org/issue38989) -ARG PYTHON_VERSION=3.8.10 -ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe -ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe -RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%" -RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe" -RUN pip install --upgrade wheel setuptools docker -RUN pip install grpcio-tools -LABEL PYTHON_VERSION=${PYTHON_VERSION} # # Installing Visual Studio BuildTools: VS17 2022 @@ -149,43 +191,6 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%" LABEL CUDA_VERSION="${CUDA_VERSION}" - -# -# Installing TensorRT -# -ARG TENSORRT_VERSION=8.6.1.6 -ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip" -ARG TENSORRT_SOURCE=${TENSORRT_ZIP} -# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP} -ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP} - -RUN unzip /tmp/%TENSORRT_ZIP% -RUN move TensorRT-* TensorRT -ENV TRT_VERSION ${TENSORRT_VERSION} - -RUN setx PATH "c:\TensorRT\lib;%PATH%" - -LABEL TENSORRT_VERSION="${TENSORRT_VERSION}" - - -# -# Installing cuDNN -# -ARG CUDNN_VERSION=8.9.7.29 -ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip -ARG CUDNN_SOURCE=${CUDNN_ZIP} - -ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP} - -RUN unzip /tmp/%CUDNN_ZIP% -RUN move cudnn-* cudnn -RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\." -RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\." -RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\." - -ENV CUDNN_VERSION ${CUDNN_VERSION} - -LABEL CUDNN_VERSION="${CUDNN_VERSION}" # It is important that the entrypoint initialize VisualStudio # environment otherwise the build will fail. Also set # CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so diff --git a/README.md b/README.md index 48e7157a04..ea95c8534f 100644 --- a/README.md +++ b/README.md @@ -31,234 +31,4 @@ [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) > [!WARNING] -> ##### LATEST RELEASE -> You are currently on the `main` branch which tracks under-development progress towards the next release. -> The current release is version [2.42.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.01 container release on NVIDIA GPU Cloud (NGC). - -Triton Inference Server is an open source inference serving software that -streamlines AI inferencing. Triton enables teams to deploy any AI model from -multiple deep learning and machine learning frameworks, including TensorRT, -TensorFlow, PyTorch, ONNX, OpenVINO, Python, RAPIDS FIL, and more. Triton -Inference Server supports inference across cloud, data center, edge and embedded -devices on NVIDIA GPUs, x86 and ARM CPU, or AWS Inferentia. Triton Inference -Server delivers optimized performance for many query types, including real time, -batched, ensembles and audio/video streaming. Triton inference Server is part of -[NVIDIA AI Enterprise](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/), -a software platform that accelerates the data science pipeline and streamlines -the development and deployment of production AI. 
- -Major features include: - -- [Supports multiple deep learning - frameworks](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton) -- [Supports multiple machine learning - frameworks](https://github.com/triton-inference-server/fil_backend) -- [Concurrent model - execution](docs/user_guide/architecture.md#concurrent-model-execution) -- [Dynamic batching](docs/user_guide/model_configuration.md#dynamic-batcher) -- [Sequence batching](docs/user_guide/model_configuration.md#sequence-batcher) and - [implicit state management](docs/user_guide/architecture.md#implicit-state-management) - for stateful models -- Provides [Backend API](https://github.com/triton-inference-server/backend) that - allows adding custom backends and pre/post processing operations -- Supports writing custom backends in python, a.k.a. - [Python-based backends.](https://github.com/triton-inference-server/backend/blob/main/docs/python_based_backends.md#python-based-backends) -- Model pipelines using - [Ensembling](docs/user_guide/architecture.md#ensemble-models) or [Business - Logic Scripting - (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting) -- [HTTP/REST and GRPC inference - protocols](docs/customization_guide/inference_protocols.md) based on the community - developed [KServe - protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2) -- A [C API](docs/customization_guide/inference_protocols.md#in-process-triton-server-api) and - [Java API](docs/customization_guide/inference_protocols.md#java-bindings-for-in-process-triton-server-api) - allow Triton to link directly into your application for edge and other in-process use cases -- [Metrics](docs/user_guide/metrics.md) indicating GPU utilization, server - throughput, server latency, and more - -**New to Triton Inference Server?** Make use of -[these tutorials](https://github.com/triton-inference-server/tutorials) -to begin your Triton journey! - -Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and -stay current on the latest product updates, bug fixes, content, best practices, -and more. Need enterprise support? NVIDIA global support is available for Triton -Inference Server with the -[NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/). - -## Serve a Model in 3 Easy Steps - -```bash -# Step 1: Create the example model repository -git clone -b r24.01 https://github.com/triton-inference-server/server.git -cd server/docs/examples -./fetch_models.sh - -# Step 2: Launch triton from the NGC Triton container -docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.01-py3 tritonserver --model-repository=/models - -# Step 3: Sending an Inference Request -# In a separate console, launch the image_client example from the NGC Triton SDK container -docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.01-py3-sdk -/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg - -# Inference should return the following -Image '/workspace/images/mug.jpg': - 15.346230 (504) = COFFEE MUG - 13.224326 (968) = CUP - 10.422965 (505) = COFFEEPOT -``` -Please read the [QuickStart](docs/getting_started/quickstart.md) guide for additional information -regarding this example. 
The quickstart guide also contains an example of how to launch Triton on [CPU-only systems](docs/getting_started/quickstart.md#run-on-cpu-only-system). New to Triton and wondering where to get started? Watch the [Getting Started video](https://youtu.be/NQDtfSi5QF4). - -## Examples and Tutorials - -Check out [NVIDIA LaunchPad](https://www.nvidia.com/en-us/data-center/products/ai-enterprise-suite/trial/) -for free access to a set of hands-on labs with Triton Inference Server hosted on -NVIDIA infrastructure. - -Specific end-to-end examples for popular models, such as ResNet, BERT, and DLRM -are located in the -[NVIDIA Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples) -page on GitHub. The -[NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-triton-inference-server) -contains additional documentation, presentations, and examples. - -## Documentation - -### Build and Deploy - -The recommended way to build and use Triton Inference Server is with Docker -images. - -- [Install Triton Inference Server with Docker containers](docs/customization_guide/build.md#building-with-docker) (*Recommended*) -- [Install Triton Inference Server without Docker containers](docs/customization_guide/build.md#building-without-docker) -- [Build a custom Triton Inference Server Docker container](docs/customization_guide/compose.md) -- [Build Triton Inference Server from source](docs/customization_guide/build.md#building-on-unsupported-platforms) -- [Build Triton Inference Server for Windows 10](docs/customization_guide/build.md#building-for-windows-10) -- Examples for deploying Triton Inference Server with Kubernetes and Helm on [GCP](deploy/gcp/README.md), - [AWS](deploy/aws/README.md), and [NVIDIA FleetCommand](deploy/fleetcommand/README.md) -- [Secure Deployment Considerations](docs/customization_guide/deploy.md) - -### Using Triton - -#### Preparing Models for Triton Inference Server - -The first step in using Triton to serve your models is to place one or -more models into a [model repository](docs/user_guide/model_repository.md). Depending on -the type of the model and on what Triton capabilities you want to enable for -the model, you may need to create a [model -configuration](docs/user_guide/model_configuration.md) for the model. - -- [Add custom operations to Triton if needed by your model](docs/user_guide/custom_operations.md) -- Enable model pipelining with [Model Ensemble](docs/user_guide/architecture.md#ensemble-models) - and [Business Logic Scripting (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting) -- Optimize your models setting [scheduling and batching](docs/user_guide/architecture.md#models-and-schedulers) - parameters and [model instances](docs/user_guide/model_configuration.md#instance-groups). 
-- Use the [Model Analyzer tool](https://github.com/triton-inference-server/model_analyzer) - to help optimize your model configuration with profiling -- Learn how to [explicitly manage what models are available by loading and - unloading models](docs/user_guide/model_management.md) - -#### Configure and Use Triton Inference Server - -- Read the [Quick Start Guide](docs/getting_started/quickstart.md) to run Triton Inference - Server on both GPU and CPU -- Triton supports multiple execution engines, called - [backends](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton), including - [TensorRT](https://github.com/triton-inference-server/tensorrt_backend), - [TensorFlow](https://github.com/triton-inference-server/tensorflow_backend), - [PyTorch](https://github.com/triton-inference-server/pytorch_backend), - [ONNX](https://github.com/triton-inference-server/onnxruntime_backend), - [OpenVINO](https://github.com/triton-inference-server/openvino_backend), - [Python](https://github.com/triton-inference-server/python_backend), and more -- Not all the above backends are supported on every platform supported by Triton. - Look at the - [Backend-Platform Support Matrix](https://github.com/triton-inference-server/backend/blob/main/docs/backend_platform_support_matrix.md) - to learn which backends are supported on your target platform. -- Learn how to [optimize performance](docs/user_guide/optimization.md) using the - [Performance Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md) - and - [Model Analyzer](https://github.com/triton-inference-server/model_analyzer) -- Learn how to [manage loading and unloading models](docs/user_guide/model_management.md) in - Triton -- Send requests directly to Triton with the [HTTP/REST JSON-based - or gRPC protocols](docs/customization_guide/inference_protocols.md#httprest-and-grpc-protocols) - -#### Client Support and Examples - -A Triton *client* application sends inference and other requests to Triton. The -[Python and C++ client libraries](https://github.com/triton-inference-server/client) -provide APIs to simplify this communication. - -- Review client examples for [C++](https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/examples), - [Python](https://github.com/triton-inference-server/client/blob/main/src/python/examples), - and [Java](https://github.com/triton-inference-server/client/blob/main/src/java/src/main/java/triton/client/examples) -- Configure [HTTP](https://github.com/triton-inference-server/client#http-options) - and [gRPC](https://github.com/triton-inference-server/client#grpc-options) - client options -- Send input data (e.g. 
a jpeg image) directly to Triton in the [body of an HTTP - request without any additional metadata](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md#raw-binary-request) - -### Extend Triton - -[Triton Inference Server's architecture](docs/user_guide/architecture.md) is specifically -designed for modularity and flexibility - -- [Customize Triton Inference Server container](docs/customization_guide/compose.md) for your use case -- [Create custom backends](https://github.com/triton-inference-server/backend) - in either [C/C++](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api) - or [Python](https://github.com/triton-inference-server/python_backend) -- Create [decoupled backends and models](docs/user_guide/decoupled_models.md) that can send - multiple responses for a request or not send any responses for a request -- Use a [Triton repository agent](docs/customization_guide/repository_agents.md) to add functionality - that operates when a model is loaded and unloaded, such as authentication, - decryption, or conversion -- Deploy Triton on [Jetson and JetPack](docs/user_guide/jetson.md) -- [Use Triton on AWS - Inferentia](https://github.com/triton-inference-server/python_backend/tree/main/inferentia) - -### Additional Documentation - -- [FAQ](docs/user_guide/faq.md) -- [User Guide](docs/README.md#user-guide) -- [Customization Guide](docs/README.md#customization-guide) -- [Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html) -- [GPU, Driver, and CUDA Support -Matrix](https://docs.nvidia.com/deeplearning/dgx/support-matrix/index.html) - -## Contributing - -Contributions to Triton Inference Server are more than welcome. To -contribute please review the [contribution -guidelines](CONTRIBUTING.md). If you have a backend, client, -example or similar contribution that is not modifying the core of -Triton, then you should file a PR in the [contrib -repo](https://github.com/triton-inference-server/contrib). - -## Reporting problems, asking questions - -We appreciate any feedback, questions or bug reporting regarding this project. -When posting [issues in GitHub](https://github.com/triton-inference-server/server/issues), -follow the process outlined in the [Stack Overflow document](https://stackoverflow.com/help/mcve). -Ensure posted examples are: -- minimal – use as little code as possible that still produces the - same problem -- complete – provide all parts needed to reproduce the problem. Check - if you can strip external dependencies and still show the problem. The - less time we spend on reproducing problems the more time we have to - fix it -- verifiable – test the code you're about to provide to make sure it - reproduces the problem. Remove all other problems that are not - related to your request/question. - -For issues, please use the provided bug report and feature request templates. - -For questions, we recommend posting in our community -[GitHub Discussions.](https://github.com/triton-inference-server/server/discussions) - -## For more information - -Please refer to the [NVIDIA Developer Triton page](https://developer.nvidia.com/nvidia-triton-inference-server) -for more information. 
+> THIS BRANCH IS UNDER DEVELOPMENT AND IS NOT YET STABLE. \ No newline at end of file diff --git a/TRITON_VERSION b/TRITON_VERSION index 8a3555062d..5b9cd9afd5 100644 --- a/TRITON_VERSION +++ b/TRITON_VERSION @@ -1 +1 @@ -2.43.0dev +2.43.0 diff --git a/build.py b/build.py index 60ef90e441..325f0904e4 100755 --- a/build.py +++ b/build.py @@ -70,9 +70,9 @@ # incorrectly load the other version of the openvino libraries. # TRITON_VERSION_MAP = { - "2.43.0dev": ( - "24.02dev", # triton container - "24.01", # upstream container + "2.43.0": ( + "24.02", # triton container + "24.02", # upstream container "1.16.3", # ORT "2023.0.0", # ORT OpenVINO "2023.0.0", # Standalone OpenVINO @@ -1337,7 +1337,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine): COPY --from=min_container /opt/hpcx/ucx/lib/libucs.so.0 /opt/hpcx/ucx/lib/libucs.so.0 COPY --from=min_container /opt/hpcx/ucx/lib/libuct.so.0 /opt/hpcx/ucx/lib/libuct.so.0 -COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8 +COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 # patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so RUN apt-get update && \ diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml index aa15a50085..16ed8323d7 100644 --- a/deploy/aws/values.yaml +++ b/deploy/aws/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.01-py3 + imageName: nvcr.io/nvidia/tritonserver:24.02-py3 pullPolicy: IfNotPresent modelRepositoryPath: s3://triton-inference-server-repository/model_repository numGpus: 1 diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml index 04998a3254..ad83541baf 100644 --- a/deploy/fleetcommand/Chart.yaml +++ b/deploy/fleetcommand/Chart.yaml @@ -26,7 +26,7 @@ apiVersion: v1 # appVersion is the Triton version; update when changing release -appVersion: "2.42.0" +appVersion: "2.43.0" description: Triton Inference Server (Fleet Command) name: triton-inference-server # version is the Chart version; update when changing anything in the chart diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml index 35a778bfc4..655185c6a9 100644 --- a/deploy/fleetcommand/values.yaml +++ b/deploy/fleetcommand/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.01-py3 + imageName: nvcr.io/nvidia/tritonserver:24.02-py3 pullPolicy: IfNotPresent numGpus: 1 serverCommand: tritonserver @@ -46,13 +46,13 @@ image: # Model Control Mode (Optional, default: none) # # To set model control mode, uncomment and configure below - # See https://github.com/triton-inference-server/server/blob/r24.01/docs/model_management.md + # See https://github.com/triton-inference-server/server/blob/r24.02/docs/model_management.md # for more details #- --model-control-mode=explicit|poll|none # # Additional server args # - # see https://github.com/triton-inference-server/server/blob/r24.01/README.md + # see https://github.com/triton-inference-server/server/blob/r24.02/README.md # for more details service: diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml index 680bc266bc..264005b539 100644 --- a/deploy/gcp/values.yaml +++ b/deploy/gcp/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.01-py3 + imageName: nvcr.io/nvidia/tritonserver:24.02-py3 pullPolicy: IfNotPresent modelRepositoryPath:
gs://triton-inference-server-repository/model_repository numGpus: 1 diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml index 80b4013645..a0d931f42d 100644 --- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml +++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml @@ -33,7 +33,7 @@ metadata: namespace: default spec: containers: - - image: nvcr.io/nvidia/tritonserver:24.01-py3-sdk + - image: nvcr.io/nvidia/tritonserver:24.02-py3-sdk imagePullPolicy: Always name: nv-triton-client securityContext: diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh index 8c55ca8b90..952498c53f 100755 --- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh +++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh @@ -28,8 +28,8 @@ export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/') export APP_NAME=tritonserver export MAJOR_VERSION=2.41 -export MINOR_VERSION=2.42.0 -export NGC_VERSION=24.01-py3 +export MINOR_VERSION=2.43.0 +export NGC_VERSION=24.02-py3 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml index bdb2725d7c..d973852daf 100644 --- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml +++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml @@ -28,4 +28,4 @@ apiVersion: v1 appVersion: "2.41" description: Triton Inference Server name: triton-inference-server -version: 2.42.0 +version: 2.43.0 diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml index 23728a6d91..3890f2b2f2 100644 --- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml +++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml @@ -32,13 +32,13 @@ tritonProtocol: HTTP # HPA GPU utilization autoscaling target HPATargetAverageValue: 85 modelRepositoryPath: gs://triton_sample_models/23_12 -publishedVersion: '2.42.0' +publishedVersion: '2.43.0' gcpMarketplace: true image: registry: gcr.io repository: nvidia-ngc-public/tritonserver - tag: 24.01-py3 + tag: 24.02-py3 pullPolicy: IfNotPresent # modify the model repository here to match your GCP storage bucket numGpus: 1 diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml index 54e6498202..57ec3e892d 100644 --- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml +++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml @@ -27,7 +27,7 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: '2.42.0' + publishedVersion: '2.43.0' publishedVersionMetadata: releaseNote: >- Initial release. 
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml index f2f93f12de..1af8f82928 100644 --- a/deploy/gke-marketplace-app/server-deployer/schema.yaml +++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml @@ -27,7 +27,7 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: '2.42.0' + publishedVersion: '2.43.0' publishedVersionMetadata: releaseNote: >- Initial release. diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md index cba0095594..8367057a33 100644 --- a/deploy/gke-marketplace-app/trt-engine/README.md +++ b/deploy/gke-marketplace-app/trt-engine/README.md @@ -33,7 +33,7 @@ ``` docker run --gpus all -it --network host \ --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ - -v ~:/scripts nvcr.io/nvidia/tensorrt:24.01-py3 + -v ~:/scripts nvcr.io/nvidia/tensorrt:24.02-py3 pip install onnx six torch tf2onnx tensorflow diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml index 54f0480d7f..6b0fd27acd 100644 --- a/deploy/k8s-onprem/values.yaml +++ b/deploy/k8s-onprem/values.yaml @@ -29,7 +29,7 @@ tags: loadBalancing: true image: - imageName: nvcr.io/nvidia/tritonserver:24.01-py3 + imageName: nvcr.io/nvidia/tritonserver:24.02-py3 pullPolicy: IfNotPresent modelRepositoryServer: < Replace with the IP Address of your file server > modelRepositoryPath: /srv/models diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml index dad31bc412..b8db949363 100644 --- a/deploy/oci/values.yaml +++ b/deploy/oci/values.yaml @@ -27,7 +27,7 @@ replicaCount: 1 image: - imageName: nvcr.io/nvidia/tritonserver:24.01-py3 + imageName: nvcr.io/nvidia/tritonserver:24.02-py3 pullPolicy: IfNotPresent modelRepositoryPath: s3://https://.compat.objectstorage..oraclecloud.com:443/triton-inference-server-repository numGpus: 1 diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md index 70a258e60e..4c1cf44e78 100644 --- a/docs/customization_guide/build.md +++ b/docs/customization_guide/build.md @@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common: --repo-tag=core:` will default to the branch name. For example, if you are building on the -r24.01 branch, `` will default to r24.01. If you are +r24.02 branch, `` will default to r24.02. If you are building on any other branch (including the *main* branch) then `` will default to "main". Therefore, you typically do not need to provide `` at all (nor the preceding @@ -334,8 +334,8 @@ python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild If you are building on *main* branch then '' will default to "main". If you are building on a release branch then '' will default to the branch name. For example, if you -are building on the r24.01 branch, '' will default to -r24.01. Therefore, you typically do not need to provide '' will default to +r24.02. Therefore, you typically do not need to provide '' at all (nor the preceding colon). You can use a different '' for a component to instead use the corresponding branch/tag in the build. For example, if you have a branch called diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md index b632535a98..859ce91eba 100644 --- a/docs/customization_guide/compose.md +++ b/docs/customization_guide/compose.md @@ -44,8 +44,8 @@ from source to get more exact customization. 
The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server). Simply clone the repository and run `compose.py` to create a custom container. Note: Created container version will depend on the branch that was cloned. -For example branch [r24.01](https://github.com/triton-inference-server/server/tree/r24.01) -should be used to create a image based on the NGC 24.01 Triton release. +For example branch [r24.02](https://github.com/triton-inference-server/server/tree/r24.02) +should be used to create an image based on the NGC 24.02 Triton release. `compose.py` provides `--backend`, `--repoagent` options that allow you to specify which backends and repository agents to include in the custom image. @@ -76,19 +76,19 @@ For example, running ``` python3 compose.py --backend tensorflow1 --repoagent checksum ``` -on branch [r24.01](https://github.com/triton-inference-server/server/tree/r24.01) pulls: -- `min` container `nvcr.io/nvidia/tritonserver:24.01-py3-min` -- `full` container `nvcr.io/nvidia/tritonserver:24.01-py3` +on branch [r24.02](https://github.com/triton-inference-server/server/tree/r24.02) pulls: +- `min` container `nvcr.io/nvidia/tritonserver:24.02-py3-min` +- `full` container `nvcr.io/nvidia/tritonserver:24.02-py3` Alternatively, users can specify the version of Triton container to pull from any branch by either: 1. Adding flag `--container-version ` to branch ``` -python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 24.01 +python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 24.02 ``` 2. Specifying `--image min, --image full,`. The user is responsible for specifying compatible `min` and `full` containers. ``` -python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.01-py3-min --image full,nvcr.io/nvidia/tritonserver:24.01-py3 +python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.02-py3-min --image full,nvcr.io/nvidia/tritonserver:24.02-py3 ``` Method 1 and 2 will result in the same composed container. Furthermore, `--image` flag overrides the `--container-version` flag when both are specified. diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md index 43144d6180..baa2676b6a 100644 --- a/docs/customization_guide/test.md +++ b/docs/customization_guide/test.md @@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops ``` This will create multiple model repositories in /tmp//qa_* -(for example /tmp/24.01/qa_model_repository). The TensorRT models +(for example /tmp/24.02/qa_model_repository). The TensorRT models will be created for the GPU on the system that CUDA considers device 0 (zero). If you have multiple GPUs on your system see the documentation in the scripts for how to target a specific GPU. diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md index 048860a24d..c82760544e 100644 --- a/docs/user_guide/custom_operations.md +++ b/docs/user_guide/custom_operations.md @@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT is to use the [NGC TensorRT container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt) corresponding to the Triton container. For example, if you are using -the 24.01 version of Triton, use the 24.01 version of the TensorRT +the 24.02 version of Triton, use the 24.02 version of the TensorRT container.
## TensorFlow @@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow is to use the [NGC TensorFlow container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow) corresponding to the Triton container. For example, if you are using -the 24.01 version of Triton, use the 24.01 version of the TensorFlow +the 24.02 version of Triton, use the 24.02 version of the TensorFlow container. ## PyTorch @@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch is to use the [NGC PyTorch container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) corresponding to the Triton container. For example, if you are using -the 24.01 version of Triton, use the 24.01 version of the PyTorch +the 24.02 version of Triton, use the 24.02 version of the PyTorch container. ## ONNX diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md index d79e0aa256..e226d02763 100644 --- a/docs/user_guide/metrics.md +++ b/docs/user_guide/metrics.md @@ -285,7 +285,7 @@ If building Triton locally, the `TRITON_ENABLE_METRICS_CPU` CMake build flag can ## Pinned Memory Metrics -Starting in 24.01, Triton offers Pinned Memory metrics to monitor the utilization of the Pinned Memory pool. +Starting in 24.02, Triton offers Pinned Memory metrics to monitor the utilization of the Pinned Memory pool. |Category |Metric |Metric Name |Description |Granularity|Frequency | |----------------|------------------|----------------------------|-------------------------------------------------------|-----------|-------------| diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md index 6f0d3d24e8..b118eb3953 100644 --- a/docs/user_guide/performance_tuning.md +++ b/docs/user_guide/performance_tuning.md @@ -235,7 +235,7 @@ with a `tritonserver` binary. ```bash # Start server container -docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.01-py3 +docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:24.02-py3 # Start serving your models tritonserver --model-repository=/mnt/models @@ -284,7 +284,7 @@ by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u ```bash # Start the SDK container interactively -docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.01-py3-sdk +docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:24.02-py3-sdk # Benchmark model being served from step 3 perf_analyzer -m densenet_onnx --concurrency-range 1:4 diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md index 1abe019ea1..516135fcc4 100644 --- a/docs/user_guide/trace.md +++ b/docs/user_guide/trace.md @@ -595,7 +595,7 @@ The following table shows available OpenTelemetry trace APIs settings for ### OpenTelemetry Context Propagation Triton supports [context propagation](https://opentelemetry.io/docs/concepts/context-propagation/) -in OpenTelemetry mode starting in version 24.01. Note, that every request +in OpenTelemetry mode starting in version 24.02. Note, that every request with propagated OpenTelemetry context will be traced, regardless of `rate` and `count` trace settings. 
If a user wishes to trace only those requests, for which OpenTelemetry context was injected on the client side, please start Triton with diff --git a/qa/common/cuda_op_kernel.cu.cc.patch b/qa/common/cuda_op_kernel.cu.cc.patch index 24d915aa20..617521a0f9 100644 --- a/qa/common/cuda_op_kernel.cu.cc.patch +++ b/qa/common/cuda_op_kernel.cu.cc.patch @@ -7,7 +7,7 @@ index a9d66f9..a92e218 100644 #if GOOGLE_CUDA -#define EIGEN_USE_GPU --#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +-#include "unsupported/Eigen/CXX11/Tensor" // from @eigen_archive -#include "tensorflow/core/util/gpu_kernel_helper.h" -#include "tensorflow/core/util/gpu_launch_config.h" +//#define EIGEN_USE_GPU diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models index 97b613cdc7..7d1416598a 100755 --- a/qa/common/gen_jetson_trt_models +++ b/qa/common/gen_jetson_trt_models @@ -34,7 +34,7 @@ # Make all generated files accessible outside of container umask 0000 # Set the version of the models -TRITON_VERSION=${TRITON_VERSION:=24.01} +TRITON_VERSION=${TRITON_VERSION:=24.02} # Set the CUDA device to use CUDA_DEVICE=${RUNNER_ID:=0} # Set TensorRT image diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops index ab7caf4bc2..d13cff36af 100755 --- a/qa/common/gen_qa_custom_ops +++ b/qa/common/gen_qa_custom_ops @@ -37,7 +37,7 @@ ## ############################################################################ -TRITON_VERSION=${TRITON_VERSION:=24.01} +TRITON_VERSION=${TRITON_VERSION:=24.02} NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION} TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3} PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3} diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index ad6ebcd827..8c7f958e25 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -48,7 +48,7 @@ ## ############################################################################ -TRITON_VERSION=${TRITON_VERSION:=24.01} +TRITON_VERSION=${TRITON_VERSION:=24.02} # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version ONNX_VERSION=1.13.0