Skip to content

Commit

Permalink
Merge branch 'main' of github.com:triton-inference-server/server into…
Browse files Browse the repository at this point in the history
… yinggeh-upgrade-openvino-model-version-24.12
  • Loading branch information
yinggeh committed Dec 20, 2024
2 parents 1abd8fc + 0194c3d commit d2bdd8d
Show file tree
Hide file tree
Showing 50 changed files with 6,229 additions and 1,006 deletions.
42 changes: 28 additions & 14 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
elif be == "tensorflow":
args = tensorflow_cmake_args(images, library_paths)
elif be == "python":
args = []
args = python_cmake_args()
elif be == "dali":
args = dali_cmake_args()
elif be == "pytorch":
Expand Down Expand Up @@ -631,6 +631,18 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
return cargs


def python_cmake_args():
    """Return the extra CMake arguments for the python backend.

    On every target platform other than "rhel" no extra arguments are
    needed and an empty list is returned. On "rhel" the list holds a
    single entry, built with cmake_backend_arg, that sets the STRING
    option PYBIND11_PYTHON_VERSION to FLAGS.rhel_py_version.
    """
    # Guard clause: only RHEL builds pass a python version to the backend.
    if target_platform() != "rhel":
        return []

    pybind_version_arg = cmake_backend_arg(
        "python", "PYBIND11_PYTHON_VERSION", "STRING", FLAGS.rhel_py_version
    )
    return [pybind_version_arg]


def pytorch_cmake_args(images):
if "pytorch" in images:
image = images["pytorch"]
Expand Down Expand Up @@ -924,6 +936,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""
df += """
# Install docker docker buildx
Expand Down Expand Up @@ -957,6 +970,10 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
pkg-config \\
unzip \\
wget \\
ncurses-devel \\
readline-devel \\
xz-devel \\
bzip2-devel \\
zlib-devel \\
libarchive-devel \\
libxml2-devel \\
Expand Down Expand Up @@ -1025,6 +1042,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""
# Install the windows- or linux-specific buildbase dependencies
if target_platform() == "windows":
Expand All @@ -1035,7 +1053,6 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
df += """
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# Install docker docker buildx
RUN apt-get update \\
Expand Down Expand Up @@ -1159,6 +1176,7 @@ def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
Expand Down Expand Up @@ -1198,6 +1216,8 @@ def create_dockerfile_linux(
## Production stage: Create container with just inference server executable
############################################################################
FROM ${BASE_IMAGE}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

df += dockerfile_prepare_container_linux(
Expand Down Expand Up @@ -1399,7 +1419,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
if "python" in backends:
if target_platform() == "rhel":
df += """
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# python3, python3-pip and some pip installs required for the python backend
RUN yum install -y \\
libarchive-devel \\
Expand All @@ -1418,7 +1437,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
"""
else:
df += """
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# python3, python3-pip and some pip installs required for the python backend
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
Expand Down Expand Up @@ -1542,7 +1560,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):


def change_default_python_version_rhel(version):
df = """
df = f"""
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
Expand All @@ -1551,21 +1569,17 @@ def change_default_python_version_rhel(version):
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {} \\
&& cp ${{PYENV_ROOT}}/versions/{}/lib/libpython3* /usr/lib64/""".format(
version, version
)
df += """
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
&& cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/
# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={} PYTHONPATH=/opt/python/v
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
""".format(
version
)
"""
return df


Expand Down
9 changes: 8 additions & 1 deletion docs/Dockerfile.docs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -59,6 +59,7 @@ RUN pip3 install \
breathe \
docutils \
exhale \
httplib2 \
ipython \
myst-nb \
nbclient \
Expand All @@ -73,6 +74,12 @@ RUN pip3 install \
sphinx-tabs \
sphinxcontrib-bibtex


# install nvidia-sphinx-theme
RUN pip3 install \
--index-url https://urm.nvidia.com/artifactory/api/pypi/ct-omniverse-pypi/simple/ \
nvidia-sphinx-theme

# Set visitor script to be included on every HTML page
ENV VISITS_COUNTING_SCRIPT="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"

6 changes: 3 additions & 3 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ Triton supports batching individual inference requests to improve compute resour
- [Queuing Policies](user_guide/model_configuration.md#queue-policy)
- [Ragged Batching](user_guide/ragged_batching.md)
- [Sequence Batcher](user_guide/model_configuration.md#sequence-batcher)
- [Stateful Models](user_guide/architecture.md#stateful-models)
- [Control Inputs](user_guide/architecture.md#control-inputs)
- [Implicit State - Stateful Inference Using a Stateless Model](user_guide/architecture.md#implicit-state-management)
- [Stateful Models](user_guide/model_execution.md#stateful-models)
- [Control Inputs](user_guide/model_execution.md#control-inputs)
- [Implicit State - Stateful Inference Using a Stateless Model](user_guide/implicit_state_management.md#implicit-state-management)
- [Sequence Scheduling Strategies](user_guide/architecture.md#scheduling-strategies)
- [Direct](user_guide/architecture.md#direct)
- [Oldest](user_guide/architecture.md#oldest)
Expand Down
11 changes: 11 additions & 0 deletions docs/backend_guide/vllm.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
########
vLLM
########

.. toctree::
:hidden:
:caption: vLLM
:maxdepth: 2

../vllm_backend/README
Multi-LoRA <../vllm_backend/docs/llama_multi_lora_tutorial>
10 changes: 10 additions & 0 deletions docs/client_guide/api_reference.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#############
API Reference
#############

.. toctree::
:maxdepth: 1
:hidden:

OpenAI API <openai_readme.md>
kserve
39 changes: 39 additions & 0 deletions docs/client_guide/in_process.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
############################
In-Process Triton Server API
############################


The Triton Inference Server provides a backwards-compatible C API, along with Python and Java bindings, that
allows Triton to be linked directly into a C/C++, Java, or Python application. This API
is called the "Triton Server API" or just "Server API" for short. The
API is implemented in the Triton shared library which is built from
source contained in the `core
repository <https://github.com/triton-inference-server/core>`__. On Linux
this library is libtritonserver.so and on Windows it is
tritonserver.dll. In the Triton Docker image the shared library is
found in /opt/tritonserver/lib. The header file that defines and
documents the Server API is
`tritonserver.h <https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h>`__.
`Java bindings for In-Process Triton Server API <../customization_guide/inprocess_java_api.html#java-bindings-for-in-process-triton-server-api>`__
are built on top of ``tritonserver.h`` and can be used for Java applications that
need to use Triton server in-process.

All capabilities of Triton server are encapsulated in the shared
library and are exposed via the Server API. The `tritonserver`
executable implements HTTP/REST and GRPC endpoints and uses the Server
API to communicate with core Triton logic. The primary source files
for the endpoints are `grpc_server.cc <https://github.com/triton-inference-server/server/blob/main/src/grpc/grpc_server.cc>`__ and
`http_server.cc <https://github.com/triton-inference-server/server/blob/main/src/http_server.cc>`__. In these source files you can
see the Server API being used.

You can use the Server API in your own application as well. A simple
example using the Server API can be found in
`simple.cc <https://github.com/triton-inference-server/server/blob/main/src/simple.cc>`__.

.. toctree::
:maxdepth: 1
:hidden:

C/C++ <../customization_guide/inprocess_c_api.md>
python
Java <../customization_guide/inprocess_java_api.md>
15 changes: 15 additions & 0 deletions docs/client_guide/kserve.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##########
KServe API
##########


Triton uses the
`KServe community standard inference protocols <https://github.com/kserve/kserve/tree/master/docs/predict-api/v2>`__
to define HTTP/REST and GRPC APIs plus several extensions.

.. toctree::
:maxdepth: 1
:hidden:

HTTP/REST and GRPC Protocol <../customization_guide/inference_protocols.md>
kserve_extension
24 changes: 24 additions & 0 deletions docs/client_guide/kserve_extension.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
##########
Extensions
##########

To fully enable all capabilities,
Triton also implements `HTTP/REST and GRPC
extensions <https://github.com/triton-inference-server/server/tree/main/docs/protocol>`__
to the KServe inference protocol.

.. toctree::
:maxdepth: 1
:hidden:

Binary tensor data extension <../protocol/extension_binary_data.md>
Classification extension <../protocol/extension_classification.md>
Schedule policy extension <../protocol/extension_schedule_policy.md>
Sequence extension <../protocol/extension_sequence.md>
Shared-memory extension <../protocol/extension_shared_memory.md>
Model configuration extension <../protocol/extension_model_configuration.md>
Model repository extension <../protocol/extension_model_repository.md>
Statistics extension <../protocol/extension_statistics.md>
Trace extension <../protocol/extension_trace.md>
Logging extension <../protocol/extension_logging.md>
Parameters extension <../protocol/extension_parameters.md>
1 change: 1 addition & 0 deletions docs/client_guide/openai_readme.md
12 changes: 12 additions & 0 deletions docs/client_guide/python.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
######
Python
######

.. include:: python_readme.rst

.. toctree::
:maxdepth: 1
:hidden:

Kafka I/O <../tutorials/Triton_Inference_Server_Python_API/examples/kafka-io/README.md>
Rayserve <../tutorials/Triton_Inference_Server_Python_API/examples/rayserve/README.md>
Loading

0 comments on commit d2bdd8d

Please sign in to comment.