Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add OpenAI-Compatible Server to NGC Container #7895

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,8 @@ def create_dockerfile_linux(
find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[all]

RUN pip3 install -r python/openai/requirements.txt

"""
if not FLAGS.no_core_build:
# Add feature labels for SageMaker endpoint
Expand Down Expand Up @@ -1918,6 +1920,10 @@ def core_build(
os.path.join(install_dir, "include", "triton", "core"),
)

cmake_script.cpdir(
os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python")
)

cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir)
cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir)

Expand Down
13 changes: 9 additions & 4 deletions qa/L0_openai/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@

function install_deps() {
# Install python bindings for tritonserver and tritonfrontend
pip install /opt/tritonserver/python/triton*.whl
# pip install /opt/tritonserver/python/triton*.whl

# Install application/testing requirements
pushd openai/
pip install -r requirements.txt
# NOTE: The application requirements should be pre-installed in the container; uncomment the line below if they are not.
# pip install -r requirements.txt
pip install -r requirements-test.txt

if [ "${IMAGE_KIND}" == "TRTLLM" ]; then
Expand All @@ -49,13 +50,17 @@ function prepare_vllm() {
}

function prepare_tensorrtllm() {
# FIXME: Remove when testing TRT-LLM containers built from source
pip install -r requirements.txt

MODEL="llama-3-8b-instruct"
MODEL_REPO="tests/tensorrtllm_models"
rm -rf ${MODEL_REPO}

# FIXME: This will require an upgrade each release to match the TRT-LLM version
# FIXME: This may require an upgrade each release to match the TRT-LLM version;
# it would likely be easier to use trtllm-build directly for test purposes.
# Use Triton CLI to prepare model repository for testing
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.10
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.1
# NOTE: Could use ENGINE_DEST_PATH set to NFS mount for pre-built engines in future
triton import \
--model ${MODEL} \
Expand Down
Loading