Skip to content

Commit

Permalink
Merge branch 'deepjavalibrary:master' into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
HappyAmazonian authored Nov 25, 2024
2 parents effdfe7 + a75865c commit ba6723c
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 214 deletions.
5 changes: 0 additions & 5 deletions engines/python/setup/djl_python/tensorrt_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
from djl_python.outputs import Output
from djl_python.rolling_batch.trtllm_rolling_batch import TRTLLMRollingBatch
from djl_python.properties_manager.trt_properties import TensorRtLlmProperties
from djl_python.tensorrt_llm_python import TRTLLMPythonService
from djl_python.utils import rolling_batch_inference
from djl_python.input_parser import parse_input_with_formatter
from typing import List, Tuple


class TRTLLMService(object):
Expand Down Expand Up @@ -91,9 +89,6 @@ def handle(inputs: Input) -> Output:
"""
global _service
if not _service.initialized:
-properties = inputs.get_properties()
-if properties.get("rolling_batch", "disable") == "disable":
-_service = TRTLLMPythonService()
# stateful model
_service.initialize(inputs.get_properties())

Expand Down
203 changes: 0 additions & 203 deletions engines/python/setup/djl_python/tensorrt_llm_python.py

This file was deleted.

10 changes: 5 additions & 5 deletions serving/docker/pytorch-inf2.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ ARG djl_serving_version
ARG torch_version=2.1.2
ARG torchvision_version=0.16.2
ARG python_version=3.10
-ARG neuronsdk_version=2.20.1
-ARG torch_neuronx_version=2.1.2.2.3.1
+ARG neuronsdk_version=2.20.2
+ARG torch_neuronx_version=2.1.2.2.3.2
ARG transformers_neuronx_version=0.12.313
ARG neuronx_distributed_version=0.9.0
-ARG neuronx_cc_version=2.15.141.0
-ARG neuronx_cc_stubs_version=2.15.141.0
-ARG torch_xla_version=2.1.4
+ARG neuronx_cc_version=2.15.143.0
+ARG neuronx_cc_stubs_version=2.15.143.0
+ARG torch_xla_version=2.1.5
ARG transformers_version=4.45.2
ARG accelerate_version=0.29.2
ARG diffusers_version=0.28.2
Expand Down
1 change: 0 additions & 1 deletion wlm/src/main/java/ai/djl/serving/wlm/LmiUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ static void convertTrtLLM(ModelInfo<?, ?> info) throws IOException {
ppDegree = Utils.getenv("PIPELINE_PARALLEL_DEGREE", "1");
}

// TODO TrtLLM python backend: Change it once TrtLLM supports T5 with inflight batching.
info.prop.put("option.rolling_batch", "trtllm");
if (!isValidTrtLlmModelRepo(trtRepo)) {
info.downloadDir = buildTrtLlmArtifacts(info.prop, modelId, tpDegree, ppDegree);
Expand Down

0 comments on commit ba6723c

Please sign in to comment.