Skip to content

Commit

Permalink
Use extra-args for apply_chat_template
Browse files Browse the repository at this point in the history
  • Loading branch information
dyastremsky committed Jan 29, 2025
1 parent a561ee2 commit 90bed99
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 54 deletions.
2 changes: 0 additions & 2 deletions genai-perf/genai_perf/config/generate/perf_analyzer_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,10 @@
# This is the list of GAP CLI args that are not used when creating
# the PA command line
perf_analyzer_ignore_args = [
"apply_chat_template",
"artifact_dir",
"backend",
"batch_size_image",
"batch_size_text",
"chat_template_file",
"concurrency",
"endpoint_type",
"extra_inputs",
Expand Down
1 change: 0 additions & 1 deletion genai-perf/genai_perf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,3 @@
DEFAULT_COMPARE_DIR = "compare"
DEFAULT_PARQUET_FILE = "all_data"
DEFAULT_PROFILE_EXPORT_FILE = "profile_export.json"
DEFAULT_CHAT_TEMPLATE_FILE = "chat_template.json"
1 change: 0 additions & 1 deletion genai-perf/genai_perf/export_data/json_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def _prepare_args_for_export(self) -> None:
self._args.pop("input_file", None)
self._args["profile_export_file"] = str(self._args["profile_export_file"])
self._args["artifact_dir"] = str(self._args["artifact_dir"])
self._args["chat_template_file"] = str(self._args["chat_template_file"])
for k, v in self._args.items():
if isinstance(v, Enum):
self._args[k] = v.name.lower()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import Any, Dict
from typing import Any, Dict, List

from genai_perf.exceptions import GenAIPerfException
from genai_perf.inputs.converters.base_converter import BaseConverter
Expand Down Expand Up @@ -52,9 +52,7 @@ def convert(

for file_data in generic_dataset.files_data.values():
for row in file_data.rows:
token_ids = config.tokenizer.encode(
row.texts[0], add_special_tokens=False
)
token_ids = self._encode_tokens(row.texts[0], config)
payload = {
"input_ids": {
"content": token_ids,
Expand Down Expand Up @@ -84,7 +82,35 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
payload["min_length"] = [num_tokens]

for key, value in config.extra_inputs.items():
if key == "set_end_id" and value:
if key == "set_end_id":
payload["end_id"] = [config.tokenizer._tokenizer.eos_token_id]
elif key == "apply_chat_template":
pass
else:
payload[key] = [value]

def _encode_tokens(self, prompt: str, config: InputsConfig) -> List[int]:
    """
    Tokenize a prompt, optionally routing it through the chat template.

    The chat-template path is selected by the mere presence of the
    "apply_chat_template" key in ``config.extra_inputs``; the value stored
    under that key is not inspected here.

    Args:
        prompt: The raw prompt text to tokenize.
        config: Inputs configuration supplying ``extra_inputs`` and the
            ``tokenizer`` used for encoding.

    Returns:
        The token ids produced for the prompt.
    """
    if "apply_chat_template" in config.extra_inputs:
        return self._encode_with_chat_template(prompt, config)
    return config.tokenizer.encode(prompt, add_special_tokens=False)

def _encode_with_chat_template(
    self, prompt: str, config: InputsConfig
) -> List[int]:
    """
    Apply the default TRT-LLM engine chat template to the prompt

    The prompt is wrapped as a single "user" message, rendered to text via
    the underlying tokenizer's ``apply_chat_template`` (``tokenize=False``),
    and the rendered text is then encoded with ``config.tokenizer.encode``.

    Args:
        prompt: The raw prompt text to wrap in the chat template.
        config: Inputs configuration supplying the ``tokenizer``.

    Returns:
        The token ids of the templated prompt.

    Raises:
        ImportError: If ``jinja2`` cannot be imported. The original import
            failure is attached as the exception's cause.
    """
    try:
        # Availability probe only: the name is not referenced below.
        # NOTE(review): presumably the tokenizer's chat-template rendering
        # needs Jinja2 at call time -- confirm against the tokenizer library.
        import jinja2  # noqa: F401
    except ImportError as err:
        # Chain explicitly so the underlying import failure stays visible.
        raise ImportError(
            "Jinja2 is required for using TRT-LLM with chat template processing. Install it using: pip install jinja2."
        ) from err

    default_template = [{"role": "user", "content": prompt}]
    formatted_prompt = config.tokenizer._tokenizer.apply_chat_template(
        default_template, tokenize=False, add_special_tokens=False
    )
    return config.tokenizer.encode(formatted_prompt)
21 changes: 1 addition & 20 deletions genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,7 @@
import genai_perf.logging as logging
import genai_perf.utils as utils
from genai_perf.config.input.config_command import RunConfigDefaults
from genai_perf.constants import (
DEFAULT_ARTIFACT_DIR,
DEFAULT_CHAT_TEMPLATE_FILE,
DEFAULT_PROFILE_EXPORT_FILE,
)
from genai_perf.constants import DEFAULT_ARTIFACT_DIR, DEFAULT_PROFILE_EXPORT_FILE
from genai_perf.inputs import input_constants as ic
from genai_perf.inputs.retrievers.synthetic_image_generator import ImageFormat
from genai_perf.plots.plot_config_parser import PlotConfigParser
Expand Down Expand Up @@ -992,21 +988,6 @@ def _add_profile_args(parser):
def _add_tokenizer_args(parser):
tokenizer_group = parser.add_argument_group("Tokenizer")

tokenizer_group.add_argument(
"--apply-chat-template",
action="store_true",
required=False,
help="The HuggingFace tokenizer will apply the chat template stored "
"by default or provided via --chat-template-file.",
)

tokenizer_group.add_argument(
"--chat-template-file",
type=Path,
default=Path(DEFAULT_CHAT_TEMPLATE_FILE),
help="The path to the HuggingFace tokenizer chat template.",
)

tokenizer_group.add_argument(
"--tokenizer",
type=str,
Expand Down
2 changes: 0 additions & 2 deletions genai-perf/genai_perf/subcommand/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ def profile_handler(args: Namespace, extra_args: Optional[List[str]]) -> None:
args.tokenizer,
args.tokenizer_trust_remote_code,
args.tokenizer_revision,
args.apply_chat_template,
args.chat_template_file,
)
generate_inputs(config_options)
telemetry_data_collectors = create_telemetry_data_collectors(args)
Expand Down
19 changes: 1 addition & 18 deletions genai-perf/genai_perf/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import contextlib
import io
from pathlib import Path
from typing import TYPE_CHECKING, List

# Use TYPE_CHECKING to import BatchEncoding only during static type checks
Expand Down Expand Up @@ -42,14 +41,7 @@ def __init__(self) -> None:
self._encode_args = {"add_special_tokens": False}
self._decode_args = {"skip_special_tokens": True}

def set_tokenizer(
self,
name: str,
trust_remote_code: bool,
revision: str,
apply_chat_template: bool,
chat_template_file: Path,
) -> None:
def set_tokenizer(self, name: str, trust_remote_code: bool, revision: str) -> None:
"""
Downloading the tokenizer from Huggingface.co or local filesystem
"""
Expand All @@ -65,11 +57,6 @@ def set_tokenizer(
tokenizer = AutoTokenizer.from_pretrained(
name, trust_remote_code=trust_remote_code, revision=revision
)
if apply_chat_template:
# read the chat template and apply it to the tokenizer
with open(chat_template_file, "r") as f:
chat_template = f.readlines()
tokenizer.apply_chat_template(chat_template, tokenize=False)
except Exception as e:
raise GenAIPerfException(e)
self._tokenizer = tokenizer
Expand Down Expand Up @@ -101,8 +88,6 @@ def get_tokenizer(
tokenizer_model: str,
trust_remote_code: bool = False,
tokenizer_revision: str = DEFAULT_TOKENIZER_REVISION,
apply_chat_template: bool = False,
chat_template_file: Path = Path("."),
) -> Tokenizer:
"""
Return tokenizer for the given model name
Expand All @@ -112,7 +97,5 @@ def get_tokenizer(
tokenizer_model,
trust_remote_code,
tokenizer_revision,
apply_chat_template,
chat_template_file,
)
return tokenizer
1 change: 0 additions & 1 deletion genai-perf/genai_perf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def add_inference_load_args(args: Namespace) -> List[str]:
@staticmethod
def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[str]:
skip_args = [
"apply_chat_template",
"artifact_dir",
"backend",
"batch_size_image",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -94,6 +94,41 @@ def test_convert_default(self):

assert result == expected_result

def test_convert_with_chat_template(self):
    """
    Check that the TRT-LLM engine converter applies the chat template when
    "apply_chat_template" is supplied through extra_inputs.

    Each payload's input ids must equal the ids obtained by rendering the
    matching prompt as a single "user" message with the tokenizer's chat
    template and encoding the rendered text.
    """
    generic_dataset = self.create_generic_dataset()
    tokenizer = get_tokenizer(DEFAULT_TOKENIZER)
    config = InputsConfig(
        extra_inputs={"apply_chat_template": True},
        model_name=["test_model"],
        model_selection_strategy=ModelSelectionStrategy.ROUND_ROBIN,
        output_format=OutputFormat.TENSORRTLLM_ENGINE,
        tokenizer=tokenizer,
    )

    trtllm_engine_converter = TensorRTLLMEngineConverter()
    result = trtllm_engine_converter.convert(generic_dataset, config)

    assert "data" in result
    assert isinstance(result["data"], list)
    assert len(result["data"]) == 2

    # enumerate() supplies the row index used to look up the source prompt.
    # NOTE(review): assumes payload order matches the row order of "file1"
    # -- confirm against create_generic_dataset().
    for i, payload in enumerate(result["data"]):
        assert "input_ids" in payload
        assert "content" in payload["input_ids"]
        assert len(payload["input_ids"]["content"]) > 0

        prompt = generic_dataset.files_data["file1"].rows[i].texts[0]
        expected_chat_template = [{"role": "user", "content": prompt}]

        formatted_chat = tokenizer._tokenizer.apply_chat_template(
            expected_chat_template, tokenize=False, add_special_tokens=False
        )

        expected_token_ids = tokenizer.encode(
            formatted_chat, add_special_tokens=False
        )
        assert payload["input_ids"]["content"] == expected_token_ids

def test_convert_with_request_parameters(self):
generic_dataset = self.create_generic_dataset()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class TestOutputReporter:
"artifact_dir": ".",
"extra_inputs": ["max_tokens:200"],
"subcommand": "profile",
"chat_template_file": "chat_template.json",
}
args_namespace = Namespace(**args)

Expand Down
2 changes: 0 additions & 2 deletions genai-perf/tests/test_exporters/test_json_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,6 @@ class TestJsonExporter:
"header": null,
"subcommand": "profile",
"prompt_source": "synthetic",
"chat_template_file": "chat_template.json",
"apply_chat_template": false,
"extra_inputs": {
"max_tokens": 256,
"ignore_eos": true
Expand Down

0 comments on commit 90bed99

Please sign in to comment.