vllm-project · mgoin · Oct 9, 2024 · Oct 9, 2024
diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -1,6 +1,7 @@
 import importlib
 import importlib.util
 import os
+from functools import cached_property
 from typing import Callable, Dict, List, Optional, Sequence, Type, Union
 
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
@@ -29,6 +30,12 @@ def __init__(self, tokenizer: AnyTokenizer):
 
         self.model_tokenizer = tokenizer
 
+    @cached_property
+    def vocab(self) -> Dict[str, int]:
+        # NOTE: Only PreTrainedTokenizerFast is guaranteed to have .vocab
+        # whereas all tokenizers have .get_vocab()
+        return self.model_tokenizer.get_vocab()
+
     def adjust_request(
             self, request: ChatCompletionRequest) -> ChatCompletionRequest:
         """

diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@@ -50,10 +50,9 @@ def __init__(self, tokenizer: AnyTokenizer):
             raise ValueError(
                 "The model tokenizer must be passed to the ToolParser "
                 "constructor during construction.")
-        self.tool_call_start_token_id: int = self.model_tokenizer.vocab.get(
-            self.tool_call_start_token, None)
-        self.tool_call_end_token_id: int = self.model_tokenizer.vocab.get(
-            self.tool_call_end_token, None)
+        self.tool_call_start_token_id = self.vocab.get(
+            self.tool_call_start_token)
+        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
         if not self.tool_call_start_token_id or not self.tool_call_end_token_id:
             raise RuntimeError(
                 "Hermes 2 Pro Tool parser could not locate tool call start/end "

diff --git a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@@ -61,8 +61,7 @@ def __init__(self, tokenizer: AnyTokenizer):
         self.streamed_args_for_tool: List[str] = [
         ]  # map what has been streamed for each tool so far to a list
         self.bot_token = "[TOOL_CALLS]"
-        self.bot_token_id = self.model_tokenizer.get_vocab().get(
-            self.bot_token, None)
+        self.bot_token_id = self.vocab.get(self.bot_token)
         self.tool_call_regex = re.compile(r"\[{.*?}\]", re.DOTALL)
         if not self.bot_token_id:
             raise RuntimeError(