[WIP] Hybrid allocator for full attention & sliding window attention interleaved models #12655

Draft: heheda12345 wants to merge 55 commits into base: main

Commits (55)
0f8a54c
can run
heheda12345 Jan 15, 2025
990d086
fix tests
heheda12345 Jan 15, 2025
e46fff5
format
heheda12345 Jan 15, 2025
36a649a
fix bug
heheda12345 Jan 15, 2025
9c36e7d
add comments
heheda12345 Jan 15, 2025
da6b549
format
heheda12345 Jan 15, 2025
4030199
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Jan 17, 2025
2d8213e
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Jan 17, 2025
41bc571
update code
heheda12345 Jan 20, 2025
a939b6d
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Jan 20, 2025
34c9d74
can run
heheda12345 Jan 20, 2025
cfcf2b4
update comments
heheda12345 Jan 20, 2025
4898973
init kv cache for group allocation
heheda12345 Jan 21, 2025
ef9dc9d
can run, result a little strange
heheda12345 Jan 21, 2025
5b71ccd
fix small bug
heheda12345 Jan 22, 2025
99de9f8
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Jan 22, 2025
6a0eb69
cleanup SpecializedManager
heheda12345 Jan 29, 2025
14ad04e
add test and fix bug for sliding window manager
heheda12345 Jan 29, 2025
eb34a44
remove useless code
heheda12345 Jan 29, 2025
f53e824
fix several bugs
heheda12345 Jan 31, 2025
0ecf3fa
update sliding window test
heheda12345 Jan 31, 2025
3998e92
small fix
heheda12345 Jan 31, 2025
446e99d
small fix, can run gemma2
heheda12345 Jan 31, 2025
d97c1b0
add test for range_intersect
heheda12345 Jan 31, 2025
5ebfeac
clean up get_computed_blocks, append_slots, allocate_slots
heheda12345 Jan 31, 2025
4e0dc48
finish the clean up of kv cache manager
heheda12345 Jan 31, 2025
cd4f8e2
clean up the code
heheda12345 Jan 31, 2025
2d7bbca
fix some tests
heheda12345 Feb 1, 2025
68fe2db
remove print kvcacheconfig
heheda12345 Feb 1, 2025
30e9837
move files
heheda12345 Feb 1, 2025
05d8b0d
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Feb 2, 2025
e6016e5
add docstrings
heheda12345 Feb 2, 2025
b369fa2
fix pre-commit
heheda12345 Feb 2, 2025
beb5d08
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Feb 4, 2025
ea65e60
add request.py
heheda12345 Feb 4, 2025
f4d1c93
Merge branch 'main' of github.com:vllm-project/vllm into grouped_bloc…
heheda12345 Feb 4, 2025
42c391d
remove small comment
heheda12345 Feb 4, 2025
f6d2bfd
avoid loop in block table
heheda12345 Feb 5, 2025
b614b42
clean up attn_metadata
heheda12345 Feb 6, 2025
ca91b30
BlockIDList
heheda12345 Feb 6, 2025
0475e9f
forward metadata
heheda12345 Feb 6, 2025
bcfc994
cleanup
heheda12345 Feb 6, 2025
bcab7af
fix pre-commit
heheda12345 Feb 6, 2025
8fa6f8f
Merge remote-tracking branch 'heheda/grouped_block_table' into hybrid…
heheda12345 Feb 7, 2025
0a9701e
fix
heheda12345 Feb 7, 2025
a7173a2
fix
heheda12345 Feb 7, 2025
5e2d3bd
cherry-pick: [V1] Move KV block hashes from Request to KVCacheManager…
WoosukKwon Feb 8, 2025
09782a2
small fix
heheda12345 Feb 8, 2025
1e44abd
add manager group, can run, result strange
heheda12345 Feb 8, 2025
bf302c5
fix some bug
heheda12345 Feb 9, 2025
be926dd
move out block pool
heheda12345 Feb 9, 2025
4278b01
unify get_possible_cached_prefix
heheda12345 Feb 9, 2025
9853e25
unify get_req_num_new_blocks
heheda12345 Feb 9, 2025
33dfa2b
dynamic group
heheda12345 Feb 9, 2025
57cb55b
small fix
heheda12345 Feb 9, 2025
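The test changes below show that every block hash now carries a kv_cache_group_id, so a block cached for one KV cache group can never satisfy a lookup from another group even when the token ids match. A minimal sketch of that keying scheme, with the field order inferred from the updated tests; the real BlockHashType and hash_block_tokens in vllm.v1.core.kv_cache_utils may differ:

from typing import Any, NamedTuple, Optional, Tuple

class BlockHashType(NamedTuple):
    hash_value: int
    kv_cache_group_id: int
    token_ids: Tuple[int, ...]
    extra_keys: Optional[Any] = None

def hash_block_tokens(parent_block_hash: Optional[int],
                      curr_block_token_ids: Tuple[int, ...],
                      kv_cache_group_id: int,
                      extra_keys: Optional[Any] = None) -> BlockHashType:
    # Folding the group id into the hash keeps a sliding-window group from
    # ever matching a full-attention block holding the same token ids.
    return BlockHashType(
        hash((parent_block_hash, curr_block_token_ids, kv_cache_group_id,
              extra_keys)),
        kv_cache_group_id, curr_block_token_ids, extra_keys)

# Same tokens, different groups: the cache keys differ.
assert hash_block_tokens(None, (1, 2, 3), 0) != hash_block_tokens(None, (1, 2, 3), 1)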
Files changed (3)
examples/offline_inference/basic.py (4 changes: 2 additions & 2 deletions)

@@ -13,12 +13,12 @@
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 
 # Create an LLM.
-llm = LLM(model="facebook/opt-125m")
+llm = LLM(model="google/gemma-2-2b-it")
 # Generate texts from the prompts. The output is a list of RequestOutput objects
 # that contain the prompt, generated text, and other information.
 outputs = llm.generate(prompts, sampling_params)
 # Print the outputs.
 for output in outputs:
     prompt = output.prompt
     generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
\ No newline at end of file
+    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
tests/core/block/e2e/test_correctness_sliding_window.py (13 changes: 8 additions & 5 deletions)

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import random
-from typing import List
+from typing import List, Tuple
 
 import pytest
 
@@ -120,7 +120,7 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
     check_answers(indices, answer, test_texts)
 
 
-def prep_prompts(batch_size: int):
+def prep_prompts(batch_size: int, assign_range: Tuple[int, int] = (800, 1100)):
     """
     Generate prompts which a bunch of assignments,
     then asking for the value of one of them.
@@ -136,7 +136,7 @@ def prep_prompts(batch_size: int):
         indices.append(idx)
         prompt = "```python\n# We set a number of variables, " + \
             f"x{idx} will be important later\n"
-        ln = random.randint(800, 1100)
+        ln = random.randint(*assign_range)
         for k in range(30, ln):
             v = random.randint(10, 99)
             if k == idx:
@@ -148,7 +148,10 @@ def prep_prompts(batch_size: int):
     return prompts, answer, indices
 
 
-def check_answers(indices: List[int], answer: List[int], outputs: List[str]):
+def check_answers(indices: List[int],
+                  answer: List[int],
+                  outputs: List[str],
+                  accept_rate=0.7):
     answer2 = [int(text[0:2].strip()) for text in outputs]
     print(list(zip(indices, zip(answer, answer2))))
     numok = 0
@@ -157,7 +160,7 @@ def check_answers(indices: List[int], answer: List[int], outputs: List[str]):
             numok += 1
     frac_ok = numok / len(answer)
     print(f"Num OK: {numok}/{len(answer)} {frac_ok}")
-    assert frac_ok > 0.7
+    assert frac_ok >= accept_rate
 
 
 def check_window(prompts: List[str]):
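The widened helper signatures above presumably let hybrid-allocator tests reuse them with different prompt lengths and tolerances. A hypothetical call, not taken from this diff, assuming the repo root is on sys.path; the fake outputs just exercise the two-digit format that check_answers parses from text[0:2]:

from tests.core.block.e2e.test_correctness_sliding_window import (
    check_answers, prep_prompts)

prompts, answer, indices = prep_prompts(batch_size=4, assign_range=(300, 500))
# Stand-in for real model outputs: every answer is "correct", so the
# observed fraction is 1.0 and the accept_rate=0.8 check passes.
outputs = [f"{v} is the value" for v in answer]
check_answers(indices, answer, outputs, accept_rate=0.8)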
tests/v1/core/test_kv_cache_utils.py (45 changes: 36 additions & 9 deletions)

@@ -5,10 +5,10 @@
 from vllm.multimodal.inputs import MultiModalKwargs
 from vllm.sampling_params import SamplingParams
 from vllm.v1.core.kv_cache_utils import (BlockHashType, FreeKVCacheBlockQueue,
-                                         KVCacheBlock,
+                                         KVCacheBlock, PrefixLengthRange,
                                          generate_block_hash_extra_keys,
                                          hash_block_tokens,
-                                         hash_request_tokens)
+                                         hash_request_tokens, intersect_ranges)
 from vllm.v1.request import Request
 
 
@@ -49,7 +49,9 @@ def test_kv_cache_block():
     assert block.ref_cnt == 0
 
     # Test block hash setting and resetting
-    block_hash = BlockHashType(hash_value=123, token_ids=(1, 2, 3))
+    block_hash = BlockHashType(hash_value=123,
+                               kv_cache_group_id=0,
+                               token_ids=(1, 2, 3))
     block.block_hash = block_hash
     assert block.block_hash == block_hash
 
@@ -190,11 +192,11 @@ def test_hash_block_tokens():
     curr_block_token_ids = (1, 2, 3)
     extra_keys = ("key1", "key2")
 
-    block_hash = hash_block_tokens(parent_block_hash, curr_block_token_ids,
+    block_hash = hash_block_tokens(parent_block_hash, curr_block_token_ids, 0,
                                    extra_keys)
     assert isinstance(block_hash, BlockHashType)
     assert block_hash.hash_value == hash(
-        (parent_block_hash, curr_block_token_ids, extra_keys))
+        (parent_block_hash, curr_block_token_ids, 0, extra_keys))
     assert block_hash.token_ids == curr_block_token_ids
     assert block_hash.extra_keys == extra_keys
 
@@ -214,7 +216,7 @@ def test_hash_request_tokens():
     )
 
     block_size = 3
-    block_hashes = hash_request_tokens(block_size, request)
+    block_hashes = hash_request_tokens(block_size, request, 0)
 
     assert len(block_hashes) == 2
     assert isinstance(block_hashes[0], BlockHashType)
@@ -255,8 +257,8 @@ def test_hash_tokens_different_mm_input():
         mm_hashes=["hash3", "hash2"],
     )
     block_size = 3
-    block_hashes1 = hash_request_tokens(block_size, request1)
-    block_hashes2 = hash_request_tokens(block_size, request2)
+    block_hashes1 = hash_request_tokens(block_size, request1, 0)
+    block_hashes2 = hash_request_tokens(block_size, request2, 0)
     assert block_hashes1[0] != block_hashes2[0]
     assert block_hashes1[1] != block_hashes2[1]
 
@@ -270,10 +272,35 @@ def test_hash_request_tokens_no_mm_inputs():
     )
 
     block_size = 3
-    block_hashes = hash_request_tokens(block_size, request)
+    block_hashes = hash_request_tokens(block_size, request, 0)
 
     assert len(block_hashes) == 2
     assert block_hashes[0].token_ids == (0, 1, 2)
     assert block_hashes[0].extra_keys is None
    assert block_hashes[1].token_ids == (3, 4, 5)
     assert block_hashes[1].extra_keys is None
+
+
+def test_prefix_length_range_intersection():
+    range0 = [
+        PrefixLengthRange(1, 5),
+        PrefixLengthRange(10, 14),
+        PrefixLengthRange(16, 18)
+    ]
+    range1 = [
+        PrefixLengthRange(2, 6),
+        PrefixLengthRange(8, 12),
+        PrefixLengthRange(15, 17)
+    ]
+    range2 = [PrefixLengthRange(3, 11), PrefixLengthRange(13, 19)]
+    ranges = [range0, range1, range2]
+
+    intersection = intersect_ranges(ranges)
+    assert intersection == [
+        PrefixLengthRange(3, 5),
+        PrefixLengthRange(10, 11),
+        PrefixLengthRange(16, 17)
+    ]
+
+
+# TODO: add tests for hash of kv_cache_group_id
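test_prefix_length_range_intersection pins down the semantics of intersect_ranges: each argument is a sorted list of disjoint inclusive ranges, and the result is the set of prefix lengths admitted by every list. A minimal sketch consistent with the test, assuming PrefixLengthRange is an inclusive start/end pair; the actual implementation in vllm.v1.core.kv_cache_utils may differ:

from dataclasses import dataclass
from typing import List

@dataclass(frozen=True)
class PrefixLengthRange:
    start: int  # inclusive
    end: int    # inclusive

def _intersect_two(a: List[PrefixLengthRange],
                   b: List[PrefixLengthRange]) -> List[PrefixLengthRange]:
    # Two-pointer sweep; assumes each list is sorted and non-overlapping.
    out, i, j = [], 0, 0
    while i < len(a) and j < len(b):
        lo = max(a[i].start, b[j].start)
        hi = min(a[i].end, b[j].end)
        if lo <= hi:
            out.append(PrefixLengthRange(lo, hi))
        # Advance whichever range ends first.
        if a[i].end < b[j].end:
            i += 1
        else:
            j += 1
    return out

def intersect_ranges(
        ranges: List[List[PrefixLengthRange]]) -> List[PrefixLengthRange]:
    result = ranges[0]
    for r in ranges[1:]:
        result = _intersect_two(result, r)
    return result

# Reproduces the expected result from the test above:
assert intersect_ranges([
    [PrefixLengthRange(1, 5), PrefixLengthRange(10, 14), PrefixLengthRange(16, 18)],
    [PrefixLengthRange(2, 6), PrefixLengthRange(8, 12), PrefixLengthRange(15, 17)],
    [PrefixLengthRange(3, 11), PrefixLengthRange(13, 19)],
]) == [PrefixLengthRange(3, 5), PrefixLengthRange(10, 11), PrefixLengthRange(16, 17)]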