
Commit

remove test
toslunar committed Sep 6, 2024
1 parent c0b7190 commit af63d52
Showing 1 changed file with 0 additions and 102 deletions.
102 changes: 0 additions & 102 deletions tests/core/test_chunked_prefill_scheduler.py
@@ -4,7 +4,6 @@
import pytest # noqa

from vllm.config import CacheConfig, SchedulerConfig
from vllm.core.interfaces import AllocStatus
from vllm.core.scheduler import Scheduler
from vllm.sequence import Logprob, SequenceGroup

@@ -369,107 +368,6 @@ def cannot_append_second_group(seq_group, num_lookahead_slots):
    assert out.blocks_to_swap_out == []


def test_running_prefill_prioritized_over_swap():
    block_size = 4
    max_seqs = 30
    max_model_len = 200
    max_num_batched_tokens = 30
    scheduler_config = SchedulerConfig(max_num_batched_tokens,
                                       max_seqs,
                                       max_model_len,
                                       enable_chunked_prefill=True)
    cache_config = CacheConfig(block_size, 1.0, 1, "auto")
    cache_config.num_cpu_blocks = 8
    cache_config.num_gpu_blocks = 8
    scheduler = Scheduler(scheduler_config, cache_config, None)
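    # max_num_batched_tokens = 30 is the per-step token budget, so each
    # scheduling step below batches at most 30 tokens across prefill chunks
    # and decodes.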

    # Artificial priority is needed for testing with the fcfs policy in
    # _schedule_running. This seq will be prioritized more among running
    # seqs but less among waiting seqs.
    _, seq_group2 = create_dummy_prompt("2", prompt_length=20 + 30 + 30 + 30)
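    # 110 prompt tokens in total; under the 30-token budget this prefill is
    # expected to run as a 20-token chunk (sharing a step with seq_group's
    # last 10 prefill tokens) followed by 30-token chunks.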

    _, seq_group = create_dummy_prompt("1", prompt_length=30 + 10, best_of=2)
    scheduler.add_seq_group(seq_group)
    _, out = schedule_and_update_computed_tokens(scheduler)
    # The request is chunked: only the first 30 of its 40 prompt tokens are
    # prefilled now.
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_prefill_groups == 1
    assert seq_group.is_prefill()
    assert out.num_batched_tokens == max_num_batched_tokens

    # Add 1 more task.
    scheduler.add_seq_group(seq_group2)
    _, out = schedule_and_update_computed_tokens(scheduler)
    # task 1 finished the last 10 tokens of prefill.
    # task 2 started the first 20 tokens of prefill.
    assert len(out.scheduled_seq_groups) == 2
    assert out.num_prefill_groups == 2
    assert not seq_group.is_prefill()
    assert seq_group2.is_prefill()
    assert out.num_batched_tokens == max_num_batched_tokens

    # seq_group starts decoding with best_of=2
    # see vllm/engine/output_processor/single_step.py
    seq = seq_group.seqs_dict[1]
    new_seq_id = 3
    new_seq = seq.fork(new_seq_id)
    seq_group.add(new_seq)
    scheduler.fork_seq(seq, new_seq)
    append_new_token(seq_group, 1)
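    # seq_group now runs two sequences (the original and its fork); the mock
    # below makes slot appends fail for it, forcing a swap-out.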

    # The first request should be swapped out.
    scheduler.block_manager.can_append_slots = MagicMock()

    def cannot_append_second_group(seq_group, num_lookahead_slots):
        return seq_group.request_id != "1"

    scheduler.block_manager.can_append_slots.side_effect = (
        cannot_append_second_group)

    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_batched_tokens == 30
    assert out.blocks_to_swap_in == []
    assert out.blocks_to_swap_out != []
    assert out.scheduled_seq_groups[0].seq_group == seq_group2

    # Swapping back in is not possible, so the prefill keeps running.
    scheduler.block_manager.can_swap_in = MagicMock()
    scheduler.block_manager.can_swap_in.return_value = AllocStatus.LATER
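    # AllocStatus.LATER signals that the swap-in cannot be satisfied yet, so
    # seq_group stays swapped out while seq_group2 continues its prefill.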

    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_batched_tokens == 30
    assert out.blocks_to_swap_in == []
    assert out.blocks_to_swap_out == []
    assert seq_group2.is_prefill()
    assert out.scheduled_seq_groups[0].seq_group == seq_group2

    # Now, although swapping in is possible, the running prefill is
    # prioritized over it.
    scheduler.block_manager.can_swap_in.return_value = AllocStatus.OK
    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_batched_tokens == 30
    assert out.blocks_to_swap_in == []
    assert out.blocks_to_swap_out == []
    assert not seq_group2.is_prefill()
    assert out.scheduled_seq_groups[0].seq_group == seq_group2
    append_new_token(seq_group2, 1)

    # Decoding is prioritized.
    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 2
    # 3 decode tokens: seq_group, now swapped back in, contributes 2 (one per
    # forked sequence) and seq_group2 contributes 1.
    assert out.num_batched_tokens == 3
    assert out.blocks_to_swap_in != []
    assert out.blocks_to_swap_out == []
    assert not seq_group.is_prefill()
    assert not seq_group2.is_prefill()
    append_new_token(seq_group, 1)
    append_new_token(seq_group2, 1)


def test_chunked_prefill_preempt():
    """Verify preempt works with chunked prefill requests"""
    block_size = 4
