
Commit

remove test
toslunar committed Sep 6, 2024
1 parent c0b7190 commit af63d52
Showing 1 changed file with 0 additions and 102 deletions.
102 changes: 0 additions & 102 deletions tests/core/test_chunked_prefill_scheduler.py
@@ -4,7 +4,6 @@
import pytest # noqa

from vllm.config import CacheConfig, SchedulerConfig
from vllm.core.interfaces import AllocStatus
from vllm.core.scheduler import Scheduler
from vllm.sequence import Logprob, SequenceGroup

@@ -369,107 +368,6 @@ def cannot_append_second_group(seq_group, num_lookahead_slots):
    assert out.blocks_to_swap_out == []


def test_running_prefill_prioritized_over_swap():
    block_size = 4
    max_seqs = 30
    max_model_len = 200
    max_num_batched_tokens = 30
    scheduler_config = SchedulerConfig(max_num_batched_tokens,
                                       max_seqs,
                                       max_model_len,
                                       enable_chunked_prefill=True)
    cache_config = CacheConfig(block_size, 1.0, 1, "auto")
    cache_config.num_cpu_blocks = 8
    cache_config.num_gpu_blocks = 8
    scheduler = Scheduler(scheduler_config, cache_config, None)
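    # max_num_batched_tokens = 30 is the per-step token budget, so each
    # scheduling step below batches at most 30 tokens across prefill chunks
    # and decodes.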

    # Artificial priority is needed for testing with the fcfs policy in
    # _schedule_running. This seq will be prioritized more among running
    # seqs but less among waiting seqs.
    _, seq_group2 = create_dummy_prompt("2", prompt_length=20 + 30 + 30 + 30)
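    # 110 prompt tokens in total; under the 30-token budget this prefill is
    # expected to run as a 20-token chunk (sharing a step with seq_group's
    # last 10 prefill tokens) followed by 30-token chunks.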

    _, seq_group = create_dummy_prompt("1", prompt_length=30 + 10, best_of=2)
    scheduler.add_seq_group(seq_group)
    _, out = schedule_and_update_computed_tokens(scheduler)
    # The request is chunked: only the first 30 of its 40 prompt tokens are
    # prefilled now.
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_prefill_groups == 1
    assert seq_group.is_prefill()
    assert out.num_batched_tokens == max_num_batched_tokens

    # Add 1 more task.
    scheduler.add_seq_group(seq_group2)
    _, out = schedule_and_update_computed_tokens(scheduler)
    # task 1 finished the last 10 tokens of prefill.
    # task 2 started the first 20 tokens of prefill.
    assert len(out.scheduled_seq_groups) == 2
    assert out.num_prefill_groups == 2
    assert not seq_group.is_prefill()
    assert seq_group2.is_prefill()
    assert out.num_batched_tokens == max_num_batched_tokens

    # seq_group starts decoding with best_of=2
    # see vllm/engine/output_processor/single_step.py
    seq = seq_group.seqs_dict[1]
    new_seq_id = 3
    new_seq = seq.fork(new_seq_id)
    seq_group.add(new_seq)
    scheduler.fork_seq(seq, new_seq)
    append_new_token(seq_group, 1)
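    # seq_group now runs two sequences (the original and its fork); the mock
    # below makes slot appends fail for it, forcing a swap-out.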

    # The first request should be swapped out.
    scheduler.block_manager.can_append_slots = MagicMock()

    def cannot_append_second_group(seq_group, num_lookahead_slots):
        return seq_group.request_id != "1"

    scheduler.block_manager.can_append_slots.side_effect = (
        cannot_append_second_group)

    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_batched_tokens == 30
    assert out.blocks_to_swap_in == []
    assert out.blocks_to_swap_out != []
    assert out.scheduled_seq_groups[0].seq_group == seq_group2

    # Swapping back in is not possible, so the prefill keeps running.
    scheduler.block_manager.can_swap_in = MagicMock()
    scheduler.block_manager.can_swap_in.return_value = AllocStatus.LATER
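    # AllocStatus.LATER signals that the swap-in cannot be satisfied yet, so
    # seq_group stays swapped out while seq_group2 continues its prefill.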

    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_batched_tokens == 30
    assert out.blocks_to_swap_in == []
    assert out.blocks_to_swap_out == []
    assert seq_group2.is_prefill()
    assert out.scheduled_seq_groups[0].seq_group == seq_group2

    # Now, although swapping in is possible, the running prefill is
    # prioritized over it.
    scheduler.block_manager.can_swap_in.return_value = AllocStatus.OK
    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 1
    assert out.num_batched_tokens == 30
    assert out.blocks_to_swap_in == []
    assert out.blocks_to_swap_out == []
    assert not seq_group2.is_prefill()
    assert out.scheduled_seq_groups[0].seq_group == seq_group2
    append_new_token(seq_group2, 1)

    # Decoding is prioritized.
    _, out = schedule_and_update_computed_tokens(scheduler)
    assert len(out.scheduled_seq_groups) == 2
    # 3 decode tokens: seq_group, now swapped back in, contributes 2 (one per
    # forked sequence) and seq_group2 contributes 1.
    assert out.num_batched_tokens == 3
    assert out.blocks_to_swap_in != []
    assert out.blocks_to_swap_out == []
    assert not seq_group.is_prefill()
    assert not seq_group2.is_prefill()
    append_new_token(seq_group, 1)
    append_new_token(seq_group2, 1)


def test_chunked_prefill_preempt():
    """Verify preempt works with chunked prefill requests"""
    block_size = 4
