fix some bugs (vllm-project#2689)
zspo authored and jimpang committed Feb 20, 2024
1 parent 3f4374a commit 99e5204
Showing 2 changed files with 8 additions and 3 deletions.
5 changes: 4 additions & 1 deletion vllm/config.py
@@ -355,6 +355,9 @@ class ParallelConfig:
         worker_use_ray: Whether to use Ray for model workers. Will be set to
             True if either pipeline_parallel_size or tensor_parallel_size is
             greater than 1.
+        max_parallel_loading_workers: Maximum number of workers that load
+            the model in parallel, in sequential batches, to avoid RAM OOM
+            when using tensor parallelism with large models.
         disable_custom_all_reduce: Disable the custom all-reduce kernel and
             fall back to NCCL.
     """
@@ -470,7 +473,7 @@ def __post_init__(self):
         elif self.max_cpu_loras < self.max_loras:
             raise ValueError(
                 f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
-                f"max_num_seqs ({self.max_loras})")
+                f"max_loras ({self.max_loras})")

     def verify_with_model_config(self, model_config: ModelConfig):
         if self.lora_dtype in (None, "auto"):
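The fix only corrects the name quoted in the error message; the check itself is unchanged. A hypothetical repro (only max_loras, max_cpu_loras, and lora_dtype appear in this diff, so the remaining LoRAConfig fields are assumed):

```python
from vllm.config import LoRAConfig

try:
    # max_cpu_loras (4) is smaller than max_loras (8), so validation fails.
    LoRAConfig(max_lora_rank=16, max_loras=8, max_cpu_loras=4)
except ValueError as e:
    print(e)  # max_cpu_loras (4) must be >= max_loras (8)
```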
6 changes: 4 additions & 2 deletions vllm/engine/async_llm_engine.py
@@ -295,6 +295,8 @@ class AsyncLLMEngine:
             async frontend will be executed in a separate process as the
             model workers.
         log_requests: Whether to log the requests.
+        max_log_len: Maximum number of prompt characters or prompt token
+            IDs printed in the log.
         start_engine_loop: If True, the background task to run the engine
             will be automatically started in the generate call.
         *args: Arguments for LLMEngine.
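A sketch of the truncation this option implies (assumed; the implementation itself is not part of this diff): both the prompt text and the token IDs are clipped before they reach the log statement in the next hunk.

```python
# Hypothetical standalone illustration of max_log_len-style clipping.
max_log_len = 100
prompt = "some very long prompt ..."
prompt_token_ids = [101, 2023, 2003, 102]

shortened_prompt = prompt[:max_log_len] if prompt is not None else None
shortened_token_ids = (prompt_token_ids[:max_log_len]
                       if prompt_token_ids is not None else None)
```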
@@ -433,8 +435,8 @@ async def add_request(
             logger.info(f"Received request {request_id}: "
                         f"prompt: {shortened_prompt!r}, "
                         f"prefix_pos: {prefix_pos},"
-                        f"sampling params: {sampling_params}, "
-                        f"prompt token ids: {shortened_token_ids}, "
+                        f"sampling_params: {sampling_params}, "
+                        f"prompt_token_ids: {shortened_token_ids}, "
                         f"lora_request: {lora_request}.")

         if not self.is_running:
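The change renames the keys in the log message to match the actual parameter names. A standalone rendering of the corrected line with hypothetical values, showing how the adjacent f-string fragments join into one message:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# All values below are made up for illustration.
request_id, shortened_prompt, prefix_pos = "req-0", "Hello", None
sampling_params, shortened_token_ids, lora_request = "SamplingParams(...)", [9906], None

logger.info(f"Received request {request_id}: "
            f"prompt: {shortened_prompt!r}, "
            f"prefix_pos: {prefix_pos},"
            f"sampling_params: {sampling_params}, "
            f"prompt_token_ids: {shortened_token_ids}, "
            f"lora_request: {lora_request}.")
# INFO:__main__:Received request req-0: prompt: 'Hello', prefix_pos: None,sampling_params: SamplingParams(...), prompt_token_ids: [9906], lora_request: None.
```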
