diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 321902d11fd73..bfc41703b94dc 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1024,13 +1024,6 @@ def initialize_model_parallel(
     backend = backend or torch.distributed.get_backend(
         get_world_group().device_group)
 
-    if (world_size
-            != tensor_model_parallel_size * pipeline_model_parallel_size):
-        raise RuntimeError(
-            f"world_size ({world_size}) is not equal to "
-            f"tensor_model_parallel_size ({tensor_model_parallel_size}) x "
-            f"pipeline_model_parallel_size ({pipeline_model_parallel_size})")
-
     # Build the tensor model-parallel groups.
     num_tensor_model_parallel_groups: int = (world_size //
                                              tensor_model_parallel_size)
diff --git a/vllm/executor/uniproc_executor.py b/vllm/executor/uniproc_executor.py
index dcb4a8f27c252..e5464cafaecbf 100644
--- a/vllm/executor/uniproc_executor.py
+++ b/vllm/executor/uniproc_executor.py
@@ -101,7 +101,7 @@ def _init_executor(self) -> None:
         # - MASTER_PORT
         distributed_init_method = "env://"
         rank = int(os.environ["RANK"])
-        local_rank = rank
+        local_rank = int(os.environ["LOCAL_RANK"])
         is_driver_worker = True
         kwargs = dict(
             vllm_config=self.vllm_config,