diff --git a/csrc/moe/moe_align_sum_kernels.cu b/csrc/moe/moe_align_sum_kernels.cu index 01dac40446501..c072744f06685 100644 --- a/csrc/moe/moe_align_sum_kernels.cu +++ b/csrc/moe/moe_align_sum_kernels.cu @@ -3,7 +3,7 @@ #include #include -#include +#include #include "../cuda_compat.h" #include "../dispatch_utils.h" diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu index ffa9d44610a7f..366b3cdc23aa1 100644 --- a/csrc/rocm/attention.cu +++ b/csrc/rocm/attention.cu @@ -1122,4 +1122,4 @@ void paged_attention( #undef WARP_SIZE #undef MAX #undef MIN -#undef DIVIDE_ROUND_UP \ No newline at end of file +#undef DIVIDE_ROUND_UP diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index ebf6a88f21b8e..198b6d134718f 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -205,6 +205,14 @@ **_FALLBACK_MODEL, } +# This variable is used as the args for subprocess.run(). We +# can modify this variable to alter the args if needed, e.g. +# when we use par format to pack things together, sys.executable +# might not be the target we want to run. 
+_SUBPROCESS_COMMAND = [ + sys.executable, "-m", "vllm.model_executor.models.registry" +] + @dataclass(frozen=True) class _ModelInfo: @@ -502,10 +510,9 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T: # cannot use `sys.executable __file__` here because the script # contains relative imports - returned = subprocess.run( - [sys.executable, "-m", "vllm.model_executor.models.registry"], - input=input_bytes, - capture_output=True) + returned = subprocess.run(_SUBPROCESS_COMMAND, + input=input_bytes, + capture_output=True) # check if the subprocess is successful try: diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index c484a755ab4ec..9060565596b21 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -45,4 +45,4 @@ "SolarConfig", "Telechat2Config", "UltravoxConfig", -] \ No newline at end of file +]