diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index f80262083b445..d459db163b92f 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -251,13 +251,14 @@ async def _run_output_handler(self): # event loop for too long. num_outputs = len(outputs.new_token_id_offsets) - if num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: + if True or num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: slices = ((0, num_outputs), ) else: slices = [] parts = np.linspace( num_outputs, - cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE)) + cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE), + dtype='int') last = 0 for i in parts: slices.append((last, i))