Clamp the per-video frame budget in `_get_max_video_frames` to be non-negative.

Both hunks below make the same one-line change: the numerator
`max_total_frames - num_images * i2f` is wrapped in `max(..., 0)` before
it is floor-divided by `max(num_videos, 1)`. When the image term exceeds
`max_total_frames`, the old expression returned a negative number; the new
expression returns 0 instead. `_get_max_video_tokens` (shown only as
context) multiplies by this value.

diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py
index 2aa2207300631..9d312ed415f58 100644
--- a/vllm/model_executor/models/llava_onevision.py
+++ b/vllm/model_executor/models/llava_onevision.py
@@ -192,7 +192,7 @@ def _get_max_video_frames(
         max_total_tokens = self.ctx.model_config.max_model_len
         max_total_frames = int(max_total_tokens / self._get_max_frame_tokens())
 
-        return (max_total_frames - num_images * i2f) // max(num_videos, 1)
+        return max(max_total_frames - num_images * i2f, 0) // max(num_videos, 1)
 
     def _get_max_video_tokens(self) -> int:
         return self._get_max_frame_tokens() * self._get_max_video_frames()
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index a3b47d06e4467..a0e2fd3bb8944 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -780,7 +780,7 @@ def _get_max_video_frames(
         max_total_tokens = self.ctx.model_config.max_model_len
         max_total_frames = int(max_total_tokens / self._get_max_image_tokens())
 
-        return (max_total_frames - num_images * i2f) // max(num_videos, 1)
+        return max(max_total_frames - num_images * i2f, 0) // max(num_videos, 1)
 
     def _get_max_video_tokens(self) -> int:
         return self._get_max_image_tokens() * self._get_max_video_frames()