diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py index e895a76e7..9bb3b1f67 100644 --- a/optimum/intel/openvino/modeling_visual_language.py +++ b/optimum/intel/openvino/modeling_visual_language.py @@ -1020,6 +1020,10 @@ def preprocess_inputs( prompt = "\n" + text else: prompt = text + + if hasattr(processor, "patch_size") and processor.patch_size is None: + processor.patch_size = config.vision_config.patch_size + inputs = processor(images=image, text=prompt, return_tensors="pt") return inputs