DS4SD · dolfim-ibm · Feb 26, 2025 · Feb 25, 2025 · Feb 25, 2025
diff --git a/docling/cli/models.py b/docling/cli/models.py
@@ -32,9 +32,19 @@ class _AvailableModels(str, Enum):
     CODE_FORMULA = "code_formula"
     PICTURE_CLASSIFIER = "picture_classifier"
     SMOLVLM = "smolvlm"
+    GRANITE_VISION = "granite_vision"
     EASYOCR = "easyocr"
 
 
+_default_models = [
+    _AvailableModels.LAYOUT,
+    _AvailableModels.TABLEFORMER,
+    _AvailableModels.CODE_FORMULA,
+    _AvailableModels.PICTURE_CLASSIFIER,
+    _AvailableModels.EASYOCR,
+]
+
+
 @app.command("download")
 def download(
     output_dir: Annotated[
@@ -73,7 +83,7 @@ def download(
             datefmt="[%X]",
             handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
         )
-    to_download = models or [m for m in _AvailableModels]
+    to_download = models or _default_models
     output_dir = download_models(
         output_dir=output_dir,
         force=force,
@@ -83,6 +93,7 @@ def download(
         with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
         with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
         with_smolvlm=_AvailableModels.SMOLVLM in to_download,
+        with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
         with_easyocr=_AvailableModels.EASYOCR in to_download,
     )
 

diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
@@ -41,9 +41,9 @@ def __init__(
                 )
 
             # Initialize processor and model
-            self.processor = AutoProcessor.from_pretrained(self.options.repo_id)
+            self.processor = AutoProcessor.from_pretrained(artifacts_path)
             self.model = AutoModelForVision2Seq.from_pretrained(
-                self.options.repo_id,
+                artifacts_path,
                 torch_dtype=torch.bfloat16,
                 _attn_implementation=(
                     "flash_attention_2" if self.device.startswith("cuda") else "eager"

diff --git a/docling/utils/model_downloader.py b/docling/utils/model_downloader.py
@@ -2,7 +2,10 @@
 from pathlib import Path
 from typing import Optional
 
-from docling.datamodel.pipeline_options import smolvlm_picture_description
+from docling.datamodel.pipeline_options import (
+    granite_picture_description,
+    smolvlm_picture_description,
+)
 from docling.datamodel.settings import settings
 from docling.models.code_formula_model import CodeFormulaModel
 from docling.models.document_picture_classifier import DocumentPictureClassifier
@@ -23,7 +26,8 @@ def download_models(
     with_tableformer: bool = True,
     with_code_formula: bool = True,
     with_picture_classifier: bool = True,
-    with_smolvlm: bool = True,
+    with_smolvlm: bool = False,
+    with_granite_vision: bool = False,
     with_easyocr: bool = True,
 ):
     if output_dir is None:
@@ -73,6 +77,15 @@ def download_models(
             progress=progress,
         )
 
+    if with_granite_vision:
+        _log.info(f"Downloading Granite Vision model...")
+        PictureDescriptionVlmModel.download_models(
+            repo_id=granite_picture_description.repo_id,
+            local_dir=output_dir / granite_picture_description.repo_cache_folder,
+            force=force,
+            progress=progress,
+        )
+
     if with_easyocr:
         _log.info(f"Downloading easyocr models...")
         EasyOcrModel.download_models(