From be11210ae9d600f7d8148bf4098cb487571a707c Mon Sep 17 00:00:00 2001 From: Maksym Lysak Date: Thu, 13 Feb 2025 10:29:37 +0100 Subject: [PATCH] Removed special html code wrapping when exporting to docling document, cleaned up comments Signed-off-by: Maksym Lysak --- docling/pipeline/vlm_pipeline.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index 53c10e42c..fd6e9dfce 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -110,13 +110,6 @@ def __init__(self, pipeline_options: VlmPipelineOptions): "code": "lightblue", } - """ - if pipeline_options.artifacts_path is None: - self.artifacts_path = self.download_models_hf() - else: - self.artifacts_path = Path(pipeline_options.artifacts_path) - """ - self.keep_images = ( self.pipeline_options.generate_page_images or self.pipeline_options.generate_picture_images @@ -447,9 +440,6 @@ def parse_table_content(otsl_content: str) -> TableData: text_content = extract_text_from_backend(page, bbox) else: text_content = extract_inner_text(full_chunk) - # If it's code, wrap it with <pre> tags
-                    if doc_label == DocItemLabel.CODE:
-                        text_content = f"<pre>{text_content}</pre>" doc.add_text( label=doc_label, text=text_content, @@ -472,6 +462,3 @@ def get_default_options(cls) -> PdfPipelineOptions: @classmethod def is_backend_supported(cls, backend: AbstractDocumentBackend): return isinstance(backend, PdfDocumentBackend) - - # def _turn_tags_into_doc(self, document_tags): - # return DoclingDocument()