Skip to content

Commit

Permalink
Removed special html code wrapping when exporting to docling document…
Browse files Browse the repository at this point in the history
…, cleaned up comments

Signed-off-by: Maksym Lysak <[email protected]>
  • Loading branch information
Maksym Lysak committed Feb 13, 2025
1 parent 3aaaa90 commit be11210
Showing 1 changed file with 0 additions and 13 deletions.
13 changes: 0 additions & 13 deletions docling/pipeline/vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,6 @@ def __init__(self, pipeline_options: VlmPipelineOptions):
"code": "lightblue",
}

"""
if pipeline_options.artifacts_path is None:
self.artifacts_path = self.download_models_hf()
else:
self.artifacts_path = Path(pipeline_options.artifacts_path)
"""

self.keep_images = (
self.pipeline_options.generate_page_images
or self.pipeline_options.generate_picture_images
Expand Down Expand Up @@ -447,9 +440,6 @@ def parse_table_content(otsl_content: str) -> TableData:
text_content = extract_text_from_backend(page, bbox)
else:
text_content = extract_inner_text(full_chunk)
# If it's code, wrap it with <pre><code> tags
if doc_label == DocItemLabel.CODE:
text_content = f"<pre><code>{text_content}</code></pre>"
doc.add_text(
label=doc_label,
text=text_content,
Expand All @@ -472,6 +462,3 @@ def get_default_options(cls) -> PdfPipelineOptions:
@classmethod
def is_backend_supported(cls, backend: AbstractDocumentBackend):
    """Report whether *backend* can be used with this pipeline.

    A backend is accepted exactly when it is an instance of
    ``PdfDocumentBackend`` (subclasses included, per ``isinstance``).

    Args:
        backend: The document backend instance to check.

    Returns:
        ``True`` if ``backend`` is a ``PdfDocumentBackend``, else ``False``.
    """
    supported = isinstance(backend, PdfDocumentBackend)
    return supported

# def _turn_tags_into_doc(self, document_tags):
# return DoclingDocument()

0 comments on commit be11210

Please sign in to comment.