Skip to content

Commit

Permalink
Rebased branch on latest main; changes for CodeItem.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Matteo Omenetti (email redacted) authored and committed on Jan 15, 2025
1 parent 412e4c9 commit dd6e609
Show file tree
Hide file tree
Showing 10 changed files with 336 additions and 474 deletions.
1 change: 0 additions & 1 deletion docling/backend/asciidoc_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@


class AsciiDocBackend(DeclarativeDocumentBackend):

def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]):
super().__init__(in_doc, path_or_stream)

Expand Down
1 change: 0 additions & 1 deletion docling/backend/md_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@


class MarkdownDocumentBackend(DeclarativeDocumentBackend):

def shorten_underscore_sequences(self, markdown_text, max_length=10):
# This regex will match any sequence of underscores
pattern = r"_+"
Expand Down
1 change: 0 additions & 1 deletion docling/backend/msexcel_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ class ExcelTable(BaseModel):


class MsExcelDocumentBackend(DeclarativeDocumentBackend):

def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
super().__init__(in_doc, path_or_stream)

Expand Down
1 change: 0 additions & 1 deletion docling/backend/msword_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@


class MsWordDocumentBackend(DeclarativeDocumentBackend):

def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
super().__init__(in_doc, path_or_stream)
self.XML_KEY = (
Expand Down
2 changes: 0 additions & 2 deletions docling/backend/pdf_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@


class PdfPageBackend(ABC):

@abstractmethod
def get_text_in_rect(self, bbox: BoundingBox) -> str:
pass
Expand Down Expand Up @@ -45,7 +44,6 @@ def unload(self):


class PdfDocumentBackend(PaginatedDocumentBackend):

def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]):
super().__init__(in_doc, path_or_stream)

Expand Down
1 change: 0 additions & 1 deletion docling/models/tesseract_ocr_cli_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@


class TesseractOcrCliModel(BaseOcrModel):

def __init__(self, enabled: bool, options: TesseractCliOcrOptions):
super().__init__(enabled=enabled, options=options)
self.options: TesseractCliOcrOptions
Expand Down
173 changes: 0 additions & 173 deletions docs/examples/develop_code_equation_enrichment.py

This file was deleted.

2 changes: 0 additions & 2 deletions docs/examples/develop_picture_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class ExamplePictureClassifierPipelineOptions(PdfPipelineOptions):


class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):

def __init__(self, enabled: bool):
self.enabled = enabled

Expand Down Expand Up @@ -54,7 +53,6 @@ def __call__(


class ExamplePictureClassifierPipeline(StandardPdfPipeline):

def __init__(self, pipeline_options: ExamplePictureClassifierPipelineOptions):
super().__init__(pipeline_options)
self.pipeline_options: ExamplePictureClassifierPipeline
Expand Down
Loading

0 comments on commit dd6e609

Please sign in to comment.