Skip to content

Commit

Permalink
Rename the new conversion status (UNSUPPORTED → SKIPPED) and populate ConversionResult errors
Browse files (browse the repository at this point in the history)
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed Dec 2, 2024
1 parent 4138110 commit 8e57c85
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 5 deletions.
3 changes: 2 additions & 1 deletion docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class ConversionStatus(str, Enum):
FAILURE = auto()
SUCCESS = auto()
PARTIAL_SUCCESS = auto()
UNSUPPORTED = auto()
SKIPPED = auto()


class InputFormat(str, Enum):
Expand Down Expand Up @@ -94,6 +94,7 @@ class DoclingComponentType(str, Enum):
DOCUMENT_BACKEND = auto()
MODEL = auto()
DOC_ASSEMBLER = auto()
USER_INPUT = auto()


class ErrorItem(BaseModel):
Expand Down
18 changes: 15 additions & 3 deletions docling/document_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@
from docling.backend.msexcel_backend import MsExcelDocumentBackend
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
from docling.backend.msword_backend import MsWordDocumentBackend
from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat
from docling.datamodel.base_models import (
ConversionStatus,
DoclingComponentType,
DocumentStream,
ErrorItem,
InputFormat,
)
from docling.datamodel.document import (
ConversionResult,
InputDocument,
Expand Down Expand Up @@ -262,11 +268,17 @@ def _process_document(
if valid:
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
else:
error_message = f"File format not allowed: {in_doc.file}"
if raises_on_error:
raise ConversionError(f"Unsupported format in: {in_doc.file}")
raise ConversionError(error_message)
else:
error_item = ErrorItem(
component_type=DoclingComponentType.USER_INPUT,
module_name="",
error_message=error_message,
)
conv_res = ConversionResult(
input=in_doc, status=ConversionStatus.UNSUPPORTED
input=in_doc, status=ConversionStatus.SKIPPED, errors=[error_item]
)

return conv_res
Expand Down
2 changes: 1 addition & 1 deletion tests/test_invalid_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_convert_unsupported_doc_format_wout_exception(converter: DocumentConver
result = converter.convert(
DocumentStream(name="input.xyz", stream=BytesIO(b"xyz")), raises_on_error=False
)
assert result.status == ConversionStatus.UNSUPPORTED
assert result.status == ConversionStatus.SKIPPED


def test_convert_unsupported_doc_format_with_exception(converter: DocumentConverter):
Expand Down

0 comments on commit 8e57c85

Please sign in to comment.