diff --git a/libs/unstructured/example_docs/fake-email-attachment.eml b/libs/unstructured/example_docs/fake-email-attachment.eml new file mode 100644 index 0000000..5d8b036 --- /dev/null +++ b/libs/unstructured/example_docs/fake-email-attachment.eml @@ -0,0 +1,50 @@ +MIME-Version: 1.0 +Date: Fri, 23 Dec 2022 12:08:48 -0600 +Message-ID: +Subject: Fake email with attachment +From: Mallori Harrell +To: Mallori Harrell +Content-Type: multipart/mixed; boundary="0000000000005d654405f082adb7" + +--0000000000005d654405f082adb7 +Content-Type: multipart/alternative; boundary="0000000000005d654205f082adb5" + +--0000000000005d654205f082adb5 +Content-Type: text/plain; charset="UTF-8" + +Hello! + +Here's the attachments! + +It includes: + + - Lots of whitespace + - Little to no content + - and is a quick read + +Best, + +Mallori + +--0000000000005d654205f082adb5 +Content-Type: text/html; charset="UTF-8" +Content-Transfer-Encoding: quoted-printable + +
Hello!=C2=A0

Here's the attachments= +!

It includes:
  • Lots of whitespace
  • Little=C2= +=A0to no content
  • and is a quick read
Best,

Mallori

+ +--0000000000005d654205f082adb5-- +--0000000000005d654405f082adb7 +Content-Type: text/plain; charset="US-ASCII"; name="fake-attachment.txt" +Content-Disposition: attachment; filename="fake-attachment.txt" +Content-Transfer-Encoding: base64 +X-Attachment-Id: f_lc0tto5j0 +Content-ID: + +SGV5IHRoaXMgaXMgYSBmYWtlIGF0dGFjaG1lbnQh +--0000000000005d654405f082adb7-- \ No newline at end of file diff --git a/libs/unstructured/example_docs/layout-parser-paper.pdf b/libs/unstructured/example_docs/layout-parser-paper.pdf new file mode 100644 index 0000000..c4b6c2e Binary files /dev/null and b/libs/unstructured/example_docs/layout-parser-paper.pdf differ diff --git a/libs/unstructured/tests/integration_tests/test_document_loaders.py b/libs/unstructured/tests/integration_tests/test_document_loaders.py index 3b1824f..afe9976 100644 --- a/libs/unstructured/tests/integration_tests/test_document_loaders.py +++ b/libs/unstructured/tests/integration_tests/test_document_loaders.py @@ -8,8 +8,8 @@ from langchain_unstructured import UnstructuredLoader EXAMPLE_DOCS_DIRECTORY = str( - Path(__file__).parent.parent.parent.parent.parent - / "community/tests/integration_tests/examples/" + Path(__file__).parent.parent.parent + / "example_docs/" ) UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")