Skip to content

Commit

Permalink
Adding some gnarly 1 pager pdfs from kyle
Browse files Browse the repository at this point in the history
  • Loading branch information
jakep-allenai committed Feb 11, 2025
1 parent 87cb957 commit 91eef27
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 0 deletions.
Binary file added tests/gnarly_pdfs/dolma-page-1.pdf
Binary file not shown.
Binary file added tests/gnarly_pdfs/olmo-page-1.pdf
Binary file not shown.
18 changes: 18 additions & 0 deletions tests/test_anchor.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,24 @@ def testExcessiveMapAnchor(self):
print(len(anchor_text))
self.assertLess(len(anchor_text), 4000)

def testKyleOnePageAnchors1(self):
    # Anchor text built from page 1 of the "dolma" one-page PDF must come
    # back shorter than the length requested from get_anchor_text.
    max_length = 6000
    fixtures_dir = os.path.join(os.path.dirname(__file__), "gnarly_pdfs")
    local_pdf_path = os.path.join(fixtures_dir, "dolma-page-1.pdf")

    anchor_text = get_anchor_text(
        local_pdf_path,
        1,
        pdf_engine="pdfreport",
        target_length=max_length,
    )
    anchor_length = len(anchor_text)

    # Echo the text and its length so they can be inspected in the test output.
    print(anchor_text)
    print(anchor_length)
    self.assertLess(anchor_length, max_length)

def testKyleOnePageAnchors2(self):
    # Anchor text built from page 1 of the "olmo" one-page PDF must come
    # back shorter than the length requested from get_anchor_text.
    max_length = 6000
    fixtures_dir = os.path.join(os.path.dirname(__file__), "gnarly_pdfs")
    local_pdf_path = os.path.join(fixtures_dir, "olmo-page-1.pdf")

    anchor_text = get_anchor_text(
        local_pdf_path,
        1,
        pdf_engine="pdfreport",
        target_length=max_length,
    )
    anchor_length = len(anchor_text)

    # Echo the text and its length so they can be inspected in the test output.
    print(anchor_text)
    print(anchor_length)
    self.assertLess(anchor_length, max_length)


class BuildSilverTest(unittest.TestCase):
def testSmallPage(self):
Expand Down

0 comments on commit 91eef27

Please sign in to comment.