Skip to content

Commit

Permalink
bugfixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
jakep-allenai committed Oct 9, 2024
1 parent c2909f3 commit a90feda
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
7 changes: 5 additions & 2 deletions pdelfin/data/runpipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,6 +17,10 @@
from pdelfin.prompts.anchor import get_anchor_text
from pdelfin.filter import PdfFilter

import logging

logging.getLogger("pypdf").setLevel(logging.ERROR)

pdf_filter = PdfFilter()

def build_page_query(local_pdf_path: str, pretty_pdf_path: str, page: int) -> dict:
Expand Down Expand Up @@ -212,8 +216,7 @@ def main():
cur_file.write("\n")
cur_file_size += request_size

pb.update(1)

pb.update(1)
except Exception as e:
print(f"Error processing a PDF: {str(e)}")

Expand Down
2 changes: 1 addition & 1 deletion pdelfin/eval/evalhtml.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,7 +7,7 @@
from urllib.parse import urlparse
from difflib import SequenceMatcher
from tqdm import tqdm
from pdelfin.silver_data.renderpdf import render_pdf_to_base64png
from pdelfin.data.renderpdf import render_pdf_to_base64png

session = boto3.Session(profile_name='s2')
s3_client = session.client('s3')
Expand Down
2 changes: 1 addition & 1 deletion tests/test_anchor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -109,7 +109,7 @@ class BuildSilverTest(unittest.TestCase):
def testSmallPage(self):
local_pdf_path = os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "small_page_size.pdf")

from pdelfin.silver_data.buildsilver import build_page_query
from pdelfin.data.buildsilver import build_page_query

result = build_page_query(local_pdf_path, "s3://test.pdf", 1)

Expand Down

0 comments on commit a90feda

Please sign in to comment.