Skip to content

Commit

Permalink
Add check for poppler installation
Browse files Browse the repository at this point in the history
  • Loading branch information
jakep-allenai committed Nov 1, 2024
1 parent be8fb28 commit 9f010e6
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pdelfin/birrpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from pdelfin.prompts import build_finetuning_prompt, PageResponse
from pdelfin.prompts.anchor import get_anchor_text
from pdelfin.s3_utils import parse_custom_id, expand_s3_glob, get_s3_bytes, parse_s3_path

from pdelfin.check import check_poppler_version

# Initialize logger
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -699,6 +699,8 @@ def get_current_round(s3_workspace: str) -> int:
current_round = get_current_round(args.workspace)
logger.info(f"Current round is {current_round}")

check_poppler_version()

# One shared executor to rule them all
executor = ProcessPoolExecutor(max_workers=args.workers)

Expand Down
18 changes: 18 additions & 0 deletions pdelfin/check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import sys
import subprocess
import logging

logger = logging.getLogger(__name__)


def check_poppler_version():
    """Verify that poppler's ``pdftoppm`` executable is installed and runnable.

    Runs ``pdftoppm -h`` and looks for the program name in its output.
    On success an info message is logged and the function returns ``None``.

    Raises:
        SystemExit: with exit code 1 when ``pdftoppm`` cannot be found on
            ``PATH``, exits with a non-zero status, or does not print a
            banner containing ``pdftoppm``.
    """
    # Keep the try body minimal: only launching the process can raise
    # FileNotFoundError (binary missing from PATH).
    try:
        result = subprocess.run(
            ["pdftoppm", "-h"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        logger.error("pdftoppm is not installed.")
        logger.error("Check the README in the https://github.com/allenai/pdelfin/blob/main/README.md for installation instructions")
        sys.exit(1)

    # pdftoppm writes its -h usage banner to stderr and leaves stdout empty,
    # so inspecting stdout alone rejects a perfectly working installation.
    # Accept the banner on either stream.
    banner = f"{result.stdout}{result.stderr}"
    if result.returncode == 0 and "pdftoppm" in banner:
        logger.info("pdftoppm is installed and working.")
    else:
        logger.error("pdftoppm is installed but returned an error.")
        sys.exit(1)

0 comments on commit 9f010e6

Please sign in to comment.