From 77f0b9fa849d217c9006a59240e1600264988806 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Fri, 18 Oct 2024 22:39:25 +0000 Subject: [PATCH] help text --- pdelfin/birrpipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdelfin/birrpipeline.py b/pdelfin/birrpipeline.py index cc34ceb..d647ff5 100644 --- a/pdelfin/birrpipeline.py +++ b/pdelfin/birrpipeline.py @@ -550,8 +550,8 @@ def get_current_round(s3_workspace: str) -> int: parser = argparse.ArgumentParser(description='Manager for running millions of PDFs through a batch inference pipeline') parser.add_argument('workspace', help='The S3 path where work will be done e.g., s3://bucket/prefix/)') parser.add_argument('--add_pdfs', help='Path to add pdfs stored in s3 to the workspace, can be a glob path s3://bucket/prefix/*.pdf or path to file containing list of pdf paths', default=None) - parser.add_argument('--target_longest_image_dim', type=int, help='Dimension to use for rendering image', default=1024) - parser.add_argument('--target_anchor_text_len', type=int, help='Maximum amount of anchor text to use', default=6000) + parser.add_argument('--target_longest_image_dim', type=int, help='Dimension on longest side to use for rendering the pdf pages', default=1024) + parser.add_argument('--target_anchor_text_len', type=int, help='Maximum amount of anchor text to use (characters)', default=6000) parser.add_argument('--workspace_profile', help='S3 configuration profile for accessing the workspace', default=None) parser.add_argument('--pdf_profile', help='S3 configuration profile for accessing the raw pdf documents', default=None) parser.add_argument('--max_size_mb', type=int, default=250, help='Max file size in MB')