Skip to content

Commit

Permalink
Logger fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
jakep-allenai committed Nov 4, 2024
1 parent 3778228 commit 73bd961
Showing 1 changed file with 3 additions and 10 deletions.
13 changes: 3 additions & 10 deletions pdelfin/birrpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@

# Initialize logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG) # Set to DEBUG for the file handler to capture everything

# Console handler for INFO level and above
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
logger.setLevel(logging.INFO)

# File handler for DEBUG level and above with line-by-line flushing
class FlushFileHandler(logging.FileHandler):
Expand All @@ -50,17 +46,14 @@ def emit(self, record):
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Add handlers to the logger
logger.handlers.clear()
logger.addHandler(console_handler)
logger.addHandler(file_handler)

# Global s3 client for the whole script, feel free to adjust params if you need it
workspace_s3 = boto3.client('s3')
pdf_s3 = boto3.client('s3')

# Quiet logs from pypdf and smart open
# Quiet logs from pypdf
logging.getLogger("pypdf").setLevel(logging.ERROR)
logging.getLogger("smart_open").setLevel(logging.ERROR)


class DatabaseManager:
Expand Down Expand Up @@ -710,7 +703,7 @@ def get_current_round(s3_workspace: str) -> int:
logger.info(f"Querying all PDFs at {args.add_pdfs}")

all_pdfs = expand_s3_glob(pdf_s3, args.add_pdfs)
print(f"Found {len(all_pdfs):,} total pdf paths")
logger.info(f"Found {len(all_pdfs):,} total pdf paths")
elif os.path.exists(args.add_pdfs):
with open(args.add_pdfs, "r") as f:
all_pdfs = [line.strip() for line in f.readlines() if len(line.strip()) > 0]
Expand Down

0 comments on commit 73bd961

Please sign in to comment.