Skip to content

Commit

Permalink
Fixing work queue population
Browse files (browse the repository at this point in the history)
  • Loading branch information
jakep-allenai committed Nov 14, 2024
1 parent 827b77e commit b67d8e7
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion pdelfin/beakerpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ async def populate_pdf_work_queue(args):

async def load_pdf_work_queue(args) -> asyncio.Queue:
index_file_s3_path = os.path.join(args.workspace, "pdf_index_list.csv.zstd")
- output_glob = f"{args.workspace}/dolma_documents/output_*.jsonl"
+ output_glob = os.path.join(args.workspace, "dolma_documents", "*.jsonl")

# Define the two blocking I/O operations
download_task = asyncio.to_thread(download_zstd_csv, workspace_s3, index_file_s3_path)
Expand Down

0 comments on commit b67d8e7

Please sign in to comment.