Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jakep-allenai committed Oct 4, 2024
1 parent f25cb6c commit db36608
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion pdelfin/silver_data/sendsilver2.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def get_state(folder_path: str) -> dict:
} for f in jsonl_files}

with open(state_file, "w") as f:
return json.dump(state, f)
json.dump(state, f)

return state

Expand Down Expand Up @@ -129,7 +129,15 @@ def get_next_work_item(folder_path):

return all_states[0] if len(all_states) > 0 else None

def get_done_total(folder_path):
    """Count the finished and total work items tracked for ``folder_path``.

    Returns a ``(done, total)`` tuple: ``done`` is the number of entries
    returned by ``get_state`` whose ``"state"`` value is in
    ``FINISHED_STATES``; ``total`` is the number of entries overall.
    """
    # Materialise once so the same snapshot feeds both counts.
    entries = list(get_state(folder_path).values())
    finished = sum(1 for entry in entries if entry["state"] in FINISHED_STATES)
    return finished, len(entries)

# Main function to process all .jsonl files in a folder
def process_folder(folder_path: str, max_gb: int):
Expand All @@ -142,6 +150,9 @@ def process_folder(folder_path: str, max_gb: int):
raise ValueError(f"Insufficient free space in OpenAI's file storage: Only {starting_free_space} GB left, but 2x{max_gb} GB are required (1x for your uploads, 1x for your results).")

while not all(state["state"] in FINISHED_STATES for state in get_state(folder_path).values()):
done, total = get_done_total(folder_path)
print(f"Total items {total}, done {done}, {done/total*100:.1f}%")

work_item = get_next_work_item(folder_path)
print(f"Processing {os.path.basename(work_item['filename'])}, cur status = {work_item['state']}")

Expand Down

0 comments on commit db36608

Please sign in to comment.