OpenAI-to-OpenAI comparison now supported; new prompts

allenai · Sep 30, 2024 · 80bb0cb · 80bb0cb
1 parent e179453
commit 80bb0cb
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 7 deletions.
diff --git a/pdelfin/eval/runeval.py b/pdelfin/eval/runeval.py
@@ -153,11 +153,15 @@ def process_jsonl_file(jsonl_file, gold_data, comparer):
             if "completion_error" in data and len(data["completion_error"]) > 0:
                 continue
 
-            # You need to consider the case when no input is provided to the refiner, it will hallucinate
-            # So in that case we say there is no eval text
             if "text" in data and len(data["text"].strip()) == 0:
+                # When no input is provided to the refiner, it will hallucinate,
+                # so in that case we say there is no eval text
                 eval_text = ""
+            elif "response" in data:
+                # This is the case of loading openai generated data as eval
+                eval_text = data["response"]["body"]["choices"][0]["message"]["content"]
             else:
+                # This is the normal case of loading birr generated data
                 eval_text = data["outputs"][0]["text"]
 
             # If the eval text or gold text is empty, we skip this page and don't use it for comparison
@@ -231,8 +235,8 @@ def do_eval(gold_data_path: str, eval_data_path: str, ) -> tuple[float, list[dic
 
             # Generate the eval data
             for pd_key, pd in page_data.items():
-                if pd["alignment"] > 0.97:
-                    continue
+                # if pd["alignment"] > 0.97:
+                #     continue
 
                 if len(pd["gold_text"]) < 200 and len(pd["eval_text"]) < 200:
                     continue
@@ -257,5 +261,5 @@ def do_eval(gold_data_path: str, eval_data_path: str, ) -> tuple[float, list[dic
 
 
 if __name__ == "__main__":
-    result = do_eval(gold_data_path="s3://ai2-oe-data/jakep/openai_batch_done_eval_mini",
-                     eval_data_path="s3://ai2-oe-data/jakep/qwen2vl/Qwen_Qwen2-VL-2B-Instruct-4c8e4c-01J8N1D42YV9F20AHFE6D3WK21/")
+    result = do_eval(gold_data_path="s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v3_eval/",
+                     eval_data_path="s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v3_eval/")
diff --git a/pdelfin/prompts/prompts.py b/pdelfin/prompts/prompts.py
@@ -5,7 +5,7 @@ def build_openai_silver_data_prompt(base_text: str) -> str:
         f"Just return the plain text representation of this document as if you were reading it naturally.\n"
         f"Turn equations into a LaTeX representation, and tables into markdown format. Remove the headers and footers, but keep references and footnotes.\n"
         f"Read any natural handwriting.\n"
-        f"This is likely one page out of several in the document, so be sure to preserve any sentences that come from the previous page, or continue onto the next page, exactly as they are.\n"
+        f"Strive to output the text as it appears on the page, without making any corrections\n"
         f"If there is no text at all that you think you should read, just output [NO TEXT].\n"
         f"If the page has no English text on it at all, just output [NO ENGLISH TEXT].\n"
         f"Do not hallucinate.\n"