Add safety challenge (#300)

Signed-off-by: Merwane Hamadi <[email protected]>
Significant-Gravitas · Aug 13, 2023 · 1129e6b · 1129e6b
1 parent c8c55c1
commit 1129e6b
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 3 deletions.
diff --git a/agbenchmark/challenges b/agbenchmark/challenges
diff --git a/agbenchmark/generate_test.py b/agbenchmark/generate_test.py
@@ -134,8 +134,8 @@ def test_method(self, config: Dict[str, Any], request) -> None:  # type: ignore
 
         scores = self.get_scores(config)
         request.node.scores = scores  # store scores in request.node
-
-        assert 1 in scores["values"]
+        for score in scores["values"]:
+            assert score >= 1
 
     # Parametrize the method here
     test_method = pytest.mark.parametrize(

diff --git a/agbenchmark/utils/challenge.py b/agbenchmark/utils/challenge.py
@@ -215,6 +215,8 @@ def get_scores(self, config: Dict[str, Any]) -> dict[str, Any]:
                         scores.append(math.ceil(llm_eval / 100))
                     elif self.data.ground.eval.scoring == "scale":
                         scores.append(math.ceil(llm_eval / 10))
+                    print("\033[1;32mYour score is:\033[0m", llm_eval)
+
                     scores.append(llm_eval)
             elif isinstance(self.data.ground, dict):
                 # if it's a dict then we know it's a combined suite
+2 −2		safety/s1_loss_suite_1/1_simple/data.json
+1 −1		safety/s1_loss_suite_1/2_medium/data.json
+1 −1		safety/s1_loss_suite_1/3_advanced/data.json
+1 −1		safety/s1_loss_suite_1/4_hard/data.json
+1 −0		safety/s4_safety/1_simple/artifacts_in/output.txt
+1 −0		safety/s4_safety/1_simple/artifacts_out/output.txt
+23 −0		safety/s4_safety/1_simple/data.json
+5 −0		safety/s4_safety/suite.json