From 1129e6b426b2627e8fc8d092ec00ede104361b70 Mon Sep 17 00:00:00 2001
From: merwanehamadi <merwanehamadi@gmail.com>
Date: Sun, 13 Aug 2023 10:15:58 -0700
Subject: [PATCH] Add safety challenge (#300)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
---
 agbenchmark/challenges         | 2 +-
 agbenchmark/generate_test.py   | 4 ++--
 agbenchmark/utils/challenge.py | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/agbenchmark/challenges b/agbenchmark/challenges
index 4f3b149dcae..b1945bb0a95 160000
--- a/agbenchmark/challenges
+++ b/agbenchmark/challenges
@@ -1 +1 @@
-Subproject commit 4f3b149dcaee2c106fa1c47c7c6a912b6ac2aace
+Subproject commit b1945bb0a95b6184bbbc0af1b260c1cde838eaac
diff --git a/agbenchmark/generate_test.py b/agbenchmark/generate_test.py
index c442d52aea3..69dfa45b34d 100644
--- a/agbenchmark/generate_test.py
+++ b/agbenchmark/generate_test.py
@@ -134,8 +134,8 @@ def test_method(self, config: Dict[str, Any], request) -> None:  # type: ignore
 
         scores = self.get_scores(config)
         request.node.scores = scores  # store scores in request.node
-
-        assert 1 in scores["values"]
+        for score in scores["values"]:
+            assert score >= 1
 
     # Parametrize the method here
     test_method = pytest.mark.parametrize(
diff --git a/agbenchmark/utils/challenge.py b/agbenchmark/utils/challenge.py
index eb9c7019436..9a08cb0a63c 100644
--- a/agbenchmark/utils/challenge.py
+++ b/agbenchmark/utils/challenge.py
@@ -215,6 +215,8 @@ def get_scores(self, config: Dict[str, Any]) -> dict[str, Any]:
                         scores.append(math.ceil(llm_eval / 100))
                     elif self.data.ground.eval.scoring == "scale":
                         scores.append(math.ceil(llm_eval / 10))
+                    print("\033[1;32mYour score is:\033[0m", llm_eval)
+
                     scores.append(llm_eval)
             elif isinstance(self.data.ground, dict):
                 # if it's a dict then we know its a combined suite