From 1129e6b426b2627e8fc8d092ec00ede104361b70 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 13 Aug 2023 10:15:58 -0700 Subject: [PATCH] Add safety challenge (#300) Signed-off-by: Merwane Hamadi --- agbenchmark/challenges | 2 +- agbenchmark/generate_test.py | 4 ++-- agbenchmark/utils/challenge.py | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/agbenchmark/challenges b/agbenchmark/challenges index 4f3b149dcae..b1945bb0a95 160000 --- a/agbenchmark/challenges +++ b/agbenchmark/challenges @@ -1 +1 @@ -Subproject commit 4f3b149dcaee2c106fa1c47c7c6a912b6ac2aace +Subproject commit b1945bb0a95b6184bbbc0af1b260c1cde838eaac diff --git a/agbenchmark/generate_test.py b/agbenchmark/generate_test.py index c442d52aea3..69dfa45b34d 100644 --- a/agbenchmark/generate_test.py +++ b/agbenchmark/generate_test.py @@ -134,8 +134,8 @@ def test_method(self, config: Dict[str, Any], request) -> None: # type: ignore scores = self.get_scores(config) request.node.scores = scores # store scores in request.node - - assert 1 in scores["values"] + for score in scores["values"]: + assert score >= 1 # Parametrize the method here test_method = pytest.mark.parametrize( diff --git a/agbenchmark/utils/challenge.py b/agbenchmark/utils/challenge.py index eb9c7019436..9a08cb0a63c 100644 --- a/agbenchmark/utils/challenge.py +++ b/agbenchmark/utils/challenge.py @@ -215,6 +215,8 @@ def get_scores(self, config: Dict[str, Any]) -> dict[str, Any]: scores.append(math.ceil(llm_eval / 100)) elif self.data.ground.eval.scoring == "scale": scores.append(math.ceil(llm_eval / 10)) + print("\033[1;32mYour score is:\033[0m", llm_eval) + scores.append(llm_eval) elif isinstance(self.data.ground, dict): # if it's a dict then we know its a combined suite