Skip to content
This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

Commit

Permalink
Add optional categories
Browse files Browse the repository at this point in the history
  • Loading branch information
waynehamadi committed Aug 7, 2023
1 parent f157f46 commit 5ed39d4
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 2 deletions.
2 changes: 1 addition & 1 deletion agbenchmark/challenges
1 change: 1 addition & 0 deletions agbenchmark/generate_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def create_single_test(

# Define test method within the dynamically created class
def test_method(self, config: Dict[str, Any], request) -> None: # type: ignore
self.skip_optional_categories(config)
from helicone.lock import HeliconeLockManager

if os.environ.get("HELICONE_API_KEY"):
Expand Down
5 changes: 5 additions & 0 deletions agbenchmark/start_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
) = calculate_dynamic_paths()
BENCHMARK_GIT_COMMIT_SHA = get_git_commit_sha(HOME_DIRECTORY / ".." / "..")
AGENT_GIT_COMMIT_SHA = get_git_commit_sha(HOME_DIRECTORY)
# Load the optional category names from "challenges/optional_categories.json",
# resolved relative to the directory containing this module.
with open(
    Path(__file__).resolve().parent / "challenges" / "optional_categories.json",
    # Pin the encoding: open() otherwise falls back to the locale's default,
    # which is not guaranteed to be UTF-8 on every platform.
    encoding="utf-8",
) as f:
    # The top-level JSON value must provide an "optional_categories" entry.
    OPTIONAL_CATEGORIES = json.load(f)["optional_categories"]


@click.group()
Expand Down
15 changes: 15 additions & 0 deletions agbenchmark/utils/challenge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@
from typing import Any, Dict, List

import openai
import pytest

from agbenchmark.agent_interface import MOCK_FLAG
from agbenchmark.start_benchmark import OPTIONAL_CATEGORIES
from agbenchmark.utils.data_types import ChallengeData, Ground
from agbenchmark.utils.prompts import (
END_PROMPT,
FEW_SHOT_EXAMPLES,
PROMPT_MAP,
SCORING_MAP,
)
from agbenchmark.utils.utils import agent_eligibible_for_optional_categories


class Challenge(ABC):
Expand Down Expand Up @@ -262,3 +265,15 @@ def get_dummy_scores(self, test_name: str, scores: dict[str, Any]) -> int | None
return 1

return None

def skip_optional_categories(self, config: Dict[str, Any]) -> None:
    """Skip the running test when the agent lacks a required optional category.

    The names in ``OPTIONAL_CATEGORIES`` that also appear in
    ``self.data.category`` are collected and checked against the value stored
    under ``"category"`` in ``config`` (an empty list when the key is absent).
    If the eligibility check fails, ``pytest.skip`` is invoked.

    Args:
        config: Agent configuration mapping; only its ``"category"`` entry
            is read here.
    """
    declared = self.data.category

    # Keep OPTIONAL_CATEGORIES order; retain only names this challenge lists.
    required_optional = []
    for name in OPTIONAL_CATEGORIES:
        if name in declared:
            required_optional.append(name)

    agent_categories = config.get("category", [])
    if agent_eligibible_for_optional_categories(
        required_optional, agent_categories
    ):
        return

    pytest.skip("Agent is not eligible for this category")
11 changes: 10 additions & 1 deletion agbenchmark/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from typing import Any, List, Optional

import git
from dotenv import load_dotenv
Expand Down Expand Up @@ -285,3 +285,12 @@ def get_git_commit_sha(directory: Path) -> Optional[str]:
except Exception:
print(f"{directory} is not a git repository!")
return None


def agent_eligibible_for_optional_categories(
    optional_challenge_categories: List, agent_categories: Optional[List]
) -> bool:
    """Return whether the agent covers every given optional category.

    Args:
        optional_challenge_categories: Optional categories attached to a
            challenge.
        agent_categories: Categories declared for the agent. ``None`` is
            treated the same as an empty list.

    Returns:
        ``True`` if every element of ``optional_challenge_categories`` is
        contained in ``agent_categories`` (trivially ``True`` when the former
        is empty); ``False`` otherwise.
    """
    # A ``dict.get("category", [])`` lookup still yields None when the key is
    # present with a null value; without this guard the membership test below
    # would raise TypeError instead of reporting "not eligible".
    if agent_categories is None:
        agent_categories = []
    return all(
        element in agent_categories for element in optional_challenge_categories
    )

0 comments on commit 5ed39d4

Please sign in to comment.