From 5a225a87059088c81e89047314e64f5a67236197 Mon Sep 17 00:00:00 2001 From: Gnurro Date: Thu, 2 Jan 2025 23:02:34 +0100 Subject: [PATCH] Starting on GameSpec and CLI enhancement; Add GameSpec.unify() method; Add custom game registry with three entries for the wordle variants for testing; Change clemgame.select_game() to return list of GameSpecs; Add unifying GameSpec check to clemgame.select_game(); Change clemcore.run() to iterate over list of GameSpecs; Change cli.py run -i default argument to None; Change GameBenchmark.setup() method to use CLI instances_name if given, but use GameSpec instances value otherwise; Add explicit handling of underspecified GameSpecs; Add handling of benchmark version lists in JSON GameSpec in CLI input - currently not compatible with underspecified GameSpec unification --- clemcore/__init__.py | 34 +++--- clemcore/backends/openai_compatible_api.py | 2 +- clemcore/clemgame/__init__.py | 115 ++++++++++++++++---- clemcore/clemgame/game_registry_custom.json | 34 ++++++ clemcore/cli.py | 2 +- 5 files changed, 148 insertions(+), 39 deletions(-) create mode 100644 clemcore/clemgame/game_registry_custom.json diff --git a/clemcore/__init__.py b/clemcore/__init__.py index 51c120e812..be402e2bb9 100644 --- a/clemcore/__init__.py +++ b/clemcore/__init__.py @@ -1,15 +1,17 @@ """ Main entry point """ import textwrap -from typing import List, Dict +from typing import List, Dict, Union import os.path import logging import logging.config import yaml from datetime import datetime +import json import clemcore.backends as backends import clemcore.clemgame as clemgame import clemcore.utils.file_utils as file_utils +from clemcore.clemgame import GameSpec BANNER = \ r""" @@ -59,11 +61,11 @@ def list_games(): print(game_name, wrapper.fill(game["description"])) -def run(game_name: str, model_specs: List[backends.ModelSpec], gen_args: Dict, +def run(game: Union[str, Dict, GameSpec], model_specs: List[backends.ModelSpec], gen_args: Dict, experiment_name: str = None, instances_name: str = None, results_dir: str = None): """Run specific model/models with a specified clemgame. Args: - game_name: Name of the game, matching the game's name in the game registry. + game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec. model_specs: A list of backends.ModelSpec instances for the player models to run the game with. gen_args: Text generation parameters for the backend; output length and temperature are implemented for the majority of model backends. @@ -78,17 +80,21 @@ def run(game_name: str, model_specs: List[backends.ModelSpec], gen_args: Dict, model.set_gen_args(**gen_args) # todo make this somehow available in generate method? player_models.append(model) - game_spec = clemgame.select_game(game_name) - game = clemgame.load_game(game_spec, instances_name=instances_name) - logger.info(f'Running {game_spec["game_name"]} (models={player_models if player_models is not None else "see experiment configs"})') - stdout_logger.info(f"Running game {game_spec['game_name']}") - if experiment_name: - logger.info("Only running experiment: %s", experiment_name) - game.filter_experiment.append(experiment_name) - time_start = datetime.now() - game.run(player_models=player_models, results_dir=results_dir) - time_end = datetime.now() - logger.info(f'Running {game_spec["game_name"]} took {str(time_end - time_start)}') + game_specs = clemgame.select_game(game) + print("Matched game specs in registry:", " ".join([game_spec.game_name for game_spec in game_specs])) + for game_spec in game_specs: + game_benchmark = clemgame.load_game(game_spec, instances_name=instances_name) + logger.info( + f'Running {game_spec["game_name"]} (models={player_models if player_models is not None else "see experiment configs"})') + stdout_logger.info(f"Running game {game_spec['game_name']}") + if experiment_name: # leaving this as-is for now, needs discussion conclusions + logger.info("Only running experiment: %s", experiment_name) + game_benchmark.filter_experiment.append(experiment_name) + time_start = datetime.now() + game_benchmark.run(player_models=player_models, results_dir=results_dir) + time_end = datetime.now() + logger.info(f'Running {game_spec["game_name"]} took {str(time_end - time_start)}') + except Exception as e: stdout_logger.exception(e) logger.error(e, exc_info=True) diff --git a/clemcore/backends/openai_compatible_api.py b/clemcore/backends/openai_compatible_api.py index b111c415e9..a11af202f6 100644 --- a/clemcore/backends/openai_compatible_api.py +++ b/clemcore/backends/openai_compatible_api.py @@ -52,7 +52,7 @@ def __init__(self, client: openai.OpenAI, model_spec: backends.ModelSpec): super().__init__(model_spec) self.client = client - @retry(tries=3, delay=0, logger=logger) + @retry(tries=3, delay=90, logger=logger) @ensure_messages_format def generate_response(self, messages: List[Dict]) -> Tuple[str, Any, str]: """Request a generated response from the OpenAI-compatible remote API. diff --git a/clemcore/clemgame/__init__.py b/clemcore/clemgame/__init__.py index 4d53dedcfa..e98dab6d1b 100644 --- a/clemcore/clemgame/__init__.py +++ b/clemcore/clemgame/__init__.py @@ -5,13 +5,15 @@ import os.path import sys from datetime import datetime -from typing import List, Dict, Tuple, Any +from re import match +from typing import List, Dict, Tuple, Any, Union from tqdm import tqdm from types import SimpleNamespace import importlib import importlib.util import inspect import logging +import nltk import clemcore.backends as backends import clemcore.utils.file_utils as file_utils @@ -29,16 +31,18 @@ class GameSpec(SimpleNamespace): Holds all necessary information to play game in clembench (see README for list of attributes) """ - def __init__(self, **kwargs): + def __init__(self, allow_underspecified: bool = False, **kwargs): super().__init__(**kwargs) # check for required fields - if "game_name" not in self: - raise KeyError(f"No game name specified in entry {kwargs}") - if "game_path" not in self: - raise KeyError(f"No game path specified in {kwargs}") + if not allow_underspecified: + if "game_name" not in self: + raise KeyError(f"No game name specified in entry {kwargs}") + if "game_path" not in self: + raise KeyError(f"No game path specified in {kwargs}") # make game_path absolute - if not os.path.isabs(self.game_path): - self.game_path = os.path.join(file_utils.project_root(), self.game_path) + if hasattr(self, 'game_path'): + if not os.path.isabs(self.game_path): + self.game_path = os.path.join(file_utils.project_root(), self.game_path) def __repr__(self): """Returns string representation of this GameSpec.""" @@ -68,7 +72,7 @@ def __contains__(self, attribute): return hasattr(self, attribute) @classmethod - def from_dict(cls, spec: Dict): + def from_dict(cls, spec: Dict, allow_underspecified: bool = False): """Initialize a GameSpec from a dictionary. Can be used to directly create a GameSpec from a game registry entry. Args: @@ -76,7 +80,7 @@ def from_dict(cls, spec: Dict): Returns: A GameSpec instance with the data specified by the passed dict. """ - return cls(**spec) + return cls(allow_underspecified, **spec) def matches(self, spec: Dict): """Check if the game features match a given specification. @@ -115,6 +119,21 @@ def game_file_exists(self): """ return True if os.path.isfile(self.get_game_file()) else False + def unify(self, other: "GameSpec") -> "GameSpec": + """Unify two GameSpec instances. + Args: + other: The other GameSpec instance this instance is to be unified with. + Returns: + The GameSpec unification of this GameSpec instance and the passed GameSpec instance. + Raises: + ValueError: A ValueError exception is raised if the passed GameSpec instance does not unify with this + GameSpec instance. + """ + result = nltk.featstruct.unify(self.__dict__, other.__dict__) + if result is None: + raise ValueError(f"{self} does not unify with {other}") + return GameSpec(**result) + def load_custom_game_registry(_game_registry_path: str = None, is_optional=True): """Load a custom game registry. @@ -157,25 +176,69 @@ def load_game_registry(_game_registry_path: str = None, is_mandatory=True): game_registry.append(_game_spec) -def select_game(game_name: str) -> GameSpec: - """Select a GameSpec from the game registry by game name. +def select_game(game: Union[str, Dict, GameSpec]) -> List[GameSpec]: + """Select a list of GameSpecs from the game registry by unifying game spec dict or game name. Args: - game_name: String name of the selected game. + game: String name of the selected game. Returns: A GameSpec instance from the game registry corresponding to the passed game_name. Raises: ValueError: No game specification matching the passed game_name was found in the game registry. """ - # return first entry that matches game_name - for game in game_registry: - if game["game_name"] == game_name: - if game.game_file_exists(): - return game + # check if passed game is parseable JSON: + game_is_dict = False + try: + game = game.replace("'", '"') + game = json.loads(game) + game_is_dict = True + except Exception: + print(f"Passed game {game} does not parse as JSON!") + pass + + # convert passed JSON to GameSpec for unification: + game_is_gamespec = False + if game_is_dict: + game = GameSpec.from_dict(game, allow_underspecified=True) + game_is_gamespec = True + elif type(game) == GameSpec: + game_is_gamespec = True + + if game_is_gamespec: + matching_registered_games: list = list() + # iterate over game registry: + for registered_game_spec in game_registry: + + if hasattr(game, 'benchmark'): + # passed game spec specifies benchmark version + for benchmark_version in game.benchmark: + if benchmark_version in registered_game_spec.benchmark: + if registered_game_spec.game_file_exists(): + matching_registered_games.append(registered_game_spec) + else: - raise ValueError(f"Game master file master.py not found in {game['game_path']}." - f"Update clemcore/clemgame/game_registry.json (or game_registry_custom.json) with the right path for {game_name}.") - raise ValueError(f"No games found matching the given specification '{game_name}'. " - "Make sure the game name matches the name in clemcore/clemgame/game_registry.json (or game_registry_custom.json)") + # get unifying entries: + unifying_game_spec = None + try: + unifying_game_spec = game.unify(registered_game_spec) + if unifying_game_spec.game_file_exists(): + # print(f"Found unifying game registry entry: {unifying_game_spec}") + matching_registered_games.append(unifying_game_spec) + except ValueError: + continue + + return matching_registered_games + else: + # return first entry that matches game_name + for registered_game_spec in game_registry: + if registered_game_spec["game_name"] == game: + if registered_game_spec.game_file_exists(): + return [registered_game_spec] + else: + raise ValueError(f"Game master file master.py not found in {registered_game_spec['game_path']}." + f"Update clemcore/clemgame/game_registry.json (or game_registry_custom.json) with the right path for {registered_game_spec}.") + raise ValueError(f"No games found matching the given specification '{registered_game_spec}'. " + "Make sure the game name matches the name in clemcore/clemgame/game_registry.json (or game_registry_custom.json)") + # extension to select subset of games # (postponed because it introduces more complexity # on things like how to specify specific episodes (which could, however be integrated into the game spec @@ -1045,6 +1108,7 @@ def __init__(self, game_spec: GameSpec): game_spec: The name of the game (as specified in game_registry) """ super().__init__(game_spec["game_name"], game_spec["game_path"]) + self.game_spec = game_spec self.instances = None self.filter_experiment: List[str] = [] self.is_single_player = True if game_spec["players"] == "one" else False @@ -1055,7 +1119,12 @@ def setup(self, instances_name: str = None): game_path: Path to the game directory. instances_name: Name of the instances JSON file to be used for the benchmark run. """ - self.instances = self.load_instances(instances_name) + if instances_name: + self.instances = self.load_instances(instances_name) + elif hasattr(self.game_spec, 'instances'): + self.instances = self.load_instances(self.game_spec.instances) + else: + self.instances = self.load_instances("instances") # fallback to instances.json default def build_transcripts(self, results_dir: str): """Create and store readable HTML and LaTeX episode transcripts. diff --git a/clemcore/clemgame/game_registry_custom.json b/clemcore/clemgame/game_registry_custom.json new file mode 100644 index 0000000000..15d356cbe1 --- /dev/null +++ b/clemcore/clemgame/game_registry_custom.json @@ -0,0 +1,34 @@ +[ + { + "game_name": "wordle", + "game_path": "../clemgames/wordle", + "description": "Wordle 5-letter word guessing game.", + "main_game": "wordle", + "players": "one", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "wordle_withclue", + "game_path": "../clemgames/wordle", + "description": "Wordle 5-letter word guessing game with clue giver.", + "main_game": "wordle", + "instances": "instances_withclue", + "players": "two", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "wordle_withcritic", + "game_path": "../clemgames/wordle", + "description": "Wordle 5-letter word guessing game with critic.", + "main_game": "wordle", + "instances": "instances_withcritic", + "players": "two", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + } +] \ No newline at end of file diff --git a/clemcore/cli.py b/clemcore/cli.py index 2ec3814cac..9ed2588c80 100644 --- a/clemcore/cli.py +++ b/clemcore/cli.py @@ -128,7 +128,7 @@ def main(args: argparse.Namespace): help="Specify the maximum number of tokens to be generated per turn (except for cohere). " "Be careful with high values which might lead to exceed your API token limits." "Default: 100.") - run_parser.add_argument("-i", "--instances_name", type=str, default="instances", + run_parser.add_argument("-i", "--instances_name", type=str, default=None, help="The instances file name (.json suffix will be added automatically.") run_parser.add_argument("-r", "--results_dir", type=str, default="results", help="A relative or absolute path to the results root directory. "