From f4b13487aec1ddccc08bca0e136bd15915241a32 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 20:50:15 +0000 Subject: [PATCH 01/13] Fixups and testing for cli config file parsing --- .../config/schedulers/sync_scheduler.jsonc | 2 +- mlos_bench/mlos_bench/launcher.py | 5 +- .../mlos_bench/schedulers/base_scheduler.py | 10 ++ .../tests/launcher_parse_args_test.py | 107 +++++++++++------- 4 files changed, 84 insertions(+), 40 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc index daf95d56fae..3dc8ff167ea 100644 --- a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc +++ b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc @@ -6,7 +6,7 @@ "config": { "trial_config_repeat_count": 3, - "max_trials": -1, // Limited only in hte Optimizer logic/config. + "max_trials": -1, // Limited only in the Optimizer logic/config. "teardown": false } } diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index a9aa9e3f461..23e256dcbda 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -95,11 +95,14 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + args_dict = vars(args) self.global_config = self._load_config( config.get("globals", []) + (args.globals or []), (args.config_path or []) + config.get("config_path", []), args_rest, - {key: val for (key, val) in config.items() if key not in vars(args)}, + # Include any item from the cli config file that either isn't in the cli + # args at all or whose cli arg is missing. + {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, ) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. 
# It's useful to keep it there explicitly mostly for the --help output. diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 6e3da151e5d..96c08b9bd08 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -85,6 +85,16 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + @property + def trial_config_repeat_count(self) -> int: + """Gets the number of trials to run for a given config.""" + return self._trial_config_repeat_count + + @property + def max_trials(self) -> int: + """Gets the maximum number of trials to run for a given config, or -1 for no limit.""" + return self._max_trials + def __repr__(self) -> str: """ Produce a human-readable version of the Scheduler (mostly for logging). diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 90e52bb880b..53b6c955e4f 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -53,12 +53,11 @@ def config_paths() -> List[str]: ] -def test_launcher_args_parse_1(config_paths: List[str]) -> None: - """ - Test that using multiple --globals arguments works and that multiple space - separated options to --config-paths works. - Check $var expansion and Environment loading. - """ +# This is part of the minimal required args by the Launcher. +ENV_CONF_PATH = 'environments/mock/mock_env.jsonc' + + +def _get_launcher(desc: str, cli_args: str) -> Launcher: # The VSCode pytest wrapper actually starts in a different directory before # changing into the code directory, but doesn't update the PWD environment # variable so we use a separate variable. @@ -67,23 +66,65 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: if sys.platform == 'win32': # Some env tweaks for platform compatibility. 
environ['USER'] = environ['USERNAME'] + launcher = Launcher(description=desc, argv=cli_args.split()) + # Check the basic parent service + assert isinstance(launcher.service, SupportsConfigLoading) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in + return launcher + - # This is part of the minimal required args by the Launcher. - env_conf_path = 'environments/mock/mock_env.jsonc' +def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """ + Test that we get the defaults we expect when using minimal config arg examples. + """ + cli_args = '--config-paths ' + ' '.join(config_paths) + \ + f' --environment {ENV_CONF_PATH}' + \ + ' --globals globals/global_test_config.jsonc' + launcher = _get_launcher(__name__, cli_args) + # Check that the first --globals file is loaded and $var expansion is handled. + assert launcher.global_config['experiment_id'] == 'MockExperiment' + assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + # Check that secondary expansion also works. + assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + # Check that we can expand a $var in a config file that references an environment variable. + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ + == path_join(os.getcwd(), "foo", abs_path=True) + assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert launcher.teardown # defaults + # Check that the environment that got loaded looks to be of the right type. + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" + assert check_class_name(launcher.environment, env_config['class']) + # Check that the optimizer looks right. + assert isinstance(launcher.optimizer, OneShotOptimizer) + # Check that the optimizer got initialized with defaults. 
+ assert launcher.optimizer.tunable_params.is_defaults() + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + # Check that we pick up the right scheduler config: + assert isinstance(launcher.scheduler, SyncScheduler) + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default + + +def test_launcher_args_parse_1(config_paths: List[str]) -> None: + """ + Test that using multiple --globals arguments works and that multiple space + separated options to --config-paths works. + Check $var expansion and Environment loading. + """ + # Here we have multiple paths following --config-paths and --service. cli_args = '--config-paths ' + ' '.join(config_paths) + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ + ' services/remote/mock/mock_remote_exec_service.jsonc' + \ ' --scheduler schedulers/sync_scheduler.jsonc' + \ - f' --environment {env_conf_path}' + \ + f' --environment {ENV_CONF_PATH}' + \ ' --globals globals/global_test_config.jsonc' + \ ' --globals globals/global_test_extra_config.jsonc' \ ' --test_global_value_2 from-args' - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. 
assert launcher.global_config['experiment_id'] == 'MockExperiment' assert launcher.global_config['testVmName'] == 'MockExperiment-vm' @@ -99,8 +140,8 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' assert launcher.teardown # Check that the environment that got loaded looks to be of the right type. - env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.environment, env_config['class']) + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. @@ -108,8 +149,8 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == -1 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 3 # from the custom sync_scheduler.jsonc config + assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: @@ -117,17 +158,9 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - # The VSCode pytest wrapper actually starts in a different directory before - # changing into the code directory, but doesn't update the PWD environment - # variable so we use a separate variable. 
- # See global_test_config.jsonc for more details. - environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == 'win32': - # Some env tweaks for platform compatibility. - environ['USER'] = environ['USERNAME'] - config_file = 'cli/test-cli-config.jsonc' globals_file = 'globals/global_test_config.jsonc' + # Here we have multiple --config-path and --service args, each with their own path. cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ f' --config {config_file}' + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ @@ -139,13 +172,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: ' --random-seed 1234' + \ ' --trial-config-repeat-count 5' + \ ' --max_trials 200' - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsFileShareOps) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. 
assert launcher.global_config['experiment_id'] == 'MockeryExperiment' @@ -189,8 +220,8 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that CLI parameter overrides JSON config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 5 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == 200 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args + assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. assert config['random_seed'] == 42 From a08bf724febc3dc2dc4bb3f122055ffda4180700 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 20:57:50 +0000 Subject: [PATCH 02/13] more tests --- .../tests/config/cli/test-cli-config.jsonc | 2 +- .../tests/launcher_parse_args_test.py | 20 ++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc index 9ffaa51180e..436507ce840 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc +++ b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc @@ -17,7 +17,7 @@ "services/remote/mock/mock_fileshare_service.jsonc" ], - "trial_config_repeat_count": 1, + "trial_config_repeat_count": 2, "random_seed": 42, "random_init": true diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 53b6c955e4f..455dc4ad061 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -231,5 +231,23 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # assert launcher.optimizer.seed == 1234 +def test_launcher_args_parse_3(config_paths: List[str]) -> None: + """ + Check that cli file values take 
precedence over other values. + """ + config_file = 'cli/test-cli-config.jsonc' + globals_file = 'globals/global_test_config.jsonc' + # Here we have multiple --config-path and --service args, each with their own path. + cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ + f' --config {config_file}' + \ + f' --globals {globals_file}' + launcher = _get_launcher(__name__, cli_args) + + # Check that CLI file parameter overrides JSON config: + assert isinstance(launcher.scheduler, SyncScheduler) + # from test-cli-config.jsonc (should override scheduler config file) + assert launcher.scheduler.trial_config_repeat_count == 2 + + if __name__ == '__main__': - pytest.main([__file__, "-n1"]) + pytest.main([__file__, "-n0"]) From 0a22b783fe1e47bb7ded0709c0a28e7252e61fb0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 21:00:57 +0000 Subject: [PATCH 03/13] comments --- mlos_bench/mlos_bench/tests/launcher_parse_args_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 455dc4ad061..d5a92fc30f6 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -237,7 +237,8 @@ def test_launcher_args_parse_3(config_paths: List[str]) -> None: """ config_file = 'cli/test-cli-config.jsonc' globals_file = 'globals/global_test_config.jsonc' - # Here we have multiple --config-path and --service args, each with their own path. + # Here we don't override values in test-cli-config with cli args but ensure that + # those take precedence over other config files. 
cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ f' --config {config_file}' + \ f' --globals {globals_file}' From c09b427e1c774519624ff83d1f33d030922f4283 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 21:14:31 +0000 Subject: [PATCH 04/13] wip --- mlos_bench/mlos_bench/launcher.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 23e256dcbda..81d72b0be0a 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -102,8 +102,12 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st args_rest, # Include any item from the cli config file that either isn't in the cli # args at all or whose cli arg is missing. - {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, + # {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, + {key: val for (key, val) in config.items() if key not in args_dict}, ) + # FIXME: Something's changed: + # pytest -n0 mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py -k azure-redis-bench + raise ValueError(f"global_config: {self.global_config}\nargs_dict: {args_dict}") # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. 
if args.experiment_id: From bcf05f93253df1a9f65f2f0bcb5038ac2d66b428 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 13 May 2024 21:55:37 +0000 Subject: [PATCH 05/13] fixups --- mlos_bench/mlos_bench/launcher.py | 47 +++++++++++++------ .../mlos_bench/optimizers/base_optimizer.py | 11 +++-- .../mlos_bench/schedulers/base_scheduler.py | 17 ++++++- 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 81d72b0be0a..7b2bf506235 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -64,7 +64,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st """ parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) - (args, args_rest) = self._parse_args(parser, argv) + (args, path_args, args_rest) = self._parse_args(parser, argv) # Bootstrap config loader: command line takes priority. config_path = args.config_path or [] @@ -95,19 +95,23 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + # Prepare global_config from a combination of global config files, cli configs, and cli args. args_dict = vars(args) + # teardown (bool) conflicts with Environment configs that use it for shell + # commands (list), so we exclude it from copying over + excluded_cli_args = path_args + ["teardown"] + # Include (almost) any item from the cli config file that either isn't in the cli + # args at all or whose cli arg is missing. 
+ cli_config_args = {key: val for (key, val) in config.items() + if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args} + self.global_config = self._load_config( - config.get("globals", []) + (args.globals or []), - (args.config_path or []) + config.get("config_path", []), - args_rest, - # Include any item from the cli config file that either isn't in the cli - # args at all or whose cli arg is missing. - # {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, - {key: val for (key, val) in config.items() if key not in args_dict}, + args_globals=config.get("globals", []) + (args.globals or []), + config_path=(args.config_path or []) + config.get("config_path", []), + args_rest=args_rest, + global_config=cli_config_args, ) - # FIXME: Something's changed: - # pytest -n0 mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py -k azure-redis-bench - raise ValueError(f"global_config: {self.global_config}\nargs_dict: {args_dict}") + # TODO: Can we generalize these two rules using excluded_cli_args? # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: @@ -171,10 +175,12 @@ def service(self) -> Service: return self._parent_service @staticmethod - def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str]]: + def _parse_args(parser: argparse.ArgumentParser, + argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str], List[str]]: """ Parse the command line arguments. """ + path_args = [] parser.add_argument( '--config', required=False, help='Main JSON5 configuration file. 
Its keys are the same as the' + @@ -182,10 +188,12 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '\n' + ' See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ ' + ' for additional config examples for this and other arguments.') + path_args.append('config') parser.add_argument( '--log_file', '--log-file', required=False, help='Path to the log file. Use stdout if omitted.') + path_args.append('log_file') parser.add_argument( '--log_level', '--log-level', required=False, type=str, @@ -196,20 +204,26 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '--config_path', '--config-path', '--config-paths', '--config_paths', nargs="+", action='extend', required=False, help='One or more locations of JSON config files.') + path_args.append('config_path') + path_args.append('config_paths') parser.add_argument( '--service', '--services', nargs='+', action='extend', required=False, help='Path to JSON file with the configuration of the service(s) for environment(s) to use.') + path_args.append('service') + path_args.append('services') parser.add_argument( '--environment', required=False, help='Path to JSON file with the configuration of the benchmarking environment(s).') + path_args.append('environment') parser.add_argument( '--optimizer', required=False, help='Path to the optimizer configuration file. If omitted, run' + ' a single trial with default (or specified in --tunable_values).') + path_args.append('optimizer') parser.add_argument( '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, @@ -219,11 +233,13 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '--scheduler', required=False, help='Path to the scheduler configuration file. By default, use' + ' a single worker synchronous scheduler.') + path_args.append('scheduler') parser.add_argument( '--storage', required=False, help='Path to the storage configuration file.' 
+ ' If omitted, use the ephemeral in-memory SQL storage.') + path_args.append('storage') parser.add_argument( '--random_init', '--random-init', required=False, default=False, @@ -239,11 +255,13 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T help='Path to one or more JSON files that contain values of the tunable' + ' parameters. This can be used for a single trial (when no --optimizer' + ' is specified) or as default values for the first run in optimization.') + path_args.append('tunable_values') parser.add_argument( '--globals', nargs="+", action='extend', required=False, help='Path to one or more JSON files that contain additional' + ' [private] parameters of the benchmarking environment.') + path_args.append('globals') parser.add_argument( '--no_teardown', '--no-teardown', required=False, default=None, @@ -270,7 +288,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T argv = sys.argv[1:].copy() (args, args_rest) = parser.parse_known_args(argv) - return (args, args_rest) + return (args, path_args, args_rest) @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: @@ -303,7 +321,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: _LOG.debug("Parsed config: %s", config) return config - def _load_config(self, + def _load_config(self, *, args_globals: Iterable[str], config_path: Iterable[str], args_rest: Iterable[str], @@ -404,7 +422,6 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: config={ "experiment_id": "UNDEFINED - override from global config", "trial_id": 0, - "config_id": -1, "trial_config_repeat_count": 1, "teardown": self.teardown, }, diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 51cbf9694fa..89ee6c9fd13 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ 
b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -135,20 +135,23 @@ def __exit__(self, ex_type: Optional[Type[BaseException]], @property def current_iteration(self) -> int: """ - The current number of iterations (trials) registered. + The current number of iterations (suggestions) registered. Note: this may or may not be the same as the number of configurations. - See Also: Launcher.trial_config_repeat_count. + See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials. """ return self._iter + # TODO: finish renaming iterations to suggestions. + # See Also: https://github.com/microsoft/MLOS/pull/713 + @property def max_iterations(self) -> int: """ - The maximum number of iterations (trials) to run. + The maximum number of iterations (suggestions) to run. Note: this may or may not be the same as the number of configurations. - See Also: Launcher.trial_config_repeat_count. + See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials. """ return self._max_iter diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 7d510c1e20f..1c974da957d 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -17,6 +17,7 @@ from pytz import UTC +from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.storage.base_storage import Storage @@ -63,6 +64,7 @@ def __init__(self, *, self.global_config = global_config config = merge_parameters(dest=config.copy(), source=global_config, required_keys=["experiment_id", "trial_id"]) + self._validate_json_config(config) self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) @@ -85,6 +87,19 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + def _validate_json_config(self, 
config: dict) -> None: + """ + Reconstructs a basic json config that this class might have been + instantiated from in order to validate configs provided outside the + file loading mechanism. + """ + json_config: dict = { + "class": self.__class__.__module__ + "." + self.__class__.__name__, + } + if config: + json_config["config"] = config + ConfigSchema.SCHEDULER.validate(json_config) + @property def trial_config_repeat_count(self) -> int: """Gets the number of trials to run for a given config.""" @@ -92,7 +107,7 @@ def trial_config_repeat_count(self) -> int: @property def max_trials(self) -> int: - """Gets the maximum number of trials to run for a given config, or -1 for no limit.""" + """Gets the maximum number of trials to run for a given experiment, or -1 for no limit.""" return self._max_trials def __repr__(self) -> str: From 9023eb73a876a4e714bfdbde8744206c37343a0b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 22 Jul 2024 19:49:12 +0000 Subject: [PATCH 06/13] slurp some files from main --- .editorconfig | 3 + .pylintrc | 52 ----- Makefile | 535 +++++++++++++++++++++++++++++++++++++------------ pyproject.toml | 73 +++++++ setup.cfg | 20 +- 5 files changed, 498 insertions(+), 185 deletions(-) delete mode 100644 .pylintrc create mode 100644 pyproject.toml diff --git a/.editorconfig b/.editorconfig index e984d475952..7e753174de4 100644 --- a/.editorconfig +++ b/.editorconfig @@ -12,6 +12,9 @@ charset = utf-8 # Note: this is not currently supported by all editors or their editorconfig plugins. max_line_length = 132 +[*.py] +max_line_length = 99 + # Makefiles need tab indentation [{Makefile,*.mk}] indent_style = tab diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 6b308d1966d..00000000000 --- a/.pylintrc +++ /dev/null @@ -1,52 +0,0 @@ -# vim: set ft=dosini: - -[MAIN] -# Specify a score threshold to be exceeded before program exits with error. -fail-under=9.9 - -# Make sure public methods are documented. 
-# See Also: https://github.com/PyCQA/pydocstyle/issues/309#issuecomment-1426642147 -# Also fail on unused imports. -fail-on= - missing-function-docstring, - unused-import - -# Ignore pylint complaints about an upstream dependency. -ignored-modules=ConfigSpace.hyperparameters - -# Help inform pylint where to find the project's source code without needing to relyon PYTHONPATH. -#init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc())); from logging import warning; warning(sys.path)" -init-hook="from logging import warning; warning(sys.path)" - -# Load some extra checkers. -load-plugins= - pylint.extensions.bad_builtin, - pylint.extensions.code_style, - pylint.extensions.docparams, - pylint.extensions.docstyle, - pylint.extensions.for_any_all, - pylint.extensions.mccabe, - pylint.extensions.no_self_use, - pylint.extensions.private_import, - pylint.extensions.redefined_loop_name, - pylint.extensions.redefined_variable_type, - pylint.extensions.set_membership, - pylint.extensions.typing - -[FORMAT] -# Maximum number of characters on a single line. 
-max-line-length=132 - -[MESSAGE CONTROL] -disable= - fixme, - no-else-return, - consider-using-assignment-expr, - deprecated-typing-alias, # disable for now - only deprecated recently - docstring-first-line-empty, - consider-alternative-union-syntax, # disable for now - still supporting python 3.8 - missing-raises-doc - -[STRING] -#check-quote-consistency=yes -check-str-concat-over-line-jumps=yes diff --git a/Makefile b/Makefile index a62cd8daeaa..297ba393031 100644 --- a/Makefile +++ b/Makefile @@ -26,18 +26,26 @@ MAKEFLAGS += -j$(shell nproc) #MAKEFLAGS += -Oline .PHONY: all -all: check test dist dist-test doc licenseheaders +all: format check test dist dist-test doc | conda-env .PHONY: conda-env conda-env: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/conda-env.${CONDA_ENV_NAME}.build-stamp: ${ENV_YML} mlos_core/setup.py mlos_bench/setup.py mlos_viz/setup.py +MLOS_CORE_CONF_FILES := mlos_core/pyproject.toml mlos_core/setup.py mlos_core/MANIFEST.in +MLOS_BENCH_CONF_FILES := mlos_bench/pyproject.toml mlos_bench/setup.py mlos_bench/MANIFEST.in +MLOS_VIZ_CONF_FILES := mlos_viz/pyproject.toml mlos_viz/setup.py mlos_viz/MANIFEST.in +MLOS_GLOBAL_CONF_FILES := setup.cfg pyproject.toml + +MLOS_PKGS := mlos_core mlos_bench mlos_viz +MLOS_PKG_CONF_FILES := $(MLOS_CORE_CONF_FILES) $(MLOS_BENCH_CONF_FILES) $(MLOS_VIZ_CONF_FILES) $(MLOS_GLOBAL_CONF_FILES) + +build/conda-env.${CONDA_ENV_NAME}.build-stamp: ${ENV_YML} $(MLOS_PKG_CONF_FILES) @echo "CONDA_SOLVER: ${CONDA_SOLVER}" @echo "CONDA_EXPERIMENTAL_SOLVER: ${CONDA_EXPERIMENTAL_SOLVER}" @echo "CONDA_INFO_LEVEL: ${CONDA_INFO_LEVEL}" conda env list -q | grep -q "^${CONDA_ENV_NAME} " || conda env create ${CONDA_INFO_LEVEL} -n ${CONDA_ENV_NAME} -f ${ENV_YML} conda env update ${CONDA_INFO_LEVEL} -n ${CONDA_ENV_NAME} --prune -f ${ENV_YML} - $(MAKE) clean-check clean-test clean-doc + $(MAKE) clean-format clean-check clean-test clean-doc clean-dist touch $@ .PHONY: clean-conda-env @@ -45,51 +53,254 @@ clean-conda-env: conda 
env remove -y ${CONDA_INFO_LEVEL} -n ${CONDA_ENV_NAME} rm -f build/conda-env.${CONDA_ENV_NAME}.build-stamp + +# Since these targets potentially change the files we need to run them in sequence. +# In future versions of make we can do that by marking each as a .NOTPARALLEL psuedo target. +# But with make 4.3 that changes the entire Makefile to be serial. + +# Here we make dynamic prereqs to apply to other targets that need to run in sequence. +FORMAT_PREREQS := + +.PHONY: format +format: build/format.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter format,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/format.${CONDA_ENV_NAME}.build-stamp +endif + +build/format.${CONDA_ENV_NAME}.build-stamp: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/isort.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/black.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/docformatter.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +.PHONY: licenseheaders +licenseheaders: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter licenseheaders,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif + +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(PYTHON_FILES) +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(SCRIPT_FILES) +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(SQL_FILES) doc/mit-license.tmpl +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: doc/mit-license.tmpl +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: + # Note: to avoid makefile dependency loops, we don't touch the setup.py + # files as that would force the conda-env to be rebuilt. 
+ conda run -n ${CONDA_ENV_NAME} licenseheaders -t doc/mit-license.tmpl \ + -E .py .sh .ps1 .sql .cmd \ + -x mlos_bench/setup.py mlos_core/setup.py mlos_viz/setup.py + touch $@ + +.PHONY: isort +isort: build/isort.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter isort,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif + +build/isort.${CONDA_ENV_NAME}.build-stamp: build/isort.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/isort.${CONDA_ENV_NAME}.build-stamp: build/isort.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/isort.${CONDA_ENV_NAME}.build-stamp: build/isort.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/isort.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# NOTE: when using pattern rules (involving %) we can only add one line of +# prerequisities, so we use this pattern to compose the list as variables. + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. +ISORT_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +ISORT_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ISORT_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +ISORT_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/isort.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/isort.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/isort.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +build/isort.%.${CONDA_ENV_NAME}.build-stamp: $(ISORT_COMMON_PREREQS) + # Reformat python file imports with isort. 
+ conda run -n ${CONDA_ENV_NAME} isort --verbose --only-modified --atomic -j0 $(filter %.py,$+) + touch $@ + +.PHONY: black +black: build/black.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter black,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/black.${CONDA_ENV_NAME}.build-stamp +endif + +build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/black.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. +BLACK_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +BLACK_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format isort,$(MAKECMDGOALS))) +BLACK_COMMON_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif +BLACK_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/black.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/black.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/black.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +build/black.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_COMMON_PREREQS) + # Reformat python files with black. 
+ conda run -n ${CONDA_ENV_NAME} black $(filter %.py,$+) + touch $@ + +.PHONY: docformatter +docformatter: build/docformatter.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter docformatter,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/docformatter.${CONDA_ENV_NAME}.build-stamp +endif + +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. +DOCFORMATTER_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format isort,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format black,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/black.${CONDA_ENV_NAME}.build-stamp +endif +DOCFORMATTER_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +DOCFORMATTER_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/docformatter.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/docformatter.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/docformatter.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +# docformatter returns non-zero when it changes anything so instead we ignore that +# return code and just have it recheck itself immediately +build/docformatter.%.${CONDA_ENV_NAME}.build-stamp: $(DOCFORMATTER_COMMON_PREREQS) + # Reformat python file docstrings with docformatter. 
+ conda run -n ${CONDA_ENV_NAME} docformatter --in-place $(filter %.py,$+) || true + conda run -n ${CONDA_ENV_NAME} docformatter --check --diff $(filter %.py,$+) + touch $@ + + .PHONY: check -check: pycodestyle pydocstyle pylint mypy # cspell licenseheaders markdown-link-check +check: isort-check black-check docformatter-check pycodestyle pydocstyle pylint mypy # cspell markdown-link-check + +.PHONY: black-check +black-check: build/black-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +black-check: build/black-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +black-check: build/black-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure black format rules run before black-check rules. +build/black-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/black-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/black-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +BLACK_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +BLACK_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/black-check.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_CHECK_COMMON_PREREQS) + # Check for import sort order. + # Note: if this fails use "make format" or "make black" to fix it. + conda run -n ${CONDA_ENV_NAME} black --verbose --check --diff $(filter %.py,$+) + touch $@ + +.PHONY: docformatter-check +docformatter-check: build/docformatter-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +docformatter-check: build/docformatter-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +docformatter-check: build/docformatter-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure docformatter format rules run before docformatter-check rules. 
+build/docformatter-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/docformatter-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/docformatter-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +BLACK_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +BLACK_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/docformatter-check.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_CHECK_COMMON_PREREQS) + # Check for import sort order. + # Note: if this fails use "make format" or "make docformatter" to fix it. + conda run -n ${CONDA_ENV_NAME} docformatter --check --diff $(filter %.py,$+) + touch $@ + +.PHONY: isort-check +isort-check: build/isort-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +isort-check: build/isort-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +isort-check: build/isort-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure isort format rules run before isort-check rules. +build/isort-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/isort-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/isort-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +ISORT_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +ISORT_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +ISORT_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/isort-check.%.${CONDA_ENV_NAME}.build-stamp: $(ISORT_CHECK_COMMON_PREREQS) + # Note: if this fails use "make format" or "make isort" to fix it. 
+ conda run -n ${CONDA_ENV_NAME} isort --only-modified --check --diff -j0 $(filter %.py,$+) + touch $@ .PHONY: pycodestyle -pycodestyle: conda-env pycodestyle: build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp pycodestyle: build/pycodestyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp pycodestyle: build/pycodestyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp - build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/pycodestyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/pycodestyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/pycodestyle.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg +PYCODESTYLE_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYCODESTYLE_COMMON_PREREQS += $(FORMAT_PREREQS) +PYCODESTYLE_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/pycodestyle.%.${CONDA_ENV_NAME}.build-stamp: $(PYCODESTYLE_COMMON_PREREQS) # Check for decent pep8 code style with pycodestyle. - # Note: if this fails, try using autopep8 to fix it. - conda run -n ${CONDA_ENV_NAME} pycodestyle $(filter-out setup.cfg,$+) + # Note: if this fails, try using 'make format' to fix it. + conda run -n ${CONDA_ENV_NAME} pycodestyle $(filter %.py,$+) touch $@ .PHONY: pydocstyle -pydocstyle: conda-env pydocstyle: build/pydocstyle.mlos_core.${CONDA_ENV_NAME}.build-stamp pydocstyle: build/pydocstyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp pydocstyle: build/pydocstyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp - build/pydocstyle.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/pydocstyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/pydocstyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/pydocstyle.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg - # Check for decent pep8 doc style with pydocstyle. 
- conda run -n ${CONDA_ENV_NAME} pydocstyle $(filter-out setup.cfg,$+) - touch $@ +PYDOCSTYLE_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYDOCSTYLE_COMMON_PREREQS += $(FORMAT_PREREQS) +PYDOCSTYLE_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) -.PHONY: licenseheaders -licenseheaders: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp - -build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(PYTHON_FILES) $(SCRIPT_FILES) $(SQL_FILES) doc/mit-license.tmpl - # Note: to avoid makefile dependency loops, we don't touch the setup.py - # files as that would force the conda-env to be rebuilt. - conda run -n ${CONDA_ENV_NAME} licenseheaders -t doc/mit-license.tmpl \ - -E .py .sh .ps1 .sql .cmd \ - -x mlos_bench/setup.py mlos_core/setup.py mlos_viz/setup.py +build/pydocstyle.%.${CONDA_ENV_NAME}.build-stamp: $(PYDOCSTYLE_COMMON_PREREQS) + # Check for decent pep8 doc style with pydocstyle. + conda run -n ${CONDA_ENV_NAME} pydocstyle $(filter %.py,$+) touch $@ .PHONY: cspell @@ -101,7 +312,7 @@ cspell: build/cspell-container.build-stamp ./.devcontainer/scripts/run-cspell.sh endif -build/cspell-container.build-stamp: +build/cspell-container.build-stamp: $(FORMAT_PREREQS) # Build the docker image with cspell in it. $(MAKE) -C .devcontainer/build cspell touch $@ @@ -115,13 +326,12 @@ markdown-link-check: build/markdown-link-check-container.build-stamp ./.devcontainer/scripts/run-markdown-link-check.sh endif -build/markdown-link-check-container.build-stamp: +build/markdown-link-check-container.build-stamp: $(FORMAT_PREREQS) # Build the docker image with markdown-link-check in it. 
$(MAKE) -C .devcontainer/build markdown-link-check touch $@ .PHONY: pylint -pylint: conda-env pylint: build/pylint.mlos_core.${CONDA_ENV_NAME}.build-stamp pylint: build/pylint.mlos_bench.${CONDA_ENV_NAME}.build-stamp pylint: build/pylint.mlos_viz.${CONDA_ENV_NAME}.build-stamp @@ -131,12 +341,15 @@ build/pylint.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/pylint.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/pylint.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/pylint.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp .pylintrc - conda run -n ${CONDA_ENV_NAME} pylint -j0 $(filter-out .pylintrc,$+) +PYLINT_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYLINT_COMMON_PREREQS += $(FORMAT_PREREQS) +PYLINT_COMMON_PREREQS += pyproject.toml + +build/pylint.%.${CONDA_ENV_NAME}.build-stamp: $(PYLINT_COMMON_PREREQS) + conda run -n ${CONDA_ENV_NAME} pylint -j0 $(filter %.py,$+) touch $@ .PHONY: flake8 -flake8: conda-env flake8: build/flake8.mlos_core.${CONDA_ENV_NAME}.build-stamp flake8: build/flake8.mlos_bench.${CONDA_ENV_NAME}.build-stamp flake8: build/flake8.mlos_viz.${CONDA_ENV_NAME}.build-stamp @@ -145,65 +358,65 @@ build/flake8.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/flake8.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/flake8.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/flake8.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg - conda run -n ${CONDA_ENV_NAME} flake8 -j0 $(filter-out setup.cfg,$+) +FLAKE8_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +FLAKE8_COMMON_PREREQS += $(FORMAT_PREREQS) +FLAKE8_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/flake8.%.${CONDA_ENV_NAME}.build-stamp: $(FLAKE8_COMMON_PREREQS) + conda run -n ${CONDA_ENV_NAME} flake8 -j0 $(filter %.py,$+) touch $@ .PHONY: mypy 
-mypy: conda-env mypy: build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp mypy: build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp mypy: build/mypy.mlos_viz.${CONDA_ENV_NAME}.build-stamp +# Run these in order. build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp build/mypy.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp -NON_MYPY_FILES := scripts/dmypy-wrapper.sh setup.cfg -NON_MYPY_FILES += build/conda-env.${CONDA_ENV_NAME}.build-stamp -NON_MYPY_FILES += build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp -NON_MYPY_FILES += build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp -build/mypy.%.${CONDA_ENV_NAME}.build-stamp: scripts/dmypy-wrapper.sh build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg +MYPY_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +MYPY_COMMON_PREREQS += $(FORMAT_PREREQS) +MYPY_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) +MYPY_COMMON_PREREQS += scripts/dmypy-wrapper.sh + +build/mypy.%.${CONDA_ENV_NAME}.build-stamp: $(MYPY_COMMON_PREREQS) conda run -n ${CONDA_ENV_NAME} scripts/dmypy-wrapper.sh \ - $(filter-out $(NON_MYPY_FILES),$+) + $(filter %.py,$+) touch $@ .PHONY: test test: pytest -PYTEST_MODULES := +PYTEST_CONF_FILES := $(MLOS_GLOBAL_CONF_FILES) conftest.py .PHONY: pytest pytest: conda-env build/pytest.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_CORE_PYTHON_FILES) conftest.py setup.cfg -build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: - # Update the PYTEST_MODULES list to include mlos_core. 
- $(eval PYTEST_MODULES += mlos_core) - echo "PYTEST_MODULES: $(PYTEST_MODULES)" - touch $@ +pytest-mlos-core: build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp +pytest-mlos-bench: build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp +pytest-mlos-viz: build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp -# Run the mlos_bench target update after mlos_core target update. -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_BENCH_PYTHON_FILES) conftest.py setup.cfg -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: - # Update the PYTEST_MODULES list to include mlos_bench. - $(eval PYTEST_MODULES += mlos_bench) - echo "PYTEST_MODULES: $(PYTEST_MODULES)" - touch $@ +build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_CORE_PYTHON_FILES) $(MLOS_CORE_CONF_FILES) +build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: PYTEST_MODULE := mlos_core + +build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_BENCH_PYTHON_FILES) $(MLOS_BENCH_CONF_FILES) +build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: PYTEST_MODULE := mlos_bench -# Run the mlos_viz target update after mlos_bench target update. -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_VIZ_PYTHON_FILES) conftest.py setup.cfg -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: - # Update the PYTEST_MODULES list to include mlos_viz. 
- $(eval PYTEST_MODULES += mlos_viz) - echo "PYTEST_MODULES: $(PYTEST_MODULES)" +build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_VIZ_PYTHON_FILES) $(MLOS_VIZ_CONF_FILES) +build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: PYTEST_MODULE := mlos_viz + +# Invividual package test rules (for tight loop dev work). +# Skip code coverage tests for these. +PYTEST_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYTEST_COMMON_PREREQS += $(FORMAT_PREREQS) +PYTEST_COMMON_PREREQS += $(PYTEST_CONF_FILES) + +build/pytest.%.${CONDA_ENV_NAME}.needs-build-stamp: $(PYTEST_COMMON_PREREQS) + conda run -n ${CONDA_ENV_NAME} pytest $(PYTEST_EXTRA_OPTIONS) $(PYTEST_MODULE) touch $@ PYTEST_OPTIONS := @@ -212,85 +425,120 @@ PYTEST_OPTIONS := SKIP_COVERAGE := $(shell echo $${SKIP_COVERAGE:-} | grep -i -x -e 1 -e true) ifeq ($(SKIP_COVERAGE),) - PYTEST_OPTIONS += --cov=. --cov-append --cov-fail-under=91.5 --cov-report=xml --cov-report=html --junitxml=junit/test-results.xml --local-badge-output-dir=doc/source/badges/ + PYTEST_OPTIONS += --cov=. --cov-append --cov-fail-under=92 --cov-report=xml --cov-report=html --junitxml=junit/test-results.xml --local-badge-output-dir=doc/source/badges/ endif -# Run the pytest target on only the modules that have changed recently, but -# make sure the coverage report is for both of them when used in the pipeline. +# Global pytest rule that also produces code coverage for the pipeline. # NOTE: When run locally, the junit/test-results.xml will only include the # tests from the latest run, but this file is only used for upstream reporting, # so probably shouldn't matter. 
-build/pytest.${CONDA_ENV_NAME}.build-stamp: build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.${CONDA_ENV_NAME}.build-stamp: build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.${CONDA_ENV_NAME}.build-stamp: build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(PYTEST_COMMON_PREREQS) +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) $(MLOS_CORE_CONF_FILES) +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) $(MLOS_BENCH_CONF_FILES) +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) $(MLOS_VIZ_CONF_FILES) build/pytest.${CONDA_ENV_NAME}.build-stamp: - # Make sure to update the list of modules needed everytime in case the test fails and we need to rerun it. - for pytest_module in $(PYTEST_MODULES); do rm -f build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.needs-build-stamp; done - # Run pytest for the modules: $(PYTEST_MODULES) + # Remove the markers for individual targets (above). + for pytest_module in $(MLOS_PKGS); do rm -f build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.build-stamp; done + # Run pytest for the modules: $(MLOS_PKGS) mkdir -p doc/source/badges/ - conda run -n ${CONDA_ENV_NAME} pytest $(PYTEST_OPTIONS) $(PYTEST_EXTRA_OPTIONS) $(PYTEST_MODULES) - # Mark those as done again. - for pytest_module in $(PYTEST_MODULES); do touch build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.needs-build-stamp; done + conda run -n ${CONDA_ENV_NAME} pytest $(PYTEST_OPTIONS) $(PYTEST_EXTRA_OPTIONS) $(MLOS_PKGS) + # Global success. Mark the individual targets as done again. + for pytest_module in $(MLOS_PKGS); do touch build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.build-stamp; done touch $@ +# setuptools-scm needs a longer history than Github CI workers have by default. 
+.PHONY: unshallow +unshallow: build/unshallow.build-stamp + +build/unshallow.build-stamp: + git rev-parse --is-shallow-repository | grep -x -q false || git fetch --unshallow --quiet + touch $@ + .PHONY: dist -dist: bdist_wheel +dist: sdist bdist_wheel + +.PHONY: sdist +sdist: conda-env unshallow +sdist: mlos_core/dist/tmp/mlos_core-latest.tar.gz +sdist: mlos_bench/dist/tmp/mlos_bench-latest.tar.gz +sdist: mlos_viz/dist/tmp/mlos_viz-latest.tar.gz .PHONY: bdist_wheel -bdist_wheel: conda-env +bdist_wheel: conda-env unshallow bdist_wheel: mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl bdist_wheel: mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl bdist_wheel: mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl -mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: mlos_core/dist/tmp/mlos_core-latest.tar +# Make the whl files depend on the .tar.gz files, mostly to prevent conflicts +# with shared use of the their build/ trees. + mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: MODULE_NAME := mlos_core mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: PACKAGE_NAME := mlos_core -mlos_core/dist/tmp/mlos_core-latest.tar: mlos_core/setup.py mlos_core/MANIFEST.in $(MLOS_CORE_PYTHON_FILES) -mlos_core/dist/tmp/mlos_core-latest.tar: MODULE_NAME := mlos_core -mlos_core/dist/tmp/mlos_core-latest.tar: PACKAGE_NAME := mlos_core +mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: mlos_core/dist/tmp/mlos_core-latest.tar.gz +mlos_core/dist/tmp/mlos_core-latest.tar.gz: $(MLOS_CORE_CONF_FILES) $(MLOS_CORE_PYTHON_FILES) +mlos_core/dist/tmp/mlos_core-latest.tar.gz: MODULE_NAME := mlos_core +mlos_core/dist/tmp/mlos_core-latest.tar.gz: PACKAGE_NAME := mlos_core -mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: mlos_bench/dist/tmp/mlos_bench-latest.tar mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: MODULE_NAME := mlos_bench mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: PACKAGE_NAME := mlos_bench -mlos_bench/dist/tmp/mlos_bench-latest.tar: 
mlos_bench/setup.py mlos_bench/MANIFEST.in $(MLOS_BENCH_PYTHON_FILES) -mlos_bench/dist/tmp/mlos_bench-latest.tar: MODULE_NAME := mlos_bench -mlos_bench/dist/tmp/mlos_bench-latest.tar: PACKAGE_NAME := mlos_bench +mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: mlos_bench/dist/tmp/mlos_bench-latest.tar.gz +mlos_bench/dist/tmp/mlos_bench-latest.tar.gz: $(MLOS_BENCH_CONF_FILES) $(MLOS_BENCH_PYTHON_FILES) +mlos_bench/dist/tmp/mlos_bench-latest.tar.gz: MODULE_NAME := mlos_bench +mlos_bench/dist/tmp/mlos_bench-latest.tar.gz: PACKAGE_NAME := mlos_bench -mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: mlos_viz/dist/tmp/mlos_viz-latest.tar mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: MODULE_NAME := mlos_viz mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: PACKAGE_NAME := mlos_viz -mlos_viz/dist/tmp/mlos_viz-latest.tar: mlos_viz/setup.py mlos_viz/MANIFEST.in $(mlos_viz_PYTHON_FILES) -mlos_viz/dist/tmp/mlos_viz-latest.tar: MODULE_NAME := mlos_viz -mlos_viz/dist/tmp/mlos_viz-latest.tar: PACKAGE_NAME := mlos_viz +mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: mlos_viz/dist/tmp/mlos_viz-latest.tar.gz +mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: $(MLOS_VIZ_CONF_FILES) $(MLOS_VIZ_PYTHON_FILES) +mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: MODULE_NAME := mlos_viz +mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz + +%-latest.tar.gz: build/conda-env.${CONDA_ENV_NAME}.build-stamp build/unshallow.build-stamp $(FORMAT_PREREQS) + mkdir -p $(MODULE_NAME)/dist/tmp + rm -f $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar{,.gz} + rm -f $(MODULE_NAME)/dist/tmp/$(PACKAGE_NAME)-latest.tar{,.gz} + rm -rf $(MODULE_NAME)/build/ + rm -rf $(MODULE_NAME)/$(MODULE_NAME).egg-info/ + cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 -m build --sdist + # Do some sanity checks on the sdist tarball output. + BASE_VERS=`conda run -n ${CONDA_ENV_NAME} python3 $(MODULE_NAME)/$(MODULE_NAME)/version.py | cut -d. 
-f-2 | egrep -x '[0-9.]+' || echo err-unknown-base-version` \ + && TAG_VERS=`git tag -l --sort=-version:refname | egrep -x '^v[0-9.]+' | head -n1 | sed 's/^v//' | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-tag-version` \ + && ls $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -F -e $$BASE_VERS -e $$TAG_VERS + # Make sure tests were excluded. + ! ( tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 tests/ ) + # Make sure the py.typed marker file exists. + tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /py.typed + # Check to make sure the mlos_bench module has the config directory. + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 mlos_bench/config/ + cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar.gz $(PACKAGE_NAME)-latest.tar.gz -%-latest.tar: build/conda-env.${CONDA_ENV_NAME}.build-stamp -%-latest.tar: +%-latest-py3-none-any.whl: build/conda-env.${CONDA_ENV_NAME}.build-stamp build/unshallow.build-stamp $(FORMAT_PREREQS) mkdir -p $(MODULE_NAME)/dist/tmp - rm -f $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar - rm -f $(MODULE_NAME)/dist/tmp/$(PACKAGE_NAME)-latest.tar - cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 setup.py sdist --formats tar - ls $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar - ! 
( tar tf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar | grep -m1 tests/ ) - [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar | grep -m1 mlos_bench/config/ - cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar $(PACKAGE_NAME)-latest.tar - -%-latest-py3-none-any.whl: build/conda-env.${CONDA_ENV_NAME}.build-stamp -%-latest-py3-none-any.whl: rm -f $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl rm -f $(MODULE_NAME)/dist/tmp/$(MODULE_NAME)-latest-py3-none-any.whl - cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} pip wheel --no-index --no-deps --wheel-dir dist dist/tmp/$(PACKAGE_NAME)-latest.tar - ls $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl + rm -rf $(MODULE_NAME)/build/ + rm -rf $(MODULE_NAME)/$(MODULE_NAME).egg-info/ + cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 -m build --wheel + # Do some sanity checks on the wheel output. + BASE_VERS=`conda run -n ${CONDA_ENV_NAME} python3 $(MODULE_NAME)/$(MODULE_NAME)/version.py | cut -d. -f-2 | egrep -o '^[0-9.]+' || echo err-unknown-base-version` \ + && TAG_VERS=`git tag -l --sort=-version:refname | egrep -x '^v[0-9.]+' | head -n1 | sed 's/^v//' | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-tag-version` \ + && ls $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -F -e $$BASE_VERS -e $$TAG_VERS # Check to make sure the tests were excluded from the wheel. ! ( unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 tests/ ) + # Make sure the py.typed marker file exists. + unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 /py.typed # Check to make sure the mlos_bench module has the config directory. 
[ "$(MODULE_NAME)" != "mlos_bench" ] || unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 mlos_bench/config/ - cd $(MODULE_NAME)/dist/tmp && ln -s ../$(MODULE_NAME)-*-py3-none-any.whl $(MODULE_NAME)-latest-py3-none-any.whl # Check to make sure the README contents made it into the package metadata. - unzip -p $(MODULE_NAME)/dist/tmp/$(MODULE_NAME)-latest-py3-none-any.whl */METADATA | egrep -v '^[A-Z][a-zA-Z-]+:' | grep -q -i '^# mlos' + unzip -p $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl */METADATA | egrep -v '^[A-Z][a-zA-Z-]+:' | grep -q -i '^# mlos' + # Also check that the they include the URL + unzip -p $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl */METADATA | grep -q -e '](https://github.com/microsoft/MLOS/' + # Link it into place + cd $(MODULE_NAME)/dist/tmp && ln -s ../$(MODULE_NAME)-*-py3-none-any.whl $(MODULE_NAME)-latest-py3-none-any.whl -.PHONY: dist-test-env-clean -dist-test-env-clean: +.PHONY: clean-dist-test-env +clean-dist-test-env: # Remove any existing mlos-dist-test environment so we can start clean. conda env remove -y ${CONDA_INFO_LEVEL} -n mlos-dist-test-$(PYTHON_VERSION) 2>/dev/null || true rm -f build/dist-test-env.$(PYTHON_VERSION).build-stamp @@ -305,7 +553,7 @@ build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_core/dist/tmp/mlos_core- build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl # Create a clean test environment for checking the wheel files. - $(MAKE) dist-test-env-clean + $(MAKE) clean-dist-test-env conda create -y ${CONDA_INFO_LEVEL} -n mlos-dist-test-$(PYTHON_VERSION) python=$(PYTHON_VERS_REQ) # Install some additional dependencies necessary for clean building some of the wheels. 
conda install -y ${CONDA_INFO_LEVEL} -n mlos-dist-test-$(PYTHON_VERSION) swig libpq @@ -320,7 +568,7 @@ build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_viz/dist/tmp/mlos_viz-la touch $@ .PHONY: dist-test -#dist-test: dist-clean +#dist-test: clean-dist dist-test: dist-test-env build/dist-test.$(PYTHON_VERSION).build-stamp # Unnecessary if we invoke it as "python3 -m pytest ..." @@ -334,12 +582,14 @@ build/dist-test.$(PYTHON_VERSION).build-stamp: $(PYTHON_FILES) build/dist-test-e conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_core/mlos_core/tests/spaces/spaces_test.py # Run a simple test that uses the mlos_bench wheel (full tests can be checked with `make test`). conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_bench/mlos_bench/tests/environments/mock_env_test.py + # Run a basic cli tool check. + conda run -n mlos-dist-test-$(PYTHON_VERSION) mlos_bench --help 2>&1 | grep '^usage: mlos_bench ' # Run a simple test that uses the mlos_viz wheel (full tests can be checked with `make test`). # To do that, we need the fixtures from mlos_bench, so make those available too. 
PYTHONPATH=mlos_bench conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py touch $@ -dist-test-clean: dist-test-env-clean +clean-dist-test: clean-dist-test-env rm -f build/dist-test-env.$(PYTHON_VERSION).build-stamp @@ -355,13 +605,30 @@ build/publish-pypi-deps.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_E PUBLISH_DEPS := build/publish-pypi-deps.${CONDA_ENV_NAME}.build-stamp PUBLISH_DEPS += build/pytest.${CONDA_ENV_NAME}.build-stamp +PUBLISH_DEPS += mlos_core/dist/tmp/mlos_core-latest.tar.gz +PUBLISH_DEPS += mlos_bench/dist/tmp/mlos_bench-latest.tar.gz +PUBLISH_DEPS += mlos_viz/dist/tmp/mlos_viz-latest.tar.gz +PUBLISH_DEPS += mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl +PUBLISH_DEPS += mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl +PUBLISH_DEPS += mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl PUBLISH_DEPS += build/dist-test.$(PYTHON_VERSION).build-stamp PUBLISH_DEPS += build/check-doc.build-stamp PUBLISH_DEPS += build/linklint-doc.build-stamp build/publish.${CONDA_ENV_NAME}.%.py.build-stamp: $(PUBLISH_DEPS) - rm -f mlos_*/dist/*.tar.gz - ls mlos_*/dist/*.tar | xargs -I% gzip -k % + # Basic sanity checks on files about to be published. + # Run "make clean-dist && make dist" if these fail. + # Check the tar count. + test `ls -1 mlos_core/dist/*.tar.gz | wc -l` -eq 1 + test `ls -1 mlos_bench/dist/*.tar.gz | wc -l` -eq 1 + test `ls -1 mlos_viz/dist/*.tar.gz | wc -l` -eq 1 + test `ls -1 mlos_*/dist/*.tar.gz | wc -l` -eq 3 + # Check the whl count. + test `ls -1 mlos_core/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_bench/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_viz/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_*/dist/*.whl | wc -l` -eq 3 + # Publish the files to the specified repository. 
repo_name=`echo "$@" | sed -r -e 's|build/publish\.[^.]+\.||' -e 's|\.py\.build-stamp||'` \ && conda run -n ${CONDA_ENV_NAME} python3 -m twine upload --repository $$repo_name \ mlos_*/dist/mlos*-*.tar.gz mlos_*/dist/mlos*-*.whl @@ -370,13 +637,14 @@ build/publish.${CONDA_ENV_NAME}.%.py.build-stamp: $(PUBLISH_DEPS) publish-pypi: build/publish.${CONDA_ENV_NAME}.pypi.py.build-stamp publish-test-pypi: build/publish.${CONDA_ENV_NAME}.testpypi.py.build-stamp + build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp: doc/requirements.txt conda run -n ${CONDA_ENV_NAME} pip install -U -r doc/requirements.txt touch $@ .PHONY: doc-prereqs -doc-prereqs: build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp +doc-prereqs: build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp build/unshallow.build-stamp .PHONY: clean-doc-env clean-doc-env: @@ -385,14 +653,16 @@ clean-doc-env: COMMON_DOC_FILES := build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp doc/source/*.rst doc/source/_templates/*.rst doc/source/conf.py -doc/source/api/mlos_core/modules.rst: $(MLOS_CORE_PYTHON_FILES) $(COMMON_DOC_FILES) +doc/source/api/mlos_core/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) +doc/source/api/mlos_core/modules.rst: $(MLOS_CORE_PYTHON_FILES) rm -rf doc/source/api/mlos_core cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ -o source/api/mlos_core/ \ ../mlos_core/ \ ../mlos_core/setup.py ../mlos_core/mlos_core/tests/ -doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) $(COMMON_DOC_FILES) +doc/source/api/mlos_bench/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) +doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) rm -rf doc/source/api/mlos_bench cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ -o source/api/mlos_bench/ \ @@ -405,7 +675,8 @@ doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) $(COMMON_DOC_F echo ".. 
literalinclude:: mlos_bench.run.usage.txt" >> doc/source/api/mlos_bench/mlos_bench.run.rst echo " :language: none" >> doc/source/api/mlos_bench/mlos_bench.run.rst -doc/source/api/mlos_viz/modules.rst: $(MLOS_VIZ_PYTHON_FILES) $(COMMON_DOC_FILES) +doc/source/api/mlos_viz/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) +doc/source/api/mlos_viz/modules.rst: $(MLOS_VIZ_PYTHON_FILES) rm -rf doc/source/api/mlos_viz cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ -o source/api/mlos_viz/ \ @@ -517,25 +788,42 @@ build/linklint-doc.build-stamp: doc/build/html/index.html doc/build/html/htmlcov @echo "OK" touch $@ + .PHONY: clean-doc clean-doc: rm -rf doc/build/ doc/global/ doc/source/api/ doc/source/generated rm -rf doc/source/source_tree_docs/* +.PHONY: clean-format +clean-format: + rm -f build/black.${CONDA_ENV_NAME}.build-stamp + rm -f build/black.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/licenseheaders.${CONDA_ENV_NAME}.build-stamp + rm -f build/licenseheaders-prereqs.${CONDA_ENV_NAME}.build-stamp + .PHONY: clean-check clean-check: rm -f build/pylint.build-stamp rm -f build/pylint.${CONDA_ENV_NAME}.build-stamp rm -f build/pylint.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/mypy.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/black-check.build-stamp + rm -f build/black-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/black-check.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter-check.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort-check.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.build-stamp rm -f build/pycodestyle.${CONDA_ENV_NAME}.build-stamp rm -f 
build/pycodestyle.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/pydocstyle.build-stamp rm -f build/pydocstyle.${CONDA_ENV_NAME}.build-stamp rm -f build/pydocstyle.mlos_*.${CONDA_ENV_NAME}.build-stamp - rm -f build/licenseheaders.${CONDA_ENV_NAME}.build-stamp - rm -f build/licenseheaders-prereqs.${CONDA_ENV_NAME}.build-stamp .PHONY: clean-test clean-test: @@ -549,15 +837,16 @@ clean-test: rm -rf junit/ rm -rf test-output.xml -.PHONY: dist-clean -dist-clean: - rm -rf build dist +.PHONY: clean-dist +clean-dist: + rm -rf dist rm -rf mlos_core/build mlos_core/dist rm -rf mlos_bench/build mlos_bench/dist rm -rf mlos_viz/build mlos_viz/dist .PHONY: clean -clean: clean-check clean-test dist-clean clean-doc clean-doc-env dist-test-clean +clean: clean-format clean-check clean-test clean-dist clean-doc clean-doc-env clean-dist-test + rm -f build/unshallow.build-stamp rm -f .*.build-stamp rm -f build/conda-env.build-stamp build/conda-env.*.build-stamp rm -rf mlos_core.egg-info diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000000..4380245f794 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,73 @@ +[tool.black] +line-length = 99 +target-version = ["py38", "py39", "py310", "py311", "py312"] +include = '\.pyi?$' + +[tool.isort] +profile = "black" +py_version = 311 +src_paths = ["mlos_core", "mlos_bench", "mlos_viz"] + +# TODO: Consider switching to pydocstringformatter +[tool.docformatter] +recursive = true +black = true +style = "numpy" +pre-summary-newline = true +close-quotes-on-newline = true + +# TODO: move some other setup.cfg configs here + +[tool.pylint.main] +# Specify a score threshold to be exceeded before program exits with error. +fail-under = 9.9 + +# Make sure public methods are documented. +# See Also: https://github.com/PyCQA/pydocstyle/issues/309#issuecomment-1426642147 +# Also fail on unused imports. +fail-on = [ + "missing-function-docstring", + "unused-import", +] + +# Ignore pylint complaints about an upstream dependency. 
+ignored-modules = ["ConfigSpace.hyperparameters"] + +# Help inform pylint where to find the project's source code without needing to rely on PYTHONPATH. +#init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc())); from logging import warning; warning(sys.path)" +init-hook = "from logging import warning; warning(sys.path)" + +# Load some extra checkers. +load-plugins = [ + "pylint.extensions.bad_builtin", + "pylint.extensions.code_style", + "pylint.extensions.docparams", + "pylint.extensions.docstyle", + "pylint.extensions.for_any_all", + "pylint.extensions.mccabe", + "pylint.extensions.no_self_use", + "pylint.extensions.private_import", + "pylint.extensions.redefined_loop_name", + "pylint.extensions.redefined_variable_type", + "pylint.extensions.set_membership", + "pylint.extensions.typing", +] + +[tool.pylint.format] +# Maximum number of characters on a single line. +max-line-length = 99 + +[tool.pylint."messages control"] +disable = [ + "fixme", + "no-else-return", + "consider-using-assignment-expr", + "deprecated-typing-alias", # disable for now - only deprecated recently + "docstring-first-line-empty", + "consider-alternative-union-syntax", # disable for now - still supporting python 3.8 + "missing-raises-doc", +] + +[tool.pylint.string] +check-quote-consistency = true +check-str-concat-over-line-jumps = true diff --git a/setup.cfg b/setup.cfg index be6ee9c9c6c..6f948f523a9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,33 +1,33 @@ # vim: set ft=dosini: -[bdist_wheel] -universal = 1 [pycodestyle] count = True +# E203: Whitespace before : (black incompatibility) # W503: Line break occurred before a binary operator # W504: Line break occurred after a binary operator -ignore = W503,W504 +ignore = E203,W503,W504 format = pylint # See Also: .editorconfig, .pylintrc -max-line-length = 132 +max-line-length = 99 show-source = True statistics = True [pydocstyle] -# D102: Missing docstring in public method (Avoids
inheritence bug. Force checked in .pylintrc instead.) +# D102: Missing docstring in public method (Avoids inheritence bug. Force checked in pylint instead.) # D105: Missing docstring in magic method # D107: Missing docstring in __init__ -# D200: One-line docstring should fit on one line with quotes # D401: First line should be in imperative mood # We have many docstrings that are too long to fit on one line, so we ignore both of these two rules: # D205: 1 blank line required between summary line and description # D400: First line should end with a period -add_ignore = D102,D105,D107,D200,D401,D205,D400 +add_ignore = D102,D105,D107,D401,D205,D400 match = .+(? Date: Mon, 22 Jul 2024 19:56:03 +0000 Subject: [PATCH 07/13] apply formatters selectively --- mlos_bench/mlos_bench/launcher.py | 354 +++++++++++------- .../mlos_bench/optimizers/base_optimizer.py | 146 ++++---- .../mlos_bench/schedulers/base_scheduler.py | 171 +++++---- .../tests/launcher_parse_args_test.py | 199 +++++----- 4 files changed, 490 insertions(+), 380 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 7b2bf506235..e928a983d1a 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -3,8 +3,8 @@ # Licensed under the MIT License. # """ -A helper class to load the configuration files, parse the command line parameters, -and instantiate the main components of mlos_bench system. +A helper class to load the configuration files, parse the command line parameters, and +instantiate the main components of mlos_bench system. It is used in `mlos_bench.run` module to run the benchmark/optimizer from the command line. 
@@ -13,34 +13,26 @@ import argparse import logging import sys - from typing import Any, Dict, Iterable, List, Optional, Tuple from mlos_bench.config.schemas import ConfigSchema from mlos_bench.dict_templater import DictTemplater -from mlos_bench.util import try_parse_val - -from mlos_bench.tunables.tunable import TunableValue -from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.environments.base_environment import Environment - from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.optimizers.one_shot_optimizer import OneShotOptimizer - -from mlos_bench.storage.base_storage import Storage - +from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.services.base_service import Service -from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.config_persistence import ConfigPersistenceService - -from mlos_bench.schedulers.base_scheduler import Scheduler - +from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.types.config_loader_type import SupportsConfigLoading - +from mlos_bench.storage.base_storage import Storage +from mlos_bench.tunables.tunable import TunableValue +from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.util import try_parse_val _LOG_LEVEL = logging.INFO -_LOG_FORMAT = '%(asctime)s %(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s' +_LOG_FORMAT = "%(asctime)s %(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s" logging.basicConfig(level=_LOG_LEVEL, format=_LOG_FORMAT) _LOG = logging.getLogger(__name__) @@ -48,9 +40,7 @@ class Launcher: # pylint: disable=too-few-public-methods,too-many-instance-attributes - """ - Command line launcher for mlos_bench and mlos_core. 
- """ + """Command line launcher for mlos_bench and mlos_core.""" def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements @@ -62,8 +52,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st For additional details, please see the website or the README.md files in the source tree: """ - parser = argparse.ArgumentParser(description=f"{description} : {long_text}", - epilog=epilog) + parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) (args, path_args, args_rest) = self._parse_args(parser, argv) # Bootstrap config loader: command line takes priority. @@ -102,8 +91,11 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st excluded_cli_args = path_args + ["teardown"] # Include (almost) any item from the cli config file that either isn't in the cli # args at all or whose cli arg is missing. - cli_config_args = {key: val for (key, val) in config.items() - if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args} + cli_config_args = { + key: val + for (key, val) in config.items() + if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args + } self.global_config = self._load_config( args_globals=config.get("globals", []) + (args.globals or []), @@ -115,13 +107,13 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. 
if args.experiment_id: - self.global_config['experiment_id'] = args.experiment_id + self.global_config["experiment_id"] = args.experiment_id # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line if args.trial_config_repeat_count: self.global_config["trial_config_repeat_count"] = args.trial_config_repeat_count # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. - self.global_config.setdefault('trial_id', 1) + self.global_config.setdefault("trial_id", 1) self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) @@ -129,24 +121,29 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # --service cli args should override the config file values. service_files: List[str] = config.get("services", []) + (args.service or []) assert isinstance(self._parent_service, SupportsConfigLoading) - self._parent_service = self._parent_service.load_services(service_files, self.global_config, self._parent_service) + self._parent_service = self._parent_service.load_services( + service_files, self.global_config, self._parent_service + ) env_path = args.environment or config.get("environment") if not env_path: _LOG.error("No environment config specified.") - parser.error("At least the Environment config must be specified." + - " Run `mlos_bench --help` and consult `README.md` for more info.") + parser.error( + "At least the Environment config must be specified." + + " Run `mlos_bench --help` and consult `README.md` for more info." 
+ ) self.root_env_config = self._config_loader.resolve_path(env_path) self.environment: Environment = self._config_loader.load_environment( - self.root_env_config, TunableGroups(), self.global_config, service=self._parent_service) + self.root_env_config, TunableGroups(), self.global_config, service=self._parent_service + ) _LOG.info("Init environment: %s", self.environment) # NOTE: Init tunable values *after* the Environment, but *before* the Optimizer self.tunables = self._init_tunable_values( args.random_init or config.get("random_init", False), config.get("random_seed") if args.random_seed is None else args.random_seed, - config.get("tunable_values", []) + (args.tunable_values or []) + config.get("tunable_values", []) + (args.tunable_values or []), ) _LOG.info("Init tunables: %s", self.tunables) @@ -156,120 +153,176 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.storage = self._load_storage(args.storage or config.get("storage")) _LOG.info("Init storage: %s", self.storage) - self.teardown: bool = bool(args.teardown) if args.teardown is not None else bool(config.get("teardown", True)) + self.teardown: bool = ( + bool(args.teardown) + if args.teardown is not None + else bool(config.get("teardown", True)) + ) self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) _LOG.info("Init scheduler: %s", self.scheduler) @property def config_loader(self) -> ConfigPersistenceService: - """ - Get the config loader service. - """ + """Get the config loader service.""" return self._config_loader @property def service(self) -> Service: - """ - Get the parent service. - """ + """Get the parent service.""" return self._parent_service @staticmethod - def _parse_args(parser: argparse.ArgumentParser, - argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str], List[str]]: - """ - Parse the command line arguments. 
- """ + def _parse_args( + parser: argparse.ArgumentParser, argv: Optional[List[str]] + ) -> Tuple[argparse.Namespace, List[str], List[str]]: + """Parse the command line arguments.""" path_args = [] parser.add_argument( - '--config', required=False, - help='Main JSON5 configuration file. Its keys are the same as the' + - ' command line options and can be overridden by the latter.\n' + - '\n' + - ' See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ ' + - ' for additional config examples for this and other arguments.') - path_args.append('config') + "--config", + required=False, + help="Main JSON5 configuration file. Its keys are the same as the" + + " command line options and can be overridden by the latter.\n" + + "\n" + + " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ " + + " for additional config examples for this and other arguments.", + ) + path_args.append("config") parser.add_argument( - '--log_file', '--log-file', required=False, - help='Path to the log file. Use stdout if omitted.') - path_args.append('log_file') + "--log_file", + "--log-file", + required=False, + help="Path to the log file. Use stdout if omitted.", + ) + path_args.append("log_file") parser.add_argument( - '--log_level', '--log-level', required=False, type=str, - help=f'Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}.' + - ' Set to DEBUG for debug, WARNING for warnings only.') + "--log_level", + "--log-level", + required=False, + type=str, + help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}." 
+ + " Set to DEBUG for debug, WARNING for warnings only.", + ) parser.add_argument( - '--config_path', '--config-path', '--config-paths', '--config_paths', - nargs="+", action='extend', required=False, - help='One or more locations of JSON config files.') - path_args.append('config_path') - path_args.append('config_paths') + "--config_path", + "--config-path", + "--config-paths", + "--config_paths", + nargs="+", + action="extend", + required=False, + help="One or more locations of JSON config files.", + ) + path_args.append("config_path") + path_args.append("config_paths") parser.add_argument( - '--service', '--services', - nargs='+', action='extend', required=False, - help='Path to JSON file with the configuration of the service(s) for environment(s) to use.') - path_args.append('service') - path_args.append('services') + "--service", + "--services", + nargs="+", + action="extend", + required=False, + help="Path to JSON file with the configuration of the service(s) for environment(s) to use.", + ) + path_args.append("service") + path_args.append("services") parser.add_argument( - '--environment', required=False, - help='Path to JSON file with the configuration of the benchmarking environment(s).') - path_args.append('environment') + "--environment", + required=False, + help="Path to JSON file with the configuration of the benchmarking environment(s).", + ) + path_args.append("environment") parser.add_argument( - '--optimizer', required=False, - help='Path to the optimizer configuration file. If omitted, run' + - ' a single trial with default (or specified in --tunable_values).') - path_args.append('optimizer') + "--optimizer", + required=False, + help="Path to the optimizer configuration file. 
If omitted, run" + + " a single trial with default (or specified in --tunable_values).", + ) + path_args.append("optimizer") parser.add_argument( - '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, - help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') + "--trial_config_repeat_count", + "--trial-config-repeat-count", + required=False, + type=int, + help="Number of times to repeat each config. Default is 1 trial per config, though more may be advised.", + ) parser.add_argument( - '--scheduler', required=False, - help='Path to the scheduler configuration file. By default, use' + - ' a single worker synchronous scheduler.') - path_args.append('scheduler') + "--scheduler", + required=False, + help="Path to the scheduler configuration file. By default, use" + + " a single worker synchronous scheduler.", + ) + path_args.append("scheduler") parser.add_argument( - '--storage', required=False, - help='Path to the storage configuration file.' + - ' If omitted, use the ephemeral in-memory SQL storage.') - path_args.append('storage') + "--storage", + required=False, + help="Path to the storage configuration file." + + " If omitted, use the ephemeral in-memory SQL storage.", + ) + path_args.append("storage") parser.add_argument( - '--random_init', '--random-init', required=False, default=False, - dest='random_init', action='store_true', - help='Initialize tunables with random values. (Before applying --tunable_values).') + "--random_init", + "--random-init", + required=False, + default=False, + dest="random_init", + action="store_true", + help="Initialize tunables with random values. 
(Before applying --tunable_values).", + ) parser.add_argument( - '--random_seed', '--random-seed', required=False, type=int, - help='Seed to use with --random_init') + "--random_seed", + "--random-seed", + required=False, + type=int, + help="Seed to use with --random_init", + ) parser.add_argument( - '--tunable_values', '--tunable-values', nargs="+", action='extend', required=False, - help='Path to one or more JSON files that contain values of the tunable' + - ' parameters. This can be used for a single trial (when no --optimizer' + - ' is specified) or as default values for the first run in optimization.') - path_args.append('tunable_values') + "--tunable_values", + "--tunable-values", + nargs="+", + action="extend", + required=False, + help="Path to one or more JSON files that contain values of the tunable" + + " parameters. This can be used for a single trial (when no --optimizer" + + " is specified) or as default values for the first run in optimization.", + ) + path_args.append("tunable_values") parser.add_argument( - '--globals', nargs="+", action='extend', required=False, - help='Path to one or more JSON files that contain additional' + - ' [private] parameters of the benchmarking environment.') - path_args.append('globals') + "--globals", + nargs="+", + action="extend", + required=False, + help="Path to one or more JSON files that contain additional" + + " [private] parameters of the benchmarking environment.", + ) + path_args.append("globals") parser.add_argument( - '--no_teardown', '--no-teardown', required=False, default=None, - dest='teardown', action='store_false', - help='Disable teardown of the environment after the benchmark.') + "--no_teardown", + "--no-teardown", + required=False, + default=None, + dest="teardown", + action="store_false", + help="Disable teardown of the environment after the benchmark.", + ) parser.add_argument( - '--experiment_id', '--experiment-id', required=False, default=None, + "--experiment_id", + "--experiment-id", + 
required=False, + default=None, help=""" Experiment ID to use for the benchmark. If omitted, the value from the --cli config or --globals is used. @@ -279,7 +332,7 @@ def _parse_args(parser: argparse.ArgumentParser, changes are made to config files, scripts, versions, etc. This is left as a manual operation as detection of what is "incompatible" is not easily automatable across systems. - """ + """, ) # By default we use the command line arguments, but allow the caller to @@ -292,9 +345,7 @@ def _parse_args(parser: argparse.ArgumentParser, @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: - """ - Helper function to parse global key/value pairs from the command line. - """ + """Helper function to parse global key/value pairs from the command line.""" _LOG.debug("Extra args: %s", cmdline) config: Dict[str, TunableValue] = {} @@ -321,16 +372,18 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: _LOG.debug("Parsed config: %s", config) return config - def _load_config(self, *, - args_globals: Iterable[str], - config_path: Iterable[str], - args_rest: Iterable[str], - global_config: Dict[str, Any]) -> Dict[str, Any]: - """ - Get key/value pairs of the global configuration parameters - from the specified config files (if any) and command line arguments. + def _load_config( + self, + *, + args_globals: Iterable[str], + config_path: Iterable[str], + args_rest: Iterable[str], + global_config: Dict[str, Any], + ) -> Dict[str, Any]: + """Get key/value pairs of the global configuration parameters from the specified + config files (if any) and command line arguments. 
""" - for config_file in (args_globals or []): + for config_file in args_globals or []: conf = self._config_loader.load_config(config_file, ConfigSchema.GLOBALS) assert isinstance(conf, dict) global_config.update(conf) @@ -339,19 +392,21 @@ def _load_config(self, *, global_config["config_path"] = config_path return global_config - def _init_tunable_values(self, random_init: bool, seed: Optional[int], - args_tunables: Optional[str]) -> TunableGroups: - """ - Initialize the tunables and load key/value pairs of the tunable values - from given JSON files, if specified. + def _init_tunable_values( + self, random_init: bool, seed: Optional[int], args_tunables: Optional[str] + ) -> TunableGroups: + """Initialize the tunables and load key/value pairs of the tunable values from + given JSON files, if specified. """ tunables = self.environment.tunable_params _LOG.debug("Init tunables: default = %s", tunables) if random_init: tunables = MockOptimizer( - tunables=tunables, service=None, - config={"start_with_defaults": False, "seed": seed}).suggest() + tunables=tunables, + service=None, + config={"start_with_defaults": False, "seed": seed}, + ).suggest() _LOG.debug("Init tunables: random = %s", tunables) if args_tunables is not None: @@ -365,50 +420,62 @@ def _init_tunable_values(self, random_init: bool, seed: Optional[int], def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: """ - Instantiate the Optimizer object from JSON config file, if specified - in the --optimizer command line option. If config file not specified, - create a one-shot optimizer to run a single benchmark trial. + Instantiate the Optimizer object from JSON config file, if specified in the + --optimizer command line option. + + If config file not specified, create a one-shot optimizer to run a single + benchmark trial. """ if args_optimizer is None: # global_config may contain additional properties, so we need to # strip those out before instantiating the basic oneshot optimizer. 
- config = {key: val for key, val in self.global_config.items() if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS} - return OneShotOptimizer( - self.tunables, config=config, service=self._parent_service) + config = { + key: val + for key, val in self.global_config.items() + if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS + } + return OneShotOptimizer(self.tunables, config=config, service=self._parent_service) class_config = self._config_loader.load_config(args_optimizer, ConfigSchema.OPTIMIZER) assert isinstance(class_config, Dict) - optimizer = self._config_loader.build_optimizer(tunables=self.tunables, - service=self._parent_service, - config=class_config, - global_config=self.global_config) + optimizer = self._config_loader.build_optimizer( + tunables=self.tunables, + service=self._parent_service, + config=class_config, + global_config=self.global_config, + ) return optimizer def _load_storage(self, args_storage: Optional[str]) -> Storage: """ - Instantiate the Storage object from JSON file provided in the --storage - command line parameter. If omitted, create an ephemeral in-memory SQL - storage instead. + Instantiate the Storage object from JSON file provided in the --storage command + line parameter. + + If omitted, create an ephemeral in-memory SQL storage instead. 
""" if args_storage is None: # pylint: disable=import-outside-toplevel from mlos_bench.storage.sql.storage import SqlStorage - return SqlStorage(service=self._parent_service, - config={ - "drivername": "sqlite", - "database": ":memory:", - "lazy_schema_create": True, - }) + + return SqlStorage( + service=self._parent_service, + config={ + "drivername": "sqlite", + "database": ":memory:", + "lazy_schema_create": True, + }, + ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) assert isinstance(class_config, Dict) - storage = self._config_loader.build_storage(service=self._parent_service, - config=class_config, - global_config=self.global_config) + storage = self._config_loader.build_storage( + service=self._parent_service, config=class_config, global_config=self.global_config + ) return storage def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: """ Instantiate the Scheduler object from JSON file provided in the --scheduler command line parameter. + Create a simple synchronous single-threaded scheduler if omitted. """ # Set `teardown` for scheduler only to prevent conflicts with other configs. @@ -417,6 +484,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: if args_scheduler is None: # pylint: disable=import-outside-toplevel from mlos_bench.schedulers.sync_scheduler import SyncScheduler + return SyncScheduler( # All config values can be overridden from global config config={ diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 89ee6c9fd13..7d14cb68bb7 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -2,34 +2,32 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Base class for an interface between the benchmarking framework -and mlos_core optimizers. 
+"""Base class for an interface between the benchmarking framework and mlos_core +optimizers. """ import logging from abc import ABCMeta, abstractmethod -from distutils.util import strtobool # pylint: disable=deprecated-module - +from distutils.util import strtobool # pylint: disable=deprecated-module from types import TracebackType from typing import Dict, Optional, Sequence, Tuple, Type, Union -from typing_extensions import Literal from ConfigSpace import ConfigurationSpace +from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema -from mlos_bench.services.base_service import Service from mlos_bench.environments.status import Status +from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace +from mlos_bench.services.base_service import Service from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace _LOG = logging.getLogger(__name__) -class Optimizer(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes - """ - An abstract interface between the benchmarking framework and mlos_core optimizers. +class Optimizer(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes + """An abstract interface between the benchmarking framework and mlos_core + optimizers. """ # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json @@ -40,13 +38,16 @@ class Optimizer(metaclass=ABCMeta): # pylint: disable=too-many-instance-attr "start_with_defaults", } - def __init__(self, - tunables: TunableGroups, - config: dict, - global_config: Optional[dict] = None, - service: Optional[Service] = None): + def __init__( + self, + tunables: TunableGroups, + config: dict, + global_config: Optional[dict] = None, + service: Optional[Service] = None, + ): """ - Create a new optimizer for the given configuration space defined by the tunables. 
+ Create a new optimizer for the given configuration space defined by the + tunables. Parameters ---------- @@ -68,19 +69,20 @@ def __init__(self, self._seed = int(config.get("seed", 42)) self._in_context = False - experiment_id = self._global_config.get('experiment_id') + experiment_id = self._global_config.get("experiment_id") self.experiment_id = str(experiment_id).strip() if experiment_id else None self._iter = 0 # If False, use the optimizer to suggest the initial configuration; # if True (default), use the already initialized values for the first iteration. self._start_with_defaults: bool = bool( - strtobool(str(self._config.pop('start_with_defaults', True)))) - self._max_iter = int(self._config.pop('max_suggestions', 100)) + strtobool(str(self._config.pop("start_with_defaults", True))) + ) + self._max_iter = int(self._config.pop("max_suggestions", 100)) - opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'}) + opt_targets: Dict[str, str] = self._config.pop("optimization_targets", {"score": "min"}) self._opt_targets: Dict[str, Literal[1, -1]] = {} - for (opt_target, opt_dir) in opt_targets.items(): + for opt_target, opt_dir in opt_targets.items(): if opt_dir == "min": self._opt_targets[opt_target] = 1 elif opt_dir == "max": @@ -89,10 +91,9 @@ def __init__(self, raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}") def _validate_json_config(self, config: dict) -> None: - """ - Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." 
+ self.__class__.__name__, @@ -108,21 +109,20 @@ def __repr__(self) -> str: ) return f"{self.name}({opt_targets},config={self._config})" - def __enter__(self) -> 'Optimizer': - """ - Enter the optimizer's context. - """ + def __enter__(self) -> "Optimizer": + """Enter the optimizer's context.""" _LOG.debug("Optimizer START :: %s", self) assert not self._in_context self._in_context = True return self - def __exit__(self, ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: - """ - Exit the context of the optimizer. - """ + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: + """Exit the context of the optimizer.""" if ex_val is None: _LOG.debug("Optimizer END :: %s", self) else: @@ -157,15 +157,14 @@ def max_iterations(self) -> int: @property def seed(self) -> int: - """ - The random seed for the optimizer. - """ + """The random seed for the optimizer.""" return self._seed @property def start_with_defaults(self) -> bool: """ Return True if the optimizer should start with the default values. + Note: This parameter is mutable and will be reset to False after the defaults are first suggested. """ @@ -201,16 +200,16 @@ def config_space(self) -> ConfigurationSpace: @property def name(self) -> str: """ - The name of the optimizer. We save this information in - mlos_bench storage to track the source of each configuration. + The name of the optimizer. + + We save this information in mlos_bench storage to track the source of each + configuration. """ return self.__class__.__name__ @property - def targets(self) -> Dict[str, Literal['min', 'max']]: - """ - A dictionary of {target: direction} of optimization targets. 
- """ + def targets(self) -> Dict[str, Literal["min", "max"]]: + """A dictionary of {target: direction} of optimization targets.""" return { opt_target: "min" if opt_dir == 1 else "max" for (opt_target, opt_dir) in self._opt_targets.items() @@ -218,16 +217,18 @@ def targets(self) -> Dict[str, Literal['min', 'max']]: @property def supports_preload(self) -> bool: - """ - Return True if the optimizer supports pre-loading the data from previous experiments. + """Return True if the optimizer supports pre-loading the data from previous + experiments. """ return True @abstractmethod - def bulk_register(self, - configs: Sequence[dict], - scores: Sequence[Optional[Dict[str, TunableValue]]], - status: Optional[Sequence[Status]] = None) -> bool: + def bulk_register( + self, + configs: Sequence[dict], + scores: Sequence[Optional[Dict[str, TunableValue]]], + status: Optional[Sequence[Status]] = None, + ) -> bool: """ Pre-load the optimizer with the bulk data from previous experiments. @@ -245,8 +246,12 @@ def bulk_register(self, is_not_empty : bool True if there is data to register, false otherwise. """ - _LOG.info("Update the optimizer with: %d configs, %d scores, %d status values", - len(configs or []), len(scores or []), len(status or [])) + _LOG.info( + "Update the optimizer with: %d configs, %d scores, %d status values", + len(configs or []), + len(scores or []), + len(status or []), + ) if len(configs or []) != len(scores or []): raise ValueError("Numbers of configs and scores do not match.") if status is not None and len(configs or []) != len(status or []): @@ -259,9 +264,8 @@ def bulk_register(self, def suggest(self) -> TunableGroups: """ - Generate the next suggestion. - Base class' implementation increments the iteration count - and returns the current values of the tunables. + Generate the next suggestion. Base class' implementation increments the + iteration count and returns the current values of the tunables. 
Returns ------- @@ -275,8 +279,12 @@ def suggest(self) -> TunableGroups: return self._tunables.copy() @abstractmethod - def register(self, tunables: TunableGroups, status: Status, - score: Optional[Dict[str, TunableValue]] = None) -> Optional[Dict[str, float]]: + def register( + self, + tunables: TunableGroups, + status: Status, + score: Optional[Dict[str, TunableValue]] = None, + ) -> Optional[Dict[str, float]]: """ Register the observation for the given configuration. @@ -297,18 +305,19 @@ def register(self, tunables: TunableGroups, status: Status, Benchmark scores extracted (and possibly transformed) from the dataframe that's being MINIMIZED. """ - _LOG.info("Iteration %d :: Register: %s = %s score: %s", - self._iter, tunables, status, score) + _LOG.info( + "Iteration %d :: Register: %s = %s score: %s", self._iter, tunables, status, score + ) if status.is_succeeded() == (score is None): # XOR raise ValueError("Status and score must be consistent.") return self._get_scores(status, score) - def _get_scores(self, status: Status, - scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]] - ) -> Optional[Dict[str, float]]: + def _get_scores( + self, status: Status, scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]] + ) -> Optional[Dict[str, float]]: """ - Extract a scalar benchmark score from the dataframe. - Change the sign if we are maximizing. + Extract a scalar benchmark score from the dataframe. Change the sign if we are + maximizing. Parameters ---------- @@ -332,7 +341,7 @@ def _get_scores(self, status: Status, assert scores is not None target_metrics: Dict[str, float] = {} - for (opt_target, opt_dir) in self._opt_targets.items(): + for opt_target, opt_dir in self._opt_targets.items(): val = scores[opt_target] assert val is not None target_metrics[opt_target] = float(val) * opt_dir @@ -342,12 +351,15 @@ def _get_scores(self, status: Status, def not_converged(self) -> bool: """ Return True if not converged, False otherwise. 
+ Base implementation just checks the iteration count. """ return self._iter < self._max_iter @abstractmethod - def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: + def get_best_observation( + self, + ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: """ Get the best observation so far. diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 1c974da957d..cadd61fc9f4 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -2,20 +2,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Base class for the optimization loop scheduling policies. -""" +"""Base class for the optimization loop scheduling policies.""" import json import logging -from datetime import datetime - from abc import ABCMeta, abstractmethod +from datetime import datetime from types import TracebackType from typing import Any, Dict, Optional, Tuple, Type -from typing_extensions import Literal from pytz import UTC +from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment @@ -29,22 +26,23 @@ class Scheduler(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes - """ - Base class for the optimization loop scheduling policies. - """ - - def __init__(self, *, - config: Dict[str, Any], - global_config: Dict[str, Any], - environment: Environment, - optimizer: Optimizer, - storage: Storage, - root_env_config: str): + """Base class for the optimization loop scheduling policies.""" + + def __init__( + self, + *, + config: Dict[str, Any], + global_config: Dict[str, Any], + environment: Environment, + optimizer: Optimizer, + storage: Storage, + root_env_config: str, + ): """ - Create a new instance of the scheduler. 
The constructor of this - and the derived classes is called by the persistence service - after reading the class JSON configuration. Other objects like - the Environment and Optimizer are provided by the Launcher. + Create a new instance of the scheduler. The constructor of this and the derived + classes is called by the persistence service after reading the class JSON + configuration. Other objects like the Environment and Optimizer are provided by + the Launcher. Parameters ---------- @@ -62,8 +60,9 @@ def __init__(self, *, Path to the root environment configuration. """ self.global_config = global_config - config = merge_parameters(dest=config.copy(), source=global_config, - required_keys=["experiment_id", "trial_id"]) + config = merge_parameters( + dest=config.copy(), source=global_config, required_keys=["experiment_id", "trial_id"] + ) self._validate_json_config(config) self._experiment_id = config["experiment_id"].strip() @@ -74,7 +73,9 @@ def __init__(self, *, self._trial_config_repeat_count = int(config.get("trial_config_repeat_count", 1)) if self._trial_config_repeat_count <= 0: - raise ValueError(f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}") + raise ValueError( + f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}" + ) self._do_teardown = bool(config.get("teardown", True)) @@ -88,10 +89,9 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) def _validate_json_config(self, config: dict) -> None: - """ - Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." 
+ self.__class__.__name__, @@ -107,7 +107,9 @@ def trial_config_repeat_count(self) -> int: @property def max_trials(self) -> int: - """Gets the maximum number of trials to run for a given experiment, or -1 for no limit.""" + """Gets the maximum number of trials to run for a given experiment, or -1 for no + limit. + """ return self._max_trials def __repr__(self) -> str: @@ -121,10 +123,8 @@ def __repr__(self) -> str: """ return self.__class__.__name__ - def __enter__(self) -> 'Scheduler': - """ - Enter the scheduler's context. - """ + def __enter__(self) -> "Scheduler": + """Enter the scheduler's context.""" _LOG.debug("Scheduler START :: %s", self) assert self.experiment is None self.environment.__enter__() @@ -143,13 +143,13 @@ def __enter__(self) -> 'Scheduler': ).__enter__() return self - def __exit__(self, - ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: - """ - Exit the context of the scheduler. - """ + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: + """Exit the context of the scheduler.""" if ex_val is None: _LOG.debug("Scheduler END :: %s", self) else: @@ -164,12 +164,14 @@ def __exit__(self, @abstractmethod def start(self) -> None: - """ - Start the optimization loop. - """ + """Start the optimization loop.""" assert self.experiment is not None - _LOG.info("START: Experiment: %s Env: %s Optimizer: %s", - self.experiment, self.environment, self.optimizer) + _LOG.info( + "START: Experiment: %s Env: %s Optimizer: %s", + self.experiment, + self.environment, + self.optimizer, + ) if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", self.environment.pprint()) @@ -180,6 +182,7 @@ def start(self) -> None: def teardown(self) -> None: """ Tear down the environment. + Call it after the completion of the `.start()` in the scheduler context. 
""" assert self.experiment is not None @@ -187,17 +190,13 @@ def teardown(self) -> None: self.environment.teardown() def get_best_observation(self) -> Tuple[Optional[Dict[str, float]], Optional[TunableGroups]]: - """ - Get the best observation from the optimizer. - """ + """Get the best observation from the optimizer.""" (best_score, best_config) = self.optimizer.get_best_observation() _LOG.info("Env: %s best score: %s", self.environment, best_score) return (best_score, best_config) def load_config(self, config_id: int) -> TunableGroups: - """ - Load the existing tunable configuration from the storage. - """ + """Load the existing tunable configuration from the storage.""" assert self.experiment is not None tunable_values = self.experiment.load_tunable_config(config_id) tunables = self.environment.tunable_params.assign(tunable_values) @@ -208,9 +207,11 @@ def load_config(self, config_id: int) -> TunableGroups: def _schedule_new_optimizer_suggestions(self) -> bool: """ - Optimizer part of the loop. Load the results of the executed trials - into the optimizer, suggest new configurations, and add them to the queue. - Return True if optimization is not over, False otherwise. + Optimizer part of the loop. + + Load the results of the executed trials into the optimizer, suggest new + configurations, and add them to the queue. Return True if optimization is not + over, False otherwise. """ assert self.experiment is not None (trial_ids, configs, scores, status) = self.experiment.load(self._last_trial_id) @@ -226,33 +227,38 @@ def _schedule_new_optimizer_suggestions(self) -> bool: return not_done def schedule_trial(self, tunables: TunableGroups) -> None: - """ - Add a configuration to the queue of trials. - """ + """Add a configuration to the queue of trials.""" for repeat_i in range(1, self._trial_config_repeat_count + 1): - self._add_trial_to_queue(tunables, config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. 
- # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - "optimizer": self.optimizer.name, - "repeat_i": repeat_i, - "is_defaults": tunables.is_defaults, - **{ - f"opt_{key}_{i}": val - for (i, opt_target) in enumerate(self.optimizer.targets.items()) - for (key, val) in zip(["target", "direction"], opt_target) - } - }) - - def _add_trial_to_queue(self, tunables: TunableGroups, - ts_start: Optional[datetime] = None, - config: Optional[Dict[str, Any]] = None) -> None: + self._add_trial_to_queue( + tunables, + config={ + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). + "optimizer": self.optimizer.name, + "repeat_i": repeat_i, + "is_defaults": tunables.is_defaults, + **{ + f"opt_{key}_{i}": val + for (i, opt_target) in enumerate(self.optimizer.targets.items()) + for (key, val) in zip(["target", "direction"], opt_target) + }, + }, + ) + + def _add_trial_to_queue( + self, + tunables: TunableGroups, + ts_start: Optional[datetime] = None, + config: Optional[Dict[str, Any]] = None, + ) -> None: """ Add a configuration to the queue of trials. + A wrapper for the `Experiment.new_trial` method. """ assert self.experiment is not None @@ -261,7 +267,9 @@ def _add_trial_to_queue(self, tunables: TunableGroups, def _run_schedule(self, running: bool = False) -> None: """ - Scheduler part of the loop. Check for pending trials in the queue and run them. + Scheduler part of the loop. + + Check for pending trials in the queue and run them. 
""" assert self.experiment is not None for trial in self.experiment.pending_trials(datetime.now(UTC), running=running): @@ -270,6 +278,7 @@ def _run_schedule(self, running: bool = False) -> None: def not_done(self) -> bool: """ Check the stopping conditions. + By default, stop when the optimizer converges or max limit of trials reached. """ return self.optimizer.not_converged() and ( @@ -279,7 +288,9 @@ def not_done(self) -> bool: @abstractmethod def run_trial(self, trial: Storage.Trial) -> None: """ - Set up and run a single trial. Save the results in the storage. + Set up and run a single trial. + + Save the results in the storage. """ assert self.experiment is not None self._trial_count += 1 diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index d5a92fc30f6..2b9c31c014b 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -14,11 +14,10 @@ import pytest +from mlos_bench.config.schemas import ConfigSchema from mlos_bench.launcher import Launcher -from mlos_bench.optimizers import OneShotOptimizer, MlosCoreOptimizer +from mlos_bench.optimizers import MlosCoreOptimizer, OneShotOptimizer from mlos_bench.os_environ import environ -from mlos_bench.config.schemas import ConfigSchema -from mlos_bench.util import path_join from mlos_bench.schedulers import SyncScheduler from mlos_bench.services.types import ( SupportsAuth, @@ -28,6 +27,7 @@ SupportsRemoteExec, ) from mlos_bench.tests import check_class_name +from mlos_bench.util import path_join if sys.version_info < (3, 10): from importlib_resources import files @@ -48,13 +48,13 @@ def config_paths() -> List[str]: """ return [ path_join(os.getcwd(), abs_path=True), - str(files('mlos_bench.config')), - str(files('mlos_bench.tests.config')), + str(files("mlos_bench.config")), + str(files("mlos_bench.tests.config")), ] # This is part of the minimal required args by 
the Launcher. -ENV_CONF_PATH = 'environments/mock/mock_env.jsonc' +ENV_CONF_PATH = "environments/mock/mock_env.jsonc" def _get_launcher(desc: str, cli_args: str) -> Launcher: @@ -63,81 +63,90 @@ def _get_launcher(desc: str, cli_args: str) -> Launcher: # variable so we use a separate variable. # See global_test_config.jsonc for more details. environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == 'win32': + if sys.platform == "win32": # Some env tweaks for platform compatibility. - environ['USER'] = environ['USERNAME'] + environ["USER"] = environ["USERNAME"] launcher = Launcher(description=desc, argv=cli_args.split()) # Check the basic parent service assert isinstance(launcher.service, SupportsConfigLoading) # built-in - assert isinstance(launcher.service, SupportsLocalExec) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in return launcher def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """Test that we get the defaults we expect when using minimal config arg + examples. """ - Test that we get the defaults we expect when using minimal config arg examples. - """ - cli_args = '--config-paths ' + ' '.join(config_paths) + \ - f' --environment {ENV_CONF_PATH}' + \ - ' --globals globals/global_test_config.jsonc' + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + ) launcher = _get_launcher(__name__, cli_args) # Check that the first --globals file is loaded and $var expansion is handled. - assert launcher.global_config['experiment_id'] == 'MockExperiment' - assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" # Check that secondary expansion also works. 
- assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.teardown # defaults + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" + assert launcher.teardown # defaults # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" - assert check_class_name(launcher.environment, env_config['class']) + assert check_class_name(launcher.environment, env_config["class"]) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. 
assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 1 # default - assert launcher.scheduler.max_trials == -1 # default + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default def test_launcher_args_parse_1(config_paths: List[str]) -> None: """ - Test that using multiple --globals arguments works and that multiple space - separated options to --config-paths works. + Test that using multiple --globals arguments works and that multiple space separated + options to --config-paths works. + Check $var expansion and Environment loading. """ # Here we have multiple paths following --config-paths and --service. - cli_args = '--config-paths ' + ' '.join(config_paths) + \ - ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' services/remote/mock/mock_remote_exec_service.jsonc' + \ - ' --scheduler schedulers/sync_scheduler.jsonc' + \ - f' --environment {ENV_CONF_PATH}' + \ - ' --globals globals/global_test_config.jsonc' + \ - ' --globals globals/global_test_extra_config.jsonc' \ - ' --test_global_value_2 from-args' + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + " --service services/remote/mock/mock_auth_service.jsonc" + + " services/remote/mock/mock_remote_exec_service.jsonc" + + " --scheduler schedulers/sync_scheduler.jsonc" + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + + " --globals globals/global_test_extra_config.jsonc" + " --test_global_value_2 from-args" + ) launcher = _get_launcher(__name__, cli_args) # Check some additional features of the the parent service - assert isinstance(launcher.service, SupportsAuth) # 
from --service - assert isinstance(launcher.service, SupportsRemoteExec) # from --service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. - assert launcher.global_config['experiment_id'] == 'MockExperiment' - assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" # Check that the second --globals file is loaded. - assert launcher.global_config['test_global_value'] == 'from-file' + assert launcher.global_config["test_global_value"] == "from-file" # Check overriding values in a file from the command line. - assert launcher.global_config['test_global_value_2'] == 'from-args' + assert launcher.global_config["test_global_value_2"] == "from-args" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert launcher.teardown # Check that the environment that got loaded looks to be of the right type. 
env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) @@ -146,68 +155,78 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 3 # from the custom sync_scheduler.jsonc config + assert ( + launcher.scheduler.trial_config_repeat_count == 3 + ) # from the custom sync_scheduler.jsonc config assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: - """ - Test multiple --config-path instances, --config file vs --arg, --var=val + """Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - config_file = 'cli/test-cli-config.jsonc' - globals_file = 'globals/global_test_config.jsonc' + config_file = "cli/test-cli-config.jsonc" + globals_file = "globals/global_test_config.jsonc" # Here we have multiple --config-path and --service args, each with their own path. 
- cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ - f' --config {config_file}' + \ - ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ - f' --globals {globals_file}' + \ - ' --experiment_id MockeryExperiment' + \ - ' --no-teardown' + \ - ' --random-init' + \ - ' --random-seed 1234' + \ - ' --trial-config-repeat-count 5' + \ - ' --max_trials 200' + cli_args = ( + " ".join([f"--config-path {config_path}" for config_path in config_paths]) + + f" --config {config_file}" + + " --service services/remote/mock/mock_auth_service.jsonc" + + " --service services/remote/mock/mock_remote_exec_service.jsonc" + + f" --globals {globals_file}" + + " --experiment_id MockeryExperiment" + + " --no-teardown" + + " --random-init" + + " --random-seed 1234" + + " --trial-config-repeat-count 5" + + " --max_trials 200" + ) launcher = _get_launcher(__name__, cli_args) # Check some additional features of the the parent service - assert isinstance(launcher.service, SupportsAuth) # from --service - assert isinstance(launcher.service, SupportsFileShareOps) # from --config - assert isinstance(launcher.service, SupportsRemoteExec) # from --service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. - assert launcher.global_config['experiment_id'] == 'MockeryExperiment' - assert launcher.global_config['testVmName'] == 'MockeryExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockeryExperiment" + assert launcher.global_config["testVmName"] == "MockeryExperiment-vm" # Check that secondary expansion also works. 
- assert launcher.global_config['testVnetName'] == 'MockeryExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockeryExperiment-vm-vnet" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert not launcher.teardown config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) - assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] + assert launcher.config_loader.config_paths == [ + path_join(path, abs_path=True) for path in config_paths + config["config_path"] + ] # Check that the environment that got loaded looks to be of the right type. - env_config_file = config['environment'] + env_config_file = config["environment"] env_config = launcher.config_loader.load_config(env_config_file, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.environment, env_config['class']) + assert check_class_name(launcher.environment, env_config["class"]) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, MlosCoreOptimizer) - opt_config_file = config['optimizer'] + opt_config_file = config["optimizer"] opt_config = launcher.config_loader.load_config(opt_config_file, ConfigSchema.OPTIMIZER) globals_file_config = launcher.config_loader.load_config(globals_file, ConfigSchema.GLOBALS) # The actual global_config gets overwritten as a part of processing, so to test # this we read the original value out of the source files. 
- orig_max_iters = globals_file_config.get('max_suggestions', opt_config.get('config', {}).get('max_suggestions', 100)) - assert launcher.optimizer.max_iterations \ - == orig_max_iters \ - == launcher.global_config['max_suggestions'] + orig_max_iters = globals_file_config.get( + "max_suggestions", opt_config.get("config", {}).get("max_suggestions", 100) + ) + assert ( + launcher.optimizer.max_iterations + == orig_max_iters + == launcher.global_config["max_suggestions"] + ) # Check that the optimizer got initialized with random values instead of the defaults. # Note: the environment doesn't get updated until suggest() is called to @@ -220,11 +239,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that CLI parameter overrides JSON config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args + assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. - assert config['random_seed'] == 42 + assert config["random_seed"] == 42 # TODO: This isn't actually respected yet because the `--random-init` only # applies to a temporary Optimizer used to populate the initial values via # random sampling. @@ -232,16 +251,16 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: def test_launcher_args_parse_3(config_paths: List[str]) -> None: - """ - Check that cli file values take precedence over other values. - """ - config_file = 'cli/test-cli-config.jsonc' - globals_file = 'globals/global_test_config.jsonc' + """Check that cli file values take precedence over other values.""" + config_file = "cli/test-cli-config.jsonc" + globals_file = "globals/global_test_config.jsonc" # Here we don't override values in test-cli-config with cli args but ensure that # those take precedence over other config files. 
- cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ - f' --config {config_file}' + \ - f' --globals {globals_file}' + cli_args = ( + " ".join([f"--config-path {config_path}" for config_path in config_paths]) + + f" --config {config_file}" + + f" --globals {globals_file}" + ) launcher = _get_launcher(__name__, cli_args) # Check that CLI file parameter overrides JSON config: @@ -250,5 +269,5 @@ def test_launcher_args_parse_3(config_paths: List[str]) -> None: assert launcher.scheduler.trial_config_repeat_count == 2 -if __name__ == '__main__': +if __name__ == "__main__": pytest.main([__file__, "-n0"]) From 2dee79ff396b6feda3fba0b1619f995f97efb1cc Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 22 Jul 2024 20:30:05 +0000 Subject: [PATCH 08/13] fixups --- mlos_bench/mlos_bench/launcher.py | 2 +- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index f9fd1ebf261..05f7d1b5448 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -98,7 +98,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st cli_config_args = { key: val for (key, val) in config.items() - if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args + if (args_dict.get(key) is None) and key not in excluded_cli_args } self.global_config = self._load_config( diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index c0119d7d258..dac362101b2 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -91,9 +91,9 @@ def __init__( # pylint: disable=too-many-arguments _LOG.debug("Scheduler instantiated: %s :: %s", self, config) def _validate_json_config(self, config: dict) -> None: - """Reconstructs a basic 
json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." + self.__class__.__name__, From 990745547a817a5278ad40db6db0983b1ab219a0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 17:44:29 +0000 Subject: [PATCH 09/13] apply comments --- mlos_bench/mlos_bench/launcher.py | 48 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 05f7d1b5448..ae297355966 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -44,6 +44,7 @@ class Launcher: def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements + # pylint: disable=too-many-locals _LOG.info("Launch: %s", description) epilog = """ Additional --key=value pairs can be specified to augment or override @@ -185,9 +186,20 @@ def _parse_args( argv: Optional[List[str]], ) -> Tuple[argparse.Namespace, List[str], List[str]]: """Parse the command line arguments.""" - path_args = [] + class PathArgsTracker: + """Simple class to help track which arguments are paths.""" - parser.add_argument( + def __init__(self, parser: argparse.ArgumentParser): + self._parser = parser + self.path_args: List[str] = [] + + def add_argument(self, *args: Any, **kwargs: Any) -> None: + """Add an argument to the parser and track its destination.""" + self.path_args.append(self._parser.add_argument(*args, **kwargs).dest) + + path_args_tracker = PathArgsTracker(parser) + + path_args_tracker.add_argument( "--config", required=False, help=( @@ -198,15 +210,13 @@ def _parse_args( "for additional config examples 
for this and other arguments." ), ) - path_args.append("config") - parser.add_argument( + path_args_tracker.add_argument( "--log_file", "--log-file", required=False, help="Path to the log file. Use stdout if omitted.", ) - path_args.append("log_file") parser.add_argument( "--log_level", @@ -219,7 +229,7 @@ def _parse_args( ), ) - parser.add_argument( + path_args_tracker.add_argument( "--config_path", "--config-path", "--config-paths", @@ -229,10 +239,8 @@ def _parse_args( required=False, help="One or more locations of JSON config files.", ) - path_args.append("config_path") - path_args.append("config_paths") - parser.add_argument( + path_args_tracker.add_argument( "--service", "--services", nargs="+", @@ -243,17 +251,14 @@ def _parse_args( "of the service(s) for environment(s) to use." ), ) - path_args.append("service") - path_args.append("services") - parser.add_argument( + path_args_tracker.add_argument( "--environment", required=False, help="Path to JSON file with the configuration of the benchmarking environment(s).", ) - path_args.append("environment") - parser.add_argument( + path_args_tracker.add_argument( "--optimizer", required=False, help=( @@ -261,7 +266,6 @@ def _parse_args( "a single trial with default (or specified in --tunable_values)." ), ) - path_args.append("optimizer") parser.add_argument( "--trial_config_repeat_count", @@ -274,7 +278,7 @@ def _parse_args( ), ) - parser.add_argument( + path_args_tracker.add_argument( "--scheduler", required=False, help=( @@ -282,9 +286,8 @@ def _parse_args( "a single worker synchronous scheduler." ), ) - path_args.append("scheduler") - parser.add_argument( + path_args_tracker.add_argument( "--storage", required=False, help=( @@ -292,7 +295,6 @@ def _parse_args( "If omitted, use the ephemeral in-memory SQL storage." 
), ) - path_args.append("storage") parser.add_argument( "--random_init", @@ -312,7 +314,7 @@ def _parse_args( help="Seed to use with --random_init", ) - parser.add_argument( + path_args_tracker.add_argument( "--tunable_values", "--tunable-values", nargs="+", @@ -324,9 +326,8 @@ def _parse_args( "is specified) or as default values for the first run in optimization." ), ) - path_args.append("tunable_values") - parser.add_argument( + path_args_tracker.add_argument( "--globals", nargs="+", action="extend", @@ -336,7 +337,6 @@ def _parse_args( "[private] parameters of the benchmarking environment." ), ) - path_args.append("globals") parser.add_argument( "--no_teardown", @@ -371,7 +371,7 @@ def _parse_args( argv = sys.argv[1:].copy() (args, args_rest) = parser.parse_known_args(argv) - return (args, path_args, args_rest) + return (args, path_args_tracker.path_args, args_rest) @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: From f4e9c3f50147aa9438a3f1412039f6b257e4f6e0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 17:52:58 +0000 Subject: [PATCH 10/13] Ignore negative config_id from the scheduler schema validation --- mlos_bench/mlos_bench/launcher.py | 1 + mlos_bench/mlos_bench/schedulers/base_scheduler.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index ae297355966..1d51a1abf42 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -524,6 +524,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: # All config values can be overridden from global config config={ "experiment_id": "UNDEFINED - override from global config", + "config_id": -1, "trial_id": 0, "trial_config_repeat_count": 1, "teardown": self.teardown, diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 
dac362101b2..30805c189e0 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -99,7 +99,13 @@ def _validate_json_config(self, config: dict) -> None: "class": self.__class__.__module__ + "." + self.__class__.__name__, } if config: - json_config["config"] = config + json_config["config"] = config.copy() + # The json schema does not allow for -1 as a valid value for config_id. + # As it is just a default placeholder value, and not required, we can + # remove it from the config copy prior to validation safely. + config_id = json_config["config"].get("config_id") + if config_id is not None and isinstance(config_id, int) and config_id < 0: + json_config["config"].pop("config_id") ConfigSchema.SCHEDULER.validate(json_config) @property From 70647dc885b97ef75100564b9d5d763f02f31fb3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 19:36:04 +0000 Subject: [PATCH 11/13] whitespace --- mlos_bench/mlos_bench/launcher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 1d51a1abf42..10e1731ba43 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -186,6 +186,7 @@ def _parse_args( argv: Optional[List[str]], ) -> Tuple[argparse.Namespace, List[str], List[str]]: """Parse the command line arguments.""" + class PathArgsTracker: """Simple class to help track which arguments are paths.""" From 4220aacdc872699ba72f2752fc005f94aa916661 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 19:37:37 +0000 Subject: [PATCH 12/13] revert unnecessary lineswap --- mlos_bench/mlos_bench/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 10e1731ba43..274b35d91e4 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -525,8 +525,8 @@ def 
_load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: # All config values can be overridden from global config config={ "experiment_id": "UNDEFINED - override from global config", - "config_id": -1, "trial_id": 0, + "config_id": -1, "trial_config_repeat_count": 1, "teardown": self.teardown, }, From 2dcffea1c6d8b84f7674916db64bd9118088d4fc Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 19:41:25 +0000 Subject: [PATCH 13/13] remove comment --- mlos_bench/mlos_bench/launcher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 274b35d91e4..9024b15adfd 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -108,7 +108,6 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st args_rest=args_rest, global_config=cli_config_args, ) - # TODO: Can we generalize these two rules using excluded_cli_args? # experiment_id is generally taken from --globals files, but we also allow # overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output.