Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixups and testing for cli config file parsing #722

Merged
merged 17 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

"config": {
"trial_config_repeat_count": 3,
"max_trials": -1, // Limited only in hte Optimizer logic/config.
"max_trials": -1, // Limited only in the Optimizer logic/config.
"teardown": false
}
}
114 changes: 79 additions & 35 deletions mlos_bench/mlos_bench/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Launcher:

def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None):
# pylint: disable=too-many-statements
# pylint: disable=too-many-locals
_LOG.info("Launch: %s", description)
epilog = """
Additional --key=value pairs can be specified to augment or override
Expand All @@ -56,7 +57,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st
<https://github.com/microsoft/MLOS/tree/main/mlos_bench/>
"""
parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog)
(args, args_rest) = self._parse_args(parser, argv)
(args, path_args, args_rest) = self._parse_args(parser, argv)

# Bootstrap config loader: command line takes priority.
config_path = args.config_path or []
Expand Down Expand Up @@ -87,11 +88,25 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st

self._parent_service: Service = LocalExecService(parent=self._config_loader)

# Prepare global_config from a combination of global config files, cli
# configs, and cli args.
args_dict = vars(args)
# teardown (bool) conflicts with Environment configs that use it for shell
# commands (list), so we exclude it from copying over
excluded_cli_args = path_args + ["teardown"]
# Include (almost) any item from the cli config file that either isn't in
# the cli args at all or whose cli arg is missing.
cli_config_args = {
key: val
for (key, val) in config.items()
if (args_dict.get(key) is None) and key not in excluded_cli_args
}

self.global_config = self._load_config(
config.get("globals", []) + (args.globals or []),
(args.config_path or []) + config.get("config_path", []),
args_rest,
{key: val for (key, val) in config.items() if key not in vars(args)},
args_globals=config.get("globals", []) + (args.globals or []),
config_path=(args.config_path or []) + config.get("config_path", []),
args_rest=args_rest,
global_config=cli_config_args,
)
# experiment_id is generally taken from --globals files, but we also allow
# overriding it on the CLI.
Expand Down Expand Up @@ -168,19 +183,35 @@ def service(self) -> Service:
def _parse_args(
parser: argparse.ArgumentParser,
argv: Optional[List[str]],
) -> Tuple[argparse.Namespace, List[str]]:
) -> Tuple[argparse.Namespace, List[str], List[str]]:
"""Parse the command line arguments."""
parser.add_argument(

class PathArgsTracker:
    """
    Thin wrapper around an ArgumentParser that remembers which arguments were
    registered through it.

    Every argument added via this tracker has its ``dest`` name appended to
    ``path_args`` (in registration order).
    """

    def __init__(self, parser: argparse.ArgumentParser):
        self.path_args: List[str] = []
        self._parser = parser

    def add_argument(self, *args: Any, **kwargs: Any) -> None:
        """Forward the call to the wrapped parser, then record the new argument's
        ``dest``.
        """
        action = self._parser.add_argument(*args, **kwargs)
        self.path_args.append(action.dest)

path_args_tracker = PathArgsTracker(parser)

path_args_tracker.add_argument(
"--config",
required=False,
help="Main JSON5 configuration file. Its keys are the same as the"
+ " command line options and can be overridden by the latter.\n"
+ "\n"
+ " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
+ " for additional config examples for this and other arguments.",
help=(
"Main JSON5 configuration file. Its keys are the same as the "
"command line options and can be overridden by the latter.\n"
"\n"
"See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ "
"for additional config examples for this and other arguments."
motus marked this conversation as resolved.
Show resolved Hide resolved
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--log_file",
"--log-file",
required=False,
Expand All @@ -192,11 +223,13 @@ def _parse_args(
"--log-level",
required=False,
type=str,
help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}."
+ " Set to DEBUG for debug, WARNING for warnings only.",
help=(
f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}. "
"Set to DEBUG for debug, WARNING for warnings only."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--config_path",
"--config-path",
"--config-paths",
Expand All @@ -207,7 +240,7 @@ def _parse_args(
help="One or more locations of JSON config files.",
)

parser.add_argument(
path_args_tracker.add_argument(
"--service",
"--services",
nargs="+",
Expand All @@ -219,17 +252,19 @@ def _parse_args(
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--environment",
required=False,
help="Path to JSON file with the configuration of the benchmarking environment(s).",
)

parser.add_argument(
path_args_tracker.add_argument(
"--optimizer",
required=False,
help="Path to the optimizer configuration file. If omitted, run"
+ " a single trial with default (or specified in --tunable_values).",
help=(
"Path to the optimizer configuration file. If omitted, run "
"a single trial with default (or specified in --tunable_values)."
),
)

parser.add_argument(
Expand All @@ -243,18 +278,22 @@ def _parse_args(
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--scheduler",
required=False,
help="Path to the scheduler configuration file. By default, use"
+ " a single worker synchronous scheduler.",
help=(
"Path to the scheduler configuration file. By default, use "
"a single worker synchronous scheduler."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--storage",
required=False,
help="Path to the storage configuration file."
+ " If omitted, use the ephemeral in-memory SQL storage.",
help=(
"Path to the storage configuration file. "
"If omitted, use the ephemeral in-memory SQL storage."
),
)

parser.add_argument(
Expand All @@ -275,24 +314,28 @@ def _parse_args(
help="Seed to use with --random_init",
)

parser.add_argument(
path_args_tracker.add_argument(
"--tunable_values",
"--tunable-values",
nargs="+",
action="extend",
required=False,
help="Path to one or more JSON files that contain values of the tunable"
+ " parameters. This can be used for a single trial (when no --optimizer"
+ " is specified) or as default values for the first run in optimization.",
help=(
"Path to one or more JSON files that contain values of the tunable "
"parameters. This can be used for a single trial (when no --optimizer "
"is specified) or as default values for the first run in optimization."
),
)

parser.add_argument(
path_args_tracker.add_argument(
"--globals",
nargs="+",
action="extend",
required=False,
help="Path to one or more JSON files that contain additional"
+ " [private] parameters of the benchmarking environment.",
help=(
"Path to one or more JSON files that contain additional "
"[private] parameters of the benchmarking environment."
),
)

parser.add_argument(
Expand Down Expand Up @@ -328,7 +371,7 @@ def _parse_args(
argv = sys.argv[1:].copy()
(args, args_rest) = parser.parse_known_args(argv)

return (args, args_rest)
return (args, path_args_tracker.path_args, args_rest)

@staticmethod
def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:
Expand Down Expand Up @@ -361,6 +404,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]:

def _load_config(
self,
*,
motus marked this conversation as resolved.
Show resolved Hide resolved
args_globals: Iterable[str],
config_path: Iterable[str],
args_rest: Iterable[str],
Expand Down
11 changes: 7 additions & 4 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,23 @@ def __exit__(
@property
def current_iteration(self) -> int:
    """
    The current number of iterations (suggestions) registered.

    Note: this may or may not be the same as the number of configurations.
    See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
    """
    return self._iter

# TODO: finish renaming iterations to suggestions.
# See Also: https://github.com/microsoft/MLOS/pull/713

@property
def max_iterations(self) -> int:
    """
    The maximum number of iterations (suggestions) to run.

    Note: this may or may not be the same as the number of configurations.
    See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
    """
    return self._max_iter

Expand Down
32 changes: 32 additions & 0 deletions mlos_bench/mlos_bench/schedulers/base_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pytz import UTC
from typing_extensions import Literal

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.base_environment import Environment
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.storage.base_storage import Storage
Expand Down Expand Up @@ -64,6 +65,7 @@ def __init__( # pylint: disable=too-many-arguments
source=global_config,
required_keys=["experiment_id", "trial_id"],
)
self._validate_json_config(config)
motus marked this conversation as resolved.
Show resolved Hide resolved

self._experiment_id = config["experiment_id"].strip()
self._trial_id = int(config["trial_id"])
Expand All @@ -88,6 +90,36 @@ def __init__( # pylint: disable=too-many-arguments

_LOG.debug("Scheduler instantiated: %s :: %s", self, config)

def _validate_json_config(self, config: dict) -> None:
    """
    Validate a config that was handed to this class directly.

    Rebuilds the basic json config this class might have been instantiated from
    (a "class" entry plus, when non-empty, a "config" entry) so that configs
    provided outside the file loading mechanism still go through schema
    validation.
    """
    cls = self.__class__
    json_config: dict = {"class": f"{cls.__module__}.{cls.__name__}"}
    if config:
        config_copy = config.copy()
        # The json schema does not allow for -1 as a valid value for config_id.
        # It is only a default placeholder and is not required, so a negative
        # integer is dropped from the copy before validating. The caller's
        # dict is left untouched.
        config_id = config_copy.get("config_id")
        if isinstance(config_id, int) and config_id < 0:
            del config_copy["config_id"]
        json_config["config"] = config_copy
    ConfigSchema.SCHEDULER.validate(json_config)

@property
def trial_config_repeat_count(self) -> int:
    """Number of trials that are run for each individual config."""
    repeat_count: int = self._trial_config_repeat_count
    return repeat_count

@property
def max_trials(self) -> int:
    """Upper bound on the number of trials to run for a given experiment; -1 means
    no limit.
    """
    trial_limit: int = self._max_trials
    return trial_limit
motus marked this conversation as resolved.
Show resolved Hide resolved

def __repr__(self) -> str:
"""
Produce a human-readable version of the Scheduler (mostly for logging).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"services/remote/mock/mock_fileshare_service.jsonc"
],

"trial_config_repeat_count": 1,
"trial_config_repeat_count": 2,

"random_seed": 42,
"random_init": true
Expand Down
Loading
Loading