diff --git a/pyproject.toml b/pyproject.toml index bda99459d..5df64aa97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,17 @@ module = [ "smartsim._core.control.controller", "smartsim._core.control.manifest", "smartsim._core.entrypoints.dragon_client", - "smartsim._core.launcher.*", + "smartsim._core.launcher.colocated", + "smartsim._core.launcher.launcher", + "smartsim._core.launcher.local.*", + "smartsim._core.launcher.lsf.*", + "smartsim._core.launcher.pbs.*", + "smartsim._core.launcher.sge.*", + "smartsim._core.launcher.slurm.*", + "smartsim._core.launcher.step.*", + "smartsim._core.launcher.stepInfo", + "smartsim._core.launcher.stepMapping", + "smartsim._core.launcher.taskManager", "smartsim._core.utils.serialize", "smartsim._core.utils.telemetry.*", "smartsim.database.*", diff --git a/setup.py b/setup.py index 5a382a5ea..4c2479f71 100644 --- a/setup.py +++ b/setup.py @@ -182,7 +182,6 @@ def has_ext_modules(_placeholder): "types-tqdm", "types-tensorflow==2.12.0.9", "types-setuptools", - "typing_extensions>=4.1.0,<4.6", ], "docs": [ "Sphinx==6.2.1", @@ -226,6 +225,7 @@ def has_ext_modules(_placeholder): "pygithub>=2.3.0", "numpy<2", "smartredis>=0.5,<0.6", + "typing_extensions>=4.1.0,<4.6", ], cmdclass={ "build_py": SmartSimBuild, diff --git a/smartsim/_core/config/config.py b/smartsim/_core/config/config.py index 374457f3a..1012129e9 100644 --- a/smartsim/_core/config/config.py +++ b/smartsim/_core/config/config.py @@ -161,10 +161,7 @@ def dragon_dotenv(self) -> Path: @property def dragon_server_path(self) -> t.Optional[str]: - return os.getenv( - "SMARTSIM_DRAGON_SERVER_PATH", - os.getenv("SMARTSIM_DRAGON_SERVER_PATH_EXP", None), - ) + return os.getenv("SMARTSIM_DRAGON_SERVER_PATH", None) @property def dragon_server_timeout(self) -> int: diff --git a/smartsim/_core/launcher/dragon/dragonConnector.py b/smartsim/_core/launcher/dragon/dragonConnector.py index 0cd68c24e..60fbf3ce7 100644 --- a/smartsim/_core/launcher/dragon/dragonConnector.py +++ b/smartsim/_core/launcher/dragon/dragonConnector.py @@ -57,6 +57,11 @@ ) from ...utils.network import find_free_port, get_best_interface_and_address +if t.TYPE_CHECKING: + from typing_extensions import Self + + from smartsim.experiment import Experiment + logger = get_logger(__name__) _SchemaT = t.TypeVar("_SchemaT", bound=t.Union[DragonRequest, DragonResponse]) @@ -69,29 +74,27 @@ class DragonConnector: to start a Dragon server and communicate with it. """ - def __init__(self) -> None: + def __init__(self, path: str | os.PathLike[str]) -> None: self._context: zmq.Context[t.Any] = zmq.Context.instance() self._context.setsockopt(zmq.REQ_CORRELATE, 1) self._context.setsockopt(zmq.REQ_RELAXED, 1) self._authenticator: t.Optional[zmq.auth.thread.ThreadAuthenticator] = None config = get_config() self._reset_timeout(config.dragon_server_timeout) + + # TODO: We should be able to make these "non-optional" + # by simply moving the impl of + # `DragonConnectior.connect_to_dragon` to this method. This is + # fine as we expect the that method should only be called once + # without hitting a guard clause. self._dragon_head_socket: t.Optional[zmq.Socket[t.Any]] = None self._dragon_head_process: t.Optional[subprocess.Popen[bytes]] = None # Returned by dragon head, useful if shutdown is to be requested # but process was started by another connector self._dragon_head_pid: t.Optional[int] = None - self._dragon_server_path = config.dragon_server_path + self._dragon_server_path = _resolve_dragon_path(path) logger.debug(f"Dragon Server path was set to {self._dragon_server_path}") self._env_vars: t.Dict[str, str] = {} - if self._dragon_server_path is None: - raise SmartSimError( - "DragonConnector could not find the dragon server path. " - "This should not happen if the Connector was started by an " - "experiment.\nIf the DragonConnector was started manually, " - "then the environment variable SMARTSIM_DRAGON_SERVER_PATH " - "should be set to an existing directory." - ) @property def is_connected(self) -> bool: @@ -293,8 +296,7 @@ def connect_to_dragon(self) -> None: "Establishing connection with Dragon server or starting a new one..." ) - path = _resolve_dragon_path(self._dragon_server_path) - + path = self._dragon_server_path self._connect_to_existing_server(path) if self.is_connected: return @@ -520,8 +522,25 @@ def _dragon_cleanup( def _resolve_dragon_path(fallback: t.Union[str, "os.PathLike[str]"]) -> Path: - dragon_server_path = get_config().dragon_server_path or os.path.join( - fallback, ".smartsim", "dragon" + """Return the path at which a user should set up a dragon server. + + The order of path resolution is: + 1) If the the user has set a global dragon path via + `Config.dragon_server_path` use that without alteration. + 2) Use the `fallback` path which should be the path to an existing + directory. Append the default dragon server subdirectory defined by + `Config.dragon_default_subdir` + + Currently this function will raise if a user attempts to specify multiple + dragon server paths via `:` seperation. + + :param fallback: The path to an existing directory on the file system to + use if the global dragon directory is not set. + :returns: The path to directory in which the dragon server should run. + """ + config = get_config() + dragon_server_path = config.dragon_server_path or os.path.join( + fallback, config.dragon_default_subdir ) dragon_server_paths = dragon_server_path.split(":") if len(dragon_server_paths) > 1: diff --git a/smartsim/_core/launcher/dragon/dragonLauncher.py b/smartsim/_core/launcher/dragon/dragonLauncher.py index 9078fed54..40d8c0f04 100644 --- a/smartsim/_core/launcher/dragon/dragonLauncher.py +++ b/smartsim/_core/launcher/dragon/dragonLauncher.py @@ -30,6 +30,7 @@ import typing as t from smartsim._core.schemas.dragonRequests import DragonRunPolicy +from smartsim.types import LaunchedJobID from ...._core.launcher.stepMapping import StepMap from ....error import LauncherError, SmartSimError @@ -44,6 +45,7 @@ from ....status import SmartSimStatus from ...schemas import ( DragonRunRequest, + DragonRunRequestView, DragonRunResponse, DragonStopRequest, DragonStopResponse, @@ -57,6 +59,11 @@ from ..stepInfo import StepInfo from .dragonConnector import DragonConnector, _SchemaT +if t.TYPE_CHECKING: + from typing_extensions import Self + + from smartsim.experiment import Experiment + logger = get_logger(__name__) @@ -74,9 +81,9 @@ class DragonLauncher(WLMLauncher): the Job Manager to interact with it. """ - def __init__(self) -> None: + def __init__(self, server_path: str | os.PathLike[str]) -> None: super().__init__() - self._connector = DragonConnector() + self._connector = DragonConnector(server_path) """Connector used to start and interact with the Dragon server""" self._slurm_launcher = SlurmLauncher() """Slurm sub-launcher, used only for batch jobs""" @@ -121,6 +128,22 @@ def add_step_to_mapping_table(self, name: str, step_map: StepMap) -> None: ) sublauncher.add_step_to_mapping_table(name, sublauncher_step_map) + @classmethod + def create(cls, exp: Experiment) -> Self: + self = cls(exp.exp_path) + self._connector.connect_to_dragon() # pylint: disable=protected-access + return self + + def start( + self, args_and_policy: tuple[DragonRunRequestView, DragonRunPolicy] + ) -> LaunchedJobID: + req_args, policy = args_and_policy + self._connector.load_persisted_env() + merged_env = self._connector.merge_persisted_env(os.environ.copy()) + req = DragonRunRequest(**dict(req_args), current_env=merged_env, policy=policy) + res = _assert_schema_type(self._connector.send_request(req), DragonRunResponse) + return LaunchedJobID(res.step_id) + def run(self, step: Step) -> t.Optional[str]: """Run a job step through Slurm @@ -167,15 +190,12 @@ def run(self, step: Step) -> t.Optional[str]: run_args = step.run_settings.run_args req_env = step.run_settings.env_vars self._connector.load_persisted_env() - merged_env = self._connector.merge_persisted_env(os.environ.copy()) nodes = int(run_args.get("nodes", None) or 1) tasks_per_node = int(run_args.get("tasks-per-node", None) or 1) - policy = DragonRunPolicy.from_run_args(run_args) - - response = _assert_schema_type( - self._connector.send_request( - DragonRunRequest( + step_id = self.start( + ( + DragonRunRequestView( exe=cmd[0], exe_args=cmd[1:], path=step.cwd, @@ -183,15 +203,12 @@ def run(self, step: Step) -> t.Optional[str]: nodes=nodes, tasks_per_node=tasks_per_node, env=req_env, - current_env=merged_env, output_file=out, error_file=err, - policy=policy, - ) - ), - DragonRunResponse, + ), + policy, + ) ) - step_id = str(response.step_id) else: # pylint: disable-next=consider-using-with out_strm = open(out, "w+", encoding="utf-8") @@ -325,3 +342,53 @@ def _assert_schema_type(obj: object, typ: t.Type[_SchemaT], /) -> _SchemaT: if not isinstance(obj, typ): raise TypeError(f"Expected schema of type `{typ}`, but got {type(obj)}") return obj + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +# TODO: Remove this registry and move back to builder file after fixing +# circular import caused by `DragonLauncher.supported_rs` +# ----------------------------------------------------------------------------- +from smartsim.settings.arguments.launch.dragon import DragonLaunchArguments +from smartsim.settings.dispatch import ExecutableProtocol, dispatch + + +def _as_run_request_args_and_policy( + run_req_args: DragonLaunchArguments, + exe: ExecutableProtocol, + env: t.Mapping[str, str | None], +) -> tuple[DragonRunRequestView, DragonRunPolicy]: + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + # FIXME: This type is 100% unacceptable, but I don't want to spend too much + # time on fixing the dragon launcher API. Something that we need to + # revisit in the future though. + exe_, *args = exe.as_program_arguments() + run_args = dict[str, "int | str | float | None"](run_req_args._launch_args) + policy = DragonRunPolicy.from_run_args(run_args) + return ( + DragonRunRequestView( + exe=exe_, + exe_args=args, + # FIXME: Currently this is hard coded because the schema requires + # it, but in future, it is almost certainly necessary that + # this will need to be injected by the user or by us to have + # the command execute next to any generated files. A similar + # problem exists for the other settings. + # TODO: Find a way to inject this path + path=os.getcwd(), + env=env, + # TODO: Not sure how this info is injected + name=None, + output_file=None, + error_file=None, + **run_args, + ), + policy, + ) + + +dispatch( + DragonLaunchArguments, + with_format=_as_run_request_args_and_policy, + to_launcher=DragonLauncher, +) +# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/smartsim/_core/utils/helpers.py b/smartsim/_core/utils/helpers.py index a1c0d7aa2..d193b6604 100644 --- a/smartsim/_core/utils/helpers.py +++ b/smartsim/_core/utils/helpers.py @@ -27,6 +27,8 @@ """ A file of helper functions for SmartSim """ +from __future__ import annotations + import base64 import collections.abc import os @@ -45,6 +47,7 @@ from types import FrameType +_T = t.TypeVar("_T") _TSignalHandlerFn = t.Callable[[int, t.Optional["FrameType"]], object] @@ -411,6 +414,28 @@ def is_crayex_platform() -> bool: return result.is_cray +def first(predicate: t.Callable[[_T], bool], iterable: t.Iterable[_T]) -> _T | None: + """Return the first instance of an iterable that meets some precondition. + Any elements of the iterable that do not meet the precondition will be + forgotten. If no item in the iterable is found that meets the predicate, + `None` is returned. This is roughly equivalent to + + .. highlight:: python + .. code-block:: python + + next(filter(predicate, iterable), None) + + but does not require the predicate to be a type guard to type check. + + :param predicate: A function that returns `True` or `False` given a element + of the iterable + :param iterable: An iterable that yields elements to evealuate + :returns: The first element of the iterable to make the the `predicate` + return `True` + """ + return next((item for item in iterable if predicate(item)), None) + + @t.final class SignalInterceptionStack(collections.abc.Collection[_TSignalHandlerFn]): """Registers a stack of callables to be called when a signal is diff --git a/smartsim/error/errors.py b/smartsim/error/errors.py index 8500e4947..3f32bd3f0 100644 --- a/smartsim/error/errors.py +++ b/smartsim/error/errors.py @@ -112,6 +112,10 @@ class LauncherUnsupportedFeature(LauncherError): """Raised when the launcher does not support a given method""" +class LauncherNotFoundError(LauncherError): + """A requested launcher could not be found""" + + class AllocationError(LauncherError): """Raised when there is a problem with the user WLM allocation""" diff --git a/smartsim/experiment.py b/smartsim/experiment.py index fa80be037..35d1a5eb1 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -26,15 +26,19 @@ # pylint: disable=too-many-lines +from __future__ import annotations + import os import os.path as osp +import textwrap import typing as t from os import environ, getcwd from tabulate import tabulate from smartsim._core.config import CONFIG -from smartsim.error.errors import SSUnsupportedError +from smartsim.error import errors +from smartsim.settings import dispatch from smartsim.status import SmartSimStatus from ._core import Controller, Generator, Manifest, previewrenderer @@ -48,8 +52,11 @@ ) from .error import SmartSimError from .log import ctx_exp_path, get_logger, method_contextualizer -from .settings import BatchSettings, Container, RunSettings -from .wlm import detect_launcher + +if t.TYPE_CHECKING: + from smartsim.launchable.job import Job + from smartsim.settings.dispatch import ExecutableProtocol, LauncherProtocol + from smartsim.types import LaunchedJobID logger = get_logger(__name__) @@ -98,19 +105,14 @@ class Experiment: and utilized throughout runtime. """ - def __init__( - self, - name: str, - exp_path: t.Optional[str] = None, - launcher: str = "local", - ): + def __init__(self, name: str, exp_path: str | None = None): """Initialize an Experiment instance. With the default settings, the Experiment will use the local launcher, which will start all Experiment created instances on the localhost. - Example of initializing an Experiment with the local launcher + Example of initializing an Experiment .. highlight:: python .. code-block:: python @@ -143,10 +145,6 @@ def __init__( :param name: name for the ``Experiment`` :param exp_path: path to location of ``Experiment`` directory - :param launcher: type of launcher being used, options are "slurm", "pbs", - "lsf", "sge", or "local". If set to "auto", - an attempt will be made to find an available launcher - on the system. """ self.name = name if exp_path: @@ -159,135 +157,75 @@ def __init__( exp_path = osp.join(getcwd(), name) self.exp_path = exp_path + """The path under which the experiment operate""" - self._launcher = launcher.lower() - - if self._launcher == "auto": - self._launcher = detect_launcher() - if self._launcher == "cobalt": - raise SSUnsupportedError("Cobalt launcher is no longer supported.") + self._active_launchers: set[LauncherProtocol[t.Any]] = set() + """The active launchers created, used, and reused by the experiment""" - if launcher == "dragon": - self._set_dragon_server_path() - - self._control = Controller(launcher=self._launcher) - - self.fs_identifiers: t.Set[str] = set() + self._fs_identifiers: t.Set[str] = set() + """Set of feature store identifiers currently in use by this + experiment + """ self._telemetry_cfg = ExperimentTelemetryConfiguration() - - def _set_dragon_server_path(self) -> None: - """Set path for dragon server through environment varialbes""" - if not "SMARTSIM_DRAGON_SERVER_PATH" in environ: - environ["SMARTSIM_DRAGON_SERVER_PATH_EXP"] = osp.join( - self.exp_path, CONFIG.dragon_default_subdir - ) - - @_contextualize - def start( - self, - *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], - block: bool = True, - summary: bool = False, - kill_on_interrupt: bool = True, - ) -> None: - """Start passed instances using Experiment launcher - - Any instance ``Application``, ``Ensemble`` or ``FeatureStore`` - instance created by the Experiment can be passed as - an argument to the start method. - - .. highlight:: python - .. code-block:: python - - exp = Experiment(name="my_exp", launcher="slurm") - settings = exp.create_run_settings(exe="./path/to/binary") - application = exp.create_application("my_application", settings) - exp.start(application) - - Multiple entity instances can also be passed to the start method - at once no matter which type of instance they are. These will - all be launched together. - - .. highlight:: python - .. code-block:: python - - exp.start(application_1, application_2, fs, ensemble, block=True) - # alternatively - stage_1 = [application_1, application_2, fs, ensemble] - exp.start(*stage_1, block=True) - - - If `block==True` the Experiment will poll the launched instances - at runtime until all non-feature store jobs have completed. Feature store - jobs *must* be killed by the user by passing them to - ``Experiment.stop``. This allows for multiple stages of a workflow - to produce to and consume from the same FeatureStore feature store. - - If `kill_on_interrupt=True`, then all jobs launched by this - experiment are guaranteed to be killed when ^C (SIGINT) signal is - received. If `kill_on_interrupt=False`, then it is not guaranteed - that all jobs launched by this experiment will be killed, and the - zombie processes will need to be manually killed. - - :param block: block execution until all non-feature store - jobs are finished - :param summary: print a launch summary prior to launch - :param kill_on_interrupt: flag for killing jobs when ^C (SIGINT) - signal is received. + """Switch to specify if telemetry data should be produced for this + experiment """ - start_manifest = Manifest(*args) - self._create_entity_dir(start_manifest) - try: - if summary: - self._launch_summary(start_manifest) - self._control.start( - exp_name=self.name, - exp_path=self.exp_path, - manifest=start_manifest, - block=block, - kill_on_interrupt=kill_on_interrupt, - ) - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def stop( - self, *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> None: - """Stop specific instances launched by this ``Experiment`` - Instances of ``Application``, ``Ensemble`` and ``FeatureStore`` - can all be passed as arguments to the stop method. + def start(self, *jobs: Job) -> tuple[LaunchedJobID, ...]: + """Execute a collection of `Job` instances. - Whichever launcher was specified at Experiment initialization - will be used to stop the instance. For example, which using - the slurm launcher, this equates to running `scancel` on the - instance. - - Example - - .. highlight:: python - .. code-block:: python + :param jobs: A collection of other job instances to start + :returns: A sequence of ids with order corresponding to the sequence of + jobs that can be used to query or alter the status of that + particular execution of the job. + """ + return self._dispatch(dispatch.DEFAULT_DISPATCHER, *jobs) + + def _dispatch( + self, dispatcher: dispatch.Dispatcher, job: Job, *jobs: Job + ) -> tuple[LaunchedJobID, ...]: + """Dispatch a series of jobs with a particular dispatcher + + :param dispatcher: The dispatcher that should be used to determine how + to start a job based on its launch settings. + :param job: The first job instance to dispatch + :param jobs: A collection of other job instances to dispatch + :returns: A sequence of ids with order corresponding to the sequence of + jobs that can be used to query or alter the status of that + particular dispatch of the job. + """ - exp.stop(application) - # multiple - exp.stop(application_1, application_2, fs, ensemble) + def execute_dispatch(job: Job) -> LaunchedJobID: + args = job.launch_settings.launch_args + env = job.launch_settings.env_vars + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + # FIXME: Remove this cast after `SmartSimEntity` conforms to + # protocol. For now, live with the "dangerous" type cast + # --------------------------------------------------------------------- + exe = t.cast("ExecutableProtocol", job.entity) + # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + dispatch = dispatcher.get_dispatch(args) + try: + # Check to see if one of the existing launchers can be + # configured to handle the launch arguments ... + launch_config = dispatch.configure_first_compatible_launcher( + from_available_launchers=self._active_launchers, + with_arguments=args, + ) + except errors.LauncherNotFoundError: + # ... otherwise create a new launcher that _can_ handle the + # launch arguments and configure _that_ one + launch_config = dispatch.create_new_launcher_configuration( + for_experiment=self, with_arguments=args + ) + # Save the underlying launcher instance. That way we do not need to + # spin up a launcher instance for each individual job, and it makes + # it easier to monitor job statuses + # pylint: disable-next=protected-access + self._active_launchers.add(launch_config._adapted_launcher) + return launch_config.start(exe, env) - :param args: One or more SmartSimEntity or EntitySequence objects. - :raises TypeError: if wrong type - :raises SmartSimError: if stop request fails - """ - stop_manifest = Manifest(*args) - try: - for entity in stop_manifest.applications: - self._control.stop_entity(entity) - fss = stop_manifest.fss - for fs in fss: - self._control.stop_fs(fs) - except SmartSimError as e: - logger.error(e) - raise + return execute_dispatch(job), *map(execute_dispatch, jobs) @_contextualize def generate( @@ -323,128 +261,6 @@ def generate( logger.error(e) raise - @_contextualize - def poll( - self, interval: int = 10, verbose: bool = True, kill_on_interrupt: bool = True - ) -> None: - """Monitor jobs through logging to stdout. - - This method should only be used if jobs were launched - with ``Experiment.start(block=False)`` - - The internal specified will control how often the - logging is performed, not how often the polling occurs. - By default, internal polling is set to every second for - local launcher jobs and every 10 seconds for all other - launchers. - - If internal polling needs to be slower or faster based on - system or site standards, set the ``SMARTSIM_JM_INTERNAL`` - environment variable to control the internal polling interval - for SmartSim. - - For more verbose logging output, the ``SMARTSIM_LOG_LEVEL`` - environment variable can be set to `debug` - - If `kill_on_interrupt=True`, then all jobs launched by this - experiment are guaranteed to be killed when ^C (SIGINT) signal is - received. If `kill_on_interrupt=False`, then it is not guaranteed - that all jobs launched by this experiment will be killed, and the - zombie processes will need to be manually killed. - - :param interval: frequency (in seconds) of logging to stdout - :param verbose: set verbosity - :param kill_on_interrupt: flag for killing jobs when SIGINT is received - :raises SmartSimError: if poll request fails - """ - try: - self._control.poll(interval, verbose, kill_on_interrupt=kill_on_interrupt) - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def finished(self, entity: SmartSimEntity) -> bool: - """Query if a job has completed. - - An instance of ``application`` or ``Ensemble`` can be passed - as an argument. - - Passing ``FeatureStore`` will return an error as a - feature store deployment is never finished until stopped - by the user. - - :param entity: object launched by this ``Experiment`` - :returns: True if the job has finished, False otherwise - :raises SmartSimError: if entity has not been launched - by this ``Experiment`` - """ - try: - return self._control.finished(entity) - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def get_status( - self, *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> t.List[SmartSimStatus]: - """Query the status of launched entity instances - - Return a smartsim.status string representing - the status of the launched instance. - - .. highlight:: python - .. code-block:: python - - exp.get_status(application) - - As with an Experiment method, multiple instance of - varying types can be passed to and all statuses will - be returned at once. - - .. highlight:: python - .. code-block:: python - - statuses = exp.get_status(application, ensemble, featurestore) - complete = [s == smartsim.status.STATUS_COMPLETED for s in statuses] - assert all(complete) - - :returns: status of the instances passed as arguments - :raises SmartSimError: if status retrieval fails - """ - try: - manifest = Manifest(*args) - statuses: t.List[SmartSimStatus] = [] - for entity in manifest.applications: - statuses.append(self._control.get_entity_status(entity)) - for entity_list in manifest.all_entity_lists: - statuses.extend(self._control.get_entity_list_status(entity_list)) - return statuses - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def reconnect_feature_store(self, checkpoint: str) -> FeatureStore: - """Reconnect to a running ``FeatureStore`` - - This method can be used to connect to a ``FeatureStore`` deployment - that was launched by a previous ``Experiment``. This can be - helpful in the case where separate runs of an ``Experiment`` - wish to use the same ``FeatureStore`` instance currently - running on a system. - - :param checkpoint: the `smartsim_db.dat` file created - when an ``FeatureStore`` is launched - """ - try: - feature_store = self._control.reload_saved_fs(checkpoint) - return feature_store - except SmartSimError as e: - logger.error(e) - raise - def preview( self, *args: t.Any, @@ -474,9 +290,6 @@ def preview( output to stdout. Defaults to None. """ - # Retrieve any active feature store jobs - active_fsjobs = self._control.active_feature_store_jobs - preview_manifest = Manifest(*args) previewrenderer.render( @@ -485,13 +298,8 @@ def preview( verbosity_level, output_format, output_filename, - active_fsjobs, ) - @property - def launcher(self) -> str: - return self._launcher - @_contextualize def summary(self, style: str = "github") -> str: """Return a summary of the ``Experiment`` @@ -504,7 +312,6 @@ def summary(self, style: str = "github") -> str: https://github.com/astanin/python-tabulate :return: tabulate string of ``Experiment`` history """ - values = [] headers = [ "Name", "Entity-Type", @@ -514,21 +321,8 @@ def summary(self, style: str = "github") -> str: "Status", "Returncode", ] - for job in self._control.get_jobs().values(): - for run in range(job.history.runs + 1): - values.append( - [ - job.entity.name, - job.entity.type, - job.history.jids[run], - run, - f"{job.history.job_times[run]:.4f}", - job.history.statuses[run], - job.history.returns[run], - ] - ) return tabulate( - values, + [], headers, showindex=True, tablefmt=style, @@ -544,35 +338,11 @@ def telemetry(self) -> TelemetryConfiguration: """ return self._telemetry_cfg - def _launch_summary(self, manifest: Manifest) -> None: - """Experiment pre-launch summary of entities that will be launched - - :param manifest: Manifest of deployables. - """ - - summary = "\n\n=== Launch Summary ===\n" - summary += f"Experiment: {self.name}\n" - summary += f"Experiment Path: {self.exp_path}\n" - summary += f"Launcher: {self._launcher}\n" - if manifest.applications: - summary += f"Applications: {len(manifest.applications)}\n" - - if self._control.feature_store_active: - summary += "Feature Store Status: active\n" - elif manifest.fss: - summary += "Feature Store Status: launching\n" - else: - summary += "Feature Store Status: inactive\n" - - summary += f"\n{str(manifest)}" - - logger.info(summary) - def _create_entity_dir(self, start_manifest: Manifest) -> None: def create_entity_dir( entity: t.Union[FeatureStore, Application, Ensemble] ) -> None: - if not os.path.isdir(entity.path): + if not osp.isdir(entity.path): os.makedirs(entity.path) for application in start_manifest.applications: @@ -589,11 +359,11 @@ def __str__(self) -> str: def _append_to_fs_identifier_list(self, fs_identifier: str) -> None: """Check if fs_identifier already exists when calling create_feature_store""" - if fs_identifier in self.fs_identifiers: + if fs_identifier in self._fs_identifiers: logger.warning( f"A feature store with the identifier {fs_identifier} has already been made " "An error will be raised if multiple Feature Stores are started " "with the same identifier" ) # Otherwise, add - self.fs_identifiers.add(fs_identifier) + self._fs_identifiers.add(fs_identifier) diff --git a/smartsim/settings/builders/__init__.py b/smartsim/settings/arguments/__init__.py similarity index 90% rename from smartsim/settings/builders/__init__.py rename to smartsim/settings/arguments/__init__.py index 9cfdd5f9c..cd216526c 100644 --- a/smartsim/settings/builders/__init__.py +++ b/smartsim/settings/arguments/__init__.py @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from .batchArgBuilder import BatchArgBuilder -from .launchArgBuilder import LaunchArgBuilder +from .batchArguments import BatchArguments +from .launchArguments import LaunchArguments -__all__ = ["LaunchArgBuilder", "BatchArgBuilder"] +__all__ = ["LaunchArguments", "BatchArguments"] diff --git a/smartsim/settings/builders/batch/__init__.py b/smartsim/settings/arguments/batch/__init__.py similarity index 87% rename from smartsim/settings/builders/batch/__init__.py rename to smartsim/settings/arguments/batch/__init__.py index 41dcbbfc2..e6dc055ea 100644 --- a/smartsim/settings/builders/batch/__init__.py +++ b/smartsim/settings/arguments/batch/__init__.py @@ -24,12 +24,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from .lsf import BsubBatchArgBuilder -from .pbs import QsubBatchArgBuilder -from .slurm import SlurmBatchArgBuilder +from .lsf import BsubBatchArguments +from .pbs import QsubBatchArguments +from .slurm import SlurmBatchArguments __all__ = [ - "BsubBatchArgBuilder", - "QsubBatchArgBuilder", - "SlurmBatchArgBuilder", + "BsubBatchArguments", + "QsubBatchArguments", + "SlurmBatchArguments", ] diff --git a/smartsim/settings/builders/batch/lsf.py b/smartsim/settings/arguments/batch/lsf.py similarity index 90% rename from smartsim/settings/builders/batch/lsf.py rename to smartsim/settings/arguments/batch/lsf.py index 4bb7bbd27..10dc85763 100644 --- a/smartsim/settings/builders/batch/lsf.py +++ b/smartsim/settings/arguments/batch/lsf.py @@ -32,14 +32,17 @@ from ...batchCommand import SchedulerType from ...common import StringArgument -from ..batchArgBuilder import BatchArgBuilder +from ..batchArguments import BatchArguments logger = get_logger(__name__) -class BsubBatchArgBuilder(BatchArgBuilder): +class BsubBatchArguments(BatchArguments): def scheduler_str(self) -> str: - """Get the string representation of the scheduler""" + """Get the string representation of the scheduler + + :returns: The string representation of the scheduler + """ return SchedulerType.Lsf.value def set_walltime(self, walltime: str) -> None: @@ -130,7 +133,7 @@ def set_queue(self, queue: str) -> None: def format_batch_args(self) -> t.List[str]: """Get the formatted batch arguments for a preview - :return: list of batch arguments for Qsub + :return: list of batch arguments for `bsub` """ opts = [] @@ -146,5 +149,11 @@ def format_batch_args(self) -> t.List[str]: return opts def set(self, key: str, value: str | None) -> None: + """Set an arbitrary scheduler argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ # Store custom arguments in the launcher_args self._scheduler_args[key] = value diff --git a/smartsim/settings/builders/batch/pbs.py b/smartsim/settings/arguments/batch/pbs.py similarity index 92% rename from smartsim/settings/builders/batch/pbs.py rename to smartsim/settings/arguments/batch/pbs.py index d04b4beba..192874c16 100644 --- a/smartsim/settings/builders/batch/pbs.py +++ b/smartsim/settings/arguments/batch/pbs.py @@ -34,14 +34,17 @@ from ....error import SSConfigError from ...batchCommand import SchedulerType from ...common import StringArgument -from ..batchArgBuilder import BatchArgBuilder +from ..batchArguments import BatchArguments logger = get_logger(__name__) -class QsubBatchArgBuilder(BatchArgBuilder): +class QsubBatchArguments(BatchArguments): def scheduler_str(self) -> str: - """Get the string representation of the scheduler""" + """Get the string representation of the scheduler + + :returns: The string representation of the scheduler + """ return SchedulerType.Pbs.value def set_nodes(self, num_nodes: int) -> None: @@ -113,7 +116,7 @@ def set_account(self, account: str) -> None: def format_batch_args(self) -> t.List[str]: """Get the formatted batch arguments for a preview - :return: batch arguments for Qsub + :return: batch arguments for `qsub` :raises ValueError: if options are supplied without values """ opts, batch_arg_copy = self._create_resource_list(self._scheduler_args) @@ -170,5 +173,10 @@ def _create_resource_list( return res, batch_arg_copy def set(self, key: str, value: str | None) -> None: - # Store custom arguments in the launcher_args + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ self._scheduler_args[key] = value diff --git a/smartsim/settings/builders/batch/slurm.py b/smartsim/settings/arguments/batch/slurm.py similarity index 90% rename from smartsim/settings/builders/batch/slurm.py rename to smartsim/settings/arguments/batch/slurm.py index 5a03f5acd..f4725a117 100644 --- a/smartsim/settings/builders/batch/slurm.py +++ b/smartsim/settings/arguments/batch/slurm.py @@ -33,14 +33,17 @@ from ...batchCommand import SchedulerType from ...common import StringArgument -from ..batchArgBuilder import BatchArgBuilder +from ..batchArguments import BatchArguments logger = get_logger(__name__) -class SlurmBatchArgBuilder(BatchArgBuilder): +class SlurmBatchArguments(BatchArguments): def scheduler_str(self) -> str: - """Get the string representation of the scheduler""" + """Get the string representation of the scheduler + + :returns: The string representation of the scheduler + """ return SchedulerType.Slurm.value def set_walltime(self, walltime: str) -> None: @@ -120,7 +123,7 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: def format_batch_args(self) -> t.List[str]: """Get the formatted batch arguments for a preview - :return: batch arguments for Sbatch + :return: batch arguments for `sbatch` """ opts = [] # TODO add restricted here @@ -139,5 +142,11 @@ def format_batch_args(self) -> t.List[str]: return opts def set(self, key: str, value: str | None) -> None: + """Set an arbitrary scheduler argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ # Store custom arguments in the launcher_args self._scheduler_args[key] = value diff --git a/smartsim/settings/builders/batchArgBuilder.py b/smartsim/settings/arguments/batchArguments.py similarity index 99% rename from smartsim/settings/builders/batchArgBuilder.py rename to smartsim/settings/arguments/batchArguments.py index ad466f254..a85148697 100644 --- a/smartsim/settings/builders/batchArgBuilder.py +++ b/smartsim/settings/arguments/batchArguments.py @@ -37,7 +37,7 @@ logger = get_logger(__name__) -class BatchArgBuilder(ABC): +class BatchArguments(ABC): """Abstract base class that defines all generic scheduler argument methods that are not supported. It is the responsibility of child classes for each launcher to translate diff --git a/smartsim/settings/arguments/launch/__init__.py b/smartsim/settings/arguments/launch/__init__.py new file mode 100644 index 000000000..30502394b --- /dev/null +++ b/smartsim/settings/arguments/launch/__init__.py @@ -0,0 +1,19 @@ +from .alps import AprunLaunchArguments +from .dragon import DragonLaunchArguments +from .local import LocalLaunchArguments +from .lsf import JsrunLaunchArguments +from .mpi import MpiexecLaunchArguments, MpirunLaunchArguments, OrterunLaunchArguments +from .pals import PalsMpiexecLaunchArguments +from .slurm import SlurmLaunchArguments + +__all__ = [ + "AprunLaunchArguments", + "DragonLaunchArguments", + "LocalLaunchArguments", + "JsrunLaunchArguments", + "MpiLaunchArguments", + "MpiexecLaunchArguments", + "OrteLaunchArguments", + "PalsMpiexecLaunchArguments", + "SlurmLaunchArguments", +] diff --git a/smartsim/settings/builders/launch/alps.py b/smartsim/settings/arguments/launch/alps.py similarity index 90% rename from smartsim/settings/builders/launch/alps.py rename to smartsim/settings/arguments/launch/alps.py index a527cafac..1879dd102 100644 --- a/smartsim/settings/builders/launch/alps.py +++ b/smartsim/settings/arguments/launch/alps.py @@ -29,21 +29,30 @@ import typing as t from smartsim.log import get_logger +from smartsim.settings.dispatch import ShellLauncher, dispatch, make_shell_format_fn -from ...common import StringArgument, set_check_input +from ...common import set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) +_as_aprun_command = make_shell_format_fn(run_command="aprun") -class AprunArgBuilder(LaunchArgBuilder): +@dispatch(with_format=_as_aprun_command, to_launcher=ShellLauncher) +class AprunLaunchArguments(LaunchArguments): def _reserved_launch_args(self) -> set[str]: - """Return reserved launch arguments.""" + """Return reserved launch arguments. + + :returns: The set of reserved launcher arguments + """ return {"wdir"} def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Alps.value def set_cpus_per_task(self, cpus_per_task: int) -> None: @@ -200,7 +209,12 @@ def format_launch_args(self) -> t.Union[t.List[str], None]: return args def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._reserved_launch_args(): logger.warning( diff --git a/smartsim/settings/builders/launch/dragon.py b/smartsim/settings/arguments/launch/dragon.py similarity index 86% rename from smartsim/settings/builders/launch/dragon.py rename to smartsim/settings/arguments/launch/dragon.py index d8d634331..98b91059c 100644 --- a/smartsim/settings/builders/launch/dragon.py +++ b/smartsim/settings/arguments/launch/dragon.py @@ -32,16 +32,19 @@ from smartsim.log import get_logger -from ...common import StringArgument, set_check_input +from ...common import set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) -class DragonArgBuilder(LaunchArgBuilder): +class DragonLaunchArguments(LaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Dragon.value def set_nodes(self, nodes: int) -> None: @@ -56,11 +59,16 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: :param tasks_per_node: number of tasks per node """ - self.set("tasks-per-node", str(tasks_per_node)) + self.set("tasks_per_node", str(tasks_per_node)) @override def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._launch_args and key != self._launch_args[key]: logger.warning(f"Overwritting argument '{key}' with value '{value}'") diff --git a/smartsim/settings/builders/launch/local.py b/smartsim/settings/arguments/launch/local.py similarity index 75% rename from smartsim/settings/builders/launch/local.py rename to smartsim/settings/arguments/launch/local.py index 595514f15..0bbba2584 100644 --- a/smartsim/settings/builders/launch/local.py +++ b/smartsim/settings/arguments/launch/local.py @@ -29,23 +29,30 @@ import typing as t from smartsim.log import get_logger +from smartsim.settings.dispatch import ShellLauncher, dispatch, make_shell_format_fn from ...common import StringArgument, set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) +_as_local_command = make_shell_format_fn(run_command=None) -class LocalArgBuilder(LaunchArgBuilder): +@dispatch(with_format=_as_local_command, to_launcher=ShellLauncher) +class LocalLaunchArguments(LaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Local.value def format_env_vars(self, env_vars: StringArgument) -> t.Union[t.List[str], None]: - """Build environment variable string + """Build bash compatible sequence of strings to specify an environment - :returns: formatted list of strings to export variables + :param env_vars: An environment mapping + :returns: the formatted string of environment variables """ formatted = [] for key, val in env_vars.items(): @@ -67,7 +74,12 @@ def format_launch_args(self) -> t.Union[t.List[str], None]: return formatted def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._launch_args and key != self._launch_args[key]: logger.warning(f"Overwritting argument '{key}' with value '{value}'") diff --git a/smartsim/settings/builders/launch/lsf.py b/smartsim/settings/arguments/launch/lsf.py similarity index 83% rename from smartsim/settings/builders/launch/lsf.py rename to smartsim/settings/arguments/launch/lsf.py index 2c72002e5..80cd748f1 100644 --- a/smartsim/settings/builders/launch/lsf.py +++ b/smartsim/settings/arguments/launch/lsf.py @@ -29,21 +29,30 @@ import typing as t from smartsim.log import get_logger +from smartsim.settings.dispatch import ShellLauncher, dispatch, make_shell_format_fn -from ...common import StringArgument, set_check_input +from ...common import set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) +_as_jsrun_command = make_shell_format_fn(run_command="jsrun") -class JsrunArgBuilder(LaunchArgBuilder): +@dispatch(with_format=_as_jsrun_command, to_launcher=ShellLauncher) +class JsrunLaunchArguments(LaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Lsf.value def _reserved_launch_args(self) -> set[str]: - """Return reserved launch arguments.""" + """Return reserved launch arguments. + + :returns: The set of reserved launcher arguments + """ return {"chdir", "h", "stdio_stdout", "o", "stdio_stderr", "k"} def set_tasks(self, tasks: int) -> None: @@ -102,7 +111,12 @@ def format_launch_args(self) -> t.Union[t.List[str], None]: return args def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._reserved_launch_args(): logger.warning( diff --git a/smartsim/settings/builders/launch/mpi.py b/smartsim/settings/arguments/launch/mpi.py similarity index 83% rename from smartsim/settings/builders/launch/mpi.py rename to smartsim/settings/arguments/launch/mpi.py index 1331be317..85fd38145 100644 --- a/smartsim/settings/builders/launch/mpi.py +++ b/smartsim/settings/arguments/launch/mpi.py @@ -29,17 +29,24 @@ import typing as t from smartsim.log import get_logger +from smartsim.settings.dispatch import ShellLauncher, dispatch, make_shell_format_fn from ...common import set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) +_as_mpirun_command = make_shell_format_fn("mpirun") +_as_mpiexec_command = make_shell_format_fn("mpiexec") +_as_orterun_command = make_shell_format_fn("orterun") -class _BaseMPIArgBuilder(LaunchArgBuilder): +class _BaseMPILaunchArguments(LaunchArguments): def _reserved_launch_args(self) -> set[str]: - """Return reserved launch arguments.""" + """Return reserved launch arguments. + + :returns: The set of reserved launcher arguments + """ return {"wd", "wdir"} def set_task_map(self, task_mapping: str) -> None: @@ -199,7 +206,12 @@ def format_launch_args(self) -> t.List[str]: return args def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._reserved_launch_args(): logger.warning( @@ -214,37 +226,31 @@ def set(self, key: str, value: str | None) -> None: self._launch_args[key] = value -class MpiArgBuilder(_BaseMPIArgBuilder): - def __init__( - self, - launch_args: t.Dict[str, str | None] | None, - ) -> None: - super().__init__(launch_args) - +@dispatch(with_format=_as_mpirun_command, to_launcher=ShellLauncher) +class MpirunLaunchArguments(_BaseMPILaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" - return LauncherType.Mpirun.value + """Get the string representation of the launcher + :returns: The string representation of the launcher + """ + return LauncherType.Mpirun.value -class MpiexecArgBuilder(_BaseMPIArgBuilder): - def __init__( - self, - launch_args: t.Dict[str, str | None] | None, - ) -> None: - super().__init__(launch_args) +@dispatch(with_format=_as_mpiexec_command, to_launcher=ShellLauncher) +class MpiexecLaunchArguments(_BaseMPILaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" - return LauncherType.Mpiexec.value + """Get the string representation of the launcher + :returns: The string representation of the launcher + """ + return LauncherType.Mpiexec.value -class OrteArgBuilder(_BaseMPIArgBuilder): - def __init__( - self, - launch_args: t.Dict[str, str | None] | None, - ) -> None: - super().__init__(launch_args) +@dispatch(with_format=_as_orterun_command, to_launcher=ShellLauncher) +class OrterunLaunchArguments(_BaseMPILaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Orterun.value diff --git a/smartsim/settings/builders/launch/pals.py b/smartsim/settings/arguments/launch/pals.py similarity index 83% rename from smartsim/settings/builders/launch/pals.py rename to smartsim/settings/arguments/launch/pals.py index 051409c29..3132f1b02 100644 --- a/smartsim/settings/builders/launch/pals.py +++ b/smartsim/settings/arguments/launch/pals.py @@ -29,21 +29,30 @@ import typing as t from smartsim.log import get_logger +from smartsim.settings.dispatch import ShellLauncher, dispatch, make_shell_format_fn -from ...common import StringArgument, set_check_input +from ...common import set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) +_as_pals_command = make_shell_format_fn(run_command="mpiexec") -class PalsMpiexecArgBuilder(LaunchArgBuilder): +@dispatch(with_format=_as_pals_command, to_launcher=ShellLauncher) +class PalsMpiexecLaunchArguments(LaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Pals.value def _reserved_launch_args(self) -> set[str]: - """Return reserved launch arguments.""" + """Return reserved launch arguments. + + :returns: The set of reserved launcher arguments + """ return {"wdir", "wd"} def set_cpu_binding_type(self, bind_type: str) -> None: @@ -136,14 +145,17 @@ def format_launch_args(self) -> t.List[str]: return args def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._reserved_launch_args(): logger.warning( - ( - f"Could not set argument '{key}': " - f"it is a reserved argument of '{type(self).__name__}'" - ) + f"Could not set argument '{key}': " + f"it is a reserved argument of '{type(self).__name__}'" ) return if key in self._launch_args and key != self._launch_args[key]: diff --git a/smartsim/settings/builders/launch/slurm.py b/smartsim/settings/arguments/launch/slurm.py similarity index 93% rename from smartsim/settings/builders/launch/slurm.py rename to smartsim/settings/arguments/launch/slurm.py index 80d3d6be2..ac485b7c8 100644 --- a/smartsim/settings/builders/launch/slurm.py +++ b/smartsim/settings/arguments/launch/slurm.py @@ -31,21 +31,30 @@ import typing as t from smartsim.log import get_logger +from smartsim.settings.dispatch import ShellLauncher, dispatch, make_shell_format_fn from ...common import set_check_input from ...launchCommand import LauncherType -from ..launchArgBuilder import LaunchArgBuilder +from ..launchArguments import LaunchArguments logger = get_logger(__name__) +_as_srun_command = make_shell_format_fn(run_command="srun") -class SlurmArgBuilder(LaunchArgBuilder): +@dispatch(with_format=_as_srun_command, to_launcher=ShellLauncher) +class SlurmLaunchArguments(LaunchArguments): def launcher_str(self) -> str: - """Get the string representation of the launcher""" + """Get the string representation of the launcher + + :returns: The string representation of the launcher + """ return LauncherType.Slurm.value def _reserved_launch_args(self) -> set[str]: - """Return reserved launch arguments.""" + """Return reserved launch arguments. + + :returns: The set of reserved launcher arguments + """ return {"chdir", "D"} def set_nodes(self, nodes: int) -> None: @@ -302,7 +311,12 @@ def _check_env_vars(self, env_vars: t.Dict[str, t.Optional[str]]) -> None: logger.warning(msg) def set(self, key: str, value: str | None) -> None: - """Set the launch arguments""" + """Set an arbitrary launch argument + + :param key: The launch argument + :param value: A string representation of the value for the launch + argument (if applicable), otherwise `None` + """ set_check_input(key, value) if key in self._reserved_launch_args(): logger.warning( diff --git a/smartsim/settings/builders/launchArgBuilder.py b/smartsim/settings/arguments/launchArguments.py similarity index 65% rename from smartsim/settings/builders/launchArgBuilder.py rename to smartsim/settings/arguments/launchArguments.py index bb1f389f3..61f837d98 100644 --- a/smartsim/settings/builders/launchArgBuilder.py +++ b/smartsim/settings/arguments/launchArguments.py @@ -27,6 +27,7 @@ from __future__ import annotations import copy +import textwrap import typing as t from abc import ABC, abstractmethod @@ -37,28 +38,41 @@ logger = get_logger(__name__) -class LaunchArgBuilder(ABC): - """Abstract base class that defines all generic launcher - argument methods that are not supported. It is the - responsibility of child classes for each launcher to translate - the input parameter to a properly formatted launcher argument. +class LaunchArguments(ABC): + """Abstract base class for launcher arguments. It is the responsibility of + child classes for each launcher to add methods to set input parameters and + to maintain valid state between parameters set by a user. """ def __init__(self, launch_args: t.Dict[str, str | None] | None) -> None: + """Initialize a new `LaunchArguments` instance. + + :param launch_args: A mapping of arguments to (optional) values + """ self._launch_args = copy.deepcopy(launch_args) or {} @abstractmethod def launcher_str(self) -> str: """Get the string representation of the launcher""" - pass @abstractmethod def set(self, arg: str, val: str | None) -> None: - """Set the launch arguments""" - pass + """Set a launch argument + + :param arg: The argument name to set + :param val: The value to set the argument to as a `str` (if + applicable). Otherwise `None` + """ def format_launch_args(self) -> t.Union[t.List[str], None]: - """Build formatted launch arguments""" + """Build formatted launch arguments + + .. warning:: + This method will be removed from this class in a future ticket + + :returns: The launch arguments formatted as a list or `None` if the + arguments cannot be formatted. + """ logger.warning( f"format_launcher_args() not supported for {self.launcher_str()}." ) @@ -71,6 +85,15 @@ def format_comma_sep_env_vars( Slurm takes exports in comma separated lists the list starts with all as to not disturb the rest of the environment for more information on this, see the slurm documentation for srun + + .. warning:: + The return value described in this docstring does not match the + type hint, but I have no idea how this is supposed to be used or + how to resolve the descrepency. I'm not going to try and fix it and + the point is moot as this method is almost certainly going to be + removed in a later ticket. + + :param env_vars: An environment mapping :returns: the formatted string of environment variables """ logger.warning( @@ -82,11 +105,21 @@ def format_env_vars( self, env_vars: t.Dict[str, t.Optional[str]] ) -> t.Union[t.List[str], None]: """Build bash compatible environment variable string for Slurm + + .. warning:: + This method will be removed from this class in a future ticket + + :param env_vars: An environment mapping :returns: the formatted string of environment variables """ logger.warning(f"format_env_vars() not supported for {self.launcher_str()}.") return None def __str__(self) -> str: # pragma: no-cover - string = f"\nLaunch Arguments:\n{fmt_dict(self._launch_args)}" - return string + return textwrap.dedent(f"""\ + Launch Arguments: + Launcher: {self.launcher_str()} + Name: {type(self).__name__} + Arguments: + {fmt_dict(self._launch_args)} + """) diff --git a/smartsim/settings/batchSettings.py b/smartsim/settings/batchSettings.py index 79a559ecb..6649fa5f7 100644 --- a/smartsim/settings/batchSettings.py +++ b/smartsim/settings/batchSettings.py @@ -32,12 +32,12 @@ from smartsim.log import get_logger from .._core.utils.helpers import fmt_dict +from .arguments import BatchArguments +from .arguments.batch.lsf import BsubBatchArguments +from .arguments.batch.pbs import QsubBatchArguments +from .arguments.batch.slurm import SlurmBatchArguments from .baseSettings import BaseSettings from .batchCommand import SchedulerType -from .builders import BatchArgBuilder -from .builders.batch.lsf import BsubBatchArgBuilder -from .builders.batch.pbs import QsubBatchArgBuilder -from .builders.batch.slurm import SlurmBatchArgBuilder from .common import StringArgument logger = get_logger(__name__) @@ -54,7 +54,7 @@ def __init__( self._batch_scheduler = SchedulerType(batch_scheduler) except ValueError: raise ValueError(f"Invalid scheduler type: {batch_scheduler}") from None - self._arg_builder = self._get_arg_builder(scheduler_args) + self._arguments = self._get_arguments(scheduler_args) self.env_vars = env_vars or {} @property @@ -68,9 +68,9 @@ def batch_scheduler(self) -> str: return self._batch_scheduler.value @property - def scheduler_args(self) -> BatchArgBuilder: + def scheduler_args(self) -> BatchArguments: """Return the batch argument translator.""" - return self._arg_builder + return self._arguments @property def env_vars(self) -> StringArgument: @@ -82,16 +82,20 @@ def env_vars(self, value: t.Dict[str, str | None]) -> None: """Set the environment variables.""" self._env_vars = copy.deepcopy(value) - def _get_arg_builder( - self, scheduler_args: StringArgument | None - ) -> BatchArgBuilder: - """Map the Scheduler to the BatchArgBuilder""" + def _get_arguments(self, scheduler_args: StringArgument | None) -> BatchArguments: + """Map the Scheduler to the BatchArguments. This method should only be + called once during construction. + + :param scheduler_args: A mapping of arguments names to values to be + used to initialize the arguments + :returns: The appropriate type for the settings instance. + """ if self._batch_scheduler == SchedulerType.Slurm: - return SlurmBatchArgBuilder(scheduler_args) + return SlurmBatchArguments(scheduler_args) elif self._batch_scheduler == SchedulerType.Lsf: - return BsubBatchArgBuilder(scheduler_args) + return BsubBatchArguments(scheduler_args) elif self._batch_scheduler == SchedulerType.Pbs: - return QsubBatchArgBuilder(scheduler_args) + return QsubBatchArguments(scheduler_args) else: raise ValueError(f"Invalid scheduler type: {self._batch_scheduler}") @@ -100,7 +104,7 @@ def format_batch_args(self) -> t.List[str]: :return: batch arguments for Sbatch """ - return self._arg_builder.format_batch_args() + return self._arguments.format_batch_args() def __str__(self) -> str: # pragma: no-cover string = f"\nScheduler: {self.scheduler}{self.scheduler_args}" diff --git a/smartsim/settings/builders/launch/__init__.py b/smartsim/settings/builders/launch/__init__.py deleted file mode 100644 index d593c59f7..000000000 --- a/smartsim/settings/builders/launch/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .alps import AprunArgBuilder -from .dragon import DragonArgBuilder -from .local import LocalArgBuilder -from .lsf import JsrunArgBuilder -from .mpi import MpiArgBuilder, MpiexecArgBuilder, OrteArgBuilder -from .pals import PalsMpiexecArgBuilder -from .slurm import SlurmArgBuilder - -__all__ = [ - "AprunArgBuilder", - "DragonArgBuilder", - "LocalArgBuilder", - "JsrunArgBuilder", - "MpiArgBuilder", - "MpiexecArgBuilder", - "OrteArgBuilder", - "PalsMpiexecArgBuilder", - "SlurmArgBuilder", -] diff --git a/smartsim/settings/dispatch.py b/smartsim/settings/dispatch.py new file mode 100644 index 000000000..53c6be04d --- /dev/null +++ b/smartsim/settings/dispatch.py @@ -0,0 +1,454 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import dataclasses +import subprocess as sp +import typing as t +import uuid + +from typing_extensions import Self, TypeAlias, TypeVarTuple, Unpack + +from smartsim._core.utils import helpers +from smartsim.error import errors +from smartsim.types import LaunchedJobID + +if t.TYPE_CHECKING: + from smartsim.experiment import Experiment + from smartsim.settings.arguments import LaunchArguments + +_Ts = TypeVarTuple("_Ts") +_T_contra = t.TypeVar("_T_contra", contravariant=True) + +_DispatchableT = t.TypeVar("_DispatchableT", bound="LaunchArguments") +"""Any type of luanch arguments, typically used when the type bound by the type +argument is a key a `Dispatcher` dispatch registry +""" +_LaunchableT = t.TypeVar("_LaunchableT") +"""Any type, typically used to bind to a type accepted as the input parameter +to the to the `LauncherProtocol.start` method +""" + +_EnvironMappingType: TypeAlias = t.Mapping[str, "str | None"] +"""A mapping of user provided mapping of environment variables in which to run +a job +""" +_FormatterType: TypeAlias = t.Callable[ + [_DispatchableT, "ExecutableProtocol", _EnvironMappingType], _LaunchableT +] +"""A callable that is capable of formatting the components of a job into a type +capable of being launched by a launcher. +""" +_LaunchConfigType: TypeAlias = ( + "_LauncherAdapter[ExecutableProtocol, _EnvironMappingType]" +) +"""A launcher adapater that has configured a launcher to launch the components +of a job with some pre-determined launch settings +""" +_UnkownType: TypeAlias = t.NoReturn +"""A type alias for a bottom type. Use this to inform a user that the parameter +a parameter should never be set or a callable will never return +""" + + +@t.final +class Dispatcher: + """A class capable of deciding which launcher type should be used to launch + a given settings type. + + The `Dispatcher` class maintains a type safe API for adding and retrieving + a settings type into the underlying mapping. It does this through two main + methods: `Dispatcher.dispatch` and `Dispatcher.get_dispatch`. + + `Dispatcher.dispatch` takes in a dispatchable type, a launcher type that is + capable of launching a launchable type and formatting function that maps an + instance of the dispatchable type to an instance of the launchable type. + The dispatcher will then take these components and then enter them into its + dispatch registry. `Dispatcher.dispatch` can also be used as a decorator, + to automatically add a dispatchable type dispatch to a dispatcher at type + creation time. + + `Dispatcher.get_dispatch` takes a dispatchable type or instance as a + parameter, and will attempt to look up, in its dispatch registry, how to + dispatch that type. It will then return an object that can configure a + launcher of the expected launcher type. If the dispatchable type was never + registered a `TypeError` will be raised. + """ + + def __init__( + self, + *, + dispatch_registry: ( + t.Mapping[type[LaunchArguments], _DispatchRegistration[t.Any, t.Any]] | None + ) = None, + ) -> None: + """Initialize a new `Dispatcher` + + :param dispatch_registry: A pre-configured dispatch registry that the + dispatcher should use. This registry is not type checked and is + used blindly. This registry is shallow copied, meaning that adding + into the original registry after construction will not mutate the + state of the registry. + """ + self._dispatch_registry = ( + dict(dispatch_registry) if dispatch_registry is not None else {} + ) + + def copy(self) -> Self: + """Create a shallow copy of the Dispatcher""" + return type(self)(dispatch_registry=self._dispatch_registry) + + @t.overload + def dispatch( # Signature when used as a decorator + self, + args: None = ..., + *, + with_format: _FormatterType[_DispatchableT, _LaunchableT], + to_launcher: type[LauncherProtocol[_LaunchableT]], + allow_overwrite: bool = ..., + ) -> t.Callable[[type[_DispatchableT]], type[_DispatchableT]]: ... + @t.overload + def dispatch( # Signature when used as a method + self, + args: type[_DispatchableT], + *, + with_format: _FormatterType[_DispatchableT, _LaunchableT], + to_launcher: type[LauncherProtocol[_LaunchableT]], + allow_overwrite: bool = ..., + ) -> None: ... + def dispatch( # Actual implementation + self, + args: type[_DispatchableT] | None = None, + *, + with_format: _FormatterType[_DispatchableT, _LaunchableT], + to_launcher: type[LauncherProtocol[_LaunchableT]], + allow_overwrite: bool = False, + ) -> t.Callable[[type[_DispatchableT]], type[_DispatchableT]] | None: + """A type safe way to add a mapping of settings type to launcher type + to handle a settings instance at launch time. + """ + err_msg: str | None = None + if getattr(to_launcher, "_is_protocol", False): + err_msg = f"Cannot dispatch to protocol class `{to_launcher.__name__}`" + elif getattr(to_launcher, "__abstractmethods__", frozenset()): + err_msg = f"Cannot dispatch to abstract class `{to_launcher.__name__}`" + if err_msg is not None: + raise TypeError(err_msg) + + def register(args_: type[_DispatchableT], /) -> type[_DispatchableT]: + if args_ in self._dispatch_registry and not allow_overwrite: + launcher_type = self._dispatch_registry[args_].launcher_type + raise TypeError( + f"{args_.__name__} has already been registered to be " + f"launched with {launcher_type}" + ) + self._dispatch_registry[args_] = _DispatchRegistration( + with_format, to_launcher + ) + return args_ + + if args is not None: + register(args) + return None + return register + + def get_dispatch( + self, args: _DispatchableT | type[_DispatchableT] + ) -> _DispatchRegistration[_DispatchableT, _UnkownType]: + """Find a type of launcher that is registered as being able to launch a + settings instance of the provided type + """ + if not isinstance(args, type): + args = type(args) + dispatch_ = self._dispatch_registry.get(args, None) + if dispatch_ is None: + raise TypeError( + f"No dispatch for `{args.__name__}` has been registered " + f"has been registered with {type(self).__name__} `{self}`" + ) + # Note the sleight-of-hand here: we are secretly casting a type of + # `_DispatchRegistration[Any, Any]` -> + # `_DispatchRegistration[_DispatchableT, _LaunchableT]`. + # where `_LaunchableT` is unbound! + # + # This is safe to do if all entries in the mapping were added using a + # type safe method (e.g. `Dispatcher.dispatch`), but if a user were to + # supply a custom dispatch registry or otherwise modify the registry + # this is not necessarily 100% type safe!! + return dispatch_ + + +@t.final +@dataclasses.dataclass(frozen=True) +class _DispatchRegistration(t.Generic[_DispatchableT, _LaunchableT]): + """An entry into the `Dispatcher`'s dispatch registry. This class is simply + a wrapper around a launcher and how to format a `_DispatchableT` instance + to be launched by the afore mentioned launcher. + """ + + formatter: _FormatterType[_DispatchableT, _LaunchableT] + launcher_type: type[LauncherProtocol[_LaunchableT]] + + def _is_compatible_launcher(self, launcher: LauncherProtocol[t.Any]) -> bool: + # Disabling because we want to match the type of the dispatch + # *exactly* as specified by the user + # pylint: disable-next=unidiomatic-typecheck + return type(launcher) is self.launcher_type + + def create_new_launcher_configuration( + self, for_experiment: Experiment, with_arguments: _DispatchableT + ) -> _LaunchConfigType: + """Create a new instance of a launcher for an experiment that the + provided settings where set to dispatch to, and configure it with the + provided launch settings. + + :param for_experiment: The experiment responsible creating the launcher + :param with_settings: The settings with which to configure the newly + created launcher + :returns: A configured launcher + """ + launcher = self.launcher_type.create(for_experiment) + return self.create_adapter_from_launcher(launcher, with_arguments) + + def create_adapter_from_launcher( + self, launcher: LauncherProtocol[_LaunchableT], arguments: _DispatchableT + ) -> _LaunchConfigType: + """Creates configured launcher from an existing launcher using the + provided settings. + + :param launcher: A launcher that the type of `settings` has been + configured to dispatch to. + :param settings: A settings with which to configure the launcher. + :returns: A configured launcher. + """ + if not self._is_compatible_launcher(launcher): + raise TypeError( + f"Cannot create launcher adapter from launcher `{launcher}` " + f"of type `{type(launcher)}`; expected launcher of type " + f"exactly `{self.launcher_type}`" + ) + + def format_(exe: ExecutableProtocol, env: _EnvironMappingType) -> _LaunchableT: + return self.formatter(arguments, exe, env) + + return _LauncherAdapter(launcher, format_) + + def configure_first_compatible_launcher( + self, + with_arguments: _DispatchableT, + from_available_launchers: t.Iterable[LauncherProtocol[t.Any]], + ) -> _LaunchConfigType: + """Configure the first compatible adapter launch to launch with the + provided settings. Launchers are iterated and discarded from the + iterator until the iterator is exhausted. + + :param with_settings: The settings with which to configure the launcher + :param from_available_launchers: An iterable that yields launcher instances + :raises errors.LauncherNotFoundError: No compatible launcher was + yielded from the provided iterator. + :returns: A launcher configured with the provided settings. + """ + launcher = helpers.first(self._is_compatible_launcher, from_available_launchers) + if launcher is None: + raise errors.LauncherNotFoundError( + f"No launcher of exactly type `{self.launcher_type.__name__}` " + "could be found from provided launchers" + ) + return self.create_adapter_from_launcher(launcher, with_arguments) + + +@t.final +class _LauncherAdapter(t.Generic[Unpack[_Ts]]): + """The launcher adapter is an adapter class takes a launcher that is + capable of launching some type `LaunchableT` and a function with a generic + argument list that returns a `LaunchableT`. The launcher adapter will then + provide `start` method that will have the same argument list as the + provided function and launch the output through the provided launcher. + + For example, the launcher adapter could be used like so: + + .. highlight:: python + .. code-block:: python + + class SayHelloLauncher(LauncherProtocol[str]): + ... + def start(self, title: str): + ... + print(f"Hello, {title}") + ... + ... + + @dataclasses.dataclass + class Person: + name: str + honorific: str + + def full_title(self) -> str: + return f"{honorific}. {self.name}" + + mark = Person("Jim", "Mr") + sally = Person("Sally", "Ms") + matt = Person("Matt", "Dr") + hello_person_launcher = _LauncherAdapter(SayHelloLauncher, + Person.full_title) + + hello_person_launcher.start(mark) # prints: "Hello, Mr. Mark" + hello_person_launcher.start(sally) # prints: "Hello, Ms. Sally" + hello_person_launcher.start(matt) # prints: "Hello, Dr. Matt" + """ + + def __init__( + self, + launcher: LauncherProtocol[_LaunchableT], + map_: t.Callable[[Unpack[_Ts]], _LaunchableT], + ) -> None: + """Initialize a launcher adapter + + :param launcher: The launcher instance this class should wrap + :param map_: A callable with arguments for the new `start` method that + can translate them into the expected launching type for the wrapped + launcher. + """ + # NOTE: We need to cast off the `_LaunchableT` -> `Any` in the + # `__init__` method signature to hide the transform from users of + # this class. If possible, this type should not be exposed to + # users of this class! + self._adapt: t.Callable[[Unpack[_Ts]], t.Any] = map_ + self._adapted_launcher: LauncherProtocol[t.Any] = launcher + + def start(self, *args: Unpack[_Ts]) -> LaunchedJobID: + """Start a new job through the wrapped launcher using the custom + `start` signature + + :param args: The custom start arguments + :returns: The launched job id provided by the wrapped launcher + """ + payload = self._adapt(*args) + return self._adapted_launcher.start(payload) + + +DEFAULT_DISPATCHER: t.Final = Dispatcher() +"""A global `Dispatcher` instance that SmartSim automatically configures to +launch its built in launchables +""" + +# Disabling because we want this to look and feel like a top level function, +# but don't want to have a second copy of the nasty overloads +# pylint: disable-next=invalid-name +dispatch: t.Final = DEFAULT_DISPATCHER.dispatch +"""Function that can be used as a decorator to add a dispatch registration into +`DEFAULT_DISPATCHER`. +""" + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +# TODO: move these to a common module under `smartsim._core.launcher` +# ----------------------------------------------------------------------------- + + +def create_job_id() -> LaunchedJobID: + return LaunchedJobID(str(uuid.uuid4())) + + +class ExecutableProtocol(t.Protocol): + def as_program_arguments(self) -> t.Sequence[str]: ... + + +class LauncherProtocol(t.Protocol[_T_contra]): + def start(self, launchable: _T_contra, /) -> LaunchedJobID: ... + @classmethod + def create(cls, exp: Experiment, /) -> Self: ... + + +def make_shell_format_fn( + run_command: str | None, +) -> _FormatterType[LaunchArguments, t.Sequence[str]]: + """A function that builds a function that formats a `LaunchArguments` as a + shell executable sequence of strings for a given launching utility. + + Example usage: + + .. highlight:: python + .. code-block:: python + + echo_hello_world: ExecutableProtocol = ... + env = {} + slurm_args: SlurmLaunchArguments = ... + slurm_args.set_nodes(3) + + as_srun_command = make_shell_format_fn("srun") + fmt_cmd = as_srun_command(slurm_args, echo_hello_world, env) + print(list(fmt_cmd)) + # prints: "['srun', '--nodes=3', '--', 'echo', 'Hello World!']" + + .. note:: + This function was/is a kind of slap-dash implementation, and is likely + to change or be removed entierely as more functionality is added to the + shell launcher. Use with caution and at your own risk! + + :param run_command: Name or path of the launching utility to invoke with + the arguments. + :returns: A function to format an arguments, an executable, and an + environment as a shell launchable sequence for strings. + """ + + def impl( + args: LaunchArguments, exe: ExecutableProtocol, _env: _EnvironMappingType + ) -> t.Sequence[str]: + return ( + ( + run_command, + *(args.format_launch_args() or ()), + "--", + *exe.as_program_arguments(), + ) + if run_command is not None + else exe.as_program_arguments() + ) + + return impl + + +class ShellLauncher: + """Mock launcher for launching/tracking simple shell commands""" + + def __init__(self) -> None: + self._launched: dict[LaunchedJobID, sp.Popen[bytes]] = {} + + def start(self, command: t.Sequence[str]) -> LaunchedJobID: + id_ = create_job_id() + exe, *rest = command + # pylint: disable-next=consider-using-with + self._launched[id_] = sp.Popen((helpers.expand_exe_path(exe), *rest)) + return id_ + + @classmethod + def create(cls, _: Experiment) -> Self: + return cls() + + +# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/smartsim/settings/launchSettings.py b/smartsim/settings/launchSettings.py index dec6034d8..98c199b83 100644 --- a/smartsim/settings/launchSettings.py +++ b/smartsim/settings/launchSettings.py @@ -32,15 +32,19 @@ from smartsim.log import get_logger from .._core.utils.helpers import fmt_dict +from .arguments import LaunchArguments +from .arguments.launch.alps import AprunLaunchArguments +from .arguments.launch.dragon import DragonLaunchArguments +from .arguments.launch.local import LocalLaunchArguments +from .arguments.launch.lsf import JsrunLaunchArguments +from .arguments.launch.mpi import ( + MpiexecLaunchArguments, + MpirunLaunchArguments, + OrterunLaunchArguments, +) +from .arguments.launch.pals import PalsMpiexecLaunchArguments +from .arguments.launch.slurm import SlurmLaunchArguments from .baseSettings import BaseSettings -from .builders import LaunchArgBuilder -from .builders.launch.alps import AprunArgBuilder -from .builders.launch.dragon import DragonArgBuilder -from .builders.launch.local import LocalArgBuilder -from .builders.launch.lsf import JsrunArgBuilder -from .builders.launch.mpi import MpiArgBuilder, MpiexecArgBuilder, OrteArgBuilder -from .builders.launch.pals import PalsMpiexecArgBuilder -from .builders.launch.slurm import SlurmArgBuilder from .common import StringArgument from .launchCommand import LauncherType @@ -58,56 +62,71 @@ def __init__( self._launcher = LauncherType(launcher) except ValueError: raise ValueError(f"Invalid launcher type: {launcher}") - self._arg_builder = self._get_arg_builder(launch_args) + self._arguments = self._get_arguments(launch_args) self.env_vars = env_vars or {} @property def launcher(self) -> str: - """Return the launcher name.""" + """The launcher type + + :returns: The launcher type's string representation + """ return self._launcher.value @property - def launch_args(self) -> LaunchArgBuilder: - """Return the launch argument translator.""" - return self._arg_builder + def launch_args(self) -> LaunchArguments: + """The launch argument - @launch_args.setter - def launch_args(self, args: t.Mapping[str, str]) -> None: - """Update the launch arguments.""" - self.launch_args._launch_args.clear() - for k, v in args.items(): - self.launch_args.set(k, v) + :returns: The launch arguments + """ + return self._arguments @property - def env_vars(self) -> dict[str, str | None]: - """Return an immutable list of attached environment variables.""" + def env_vars(self) -> t.Mapping[str, str | None]: + """A mapping of environment variables to set or remove. This mapping is + a deep copy of the mapping used by the settings and as such altering + will not mutate the settings. + + :returns: An environment mapping + """ return copy.deepcopy(self._env_vars) @env_vars.setter def env_vars(self, value: dict[str, str | None]) -> None: - """Set the environment variables.""" + """Set the environment variables to a new mapping. This setter will + make a copy of the mapping and as such altering the original mapping + will not mutate the settings. + + :param value: The new environment mapping + """ self._env_vars = copy.deepcopy(value) - def _get_arg_builder(self, launch_args: StringArgument | None) -> LaunchArgBuilder: - """Map the Launcher to the LaunchArgBuilder""" + def _get_arguments(self, launch_args: StringArgument | None) -> LaunchArguments: + """Map the Launcher to the LaunchArguments. This method should only be + called once during construction. + + :param launch_args: A mapping of arguments names to values to be used + to initialize the arguments + :returns: The appropriate type for the settings instance. + """ if self._launcher == LauncherType.Slurm: - return SlurmArgBuilder(launch_args) + return SlurmLaunchArguments(launch_args) elif self._launcher == LauncherType.Mpiexec: - return MpiexecArgBuilder(launch_args) + return MpiexecLaunchArguments(launch_args) elif self._launcher == LauncherType.Mpirun: - return MpiArgBuilder(launch_args) + return MpirunLaunchArguments(launch_args) elif self._launcher == LauncherType.Orterun: - return OrteArgBuilder(launch_args) + return OrterunLaunchArguments(launch_args) elif self._launcher == LauncherType.Alps: - return AprunArgBuilder(launch_args) + return AprunLaunchArguments(launch_args) elif self._launcher == LauncherType.Lsf: - return JsrunArgBuilder(launch_args) + return JsrunLaunchArguments(launch_args) elif self._launcher == LauncherType.Pals: - return PalsMpiexecArgBuilder(launch_args) + return PalsMpiexecLaunchArguments(launch_args) elif self._launcher == LauncherType.Dragon: - return DragonArgBuilder(launch_args) + return DragonLaunchArguments(launch_args) elif self._launcher == LauncherType.Local: - return LocalArgBuilder(launch_args) + return LocalLaunchArguments(launch_args) else: raise ValueError(f"Invalid launcher type: {self._launcher}") @@ -137,7 +156,7 @@ def format_env_vars(self) -> t.Union[t.List[str], None]: """Build bash compatible environment variable string for Slurm :returns: the formatted string of environment variables """ - return self._arg_builder.format_env_vars(self._env_vars) + return self._arguments.format_env_vars(self._env_vars) def format_comma_sep_env_vars(self) -> t.Union[t.Tuple[str, t.List[str]], None]: """Build environment variable string for Slurm @@ -146,7 +165,7 @@ def format_comma_sep_env_vars(self) -> t.Union[t.Tuple[str, t.List[str]], None]: for more information on this, see the slurm documentation for srun :returns: the formatted string of environment variables """ - return self._arg_builder.format_comma_sep_env_vars(self._env_vars) + return self._arguments.format_comma_sep_env_vars(self._env_vars) def format_launch_args(self) -> t.Union[t.List[str], None]: """Return formatted launch arguments @@ -154,7 +173,7 @@ def format_launch_args(self) -> t.Union[t.List[str], None]: literally with no formatting. :return: list run arguments for these settings """ - return self._arg_builder.format_launch_args() + return self._arguments.format_launch_args() def __str__(self) -> str: # pragma: no-cover string = f"\nLauncher: {self.launcher}{self.launch_args}" diff --git a/smartsim/types.py b/smartsim/types.py index a08f9f5f6..f756fc6fe 100644 --- a/smartsim/types.py +++ b/smartsim/types.py @@ -29,3 +29,4 @@ import typing as t TODO = t.Any # TODO: remove this after refactor +LaunchedJobID = t.NewType("LaunchedJobID", str) diff --git a/tests/temp_tests/test_settings/conftest.py b/tests/temp_tests/test_settings/conftest.py new file mode 100644 index 000000000..3edf5af6b --- /dev/null +++ b/tests/temp_tests/test_settings/conftest.py @@ -0,0 +1,62 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +from smartsim.settings import dispatch +from smartsim.settings.arguments import launchArguments as launch + + +@pytest.fixture +def mock_echo_executable(): + class _MockExe(dispatch.ExecutableProtocol): + def as_program_arguments(self): + return ("echo", "hello", "world") + + yield _MockExe() + + +@pytest.fixture +def mock_launch_args(): + class _MockLaunchArgs(launch.LaunchArguments): + def set(self, arg, val): ... + def launcher_str(self): + return "mock-laucnh-args" + + yield _MockLaunchArgs({}) + + +@pytest.fixture +def mock_launcher(): + class _MockLauncher(dispatch.LauncherProtocol): + def start(self, launchable): + return dispatch.create_job_id() + + @classmethod + def create(cls, exp): + return cls() + + yield _MockLauncher() diff --git a/tests/temp_tests/test_settings/test_alpsLauncher.py b/tests/temp_tests/test_settings/test_alpsLauncher.py index 642082137..c76b49363 100644 --- a/tests/temp_tests/test_settings/test_alpsLauncher.py +++ b/tests/temp_tests/test_settings/test_alpsLauncher.py @@ -26,7 +26,10 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.alps import AprunArgBuilder +from smartsim.settings.arguments.launch.alps import ( + AprunLaunchArguments, + _as_aprun_command, +) from smartsim.settings.launchCommand import LauncherType pytestmark = pytest.mark.group_a @@ -111,14 +114,14 @@ def test_launcher_str(): ) def test_alps_class_methods(function, value, flag, result): alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) - assert isinstance(alpsLauncher._arg_builder, AprunArgBuilder) + assert isinstance(alpsLauncher._arguments, AprunLaunchArguments) getattr(alpsLauncher.launch_args, function)(*value) assert alpsLauncher.launch_args._launch_args[flag] == result def test_set_verbose_launch(): alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) - assert isinstance(alpsLauncher._arg_builder, AprunArgBuilder) + assert isinstance(alpsLauncher._arguments, AprunLaunchArguments) alpsLauncher.launch_args.set_verbose_launch(True) assert alpsLauncher.launch_args._launch_args == {"debug": "7"} alpsLauncher.launch_args.set_verbose_launch(False) @@ -127,7 +130,7 @@ def test_set_verbose_launch(): def test_set_quiet_launch(): aprunLauncher = LaunchSettings(launcher=LauncherType.Alps) - assert isinstance(aprunLauncher._arg_builder, AprunArgBuilder) + assert isinstance(aprunLauncher._arguments, AprunLaunchArguments) aprunLauncher.launch_args.set_quiet_launch(True) assert aprunLauncher.launch_args._launch_args == {"quiet": None} aprunLauncher.launch_args.set_quiet_launch(False) @@ -137,7 +140,7 @@ def test_set_quiet_launch(): def test_format_env_vars(): env_vars = {"OMP_NUM_THREADS": "20", "LOGGING": "verbose"} aprunLauncher = LaunchSettings(launcher=LauncherType.Alps, env_vars=env_vars) - assert isinstance(aprunLauncher._arg_builder, AprunArgBuilder) + assert isinstance(aprunLauncher._arguments, AprunLaunchArguments) aprunLauncher.update_env({"OMP_NUM_THREADS": "10"}) formatted = aprunLauncher.format_env_vars() result = ["-e", "OMP_NUM_THREADS=10", "-e", "LOGGING=verbose"] @@ -174,3 +177,39 @@ def test_invalid_exclude_hostlist_format(): alpsLauncher.launch_args.set_excluded_hosts([5]) with pytest.raises(TypeError): alpsLauncher.launch_args.set_excluded_hosts(5) + + +@pytest.mark.parametrize( + "args, expected", + ( + pytest.param({}, ("aprun", "--", "echo", "hello", "world"), id="Empty Args"), + pytest.param( + {"N": "1"}, + ("aprun", "-N", "1", "--", "echo", "hello", "world"), + id="Short Arg", + ), + pytest.param( + {"cpus-per-pe": "1"}, + ("aprun", "--cpus-per-pe=1", "--", "echo", "hello", "world"), + id="Long Arg", + ), + pytest.param( + {"q": None}, + ("aprun", "-q", "--", "echo", "hello", "world"), + id="Short Arg (No Value)", + ), + pytest.param( + {"quiet": None}, + ("aprun", "--quiet", "--", "echo", "hello", "world"), + id="Long Arg (No Value)", + ), + pytest.param( + {"N": "1", "cpus-per-pe": "123"}, + ("aprun", "-N", "1", "--cpus-per-pe=123", "--", "echo", "hello", "world"), + id="Short and Long Args", + ), + ), +) +def test_formatting_launch_args(mock_echo_executable, args, expected): + cmd = _as_aprun_command(AprunLaunchArguments(args), mock_echo_executable, {}) + assert tuple(cmd) == expected diff --git a/tests/temp_tests/test_settings/test_dispatch.py b/tests/temp_tests/test_settings/test_dispatch.py new file mode 100644 index 000000000..9c99cb7d0 --- /dev/null +++ b/tests/temp_tests/test_settings/test_dispatch.py @@ -0,0 +1,408 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import abc +import contextlib +import dataclasses +import io + +import pytest + +from smartsim.error import errors +from smartsim.settings import dispatch + +pytestmark = pytest.mark.group_a + +FORMATTED = object() + + +def format_fn(args, exe, env): + return FORMATTED + + +@pytest.fixture +def expected_dispatch_registry(mock_launcher, mock_launch_args): + yield { + type(mock_launch_args): dispatch._DispatchRegistration( + format_fn, type(mock_launcher) + ) + } + + +def test_declaritive_form_dispatch_declaration( + mock_launcher, mock_launch_args, expected_dispatch_registry +): + d = dispatch.Dispatcher() + assert type(mock_launch_args) == d.dispatch( + with_format=format_fn, to_launcher=type(mock_launcher) + )(type(mock_launch_args)) + assert d._dispatch_registry == expected_dispatch_registry + + +def test_imperative_form_dispatch_declaration( + mock_launcher, mock_launch_args, expected_dispatch_registry +): + d = dispatch.Dispatcher() + assert None == d.dispatch( + type(mock_launch_args), to_launcher=type(mock_launcher), with_format=format_fn + ) + assert d._dispatch_registry == expected_dispatch_registry + + +def test_dispatchers_from_same_registry_do_not_cross_polute( + mock_launcher, mock_launch_args, expected_dispatch_registry +): + some_starting_registry = {} + d1 = dispatch.Dispatcher(dispatch_registry=some_starting_registry) + d2 = dispatch.Dispatcher(dispatch_registry=some_starting_registry) + assert ( + d1._dispatch_registry == d2._dispatch_registry == some_starting_registry == {} + ) + assert ( + d1._dispatch_registry is not d2._dispatch_registry is not some_starting_registry + ) + + d2.dispatch( + type(mock_launch_args), with_format=format_fn, to_launcher=type(mock_launcher) + ) + assert d1._dispatch_registry == {} + assert d2._dispatch_registry == expected_dispatch_registry + + +def test_copied_dispatchers_do_not_cross_pollute( + mock_launcher, mock_launch_args, expected_dispatch_registry +): + some_starting_registry = {} + d1 = dispatch.Dispatcher(dispatch_registry=some_starting_registry) + d2 = d1.copy() + assert ( + d1._dispatch_registry == d2._dispatch_registry == some_starting_registry == {} + ) + assert ( + d1._dispatch_registry is not d2._dispatch_registry is not some_starting_registry + ) + + d2.dispatch( + type(mock_launch_args), to_launcher=type(mock_launcher), with_format=format_fn + ) + assert d1._dispatch_registry == {} + assert d2._dispatch_registry == expected_dispatch_registry + + +@pytest.mark.parametrize( + "add_dispatch, expected_ctx", + ( + pytest.param( + lambda d, s, l: d.dispatch(s, to_launcher=l, with_format=format_fn), + pytest.raises(TypeError, match="has already been registered"), + id="Imperative -- Disallowed implicitly", + ), + pytest.param( + lambda d, s, l: d.dispatch( + s, to_launcher=l, with_format=format_fn, allow_overwrite=True + ), + contextlib.nullcontext(), + id="Imperative -- Allowed with flag", + ), + pytest.param( + lambda d, s, l: d.dispatch(to_launcher=l, with_format=format_fn)(s), + pytest.raises(TypeError, match="has already been registered"), + id="Declarative -- Disallowed implicitly", + ), + pytest.param( + lambda d, s, l: d.dispatch( + to_launcher=l, with_format=format_fn, allow_overwrite=True + )(s), + contextlib.nullcontext(), + id="Declarative -- Allowed with flag", + ), + ), +) +def test_dispatch_overwriting( + add_dispatch, + expected_ctx, + mock_launcher, + mock_launch_args, + expected_dispatch_registry, +): + d = dispatch.Dispatcher(dispatch_registry=expected_dispatch_registry) + with expected_ctx: + add_dispatch(d, type(mock_launch_args), type(mock_launcher)) + + +@pytest.mark.parametrize( + "type_or_instance", + ( + pytest.param(type, id="type"), + pytest.param(lambda x: x, id="instance"), + ), +) +def test_dispatch_can_retrieve_dispatch_info_from_dispatch_registry( + expected_dispatch_registry, mock_launcher, mock_launch_args, type_or_instance +): + d = dispatch.Dispatcher(dispatch_registry=expected_dispatch_registry) + assert dispatch._DispatchRegistration( + format_fn, type(mock_launcher) + ) == d.get_dispatch(type_or_instance(mock_launch_args)) + + +@pytest.mark.parametrize( + "type_or_instance", + ( + pytest.param(type, id="type"), + pytest.param(lambda x: x, id="instance"), + ), +) +def test_dispatch_raises_if_settings_type_not_registered( + mock_launch_args, type_or_instance +): + d = dispatch.Dispatcher(dispatch_registry={}) + with pytest.raises( + TypeError, match="No dispatch for `.+?(?=`)` has been registered" + ): + d.get_dispatch(type_or_instance(mock_launch_args)) + + +class LauncherABC(abc.ABC): + @abc.abstractmethod + def start(self, launchable): ... + @classmethod + @abc.abstractmethod + def create(cls, exp): ... + + +class PartImplLauncherABC(LauncherABC): + def start(self, launchable): + return dispatch.create_job_id() + + +class FullImplLauncherABC(PartImplLauncherABC): + @classmethod + def create(cls, exp): + return cls() + + +@pytest.mark.parametrize( + "cls, ctx", + ( + pytest.param( + dispatch.LauncherProtocol, + pytest.raises(TypeError, match="Cannot dispatch to protocol"), + id="Cannot dispatch to protocol class", + ), + pytest.param( + "mock_launcher", + contextlib.nullcontext(None), + id="Can dispatch to protocol implementation", + ), + pytest.param( + LauncherABC, + pytest.raises(TypeError, match="Cannot dispatch to abstract class"), + id="Cannot dispatch to abstract class", + ), + pytest.param( + PartImplLauncherABC, + pytest.raises(TypeError, match="Cannot dispatch to abstract class"), + id="Cannot dispatch to partially implemented abstract class", + ), + pytest.param( + FullImplLauncherABC, + contextlib.nullcontext(None), + id="Can dispatch to fully implemented abstract class", + ), + ), +) +def test_register_dispatch_to_launcher_types(request, cls, ctx): + if isinstance(cls, str): + cls = request.getfixturevalue(cls) + d = dispatch.Dispatcher() + with ctx: + d.dispatch(to_launcher=cls, with_format=format_fn) + + +@dataclasses.dataclass +class BufferWriterLauncher(dispatch.LauncherProtocol[list[str]]): + buf: io.StringIO + + @classmethod + def create(cls, exp): + return cls(io.StringIO()) + + def start(self, strs): + self.buf.writelines(f"{s}\n" for s in strs) + return dispatch.create_job_id() + + +class BufferWriterLauncherSubclass(BufferWriterLauncher): ... + + +@pytest.fixture +def buffer_writer_dispatch(): + stub_format_fn = lambda *a, **kw: ["some", "strings"] + return dispatch._DispatchRegistration(stub_format_fn, BufferWriterLauncher) + + +@pytest.mark.parametrize( + "input_, map_, expected", + ( + pytest.param( + ["list", "of", "strings"], + lambda xs: xs, + ["list\n", "of\n", "strings\n"], + id="[str] -> [str]", + ), + pytest.param( + "words on new lines", + lambda x: x.split(), + ["words\n", "on\n", "new\n", "lines\n"], + id="str -> [str]", + ), + pytest.param( + range(1, 4), + lambda xs: [str(x) for x in xs], + ["1\n", "2\n", "3\n"], + id="[int] -> [str]", + ), + ), +) +def test_launcher_adapter_correctly_adapts_input_to_launcher(input_, map_, expected): + buf = io.StringIO() + adapter = dispatch._LauncherAdapter(BufferWriterLauncher(buf), map_) + adapter.start(input_) + buf.seek(0) + assert buf.readlines() == expected + + +@pytest.mark.parametrize( + "launcher_instance, ctx", + ( + pytest.param( + BufferWriterLauncher(io.StringIO()), + contextlib.nullcontext(None), + id="Correctly configures expected launcher", + ), + pytest.param( + BufferWriterLauncherSubclass(io.StringIO()), + pytest.raises( + TypeError, + match="^Cannot create launcher adapter.*expected launcher of type .+$", + ), + id="Errors if launcher types are disparate", + ), + pytest.param( + "mock_launcher", + pytest.raises( + TypeError, + match="^Cannot create launcher adapter.*expected launcher of type .+$", + ), + id="Errors if types are not an exact match", + ), + ), +) +def test_dispatch_registration_can_configure_adapter_for_existing_launcher_instance( + request, mock_launch_args, buffer_writer_dispatch, launcher_instance, ctx +): + if isinstance(launcher_instance, str): + launcher_instance = request.getfixturevalue(launcher_instance) + with ctx: + adapter = buffer_writer_dispatch.create_adapter_from_launcher( + launcher_instance, mock_launch_args + ) + assert adapter._adapted_launcher is launcher_instance + + +@pytest.mark.parametrize( + "launcher_instances, ctx", + ( + pytest.param( + (BufferWriterLauncher(io.StringIO()),), + contextlib.nullcontext(None), + id="Correctly configures expected launcher", + ), + pytest.param( + ( + "mock_launcher", + "mock_launcher", + BufferWriterLauncher(io.StringIO()), + "mock_launcher", + ), + contextlib.nullcontext(None), + id="Correctly ignores incompatible launchers instances", + ), + pytest.param( + (), + pytest.raises( + errors.LauncherNotFoundError, + match="^No launcher of exactly type.+could be found from provided launchers$", + ), + id="Errors if no launcher could be found", + ), + pytest.param( + ( + "mock_launcher", + BufferWriterLauncherSubclass(io.StringIO), + "mock_launcher", + ), + pytest.raises( + errors.LauncherNotFoundError, + match="^No launcher of exactly type.+could be found from provided launchers$", + ), + id="Errors if no launcher matches expected type exactly", + ), + ), +) +def test_dispatch_registration_configures_first_compatible_launcher_from_sequence_of_launchers( + request, mock_launch_args, buffer_writer_dispatch, launcher_instances, ctx +): + def resolve_instance(inst): + return request.getfixturevalue(inst) if isinstance(inst, str) else inst + + launcher_instances = tuple(map(resolve_instance, launcher_instances)) + + with ctx: + adapter = buffer_writer_dispatch.configure_first_compatible_launcher( + with_arguments=mock_launch_args, from_available_launchers=launcher_instances + ) + + +def test_dispatch_registration_can_create_a_laucher_for_an_experiment_and_can_reconfigure_it_later( + mock_launch_args, buffer_writer_dispatch +): + class MockExperiment: ... + + exp = MockExperiment() + adapter_1 = buffer_writer_dispatch.create_new_launcher_configuration( + for_experiment=exp, with_arguments=mock_launch_args + ) + assert type(adapter_1._adapted_launcher) == buffer_writer_dispatch.launcher_type + existing_launcher = adapter_1._adapted_launcher + + adapter_2 = buffer_writer_dispatch.create_adapter_from_launcher( + existing_launcher, mock_launch_args + ) + assert type(adapter_2._adapted_launcher) == buffer_writer_dispatch.launcher_type + assert adapter_1._adapted_launcher is adapter_2._adapted_launcher + assert adapter_1 is not adapter_2 diff --git a/tests/temp_tests/test_settings/test_dragonLauncher.py b/tests/temp_tests/test_settings/test_dragonLauncher.py index fa4e58f75..e3f159b7f 100644 --- a/tests/temp_tests/test_settings/test_dragonLauncher.py +++ b/tests/temp_tests/test_settings/test_dragonLauncher.py @@ -25,8 +25,12 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import pytest +from smartsim._core.launcher.dragon.dragonLauncher import ( + _as_run_request_args_and_policy, +) +from smartsim._core.schemas.dragonRequests import DragonRunPolicy, DragonRunRequestView from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.dragon import DragonArgBuilder +from smartsim.settings.arguments.launch.dragon import DragonLaunchArguments from smartsim.settings.launchCommand import LauncherType pytestmark = pytest.mark.group_a @@ -43,12 +47,59 @@ def test_launcher_str(): [ pytest.param("set_nodes", (2,), "2", "nodes", id="set_nodes"), pytest.param( - "set_tasks_per_node", (2,), "2", "tasks-per-node", id="set_tasks_per_node" + "set_tasks_per_node", (2,), "2", "tasks_per_node", id="set_tasks_per_node" ), ], ) def test_dragon_class_methods(function, value, flag, result): dragonLauncher = LaunchSettings(launcher=LauncherType.Dragon) - assert isinstance(dragonLauncher._arg_builder, DragonArgBuilder) + assert isinstance(dragonLauncher._arguments, DragonLaunchArguments) getattr(dragonLauncher.launch_args, function)(*value) assert dragonLauncher.launch_args._launch_args[flag] == result + + +NOT_SET = object() + + +@pytest.mark.parametrize("nodes", (NOT_SET, 20, 40)) +@pytest.mark.parametrize("tasks_per_node", (NOT_SET, 1, 20)) +@pytest.mark.parametrize("cpu_affinity", (NOT_SET, [1], [1, 2, 3])) +@pytest.mark.parametrize("gpu_affinity", (NOT_SET, [1], [1, 2, 3])) +def test_formatting_launch_args_into_request( + mock_echo_executable, nodes, tasks_per_node, cpu_affinity, gpu_affinity +): + launch_args = DragonLaunchArguments({}) + if nodes is not NOT_SET: + launch_args.set_nodes(nodes) + if tasks_per_node is not NOT_SET: + launch_args.set_tasks_per_node(tasks_per_node) + if cpu_affinity is not NOT_SET: + launch_args.set_cpu_affinity(cpu_affinity) + if gpu_affinity is not NOT_SET: + launch_args.set_gpu_affinity(gpu_affinity) + req, policy = _as_run_request_args_and_policy(launch_args, mock_echo_executable, {}) + + expected_args = { + k: v + for k, v in { + "nodes": nodes, + "tasks_per_node": tasks_per_node, + }.items() + if v is not NOT_SET + } + expected_run_req = DragonRunRequestView( + exe="echo", exe_args=["hello", "world"], path="/tmp", env={}, **expected_args + ) + assert req.exe == expected_run_req.exe + assert req.exe_args == expected_run_req.exe_args + assert req.nodes == expected_run_req.nodes + assert req.tasks_per_node == expected_run_req.tasks_per_node + assert req.hostlist == expected_run_req.hostlist + assert req.pmi_enabled == expected_run_req.pmi_enabled + + expected_run_policy_args = { + k: v + for k, v in {"cpu_affinity": cpu_affinity, "gpu_affinity": gpu_affinity}.items() + if v is not NOT_SET + } + assert policy == DragonRunPolicy(**expected_run_policy_args) diff --git a/tests/temp_tests/test_settings/test_localLauncher.py b/tests/temp_tests/test_settings/test_localLauncher.py index 39abd0e07..3d18ea462 100644 --- a/tests/temp_tests/test_settings/test_localLauncher.py +++ b/tests/temp_tests/test_settings/test_localLauncher.py @@ -26,7 +26,10 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.local import LocalArgBuilder +from smartsim.settings.arguments.launch.local import ( + LocalLaunchArguments, + _as_local_command, +) from smartsim.settings.launchCommand import LauncherType pytestmark = pytest.mark.group_a @@ -135,5 +138,10 @@ def test_format_env_vars(): "D": "12", } localLauncher = LaunchSettings(launcher=LauncherType.Local, env_vars=env_vars) - assert isinstance(localLauncher._arg_builder, LocalArgBuilder) + assert isinstance(localLauncher._arguments, LocalLaunchArguments) assert localLauncher.format_env_vars() == ["A=a", "B=", "C=", "D=12"] + + +def test_formatting_returns_original_exe(mock_echo_executable): + cmd = _as_local_command(LocalLaunchArguments({}), mock_echo_executable, {}) + assert tuple(cmd) == ("echo", "hello", "world") diff --git a/tests/temp_tests/test_settings/test_lsfLauncher.py b/tests/temp_tests/test_settings/test_lsfLauncher.py index 1edd07ff2..2e2dddf78 100644 --- a/tests/temp_tests/test_settings/test_lsfLauncher.py +++ b/tests/temp_tests/test_settings/test_lsfLauncher.py @@ -26,7 +26,10 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.lsf import JsrunArgBuilder +from smartsim.settings.arguments.launch.lsf import ( + JsrunLaunchArguments, + _as_jsrun_command, +) from smartsim.settings.launchCommand import LauncherType pytestmark = pytest.mark.group_a @@ -49,7 +52,7 @@ def test_launcher_str(): ) def test_lsf_class_methods(function, value, flag, result): lsfLauncher = LaunchSettings(launcher=LauncherType.Lsf) - assert isinstance(lsfLauncher._arg_builder, JsrunArgBuilder) + assert isinstance(lsfLauncher._arguments, JsrunLaunchArguments) getattr(lsfLauncher.launch_args, function)(*value) assert lsfLauncher.launch_args._launch_args[flag] == result @@ -57,7 +60,7 @@ def test_lsf_class_methods(function, value, flag, result): def test_format_env_vars(): env_vars = {"OMP_NUM_THREADS": None, "LOGGING": "verbose"} lsfLauncher = LaunchSettings(launcher=LauncherType.Lsf, env_vars=env_vars) - assert isinstance(lsfLauncher._arg_builder, JsrunArgBuilder) + assert isinstance(lsfLauncher._arguments, JsrunLaunchArguments) formatted = lsfLauncher.format_env_vars() assert formatted == ["-E", "OMP_NUM_THREADS", "-E", "LOGGING=verbose"] @@ -72,7 +75,7 @@ def test_launch_args(): "np": 100, } lsfLauncher = LaunchSettings(launcher=LauncherType.Lsf, launch_args=launch_args) - assert isinstance(lsfLauncher._arg_builder, JsrunArgBuilder) + assert isinstance(lsfLauncher._arguments, JsrunLaunchArguments) formatted = lsfLauncher.format_launch_args() result = [ "--latency_priority=gpu-gpu", @@ -83,3 +86,39 @@ def test_launch_args(): "--np=100", ] assert formatted == result + + +@pytest.mark.parametrize( + "args, expected", + ( + pytest.param({}, ("jsrun", "--", "echo", "hello", "world"), id="Empty Args"), + pytest.param( + {"n": "1"}, + ("jsrun", "-n", "1", "--", "echo", "hello", "world"), + id="Short Arg", + ), + pytest.param( + {"nrs": "1"}, + ("jsrun", "--nrs=1", "--", "echo", "hello", "world"), + id="Long Arg", + ), + pytest.param( + {"v": None}, + ("jsrun", "-v", "--", "echo", "hello", "world"), + id="Short Arg (No Value)", + ), + pytest.param( + {"verbose": None}, + ("jsrun", "--verbose", "--", "echo", "hello", "world"), + id="Long Arg (No Value)", + ), + pytest.param( + {"tasks_per_rs": "1", "n": "123"}, + ("jsrun", "--tasks_per_rs=1", "-n", "123", "--", "echo", "hello", "world"), + id="Short and Long Args", + ), + ), +) +def test_formatting_launch_args(mock_echo_executable, args, expected): + cmd = _as_jsrun_command(JsrunLaunchArguments(args), mock_echo_executable, {}) + assert tuple(cmd) == expected diff --git a/tests/temp_tests/test_settings/test_mpiLauncher.py b/tests/temp_tests/test_settings/test_mpiLauncher.py index 1d6a516a9..362d21f06 100644 --- a/tests/temp_tests/test_settings/test_mpiLauncher.py +++ b/tests/temp_tests/test_settings/test_mpiLauncher.py @@ -29,10 +29,13 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.mpi import ( - MpiArgBuilder, - MpiexecArgBuilder, - OrteArgBuilder, +from smartsim.settings.arguments.launch.mpi import ( + MpiexecLaunchArguments, + MpirunLaunchArguments, + OrterunLaunchArguments, + _as_mpiexec_command, + _as_mpirun_command, + _as_orterun_command, ) from smartsim.settings.launchCommand import LauncherType @@ -130,9 +133,9 @@ def test_launcher_str(launcher): ), ) for l in ( - [LauncherType.Mpirun, MpiArgBuilder], - [LauncherType.Mpiexec, MpiexecArgBuilder], - [LauncherType.Orterun, OrteArgBuilder], + [LauncherType.Mpirun, MpirunLaunchArguments], + [LauncherType.Mpiexec, MpiexecLaunchArguments], + [LauncherType.Orterun, OrterunLaunchArguments], ) ) ) @@ -140,7 +143,7 @@ def test_launcher_str(launcher): ) def test_mpi_class_methods(l, function, value, flag, result): mpiSettings = LaunchSettings(launcher=l[0]) - assert isinstance(mpiSettings._arg_builder, l[1]) + assert isinstance(mpiSettings._arguments, l[1]) getattr(mpiSettings.launch_args, function)(*value) assert mpiSettings.launch_args._launch_args[flag] == result @@ -233,3 +236,53 @@ def test_invalid_hostlist_format(launcher): mpiSettings.launch_args.set_hostlist([5]) with pytest.raises(TypeError): mpiSettings.launch_args.set_hostlist(5) + + +@pytest.mark.parametrize( + "cls, fmt, cmd", + ( + pytest.param( + MpirunLaunchArguments, _as_mpirun_command, "mpirun", id="w/ mpirun" + ), + pytest.param( + MpiexecLaunchArguments, _as_mpiexec_command, "mpiexec", id="w/ mpiexec" + ), + pytest.param( + OrterunLaunchArguments, _as_orterun_command, "orterun", id="w/ orterun" + ), + ), +) +@pytest.mark.parametrize( + "args, expected", + ( + pytest.param({}, ("--", "echo", "hello", "world"), id="Empty Args"), + pytest.param( + {"n": "1"}, + ("--n", "1", "--", "echo", "hello", "world"), + id="Short Arg", + ), + pytest.param( + {"host": "myhost"}, + ("--host", "myhost", "--", "echo", "hello", "world"), + id="Long Arg", + ), + pytest.param( + {"v": None}, + ("--v", "--", "echo", "hello", "world"), + id="Short Arg (No Value)", + ), + pytest.param( + {"verbose": None}, + ("--verbose", "--", "echo", "hello", "world"), + id="Long Arg (No Value)", + ), + pytest.param( + {"n": "1", "host": "myhost"}, + ("--n", "1", "--host", "myhost", "--", "echo", "hello", "world"), + id="Short and Long Args", + ), + ), +) +def test_formatting_launch_args(mock_echo_executable, cls, fmt, cmd, args, expected): + fmt_cmd = fmt(cls(args), mock_echo_executable, {}) + assert tuple(fmt_cmd) == (cmd,) + expected diff --git a/tests/temp_tests/test_settings/test_palsLauncher.py b/tests/temp_tests/test_settings/test_palsLauncher.py index c747e52f8..db66fa829 100644 --- a/tests/temp_tests/test_settings/test_palsLauncher.py +++ b/tests/temp_tests/test_settings/test_palsLauncher.py @@ -27,7 +27,10 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.pals import PalsMpiexecArgBuilder +from smartsim.settings.arguments.launch.pals import ( + PalsMpiexecLaunchArguments, + _as_pals_command, +) from smartsim.settings.launchCommand import LauncherType pytestmark = pytest.mark.group_a @@ -72,7 +75,7 @@ def test_launcher_str(): ) def test_pals_class_methods(function, value, flag, result): palsLauncher = LaunchSettings(launcher=LauncherType.Pals) - assert isinstance(palsLauncher.launch_args, PalsMpiexecArgBuilder) + assert isinstance(palsLauncher.launch_args, PalsMpiexecLaunchArguments) getattr(palsLauncher.launch_args, function)(*value) assert palsLauncher.launch_args._launch_args[flag] == result assert palsLauncher.format_launch_args() == ["--" + flag, str(result)] @@ -95,3 +98,39 @@ def test_invalid_hostlist_format(): palsLauncher.launch_args.set_hostlist([5]) with pytest.raises(TypeError): palsLauncher.launch_args.set_hostlist(5) + + +@pytest.mark.parametrize( + "args, expected", + ( + pytest.param({}, ("mpiexec", "--", "echo", "hello", "world"), id="Empty Args"), + pytest.param( + {"n": "1"}, + ("mpiexec", "--n", "1", "--", "echo", "hello", "world"), + id="Short Arg", + ), + pytest.param( + {"host": "myhost"}, + ("mpiexec", "--host", "myhost", "--", "echo", "hello", "world"), + id="Long Arg", + ), + pytest.param( + {"v": None}, + ("mpiexec", "--v", "--", "echo", "hello", "world"), + id="Short Arg (No Value)", + ), + pytest.param( + {"verbose": None}, + ("mpiexec", "--verbose", "--", "echo", "hello", "world"), + id="Long Arg (No Value)", + ), + pytest.param( + {"n": "1", "host": "myhost"}, + ("mpiexec", "--n", "1", "--host", "myhost", "--", "echo", "hello", "world"), + id="Short and Long Args", + ), + ), +) +def test_formatting_launch_args(mock_echo_executable, args, expected): + cmd = _as_pals_command(PalsMpiexecLaunchArguments(args), mock_echo_executable, {}) + assert tuple(cmd) == expected diff --git a/tests/temp_tests/test_settings/test_pbsScheduler.py b/tests/temp_tests/test_settings/test_pbsScheduler.py index 28c90f824..1a866c1a1 100644 --- a/tests/temp_tests/test_settings/test_pbsScheduler.py +++ b/tests/temp_tests/test_settings/test_pbsScheduler.py @@ -26,8 +26,8 @@ import pytest from smartsim.settings import BatchSettings +from smartsim.settings.arguments.batch.pbs import QsubBatchArguments from smartsim.settings.batchCommand import SchedulerType -from smartsim.settings.builders.batch.pbs import QsubBatchArgBuilder pytestmark = pytest.mark.group_a @@ -62,7 +62,7 @@ def test_scheduler_str(): ) def test_create_pbs_batch(function, value, flag, result): pbsScheduler = BatchSettings(batch_scheduler=SchedulerType.Pbs) - assert isinstance(pbsScheduler.scheduler_args, QsubBatchArgBuilder) + assert isinstance(pbsScheduler.scheduler_args, QsubBatchArguments) getattr(pbsScheduler.scheduler_args, function)(*value) assert pbsScheduler.scheduler_args._scheduler_args[flag] == result diff --git a/tests/temp_tests/test_settings/test_slurmLauncher.py b/tests/temp_tests/test_settings/test_slurmLauncher.py index 9051d6945..538f2aca4 100644 --- a/tests/temp_tests/test_settings/test_slurmLauncher.py +++ b/tests/temp_tests/test_settings/test_slurmLauncher.py @@ -26,7 +26,10 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.builders.launch.slurm import SlurmArgBuilder +from smartsim.settings.arguments.launch.slurm import ( + SlurmLaunchArguments, + _as_srun_command, +) from smartsim.settings.launchCommand import LauncherType pytestmark = pytest.mark.group_a @@ -108,7 +111,7 @@ def test_launcher_str(): ) def test_slurm_class_methods(function, value, flag, result): slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) - assert isinstance(slurmLauncher.launch_args, SlurmArgBuilder) + assert isinstance(slurmLauncher.launch_args, SlurmLaunchArguments) getattr(slurmLauncher.launch_args, function)(*value) assert slurmLauncher.launch_args._launch_args[flag] == result @@ -275,8 +278,44 @@ def test_set_het_groups(monkeypatch): monkeypatch.setenv("SLURM_HET_SIZE", "4") slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) slurmLauncher.launch_args.set_het_group([1]) - assert slurmLauncher._arg_builder._launch_args["het-group"] == "1" + assert slurmLauncher._arguments._launch_args["het-group"] == "1" slurmLauncher.launch_args.set_het_group([3, 2]) - assert slurmLauncher._arg_builder._launch_args["het-group"] == "3,2" + assert slurmLauncher._arguments._launch_args["het-group"] == "3,2" with pytest.raises(ValueError): slurmLauncher.launch_args.set_het_group([4]) + + +@pytest.mark.parametrize( + "args, expected", + ( + pytest.param({}, ("srun", "--", "echo", "hello", "world"), id="Empty Args"), + pytest.param( + {"N": "1"}, + ("srun", "-N", "1", "--", "echo", "hello", "world"), + id="Short Arg", + ), + pytest.param( + {"nodes": "1"}, + ("srun", "--nodes=1", "--", "echo", "hello", "world"), + id="Long Arg", + ), + pytest.param( + {"v": None}, + ("srun", "-v", "--", "echo", "hello", "world"), + id="Short Arg (No Value)", + ), + pytest.param( + {"verbose": None}, + ("srun", "--verbose", "--", "echo", "hello", "world"), + id="Long Arg (No Value)", + ), + pytest.param( + {"nodes": "1", "n": "123"}, + ("srun", "--nodes=1", "-n", "123", "--", "echo", "hello", "world"), + id="Short and Long Args", + ), + ), +) +def test_formatting_launch_args(mock_echo_executable, args, expected): + cmd = _as_srun_command(SlurmLaunchArguments(args), mock_echo_executable, {}) + assert tuple(cmd) == expected diff --git a/tests/temp_tests/test_settings/test_slurmScheduler.py b/tests/temp_tests/test_settings/test_slurmScheduler.py index ccfa03cf0..a6afcef16 100644 --- a/tests/temp_tests/test_settings/test_slurmScheduler.py +++ b/tests/temp_tests/test_settings/test_slurmScheduler.py @@ -26,8 +26,8 @@ import pytest from smartsim.settings import BatchSettings +from smartsim.settings.arguments.batch.slurm import SlurmBatchArguments from smartsim.settings.batchCommand import SchedulerType -from smartsim.settings.builders.batch.slurm import SlurmBatchArgBuilder pytestmark = pytest.mark.group_a @@ -84,7 +84,7 @@ def test_create_sbatch(): slurmScheduler = BatchSettings( batch_scheduler=SchedulerType.Slurm, scheduler_args=batch_args ) - assert isinstance(slurmScheduler._arg_builder, SlurmBatchArgBuilder) + assert isinstance(slurmScheduler._arguments, SlurmBatchArguments) args = slurmScheduler.format_batch_args() assert args == ["--exclusive", "--oversubscribe"] @@ -132,6 +132,5 @@ def test_sbatch_manual(): slurmScheduler.scheduler_args.set_account("A3531") slurmScheduler.scheduler_args.set_walltime("10:00:00") formatted = slurmScheduler.format_batch_args() - print(f"here: {formatted}") result = ["--nodes=5", "--account=A3531", "--time=10:00:00"] assert formatted == result diff --git a/tests/test_experiment.py b/tests/test_experiment.py new file mode 100644 index 000000000..6571763d7 --- /dev/null +++ b/tests/test_experiment.py @@ -0,0 +1,263 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import dataclasses +import itertools +import tempfile +import typing as t +import uuid +import weakref + +import pytest + +from smartsim.entity import _mock, entity +from smartsim.experiment import Experiment +from smartsim.launchable import job +from smartsim.settings import dispatch, launchSettings +from smartsim.settings.arguments import launchArguments + +pytestmark = pytest.mark.group_a + + +@pytest.fixture +def experiment(monkeypatch, test_dir, dispatcher): + """A simple experiment instance with a unique name anda unique name and its + own directory to be used by tests + """ + exp = Experiment(f"test-exp-{uuid.uuid4()}", test_dir) + monkeypatch.setattr(dispatch, "DEFAULT_DISPATCHER", dispatcher) + yield exp + + +@pytest.fixture +def dispatcher(): + """A pre-configured dispatcher to be used by experiments that simply + dispatches any jobs with `MockLaunchArgs` to a `NoOpRecordLauncher` + """ + d = dispatch.Dispatcher() + to_record: dispatch._FormatterType[MockLaunchArgs, LaunchRecord] = ( + lambda settings, exe, env: LaunchRecord(settings, exe, env) + ) + d.dispatch(MockLaunchArgs, with_format=to_record, to_launcher=NoOpRecordLauncher) + yield d + + +@pytest.fixture +def job_maker(monkeypatch): + """A fixture to generate a never ending stream of `Job` instances each + configured with a unique `MockLaunchArgs` instance, but identical + executable. + """ + + def iter_jobs(): + for i in itertools.count(): + settings = launchSettings.LaunchSettings("local") + monkeypatch.setattr(settings, "_arguments", MockLaunchArgs(i)) + yield job.Job(EchoHelloWorldEntity(), settings) + + jobs = iter_jobs() + yield lambda: next(jobs) + + +JobMakerType: t.TypeAlias = t.Callable[[], job.Job] + + +@dataclasses.dataclass(frozen=True, eq=False) +class NoOpRecordLauncher(dispatch.LauncherProtocol): + """Simple launcher to track the order of and mapping of ids to `start` + method calls. It has exactly three attrs: + + - `created_by_experiment`: + A back ref to the experiment used when calling + `NoOpRecordLauncher.create`. + + - `launched_order`: + An append-only list of `LaunchRecord`s that it has "started". Notice + that this launcher will not actually open any subprocesses/run any + threads/otherwise execute the contents of the record on the system + + - `ids_to_launched`: + A mapping where keys are the generated launched id returned from + a `NoOpRecordLauncher.start` call and the values are the + `LaunchRecord` that was passed into `NoOpRecordLauncher.start` to + cause the id to be generated. + + This is helpful for testing that launchers are handling the expected input + """ + + created_by_experiment: Experiment + launched_order: list[LaunchRecord] = dataclasses.field(default_factory=list) + ids_to_launched: dict[dispatch.LaunchedJobID, LaunchRecord] = dataclasses.field( + default_factory=dict + ) + + __hash__ = object.__hash__ + + @classmethod + def create(cls, exp): + return cls(exp) + + def start(self, record: LaunchRecord): + id_ = dispatch.create_job_id() + self.launched_order.append(record) + self.ids_to_launched[id_] = record + return id_ + + +@dataclasses.dataclass(frozen=True) +class LaunchRecord: + launch_args: launchArguments.LaunchArguments + entity: entity.SmartSimEntity + env: t.Mapping[str, str | None] + + @classmethod + def from_job(cls, job: job.Job): + """Create a launch record for what we would expect a launch record to + look like having gone through the launching process + + :param job: A job that has or will be launched through an experiment + and dispatched to a `NoOpRecordLauncher` + :returns: A `LaunchRecord` that should evaluate to being equivilient to + that of the one stored in the `NoOpRecordLauncher` + """ + args = job._launch_settings.launch_args + entity = job._entity + env = job._launch_settings.env_vars + return cls(args, entity, env) + + +class MockLaunchArgs(launchArguments.LaunchArguments): + """A `LaunchArguments` subclass that will evaluate as true with another if + and only if they were initialized with the same id. In practice this class + has no arguments to set. + """ + + def __init__(self, id_: int): + super().__init__({}) + self.id = id_ + + def __eq__(self, other): + if type(self) is not type(other): + return NotImplemented + return other.id == self.id + + def launcher_str(self): + return "test-launch-args" + + def set(self, arg, val): ... + + +class EchoHelloWorldEntity(entity.SmartSimEntity): + """A simple smartsim entity that meets the `ExecutableProtocol` protocol""" + + def __init__(self): + path = tempfile.TemporaryDirectory() + self._finalizer = weakref.finalize(self, path.cleanup) + super().__init__("test-entity", path, _mock.Mock()) + + def __eq__(self, other): + if type(self) is not type(other): + return NotImplemented + return self.as_program_arguments() == other.as_program_arguments() + + def as_program_arguments(self): + return ("echo", "Hello", "World!") + + +def test_start_raises_if_no_args_supplied(experiment): + with pytest.raises(TypeError, match="missing 1 required positional argument"): + experiment.start() + + +# fmt: off +@pytest.mark.parametrize( + "num_jobs", [pytest.param(i, id=f"{i} job(s)") for i in (1, 2, 3, 5, 10, 100, 1_000)] +) +@pytest.mark.parametrize( + "make_jobs", ( + pytest.param(lambda maker, n: tuple(maker() for _ in range(n)), id="many job instances"), + pytest.param(lambda maker, n: (maker(),) * n , id="same job instance many times"), + ), +) +# fmt: on +def test_start_can_launch_jobs( + experiment: Experiment, + job_maker: JobMakerType, + make_jobs: t.Callable[[JobMakerType, int], tuple[job.Job, ...]], + num_jobs: int, +) -> None: + jobs = make_jobs(job_maker, num_jobs) + assert len(experiment._active_launchers) == 0, "Initialized w/ launchers" + launched_ids = experiment.start(*jobs) + assert len(experiment._active_launchers) == 1, "Unexpected number of launchers" + (launcher,) = experiment._active_launchers + assert isinstance(launcher, NoOpRecordLauncher), "Unexpected launcher type" + assert launcher.created_by_experiment is experiment, "Not created by experiment" + assert ( + len(jobs) == len(launcher.launched_order) == len(launched_ids) == num_jobs + ), "Inconsistent number of jobs/launched jobs/launched ids/expected number of jobs" + expected_launched = [LaunchRecord.from_job(job) for job in jobs] + + # Check that `job_a, job_b, job_c, ...` are started in that order when + # calling `experiemnt.start(job_a, job_b, job_c, ...)` + assert expected_launched == list(launcher.launched_order), "Unexpected launch order" + + # Similarly, check that `id_a, id_b, id_c, ...` corresponds to + # `job_a, job_b, job_c, ...` when calling + # `id_a, id_b, id_c, ... = experiemnt.start(job_a, job_b, job_c, ...)` + expected_id_map = dict(zip(launched_ids, expected_launched)) + assert expected_id_map == launcher.ids_to_launched, "IDs returned in wrong order" + + +@pytest.mark.parametrize( + "num_starts", + [pytest.param(i, id=f"{i} start(s)") for i in (1, 2, 3, 5, 10, 100, 1_000)], +) +def test_start_can_start_a_job_multiple_times_accross_multiple_calls( + experiment: Experiment, job_maker: JobMakerType, num_starts: int +) -> None: + assert len(experiment._active_launchers) == 0, "Initialized w/ launchers" + job = job_maker() + ids_to_launches = { + experiment.start(job)[0]: LaunchRecord.from_job(job) for _ in range(num_starts) + } + assert len(experiment._active_launchers) == 1, "Did not reuse the launcher" + (launcher,) = experiment._active_launchers + assert isinstance(launcher, NoOpRecordLauncher), "Unexpected launcher type" + assert len(launcher.launched_order) == num_starts, "Unexpected number launches" + + # Check that a single `job` instance can be launched and re-launcherd and + # that `id_a, id_b, id_c, ...` corresponds to + # `"start_a", "start_b", "start_c", ...` when calling + # ```py + # id_a = experiment.start(job) # "start_a" + # id_b = experiment.start(job) # "start_b" + # id_c = experiment.start(job) # "start_c" + # ... + # ``` + assert ids_to_launches == launcher.ids_to_launched, "Job was not re-launched"