From 8319397432960cab1fed422030caf543cec39e61 Mon Sep 17 00:00:00 2001 From: Sam Partee Date: Sun, 21 Nov 2021 15:23:52 -0500 Subject: [PATCH 1/3] Experiment.create_batch_settings create_batch_settings is a portability improvement for users writing driver scripts. With this function, a user no longer needs to specify a batch settings class, the function will initialize a given class based on the launcher --- smartsim/experiment.py | 56 ++++++++++++++++++++++++++++- smartsim/settings/base.py | 10 ++++-- smartsim/settings/cobaltSettings.py | 4 ++- smartsim/settings/lsfSettings.py | 3 ++ smartsim/settings/settings.py | 54 ++++++++++++++++++++++++++++ smartsim/settings/slurmSettings.py | 22 ++++++++---- tests/test_batch_settings.py | 40 +++++++++++++++++++++ 7 files changed, 179 insertions(+), 10 deletions(-) create mode 100644 tests/test_batch_settings.py diff --git a/smartsim/experiment.py b/smartsim/experiment.py index 44a6a8b34..83445236d 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -24,8 +24,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import os.path as osp import time +import os.path as osp from os import getcwd from pprint import pformat @@ -388,6 +388,60 @@ class in SmartSim. If found, the class corresponding logger.error(e) raise + def create_batch_settings( + self, nodes=1, time="", queue="", account="", batch_args=None, **kwargs + ): + """Create a ``BatchSettings`` instance + + Batch settings parameterize batch workloads. The result of this + function can be passed to the ``Ensemble`` initialization. + + the `batch_args` parameter can be used to pass in a dictionary + of additional batch command arguments that aren't supported through + the smartsim interface + + + .. highlight:: python + .. code-block:: python + + # i.e. 
for Slurm + batch_args = { + "distribution": "block", + "exclusive": None + } + bs = exp.create_batch_settings(nodes=3, + time="10:00:00", + batch_args=batch_args) + bs.set_account("default") + + :param nodes: number of nodes for batch job, defaults to 1 + :type nodes: int, optional + :param time: length of batch job, defaults to "" + :type time: str, optional + :param queue: queue or partition (if slurm), defaults to "" + :type queue: str, optional + :param account: user account name for batch system, defaults to "" + :type account: str, optional + :param batch_args: additional batch arguments, defaults to None + :type batch_args: dict[str, str], optional + :return: a newly created BatchSettings instance + :rtype: BatchSettings + :raises SmartSimError: if batch creation fails + """ + try: + return settings.create_batch_settings( + self._launcher, + nodes=nodes, + time=time, + queue=queue, + account=account, + batch_args=batch_args, + **kwargs, + ) + except SmartSimError as e: + logger.error(e) + raise + def reconnect_orchestrator(self, checkpoint): """Reconnect to a running ``Orchestrator`` diff --git a/smartsim/settings/base.py b/smartsim/settings/base.py index 2b119e999..01d74ea6a 100644 --- a/smartsim/settings/base.py +++ b/smartsim/settings/base.py @@ -226,10 +226,16 @@ def batch_cmd(self): def set_nodes(self, num_nodes): raise NotImplementedError + def set_hostlist(self, host_list): + raise NotImplementedError + + def set_queue(self, queue): + raise NotImplementedError + def set_walltime(self, walltime): raise NotImplementedError - def set_account(self, acct): + def set_account(self, account): raise NotImplementedError def format_batch_args(self): @@ -257,7 +263,7 @@ def add_preamble(self, lines): elif isinstance(lines, list): self._preamble += lines else: - raise TypeError + raise TypeError("Expected str or List[str] for lines argument") def __str__(self): string = f"Batch Command: {self._batch_cmd}\n" diff --git a/smartsim/settings/cobaltSettings.py 
b/smartsim/settings/cobaltSettings.py index 097a7bd48..e70f0b5e6 100644 --- a/smartsim/settings/cobaltSettings.py +++ b/smartsim/settings/cobaltSettings.py @@ -28,7 +28,9 @@ class CobaltBatchSettings(BatchSettings): - def __init__(self, nodes=None, time="", queue=None, account=None, batch_args=None): + def __init__( + self, nodes=None, time="", queue=None, account=None, batch_args=None, **kwargs + ): """Specify settings for a Cobalt ``qsub`` batch launch If the argument doesn't have a parameter, put None diff --git a/smartsim/settings/lsfSettings.py b/smartsim/settings/lsfSettings.py index 36f6b26e8..a6d84b955 100644 --- a/smartsim/settings/lsfSettings.py +++ b/smartsim/settings/lsfSettings.py @@ -346,6 +346,9 @@ def set_project(self, project): """ self.project = project + def set_account(self, account): + self.project = account + def set_nodes(self, num_nodes): """Set the number of nodes for this batch job diff --git a/smartsim/settings/settings.py b/smartsim/settings/settings.py index eeb0aa96a..e8fccb1a7 100644 --- a/smartsim/settings/settings.py +++ b/smartsim/settings/settings.py @@ -29,6 +29,60 @@ from . 
import * +def create_batch_settings( + launcher, nodes=None, time="", queue=None, account=None, batch_args=None, **kwargs +): + """Create a ``BatchSettings`` instance + + See Experiment.create_batch_settings for details + + :param launcher: launcher for this experiment + :type launcher: str + :param nodes: number of nodes for batch job, defaults to None + :type nodes: int, optional + :param time: length of batch job, defaults to "" + :type time: str, optional + :param queue: queue or partition (if slurm), defaults to None + :type queue: str, optional + :param account: user account name for batch system, defaults to None + :type account: str, optional + :param batch_args: additional batch arguments, defaults to None + :type batch_args: dict[str, str], optional + :return: a newly created BatchSettings instance + :rtype: BatchSettings + :raises SmartSimError: if batch creation fails + """ + # all supported batch class implementations + by_launcher = { + "cobalt": CobaltBatchSettings, + "pbs": QsubBatchSettings, + "slurm": SbatchSettings, + "lsf": BsubBatchSettings, + } + + if launcher == "local": + raise SmartSimError("Local launcher does not support batch workloads") + + # detect the batch class to use based on the launcher provided by + # the user + try: + batch_class = by_launcher[launcher] + batch_settings = batch_class( + nodes=nodes, time=time, batch_args=batch_args, **kwargs + ) + # put these two in the init once classes like BsubBatch are unified + if queue: + batch_settings.set_queue(queue) + if account: + batch_settings.set_account(account) + return batch_settings + + except KeyError: + raise SmartSimError( + f"User attempted to make batch settings for unsupported launcher {launcher}" + ) from None + + def create_run_settings( launcher, exe, diff --git a/smartsim/settings/slurmSettings.py b/smartsim/settings/slurmSettings.py index 51f586e1a..611e3b253 100644 --- a/smartsim/settings/slurmSettings.py +++ b/smartsim/settings/slurmSettings.py @@ -203,7 +203,7 @@ def 
add_env_var(var, format_str): class SbatchSettings(BatchSettings): - def __init__(self, nodes=None, time="", account=None, batch_args=None): + def __init__(self, nodes=None, time="", account=None, batch_args=None, **kwargs): """Specify run parameters for a Slurm batch job Slurm `sbatch` arguments can be written into ``batch_args`` @@ -240,7 +240,7 @@ def set_walltime(self, walltime): :param walltime: wall time :type walltime: str """ - # TODO check for errors here + # TODO check for formatting here self.batch_args["time"] = walltime def set_nodes(self, num_nodes): @@ -251,13 +251,13 @@ def set_nodes(self, num_nodes): """ self.batch_args["nodes"] = int(num_nodes) - def set_account(self, acct): + def set_account(self, account): """Set the account for this batch job - :param acct: account id - :type acct: str + :param account: account id + :type account: str """ - self.batch_args["account"] = acct + self.batch_args["account"] = account def set_partition(self, partition): """Set the partition for the batch job @@ -267,6 +267,16 @@ def set_partition(self, partition): """ self.batch_args["partition"] = str(partition) + def set_queue(self, queue): + """alias for set_partition + + Sets the partition for the slurm batch job + + :param queue: the partition to run the batch job on + :type queue: str + """ + self.set_partition(queue) + def set_hostlist(self, host_list): """Specify the hostlist for this job diff --git a/tests/test_batch_settings.py b/tests/test_batch_settings.py new file mode 100644 index 000000000..879171d02 --- /dev/null +++ b/tests/test_batch_settings.py @@ -0,0 +1,40 @@ +from smartsim.settings.settings import create_batch_settings + + +def test_create_pbs_batch(): + pbs_batch = create_batch_settings( + "pbs", nodes=1, time="10:00:00", queue="default", account="myproject", ncpus=10 + ) # test that kwargs make it to class init + args = pbs_batch.format_batch_args() + assert args == [ + "-l select=1:ncpus=10", + "-l place=scatter", + "-l walltime=10:00:00", + 
"-q default", + "-A myproject", + ] + + +def test_create_sbatch(): + batch_args = {"exclusive": None, "oversubscribe": None} + slurm_batch = create_batch_settings( + "slurm", + nodes=1, + time="10:00:00", + queue="default", # actually sets partition + account="myproject", + batch_args=batch_args, + ncpus=10, + ) # test that kwargs from + # pbs doesn't effect slurm (im thinking this will be common) + + assert slurm_batch.batch_args["partition"] == "default" + args = slurm_batch.format_batch_args() + assert args == [ + "--exclusive", + "--oversubscribe", + "--nodes=1", + "--time=10:00:00", + "--partition=default", + "--account=myproject", + ] From fe7429a31c43e3b3e40365973883530f17d21005 Mon Sep 17 00:00:00 2001 From: Sam Partee Date: Fri, 3 Dec 2021 17:08:16 -0600 Subject: [PATCH 2/3] Add generic batch test with create_batch_settings Before, there was a single generic batch test for Slurm and little coverage of other WLM types. With the addition of the portable create_batch_settings function, this test will now execute on any of the supported launchers. 
--- ..._slurm_batch.py => test_generic_batch_launch.py} | 13 ++++++------- tests/test_configs/cov/local_cov.cfg | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) rename tests/full_wlm/{test_launch_slurm_batch.py => test_generic_batch_launch.py} (80%) diff --git a/tests/full_wlm/test_launch_slurm_batch.py b/tests/full_wlm/test_generic_batch_launch.py similarity index 80% rename from tests/full_wlm/test_launch_slurm_batch.py rename to tests/full_wlm/test_generic_batch_launch.py index 3679511b4..2ec817b51 100644 --- a/tests/full_wlm/test_launch_slurm_batch.py +++ b/tests/full_wlm/test_generic_batch_launch.py @@ -1,17 +1,16 @@ import pytest from smartsim import Experiment, constants -from smartsim.settings import SbatchSettings # retrieved from pytest fixtures -if pytest.test_launcher != "slurm": - pytestmark = pytest.mark.skip(reason="Test is only for Slurm WLM systems") +if pytest.test_launcher not in pytest.wlm_options: + pytestmark = pytest.mark.skip(reason="Not testing WLM integrations") def test_batch_ensemble(fileutils, wlmutils): """Test the launch of a manually constructed batch ensemble""" - exp_name = "test-slurm-batch-ensemble" + exp_name = "test-batch-ensemble" exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir(exp_name) @@ -20,7 +19,7 @@ def test_batch_ensemble(fileutils, wlmutils): M1 = exp.create_model("m1", path=test_dir, run_settings=settings) M2 = exp.create_model("m2", path=test_dir, run_settings=settings) - batch = SbatchSettings(nodes=2, time="00:01:00") + batch = exp.create_batch_settings(nodes=1, time="00:01:00") ensemble = exp.create_ensemble("batch-ens", batch_settings=batch) ensemble.add_model(M1) ensemble.add_model(M2) @@ -32,14 +31,14 @@ def test_batch_ensemble(fileutils, wlmutils): def test_batch_ensemble_replicas(fileutils, wlmutils): - exp_name = "test-slurm-batch-ensemble-replicas" + exp_name = "test-batch-ensemble-replicas" exp = Experiment(exp_name, 
launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir(exp_name) script = fileutils.get_test_conf_path("sleep.py") settings = wlmutils.get_run_settings("python", f"{script} --time=5") - batch = SbatchSettings(nodes=2, time="00:01:00") + batch = exp.create_batch_settings(nodes=1, time="00:01:00") ensemble = exp.create_ensemble( "batch-ens-replicas", batch_settings=batch, run_settings=settings, replicas=2 ) diff --git a/tests/test_configs/cov/local_cov.cfg b/tests/test_configs/cov/local_cov.cfg index 8deae0a65..950ca35d2 100644 --- a/tests/test_configs/cov/local_cov.cfg +++ b/tests/test_configs/cov/local_cov.cfg @@ -6,6 +6,7 @@ omit = *mpirun* *alps* *lsf* + *redis_starter.py* [report] exclude_lines= From 6502f0a7e1b284addf86b1b3a91764e7cf805bc3 Mon Sep 17 00:00:00 2001 From: Sam Partee Date: Fri, 3 Dec 2021 17:38:34 -0600 Subject: [PATCH 3/3] Add LSF test to test_create_batch Added ``set_queue`` to BsubBatchSettings and added a test for LSF --- smartsim/settings/lsfSettings.py | 12 +++++++++++- tests/test_batch_settings.py | 23 ++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/smartsim/settings/lsfSettings.py b/smartsim/settings/lsfSettings.py index a6d84b955..f29ff7205 100644 --- a/smartsim/settings/lsfSettings.py +++ b/smartsim/settings/lsfSettings.py @@ -324,6 +324,16 @@ def set_walltime(self, time): """ self.walltime = time + def set_queue(self, queue): + """Set the queue + + This sets ``-q``. 
+ + :param queue: queue name + :type queue: str + """ + self.batch_args["q"] = queue + def set_smts(self, smts): """Set SMTs @@ -421,7 +431,7 @@ def _format_alloc_flags(self): def format_batch_args(self): """Get the formatted batch arguments for a preview - :return: list of batch arguments for Qsub + :return: list of batch arguments for bsub :rtype: list[str] """ opts = [] diff --git a/tests/test_batch_settings.py b/tests/test_batch_settings.py index 879171d02..bc224dff9 100644 --- a/tests/test_batch_settings.py +++ b/tests/test_batch_settings.py @@ -1,11 +1,16 @@ +from smartsim.settings import ( + QsubBatchSettings, + SbatchSettings, + BsubBatchSettings +) from smartsim.settings.settings import create_batch_settings - def test_create_pbs_batch(): pbs_batch = create_batch_settings( "pbs", nodes=1, time="10:00:00", queue="default", account="myproject", ncpus=10 ) # test that kwargs make it to class init args = pbs_batch.format_batch_args() + assert(isinstance(pbs_batch, QsubBatchSettings)) assert args == [ "-l select=1:ncpus=10", "-l place=scatter", @@ -28,6 +33,7 @@ def test_create_sbatch(): ) # test that kwargs from # pbs doesn't effect slurm (im thinking this will be common) + assert(isinstance(slurm_batch, SbatchSettings)) assert slurm_batch.batch_args["partition"] == "default" args = slurm_batch.format_batch_args() assert args == [ @@ -38,3 +44,18 @@ def test_create_sbatch(): "--partition=default", "--account=myproject", ] + + +def test_create_bsub(): + batch_args = {"core_isolation": None} + bsub = create_batch_settings( + "lsf", + nodes=1, + time="10:00:00", + account="myproject", # test that account is set + queue="default", + batch_args=batch_args + ) + assert(isinstance(bsub, BsubBatchSettings)) + args = bsub.format_batch_args() + assert(args == ['-core_isolation', '-nnodes 1', '-q default'])