Skip to content

Commit

Permalink
create_batch_settings (#93)
Browse files Browse the repository at this point in the history
Portable experiment API function for creating batch
settings within SmartSim.

[ committed by @Spartee ]
[ reviewed @al-rigazzi ]
  • Loading branch information
Sam Partee authored Dec 6, 2021
2 parents 0248ffe + 6502f0a commit 79feee5
Show file tree
Hide file tree
Showing 9 changed files with 218 additions and 18 deletions.
56 changes: 55 additions & 1 deletion smartsim/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os.path as osp
import time
import os.path as osp
from os import getcwd
from pprint import pformat

Expand Down Expand Up @@ -388,6 +388,60 @@ class in SmartSim. If found, the class corresponding
logger.error(e)
raise

def create_batch_settings(
    self, nodes=1, time="", queue="", account="", batch_args=None, **kwargs
):
    """Create a ``BatchSettings`` instance

    Batch settings parameterize batch workloads. The result of this
    function can be passed to the ``Ensemble`` initialization.

    The ``batch_args`` parameter can be used to pass in a dictionary
    of additional batch command arguments that aren't supported through
    the SmartSim interface.

    .. highlight:: python
    .. code-block:: python

        # i.e. for Slurm
        batch_args = {
            "distribution": "block",
            "exclusive": None,
        }
        bs = exp.create_batch_settings(nodes=3,
                                       time="10:00:00",
                                       batch_args=batch_args)
        bs.set_account("default")

    :param nodes: number of nodes for batch job, defaults to 1
    :type nodes: int, optional
    :param time: length of batch job, defaults to ""
    :type time: str, optional
    :param queue: queue or partition (if slurm), defaults to ""
    :type queue: str, optional
    :param account: user account name for batch system, defaults to ""
    :type account: str, optional
    :param batch_args: additional batch arguments, defaults to None
    :type batch_args: dict[str, str], optional
    :param kwargs: extra keyword arguments, forwarded unchanged to
        ``settings.create_batch_settings``
    :return: a newly created BatchSettings instance
    :rtype: BatchSettings
    :raises SmartSimError: if batch creation fails
    """
    try:
        # The launcher chosen for this experiment decides which batch
        # settings class gets built.
        batch_settings = settings.create_batch_settings(
            self._launcher,
            nodes=nodes,
            time=time,
            queue=queue,
            account=account,
            batch_args=batch_args,
            **kwargs,
        )
    except SmartSimError as e:
        # Log for the user, then let the original error propagate.
        logger.error(e)
        raise
    return batch_settings

def reconnect_orchestrator(self, checkpoint):
"""Reconnect to a running ``Orchestrator``
Expand Down
10 changes: 8 additions & 2 deletions smartsim/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,16 @@ def batch_cmd(self):
# Placeholder in the base settings class: always raises NotImplementedError.
# Launcher-specific subclasses are presumably expected to override it to set
# the node count -- TODO confirm against the enclosing class definition.
def set_nodes(self, num_nodes):
raise NotImplementedError

# Placeholder in the base settings class: always raises NotImplementedError.
# Launcher-specific subclasses are presumably expected to override it to set
# the host list -- TODO confirm against the enclosing class definition.
def set_hostlist(self, host_list):
raise NotImplementedError

# Placeholder in the base settings class: always raises NotImplementedError.
# Launcher-specific subclasses are presumably expected to override it to set
# the queue (or partition) -- TODO confirm against the enclosing class.
def set_queue(self, queue):
raise NotImplementedError

# Placeholder in the base settings class: always raises NotImplementedError.
# Launcher-specific subclasses are presumably expected to override it to set
# the job wall time -- TODO confirm against the enclosing class definition.
def set_walltime(self, walltime):
raise NotImplementedError

def set_account(self, acct):
def set_account(self, account):
raise NotImplementedError

def format_batch_args(self):
Expand Down Expand Up @@ -257,7 +263,7 @@ def add_preamble(self, lines):
elif isinstance(lines, list):
self._preamble += lines
else:
raise TypeError
raise TypeError("Expected str or List[str] for lines argument")

def __str__(self):
string = f"Batch Command: {self._batch_cmd}\n"
Expand Down
4 changes: 3 additions & 1 deletion smartsim/settings/cobaltSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@


class CobaltBatchSettings(BatchSettings):
def __init__(self, nodes=None, time="", queue=None, account=None, batch_args=None):
def __init__(
self, nodes=None, time="", queue=None, account=None, batch_args=None, **kwargs
):
"""Specify settings for a Cobalt ``qsub`` batch launch
If the argument doesn't have a parameter, put None
Expand Down
15 changes: 14 additions & 1 deletion smartsim/settings/lsfSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,16 @@ def set_walltime(self, time):
"""
self.walltime = time

def set_queue(self, queue):
"""Set the queue
This sets ``-q``.
The value is stored under the ``"q"`` key of ``self.batch_args``,
replacing any queue stored there before.
:param queue: queue name
:type queue: str
"""
self.batch_args["q"] = queue

def set_smts(self, smts):
"""Set SMTs
Expand All @@ -346,6 +356,9 @@ def set_project(self, project):
"""
self.project = project

# Set the account for this batch job by storing it on ``self.project``,
# the same attribute ``set_project`` assigns.
# NOTE(review): presumably LSF's "project" plays the role of the account
# on other batch systems -- confirm.
def set_account(self, account):
self.project = account

def set_nodes(self, num_nodes):
"""Set the number of nodes for this batch job
Expand Down Expand Up @@ -418,7 +431,7 @@ def _format_alloc_flags(self):
def format_batch_args(self):
"""Get the formatted batch arguments for a preview
:return: list of batch arguments for Qsub
:return: list of batch arguments for bsub
:rtype: list[str]
"""
opts = []
Expand Down
54 changes: 54 additions & 0 deletions smartsim/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,60 @@
from . import *


def create_batch_settings(
    launcher, nodes=None, time="", queue=None, account=None, batch_args=None, **kwargs
):
    """Create a ``BatchSettings`` instance

    See Experiment.create_batch_settings for details

    :param launcher: launcher for this experiment
    :type launcher: str
    :param nodes: number of nodes for batch job, defaults to None
    :type nodes: int, optional
    :param time: length of batch job, defaults to ""
    :type time: str, optional
    :param queue: queue or partition (if slurm); only applied when
        truthy, defaults to None
    :type queue: str, optional
    :param account: user account name for batch system; only applied
        when truthy, defaults to None
    :type account: str, optional
    :param batch_args: additional batch arguments, defaults to None
    :type batch_args: dict[str, str], optional
    :param kwargs: extra keyword arguments passed to the constructor of
        the selected batch settings class
    :return: a newly created BatchSettings instance
    :rtype: BatchSettings
    :raises SmartSimError: if the launcher is ``"local"`` or is not one
        of the supported launchers
    """
    # all supported batch class implementations
    by_launcher = {
        "cobalt": CobaltBatchSettings,
        "pbs": QsubBatchSettings,
        "slurm": SbatchSettings,
        "lsf": BsubBatchSettings,
    }

    if launcher == "local":
        raise SmartSimError("Local launcher does not support batch workloads")

    # detect the batch class to use based on the launcher provided by
    # the user. Only the lookup is guarded: a KeyError raised while
    # constructing or configuring the settings object is a different
    # failure and must not be reported as an unsupported launcher.
    try:
        batch_class = by_launcher[launcher]
    except KeyError:
        raise SmartSimError(
            f"User attempted to make batch settings for unsupported launcher {launcher}"
        ) from None

    batch_settings = batch_class(
        nodes=nodes, time=time, batch_args=batch_args, **kwargs
    )
    # put these two in the init once classes like BsubBatch are unified
    if queue:
        batch_settings.set_queue(queue)
    if account:
        batch_settings.set_account(account)
    return batch_settings


def create_run_settings(
launcher,
exe,
Expand Down
22 changes: 16 additions & 6 deletions smartsim/settings/slurmSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def add_env_var(var, format_str):


class SbatchSettings(BatchSettings):
def __init__(self, nodes=None, time="", account=None, batch_args=None):
def __init__(self, nodes=None, time="", account=None, batch_args=None, **kwargs):
"""Specify run parameters for a Slurm batch job
Slurm `sbatch` arguments can be written into ``batch_args``
Expand Down Expand Up @@ -240,7 +240,7 @@ def set_walltime(self, walltime):
:param walltime: wall time
:type walltime: str
"""
# TODO check for errors here
# TODO check for formatting here
self.batch_args["time"] = walltime

def set_nodes(self, num_nodes):
Expand All @@ -251,13 +251,13 @@ def set_nodes(self, num_nodes):
"""
self.batch_args["nodes"] = int(num_nodes)

def set_account(self, acct):
def set_account(self, account):
"""Set the account for this batch job
:param acct: account id
:type acct: str
:param account: account id
:type account: str
"""
self.batch_args["account"] = acct
self.batch_args["account"] = account

def set_partition(self, partition):
"""Set the partition for the batch job
Expand All @@ -267,6 +267,16 @@ def set_partition(self, partition):
"""
self.batch_args["partition"] = str(partition)

def set_queue(self, queue):
"""Alias for ``set_partition``
Sets the partition for the slurm batch job by delegating to
``set_partition`` with the same value.
:param queue: the partition to run the batch job on
:type queue: str
"""
self.set_partition(queue)

def set_hostlist(self, host_list):
"""Specify the hostlist for this job
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import pytest

from smartsim import Experiment, constants
from smartsim.settings import SbatchSettings

# retrieved from pytest fixtures
if pytest.test_launcher != "slurm":
pytestmark = pytest.mark.skip(reason="Test is only for Slurm WLM systems")
if pytest.test_launcher not in pytest.wlm_options:
pytestmark = pytest.mark.skip(reason="Not testing WLM integrations")


def test_batch_ensemble(fileutils, wlmutils):
"""Test the launch of a manually constructed batch ensemble"""

exp_name = "test-slurm-batch-ensemble"
exp_name = "test-batch-ensemble"
exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher())
test_dir = fileutils.make_test_dir(exp_name)

Expand All @@ -20,7 +19,7 @@ def test_batch_ensemble(fileutils, wlmutils):
M1 = exp.create_model("m1", path=test_dir, run_settings=settings)
M2 = exp.create_model("m2", path=test_dir, run_settings=settings)

batch = SbatchSettings(nodes=2, time="00:01:00")
batch = exp.create_batch_settings(nodes=1, time="00:01:00")
ensemble = exp.create_ensemble("batch-ens", batch_settings=batch)
ensemble.add_model(M1)
ensemble.add_model(M2)
Expand All @@ -32,14 +31,14 @@ def test_batch_ensemble(fileutils, wlmutils):


def test_batch_ensemble_replicas(fileutils, wlmutils):
exp_name = "test-slurm-batch-ensemble-replicas"
exp_name = "test-batch-ensemble-replicas"
exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher())
test_dir = fileutils.make_test_dir(exp_name)

script = fileutils.get_test_conf_path("sleep.py")
settings = wlmutils.get_run_settings("python", f"{script} --time=5")

batch = SbatchSettings(nodes=2, time="00:01:00")
batch = exp.create_batch_settings(nodes=1, time="00:01:00")
ensemble = exp.create_ensemble(
"batch-ens-replicas", batch_settings=batch, run_settings=settings, replicas=2
)
Expand Down
61 changes: 61 additions & 0 deletions tests/test_batch_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from smartsim.settings import (
QsubBatchSettings,
SbatchSettings,
BsubBatchSettings
)
from smartsim.settings.settings import create_batch_settings

def test_create_pbs_batch():
    """PBS: extra keyword arguments (``ncpus``) must reach the class init."""
    expected_args = [
        "-l select=1:ncpus=10",
        "-l place=scatter",
        "-l walltime=10:00:00",
        "-q default",
        "-A myproject",
    ]
    pbs_batch = create_batch_settings(
        "pbs",
        nodes=1,
        time="10:00:00",
        queue="default",
        account="myproject",
        ncpus=10,  # test that kwargs make it to class init
    )
    formatted = pbs_batch.format_batch_args()
    assert isinstance(pbs_batch, QsubBatchSettings)
    assert formatted == expected_args


def test_create_sbatch():
    """Slurm: ``queue`` maps to the partition and PBS-only kwargs are ignored."""
    expected_args = [
        "--exclusive",
        "--oversubscribe",
        "--nodes=1",
        "--time=10:00:00",
        "--partition=default",
        "--account=myproject",
    ]
    # ``ncpus`` is a PBS keyword; passing it here checks that kwargs meant
    # for PBS don't affect Slurm (this is expected to be a common case)
    slurm_batch = create_batch_settings(
        "slurm",
        nodes=1,
        time="10:00:00",
        queue="default",  # actually sets partition
        account="myproject",
        batch_args={"exclusive": None, "oversubscribe": None},
        ncpus=10,
    )

    assert isinstance(slurm_batch, SbatchSettings)
    assert slurm_batch.batch_args["partition"] == "default"
    assert slurm_batch.format_batch_args() == expected_args


def test_create_bsub():
    """LSF: queue and account passed to the factory land on the settings."""
    batch_args = {"core_isolation": None}
    bsub = create_batch_settings(
        "lsf",
        nodes=1,
        time="10:00:00",
        account="myproject",  # test that account is set
        queue="default",
        batch_args=batch_args,
    )
    assert isinstance(bsub, BsubBatchSettings)
    args = bsub.format_batch_args()
    assert args == ["-core_isolation", "-nnodes 1", "-q default"]
    # The account does not show up in the formatted batch args, so check
    # the attribute that set_account writes to instead.
    assert bsub.project == "myproject"
1 change: 1 addition & 0 deletions tests/test_configs/cov/local_cov.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ omit =
*mpirun*
*alps*
*lsf*
*redis_starter.py*

[report]
exclude_lines=
Expand Down

0 comments on commit 79feee5

Please sign in to comment.