Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Renaming Model to Application #579

Merged
merged 14 commits into from
May 30, 2024
Merged
79 changes: 42 additions & 37 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
from smartsim._core.config.config import Config
from smartsim._core.utils.telemetry.telemetry import JobEntity
from smartsim.database import Orchestrator
from smartsim.entity import Model
from smartsim.entity import Application
from smartsim.error import SSConfigError, SSInternalError
from smartsim.log import get_logger
from smartsim.settings import (
Expand Down Expand Up @@ -93,6 +93,7 @@
test_hostlist = None
has_aprun = shutil.which("aprun") is not None


def get_account() -> str:
    """Return the module-level ``test_account`` value."""
    return test_account

Expand Down Expand Up @@ -141,7 +142,7 @@ def pytest_sessionstart(
time.sleep(0.1)

if CONFIG.dragon_server_path is None:
dragon_server_path = os.path.join(test_output_root, "dragon_server")
dragon_server_path = os.path.join(test_output_root, "dragon_server")
os.makedirs(dragon_server_path)
os.environ["SMARTSIM_DRAGON_SERVER_PATH"] = dragon_server_path

Expand Down Expand Up @@ -183,7 +184,7 @@ def build_mpi_app() -> t.Optional[pathlib.Path]:
if cc is None:
return None

path_to_src = pathlib.Path(FileUtils().get_test_conf_path("mpi"))
path_to_src = pathlib.Path(FileUtils().get_test_conf_path("mpi"))
path_to_out = pathlib.Path(test_output_root) / "apps" / "mpi_app"
os.makedirs(path_to_out.parent, exist_ok=True)
cmd = [cc, str(path_to_src / "mpi_hello.c"), "-o", str(path_to_out)]
Expand All @@ -194,11 +195,12 @@ def build_mpi_app() -> t.Optional[pathlib.Path]:
else:
return None


@pytest.fixture(scope="session")
def mpi_app_path() -> t.Optional[pathlib.Path]:
"""Return path to MPI app if it was built

return None if it could not or will not be built
return None if it could not or will not be built
"""
if not CONFIG.test_mpi:
return None
Expand Down Expand Up @@ -227,7 +229,6 @@ def kill_all_test_spawned_processes() -> None:
print("Not all processes were killed after test")



def get_hostlist() -> t.Optional[t.List[str]]:
global test_hostlist
if not test_hostlist:
Expand Down Expand Up @@ -654,10 +655,10 @@ def setup_test_colo(
application_file: str,
db_args: t.Dict[str, t.Any],
colo_settings: t.Optional[RunSettings] = None,
colo_model_name: str = "colocated_model",
colo_application_name: str = "colocated_application",
port: t.Optional[int] = None,
on_wlm: bool = False,
) -> Model:
) -> Application:
"""Setup database needed for the colo pinning tests"""

# get test setup
Expand All @@ -672,31 +673,31 @@ def setup_test_colo(
colo_settings.set_tasks(1)
colo_settings.set_nodes(1)

colo_model = exp.create_model(colo_model_name, colo_settings)
colo_application = exp.create_application(colo_application_name, colo_settings)

if db_type in ["tcp", "deprecated"]:
db_args["port"] = port if port is not None else _find_free_port(test_ports)
db_args["ifname"] = "lo"
if db_type == "uds" and colo_model_name is not None:
if db_type == "uds" and colo_application_name is not None:
tmp_dir = tempfile.gettempdir()
socket_suffix = str(uuid.uuid4())[:7]
socket_name = f"{colo_model_name}_{socket_suffix}.socket"
socket_name = f"{colo_application_name}_{socket_suffix}.socket"
db_args["unix_socket"] = os.path.join(tmp_dir, socket_name)

colocate_fun: t.Dict[str, t.Callable[..., None]] = {
"tcp": colo_model.colocate_db_tcp,
"deprecated": colo_model.colocate_db,
"uds": colo_model.colocate_db_uds,
"tcp": colo_application.colocate_db_tcp,
"deprecated": colo_application.colocate_db,
"uds": colo_application.colocate_db_uds,
}
with warnings.catch_warnings():
if db_type == "deprecated":
message = "`colocate_db` has been deprecated"
warnings.filterwarnings("ignore", message=message)
colocate_fun[db_type](**db_args)
# assert model will launch with colocated db
assert colo_model.colocated
# assert application will launch with colocated db
assert colo_application.colocated
# Check to make sure that limit_db_cpus made it into the colo settings
return colo_model
return colo_application


@pytest.fixture(scope="function")
Expand All @@ -708,7 +709,9 @@ def global_dragon_teardown() -> None:
"""
if test_launcher != "dragon" or CONFIG.dragon_server_path is None:
return
logger.debug(f"Tearing down Dragon infrastructure, server path: {CONFIG.dragon_server_path}")
logger.debug(
f"Tearing down Dragon infrastructure, server path: {CONFIG.dragon_server_path}"
)
dragon_connector = DragonConnector()
dragon_connector.ensure_connected()
dragon_connector.cleanup()
Expand Down Expand Up @@ -875,26 +878,30 @@ def num_calls(self) -> int:
def details(self) -> t.List[t.Tuple[t.Tuple[t.Any, ...], t.Dict[str, t.Any]]]:
return self._details


## Reuse database across tests

database_registry: t.DefaultDict[str, t.Optional[Orchestrator]] = defaultdict(lambda: None)
database_registry: t.DefaultDict[str, t.Optional[Orchestrator]] = defaultdict(
lambda: None
)


@pytest.fixture(scope="function")
def local_experiment(test_dir: str) -> smartsim.Experiment:
    """Create a default experiment that always uses the "local" launcher

    The experiment is named after the stem of ``test_dir`` and uses
    ``test_dir`` as its experiment path.
    """
    # The experiment name is the final path component of the test directory.
    name = pathlib.Path(test_dir).stem
    return smartsim.Experiment(name, exp_path=test_dir, launcher="local")


@pytest.fixture(scope="function")
def wlm_experiment(test_dir: str, wlmutils: WLMUtils) -> smartsim.Experiment:
"""Create a default experiment that uses the requested launcher"""
name = pathlib.Path(test_dir).stem
return smartsim.Experiment(
name,
exp_path=test_dir,
launcher=wlmutils.get_test_launcher()
name, exp_path=test_dir, launcher=wlmutils.get_test_launcher()
)


def _cleanup_db(name: str) -> None:
global database_registry
db = database_registry[name]
Expand All @@ -906,19 +913,22 @@ def _cleanup_db(name: str) -> None:
except:
pass


@dataclass
class DBConfiguration:
name: str
launcher: str
num_nodes: int
interface: t.Union[str,t.List[str]]
interface: t.Union[str, t.List[str]]
hostlist: t.Optional[t.List[str]]
port: int


@dataclass
class PrepareDatabaseOutput:
orchestrator: t.Optional[Orchestrator] # The actual orchestrator object
new_db: bool # True if a new database was created when calling prepare_db
orchestrator: t.Optional[Orchestrator] # The actual orchestrator object
new_db: bool # True if a new database was created when calling prepare_db


# Reuse databases
@pytest.fixture(scope="session")
Expand All @@ -935,6 +945,7 @@ def local_db() -> t.Generator[DBConfiguration, None, None]:
yield config
_cleanup_db(name)


@pytest.fixture(scope="session")
def single_db(wlmutils: WLMUtils) -> t.Generator[DBConfiguration, None, None]:
hostlist = wlmutils.get_test_hostlist()
Expand All @@ -946,7 +957,7 @@ def single_db(wlmutils: WLMUtils) -> t.Generator[DBConfiguration, None, None]:
1,
wlmutils.get_test_interface(),
hostlist,
_find_free_port(tuple(reversed(test_ports)))
_find_free_port(tuple(reversed(test_ports))),
)
yield config
_cleanup_db(name)
Expand All @@ -971,9 +982,7 @@ def clustered_db(wlmutils: WLMUtils) -> t.Generator[DBConfiguration, None, None]

@pytest.fixture
def register_new_db() -> t.Callable[[DBConfiguration], Orchestrator]:
def _register_new_db(
config: DBConfiguration
) -> Orchestrator:
def _register_new_db(config: DBConfiguration) -> Orchestrator:
exp_path = pathlib.Path(test_output_root, config.name)
exp_path.mkdir(exist_ok=True)
exp = Experiment(
Expand All @@ -986,26 +995,21 @@ def _register_new_db(
batch=False,
interface=config.interface,
hosts=config.hostlist,
db_nodes=config.num_nodes
db_nodes=config.num_nodes,
)
exp.generate(orc, overwrite=True)
exp.start(orc)
global database_registry
database_registry[config.name] = orc
return orc

return _register_new_db


@pytest.fixture(scope="function")
def prepare_db(
register_new_db: t.Callable[
[DBConfiguration],
Orchestrator
]
) -> t.Callable[
[DBConfiguration],
PrepareDatabaseOutput
]:
register_new_db: t.Callable[[DBConfiguration], Orchestrator]
) -> t.Callable[[DBConfiguration], PrepareDatabaseOutput]:
def _prepare_db(db_config: DBConfiguration) -> PrepareDatabaseOutput:
global database_registry
db = database_registry[db_config.name]
Expand All @@ -1021,4 +1025,5 @@ def _prepare_db(db_config: DBConfiguration) -> PrepareDatabaseOutput:
new_db = True

return PrepareDatabaseOutput(db, new_db)

return _prepare_db
6 changes: 6 additions & 0 deletions doc/api/smartsim_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ launches that utilize a parallel launch binary like

.. autosummary::

RunSettings.add_exe_args
RunSettings.update_env

.. autoclass:: RunSettings
Expand Down Expand Up @@ -120,6 +121,7 @@ steps to a batch.
SrunSettings.set_hostlist
SrunSettings.set_excluded_hosts
SrunSettings.set_cpus_per_task
SrunSettings.add_exe_args
SrunSettings.format_run_args
SrunSettings.format_env_vars
SrunSettings.update_env
Expand Down Expand Up @@ -151,6 +153,7 @@ and within batch launches (e.g., ``QsubBatchSettings``)
AprunSettings.set_tasks
AprunSettings.set_tasks_per_node
AprunSettings.make_mpmd
AprunSettings.add_exe_args
AprunSettings.format_run_args
AprunSettings.format_env_vars
AprunSettings.update_env
Expand Down Expand Up @@ -236,6 +239,7 @@ supported on Slurm and PBSpro.
MpirunSettings.set_tasks
MpirunSettings.set_task_map
MpirunSettings.make_mpmd
MpirunSettings.add_exe_args
MpirunSettings.format_run_args
MpirunSettings.format_env_vars
MpirunSettings.update_env
Expand All @@ -262,6 +266,7 @@ supported on Slurm and PBSpro.
MpiexecSettings.set_tasks
MpiexecSettings.set_task_map
MpiexecSettings.make_mpmd
MpiexecSettings.add_exe_args
MpiexecSettings.format_run_args
MpiexecSettings.format_env_vars
MpiexecSettings.update_env
Expand All @@ -288,6 +293,7 @@ supported on Slurm and PBSpro.
OrterunSettings.set_tasks
OrterunSettings.set_task_map
OrterunSettings.make_mpmd
OrterunSettings.add_exe_args
OrterunSettings.format_run_args
OrterunSettings.format_env_vars
OrterunSettings.update_env
Expand Down
1 change: 0 additions & 1 deletion doc/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ Detailed Notes
([SmartSim-PR601](https://github.com/CrayLabs/SmartSim/pull/601))
- Fix packaging failures due to deprecated `pkg_resources`. ([SmartSim-PR598](https://github.com/CrayLabs/SmartSim/pull/598))


### 0.7.0

Released on 14 May, 2024
Expand Down
Loading
Loading