diff --git a/smartsim/_core/launcher/cobalt/cobaltLauncher.py b/smartsim/_core/launcher/cobalt/cobaltLauncher.py index c6281f562..ca0b88a3b 100644 --- a/smartsim/_core/launcher/cobalt/cobaltLauncher.py +++ b/smartsim/_core/launcher/cobalt/cobaltLauncher.py @@ -116,17 +116,17 @@ def run(self, step: Step) -> t.Optional[str]: else: # aprun doesn't direct output for us. out, err = step.get_output_files() + + # LocalStep.run_command omits env, include it here + passed_env = step.env if isinstance(step, LocalStep) else None + # pylint: disable-next=consider-using-with - output = open( - out, "w+", encoding="utf-8" - ) + output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with - error = open( - err, "w+", encoding="utf-8" - ) + error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() ) # if batch submission did not successfully retrieve job ID diff --git a/smartsim/_core/launcher/local/local.py b/smartsim/_core/launcher/local/local.py index 1154710ca..7e5c56f7b 100644 --- a/smartsim/_core/launcher/local/local.py +++ b/smartsim/_core/launcher/local/local.py @@ -108,12 +108,11 @@ def run(self, step: Step) -> str: error = open(err, "w+") cmd = step.get_launch_cmd() - env = {} - if isinstance(step, LocalStep): - env = step.env + # LocalStep.run_command omits env, include it here + passed_env = step.env if isinstance(step, LocalStep) else None task_id = self.task_manager.start_task( - cmd, step.cwd, env=env, out=output.fileno(), err=error.fileno() + cmd, step.cwd, env=passed_env, out=output.fileno(), err=error.fileno() ) self.step_mapping.add(step.name, task_id=task_id, managed=False) return task_id diff --git a/smartsim/_core/launcher/lsf/lsfLauncher.py b/smartsim/_core/launcher/lsf/lsfLauncher.py index d7bb7a2b9..a8d0e27aa 100644 --- a/smartsim/_core/launcher/lsf/lsfLauncher.py +++ b/smartsim/_core/launcher/lsf/lsfLauncher.py @@ -116,18 +116,18 @@ def run(self, step: Step) -> t.Optional[str]: step_id = self._get_lsf_step_id(step) logger.debug(f"Gleaned jsrun step id: {step_id} for {step.name}") else: # isinstance(step, MpirunStep) or isinstance(step, LocalStep) - out, err = step.get_output_files() # mpirun and local launch don't direct output for us + out, err = step.get_output_files() + + # LocalStep.run_command omits env, include it here + passed_env = step.env if isinstance(step, LocalStep) else None + # pylint: disable-next=consider-using-with - output = open( - out, "w+", encoding="utf-8" - ) + output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with - error = open( - err, "w+", encoding="utf-8" - ) + error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() ) self.step_mapping.add(step.name, step_id, task_id, step.managed) diff --git a/smartsim/_core/launcher/pbs/pbsLauncher.py b/smartsim/_core/launcher/pbs/pbsLauncher.py index d323b8c2c..cbb85337c 100644 --- a/smartsim/_core/launcher/pbs/pbsLauncher.py +++ b/smartsim/_core/launcher/pbs/pbsLauncher.py @@ -108,18 +108,18 @@ def run(self, step: Step) -> t.Optional[str]: step_id = out.strip() logger.debug(f"Gleaned batch job id: {step_id} for {step.name}") else: - # aprun doesn't direct output for us. + # aprun/local doesn't direct output for us. out, err = step.get_output_files() + + # LocalStep.run_command omits env, include it here + passed_env = step.env if isinstance(step, LocalStep) else None + # pylint: disable-next=consider-using-with - output = open( - out, "w+", encoding="utf-8" - ) + output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with - error = open( - err, "w+", encoding="utf-8" - ) + error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() ) # if batch submission did not successfully retrieve job ID diff --git a/smartsim/_core/launcher/slurm/slurmLauncher.py b/smartsim/_core/launcher/slurm/slurmLauncher.py index 45ad7e7a7..dc1184dd4 100644 --- a/smartsim/_core/launcher/slurm/slurmLauncher.py +++ b/smartsim/_core/launcher/slurm/slurmLauncher.py @@ -149,17 +149,22 @@ def run(self, step: Step) -> t.Optional[str]: logger.debug(f"Gleaned batch job id: {step_id} for {step.name}") # Launch a in-allocation or on-allocation (if srun) command + elif isinstance(step, SrunStep): + task_id = self.task_manager.start_task(cmd_list, step.cwd) else: - if isinstance(step, SrunStep): - task_id = self.task_manager.start_task(cmd_list, step.cwd) - else: - # Mpirun doesn't direct output for us like srun does - out, err = step.get_output_files() - output = open(out, "w+", encoding="utf-8") # pylint: disable=consider-using-with - error = open(err, "w+", encoding="utf-8") # pylint: disable=consider-using-with - task_id = self.task_manager.start_task( - cmd_list, step.cwd, out=output.fileno(), err=error.fileno() - ) + # MPI/local steps don't direct output like slurm steps + out, err = step.get_output_files() + + # LocalStep.run_command omits env, include it here + passed_env = step.env if isinstance(step, LocalStep) else None + + # pylint: disable-next=consider-using-with + output = open(out, "w+", encoding="utf-8") + # pylint: disable-next=consider-using-with + error = open(err, "w+", encoding="utf-8") + task_id = self.task_manager.start_task( + cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() + ) if not step_id and step.managed: step_id = self._get_slurm_step_id(step) diff --git a/smartsim/_core/launcher/taskManager.py b/smartsim/_core/launcher/taskManager.py index 4de106478..7068a49b8 100644 --- a/smartsim/_core/launcher/taskManager.py +++ b/smartsim/_core/launcher/taskManager.py @@ -118,7 +118,7 @@ def start_task( :param cwd: current working directory :type cwd: str :param env: environment to launch with - :type env: dict[str, str], optional + :type env: dict[str, str], optional. If None, calling environment is inherited :param out: output file, defaults to PIPE :type out: file, optional :param err: error file, defaults to PIPE diff --git a/tests/on_wlm/test_local_step.py b/tests/on_wlm/test_local_step.py new file mode 100644 index 000000000..0613f41c2 --- /dev/null +++ b/tests/on_wlm/test_local_step.py @@ -0,0 +1,116 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import uuid +import pytest + +from smartsim import Experiment, status +from smartsim.settings import RunSettings + +# retrieved from pytest fixtures +if pytest.test_launcher != "slurm": + pytestmark = pytest.mark.skip(reason="Test is only for Slurm WLM systems") + +""" +Test execution of local steps within the WLM +""" + + +def test_local_env_pass_implicit(fileutils) -> None: + """Ensure implicitly exported env is available to running task""" + exp_value = str(uuid.uuid4()) + env_key = "test_local_env_pass_implicit" + os.environ[env_key] = exp_value + + test_dir = fileutils.make_test_dir() + exp_dir = f"{test_dir}/exp" + os.makedirs(exp_dir) + script = fileutils.get_test_conf_path("check_env.py") + + exp = Experiment("LRZ", exp_path=exp_dir, launcher="slurm") + + exe_name = "python" + exe_args = [script, env_key] + + # Create the RunSettings associated with the workload manager (WLM) run command + run_args = {"--nodes": 1, "--ntasks": 1, "--time": "00:01:00"} + # NOTE: not passing env_args into run_settings here, relying on --export=ALL default + settings = RunSettings(exe_name, exe_args, run_command="srun", run_args=run_args) + app_name = "echo_app" + app = exp.create_model(app_name, settings) + + # generate the experiment structure and start the model + exp.generate(app, overwrite=True) + exp.start(app, block=True, summary=False) + + assert env_key not in settings.env_vars + os.environ.pop(env_key) + + with open(f"{exp_dir}/{app_name}/{app_name}.out") as app_outfile: + app_output = app_outfile.read() + + # verify application was able to access the env var + assert f"{env_key}=={exp_value}" in app_output + + +def test_local_env_pass_explicit(fileutils) -> None: + """Ensure explicitly exported env is available to running task""" + exp_value = str(uuid.uuid4()) + env_key = "test_local_env_pass_explicit" + + assert env_key not in os.environ + + test_dir = fileutils.make_test_dir() + script = fileutils.get_test_conf_path("check_env.py") + + exp_dir = f"{test_dir}/exp" + os.makedirs(exp_dir) + exp = Experiment("LRZ", exp_path=exp_dir, launcher="slurm") + + exe_name = "python" + exe_args = [script, env_key] + + # Create the RunSettings associated with the workload manager (WLM) run command + run_args = {"--nodes": 1, "--ntasks": 1, "--time": "00:01:00"} + env_vars = {env_key: exp_value} # <-- explicitly passing a new env var to task + settings = RunSettings( + exe_name, exe_args, run_command="srun", run_args=run_args, env_vars=env_vars + ) + app_name = "echo_app" + app = exp.create_model(app_name, settings) + + # generate the experiment structure and start the model + exp.generate(app, overwrite=True) + exp.start(app, block=True, summary=False) + + assert env_key in settings.env_vars + + with open(f"{exp_dir}/{app_name}/{app_name}.out") as app_outfile: + app_output = app_outfile.read() + + # verify application was able to access the env var + assert f"{env_key}=={exp_value}" in app_output diff --git a/tests/test_configs/check_env.py b/tests/test_configs/check_env.py new file mode 100644 index 000000000..72a2107ea --- /dev/null +++ b/tests/test_configs/check_env.py @@ -0,0 +1,11 @@ +import os +import sys + +var_name = sys.argv[1] +env_value = os.environ.get(sys.argv[1], None) + +if env_value: + print(f"{var_name}=={env_value}") + sys.exit(0) + +print('env var not found')