-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
103 we should be using the logging module #224
Changes from 12 commits
2f6da74
59e56fe
43deac8
63b41f8
023c4d4
ff36e53
d26937b
7686403
222ad43
1ba25b0
7d84532
f618128
44818a4
222d16d
9b49c13
7a3b741
b6bca5b
3957aaf
b65ddbb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
) | ||
from benchcab.internal import get_met_forcing_file_names | ||
from benchcab.model import Model | ||
from benchcab.utils import get_logger | ||
from benchcab.utils.fs import mkdir, next_path | ||
from benchcab.utils.pbs import render_job_script | ||
from benchcab.utils.repo import SVNRepo, create_repo | ||
|
@@ -41,42 +42,56 @@ | |
benchcab_exe_path: Optional[Path], | ||
validate_env: bool = True, | ||
) -> None: | ||
"""Constructor. | ||
|
||
Parameters | ||
---------- | ||
benchcab_exe_path : Optional[Path] | ||
Path to the executable. | ||
validate_env : bool, optional | ||
Validate the environment, by default True | ||
""" | ||
self.benchcab_exe_path = benchcab_exe_path | ||
self.validate_env = validate_env | ||
|
||
self._config: Optional[dict] = None | ||
self._models: list[Model] = [] | ||
self.tasks: list[Task] = [] # initialise fluxsite tasks lazily | ||
|
||
# Get the logger object | ||
self.logger = get_logger() | ||
|
||
def _validate_environment(self, project: str, modules: list): | ||
"""Performs checks on current user environment.""" | ||
if not self.validate_env: | ||
return | ||
|
||
if "gadi.nci" not in internal.NODENAME: | ||
print("Error: benchcab is currently implemented only on Gadi") | ||
self.logger.error("benchcab is currently implemented only on Gadi") | ||
sys.exit(1) | ||
|
||
namelist_dir = Path(internal.CWD / internal.NAMELIST_DIR) | ||
if not namelist_dir.exists(): | ||
print( | ||
"Error: cannot find 'namelists' directory in current working directory" | ||
self.logger.error( | ||
"Cannot find 'namelists' directory in current working directory" | ||
) | ||
sys.exit(1) | ||
|
||
required_groups = [project, "ks32", "hh5"] | ||
groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] | ||
if not set(required_groups).issubset(groups): | ||
print( | ||
"Error: user does not have the required group permissions.", | ||
"The required groups are:", | ||
", ".join(required_groups), | ||
self.logger.error( | ||
[ | ||
"User does not have the required group permissions.", | ||
"The required groups are:", | ||
" ,".join(required_groups), | ||
] | ||
) | ||
sys.exit(1) | ||
|
||
for modname in modules: | ||
if not self.modules_handler.module_is_avail(modname): | ||
print(f"Error: module ({modname}) is not available.") | ||
self.logger.error(f"Module ({modname}) is not available.") | ||
sys.exit(1) | ||
|
||
all_site_ids = set( | ||
|
@@ -86,14 +101,16 @@ | |
for site_id in all_site_ids: | ||
paths = list(internal.MET_DIR.glob(f"{site_id}*")) | ||
if not paths: | ||
print( | ||
f"Error: failed to infer met file for site id '{site_id}' in " | ||
f"{internal.MET_DIR}." | ||
self.logger.error( | ||
[ | ||
f"Failed to infer met file for site id '{site_id}' in " | ||
f"{internal.MET_DIR}." | ||
] | ||
) | ||
sys.exit(1) | ||
if len(paths) > 1: | ||
print( | ||
f"Error: multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}." | ||
self.logger.error( | ||
f"Multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}." | ||
) | ||
sys.exit(1) | ||
|
||
|
@@ -124,13 +141,11 @@ | |
) | ||
return self.tasks | ||
|
||
def validate_config(self, config_path: str, verbose: bool): | ||
def validate_config(self, config_path: str): | ||
"""Endpoint for `benchcab validate_config`.""" | ||
_ = self._get_config(config_path) | ||
|
||
def fluxsite_submit_job( | ||
self, config_path: str, verbose: bool, skip: list[str] | ||
) -> None: | ||
def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: | ||
"""Submits the PBS job script step in the fluxsite test workflow.""" | ||
config = self._get_config(config_path) | ||
self._validate_environment(project=config["project"], modules=config["modules"]) | ||
|
@@ -139,17 +154,17 @@ | |
raise RuntimeError(msg) | ||
|
||
job_script_path = Path(internal.QSUB_FNAME) | ||
print( | ||
"Creating PBS job script to run fluxsite tasks on compute " | ||
f"nodes: {job_script_path}" | ||
self.logger.info( | ||
"Creating PBS job script to run fluxsite tasks on compute nodes" | ||
) | ||
|
||
self.logger.info(f"job_script_path = {job_script_path}") | ||
|
||
with job_script_path.open("w", encoding="utf-8") as file: | ||
contents = render_job_script( | ||
project=config["project"], | ||
config_path=config_path, | ||
modules=config["modules"], | ||
pbs_config=config["fluxsite"]["pbs"], | ||
verbose=verbose, | ||
skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip, | ||
benchcab_path=str(self.benchcab_exe_path), | ||
) | ||
|
@@ -159,94 +174,102 @@ | |
proc = self.subprocess_handler.run_cmd( | ||
f"qsub {job_script_path}", | ||
capture_output=True, | ||
verbose=verbose, | ||
) | ||
except CalledProcessError as exc: | ||
print("Error when submitting job to NCI queue") | ||
print(exc.output) | ||
self.logger.error("when submitting job to NCI queue, details to follow") | ||
self.logger.error(exc.output) | ||
raise | ||
|
||
print( | ||
f"PBS job submitted: {proc.stdout.strip()}\n" | ||
"The CABLE log file for each task is written to " | ||
f"{internal.FLUXSITE_DIRS['LOG']}/<task_name>_log.txt\n" | ||
"The CABLE standard output for each task is written to " | ||
f"{internal.FLUXSITE_DIRS['TASKS']}/<task_name>/out.txt\n" | ||
"The NetCDF output for each task is written to " | ||
f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc" | ||
) | ||
self.logger.info(f"PBS job submitted: {proc.stdout.strip()}") | ||
self.logger.info("CABLE log file for each task is written to:") | ||
self.logger.info(f"{internal.FLUXSITE_DIRS['LOG']}/<task_name>_log.txt") | ||
self.logger.info("The CABLE standard output for each task is written to:") | ||
self.logger.info(f"{internal.FLUXSITE_DIRS['TASKS']}/<task_name>/out.txt") | ||
self.logger.info("The NetCDF output for each task is written to:") | ||
self.logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc") | ||
|
||
def checkout(self, config_path: str, verbose: bool): | ||
def checkout(self, config_path: str): | ||
"""Endpoint for `benchcab checkout`.""" | ||
config = self._get_config(config_path) | ||
self._validate_environment(project=config["project"], modules=config["modules"]) | ||
|
||
mkdir(internal.SRC_DIR, exist_ok=True, verbose=True) | ||
mkdir(internal.SRC_DIR, exist_ok=True) | ||
|
||
print("Checking out repositories...") | ||
self.logger.info("Checking out repositories...") | ||
rev_number_log = "" | ||
for model in self._get_models(config): | ||
model.repo.checkout(verbose=verbose) | ||
model.repo.checkout() | ||
rev_number_log += f"{model.name}: {model.repo.get_revision()}\n" | ||
|
||
# TODO(Sean) we should archive revision numbers for CABLE-AUX | ||
cable_aux_repo = SVNRepo( | ||
svn_root=internal.CABLE_SVN_ROOT, | ||
branch_path=internal.CABLE_AUX_RELATIVE_SVN_PATH, | ||
path=internal.SRC_DIR / "CABLE-AUX", | ||
) | ||
cable_aux_repo.checkout(verbose=verbose) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We removed this in #230 |
||
|
||
rev_number_log_path = next_path("rev_number-*.log") | ||
print(f"Writing revision number info to {rev_number_log_path}") | ||
self.logger.info(f"Writing revision number info to {rev_number_log_path}") | ||
with rev_number_log_path.open("w", encoding="utf-8") as file: | ||
file.write(rev_number_log) | ||
|
||
print("") | ||
|
||
def build(self, config_path: str, verbose: bool): | ||
def build(self, config_path: str): | ||
"""Endpoint for `benchcab build`.""" | ||
config = self._get_config(config_path) | ||
self._validate_environment(project=config["project"], modules=config["modules"]) | ||
|
||
for repo in self._get_models(config): | ||
if repo.build_script: | ||
print( | ||
"Compiling CABLE using custom build script for " | ||
f"realisation {repo.name}..." | ||
) | ||
repo.custom_build(modules=config["modules"], verbose=verbose) | ||
|
||
self.logger.info("Compiling CABLE using custom build script for") | ||
self.logger.info(f"realisation {repo.name}") | ||
repo.custom_build(modules=config["modules"]) | ||
|
||
else: | ||
build_mode = "with MPI" if internal.MPI else "serially" | ||
print(f"Compiling CABLE {build_mode} for realisation {repo.name}...") | ||
repo.pre_build(verbose=verbose) | ||
repo.run_build(modules=config["modules"], verbose=verbose) | ||
repo.post_build(verbose=verbose) | ||
print(f"Successfully compiled CABLE for realisation {repo.name}") | ||
print("") | ||
|
||
def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): | ||
self.logger.info( | ||
f"Compiling CABLE {build_mode} for realisation {repo.name}..." | ||
) | ||
repo.pre_build() | ||
repo.run_build(modules=config["modules"]) | ||
repo.post_build() | ||
self.logger.info(f"Successfully compiled CABLE for realisation {repo.name}") | ||
|
||
def fluxsite_setup_work_directory(self, config_path: str): | ||
"""Endpoint for `benchcab fluxsite-setup-work-dir`.""" | ||
config = self._get_config(config_path) | ||
self._validate_environment(project=config["project"], modules=config["modules"]) | ||
|
||
tasks = self.tasks if self.tasks else self._initialise_tasks(config) | ||
print("Setting up run directory tree for fluxsite tests...") | ||
setup_fluxsite_directory_tree(verbose=verbose) | ||
print("Setting up tasks...") | ||
self.logger.info("Setting up run directory tree for fluxsite tests...") | ||
setup_fluxsite_directory_tree() | ||
self.logger.info("Setting up tasks...") | ||
for task in tasks: | ||
task.setup_task(verbose=verbose) | ||
print("Successfully setup fluxsite tasks") | ||
print("") | ||
task.setup_task() | ||
self.logger.info("Successfully setup fluxsite tasks") | ||
|
||
def fluxsite_run_tasks(self, config_path: str, verbose: bool): | ||
def fluxsite_run_tasks(self, config_path: str): | ||
"""Endpoint for `benchcab fluxsite-run-tasks`.""" | ||
config = self._get_config(config_path) | ||
self._validate_environment(project=config["project"], modules=config["modules"]) | ||
|
||
tasks = self.tasks if self.tasks else self._initialise_tasks(config) | ||
print("Running fluxsite tasks...") | ||
if config["fluxsite"]["multiprocess"]: | ||
ncpus = config["fluxsite"]["pbs"]["ncpus"] | ||
try: | ||
multiprocess = config["fluxsite"]["multiprocess"] | ||
except KeyError: | ||
multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS | ||
if multiprocess: | ||
ncpus = config.get("pbs", {}).get( | ||
"ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] | ||
) | ||
run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) | ||
else: | ||
run_tasks(tasks, verbose=verbose) | ||
print("Successfully ran fluxsite tasks") | ||
print("") | ||
run_tasks(tasks) | ||
self.logger.info("Successfully ran fluxsite tasks") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We want the version in |
||
|
||
def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): | ||
def fluxsite_bitwise_cmp(self, config_path: str): | ||
"""Endpoint for `benchcab fluxsite-bitwise-cmp`.""" | ||
config = self._get_config(config_path) | ||
self._validate_environment(project=config["project"], modules=config["modules"]) | ||
|
@@ -260,31 +283,36 @@ | |
comparisons = get_fluxsite_comparisons(tasks) | ||
|
||
print("Running comparison tasks...") | ||
if config["fluxsite"]["multiprocess"]: | ||
ncpus = config["fluxsite"]["pbs"]["ncpus"] | ||
try: | ||
multiprocess = config["fluxsite"]["multiprocess"] | ||
except KeyError: | ||
multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS | ||
if multiprocess: | ||
try: | ||
ncpus = config["fluxsite"]["pbs"]["ncpus"] | ||
except KeyError: | ||
ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We want the version in |
||
run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) | ||
else: | ||
run_comparisons(comparisons, verbose=verbose) | ||
print("Successfully ran comparison tasks") | ||
run_comparisons(comparisons) | ||
self.logger.info("Successfully ran comparison tasks") | ||
|
||
def fluxsite( | ||
self, config_path: str, no_submit: bool, verbose: bool, skip: list[str] | ||
): | ||
def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]): | ||
"""Endpoint for `benchcab fluxsite`.""" | ||
self.checkout(config_path, verbose) | ||
self.build(config_path, verbose) | ||
self.fluxsite_setup_work_directory(config_path, verbose) | ||
self.checkout(config_path) | ||
self.build(config_path) | ||
self.fluxsite_setup_work_directory(config_path) | ||
if no_submit: | ||
self.fluxsite_run_tasks(config_path, verbose) | ||
self.fluxsite_run_tasks(config_path) | ||
if "fluxsite-bitwise-cmp" not in skip: | ||
self.fluxsite_bitwise_cmp(config_path, verbose) | ||
self.fluxsite_bitwise_cmp(config_path) | ||
else: | ||
self.fluxsite_submit_job(config_path, verbose, skip) | ||
self.fluxsite_submit_job(config_path, skip) | ||
|
||
def spatial(self, config_path: str, verbose: bool): | ||
def spatial(self, config_path: str): | ||
"""Endpoint for `benchcab spatial`.""" | ||
|
||
def run(self, config_path: str, no_submit: bool, verbose: bool, skip: list[str]): | ||
def run(self, config_path: str, no_submit: bool, skip: list[str]): | ||
"""Endpoint for `benchcab run`.""" | ||
self.fluxsite(config_path, no_submit, verbose, skip) | ||
self.spatial(config_path, verbose) | ||
self.fluxsite(config_path, no_submit, skip) | ||
self.spatial(config_path) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to keep the `pbs_config` argument here! That comes from #238