Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve unit testing #99

Merged
merged 6 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .conda/benchcab-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ dependencies:
- numpy
- pytest-cov
- pyyaml
- flatdict
1 change: 1 addition & 0 deletions .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ requirements:
- netCDF4
- PyYAML
- f90nml
- flatdict
37 changes: 11 additions & 26 deletions benchcab/bench_config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
"""
A module containing all *_config() functions.

"""
"""A module containing all *_config() functions."""

from pathlib import Path
import yaml

from benchcab.internal import MEORG_EXPERIMENTS, DEFAULT_SCIENCE_CONFIGURATIONS
from benchcab import internal


def check_config(config: dict):
Expand All @@ -15,12 +12,12 @@ def check_config(config: dict):
If the config is invalid, an exception is raised. Otherwise, do nothing.
"""

required_keys = ["realisations", "project", "modules", "experiment"]
if any(key not in config for key in required_keys):
if any(key not in config for key in internal.CONFIG_REQUIRED_KEYS):
raise ValueError(
"The config file does not list all required entries. "
"Those are: "
", ".join(required_keys)
"Keys are missing from the config file: "
+ ", ".join(
key for key in internal.CONFIG_REQUIRED_KEYS if key not in config
)
)

if not isinstance(config["project"], str):
Expand All @@ -46,12 +43,13 @@ def check_config(config: dict):
"that is compatible with the f90nml python package."
)

valid_experiments = list(MEORG_EXPERIMENTS) + MEORG_EXPERIMENTS["five-site-test"]
valid_experiments = (
list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"]
)
if config["experiment"] not in valid_experiments:
raise ValueError(
"The 'experiment' key is invalid.\n"
"Valid experiments are: "
", ".join(valid_experiments)
"Valid experiments are: " + ", ".join(valid_experiments)
)

if not isinstance(config["realisations"], list):
Expand Down Expand Up @@ -108,17 +106,4 @@ def read_config(config_path: str) -> dict:

check_config(config)

for branch in config["realisations"]:
# Add "name" key if not provided and set to base name of "path" key
branch.setdefault("name", Path(branch["path"]).name)
# Add "revision" key if not provided and set to default value -1, i.e. HEAD of branch
branch.setdefault("revision", -1)
# Add "patch" key if not provided and set to default value {}
branch.setdefault("patch", {})
# Add "build_script" key if not provided and set to default value ""
branch.setdefault("build_script", "")

# Add "science_configurations" if not provided and set to default value
config.setdefault("science_configurations", DEFAULT_SCIENCE_CONFIGURATIONS)

return config
236 changes: 170 additions & 66 deletions benchcab/benchcab.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,195 @@
#!/usr/bin/env python

"""Contains the main program entry point for `benchcab`."""

import sys

from benchcab.job_script import create_job_script, submit_job
import os
import grp
from pathlib import Path
from typing import Optional
from subprocess import CalledProcessError

from benchcab import internal
from benchcab.internal import get_met_sites
from benchcab.bench_config import read_config
from benchcab.benchtree import setup_fluxnet_directory_tree, setup_src_dir
from benchcab.build_cable import build_cable
from benchcab.get_cable import (
checkout_cable,
checkout_cable_auxiliary,
archive_rev_number,
from benchcab.repository import CableRepository
from benchcab.task import (
get_fluxnet_tasks,
get_fluxnet_comparisons,
run_tasks,
run_tasks_in_parallel,
Task,
)
from benchcab.internal import (
validate_environment,
get_met_sites,
MULTIPROCESS,
SITE_LOG_DIR,
SITE_TASKS_DIR,
SITE_OUTPUT_DIR,
)
from benchcab.task import get_fluxnet_tasks, get_fluxnet_comparisons, Task
from benchcab.comparison import run_comparisons, run_comparisons_in_parallel
from benchcab.cli import generate_parser
from benchcab.run_cable_site import run_tasks, run_tasks_in_parallel
from benchcab.run_comparison import run_comparisons, run_comparisons_in_parallel
from benchcab.environment_modules import module_load, module_is_loaded
from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface
from benchcab.utils.pbs import render_job_script
from benchcab.utils.logging import next_path


class Benchcab:
"""A class that represents the `benchcab` application."""

def __init__(self) -> None:
self.args = generate_parser().parse_args(
sys.argv[1:] if sys.argv[1:] else ["-h"]
)
self.config = read_config(self.args.config)
root_dir: Path = internal.CWD
subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper()
modules_handler: EnvironmentModulesInterface = EnvironmentModules()

def __init__(
self,
argv: list[str],
config: Optional[dict] = None,
validate_env: bool = True,
) -> None:
self.args = generate_parser().parse_args(argv[1:] if argv[1:] else ["-h"])
self.config = config if config else read_config(self.args.config)
self.repos = [
CableRepository(**config, repo_id=id)
for id, config in enumerate(self.config["realisations"])
]
self.tasks: list[Task] = [] # initialise fluxnet tasks lazily
validate_environment(
project=self.config["project"], modules=self.config["modules"]

if validate_env:
self._validate_environment(
project=self.config["project"], modules=self.config["modules"]
)

def _validate_environment(self, project: str, modules: list):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could write tests for this. pytest allows marking tests and running only a chosen subset. So we could check all the failures on GitHub (which are the most interesting tests actually) and add a "Gadi-only" test for success that we disable from the automatic testing on GitHub.

I am happy to leave it for another pull request.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that is possible. Although I would prefer that we didn't have Gadi-specific tests (they wouldn't count towards code coverage). I actually think it would be straightforward to refactor _validate_environment so that it is testable using dependency injection. But we can probably save this for later 😄

"""Performs checks on current user environment"""

if "gadi.nci" not in internal.NODENAME:
print("Error: benchcab is currently implemented only on Gadi")
sys.exit(1)

namelist_dir = Path(internal.CWD / internal.NAMELIST_DIR)
if not namelist_dir.exists():
print(
"Error: cannot find 'namelists' directory in current working directory"
)
sys.exit(1)

required_groups = [project, "ks32", "hh5"]
groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()]
if not set(required_groups).issubset(groups):
print(
"Error: user does not have the required group permissions.",
"The required groups are:",
", ".join(required_groups),
)
sys.exit(1)

for modname in modules:
if not self.modules_handler.module_is_avail(modname):
print(f"Error: module ({modname}) is not available.")
sys.exit(1)

all_site_ids = set(
internal.MEORG_EXPERIMENTS["five-site-test"]
+ internal.MEORG_EXPERIMENTS["forty-two-site-test"]
)
for site_id in all_site_ids:
paths = list(internal.MET_DIR.glob(f"{site_id}*"))
if not paths:
print(
f"Error: failed to infer met file for site id '{site_id}' in "
f"{internal.MET_DIR}."
)
sys.exit(1)
if len(paths) > 1:
print(
f"Error: multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}."
)
sys.exit(1)

def _initialise_tasks(self) -> list[Task]:
"""A helper method that initialises and returns the `tasks` attribute."""
self.tasks = get_fluxnet_tasks(
realisations=self.config["realisations"],
science_configurations=self.config["science_configurations"],
repos=self.repos,
science_configurations=self.config.get(
"science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS
),
met_sites=get_met_sites(self.config["experiment"]),
)
return self.tasks

# TODO(Sean) this method should be the endpoint for the `fluxnet-submit-job`
# command line argument.
def fluxnet_submit_job(self) -> None:
"""Submits the PBS job script step in the fluxnet test workflow."""

job_script_path = self.root_dir / internal.QSUB_FNAME
print(
"Creating PBS job script to run FLUXNET tasks on compute "
f"nodes: {job_script_path.relative_to(self.root_dir)}"
)
with job_script_path.open("w", encoding="utf-8") as file:
contents = render_job_script(
project=self.config["project"],
config_path=self.args.config,
modules=self.config["modules"],
storage_flags=[], # TODO(Sean) add storage flags option to config
verbose=self.args.verbose,
skip_bitwise_cmp="fluxnet-bitwise-cmp" in self.args.skip,
)
file.write(contents)

try:
proc = self.subprocess_handler.run_cmd(
f"qsub {job_script_path}",
capture_output=True,
verbose=self.args.verbose,
)
except CalledProcessError as exc:
print("Error when submitting job to NCI queue")
print(exc.output)
raise

print(
f"PBS job submitted: {proc.stdout.strip()}\n"
"The CABLE log file for each task is written to "
f"{internal.SITE_LOG_DIR}/<task_name>_log.txt\n"
"The CABLE standard output for each task is written to "
f"{internal.SITE_TASKS_DIR}/<task_name>/out.txt\n"
"The NetCDF output for each task is written to "
f"{internal.SITE_OUTPUT_DIR}/<task_name>_out.nc"
)

def checkout(self):
"""Endpoint for `benchcab checkout`."""

setup_src_dir()

print("Checking out repositories...")
for branch in self.config["realisations"]:
checkout_cable(branch, verbose=self.args.verbose)
checkout_cable_auxiliary(self.args.verbose)
archive_rev_number()
rev_number_log = ""
for repo in self.repos:
repo.checkout(verbose=self.args.verbose)
rev_number_log += (
f"{repo.name} last changed revision: "
f"{repo.svn_info_show_item('last-changed-revision')}\n"
)

# TODO(Sean) we should archive revision numbers for CABLE-AUX
cable_aux_repo = CableRepository(path=internal.CABLE_AUX_RELATIVE_SVN_PATH)
cable_aux_repo.checkout(verbose=self.args.verbose)

rev_number_log_path = self.root_dir / next_path(
self.root_dir, "rev_number-*.log"
)
print(
f"Writing revision number info to {rev_number_log_path.relative_to(self.root_dir)}"
)
with open(rev_number_log_path, "w", encoding="utf-8") as file:
file.write(rev_number_log)

print("")

def build(self):
"""Endpoint for `benchcab build`."""
for branch in self.config["realisations"]:
build_cable(
branch["build_script"],
branch["name"],
self.config["modules"],
verbose=self.args.verbose,
)
for repo in self.repos:
if repo.build_script:
repo.custom_build(verbose=self.args.verbose)
else:
repo.build(modules=self.config["modules"], verbose=self.args.verbose)
print(f"Successfully compiled CABLE for realisation {repo.name}")
print("")

def fluxnet_setup_work_directory(self):
Expand All @@ -86,7 +207,7 @@ def fluxnet_run_tasks(self):
"""Endpoint for `benchcab fluxnet-run-tasks`."""
tasks = self.tasks if self.tasks else self._initialise_tasks()
print("Running FLUXNET tasks...")
if MULTIPROCESS:
if internal.MULTIPROCESS:
run_tasks_in_parallel(tasks, verbose=self.args.verbose)
else:
run_tasks(tasks, verbose=self.args.verbose)
Expand All @@ -96,14 +217,16 @@ def fluxnet_run_tasks(self):
def fluxnet_bitwise_cmp(self):
"""Endpoint for `benchcab fluxnet-bitwise-cmp`."""

if not module_is_loaded("nccmp"):
module_load("nccmp") # use `nccmp -df` for bitwise comparisons
if not self.modules_handler.module_is_loaded("nccmp/1.8.5.0"):
self.modules_handler.module_load(
"nccmp/1.8.5.0"
) # use `nccmp -df` for bitwise comparisons

tasks = self.tasks if self.tasks else self._initialise_tasks()
comparisons = get_fluxnet_comparisons(tasks)

print("Running comparison tasks...")
if MULTIPROCESS:
if internal.MULTIPROCESS:
run_comparisons_in_parallel(comparisons, verbose=self.args.verbose)
else:
run_comparisons(comparisons, verbose=self.args.verbose)
Expand All @@ -119,26 +242,7 @@ def fluxnet(self):
if "fluxnet-bitwise-cmp" not in self.args.skip:
self.fluxnet_bitwise_cmp()
else:
create_job_script(
project=self.config["project"],
config_path=self.args.config,
modules=self.config["modules"],
verbose=self.args.verbose,
skip_bitwise_cmp="fluxnet-bitwise-cmp" in self.args.skip,
)
submit_job()
print(
"The CABLE log file for each task is written to "
f"{SITE_LOG_DIR}/<task_name>_log.txt"
)
print(
"The CABLE standard output for each task is written to "
f"{SITE_TASKS_DIR}/<task_name>/out.txt"
)
print(
"The NetCDF output for each task is written to "
f"{SITE_OUTPUT_DIR}/<task_name>_out.nc"
)
self.fluxnet_submit_job()

def spatial(self):
"""Endpoint for `benchcab spatial`."""
Expand Down Expand Up @@ -182,7 +286,7 @@ def main():
This is required for setup.py entry_points
"""

app = Benchcab()
app = Benchcab(argv=sys.argv)
app.main()


Expand Down
5 changes: 3 additions & 2 deletions benchcab/benchtree.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contains functions for generating the directory structure used for `benchcab`."""
"""A module containing functions for generating the directory structure used for `benchcab`."""

from pathlib import Path
import os
import shutil
Expand All @@ -8,7 +9,7 @@


def clean_directory_tree(root_dir=internal.CWD):
"""Remove pre-existing directories in `root_dir`."""
"""Remove pre-existing directories in current working directory."""
src_dir = Path(root_dir, internal.SRC_DIR)
if src_dir.exists():
shutil.rmtree(src_dir)
Expand Down
Loading