CABLE-LSM · ccarouge · Feb 15, 2024 · Dec 18, 2023 · Dec 20, 2023 · Dec 20, 2023
diff --git a/benchcab/__init__.py b/benchcab/__init__.py
@@ -3,10 +3,14 @@
 
 import importlib.metadata
 
+from benchcab.utils import get_logger
+
 try:
     __version__ = importlib.metadata.version("benchcab")
 except importlib.metadata.PackageNotFoundError:
     __version__ = ""
-    print("Warning: unable to interrogate version string from installed distribution.")
+    get_logger().warn(
+        "unable to interrogate version string from installed distribution."
+    )
     # Note: cannot re-raise exception here as this will break pytest
     # when running without first installing the package
diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py
@@ -23,6 +23,7 @@
 )
 from benchcab.internal import get_met_forcing_file_names
 from benchcab.model import Model
+from benchcab.utils import get_logger
 from benchcab.utils.fs import mkdir, next_path
 from benchcab.utils.pbs import render_job_script
 from benchcab.utils.repo import SVNRepo, create_repo
@@ -41,42 +42,56 @@
         benchcab_exe_path: Optional[Path],
         validate_env: bool = True,
     ) -> None:
+        """Constructor.
+
+        Parameters
+        ----------
+        benchcab_exe_path : Optional[Path]
+            Path to the executable.
+        validate_env : bool, optional
+            Validate the environment, by default True
+        """
         self.benchcab_exe_path = benchcab_exe_path
         self.validate_env = validate_env
 
         self._config: Optional[dict] = None
         self._models: list[Model] = []
         self.tasks: list[Task] = []  # initialise fluxsite tasks lazily
 
+        # Get the logger object
+        self.logger = get_logger()
+
     def _validate_environment(self, project: str, modules: list):
         """Performs checks on current user environment."""
         if not self.validate_env:
             return
 
         if "gadi.nci" not in internal.NODENAME:
-            print("Error: benchcab is currently implemented only on Gadi")
+            self.logger.error("benchcab is currently implemented only on Gadi")
             sys.exit(1)
 
         namelist_dir = Path(internal.CWD / internal.NAMELIST_DIR)
         if not namelist_dir.exists():
-            print(
-                "Error: cannot find 'namelists' directory in current working directory"
+            self.logger.error(
+                "Cannot find 'namelists' directory in current working directory"
             )
             sys.exit(1)
 
         required_groups = [project, "ks32", "hh5"]
         groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()]
         if not set(required_groups).issubset(groups):
-            print(
-                "Error: user does not have the required group permissions.",
-                "The required groups are:",
-                ", ".join(required_groups),
+            self.logger.error(
+                [
+                    "User does not have the required group permissions.",
+                    "The required groups are:",
+                    ", ".join(required_groups),
+                ]
             )
             sys.exit(1)
 
         for modname in modules:
             if not self.modules_handler.module_is_avail(modname):
-                print(f"Error: module ({modname}) is not available.")
+                self.logger.error(f"Module ({modname}) is not available.")
                 sys.exit(1)
 
         all_site_ids = set(
@@ -86,14 +101,16 @@
         for site_id in all_site_ids:
             paths = list(internal.MET_DIR.glob(f"{site_id}*"))
             if not paths:
-                print(
-                    f"Error: failed to infer met file for site id '{site_id}' in "
-                    f"{internal.MET_DIR}."
+                self.logger.error(
+                    [
+                        f"Failed to infer met file for site id '{site_id}' in "
+                        f"{internal.MET_DIR}."
+                    ]
                 )
                 sys.exit(1)
             if len(paths) > 1:
-                print(
-                    f"Error: multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}."
+                self.logger.error(
+                    f"Multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}."
                 )
                 sys.exit(1)
 
@@ -124,13 +141,11 @@
         )
         return self.tasks
 
-    def validate_config(self, config_path: str, verbose: bool):
+    def validate_config(self, config_path: str):
         """Endpoint for `benchcab validate_config`."""
         _ = self._get_config(config_path)
 
-    def fluxsite_submit_job(
-        self, config_path: str, verbose: bool, skip: list[str]
-    ) -> None:
+    def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None:
         """Submits the PBS job script step in the fluxsite test workflow."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
@@ -139,17 +154,17 @@
             raise RuntimeError(msg)
 
         job_script_path = Path(internal.QSUB_FNAME)
-        print(
-            "Creating PBS job script to run fluxsite tasks on compute "
-            f"nodes: {job_script_path}"
+        self.logger.info(
+            "Creating PBS job script to run fluxsite tasks on compute nodes"
         )
+
+        self.logger.info(f"job_script_path = {job_script_path}")
+
         with job_script_path.open("w", encoding="utf-8") as file:
             contents = render_job_script(
                 project=config["project"],
                 config_path=config_path,
                 modules=config["modules"],
-                pbs_config=config["fluxsite"]["pbs"],
-                verbose=verbose,
                 skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip,
                 benchcab_path=str(self.benchcab_exe_path),
             )
@@ -159,94 +174,102 @@
             proc = self.subprocess_handler.run_cmd(
                 f"qsub {job_script_path}",
                 capture_output=True,
-                verbose=verbose,
             )
         except CalledProcessError as exc:
-            print("Error when submitting job to NCI queue")
-            print(exc.output)
+            self.logger.error("when submitting job to NCI queue, details to follow")
+            self.logger.error(exc.output)
             raise
 
-        print(
-            f"PBS job submitted: {proc.stdout.strip()}\n"
-            "The CABLE log file for each task is written to "
-            f"{internal.FLUXSITE_DIRS['LOG']}/<task_name>_log.txt\n"
-            "The CABLE standard output for each task is written to "
-            f"{internal.FLUXSITE_DIRS['TASKS']}/<task_name>/out.txt\n"
-            "The NetCDF output for each task is written to "
-            f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc"
-        )
+        self.logger.info(f"PBS job submitted: {proc.stdout.strip()}")
+        self.logger.info("CABLE log file for each task is written to:")
+        self.logger.info(f"{internal.FLUXSITE_DIRS['LOG']}/<task_name>_log.txt")
+        self.logger.info("The CABLE standard output for each task is written to:")
+        self.logger.info(f"{internal.FLUXSITE_DIRS['TASKS']}/<task_name>/out.txt")
+        self.logger.info("The NetCDF output for each task is written to:")
+        self.logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc")
 
-    def checkout(self, config_path: str, verbose: bool):
+    def checkout(self, config_path: str):
         """Endpoint for `benchcab checkout`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
 
-        mkdir(internal.SRC_DIR, exist_ok=True, verbose=True)
+        mkdir(internal.SRC_DIR, exist_ok=True)
 
-        print("Checking out repositories...")
+        self.logger.info("Checking out repositories...")
         rev_number_log = ""
         for model in self._get_models(config):
-            model.repo.checkout(verbose=verbose)
+            model.repo.checkout()
             rev_number_log += f"{model.name}: {model.repo.get_revision()}\n"
 
+        # TODO(Sean) we should archive revision numbers for CABLE-AUX
+        cable_aux_repo = SVNRepo(
+            svn_root=internal.CABLE_SVN_ROOT,
+            branch_path=internal.CABLE_AUX_RELATIVE_SVN_PATH,
+            path=internal.SRC_DIR / "CABLE-AUX",
+        )
+        cable_aux_repo.checkout()
+
         rev_number_log_path = next_path("rev_number-*.log")
-        print(f"Writing revision number info to {rev_number_log_path}")
+        self.logger.info(f"Writing revision number info to {rev_number_log_path}")
         with rev_number_log_path.open("w", encoding="utf-8") as file:
             file.write(rev_number_log)
 
-        print("")
-
-    def build(self, config_path: str, verbose: bool):
+    def build(self, config_path: str):
         """Endpoint for `benchcab build`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
 
         for repo in self._get_models(config):
             if repo.build_script:
-                print(
-                    "Compiling CABLE using custom build script for "
-                    f"realisation {repo.name}..."
-                )
-                repo.custom_build(modules=config["modules"], verbose=verbose)
+                self.logger.info(
+                    "Compiling CABLE using custom build script for "
+                    f"realisation {repo.name}..."
+                )
+                repo.custom_build(modules=config["modules"])
             else:
                 build_mode = "with MPI" if internal.MPI else "serially"
-                print(f"Compiling CABLE {build_mode} for realisation {repo.name}...")
-                repo.pre_build(verbose=verbose)
-                repo.run_build(modules=config["modules"], verbose=verbose)
-                repo.post_build(verbose=verbose)
-            print(f"Successfully compiled CABLE for realisation {repo.name}")
-        print("")
-
-    def fluxsite_setup_work_directory(self, config_path: str, verbose: bool):
+                self.logger.info(
+                    f"Compiling CABLE {build_mode} for realisation {repo.name}..."
+                )
+                repo.pre_build()
+                repo.run_build(modules=config["modules"])
+                repo.post_build()
+            self.logger.info(f"Successfully compiled CABLE for realisation {repo.name}")
+
+    def fluxsite_setup_work_directory(self, config_path: str):
         """Endpoint for `benchcab fluxsite-setup-work-dir`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
 
         tasks = self.tasks if self.tasks else self._initialise_tasks(config)
-        print("Setting up run directory tree for fluxsite tests...")
-        setup_fluxsite_directory_tree(verbose=verbose)
-        print("Setting up tasks...")
+        self.logger.info("Setting up run directory tree for fluxsite tests...")
+        setup_fluxsite_directory_tree()
+        self.logger.info("Setting up tasks...")
         for task in tasks:
-            task.setup_task(verbose=verbose)
-        print("Successfully setup fluxsite tasks")
-        print("")
+            task.setup_task()
+        self.logger.info("Successfully setup fluxsite tasks")
 
-    def fluxsite_run_tasks(self, config_path: str, verbose: bool):
+    def fluxsite_run_tasks(self, config_path: str):
         """Endpoint for `benchcab fluxsite-run-tasks`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
 
         tasks = self.tasks if self.tasks else self._initialise_tasks(config)
-        print("Running fluxsite tasks...")
-        if config["fluxsite"]["multiprocess"]:
-            ncpus = config["fluxsite"]["pbs"]["ncpus"]
-            run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose)
+        self.logger.info("Running fluxsite tasks...")
+        try:
+            multiprocess = config["fluxsite"]["multiprocess"]
+        except KeyError:
+            multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS
+        if multiprocess:
+            ncpus = config.get("fluxsite", {}).get("pbs", {}).get(
+                "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"]
+            )
+            run_tasks_in_parallel(tasks, n_processes=ncpus)
         else:
-            run_tasks(tasks, verbose=verbose)
-        print("Successfully ran fluxsite tasks")
-        print("")
+            run_tasks(tasks)
+        self.logger.info("Successfully ran fluxsite tasks")
 
-    def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool):
+    def fluxsite_bitwise_cmp(self, config_path: str):
         """Endpoint for `benchcab fluxsite-bitwise-cmp`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
@@ -260,31 +283,36 @@
         comparisons = get_fluxsite_comparisons(tasks)
 
-        print("Running comparison tasks...")
-        if config["fluxsite"]["multiprocess"]:
-            ncpus = config["fluxsite"]["pbs"]["ncpus"]
-            run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose)
+        self.logger.info("Running comparison tasks...")
+        try:
+            multiprocess = config["fluxsite"]["multiprocess"]
+        except KeyError:
+            multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS
+        if multiprocess:
+            try:
+                ncpus = config["fluxsite"]["pbs"]["ncpus"]
+            except KeyError:
+                ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"]
+            run_comparisons_in_parallel(comparisons, n_processes=ncpus)
         else:
-            run_comparisons(comparisons, verbose=verbose)
-        print("Successfully ran comparison tasks")
+            run_comparisons(comparisons)
+        self.logger.info("Successfully ran comparison tasks")
 
-    def fluxsite(
-        self, config_path: str, no_submit: bool, verbose: bool, skip: list[str]
-    ):
+    def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]):
         """Endpoint for `benchcab fluxsite`."""
-        self.checkout(config_path, verbose)
-        self.build(config_path, verbose)
-        self.fluxsite_setup_work_directory(config_path, verbose)
+        self.checkout(config_path)
+        self.build(config_path)
+        self.fluxsite_setup_work_directory(config_path)
         if no_submit:
-            self.fluxsite_run_tasks(config_path, verbose)
+            self.fluxsite_run_tasks(config_path)
             if "fluxsite-bitwise-cmp" not in skip:
-                self.fluxsite_bitwise_cmp(config_path, verbose)
+                self.fluxsite_bitwise_cmp(config_path)
         else:
-            self.fluxsite_submit_job(config_path, verbose, skip)
+            self.fluxsite_submit_job(config_path, skip)
 
-    def spatial(self, config_path: str, verbose: bool):
+    def spatial(self, config_path: str):
         """Endpoint for `benchcab spatial`."""
 
-    def run(self, config_path: str, no_submit: bool, verbose: bool, skip: list[str]):
+    def run(self, config_path: str, no_submit: bool, skip: list[str]):
         """Endpoint for `benchcab run`."""
-        self.fluxsite(config_path, no_submit, verbose, skip)
-        self.spatial(config_path, verbose)
+        self.fluxsite(config_path, no_submit, skip)
+        self.spatial(config_path)