From 04db77bb3bca135a8c446d266c29813e6a61874e Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 28 Sep 2022 16:30:47 -0600
Subject: [PATCH] per #1842, add support for setting multiple jobs for
 StatAnalysis wrapper using STAT_ANALYSIS_JOB<n>, clean up indentation

---
 metplus/wrappers/stat_analysis_wrapper.py     | 183 ++++++++++--------
 .../StatAnalysis/StatAnalysis.conf            |   3 +-
 2 files changed, 103 insertions(+), 83 deletions(-)
diff --git a/metplus/wrappers/stat_analysis_wrapper.py b/metplus/wrappers/stat_analysis_wrapper.py
index de313014c..003efbd2b 100755
--- a/metplus/wrappers/stat_analysis_wrapper.py
+++ b/metplus/wrappers/stat_analysis_wrapper.py
@@ -67,51 +67,53 @@ class StatAnalysisWrapper(CommandBuilder):
         'METPLUS_HSS_EC_VALUE',
     ]
 
-    field_lists = ['FCST_VAR_LIST',
-                   'OBS_VAR_LIST',
-                   'FCST_UNITS_LIST',
-                   'OBS_UNITS_LIST',
-                   'FCST_THRESH_LIST',
-                   'OBS_THRESH_LIST',
-                   'FCST_LEVEL_LIST',
-                   'OBS_LEVEL_LIST',
-                   ]
-
-    format_lists = ['FCST_VALID_HOUR_LIST',
-                    'FCST_INIT_HOUR_LIST',
-                    'OBS_VALID_HOUR_LIST',
-                    'OBS_INIT_HOUR_LIST',
-                    'FCST_LEAD_LIST',
-                    'OBS_LEAD_LIST',
-                    ]
+    field_lists = [
+        'FCST_VAR_LIST',
+        'OBS_VAR_LIST',
+        'FCST_UNITS_LIST',
+        'OBS_UNITS_LIST',
+        'FCST_THRESH_LIST',
+        'OBS_THRESH_LIST',
+        'FCST_LEVEL_LIST',
+        'OBS_LEVEL_LIST',
+    ]
+
+    format_lists = [
+        'FCST_VALID_HOUR_LIST',
+        'FCST_INIT_HOUR_LIST',
+        'OBS_VALID_HOUR_LIST',
+        'OBS_INIT_HOUR_LIST',
+        'FCST_LEAD_LIST',
+        'OBS_LEAD_LIST',
+    ]
 
-    expected_config_lists = ['MODEL_LIST',
-                             'DESC_LIST',
-                             'VX_MASK_LIST',
-                             'INTERP_MTHD_LIST',
-                             'INTERP_PNTS_LIST',
-                             'COV_THRESH_LIST',
-                             'ALPHA_LIST',
-                             'LINE_TYPE_LIST',
-                             ] + format_lists + field_lists
-
-    force_group_for_make_plots_lists = ['MODEL_LIST',
-                                        'FCST_LEAD_LIST',
-                                        'OBS_LEAD_LIST',
-                                        'FCST_LEVEL_LIST',
-                                        'OBS_LEVEL_LIST',
-                                        'FCST_THRESH_LIST',
-                                        'OBS_THRESH_LIST',
-                                        'FCST_UNITS_LIST',
-                                        'OBS_UNITS_LIST',
-                                       ]
+    expected_config_lists = [
+        'MODEL_LIST',
+        'DESC_LIST',
+        'VX_MASK_LIST',
+        'INTERP_MTHD_LIST',
+        'INTERP_PNTS_LIST',
+        'COV_THRESH_LIST',
+        'ALPHA_LIST',
+        'LINE_TYPE_LIST',
+    ] + format_lists + field_lists
+
+    force_group_for_make_plots_lists = [
+        'MODEL_LIST',
+        'FCST_LEAD_LIST',
+        'OBS_LEAD_LIST',
+        'FCST_LEVEL_LIST',
+        'OBS_LEVEL_LIST',
+        'FCST_THRESH_LIST',
+        'OBS_THRESH_LIST',
+        'FCST_UNITS_LIST',
+        'OBS_UNITS_LIST',
+    ]
 
     list_categories = ['GROUP_LIST_ITEMS', 'LOOP_LIST_ITEMS']
-    list_categories_make_plots = ['GROUP_LIST_ITEMS_MAKE_PLOTS', 'LOOP_LIST_ITEMS_MAKE_PLOTS']
-    # what is the used for? these are not formatted later
-    format_later_list = [
-        'MODEL_LIST', 'FCST_VALID_HOUR_LIST', 'OBS_VALID_HOUR_LIST',
-        'FCST_INIT_HOUR_LIST', 'OBS_INIT_HOUR_LIST'
+    list_categories_make_plots = [
+        'GROUP_LIST_ITEMS_MAKE_PLOTS',
+        'LOOP_LIST_ITEMS_MAKE_PLOTS'
     ]
 
     def __init__(self, config, instance=None):
@@ -141,7 +143,7 @@ def get_command(self):
             cmd += f" -out {self.c_dict['OUTPUT_FILENAME']}"
 
         return cmd
-     
+
     def create_c_dict(self):
         """! Create a data structure (dictionary) that contains all the
              values set in the configuration files that are common for 
@@ -158,7 +160,6 @@ def create_c_dict(self):
             self.config.getstr('config', 'LOG_STAT_ANALYSIS_VERBOSITY',
                                c_dict['VERBOSITY'])
         )
-        c_dict['LOOP_ORDER'] = self.config.getstr('config', 'LOOP_ORDER')
 
         # STATAnalysis config file is optional, so
         # don't provide wrapped config file name as default value
@@ -186,10 +187,22 @@ def create_c_dict(self):
             c_dict['DATE_BEG'] = start_dt.strftime('%Y%m%d')
             c_dict['DATE_END'] = end_dt.strftime('%Y%m%d')
 
-        for job_conf in ['JOB_NAME', 'JOB_ARGS']:
-            c_dict[job_conf] = self.config.getstr('config',
-                                                  f'STAT_ANALYSIS_{job_conf}',
-                                                  '')
+        # read jobs from STAT_ANALYSIS_JOB<n> or legacy JOB_NAME/ARGS if unset
+        c_dict['JOBS'] = []
+        job_indices = list(
+            find_indices_in_config_section(r'STAT_ANALYSIS_JOB(\d+)$',
+                                           self.config,
+                                           index_index=1).keys()
+        )
+
+        if job_indices:
+            for j_id in job_indices:
+                job = self.config.getraw('config', f'STAT_ANALYSIS_JOB{j_id}')
+                c_dict['JOBS'].append(job)
+        else:
+            job_name = self.config.getraw('config', 'STAT_ANALYSIS_JOB_NAME')
+            job_args = self.config.getraw('config', 'STAT_ANALYSIS_JOB_ARGS')
+            c_dict['JOBS'].append(f'-job {job_name} {job_args}')
 
         # read in all lists except field lists, which will be read in afterwards and checked
         all_lists_to_read = self.expected_config_lists + self.list_categories
@@ -249,17 +262,25 @@ def create_c_dict(self):
     def c_dict_error_check(self, c_dict):
 
         if not c_dict.get('CONFIG_FILE'):
-            self.logger.info("STAT_ANALYSIS_CONFIG_FILE not set. Passing job arguments to "
-                             "stat_analysis directly on the command line. This will bypass "
-                             "any filtering done unless you add the arguments to "
-                             "STAT_ANALYSIS_JOB_ARGS")
+            if len(c_dict['JOBS']) > 1:
+                self.log_error(
+                    'Only 1 job can be set with STAT_ANALYSIS_JOB<n> if '
+                    'STAT_ANALYSIS_CONFIG_FILE is not set.'
+                )
+            else:
+                self.logger.info("STAT_ANALYSIS_CONFIG_FILE not set. Passing "
+                                 "job arguments to stat_analysis directly on "
+                                 "the command line. This will bypass "
+                                  "any filtering done unless you add the "
+                                 "arguments to STAT_ANALYSIS_JOBS<n>")
 
         if not c_dict['OUTPUT_DIR']:
             self.log_error("Must set STAT_ANALYSIS_OUTPUT_DIR")
 
-        for job_conf in ['JOB_NAME', 'JOB_ARGS']:
-            if not c_dict[job_conf]:
-                self.log_error(f"Must set STAT_ANALYSIS_{job_conf} to run StatAnalysis")
+        if not c_dict['JOBS']:
+            self.log_error(
+                "Must set at least one job with STAT_ANALYSIS_JOB<n>"
+            )
 
         for conf_list in self.list_categories:
             if not c_dict[conf_list]:
@@ -299,8 +320,6 @@ def c_dict_error_check(self, c_dict):
             if len(c_dict['MODEL_LIST']) > 8:
                 self.log_error("Number of models for plotting limited to 8.")
 
-#        self.check_dump_row_templates_for_plotting()
-
         # set forMakePlots to False to begin. When gathering settings to
         # send to MakePlots wrapper, this will be set to True
         self.forMakePlots = False
@@ -383,8 +402,9 @@ def check_MakePlots_config(self, c_dict):
                                   +"StatAnalysis followed by MakePlots.")
 
         # if MakePlots is run but -dump_row is not found in the job args, error
-        if '-dump_row' not in c_dict['JOB_ARGS']:
-            self.log_error("Must include -dump_row in STAT_ANALYSIS_JOB_ARGS if running MakePlots")
+        if not any([item for item in c_dict['JOBS'] if '-dump_row' in item]):
+            self.log_error("Must include -dump_row in at least one "
+                           "STAT_ANALYSIS_JOB<n> if running MakePlots")
 
     def list_to_str(self, list_of_values, add_quotes=True):
         """! Turn a list of values into a single string so it can be 
@@ -436,7 +456,7 @@ def set_lists_loop_or_group(self, c_dict):
         for missing_config in missing_config_list:
 
             # if running MakePlots
-            if (c_dict['LOOP_ORDER'] == 'processes' and self.runMakePlots):
+            if self.runMakePlots:
 
                 # if LINE_TYPE_LIST is missing, add it to group list
                 if missing_config == 'LINE_TYPE_LIST':
@@ -1365,7 +1385,7 @@ def process_job_args(self, job_type, job, model_info,
         output_file = os.path.join(self.c_dict['OUTPUT_DIR'],
                                    output_filename)
 
-        # substitute output filename in JOB_ARGS line
+        # substitute output filename in JOBS line
         job = job.replace(f'[{job_type}_file]', output_file)
         job = job.replace(f'[{job_type}_filename]', output_file)
 
@@ -1400,11 +1420,10 @@ def get_runtime_settings_dict_list(self):
             if model_info is None:
                 return None
 
-            runtime_settings_dict['JOB'] = self.get_job_info(model_info,
-                                                             runtime_settings_dict,
-                                                             loop_lists,
-                                                             group_lists,
-                                                             )
+            runtime_settings_dict['JOBS'] = (
+                self.get_job_info(model_info, runtime_settings_dict,
+                                  loop_lists, group_lists)
+            )
 
             # get -out argument if set
             if self.c_dict['OUTPUT_TEMPLATE']:
@@ -1469,7 +1488,6 @@ def get_runtime_settings(self, c_dict):
 
         for loop_list in loop_lists:
             # if not a threshold list, add quotes around each value in list
-            # if loop_list not in self.format_later_list and 'THRESH' not in loop_list:
             if 'THRESH' not in loop_list:
                 c_dict[loop_list] = [f'"{value}"' for value in c_dict[loop_list]]
 
@@ -1700,18 +1718,21 @@ def get_job_info(self, model_info, runtime_settings_dict, loop_lists, group_list
              @params runtime_settings_dict dictionary containing all settings used in next run
              @returns string containing job information to pass to StatAnalysis config file
         """
-        job = '-job ' + self.c_dict['JOB_NAME'] + ' ' + self.c_dict['JOB_ARGS']
-        for job_type in ['dump_row', 'out_stat']:
-            if f"-{job_type}" in self.c_dict['JOB_ARGS']:
-                job = self.process_job_args(job_type,
-                                            job,
-                                            model_info,
-                                            loop_lists,
-                                            group_lists,
-                                            runtime_settings_dict,
-                                            )
-
-        return job
+        jobs = []
+        for job in self.c_dict['JOBS']:
+            for job_type in ['dump_row', 'out_stat']:
+                if f"-{job_type}" in job:
+                    job = self.process_job_args(job_type,
+                                                job,
+                                                model_info,
+                                                loop_lists,
+                                                group_lists,
+                                                runtime_settings_dict,
+                                                )
+
+            jobs.append(job)
+
+        return jobs
 
     def run_stat_analysis(self):
         """! This runs stat_analysis over a period of valid
@@ -1811,7 +1832,7 @@ def run_stat_analysis_job(self, runtime_settings_dict_list):
                 self.env_var_dict[f'METPLUS_{mp_item}'] = value
 
             value = f'jobs = ["'
-            value += runtime_settings_dict.get('JOB', '')
+            value += '","'.join(runtime_settings_dict['JOBS'])
             value += '"];'
             self.env_var_dict[f'METPLUS_JOBS'] = value
 
@@ -1821,7 +1842,7 @@ def run_stat_analysis_job(self, runtime_settings_dict_list):
             # set lookin dir
             self.logger.debug(f"Setting -lookin dir to {runtime_settings_dict['LOOKIN_DIR']}")
             self.lookindir = runtime_settings_dict['LOOKIN_DIR']
-            self.job_args = runtime_settings_dict['JOB']
+            self.job_args = runtime_settings_dict['JOBS'][0]
 
             # set -out file path if requested, value will be set to None if not
             self.c_dict['OUTPUT_FILENAME'] = (
diff --git a/parm/use_cases/met_tool_wrapper/StatAnalysis/StatAnalysis.conf b/parm/use_cases/met_tool_wrapper/StatAnalysis/StatAnalysis.conf
index 67f5f0b4b..cbcc88d7a 100644
--- a/parm/use_cases/met_tool_wrapper/StatAnalysis/StatAnalysis.conf
+++ b/parm/use_cases/met_tool_wrapper/StatAnalysis/StatAnalysis.conf
@@ -61,8 +61,7 @@ STAT_ANALYSIS_CONFIG_FILE = {PARM_BASE}/met_config/STATAnalysisConfig_wrapped
 
 #STAT_ANALYSIS_HSS_EC_VALUE =
 
-STAT_ANALYSIS_JOB_NAME = filter
-STAT_ANALYSIS_JOB_ARGS = -dump_row [dump_row_file]
+STAT_ANALYSIS_JOB1 = -job filter -dump_row [dump_row_file]
 
 MODEL_LIST = {MODEL1}
 DESC_LIST =