diff --git a/.github/jobs/docker_setup.sh b/.github/jobs/docker_setup.sh index ad20f64ec..47116c023 100755 --- a/.github/jobs/docker_setup.sh +++ b/.github/jobs/docker_setup.sh @@ -67,7 +67,7 @@ fi # skip docker push if credentials are not set -if [ -z ${DOCKER_USERNAME+x} ] || [ -z ${DOCKER_PASSWORD+x} ]; then +if [ -z "${DOCKER_USERNAME}" ] || [ -z "${DOCKER_PASSWORD}" ]; then echo "DockerHub credentials not set. Skipping docker push" exit 0 fi diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index 2f6814059..056013dd1 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -56,7 +56,7 @@ }, { "category": "data_assimilation", - "index_list": "0", + "index_list": "0-1", "run": false }, { @@ -218,5 +218,10 @@ "category": "tc_and_extra_tc", "index_list": "3-5", "run": false + }, + { + "category": "unstructured_grids", + "index_list": "0", + "run": false } ] diff --git a/docs/_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png b/docs/_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png new file mode 100644 index 000000000..633474d86 Binary files /dev/null and b/docs/_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png differ diff --git a/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png b/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png new file mode 100644 index 000000000..4475eeac0 Binary files /dev/null and b/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png differ diff --git a/docs/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.py b/docs/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.py new file mode 100644 index 000000000..dfb8a7d74 --- /dev/null +++ b/docs/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.py 
@@ -0,0 +1,155 @@ +""" +StatAnalysis: IODAv2 +=========================================================================== + +model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf + +""" + +########################################### +# Scientific Objective +# -------------------- +# +# This use case demonstrates the Stat-Analysis tool and ingestion of HofX NetCDF files +# that have been output from the Joint Effort for Data assimilation Integration (JEDI) +# data assimilation system. JEDI uses IODA version 2 formatted files, which are NetCDF files +# with certain requirements of variables and naming conventions. These files +# hold observations to be assimilated into forecasts, in this case taken from the JEDI software +# test data, which contained a small number of Global observation-forecast pairs +# derived from the hofx application. +# +# UFO is a component of HofX, which maps the background forecast to observation space +# to form O minus B pairs. The HofX application of JEDI takes the input IODAv2 files and +# adds an additional variable which is the forecast value as interpolated to the +# observation location. These HofX files are used as input to form Matched Pair (MPR) +# formatted lists via Python embedding. In this case, Stat-Analysis then performs an aggregate_stat +# job and outputs statistics in an ascii file. +# +# This use case adopts the IODAv2 formatted NetCDF files, which replace the previous variable +# formatting scheme to make use of NetCDF groups. + +############################################################################## +# Datasets +# -------- +# +# +# | **Data source:** JEDI HofX output files in IODAv2 format +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. 
Click here to go to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | The tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# | + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes the METplus StatAnalysis wrapper to search for +# files that are valid for the given case and generate a command to run +# the MET tool stat_analysis. + +############################################################################## +# METplus Workflow +# ---------------- +# +# StatAnalysis is the only tool called in this example. It processes the following +# run times: +# +# | **Valid:** 2018-04-15_00Z +# | **Forecast lead:** 0 hour +# | + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line +# with the -c option, i.e. -c parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF!
THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`StatAnalysis MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/STATAnalysisConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case uses a Python embedding script to read input data +# +# parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py +# +# .. highlight:: python +# .. literalinclude:: ../../../../parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py +# + +############################################################################## +# Running METplus +# --------------- +# +# It is recommended to run this use case by: +# +# Passing in StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf then a user-specific system configuration file:: +# +# run_metplus.py -c /path/to/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf -c /path/to/user_system.conf +# +# The following METplus configuration variables must be set correctly to run this example.: +# +# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs). +# * **OUTPUT_BASE** - Path where METplus output will be written. 
This must be in a location where you have write permissions +# * **MET_INSTALL_DIR** - Path to location where MET is installed locally +# +# Example User Configuration File:: +# +# [dir] +# INPUT_BASE = /path/to/sample/input/data +# OUTPUT_BASE = /path/to/output/dir +# MET_INSTALL_DIR = /path/to/met-X.Y +# +# **NOTE:** All of these items must be found under the [dir] section. +# + + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for this use case will be found in StatAnalysis_IODAv2 (relative to **OUTPUT_BASE**) +# and will contain the following file: +# +# * dump.out + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * StatAnalysisToolUseCase +# * PythonEmbeddingFileUseCase +# * DataAssimilationUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# +# sphinx_gallery_thumbnail_path = '_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png' diff --git a/docs/use_cases/model_applications/unstructured_grids/README.rst b/docs/use_cases/model_applications/unstructured_grids/README.rst new file mode 100644 index 000000000..7b8d40d0f --- /dev/null +++ b/docs/use_cases/model_applications/unstructured_grids/README.rst @@ -0,0 +1,4 @@ +Unstructured Grids +------------------ + +Unstructured grids used by models for numerical weather prediction. 
diff --git a/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py b/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py new file mode 100644 index 000000000..dd52057d2 --- /dev/null +++ b/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py @@ -0,0 +1,160 @@ +""" +StatAnalysis: UGRID +=========================================================================== + +model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf + +""" + +########################################### +# Scientific Objective +# -------------------- +# +# This use case demonstrates the use of python embedding to ingest and perform +# verification on an unstructured grid. This foregoes the need to interpolate +# to a regular grid as a step in the verification process, thereby avoiding +# any incurred interpolation error in the process. +# +# In particular, this use case ingests a UK MET Office LFRic forecast file in +# NetCDF format, which resides in the UGRID format of the cubed-sphere. The python +# library Iris was developed to perform analysis on various UGRID formats, and is +# employed here to ingest the file as well as perform direct interpolation +# from the native forecast grid to observation locations, thereby forming matched +# pairs to pass to stat_analysis. In order to perform the interpolation using a +# nearest-neighbors approach, the geovista python package is also used to form a +# KD tree to be used in identifying the interpolation points to be used. This +# package is located at https://github.com/bjlittle/geovista/ and can be installed +# from a development version. It is also required to install the pyvista python +# package. ASCII files containing observations are also ingested. 
+# +# The python embedding script itself performs the interpolation in time, and +# for this use case thins the observation data in order to reduce the run time. +# It is also noted that the observations for this use case were fabricated and +# correlated observation-forecast pairs are not expected. +# + +############################################################################## +# Datasets +# -------- +# +# +# | **Data source:** UK MET Office LFRic forecast files in UGRID NetCDF format and observations in ASCII format +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to go to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | The tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# | + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes the METplus StatAnalysis wrapper to search for +# files that are valid for the given case and generate a command to run +# the MET tool stat_analysis. + +############################################################################## +# METplus Workflow +# ---------------- +# +# StatAnalysis is the only tool called in this example. It processes the following +# run times: +# +# | **Valid:** 2021-05-05_00Z +# | **Forecast lead:** 12 hour +# | + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line +# with the -c option, i.e. -c parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf +# +# ..
highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`StatAnalysis MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/STATAnalysisConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case uses a Python embedding script to read input data +# +# parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py +# +# .. highlight:: python +# .. 
literalinclude:: ../../../../parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py +# + +############################################################################## +# Running METplus +# --------------- +# +# It is recommended to run this use case by: +# +# Passing in StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf then a user-specific system configuration file:: +# +# run_metplus.py -c /path/to/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf -c /path/to/user_system.conf +# +# The following METplus configuration variables must be set correctly to run this example.: +# +# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs). +# * **OUTPUT_BASE** - Path where METplus output will be written. This must be in a location where you have write permissions +# * **MET_INSTALL_DIR** - Path to location where MET is installed locally +# +# Example User Configuration File:: +# +# [dir] +# INPUT_BASE = /path/to/sample/input/data +# OUTPUT_BASE = /path/to/output/dir +# MET_INSTALL_DIR = /path/to/met-X.Y +# +# **NOTE:** All of these items must be found under the [dir] section. +# + + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for this use case will be found in StatAnalysis_UGRID (relative to **OUTPUT_BASE**) +# and will contain the following file: +# +# * dump.out + +############################################################################## +# Keywords +# -------- +# +# .. 
note:: +# +# * StatAnalysisToolUseCase +# * PythonEmbeddingFileUseCase +# * UnstructureGridsUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# +# sphinx_gallery_thumbnail_path = '_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png' diff --git a/internal/tests/pytests/wrappers/command_builder/test_command_builder.py b/internal/tests/pytests/wrappers/command_builder/test_command_builder.py index c62609b98..31a2652fd 100644 --- a/internal/tests/pytests/wrappers/command_builder/test_command_builder.py +++ b/internal/tests/pytests/wrappers/command_builder/test_command_builder.py @@ -6,7 +6,7 @@ import datetime from metplus.wrappers.command_builder import CommandBuilder -from metplus.util import ti_calculate +from metplus.util import ti_calculate, add_field_info_to_time_info def get_data_dir(config): @@ -27,9 +27,9 @@ def test_find_data_no_dated(metplus_config, data_type): config = metplus_config pcw = CommandBuilder(config) - v = {} - v['fcst_level'] = "6" - v['obs_level'] = "6" + var_info = {} + var_info['fcst_level'] = "6" + var_info['obs_level'] = "6" task_info = {} task_info['valid'] = datetime.datetime.strptime("201802010000",'%Y%m%d%H%M') task_info['lead'] = 0 @@ -39,7 +39,8 @@ def test_find_data_no_dated(metplus_config, data_type): pcw.c_dict[f'{data_type}FILE_WINDOW_END'] = 3600 pcw.c_dict[f'{data_type}INPUT_DIR'] = get_data_dir(pcw.config) pcw.c_dict[f'{data_type}INPUT_TEMPLATE'] = "{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}" - obs_file = pcw.find_data(time_info, v, data_type) + add_field_info_to_time_info(time_info, var_info) + obs_file = pcw.find_data(time_info, data_type) assert obs_file == pcw.c_dict[f'{data_type}INPUT_DIR']+'/20180201_0045' @@ -67,7 +68,7 @@ def test_find_data_not_a_path(metplus_config, data_type): pcw.c_dict[f'{data_type}FILE_WINDOW_END'] = 0 pcw.c_dict[f'{data_type}INPUT_DIR'] = '' pcw.c_dict[f'{data_type}INPUT_TEMPLATE'] = 'G003' - obs_file = pcw.find_data(time_info, 
var_info=None, data_type=data_type) + obs_file = pcw.find_data(time_info, data_type=data_type) assert obs_file == 'G003' @@ -76,8 +77,8 @@ def test_find_obs_no_dated(metplus_config): config = metplus_config pcw = CommandBuilder(config) - v = {} - v['obs_level'] = "6" + var_info = {} + var_info['obs_level'] = "6" task_info = {} task_info['valid'] = datetime.datetime.strptime("201802010000", '%Y%m%d%H%M') task_info['lead'] = 0 @@ -87,7 +88,8 @@ def test_find_obs_no_dated(metplus_config): pcw.c_dict['OBS_FILE_WINDOW_END'] = 3600 pcw.c_dict['OBS_INPUT_DIR'] = get_data_dir(pcw.config) pcw.c_dict['OBS_INPUT_TEMPLATE'] = "{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}" - obs_file = pcw.find_obs(time_info, v) + add_field_info_to_time_info(time_info, var_info) + obs_file = pcw.find_obs(time_info) assert obs_file == pcw.c_dict['OBS_INPUT_DIR'] + '/20180201_0045' @@ -96,8 +98,8 @@ def test_find_obs_dated(metplus_config): config = metplus_config pcw = CommandBuilder(config) - v = {} - v['obs_level'] = "6" + var_info = {} + var_info['obs_level'] = "6" task_info = {} task_info['valid'] = datetime.datetime.strptime("201802010000", '%Y%m%d%H%M') task_info['lead'] = 0 @@ -107,7 +109,8 @@ def test_find_obs_dated(metplus_config): pcw.c_dict['OBS_FILE_WINDOW_END'] = 3600 pcw.c_dict['OBS_INPUT_DIR'] = get_data_dir(pcw.config) pcw.c_dict['OBS_INPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}' - obs_file = pcw.find_obs(time_info, v) + add_field_info_to_time_info(time_info, var_info) + obs_file = pcw.find_obs(time_info) assert obs_file == pcw.c_dict['OBS_INPUT_DIR']+'/20180201/20180201_0013' @@ -126,8 +129,8 @@ def test_find_obs_offset(metplus_config, offsets, expected_file, offset_seconds) config = metplus_config pcw = CommandBuilder(config) - v = {} - v['obs_level'] = "6" + var_info = {} + var_info['obs_level'] = "6" task_info = {} task_info['valid'] = datetime.datetime.strptime("2020020112", '%Y%m%d%H') task_info['lead'] = 0 @@ -136,7 +139,8 @@ def 
test_find_obs_offset(metplus_config, offsets, expected_file, offset_seconds) pcw.c_dict['OFFSETS'] = offsets pcw.c_dict['OBS_INPUT_DIR'] = get_data_dir(pcw.config) pcw.c_dict['OBS_INPUT_TEMPLATE'] = "{da_init?fmt=%2H}z.prepbufr.tm{offset?fmt=%2H}.{da_init?fmt=%Y%m%d}" - obs_file, time_info = pcw.find_obs_offset(time_info, v) + add_field_info_to_time_info(time_info, var_info) + obs_file, time_info = pcw.find_obs_offset(time_info) print(f"OBSFILE: {obs_file}") print(f"EXPECTED FILE: {expected_file}") @@ -153,8 +157,8 @@ def test_find_obs_dated_previous_day(metplus_config): config = metplus_config pcw = CommandBuilder(config) - v = {} - v['obs_level'] = "6" + var_info = {} + var_info['obs_level'] = "6" task_info = {} task_info['valid'] = datetime.datetime.strptime("201802010000", '%Y%m%d%H%M') task_info['lead'] = 0 @@ -164,7 +168,8 @@ def test_find_obs_dated_previous_day(metplus_config): pcw.c_dict['OBS_INPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}' pcw.c_dict['OBS_FILE_WINDOW_BEGIN'] = -3600 pcw.c_dict['OBS_FILE_WINDOW_END'] = 0 - obs_file = pcw.find_obs(time_info, v) + add_field_info_to_time_info(time_info, var_info) + obs_file = pcw.find_obs(time_info) assert obs_file == pcw.c_dict['OBS_INPUT_DIR']+'/20180131/20180131_2345' @@ -173,8 +178,9 @@ def test_find_obs_dated_next_day(metplus_config): config = metplus_config pcw = CommandBuilder(config) - v = {} - v['obs_level'] = "6" + var_info = { + 'obs_level': "6" + } task_info = {} task_info['valid'] = datetime.datetime.strptime("201802012345", '%Y%m%d%H%M') task_info['lead'] = 0 @@ -184,7 +190,8 @@ def test_find_obs_dated_next_day(metplus_config): pcw.c_dict['OBS_INPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}' pcw.c_dict['OBS_FILE_WINDOW_BEGIN'] = 0 pcw.c_dict['OBS_FILE_WINDOW_END'] = 3600 - obs_file = pcw.find_obs(time_info, v) + add_field_info_to_time_info(time_info, var_info) + obs_file = pcw.find_obs(time_info) assert obs_file == 
pcw.c_dict['OBS_INPUT_DIR']+'/20180202/20180202_0013' diff --git a/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py b/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py index 9328c3112..2045453a5 100644 --- a/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py +++ b/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py @@ -59,6 +59,49 @@ def set_minimum_config_settings(config, set_fields=True): config.set('config', 'OBS_VAR1_LEVELS', obs_level) +@pytest.mark.parametrize( + 'config_overrides, expected_filename', [ + # 0 - set forecast level + ({'FCST_VAR1_NAME': 'fcst_file', + 'FCST_VAR1_LEVELS': 'A06', + 'OBS_VAR1_NAME': 'obs_file', + 'OBS_VAR1_LEVELS': 'A06', + 'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE': '{fcst_name}_A{level?fmt=%3H}', + }, + f'{fcst_dir}/fcst_file_A006'), + # 1 - don't set forecast level + ({'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE': 'fcst_file_A{level?fmt=%3H}'}, + f'{fcst_dir}/fcst_file_A000'), + ] +) +@pytest.mark.wrapper_c +def test_ensemble_stat_level_in_template(metplus_config, config_overrides, + expected_filename): + + config = metplus_config + + set_minimum_config_settings(config, set_fields=False) + + # set config variable overrides + for key, value in config_overrides.items(): + config.set('config', key, value) + + wrapper = EnsembleStatWrapper(config) + assert wrapper.isOK + + file_list_dir = wrapper.config.getdir('FILE_LISTS_DIR') + file_list_file = f"{file_list_dir}/20050807000000_12_ensemble_stat.txt" + if os.path.exists(file_list_file): + os.remove(file_list_file) + + wrapper.run_all_times() + assert os.path.exists(file_list_file) + with open(file_list_file, 'r') as file_handle: + filenames = file_handle.read().splitlines()[1:] + assert len(filenames) == 1 + assert filenames[0] == expected_filename + + @pytest.mark.parametrize( 'config_overrides, env_var_values', [ # 0 : no ens, 1 fcst, 1 obs @@ -577,8 +620,7 @@ def 
test_ensemble_stat_single_field(metplus_config, config_overrides, app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" - file_list_dir = os.path.join(wrapper.config.getdir('STAGING_DIR'), - 'file_lists') + file_list_dir = wrapper.config.getdir('FILE_LISTS_DIR') config_file = wrapper.c_dict.get('CONFIG_FILE') out_dir = wrapper.c_dict.get('OUTPUT_DIR') expected_cmds = [(f"{app_path} {verbosity} " @@ -655,8 +697,7 @@ def test_ensemble_stat_fill_missing(metplus_config, config_overrides, wrapper = EnsembleStatWrapper(config) - file_list_file = os.path.join(wrapper.config.getdir('STAGING_DIR'), - 'file_lists', + file_list_file = os.path.join(wrapper.config.getdir('FILE_LISTS_DIR'), '20050807000000_12_ensemble_stat.txt') if os.path.exists(file_list_file): os.remove(file_list_file) diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index ef633503a..ca8f1cbc1 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -73,6 +73,7 @@ Category: climate Category: data_assimilation 0::StatAnalysis_fcstHAFS_obsPrepBufr_JEDI_IODA_interface::model_applications/data_assimilation/StatAnalysis_fcstHAFS_obsPrepBufr_JEDI_IODA_interface.conf +0::StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed::model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf:: py_embed Category: marine_and_cryosphere @@ -166,3 +167,7 @@ Category: tc_and_extra_tc 3::GridStat_fcstHAFS_obsTDR_NetCDF:: model_applications/tc_and_extra_tc/GridStat_fcstHAFS_obsTDR_NetCDF.conf:: py_embed 4::TCPairs_TCStat_fcstADECK_obsBDECK_ATCF_BasicExample:: model_applications/tc_and_extra_tc/TCPairs_TCStat_fcstADECK_obsBDECK_ATCF_BasicExample.conf 5::TCGen_fcstGFS_obsBDECK_2021season:: model_applications/tc_and_extra_tc/TCGen_fcstGFS_obsBDECK_2021season.conf + + +Category: unstructured_grids +0::StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed:: 
model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf:: geovista_env, py_embed diff --git a/metplus/util/string_manip.py b/metplus/util/string_manip.py index 5ddb62e86..c297dde55 100644 --- a/metplus/util/string_manip.py +++ b/metplus/util/string_manip.py @@ -239,6 +239,9 @@ def format_thresh(thresh_str): @returns string of comma-separated list of the threshold(s) with letter format, i.e. gt3,le5.5,eq7 """ + if isinstance(thresh_str, list): + return format_thresh(','.join(thresh_str)) + formatted_thresh_list = [] # separate thresholds by comma and strip off whitespace around values thresh_list = [thresh.strip() for thresh in thresh_str.split(',')] diff --git a/metplus/util/time_util.py b/metplus/util/time_util.py index 6e6c5cfc0..cd53af251 100755 --- a/metplus/util/time_util.py +++ b/metplus/util/time_util.py @@ -13,6 +13,8 @@ from dateutil.relativedelta import relativedelta import re +from .string_manip import split_level, format_thresh + '''!@namespace TimeInfo @brief Utility to handle timing in METplus wrappers @code{.sh} @@ -332,20 +334,10 @@ def _format_time_list(string_value, get_met_format, sort_list=True): def ti_calculate(input_dict_preserve): - out_dict = {} + # copy input dictionary so valid or init can be removed to recalculate it + # without modifying the input to the function input_dict = input_dict_preserve.copy() - - KEYS_TO_COPY = ['custom', 'instance'] - - # set output dictionary to input items - if 'now' in input_dict.keys(): - out_dict['now'] = input_dict['now'] - out_dict['today'] = out_dict['now'].strftime('%Y%m%d') - - # copy over values of some keys if it is set in input dictionary - for key in KEYS_TO_COPY: - if key in input_dict.keys(): - out_dict[key] = input_dict[key] + out_dict = input_dict # read in input dictionary items and compute missing items # valid inputs: valid, init, lead, offset @@ -381,7 +373,6 @@ def ti_calculate(input_dict_preserve): else: out_dict['lead'] = relativedelta(seconds=0) - # 
set offset to 0 if not specified if 'offset_hours' in input_dict.keys(): out_dict['offset'] = datetime.timedelta(hours=input_dict['offset_hours']) @@ -390,7 +381,6 @@ def ti_calculate(input_dict_preserve): else: out_dict['offset'] = datetime.timedelta(seconds=0) - # if init and valid are set, check which was set first via loop_by # remove the other to recalculate if 'init' in input_dict.keys() and 'valid' in input_dict.keys(): @@ -509,3 +499,25 @@ def add_to_time_input(time_input, clock_time=None, instance=None, custom=None): # otherwise leave it unset so it can be set within the wrapper if custom: time_input['custom'] = custom + + +def add_field_info_to_time_info(time_info, var_info): + """!Add field information from var_info to the time_info dictionary to use + in string template substitution. Sets new items in time_info. + + @param time_info dictionary containing time information to substitute + filename template tags + @param var_info dictionary containing information for the fields to process + """ + if var_info is None: + return + + for key, value in var_info.items(): + # skip index and extra field info + if key == 'index' or key.endswith('extra'): + continue + + if key.endswith('thresh'): + value = format_thresh(value) + + time_info[key] = value diff --git a/metplus/wrappers/ascii2nc_wrapper.py b/metplus/wrappers/ascii2nc_wrapper.py index 02a06fd65..482ce6b9b 100755 --- a/metplus/wrappers/ascii2nc_wrapper.py +++ b/metplus/wrappers/ascii2nc_wrapper.py @@ -297,7 +297,7 @@ def find_input_files(self, time_info): return self.infiles # get list of files even if only one is found (return_list=True) - obs_path = self.find_obs(time_info, var_info=None, return_list=True) + obs_path = self.find_obs(time_info, return_list=True) if obs_path is None: return None diff --git a/metplus/wrappers/command_builder.py b/metplus/wrappers/command_builder.py index 43e424cea..6af2d49a6 100755 --- a/metplus/wrappers/command_builder.py +++ b/metplus/wrappers/command_builder.py @@ 
-425,47 +425,39 @@ def print_env_item(self, item): """ return f"{item}={self.env[item]}" - def find_model(self, time_info, var_info=None, mandatory=True, - return_list=False): + def find_model(self, time_info, mandatory=True, return_list=False): """! Finds the model file to compare - Args: + @param time_info dictionary containing timing information - @param var_info object containing variable information @param mandatory if True, report error if not found, warning if not, default is True @rtype string @return Returns the path to an model file """ return self.find_data(time_info, - var_info=var_info, data_type="FCST", mandatory=mandatory, return_list=return_list) - def find_obs(self, time_info, var_info=None, mandatory=True, - return_list=False): + def find_obs(self, time_info, mandatory=True, return_list=False): """! Finds the observation file to compare - Args: + @param time_info dictionary containing timing information - @param var_info object containing variable information @param mandatory if True, report error if not found, warning if not, default is True @rtype string @return Returns the path to an observation file """ return self.find_data(time_info, - var_info=var_info, data_type="OBS", mandatory=mandatory, return_list=return_list) - def find_obs_offset(self, time_info, var_info=None, mandatory=True, - return_list=False): + def find_obs_offset(self, time_info, mandatory=True, return_list=False): """! 
Finds the observation file to compare, looping through offset list until a file is found @param time_info dictionary containing timing information - @param var_info object containing variable information @param mandatory if True, report error if not found, warning if not, default is True @rtype string @@ -483,7 +475,6 @@ def find_obs_offset(self, time_info, var_info=None, mandatory=True, time_info['offset_hours'] = offset time_info = ti_calculate(time_info) obs_path = self.find_obs(time_info, - var_info=var_info, mandatory=is_mandatory, return_list=return_list) @@ -505,12 +496,10 @@ def find_obs_offset(self, time_info, var_info=None, mandatory=True, return None, time_info - def find_data(self, time_info, var_info=None, data_type='', mandatory=True, + def find_data(self, time_info, data_type='', mandatory=True, return_list=False, allow_dir=False): """! Finds the data file to compare - Args: @param time_info dictionary containing timing information - @param var_info object containing variable information @param data_type type of data to find (i.e. FCST_ or OBS_) @param mandatory if True, report error if not found, warning if not. default is True @@ -523,21 +512,14 @@ def find_data(self, time_info, var_info=None, data_type='', mandatory=True, if data_type and not data_type.endswith('_'): data_type_fmt += '_' - if var_info is not None: - # set level based on input data type - if data_type_fmt.startswith("OBS"): - v_level = var_info['obs_level'] - else: - v_level = var_info['fcst_level'] + # set generic 'level' to level that corresponds to data_type if set + level = time_info.get(f'{data_type_fmt.lower()}level', '0') - # separate character from beginning of numeric - # level value if applicable - level = split_level(v_level)[1] + # strip off prefix letter if it exists + level = split_level(level)[1] - # set level to 0 character if it is not a number - if not level.isdigit(): - level = '0' - else: + # set level to 0 character if it is not a number, e.g. 
NetCDF level + if not level.isdigit(): level = '0' # if level is a range, use the first value, i.e. if 250-500 use 250 @@ -627,7 +609,7 @@ def find_exact_file(self, level, data_type, time_info, mandatory=True, check_file_list.append(full_path) # if it was set, add level back to time_info - if saved_level: + if saved_level is not None: time_info['level'] = saved_level # if multiple files are not supported by the wrapper and multiple @@ -1061,16 +1043,6 @@ def check_gempaktocf(self, gempaktocf_jar): "on how to obtain the tool: parm/use_cases/met_tool_wrapper/GempakToCF/GempakToCF.py") self.isOK = False - def add_field_info_to_time_info(self, time_info, field_info): - """!Add name and level values from field info to time info dict to be used in string substitution - Args: - @param time_info time dictionary to add items to - @param field_info field dictionary to get values from - """ - field_items = ['fcst_name', 'fcst_level', 'obs_name', 'obs_level'] - for field_item in field_items: - time_info[field_item] = field_info[field_item] if field_item in field_info else '' - def set_current_field_config(self, field_info=None): """! Sets config variables for current fcst/obs name/level that can be referenced by other config variables such as OUTPUT_PREFIX. diff --git a/metplus/wrappers/compare_gridded_wrapper.py b/metplus/wrappers/compare_gridded_wrapper.py index 32fef40ba..69f6bc5cb 100755 --- a/metplus/wrappers/compare_gridded_wrapper.py +++ b/metplus/wrappers/compare_gridded_wrapper.py @@ -15,7 +15,7 @@ from ..util import do_string_sub, ti_calculate from ..util import parse_var_list from ..util import get_lead_sequence, skip_time, sub_var_list -from ..util import field_read_prob_info +from ..util import field_read_prob_info, add_field_info_to_time_info from . 
import CommandBuilder '''!@namespace CompareGriddedWrapper @@ -160,12 +160,14 @@ def run_at_time_once(self, time_info): for var_info in var_list: self.clear() self.c_dict['CURRENT_VAR_INFO'] = var_info + add_field_info_to_time_info(time_info, var_info) self.run_at_time_one_field(time_info, var_info) else: # loop over all variables and all them to the field list, # then call the app once if var_list: self.c_dict['CURRENT_VAR_INFO'] = var_list[0] + add_field_info_to_time_info(time_info, var_list[0]) self.clear() self.run_at_time_all_fields(time_info) @@ -180,7 +182,6 @@ def run_at_time_one_field(self, time_info, var_info): # get model to compare, return None if not found model_path = self.find_model(time_info, - var_info, mandatory=True, return_list=True) if model_path is None: @@ -189,7 +190,6 @@ def run_at_time_one_field(self, time_info, var_info): self.infiles.extend(model_path) # get observation to compare, return None if not found obs_path, time_info = self.find_obs_offset(time_info, - var_info, mandatory=True, return_list=True) if obs_path is None: @@ -225,7 +225,6 @@ def run_at_time_all_fields(self, time_info): # get model from first var to compare model_path = self.find_model(time_info, - var_list[0], mandatory=True, return_list=True) if not model_path: @@ -244,7 +243,6 @@ def run_at_time_all_fields(self, time_info): # get observation to from first var compare obs_path, time_info = self.find_obs_offset(time_info, - var_list[0], mandatory=True, return_list=True) if obs_path is None: diff --git a/metplus/wrappers/ensemble_stat_wrapper.py b/metplus/wrappers/ensemble_stat_wrapper.py index aa392e9b5..2c74d6294 100755 --- a/metplus/wrappers/ensemble_stat_wrapper.py +++ b/metplus/wrappers/ensemble_stat_wrapper.py @@ -422,20 +422,9 @@ def run_at_time_all_fields(self, time_info): fill_missing=fill_missing): return - # parse optional var list for FCST and/or OBS fields - var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) - - # if empty var list for 
FCST/OBS, use None as first var, - # else use first var in list - if not var_list: - first_var_info = None - else: - first_var_info = var_list[0] - # get point observation file if requested if self.c_dict['OBS_POINT_INPUT_TEMPLATE']: - point_obs_path = self.find_data(time_info, first_var_info, - 'OBS_POINT') + point_obs_path = self.find_data(time_info, data_type='OBS_POINT') if point_obs_path is None: return @@ -443,13 +432,15 @@ def run_at_time_all_fields(self, time_info): # get grid observation file if requested if self.c_dict['OBS_GRID_INPUT_TEMPLATE']: - grid_obs_path = self.find_data(time_info, first_var_info, - 'OBS_GRID') + grid_obs_path = self.find_data(time_info, data_type='OBS_GRID') if grid_obs_path is None: return self.grid_obs_files.append(grid_obs_path) + # parse optional var list for FCST and/or OBS fields + var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) + # set field info fcst_field = self.get_all_field_info(var_list, 'FCST') obs_field = self.get_all_field_info(var_list, 'OBS') diff --git a/metplus/wrappers/ioda2nc_wrapper.py b/metplus/wrappers/ioda2nc_wrapper.py index 4e267e729..dfc75b4c7 100755 --- a/metplus/wrappers/ioda2nc_wrapper.py +++ b/metplus/wrappers/ioda2nc_wrapper.py @@ -140,7 +140,7 @@ def find_input_files(self, time_info): @returns List of files that were found or None if no files were found """ # get list of files even if only one is found (return_list=True) - obs_path = self.find_obs(time_info, var_info=None, return_list=True) + obs_path = self.find_obs(time_info, return_list=True) if obs_path is None: return None diff --git a/metplus/wrappers/mode_wrapper.py b/metplus/wrappers/mode_wrapper.py index 1a539ea02..7d9a044b5 100755 --- a/metplus/wrappers/mode_wrapper.py +++ b/metplus/wrappers/mode_wrapper.py @@ -466,12 +466,12 @@ def run_at_time_one_field(self, time_info, var_info): @param var_info object containing variable information """ # get model to compare - model_path = self.find_model(time_info, var_info) + 
model_path = self.find_model(time_info) if model_path is None: return # get observation to compare - obs_path = self.find_obs(time_info, var_info) + obs_path = self.find_obs(time_info) if obs_path is None: return diff --git a/metplus/wrappers/mtd_wrapper.py b/metplus/wrappers/mtd_wrapper.py index 217427bad..b74663dda 100755 --- a/metplus/wrappers/mtd_wrapper.py +++ b/metplus/wrappers/mtd_wrapper.py @@ -15,7 +15,7 @@ from ..util import get_lead_sequence, sub_var_list from ..util import ti_calculate from ..util import do_string_sub, skip_time -from ..util import parse_var_list +from ..util import parse_var_list, add_field_info_to_time_info from . import CompareGriddedWrapper class MTDWrapper(CompareGriddedWrapper): @@ -219,6 +219,7 @@ def run_at_time_loop_string(self, input_dict): if self.c_dict.get('EXPLICIT_FILE_LIST', False): time_info = ti_calculate(input_dict) + add_field_info_to_time_info(time_info, var_info) model_list_path = do_string_sub(self.c_dict['FCST_FILE_LIST'], **time_info) self.logger.debug(f"Explicit FCST file: {model_list_path}") @@ -252,14 +253,13 @@ def run_at_time_loop_string(self, input_dict): input_dict['lead'] = lead time_info = ti_calculate(input_dict) + add_field_info_to_time_info(time_info, var_info) tasks.append(time_info) for current_task in tasks: # call find_model/obs as needed - model_file = self.find_model(current_task, var_info, - mandatory=False) - obs_file = self.find_obs(current_task, var_info, - mandatory=False) + model_file = self.find_model(current_task, mandatory=False) + obs_file = self.find_obs(current_task, mandatory=False) if model_file is None and obs_file is None: continue @@ -313,6 +313,7 @@ def run_single_mode(self, input_dict, var_info): if self.c_dict.get('EXPLICIT_FILE_LIST', False): time_info = ti_calculate(input_dict) + add_field_info_to_time_info(time_info, var_info) single_list_path = do_string_sub( self.c_dict[f'{data_src}_FILE_LIST'], **time_info @@ -334,7 +335,7 @@ def run_single_mode(self, input_dict, 
var_info): input_dict['lead'] = lead current_task = ti_calculate(input_dict) - single_file = find_method(current_task, var_info) + single_file = find_method(current_task) if single_file is None: continue @@ -408,7 +409,6 @@ def process_fields_one_thresh(self, time_info, var_info, model_path, fcst_field_list.extend(fcst_field) - if obs_path: obs_thresh_list = var_info['obs_thresh'] @@ -497,7 +497,6 @@ def set_environment_variables(self, time_info): self.add_env_var("MIN_VOLUME", self.c_dict["MIN_VOLUME"]) - self.add_env_var("FCST_FILE_TYPE", self.c_dict.get('FCST_FILE_TYPE', '')) self.add_env_var("OBS_FILE_TYPE", diff --git a/metplus/wrappers/pb2nc_wrapper.py b/metplus/wrappers/pb2nc_wrapper.py index fff7783f7..6d3848ca4 100755 --- a/metplus/wrappers/pb2nc_wrapper.py +++ b/metplus/wrappers/pb2nc_wrapper.py @@ -226,7 +226,6 @@ def find_input_files(self, input_dict): """ infiles, time_info = self.find_obs_offset(input_dict, - None, mandatory=True, return_list=True) diff --git a/metplus/wrappers/pcp_combine_wrapper.py b/metplus/wrappers/pcp_combine_wrapper.py index f87b07fad..5ede0fdbc 100755 --- a/metplus/wrappers/pcp_combine_wrapper.py +++ b/metplus/wrappers/pcp_combine_wrapper.py @@ -12,6 +12,7 @@ from ..util import get_relativedelta, ti_get_seconds_from_relativedelta from ..util import time_string_to_met_time, seconds_to_met_time from ..util import parse_var_list, template_to_regex, split_level +from ..util import add_field_info_to_time_info from . 
import ReformatGriddedWrapper '''!@namespace PCPCombineWrapper @@ -252,6 +253,7 @@ def run_at_time_one_field(self, time_info, var_info, data_src): return False time_info['level'] = lookback_seconds + add_field_info_to_time_info(time_info, var_info) # if method is not USER_DEFINED or DERIVE, # check that field information is set @@ -259,7 +261,7 @@ def run_at_time_one_field(self, time_info, var_info, data_src): can_run = self.setup_user_method(time_info, data_src) elif method == "DERIVE": can_run = self.setup_derive_method(time_info, lookback_seconds, - var_info, data_src) + data_src) elif method == "ADD": can_run = self.setup_add_method(time_info, lookback_seconds, data_src) @@ -494,12 +496,11 @@ def setup_add_method(self, time_info, lookback, data_src): return files_found - def setup_derive_method(self, time_info, lookback, var_info, data_src): + def setup_derive_method(self, time_info, lookback, data_src): """! Setup pcp_combine to derive stats @param time_info dictionary containing timing information @param lookback accumulation amount to compute in seconds - @param var_info object containing variable information @param data_src data type (FCST or OBS) @rtype string @return path to output file @@ -524,7 +525,6 @@ def setup_derive_method(self, time_info, lookback, var_info, data_src): level=accum_dict['level'], extra=accum_dict['extra']) input_files = self.find_data(time_info, - var_info, data_type=data_src, return_list=True) if not input_files: diff --git a/metplus/wrappers/plot_data_plane_wrapper.py b/metplus/wrappers/plot_data_plane_wrapper.py index 4f874c42f..26f844772 100755 --- a/metplus/wrappers/plot_data_plane_wrapper.py +++ b/metplus/wrappers/plot_data_plane_wrapper.py @@ -170,9 +170,7 @@ def find_input_files(self, time_info): self.infiles.append(self.c_dict['INPUT_TEMPLATE']) return self.infiles - file_path = self.find_data(time_info, - var_info=None, - return_list=False) + file_path = self.find_data(time_info, return_list=False) if not file_path: 
return None diff --git a/metplus/wrappers/point2grid_wrapper.py b/metplus/wrappers/point2grid_wrapper.py index a2607d685..0af3fe360 100755 --- a/metplus/wrappers/point2grid_wrapper.py +++ b/metplus/wrappers/point2grid_wrapper.py @@ -201,8 +201,7 @@ def find_input_files(self, time_info): """ # get input file # calling find_obs because we set OBS_ variables in c_dict for the input data - input_path = self.find_obs(time_info, - var_info=None) + input_path = self.find_obs(time_info) if input_path is None: return None diff --git a/metplus/wrappers/regrid_data_plane_wrapper.py b/metplus/wrappers/regrid_data_plane_wrapper.py index c58ebfc7c..0211af427 100755 --- a/metplus/wrappers/regrid_data_plane_wrapper.py +++ b/metplus/wrappers/regrid_data_plane_wrapper.py @@ -12,10 +12,9 @@ import os -from ..util import time_util -from ..util import do_string_sub -from ..util import parse_var_list -from ..util import get_process_list +from ..util import get_seconds_from_string, do_string_sub +from ..util import parse_var_list, get_process_list +from ..util import add_field_info_to_time_info from ..util import remove_quotes, split_level, format_level from . 
import ReformatGriddedWrapper @@ -173,7 +172,7 @@ def handle_output_file(self, time_info, field_info, data_type): not be run """ _, level = split_level(field_info[f'{data_type.lower()}_level']) - time_info['level'] = time_util.get_seconds_from_string(level, 'H') + time_info['level'] = get_seconds_from_string(level, 'H') return self.find_and_check_output_file(time_info) def run_once_per_field(self, time_info, var_list, data_type): @@ -189,8 +188,7 @@ def run_once_per_field(self, time_info, var_list, data_type): self.set_command_line_arguments() - self.add_field_info_to_time_info(time_info, - field_info) + add_field_info_to_time_info(time_info, field_info) input_name = field_info[f'{data_type.lower()}_name'] input_level = field_info[f'{data_type.lower()}_level'] @@ -271,8 +269,7 @@ def run_once_for_all_fields(self, time_info, var_list, data_type): self.set_command_line_arguments() for field_info in var_list: - self.add_field_info_to_time_info(time_info, - field_info) + add_field_info_to_time_info(time_info, field_info) input_name = field_info[f'{data_type.lower()}_name'] input_level = field_info[f'{data_type.lower()}_level'] @@ -289,9 +286,7 @@ def run_once_for_all_fields(self, time_info, var_list, data_type): # add list of output names self.args.append("-name " + ','.join(output_names)) - if not self.handle_output_file(time_info, - var_list[0], - data_type): + if not self.handle_output_file(time_info, var_list[0], data_type): return False # build and run commands @@ -320,7 +315,8 @@ def run_at_time_once(self, time_info, var_list, data_type): f'NAME or {data_type}_VAR_NAME.') return False - if not self.find_input_files(time_info, data_type, var_list): + add_field_info_to_time_info(time_info, var_list[0]) + if not self.find_input_files(time_info, data_type): return False # set environment variables @@ -334,12 +330,16 @@ def run_at_time_once(self, time_info, var_list, data_type): # if not running once per field, process all fields and run once return 
self.run_once_for_all_fields(time_info, var_list, data_type) - def find_input_files(self, time_info, data_type, var_list): - """!Get input file and verification grid to process. Use the first field in the - list to substitute level if that is provided in the filename template""" - input_path = self.find_data(time_info, - var_info=var_list[0], - data_type=data_type) + def find_input_files(self, time_info, data_type): + """!Get input file and verification grid to process. Use the first + field in the list to substitute level if that is provided in the + filename template + + @param time_info time dictionary used for string substitution + @param data_type type of data to process, i.e. FCST or OBS + @returns list of input files if files were found, None if not + """ + input_path = self.find_data(time_info, data_type=data_type) if not input_path: return None diff --git a/metplus/wrappers/series_analysis_wrapper.py b/metplus/wrappers/series_analysis_wrapper.py index 8d24ea0c3..ca3897cdf 100755 --- a/metplus/wrappers/series_analysis_wrapper.py +++ b/metplus/wrappers/series_analysis_wrapper.py @@ -30,7 +30,7 @@ from ..util import ti_get_seconds_from_relativedelta from ..util import parse_var_list from ..util import add_to_time_input -from ..util import field_read_prob_info +from ..util import field_read_prob_info, add_field_info_to_time_info from .plot_data_plane_wrapper import PlotDataPlaneWrapper from . 
import RuntimeFreqWrapper @@ -803,7 +803,7 @@ def build_and_run_series_request(self, time_info, fcst_path, obs_path): self.c_dict['FCST_LIST_PATH'] = fcst_path self.c_dict['OBS_LIST_PATH'] = obs_path - self.add_field_info_to_time_info(time_info, var_info) + add_field_info_to_time_info(time_info, var_info) # get formatted field dictionary to pass into the MET config file fcst_field, obs_field = self.get_formatted_fields(var_info, @@ -916,7 +916,7 @@ def _generate_plots(self, fcst_path, time_info, storm_id): for var_info in self.c_dict['VAR_LIST']: name = var_info['fcst_name'] level = var_info['fcst_level'] - self.add_field_info_to_time_info(time_info, var_info) + add_field_info_to_time_info(time_info, var_info) # change wildcard storm ID to all_storms if storm_id == '*': diff --git a/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf new file mode 100644 index 000000000..c93fce940 --- /dev/null +++ b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf @@ -0,0 +1,85 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = StatAnalysis + + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = VALID + +VALID_TIME_FMT = %Y%m%d%H +VALID_BEG=2018041500 +VALID_END=2018041500 +VALID_INCREMENT = 12H + +LEAD_SEQ = 0 + + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +MODEL1_STAT_ANALYSIS_LOOKIN_DIR = python {PARM_BASE}/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py {INPUT_BASE}/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/sample_hofx_output_sondes.nc4 + +STAT_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/StatAnalysis_IODAv2 +STAT_ANALYSIS_OUTPUT_TEMPLATE = job.out +MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE = dump.out + + +### +# StatAnalysis Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#statanalysis +### + +MODEL1 = NA +MODEL1_OBTYPE = NA + +STAT_ANALYSIS_JOB_NAME = aggregate_stat +STAT_ANALYSIS_JOB_ARGS = -out_line_type CNT -dump_row [dump_row_file] -line_type MPR -by FCST_VAR + +MODEL_LIST = +DESC_LIST = +FCST_LEAD_LIST = +OBS_LEAD_LIST = +FCST_VALID_HOUR_LIST = +FCST_INIT_HOUR_LIST = +OBS_VALID_HOUR_LIST = +OBS_INIT_HOUR_LIST = +FCST_VAR_LIST = +OBS_VAR_LIST = +FCST_UNITS_LIST = +OBS_UNITS_LIST = +FCST_LEVEL_LIST = +OBS_LEVEL_LIST = +VX_MASK_LIST = +INTERP_MTHD_LIST = +INTERP_PNTS_LIST = 
+FCST_THRESH_LIST = +OBS_THRESH_LIST = +COV_THRESH_LIST = +ALPHA_LIST = +LINE_TYPE_LIST = + +GROUP_LIST_ITEMS = +LOOP_LIST_ITEMS = MODEL_LIST diff --git a/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py new file mode 100644 index 000000000..de79edf64 --- /dev/null +++ b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py @@ -0,0 +1,103 @@ +from __future__ import print_function + +import pandas as pd +import os +from glob import glob +import sys +import xarray as xr +import datetime as dt + +######################################################################## + +print('Python Script:\t', sys.argv[0]) + +# Input is .nc or .nc4 file + +if len(sys.argv) == 2: + # Read the input file as the first argument + input_path = os.path.expandvars(sys.argv[1]) + try: + print("Input File:\t" + repr(input_path)) + + # Read all the needed groups + ioda_data = xr.open_dataset(input_path, group = 'MetaData') + ioda_hofx_data = xr.open_dataset(input_path, group = 'hofx') + + hofx_vars = list(ioda_hofx_data.keys()) + + # use dataframes + ioda_df = ioda_data.to_dataframe() + ioda_data.close() + + for var_name in hofx_vars: + ioda_df[var_name + '@hofx'] = ioda_hofx_data[var_name] + + # Add columns for needed attributes, for each variable present for hofx + for attribute in ['ObsValue', 'ObsType', 'EffectiveQC']: + ioda_attr_data = xr.open_dataset(input_path, group = attribute) + for var_name in hofx_vars: + ioda_df[var_name + '@' + attribute] = ioda_attr_data[var_name] + + ioda_attr_data.close() + ioda_hofx_data.close() + + nlocs = len(ioda_df.index) + print('Number of locations in set: ' + str(nlocs)) + + # Decode strings + time = list(ioda_df['datetime']) + + for i in range(0,nlocs): + temp = dt.datetime.strptime(time[i], 
'%Y-%m-%dT%H:%M:%SZ') + time[i] = temp.strftime('%Y%m%d_%H%M%S') + + ioda_df['datetime'] = time + + #set up MPR data + mpr_data = [] + + for var_name in hofx_vars: + + # Set up the needed columns + ioda_df_var = ioda_df[['datetime','station_id',var_name+'@ObsType', + 'latitude','longitude','air_pressure', + var_name+'@hofx',var_name+'@ObsValue', + var_name+'@EffectiveQC']] + + # Cute down to locations with valid ObsValues + ioda_df_var = ioda_df_var[abs(ioda_df_var[var_name+'@ObsValue']) < 1e6] + nlocs = len(ioda_df_var.index) + print(var_name+' has '+str(nlocs)+' valid obs.') + + # Add additional columns + ioda_df_var['lead'] = '000000' + ioda_df_var['MPR'] = 'MPR' + ioda_df_var['nobs'] = nlocs + ioda_df_var['index'] = range(0,nlocs) + ioda_df_var['varname'] = var_name + ioda_df_var['na'] = 'NA' + + # Arrange columns in MPR format + cols = ['na','na','lead','datetime','datetime','lead','datetime', + 'datetime','varname','na','lead','varname','na','na', + var_name+'@ObsType','na','na','lead','na','na','na','na','MPR', + 'nobs','index','station_id','latitude','longitude', + 'air_pressure','na',var_name+'@hofx',var_name+'@ObsValue', + var_name+'@EffectiveQC','na','na'] + + ioda_df_var = ioda_df_var[cols] + + # Into a list and all to strings + mpr_data = mpr_data + [list( map(str,i) ) for i in ioda_df_var.values.tolist() ] + + print("Total Length:\t" + repr(len(mpr_data))) + + except NameError: + print("Can't find the input file.") + print("HofX variables in this file:\t" + repr(hofx_vars)) +else: + print("ERROR: read_iodav2_mpr.py -> Must specify input file.\n") + sys.exit(1) + +######################################################################## + diff --git a/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf new file mode 100644 index 000000000..bbca987dd --- /dev/null +++ 
b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf @@ -0,0 +1,85 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.html + +# For additional information, please see the METplus Users Guide. +# https://metplus.readthedocs.io/en/latest/Users_Guide + +### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = StatAnalysis + + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = VALID + +VALID_TIME_FMT = %Y%m%d%H +VALID_BEG=2021050500 +VALID_END=2021050500 +VALID_INCREMENT = 6H + +LEAD_SEQ = 0 + + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +MODEL1_STAT_ANALYSIS_LOOKIN_DIR = python {PARM_BASE}/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py {INPUT_BASE}/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/fcst_data/lfric_ver_20210505_0000.nc {INPUT_BASE}/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/obs_data + +STAT_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/StatAnalysis_UGRID +STAT_ANALYSIS_OUTPUT_TEMPLATE = job.out +MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE = dump.out + + +### +# StatAnalysis Settings +# 
https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#statanalysis +### + +MODEL1 = NA +MODEL1_OBTYPE = NA + +STAT_ANALYSIS_JOB_NAME = aggregate_stat +STAT_ANALYSIS_JOB_ARGS = -out_line_type CNT -dump_row [dump_row_file] -line_type MPR -by FCST_VAR + +MODEL_LIST = +DESC_LIST = +FCST_LEAD_LIST = +OBS_LEAD_LIST = +FCST_VALID_HOUR_LIST = +FCST_INIT_HOUR_LIST = +OBS_VALID_HOUR_LIST = +OBS_INIT_HOUR_LIST = +FCST_VAR_LIST = +OBS_VAR_LIST = +FCST_UNITS_LIST = +OBS_UNITS_LIST = +FCST_LEVEL_LIST = +OBS_LEVEL_LIST = +VX_MASK_LIST = +INTERP_MTHD_LIST = +INTERP_PNTS_LIST = +FCST_THRESH_LIST = +OBS_THRESH_LIST = +COV_THRESH_LIST = +ALPHA_LIST = +LINE_TYPE_LIST = + +GROUP_LIST_ITEMS = +LOOP_LIST_ITEMS = MODEL_LIST diff --git a/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py new file mode 100644 index 000000000..d78e17e82 --- /dev/null +++ b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py @@ -0,0 +1,203 @@ +from __future__ import print_function + +import math +import pandas as pd +import numpy as np +import os +from glob import glob +import sys +import xarray as xr +import datetime as dt +import iris +from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD +#geovista from https://github.com/bjlittle/geovista/ +import geovista as gv +import geovista.theme +from geovista.common import to_xyz +import netCDF4 +import pyvista as pv +from pykdtree.kdtree import KDTree + +from pathlib import Path +from typing import Optional + +import matplotlib.pyplot as plt + +print(f"{iris.__version__=}") +print(f"{gv.__version__=}") + +######################################################################## + +def read_ascii_obs(files): + paths = sorted(glob(files)) + datasets = [pd.read_table(p, header=None, delim_whitespace=True) 
for p in paths] + combined = pd.concat(datasets) + return combined + +def load_ugrid( + fname: str, + data: Optional[bool] = False, + constraint: Optional[str] = None, + verbose: Optional[bool] = False +) -> pv.PolyData: +# fname = BASE_DIR / fname + with PARSE_UGRID_ON_LOAD.context(): + cube = iris.load_cube(fname, constraint=constraint) + + if cube.ndim > 1: + cube = cube[(0,) * (cube.ndim - 1)] + + if verbose: + print(cube) + + data = cube.data if data else None + + face_node = cube.mesh.face_node_connectivity + indices = face_node.indices_by_location() + lons, lats = cube.mesh.node_coords + + mesh = gv.Transform.from_unstructured( + lons.points, + lats.points, + indices, + data=data, + start_index=face_node.start_index, + name=cube.name(), + ) + + if data is None: + mesh.active_scalars_name = None + + return mesh + +def info(mesh: pv.PolyData) -> None: + print(f"The mesh is a C{int(math.sqrt(mesh.n_cells / 6))}, with 6 panels, {int(mesh.n_cells / 6):,d} cells per panel, and {mesh.n_cells:,d} cells.") + +def find_nearest(tree, points, poi, k): + # lat/lon to xyz + xyz = to_xyz(*poi) + + # find the k nearest euclidean neighbours + dist, idxs = tree.query(xyz, k=k) + + if idxs.ndim > 1: + idxs = idxs[0] + + # retieve the associated xyz points of the k nearest neighbours + nearest = points[idxs] + + return xyz, nearest, idxs + +def to_centers(mesh: pv.PolyData) -> pv.PolyData: + tmp = mesh.copy() + tmp.clear_cell_data() + tmp.clear_point_data() + tmp.clear_field_data() + return tmp.cell_centers() + +######################################################################## +print('Python Script:\t', sys.argv[0]) + +# Input is directory of .nc lfric files and a directory of ascii obs filess + +if len(sys.argv) == 3: + # Read the input file as the first argument + input_fcst_dir = os.path.expandvars(sys.argv[1]) + input_obs_dir = os.path.expandvars(sys.argv[2]) + try: + print("Input Forecast Dir:\t" + repr(input_fcst_dir)) + print("Input Observations Dir:\t" + 
repr(input_obs_dir)) + + #Read all obs from directory + obs_data = read_ascii_obs(input_obs_dir+'/*.ascii') + print(obs_data.shape) + obs_data = obs_data.iloc[::1000, :]#thin for testing + obs_data = obs_data.rename(columns={0:'message_type', 1:'station_id', 2:'obs_valid_time', 3:'obs_lat', 4:'obs_lon', \ + 5:'elevation', 6:'var_name', 7:'level', 8:'height', 9:'qc_string', 10:'obs_value'}) + + obs_vars = ['UGRD', 'VGRD', 'TMP', 'RH'] + fcst_vars = ['u10m', 'v10m', 't1p5m', 'rh1p5m'] + + #open the netcdf forecast to access data values and list of times + fcst_data = xr.open_dataset(input_fcst_dir) + fcst_times = pd.to_datetime(fcst_data.coords['time_centered']) + + match_df = pd.DataFrame(columns=['message_type', 'station_id', 'obs_valid_time', 'obs_lat', 'obs_lon', \ + 'elevation', 'var_name', 'level', 'height', 'qc_string', 'obs_value', 'idx_nearest, fcst_value']) + + for idx1, (obs_var, fcst_var) in enumerate(zip(obs_vars, fcst_vars)): + + #load forecast as an iris cube + fcst_mesh = load_ugrid(input_fcst_dir, constraint=fcst_var) + info(fcst_mesh) + + #get indices of nearest cell center + fcst_centers = to_centers(fcst_mesh) + points = fcst_centers.points + tree = KDTree(points) + + #get the forecast data values loaded + fcst_df = fcst_data[fcst_var].to_dataframe() + print(fcst_df) + + #get obs data for variable + var_data = obs_data.loc[obs_data['var_name'] == obs_var].reset_index(drop=True) + + for idx2, row in var_data.iterrows(): + xyz, nearest, idx_nearest = find_nearest(tree, points, [row['obs_lat'], row['obs_lon']], k=1) + var_data.at[idx2,'idx_nearest'] = int(idx_nearest) + + #get the obs time, search for closest in the forecast data + time = dt.datetime.strptime(row['obs_valid_time'],'%Y%m%d_%H%M%S') + match_time = min(fcst_times, key=lambda d: abs(d - time)) + match_idx = np.argmin(np.abs(fcst_times - time)) + + #add matched fcst value to data + var_data.at[idx2, 'fcst_value'] = fcst_df.loc[(match_idx,int(idx_nearest)), fcst_var] + var_data.at[idx2, 
'fcst_lat'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'Mesh2d_face_x'] + var_data.at[idx2, 'fcst_lon'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'Mesh2d_face_y'] + var_data.at[idx2, 'fcst_time'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'time_centered'] + + #check results + #with pd.option_context('display.max_rows', None): + # print(var_data[['obs_lat','fcst_lat','obs_lon','fcst_lon','obs_value','fcst_value','obs_valid_time','fcst_time']]) + with pd.option_context('display.max_columns', 500, 'display.max_rows', 100, 'display.width', 500): + print(var_data) + ob_vals = var_data['obs_value'].values + f_vals = var_data['fcst_value'].values + + match_df = pd.concat([match_df, var_data], ignore_index=True) + + nlocs = len(match_df.index) + print('Number of locations in matched set: ' + str(nlocs)) + + # Add additional columns + match_df['lead'] = '000000' + match_df['MPR'] = 'MPR' + match_df['nobs'] = nlocs + match_df['index'] = range(0,nlocs) + match_df['na'] = 'NA' + match_df['QC'] = '0' + + # Arrange columns in MPR format + cols = ['na','na','lead','obs_valid_time','obs_valid_time','lead','obs_valid_time', + 'obs_valid_time','var_name','na','lead','var_name','na','na', + 'var_name','na','na','lead','na','na','na','na','MPR', + 'nobs','index','station_id','obs_lat','obs_lon', + 'level','na','fcst_value','obs_value', + 'QC','na','na'] + + match_df = match_df[cols] + + # Into a list and all to strings + mpr_data = [list( map(str,i) ) for i in match_df.values.tolist() ] + + except NameError: + print("Can't find the input files or the variables.") + print("Variables in this file:\t" + repr(var_list)) +else: + print("ERROR: ugrid_lfric_mpr.py -> Must specify directory of files.\n") + sys.exit(1) + +######################################################################## +