Parse jediyaml only once #2387

Merged

36 commits merged on Mar 18, 2024

Changes from 5 commits

Commits (36)
c740e56 - gocart inst_aod fix (CoryMartin-NOAA, Feb 15, 2024)
a2a9a2c - Missing GOCART spot (CoryMartin-NOAA, Feb 16, 2024)
d371fca - Update gdas hash (CoryMartin-NOAA, Feb 16, 2024)
24146ff - modify snow DA test to include aero DA (CoryMartin-NOAA, Feb 16, 2024)
430cc08 - Merge branch 'develop' into feature/gocart_inst_aod (CoryMartin-NOAA, Feb 28, 2024)
ae81057 - Merge branch 'develop' into feature/gocart_inst_aod (CoryMartin-NOAA, Feb 29, 2024)
f7a1303 - random bugfix (CoryMartin-NOAA, Feb 29, 2024)
9617d65 - Merge branch 'feature/gocart_inst_aod' of https://github.com/CoryMart… (CoryMartin-NOAA, Feb 29, 2024)
8be840a - parse jediyaml only once (aerorahul, Mar 8, 2024)
03fc903 - fix pynorms (aerorahul, Mar 8, 2024)
b54b525 - Merge branch 'develop' into feature/jedi-yaml (aerorahul, Mar 8, 2024)
ef89d91 - Update ush/python/pygfs/task/analysis.py (aerorahul, Mar 11, 2024)
7b4d7e7 - Update ush/python/pygfs/task/analysis.py (aerorahul, Mar 11, 2024)
3387dd2 - Update ush/python/pygfs/task/analysis.py (aerorahul, Mar 11, 2024)
e9f78e8 - Update ush/python/pygfs/task/analysis.py (aerorahul, Mar 11, 2024)
e2c9a51 - throw a KeyError when looking for value in a nested dict if key is no… (aerorahul, Mar 11, 2024)
fb53150 - fix pynorms (aerorahul, Mar 11, 2024)
d303281 - Merge branch 'develop' into feature/gocart_inst_aod (CoryMartin-NOAA, Mar 11, 2024)
1161b77 - Update ush/python/pygfs/task/analysis.py (aerorahul, Mar 12, 2024)
40847bf - add caveat to the doc-block (aerorahul, Mar 12, 2024)
f473ee9 - Merge branch 'develop' into feature/jedi-yaml (aerorahul, Mar 12, 2024)
154e792 - Merge branch 'develop' into feature/gocart_inst_aod (aerorahul, Mar 12, 2024)
06b69bd - Merge branch 'feature/gocart_inst_aod' into feature/jedi-yaml (aerorahul, Mar 12, 2024)
4120f51 - parse jedi_yaml once for aerosol and atm jedi tasks (aerorahul, Mar 12, 2024)
c22686e - update aeroDA config file (aerorahul, Mar 13, 2024)
55fc774 - make ush/forecast_predet.sh shellcheck compliant (aerorahul, Mar 13, 2024)
c0c8573 - update gdasapp hash for aerosol DA j2 templates (aerorahul, Mar 13, 2024)
8218ee7 - update configs and some more yamls; (aerorahul, Mar 13, 2024)
320730c - update pointer to gdasapp (aerorahul, Mar 14, 2024)
f1e0149 - Merge branch 'develop' into feature/jedi-yaml (aerorahul, Mar 14, 2024)
c163a4f - The jjob for ocean and ice pp, only defines the component specific hi… (aerorahul, Mar 14, 2024)
250b5e9 - Update scripts/exglobal_oceanice_products.py (aerorahul, Mar 14, 2024)
816944e - Merge branch 'develop' into feature/jedi-yaml (aerorahul, Mar 14, 2024)
78aafef - Merge branch 'develop' into feature/jedi-yaml (aerorahul, Mar 14, 2024)
2d81620 - update Jenkinsfile to add renamed and expanded test (aerorahul, Mar 15, 2024)
3420ed3 - Merge branch 'develop' into feature/jedi-yaml (aerorahul, Mar 17, 2024)
90 changes: 72 additions & 18 deletions ush/python/pygfs/task/analysis.py
@@ -4,6 +4,7 @@
import glob
import tarfile
from logging import getLogger
from pprint import pformat
from netCDF4 import Dataset
from typing import List, Dict, Any, Union

@@ -30,6 +31,10 @@ def __init__(self, config: Dict[str, Any]) -> None:

def initialize(self) -> None:
super().initialize()

# all JEDI analyses need a JEDI config
self.task_config.jedi_config = self.get_jedi_config()

# all analyses need to stage observations
obs_dict = self.get_obs_dict()
FileHandler(obs_dict).sync()
@@ -41,13 +46,33 @@ def initialize(self) -> None:
# link jedi executable to run directory
self.link_jediexe()

@logit(logger)
def get_jedi_config(self) -> Dict[str, Any]:
"""Compile a dictionary of JEDI configuration from JEDIYAML template file

Parameters
----------

Returns
----------
jedi_config : Dict
a dictionary containing the fully rendered JEDI yaml configuration
"""

# generate JEDI YAML file
logger.info(f"Generate JEDI YAML config: {self.task_config.jedi_yaml}")
jedi_config = parse_j2yaml(self.task_config.JEDIYAML, self.task_config, searchpath=self.gdasapp_j2tmpl_dir)
logger.debug(f"JEDI config:\n{pformat(jedi_config)}")

return jedi_config

@logit(logger)
def get_obs_dict(self) -> Dict[str, Any]:
"""Compile a dictionary of observation files to copy

This method uses the OBS_LIST configuration variable to generate a dictionary
from a list of YAML files that specify what observation files are to be
copied to the run directory from the observation input directory
This method extracts the 'observers' list from the parsed JEDI yaml and, from that
list, builds the list of observation files that are to be copied to the run directory
from the observation input directory

Parameters
----------
@@ -57,13 +82,13 @@ def get_obs_dict(self) -> Dict[str, Any]:
obs_dict: Dict
a dictionary containing the list of observation files to copy for FileHandler
"""
logger.debug(f"OBS_LIST: {self.task_config['OBS_LIST']}")
obs_list_config = parse_j2yaml(self.task_config["OBS_LIST"], self.task_config, searchpath=self.gdasapp_j2tmpl_dir)
logger.debug(f"obs_list_config: {obs_list_config}")
# get observers from master dictionary
observers = obs_list_config['observers']

logger.info(f"Extracting a list of observation files from {self.task_config.JEDIYAML}")
observations = find_value_in_nested_dict(self.task_config.jedi_config, 'observations')
logger.debug(f"observations:\n{pformat(observations)}")

copylist = []
for ob in observers:
for ob in observations['observers']:
obfile = ob['obs space']['obsdatain']['engine']['obsfile']
basename = os.path.basename(obfile)
copylist.append([os.path.join(self.task_config['COM_OBS'], basename), obfile])
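
For orientation, a minimal sketch of the nested structure these lookups assume is shown below. The key names mirror the accesses in get_obs_dict and get_bias_dict; the placement under 'cost function' and the file names are hypothetical:

    # Hypothetical, trimmed-down JEDI config illustrating the keys walked above.
    jedi_config = {
        'cost function': {
            'observations': {
                'observers': [
                    {
                        # get_obs_dict copies this file from COM_OBS into the run directory
                        'obs space': {'obsdatain': {'engine': {'obsfile': 'obs/hypothetical_obs.nc4'}}},
                        # optional; get_bias_dict stages the matching bias-correction files
                        'obs bias': {'input file': 'bc/hypothetical.satbias.nc'},
                    },
                ],
            },
        },
    }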
@@ -77,9 +102,9 @@ def get_obs_dict(self) -> Dict[str, Any]:
def get_bias_dict(self) -> Dict[str, Any]:
"""Compile a dictionary of observation files to copy

This method uses the OBS_LIST configuration variable to generate a dictionary
from a list of YAML files that specify what observation bias correction files
are to be copied to the run directory from the observation input directory
This method extracts the 'observers' list from the parsed JEDI yaml and, from that
list, builds the list of observation bias correction files that are to be copied to
the run directory from the observation input directory

Parameters
----------
@@ -89,13 +114,13 @@ def get_bias_dict(self) -> Dict[str, Any]:
bias_dict: Dict
a dictionary containing the list of observation bias files to copy for FileHandler
"""
logger.debug(f"OBS_LIST: {self.task_config['OBS_LIST']}")
obs_list_config = parse_j2yaml(self.task_config["OBS_LIST"], self.task_config, searchpath=self.gdasapp_j2tmpl_dir)
logger.debug(f"obs_list_config: {obs_list_config}")
# get observers from master dictionary
observers = obs_list_config['observers']

logger.info(f"Extracting a list of bias correction files from {self.task_config.JEDIYAML}")
observations = find_value_in_nested_dict(self.task_config.jedi_config, 'observations')
logger.debug(f"observations:\n{pformat(observations)}")

copylist = []
for ob in observers:
for ob in observations['observers']:
if 'obs bias' in ob.keys():
obfile = ob['obs bias']['input file']
obdir = os.path.dirname(obfile)
@@ -104,6 +129,7 @@ def get_bias_dict(self) -> Dict[str, Any]:
for file in ['satbias.nc', 'satbias_cov.nc', 'tlapse.txt']:
bfile = f"{prefix}.{file}"
copylist.append([os.path.join(self.task_config.COM_ATMOS_ANALYSIS_PREV, bfile), os.path.join(obdir, bfile)])
# TODO: Why is this specific to ATMOS?

bias_dict = {
'mkdir': [os.path.join(self.runtime_config.DATA, 'bc')],
@@ -328,3 +354,31 @@ def tgz_diags(statfile: str, diagdir: str) -> None:
# Add diag files to tarball
for diagfile in diags:
tgz.add(diagfile, arcname=os.path.basename(diagfile))


@logit(logger)
def find_value_in_nested_dict(nested_dict: Dict, key: str) -> Any:
Contributor commented:

this is nice!

Contributor commented:

As written, this is going to have strange (possibly inconsistent) behavior for keys that appear multiple times. If the top-most dict contains the target key, that value will be returned. Otherwise, it will iterate through keys looking for the target key, but dicts should not be considered naturally ordered (even though the current implementation of Python preserves insertion order), so if the target key is contained in multiple nested dicts (but not the top one), the value retrieved could change from execution to execution.

Even without the undetermined ordering issue, the retrieved value won't necessarily be the shallowest (unless it is in the top-level dict) or the deepest matching key.
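
A small illustration of this concern, using hypothetical keys:

    # 'observations' appears in two different branches of a hypothetical config
    cfg = {
        'cost function': {'observations': {'observers': ['A']}},
        'final': {'diagnostics': {'observations': {'observers': ['B']}}},
    }
    # find_value_in_nested_dict(cfg, 'observations') returns whichever match the
    # traversal happens to reach first; nothing in the function's contract says
    # that will be 'A' rather than 'B', or the shallower rather than the deeper match.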

Contributor Author replied:

This is true, and thus I don't think we can move it to a generic module. Think of giving someone just a house number to find and leaving them at the edge of town: which specific house they find will depend on the choices they make, left or right. The same is true of this search; it will return the first key the search encounters.

Perhaps we should remove this method and instead look for 'observations' under 'cost function'. If it finds it, then that's it. If not, it should look for 'observations' at the top level. Those are the use cases for now.
If we run into problems, we can revisit this and implement a find_observations method targeted for this rather than a generic find_value_in_nested_dict.

"""
Recursively search through a nested dictionary and return the value for the target key.
Parameters
----------
nested_dict : Dict
Dictionary to search
key : str
Key to search for

Returns
-------
Any
Value of the key

Raises
------
KeyError
If key is not found in dictionary

TODO: move this to a utility module so it can be used elsewhere
"""
    if isinstance(nested_dict, dict):
        if key in nested_dict:
            return nested_dict[key]
        # recurse into nested dicts; skip branches that do not contain the key
        for value in nested_dict.values():
            if isinstance(value, dict):
                try:
                    return find_value_in_nested_dict(value, key)
                except KeyError:
                    continue
    raise KeyError(f"Key '{key}' not found in the nested dictionary")
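
To make the alternative raised in the review thread concrete, here is a hedged sketch of a targeted lookup that checks 'cost function' first and then the top level; the helper name find_observations and its error message are illustrative and not part of this PR:

    from typing import Any, Dict

    def find_observations(jedi_config: Dict[str, Any]) -> Dict[str, Any]:
        """Illustrative only: look for 'observations' under 'cost function' first,
        then at the top level, covering the two use cases discussed above."""
        if 'cost function' in jedi_config and 'observations' in jedi_config['cost function']:
            return jedi_config['cost function']['observations']
        if 'observations' in jedi_config:
            return jedi_config['observations']
        raise KeyError("'observations' not found under 'cost function' or at the top level")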
7 changes: 3 additions & 4 deletions ush/python/pygfs/task/snow_analysis.py
@@ -269,11 +269,10 @@ def initialize(self) -> None:
logger.info("Staging ensemble backgrounds")
FileHandler(self.get_ens_bkg_dict(localconf)).sync()

# generate letkfoi YAML file
logger.info(f"Generate JEDI LETKF YAML file: {self.task_config.jedi_yaml}")
letkfoi_yaml = parse_j2yaml(self.task_config.JEDIYAML, self.task_config, searchpath=self.gdasapp_j2tmpl_dir)
save_as_yaml(letkfoi_yaml, self.task_config.jedi_yaml)
# Write out letkfoi YAML file
save_as_yaml(self.task_config.jedi_config, self.task_config.jedi_yaml)
logger.info(f"Wrote letkfoi YAML to: {self.task_config.jedi_yaml}")

# need output dir for diags and anl
logger.info("Create empty output [anl, diags] directories to receive output from executable")
newdirs = [
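
Taken together, the pattern this PR establishes is: parse JEDIYAML once in the base Analysis class, reuse the parsed dict when staging observation and bias files, and have each concrete task simply write it out. A hedged sketch of that flow, assuming the helpers snow_analysis.py already imports and a hypothetical concrete task class:

    from wxflow import save_as_yaml                 # assumed: same helper snow_analysis.py uses
    from pygfs.task.analysis import Analysis        # base class modified in this PR

    class HypotheticalJEDIAnalysis(Analysis):
        def initialize(self) -> None:
            # Analysis.initialize() now renders JEDIYAML once into
            # self.task_config.jedi_config and stages obs files from it
            super().initialize()
            # the concrete task only writes the already-parsed dict for the JEDI executable
            save_as_yaml(self.task_config.jedi_config, self.task_config.jedi_yaml)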