Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development fx restructured #21

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
5068594
removed all traces of specific fx file retrieval
valeriupredoi May 12, 2019
909e34f
do time checks only if not fx variable
valeriupredoi May 12, 2019
1bbf364
removed references to fx files and dirs
valeriupredoi May 12, 2019
3e63424
removed fx files function
valeriupredoi May 12, 2019
33b9066
restructuring to allow for fx vars treated like any other var
valeriupredoi May 12, 2019
5614f7c
cleaning up
valeriupredoi May 13, 2019
475cd8f
cleaning up
valeriupredoi May 13, 2019
0ee4a72
fixed test for new fx file retrieval
valeriupredoi May 13, 2019
6989951
added checks for no time gating
valeriupredoi May 13, 2019
988d09c
fixed the test to reflect the new standards
valeriupredoi May 13, 2019
775715c
fixed the test to reflect the new standards
valeriupredoi May 13, 2019
32b6376
fixed the data structure standard to reflect new standards
valeriupredoi May 13, 2019
d50465c
Merge branch 'version2_development' into version2_development_fx_REST…
valeriupredoi May 13, 2019
2ff537d
fixed little bug
valeriupredoi May 13, 2019
62922ed
Merge branch 'version2_development' into version2_development_fx_REST…
valeriupredoi Jun 10, 2019
b36f312
removed stray import from conflict
valeriupredoi Jun 10, 2019
1522c89
Merge branch 'development' into development_fx_RESTRUCTURED
valeriupredoi Jun 10, 2019
cf05187
fixed stray import mistakes introduced by merge
valeriupredoi Jun 10, 2019
e799ec8
removed unused import
valeriupredoi Jun 10, 2019
1a4a766
Merge branch 'development' into development_fx_RESTRUCTURED
valeriupredoi Jun 14, 2019
7ab878f
removed unused import
valeriupredoi Jun 14, 2019
0fc4392
removed useless variable
valeriupredoi Jun 14, 2019
6f51e4b
Merge branch 'development' into development_fx_RESTRUCTURED
valeriupredoi Jul 15, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions esmvalcore/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,18 +179,6 @@ def get_institutes(variable):
return CFG.get(project, {}).get('institutes', {}).get(dataset, [])


def replace_mip_fx(fx_file):
"""Replace MIP so to retrieve correct fx files."""
default_mip = 'Amon'
if fx_file not in CFG['CMIP5']['fx_mip_change']:
logger.warning(
'mip for fx variable %s is not specified in '
'config_developer.yml, using default (%s)', fx_file, default_mip)
new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip)
logger.debug("Switching mip for fx file finding to %s", new_mip)
return new_mip


TAGS_CONFIG_FILE = os.path.join(
DIAGNOSTICS_PATH, 'config-references.yml')

Expand Down
56 changes: 19 additions & 37 deletions esmvalcore/_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import os
import re

from ._config import get_project_config, replace_mip_fx
from .cmor.table import CMOR_TABLES
from ._config import get_project_config

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -93,7 +92,7 @@ def select_files(filenames, start_year, end_year):
return selection


def _replace_tags(path, variable, fx_var=None):
def _replace_tags(path, variable):
"""Replace tags in the config-developer's file with actual values."""
path = path.strip('/')

Expand All @@ -104,9 +103,7 @@ def _replace_tags(path, variable, fx_var=None):
original_tag = tag
tag, _, _ = _get_caps_options(tag)

if tag == 'fx_var':
replacewith = fx_var
elif tag == 'latestversion': # handled separately later
if tag == 'latestversion': # handled separately later
continue
elif tag in variable:
replacewith = variable[tag]
Expand Down Expand Up @@ -196,16 +193,16 @@ def get_rootpath(rootpath, project):
raise KeyError('default rootpath must be specified in config-user file')


def _find_input_dirs(variable, rootpath, drs, fx_var=None):
def _find_input_dirs(variable, rootpath, drs):
"""Return a the full paths to input directories."""
project = variable['project']

root = get_rootpath(rootpath, project)
input_type = 'input_{}dir'.format('fx_' if fx_var else '')
input_type = 'input_dir'
path_template = _select_drs(input_type, drs, project)

dirnames = []
for dirname_template in _replace_tags(path_template, variable, fx_var):
for dirname_template in _replace_tags(path_template, variable):
for base_path in root:
dirname = os.path.join(base_path, dirname_template)
dirname = _resolve_latestversion(dirname)
Expand All @@ -218,50 +215,35 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None):
return dirnames


def _get_filenames_glob(variable, drs, fx_var=None):
def _get_filenames_glob(variable, drs):
"""Return patterns that can be used to look for input files."""
input_type = 'input_{}file'.format('fx_' if fx_var else '')
input_type = 'input_file'
path_template = _select_drs(input_type, drs, variable['project'])
filenames_glob = _replace_tags(path_template, variable, fx_var)
filenames_glob = _replace_tags(path_template, variable)
return filenames_glob


def _find_input_files(variable, rootpath, drs, fx_var=None):
logger.debug("Looking for input %sfiles for variable %s of dataset %s",
fx_var + ' fx ' if fx_var else '', variable['short_name'],
variable['dataset'])

input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var)
filenames_glob = _get_filenames_glob(variable, drs, fx_var)
def _find_input_files(variable, rootpath, drs):
input_dirs = _find_input_dirs(variable, rootpath, drs)
filenames_glob = _get_filenames_glob(variable, drs)
files = find_files(input_dirs, filenames_glob)

return files


def get_input_filelist(variable, rootpath, drs):
"""Return the full path to input files."""
# change ensemble to fixed r0i0p0 for fx variables
if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx':
if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx':

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

whoops

variable['ensemble'] = 'r0i0p0'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're already setting the ensemble in _recipe.py, so I think there is no need to do it again here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ermm, I don't think that gets picked up for all the cases, lemme test

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you had a chance to do this yet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not yet 😢

files = _find_input_files(variable, rootpath, drs)
files = select_files(files, variable['start_year'], variable['end_year'])
# do time gating only for non-fx variables
if variable['frequency'] != 'fx':
files = select_files(files, variable['start_year'],
variable['end_year'])
return files


def get_input_fx_filelist(variable, rootpath, drs):
"""Return a dict with the full path to fx input files."""
fx_files = {}
for fx_var in variable['fx_files']:
var = dict(variable)
var['mip'] = replace_mip_fx(fx_var)
table = CMOR_TABLES[var['cmor_table']].get_table(var['mip'])
var['frequency'] = table.frequency
realm = getattr(table.get(var['short_name']), 'modeling_realm', None)
var['modeling_realm'] = realm if realm else table.realm

files = _find_input_files(var, rootpath, drs, fx_var)
fx_files[fx_var] = files[0] if files else None

return fx_files


def get_output_file(variable, preproc_dir):
"""Return the full path to the output (preprocessed) file."""
cfg = get_project_config(variable['project'])
Expand Down
68 changes: 57 additions & 11 deletions esmvalcore/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
from copy import deepcopy

import yaml

from netCDF4 import Dataset

from . import __version__
from . import _recipe_checks as check
from . import __version__
from ._config import TAGS, get_institutes, replace_tags
from ._data_finder import (get_input_filelist, get_input_fx_filelist,
get_output_file, get_statistic_output_file)
from ._data_finder import (get_input_filelist, get_output_file,
get_statistic_output_file)
from ._provenance import TrackedFile, get_recipe_provenance
from ._recipe_checks import RecipeError
from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks,
Expand Down Expand Up @@ -361,6 +362,45 @@ def _get_default_settings(variable, config_user, derive=False):
return settings


def get_input_fx_filelist(variable, rootpath, drs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this function should be removed altogether, it looks like it migrated here from _data_finder.py

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reply below in the main body of the issue

"""Return a dict with fx vars keys and full file paths values."""
fx_files_dict = {}
for fx_var_dict in variable['fx_files']:
fx_var = _add_fxvar_keys(fx_var_dict, variable)
fx_files = get_input_filelist(
variable=fx_var,
rootpath=rootpath,
drs=drs)
if fx_files:
fx_files_dict[fx_var['short_name']] = fx_files[0]
else:
fx_files_dict[fx_var['short_name']] = None

return fx_files_dict


def _add_fxvar_keys(fx_var_dict, variable):
"""Add keys specific to fx variable to use get_input_filelist."""
fx_variable = dict(variable)

# set variable names
fx_variable['variable_group'] = fx_var_dict['short_name']
fx_variable['short_name'] = fx_var_dict['short_name']

# specificities of project
if fx_variable['project'] == 'CMIP5':
fx_variable['mip'] = 'fx'
fx_variable['ensemble'] = 'r0i0p0'
elif fx_variable['project'] == 'CMIP6':
fx_variable['grid'] = variable['grid']
if 'mip' in fx_var_dict:
fx_variable['mip'] = fx_var_dict['mip']
# add missing cmor info
_add_cmor_info(fx_variable, override=True)

return fx_variable


def _update_fx_settings(settings, variable, config_user):
"""Find and set the FX derive/mask settings."""
# update for derive
Expand All @@ -384,7 +424,7 @@ def _update_fx_settings(settings, variable, config_user):
settings['mask_landsea']['fx_files'] = []

var = dict(variable)
var['fx_files'] = ['sftlf', 'sftof']
var['fx_files'] = [{'short_name': 'sftlf'}, {'short_name': 'sftof'}]
fx_files_dict = get_input_fx_filelist(
variable=var,
rootpath=config_user['rootpath'],
Expand All @@ -402,7 +442,7 @@ def _update_fx_settings(settings, variable, config_user):
settings['mask_landseaice']['fx_files'] = []

var = dict(variable)
var['fx_files'] = ['sftgif']
var['fx_files'] = [{'short_name': 'sftgif'}]
fx_files_dict = get_input_fx_filelist(
variable=var,
rootpath=config_user['rootpath'],
Expand All @@ -415,11 +455,13 @@ def _update_fx_settings(settings, variable, config_user):

for step in ('area_statistics', 'volume_statistics'):
if settings.get(step, {}).get('fx_files'):
settings[step]['fx_files'] = get_input_fx_filelist(
variable=variable,
var = dict(variable)
var['fx_files'] = settings.get(step, {}).get('fx_files')
fx_files_dict = get_input_fx_filelist(
variable=var,
rootpath=config_user['rootpath'],
drs=config_user['drs'],
)
drs=config_user['drs'])
settings[step]['fx_files'] = fx_files_dict


def _read_attributes(filename):
Expand Down Expand Up @@ -772,6 +814,10 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name):
name=derive_name)
derive_tasks.append(task)

# don't do time gating for fx variables
if variables[0]['frequency'] == 'fx':
profile['extract_time'] = False

# Create (final) preprocessor task
task = _get_single_preprocessor_task(
variables,
Expand Down Expand Up @@ -905,8 +951,8 @@ def _initialize_variables(self, raw_variable, raw_datasets):
variable['institute'] = institute
check.variable(variable, required_keys)
if 'fx_files' in variable:
for fx_file in variable['fx_files']:
DATASET_KEYS.add(fx_file)
for fx_file_dict in variable['fx_files']:
DATASET_KEYS.add(fx_file_dict['short_name'])
# Get the fx files
variable['fx_files'] = get_input_fx_filelist(
variable=variable,
Expand Down
21 changes: 12 additions & 9 deletions esmvalcore/_recipe_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,18 @@ def data_availability(input_files, var):

required_years = set(range(var['start_year'], var['end_year'] + 1))
available_years = set()
for filename in input_files:
start, end = get_start_end_year(filename)
available_years.update(range(start, end + 1))

missing_years = required_years - available_years
if missing_years:
raise RecipeError(
"No input data available for years {} in files {}".format(
", ".join(str(year) for year in missing_years), input_files))
# check time avail only for non-fx variables
if var['frequency'] != 'fx':
for filename in input_files:
start, end = get_start_end_year(filename)
available_years.update(range(start, end + 1))

missing_years = required_years - available_years
if missing_years:
raise RecipeError(
"No input data available for years {} in files {}".format(
", ".join(str(year) for year in missing_years),
input_files))


def tasks_valid(tasks):
Expand Down
6 changes: 4 additions & 2 deletions esmvalcore/cmor/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,15 @@ def check_metadata(self, logger=None):
self._check_fill_value()
self._check_dim_names()
self._check_coords()
self._check_time_coord()
if self.frequency != 'fx':
self._check_time_coord()
self._check_rank()

self.report_warnings(logger)
self.report_errors()

self._add_auxiliar_time_coordinates()
if self.frequency != 'fx':
self._add_auxiliar_time_coordinates()
return self._cube

def report_errors(self):
Expand Down
31 changes: 2 additions & 29 deletions esmvalcore/config-developer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ CMIP6:
BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP6'
institutes:
Expand Down Expand Up @@ -147,26 +147,7 @@ CMIP5:
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/'
SMHI: '[dataset]/[ensemble]/[exp]/[frequency]'
BSC: '[type]/[project]/[exp]/[dataset.lower]'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc'
input_fx_dir:
default: '/'
BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]'
CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/'
DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]'
ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0'
input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc'
fx_mip_change:
'areacella': 'Amon'
'areacello': 'Omon'
'basin': 'Omon'
'deptho': 'Omon'
'mrsofc': 'Lmon'
'orog': 'Amon'
'rootd': 'Lmon'
'sftgif': 'Lmon'
'sftlf': 'Amon'
'sftof': 'Omon'
'volcello': 'Omon'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
institutes:
'ACCESS1-0': ['CSIRO-BOM']
Expand Down Expand Up @@ -239,10 +220,6 @@ OBS:
input_file:
default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc'
BSC: '[short_name]_*.nc'
input_fx_dir:
default: 'Tier[tier]/[dataset]'
input_fx_file:
default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc'
output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP5'

Expand All @@ -251,10 +228,6 @@ obs4mips:
input_dir:
default: 'Tier[tier]/[dataset]'
input_file: '[short_name]_[dataset]_[level]_[version]_*.nc'
input_fx_dir:
default: 'Tier[tier]/[dataset]'
input_fx_file:
default: '[project]_[dataset]_fx_[fx_var].nc'
output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP6'
cmor_path: 'obs4mips'
Expand Down
2 changes: 1 addition & 1 deletion esmvalcore/preprocessor/_derive/nbp_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class DerivedVariable(DerivedVariableBase):
# Required variables
required = [{
'short_name': 'nbp',
'fx_files': ['sftlf'],
'fx_files': [{'short_name': 'sftlf'}],
}]

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/preprocessor/_derive/test_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_get_required_with_fx():

reference = [{
'short_name': 'nbp',
'fx_files': ['sftlf'],
'fx_files': [{'short_name': 'sftlf'}],
}]

assert variables == reference
Expand Down
Loading