Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proposal (and bugfix) to implement value-based conditional requirement #1511

Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
845c49f
Varname mangling for value-based conditional req.
phargogh Jan 24, 2024
969ef23
Forgot a few docstrings. RE:#1503
phargogh Jan 24, 2024
273ef3f
Merge branch 'main' of https://github.com/natcap/invest into bugfix/1…
phargogh Jan 24, 2024
6096ad6
Noting change in HISTORY. RE:#1509
phargogh Jan 24, 2024
85d8028
Merge branch 'main' into bugfix/1509-una-validation-missing-uniform-s…
phargogh Feb 13, 2024
66f1490
Reworking sufficiency for clarity. RE:#1509
phargogh Feb 14, 2024
37a0338
Reworking comprehensions into standard loop for readability.
phargogh Feb 14, 2024
e1bef80
Reworking conditional requirement.
phargogh Feb 14, 2024
658748d
Removing .value in UNA conditional requirement.
phargogh Feb 14, 2024
0ce3303
Removing the variable name rewriting.
phargogh Feb 14, 2024
041f411
Slight refactor for efficiency. RE:#1509
phargogh Feb 14, 2024
4a1f548
First stab at rewriting nested cond. requirement.
phargogh Feb 14, 2024
416240d
Removing unnecessary conditional requirement - table already required…
phargogh Feb 14, 2024
21ae820
Clarifying varnames, copying spec to avoid modifying by reference, no…
phargogh Feb 14, 2024
b5a98a4
Merge branch 'bugfix/1509-una-validation-missing-uniform-search-radiu…
phargogh Feb 14, 2024
4135eec
Adding a test for vector field validity.
phargogh Feb 14, 2024
0b9111f
Adding a test for csv column conditional validity.
phargogh Feb 14, 2024
5e6e35b
Correcting WKT syntax. RE:#1509
phargogh Feb 14, 2024
b02b49c
Adding a test for CSV row conditional validation. RE:#1509
phargogh Feb 14, 2024
fec802a
Fixing expression in UCM. RE:#1509
phargogh Feb 14, 2024
f8497f6
Fixing operands in wind energy expression. RE:#1509
phargogh Feb 14, 2024
cb70a12
Removing a comment that is no longer relevant. RE:#1509
phargogh Feb 14, 2024
ecdc498
Switching bitwise operations to python bools. RE:#1509
phargogh Feb 14, 2024
bf55a26
Correcting boolean operator and->or. RE:#1509
phargogh Feb 14, 2024
482b327
Light linting, mostly around line length. RE:#1509
phargogh Feb 21, 2024
b32fcbf
Removing a deprecated line. RE:#1509
phargogh Feb 21, 2024
1deee3e
Simplifying validate(). RE:#1509
phargogh Feb 21, 2024
880e31f
Adding directory axis. RE:#1509
phargogh Feb 21, 2024
d70b15d
Casting to bool outside of the expression. RE:#1509
phargogh Feb 22, 2024
831fd99
Small test fix from slight return value change.
phargogh Feb 22, 2024
4b6f932
HQ validation now partially covered by expressions.
phargogh Feb 22, 2024
617e76d
Clarifying set names, restoring falsey exclusion.
phargogh Feb 22, 2024
03c7fb0
Noting changes in history.
phargogh Feb 27, 2024
c49c398
Merge branch 'main' into bugfix/1509-una-validation-missing-uniform-s…
phargogh Feb 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ Unreleased Changes
* Fixed an issue where Urban Nature Access would crash if an administrative
boundary geometry did not overlap any people in the population raster.
https://github.com/natcap/invest/issues/1503
* Fixed an issue where validation was failing to catch missing values in
the uniform search radius args key when using uniform search radii.
https://github.com/natcap/invest/issues/1509
* Fixed an issue where the output administrative units vector's
``Pund_adm`` and ``Povr_adm`` fields representing undersupplied and
oversupplied populations, respectively, had values of 0 when running the
Expand Down
6 changes: 3 additions & 3 deletions src/natcap/invest/urban_cooling_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,23 +71,23 @@
"considered a green area.")},
"shade": {
"type": "ratio",
"required": "cc_method == factors",
"required": "cc_method == 'factors'",
"about": gettext(
"The proportion of area in this LULC class that is "
"covered by tree canopy at least 2 meters high. "
"Required if the 'factors' option is selected for "
"the Cooling Capacity Calculation Method.")},
"albedo": {
"type": "ratio",
"required": "cc_method == factors",
"required": "cc_method == 'factors'",
"about": gettext(
"The proportion of solar radiation that is directly "
"reflected by this LULC class. Required if the "
"'factors' option is selected for the Cooling "
"Capacity Calculation Method.")},
"building_intensity": {
"type": "ratio",
"required": "cc_method == intensity",
"required": "cc_method == 'intensity'",
"about": gettext(
"The ratio of building floor area to footprint "
"area, with all values in this column normalized "
Expand Down
2 changes: 0 additions & 2 deletions src/natcap/invest/urban_nature_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,6 @@
'search_radius_m': {
'type': 'number',
'units': u.meter,
'required':
f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
'expression': 'value >= 0',
'about': gettext(
"The search radius in meters to use "
Expand Down
99 changes: 50 additions & 49 deletions src/natcap/invest/validation.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
"""Common validation utilities for InVEST models."""
import ast
import copy
import functools
import importlib
import inspect
import io
import logging
import os
import pprint
import queue
import re
import threading
import token
import tokenize
emlys marked this conversation as resolved.
Show resolved Hide resolved
import warnings

import numpy
import pandas
import pint
import pygeoprocessing
from osgeo import gdal
from osgeo import osr
from osgeo import ogr
from osgeo import osr

from . import gettext
from . import spec_utils
Expand Down Expand Up @@ -670,8 +674,6 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
return df




def check_csv(filepath, **kwargs):
"""Validate a table.

Expand Down Expand Up @@ -844,9 +846,6 @@ def get_headers_to_validate(spec):
"""
headers = []
for key, val in spec.items():
# for now only check headers that are always required
# assume that any conditionally-required headers are validated by the
# model's validate function
# if 'required' isn't a key, it defaults to True
if ('required' not in val) or (val['required'] is True):
emlys marked this conversation as resolved.
Show resolved Hide resolved
# brackets are a special character for our args spec syntax
Expand Down Expand Up @@ -930,65 +929,48 @@ def validate(args, spec, spatial_overlap_opts=None):
conditionally_required_keys.add(key)

if missing_keys:
validation_warnings.append((sorted(missing_keys), MESSAGES['MISSING_KEY']))
validation_warnings.append(
(sorted(missing_keys), MESSAGES['MISSING_KEY']))

if keys_with_no_value:
validation_warnings.append((sorted(keys_with_no_value), MESSAGES['MISSING_VALUE']))
validation_warnings.append(
(sorted(keys_with_no_value), MESSAGES['MISSING_VALUE']))

# step 2: evaluate sufficiency of keys/inputs
# Sufficiency: An input is sufficient when its key is present in args and
# it has a value. A sufficient input need not be valid. Sufficiency is
# used by the conditional requirement phase (step 3 in this function) to
# determine whether a conditionally required input is required.
# The only special case about sufficiency is with boolean values.
# A boolean value absent from args is insufficient. A boolean input that
# is present in args but False is in sufficient. A boolean input that is
# present in args and True is sufficient.
# has a truthy value. If the input is missing from args or is falsy, it is
# insufficient.
insufficient_keys = missing_keys.union(keys_with_no_value)
emlys marked this conversation as resolved.
Show resolved Hide resolved
sufficient_inputs = {}
emlys marked this conversation as resolved.
Show resolved Hide resolved
for key, parameter_spec in spec.items():
# If the key isn't present, no need to validate.
# If it's required and isn't present, we wouldn't have gotten to this
# point in the function.
if key not in args:
insufficient_keys = set()
for key in spec.keys():
try:
sufficient_inputs[key] = bool(args[key])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would an arg value of 0 be labeled insufficient here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This specific line has been removed in a prior edit, but this is a case that the authors of expressions will need to be aware of. One cost of this proposed simplification is that it is possible for there to be a falsey value (like 0) which was previously acknowledged as "sufficient" to now be effectively insufficient. This can be mitigated by better, context-aware expressions in cases where this may be an issue, so I think we'll largely be OK. In practice, I don't think there are many places where we are likely to encounter a value of 0.

except KeyError:
# input is definitely insufficient if it's missing from args
sufficient_inputs[key] = False
insufficient_keys.add(key)
continue

# If the value is empty and it isn't required, then we don't need to
# validate it.
if args[key] in ('', None):
sufficient_inputs[key] = False
if sufficient_inputs[key] is False:
insufficient_keys.add(key)
continue

# Boolean values are special in that their T/F state is equivalent
# to their satisfaction. If a checkbox is checked, it is
# considered satisfied.
if spec[key]['type'] == 'boolean':
sufficient_inputs[key] = args[key]

# Any other input type must be sufficient because it is in args and
# has a value.
else:
sufficient_inputs[key] = True

# step 3: evaluate required status of conditionally required keys
# keep track of keys that are explicity not required due to
# their condition being false
# Step 3: evaluate whether conditionally required keys are required.
emlys marked this conversation as resolved.
Show resolved Hide resolved
# We also want to keep track of keys that are not required due to their
# conditional requirement expression evaluating to False.
#
# An input is conditionally required when the expression given is
# truthy.
expression_values = {
input_key: args.get(input_key, False) for input_key in spec.keys()}
excluded_keys = set()
for key in conditionally_required_keys:
# An input is conditionally required when the expression given
# evaluates to True.
is_conditionally_required = _evaluate_expression(
expression=spec[key]['required'],
variable_map=sufficient_inputs)
expression=f'bool({spec[key]["required"]})',
emlys marked this conversation as resolved.
Show resolved Hide resolved
variable_map=expression_values)
if is_conditionally_required:
if key not in args:
validation_warnings.append(([key], MESSAGES['MISSING_KEY']))
else:
if args[key] in ('', None):
validation_warnings.append(([key], MESSAGES['MISSING_VALUE']))
elif args[key] in ('', None):
validation_warnings.append(([key], MESSAGES['MISSING_VALUE']))
else:
excluded_keys.add(key)

Expand All @@ -1005,11 +987,30 @@ def validate(args, spec, spatial_overlap_opts=None):
# Extra args that don't exist in the MODEL_SPEC are okay
# we don't need to try to validate them
try:
parameter_spec = spec[key]
# Using deepcopy to make sure we don't modify the original spec
parameter_spec = copy.deepcopy(spec[key])
except KeyError:
LOGGER.debug(f'Provided key {key} does not exist in MODEL_SPEC')
continue

# rewrite parameter_spec for any nested, conditional validity
axis_keys = None
if parameter_spec['type'] == 'csv':
axis_keys = ['columns', 'rows']
elif parameter_spec['type'] == 'vector':
axis_keys = ['fields']

emlys marked this conversation as resolved.
Show resolved Hide resolved
if axis_keys:
for axis_key in axis_keys:
if axis_key not in parameter_spec:
continue
for nested_key, nested_spec in parameter_spec[axis_key].items():
if ('required' in nested_spec
and isinstance(nested_spec['required'], str)):
parameter_spec[axis_key][nested_key]['required'] = (
emlys marked this conversation as resolved.
Show resolved Hide resolved
_evaluate_expression(
nested_spec['required'], expression_values))

type_validation_func = _VALIDATION_FUNCS[parameter_spec['type']]

if type_validation_func is None:
Expand Down
35 changes: 16 additions & 19 deletions src/natcap/invest/wind_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,24 @@
import tempfile

import numpy
from scipy import integrate

import shapely.wkb
import shapely.wkt
import pygeoprocessing
import shapely.ops
import shapely.prepared
from shapely import speedups

import shapely.wkb
import shapely.wkt
import taskgraph
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
from scipy import integrate
from shapely import speedups

import pygeoprocessing
import taskgraph
from . import utils
from . import gettext
from . import spec_utils
from .unit_registry import u
from . import utils
from . import validation
from .model_metadata import MODEL_METADATA
from . import gettext

from .unit_registry import u

LOGGER = logging.getLogger(__name__)
speedups.enable()
Expand Down Expand Up @@ -117,7 +114,7 @@
**spec_utils.AOI,
"projected": True,
"projection_units": u.meter,
"required": "valuation_container & grid_points_path",
"required": "valuation_container and grid_points_path",
"about": gettext(
"Map of the area(s) of interest over which to run the model "
"and aggregate valuation results. Required if Run Valuation "
Expand All @@ -134,7 +131,7 @@
"type": "vector",
"fields": {},
"geometries": {"POLYGON", "MULTIPOLYGON"},
"required": "min_distance | max_distance | valuation_container",
"required": "min_distance or max_distance or valuation_container",
"about": gettext(
"Map of the coastlines of landmasses in the area of interest. "
"Required if the Minimum Distance and Maximum Distance inputs "
Expand Down Expand Up @@ -353,7 +350,7 @@
"about": gettext("Longitude of the connection point.")
}
},
"required": "valuation_container & (not avg_grid_distance)",
"required": "valuation_container and (not avg_grid_distance)",
"about": gettext(
"Table of grid and land connection points to which cables "
"will connect. Required if Run Valuation is selected and "
Expand All @@ -364,7 +361,7 @@
"expression": "value > 0",
"type": "number",
"units": u.kilometer,
"required": "valuation_container & (not grid_points_path)",
"required": "valuation_container and (not grid_points_path)",
"about": gettext(
"Average distance to the onshore grid from coastal cable "
"landing points. Required if Run Valuation is selected and "
Expand Down Expand Up @@ -399,7 +396,7 @@
"about": gettext("Price of energy for each year.")
}
},
"required": "valuation_container & price_table",
"required": "valuation_container and price_table",
"about": gettext(
"Table of yearly prices for wind energy. There must be a row "
"for each year in the lifespan given in the 'time_period' "
Expand All @@ -410,7 +407,7 @@
"wind_price": {
"type": "number",
"units": u.currency/u.kilowatt_hour,
"required": "valuation_container & (not price_table)",
"required": "valuation_container and (not price_table)",
"about": gettext(
"The initial price of wind energy, at the first year in the "
"wind energy farm lifespan. Required if Run Valuation is "
Expand All @@ -419,7 +416,7 @@
},
"rate_change": {
"type": "ratio",
"required": "valuation_container & (not price_table)",
"required": "valuation_container and (not price_table)",
"about": gettext(
"The annual rate of change in the price of wind energy. "
"Required if Run Valuation is selected and Use Price Table "
Expand Down
13 changes: 13 additions & 0 deletions tests/test_urban_nature_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,3 +1035,16 @@ def test_validate(self):
args['search_radius_mode'] = (
urban_nature_access.RADIUS_OPT_URBAN_NATURE)
self.assertEqual(urban_nature_access.validate(args), [])

def test_validate_uniform_search_radius(self):
"""UNA: Search radius is required when using uniform search radii."""
from natcap.invest import urban_nature_access
from natcap.invest import validation

args = _build_model_args(self.workspace_dir)
args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_UNIFORM
args['search_radius'] = ''

warnings = urban_nature_access.validate(args)
self.assertEqual(warnings, [(['search_radius'],
validation.MESSAGES['MISSING_VALUE'])])
Loading
Loading