From 845c49f2a1091d2ea6437740966d7577cce49ac8 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 24 Jan 2024 11:55:38 -0800 Subject: [PATCH 01/30] Varname mangling for value-based conditional req. RE:#1503 --- src/natcap/invest/urban_nature_access.py | 54 ++++++++--------- src/natcap/invest/validation.py | 77 ++++++++++++++++++++++-- tests/test_urban_nature_access.py | 13 ++++ tests/test_validation.py | 67 ++++++++++++++++++--- 4 files changed, 172 insertions(+), 39 deletions(-) diff --git a/src/natcap/invest/urban_nature_access.py b/src/natcap/invest/urban_nature_access.py index f856da5dda..5ba306e3f5 100644 --- a/src/natcap/invest/urban_nature_access.py +++ b/src/natcap/invest/urban_nature_access.py @@ -92,7 +92,7 @@ 'type': 'number', 'units': u.meter, 'required': - f'search_radius_mode == "{RADIUS_OPT_URBAN_NATURE}"', + f'search_radius_mode.value == "{RADIUS_OPT_URBAN_NATURE}"', 'expression': 'value >= 0', 'about': ( 'The distance within which a LULC type is relevant ' @@ -125,7 +125,7 @@ "pop_[POP_GROUP]": { "type": "ratio", "required": ( - f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), "about": gettext( "The proportion of the population within each " @@ -248,7 +248,7 @@ 'name': 'uniform search radius', 'units': u.m, 'expression': 'value > 0', - 'required': f'search_radius_mode == "{RADIUS_OPT_UNIFORM}"', + 'required': f'search_radius_mode.value == "{RADIUS_OPT_UNIFORM}"', 'about': gettext( 'The search radius to use when running the model under a ' 'uniform search radius. Required when running the model ' @@ -257,7 +257,7 @@ 'population_group_radii_table': { 'name': 'population group radii table', 'type': 'csv', - 'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"', + 'required': f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"', 'index_col': 'pop_group', 'columns': { "pop_group": { @@ -273,7 +273,7 @@ 'type': 'number', 'units': u.meter, 'required': - f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"', + f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"', 'expression': 'value >= 0', 'about': gettext( "The search radius in meters to use " @@ -379,7 +379,7 @@ "group POP_GROUP within this administrative " "unit."), "created_if": ( - f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), }, "Pund_adm_[POP_GROUP]": { @@ -391,7 +391,7 @@ "administrative unit that are undersupplied " "with urban nature."), "created_if": ( - f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), }, "Povr_adm_[POP_GROUP]": { @@ -403,7 +403,7 @@ "administrative unit that is oversupplied " "with urban nature."), "created_if": ( - f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), }, }, @@ -418,7 +418,7 @@ "to the stated urban nature demand."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, # when RADIUS_OPT_UNIFORM @@ -428,7 +428,7 @@ "radius, weighted by the selected decay function."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", }, # When 
RADIUS_OPT_URBAN_NATURE @@ -439,7 +439,7 @@ "by the selected decay function."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", }, # When RADIUS_OPT_POP_GROUP @@ -450,7 +450,7 @@ "selected decay function."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, }, }, @@ -492,8 +492,8 @@ "function."), "bands": {1: {'type': 'number', 'units': u.count}}, "created_if": ( - f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or " - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'"), + f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or " + f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'"), }, "urban_nature_area.tif": { "about": gettext( @@ -501,15 +501,15 @@ "represented in each pixel."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - (f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or " - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'"), + (f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or " + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'"), }, "urban_nature_population_ratio.tif": { "about": gettext( "The calculated urban nature/population ratio."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'", + f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}'", }, # When RADIUS_OPT_URBAN_NATURE @@ -521,7 +521,7 @@ "land cover code LUCODE."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", }, "urban_nature_supply_percapita_lucode_[LUCODE].tif": { "about": gettext( @@ -529,7 +529,7 @@ "land use land cover code LUCODE"), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", }, "urban_nature_population_ratio_lucode_[LUCODE].tif": { "about": gettext( @@ -538,7 +538,7 @@ "land cover code LUCODE."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", }, # When RADIUS_OPT_POP_GROUP @@ -549,7 +549,7 @@ "POP_GROUP."), "bands": {1: {"type": "number", "units": u.count}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, "proportion_of_population_in_[POP_GROUP].tif": { "about": gettext( @@ -558,7 +558,7 @@ "POP_GROUP."), "bands": {1: {"type": "number", "units": u.none}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, "distance_weighted_population_in_[POP_GROUP].tif": { "about": gettext( @@ -568,7 +568,7 @@ "decay function."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, "distance_weighted_population_all_groups.tif": { "about": gettext( @@ -576,7 +576,7 @@ "decay function."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == 
'{RADIUS_OPT_POP_GROUP}'", }, "urban_nature_supply_percapita_to_[POP_GROUP].tif": { "about": gettext( @@ -584,7 +584,7 @@ "group POP_GROUP."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, "undersupplied_population_[POP_GROUP].tif": { "about": gettext( @@ -592,7 +592,7 @@ "are experiencing an urban nature deficit."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", }, "oversupplied_population_[POP_GROUP].tif": { "about": gettext( @@ -600,7 +600,7 @@ "are experiencing an urban nature surplus."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", } } }, diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 12278b91e8..8657f247e6 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -3,12 +3,15 @@ import functools import importlib import inspect +import io import logging import os import pprint import queue import re import threading +import token +import tokenize import warnings import numpy @@ -16,8 +19,8 @@ import pint import pygeoprocessing from osgeo import gdal -from osgeo import osr from osgeo import ogr +from osgeo import osr from . import gettext from . import spec_utils @@ -63,6 +66,45 @@ } +def _rewrite_name_dot_value(target_key, expression): + """Rewrite a ``name.value`` attribute as a single variable. + + This function uses python's ``tokenize`` library to tokenize the expression + before checking for the target key and presence of a ``value`` attribute. + This eliminates false-positives with similarly-named variables. + + Args: + target_key (string): The target symbol that we expect to have a + ``.value`` attribute. + expression (string): A string expression likely containing + ``{target_key}.value`` + + Returns: + A rewritten, valid python code string where ``{target_key}.value`` has + been rewritten as ``__{target_key}__value__``. + """ + tokens = [t for t in tokenize.generate_tokens( + io.StringIO(expression).readline)] + + replacement_name = f"__{target_key}__value__" + + output_tokens = [] + index = 0 + while index < (len(tokens) - 2): + if all([tokens[index].string == target_key, + tokens[index+1].string == '.', + tokens[index+2].string == 'value']): + # Only the type and string value are required for untokenization + output_tokens.append((token.NAME, replacement_name)) + index += 3 # skip the "." and "value" tokens. + else: + # We can just use the existing token if we're keeping it + output_tokens.append(tokens[index]) + index += 1 + + return tokenize.untokenize(output_tokens) + + def _evaluate_expression(expression, variable_map): """Evaluate a python expression. @@ -670,8 +712,6 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None, return df - - def check_csv(filepath, **kwargs): """Validate a table. @@ -980,9 +1020,36 @@ def validate(args, spec, spatial_overlap_opts=None): for key in conditionally_required_keys: # An input is conditionally required when the expression given # evaluates to True. + # We handle 2 cases of how the expression is written: + # * Case 1: a logical expression of boolean operations on another + # parameter. 
Example: "not " + # In this case, the symbol is interpreted as a bool. + # * Case 2: a logical expression comparing the value of the + # parameter against another known value. + # Example: ".value == 'uniform radius'" + expression_values = {} + expression_with_value = spec[key]['required'][:] # make a copy + for sufficient_key, is_sufficient in sufficient_inputs.items(): + try: + args_value = args[sufficient_key] + except KeyError: + # Handle the case where a sufficient key (e.g. optional) is + # missing from args. + args_value = None + + # If the expression contains a {key}.value pattern, rewrite the + # name to avoid dot-notation. + # Because sufficiency is a bool and bools are singletons, we cannot + # actually assign a .value attribute on a bool. + value_symbol = f'__{sufficient_key}__value__' + expression_with_value = _rewrite_name_dot_value( + sufficient_key, expression_with_value) + expression_values[value_symbol] = args_value + expression_values[sufficient_key] = is_sufficient + is_conditionally_required = _evaluate_expression( - expression=spec[key]['required'], - variable_map=sufficient_inputs) + expression=expression_with_value, + variable_map=expression_values) if is_conditionally_required: if key not in args: validation_warnings.append(([key], MESSAGES['MISSING_KEY'])) diff --git a/tests/test_urban_nature_access.py b/tests/test_urban_nature_access.py index 4de4c58b9c..34e8993dc4 100644 --- a/tests/test_urban_nature_access.py +++ b/tests/test_urban_nature_access.py @@ -1030,3 +1030,16 @@ def test_validate(self): args['search_radius_mode'] = ( urban_nature_access.RADIUS_OPT_URBAN_NATURE) self.assertEqual(urban_nature_access.validate(args), []) + + def test_validate_uniform_search_radius(self): + """UNA: Search radius is required when using uniform search radii.""" + from natcap.invest import urban_nature_access + from natcap.invest import validation + + args = _build_model_args(self.workspace_dir) + args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_UNIFORM + args['search_radius'] = '' + + warnings = urban_nature_access.validate(args) + self.assertEqual(warnings, [(['search_radius'], + validation.MESSAGES['MISSING_VALUE'])]) diff --git a/tests/test_validation.py b/tests/test_validation.py index 806da695ee..c8a8247194 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1,5 +1,6 @@ """Testing module for validation.""" import codecs +import collections import functools import os import platform @@ -9,12 +10,14 @@ import textwrap import time import unittest -from unittest.mock import Mock import warnings +from unittest.mock import Mock import numpy -from osgeo import gdal, osr, ogr import pandas +from osgeo import gdal +from osgeo import ogr +from osgeo import osr class SpatialOverlapTest(unittest.TestCase): @@ -30,8 +33,8 @@ def tearDown(self): def test_no_overlap(self): """Validation: verify lack of overlap.""" - from natcap.invest import validation import pygeoprocessing + from natcap.invest import validation driver = gdal.GetDriverByName('GTiff') filepath_1 = os.path.join(self.workspace_dir, 'raster_1.tif') @@ -226,7 +229,8 @@ def validate(args, limit_to=None): def test_n_workers(self): """Validation: validation error returned on invalid n_workers.""" - from natcap.invest import spec_utils, validation + from natcap.invest import spec_utils + from natcap.invest import validation args_spec = { 'n_workers': spec_utils.N_WORKERS, @@ -427,7 +431,8 @@ def test_raster_not_projected(self): def test_raster_incorrect_units(self): """Validation: test when a 
raster projection has wrong units.""" - from natcap.invest import spec_utils, validation + from natcap.invest import spec_utils + from natcap.invest import validation # Use EPSG:32066 # NAD27 / BLM 16N (in US Survey Feet) driver = gdal.GetDriverByName('GTiff') @@ -508,7 +513,8 @@ def test_missing_fieldnames(self): def test_vector_projected_in_m(self): """Validation: test that a vector's projection has expected units.""" - from natcap.invest import spec_utils, validation + from natcap.invest import spec_utils + from natcap.invest import validation driver = gdal.GetDriverByName('GPKG') filepath = os.path.join(self.workspace_dir, 'vector.gpkg') @@ -533,7 +539,8 @@ def test_vector_projected_in_m(self): def test_wrong_geom_type(self): """Validation: checks that the vector's geometry type is correct.""" - from natcap.invest import spec_utils, validation + from natcap.invest import spec_utils + from natcap.invest import validation driver = gdal.GetDriverByName('GPKG') filepath = os.path.join(self.workspace_dir, 'vector.gpkg') vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown) @@ -1966,3 +1973,49 @@ def test_get_headers_to_validate(self): patterns = validation.get_headers_to_validate(spec) # should only get the patterns that are static and always required self.assertEqual(sorted(patterns), ['a']) + + +class TestExpressionNameRewrite(unittest.TestCase): + def test_rewrite(self): + from natcap.invest import validation + + target_key = "search_radius_mode" + expression = ( + 'search_radius_mode.value == "uniform radius" ' + 'and not my_search_radius_mode') + result = validation._rewrite_name_dot_value(target_key, expression) + + # The spacing is a little weird, but it should still evaluate. + self.assertEqual(result, ( + '__search_radius_mode__value__ =="uniform radius"' + 'and not my_search_radius_mode ')) + + # Make sure we can still evaluate the result if we simulate some + # objects for the local references. + eval_result = eval(result, __builtins__, { + "search_radius_mode": True, + "__search_radius_mode__value__": 1}) + self.assertEqual(eval_result, False) + + def test_rewrite_at_end_of_expression(self): + from natcap.invest import validation + + target_key = "search_radius_mode" + expression = ( + 'my_search_radius_mode.value == "uniform radius" ' + 'and not search_radius_mode.value') + result = validation._rewrite_name_dot_value(target_key, expression) + + # The spacing is a little weird, but it should still evaluate. + self.assertEqual(result, ( + 'my_search_radius_mode.value == "uniform radius" ' + 'and not__search_radius_mode__value__ ')) + + # Make sure we can still evaluate the result if we simulate some + # objects for the local references. + mode_obj_tpl = collections.namedtuple('mode_obj', ['value']) + mode_obj = mode_obj_tpl('foo') + eval_result = eval(result, __builtins__, { + "my_search_radius_mode": mode_obj, + "__search_radius_mode__value__": 1}) + self.assertEqual(eval_result, False) From 969ef23aafec330d307aa6869f4ac38690da93ae Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 24 Jan 2024 11:57:25 -0800 Subject: [PATCH 02/30] Forgot a few docstrings. 
RE:#1503 --- tests/test_validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_validation.py b/tests/test_validation.py index c8a8247194..ab7ff83115 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1977,6 +1977,7 @@ def test_get_headers_to_validate(self): class TestExpressionNameRewrite(unittest.TestCase): def test_rewrite(self): + """Validation: test expression name rewriting.""" from natcap.invest import validation target_key = "search_radius_mode" @@ -1998,6 +1999,7 @@ def test_rewrite(self): self.assertEqual(eval_result, False) def test_rewrite_at_end_of_expression(self): + """Validation: Test varname rewriting at the end of an expression.""" from natcap.invest import validation target_key = "search_radius_mode" From 6096ad6889c03560ed57fbc58c11ea5aad04fd34 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 24 Jan 2024 12:01:32 -0800 Subject: [PATCH 03/30] Noting change in HISTORY. RE:#1509 --- HISTORY.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 7a0341fc57..f22e85eef4 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -45,6 +45,9 @@ Unreleased Changes * Fixed an issue where Urban Nature Access would crash if an administrative boundary geometry did not overlap any people in the population raster. https://github.com/natcap/invest/issues/1503 + * Fixed an issue where validation was failing to catch missing values in + the uniform search radius args key when using uniform search radii. + https://github.com/natcap/invest/issues/1509 3.14.1 (2023-12-18) ------------------- From 66f149061d44105846303f4904f2018156fa225e Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 17:06:29 -0800 Subject: [PATCH 04/30] Reworking sufficiency for clarity. RE:#1509 --- src/natcap/invest/validation.py | 46 +++++++++++---------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 8657f247e6..80ce232908 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -970,43 +970,27 @@ def validate(args, spec, spatial_overlap_opts=None): conditionally_required_keys.add(key) if missing_keys: - validation_warnings.append((sorted(missing_keys), MESSAGES['MISSING_KEY'])) + validation_warnings.append( + (sorted(missing_keys), MESSAGES['MISSING_KEY'])) if keys_with_no_value: - validation_warnings.append((sorted(keys_with_no_value), MESSAGES['MISSING_VALUE'])) + validation_warnings.append( + (sorted(keys_with_no_value), MESSAGES['MISSING_VALUE'])) # step 2: evaluate sufficiency of keys/inputs # Sufficiency: An input is sufficient when its key is present in args and - # it has a value. A sufficient input need not be valid. Sufficiency is - # used by the conditional requirement phase (step 3 in this function) to - # determine whether a conditionally required input is required. - # The only special case about sufficiency is with boolean values. - # A boolean value absent from args is insufficient. A boolean input that - # is present in args but False is in sufficient. A boolean input that is - # present in args and True is sufficient. + # has a truthy value. If the input is missing from args or is falsy, it is + # insufficient. insufficient_keys = missing_keys.union(keys_with_no_value) - sufficient_inputs = {} - for key, parameter_spec in spec.items(): - # If the key isn't present, no need to validate. - # If it's required and isn't present, we wouldn't have gotten to this - # point in the function. 
- if key not in args: - sufficient_inputs[key] = False - insufficient_keys.add(key) - continue - - # If the value is empty and it isn't required, then we don't need to - # validate it. - if args[key] in ('', None): - sufficient_inputs[key] = False - insufficient_keys.add(key) - continue - - # Boolean values are special in that their T/F state is equivalent - # to their satisfaction. If a checkbox is checked, it is - # considered satisfied. - if spec[key]['type'] == 'boolean': - sufficient_inputs[key] = args[key] + sufficient_inputs = { + key: bool(args.get(key, False)) for key in spec.keys()} + insufficient_keys = insufficient_keys.union(set( + key for key in sufficient_inputs if not sufficient_inputs[key])) + + # Step 3: evaluate whether conditionally required keys are required. + # We also want to keep track of keys that are not required due to their + # conditional requirement expression evaluating to False. + excluded_keys = set() # Any other input type must be sufficient because it is in args and # has a value. From 37a03389bc5785430bb7862a17e26d5376c9e566 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 17:08:52 -0800 Subject: [PATCH 05/30] Reworking comprehensions into standard loop for readability. RE:#1509 --- src/natcap/invest/validation.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 80ce232908..52389c0279 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -982,10 +982,15 @@ def validate(args, spec, spatial_overlap_opts=None): # has a truthy value. If the input is missing from args or is falsy, it is # insufficient. insufficient_keys = missing_keys.union(keys_with_no_value) - sufficient_inputs = { - key: bool(args.get(key, False)) for key in spec.keys()} - insufficient_keys = insufficient_keys.union(set( - key for key in sufficient_inputs if not sufficient_inputs[key])) + sufficient_inputs = {} + insufficient_keys = set() + for key in spec.keys(): + try: + sufficient_inputs[key] = bool(args[key]) + except KeyError: + # input is insufficient if it's missing from args + sufficient_inputs[key] = False + insufficient_keys.add(key) # Step 3: evaluate whether conditionally required keys are required. # We also want to keep track of keys that are not required due to their From e1bef802ec78b3534107f5bfbd3aa26c632a732a Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 17:31:42 -0800 Subject: [PATCH 06/30] Reworking conditional requirement. Conditional requirement now evaluates the truthiness of the expression based on the value of the parameter. If the parameter is missing, that parameter's value is Falsy. RE:#1509 --- src/natcap/invest/validation.py | 54 +++++++-------------------------- 1 file changed, 11 insertions(+), 43 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 52389c0279..005636fe6e 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -988,63 +988,31 @@ def validate(args, spec, spatial_overlap_opts=None): try: sufficient_inputs[key] = bool(args[key]) except KeyError: - # input is insufficient if it's missing from args + # input is definitely insufficient if it's missing from args sufficient_inputs[key] = False + + if sufficient_inputs[key] is False: insufficient_keys.add(key) # Step 3: evaluate whether conditionally required keys are required. 
# We also want to keep track of keys that are not required due to their # conditional requirement expression evaluating to False. - excluded_keys = set() - - # Any other input type must be sufficient because it is in args and - # has a value. - else: - sufficient_inputs[key] = True - - # step 3: evaluate required status of conditionally required keys - # keep track of keys that are explicity not required due to - # their condition being false excluded_keys = set() for key in conditionally_required_keys: - # An input is conditionally required when the expression given - # evaluates to True. - # We handle 2 cases of how the expression is written: - # * Case 1: a logical expression of boolean operations on another - # parameter. Example: "not " - # In this case, the symbol is interpreted as a bool. - # * Case 2: a logical expression comparing the value of the - # parameter against another known value. - # Example: ".value == 'uniform radius'" - expression_values = {} - expression_with_value = spec[key]['required'][:] # make a copy - for sufficient_key, is_sufficient in sufficient_inputs.items(): - try: - args_value = args[sufficient_key] - except KeyError: - # Handle the case where a sufficient key (e.g. optional) is - # missing from args. - args_value = None - - # If the expression contains a {key}.value pattern, rewrite the - # name to avoid dot-notation. - # Because sufficiency is a bool and bools are singletons, we cannot - # actually assign a .value attribute on a bool. - value_symbol = f'__{sufficient_key}__value__' - expression_with_value = _rewrite_name_dot_value( - sufficient_key, expression_with_value) - expression_values[value_symbol] = args_value - expression_values[sufficient_key] = is_sufficient + # An input is conditionally required when the expression given is + # truthy. + expression_values = { + input_key: args.get(input_key, False) for input_key in spec.keys() + if input_key in sufficient_inputs} is_conditionally_required = _evaluate_expression( - expression=expression_with_value, + expression=f'bool({spec[key]["required"]})', variable_map=expression_values) if is_conditionally_required: if key not in args: validation_warnings.append(([key], MESSAGES['MISSING_KEY'])) - else: - if args[key] in ('', None): - validation_warnings.append(([key], MESSAGES['MISSING_VALUE'])) + elif args[key] in ('', None): + validation_warnings.append(([key], MESSAGES['MISSING_VALUE'])) else: excluded_keys.add(key) From 658748d4f2a652475d90d8510572f0511fbbf59b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 17:33:13 -0800 Subject: [PATCH 07/30] Removing .value in UNA conditional requirement. The latest update to using truthiness in conditional requirement expressions handles all of these cases. 
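A minimal standalone sketch of the truthiness-based evaluation described above. It is illustrative only and does not reproduce the library's _evaluate_expression internals; the args values shown are hypothetical.

    # The 'required' expression from a MODEL_SPEC entry is evaluated
    # against the raw args values rather than against a .value attribute.
    required_expression = "search_radius_mode == 'uniform radius'"

    # A key missing from args simply maps to False, so the expression
    # remains evaluable (and falsy) instead of raising a NameError.
    args = {'search_radius_mode': 'uniform radius'}
    variable_map = {
        'search_radius_mode': args.get('search_radius_mode', False)}

    # Wrapping in bool() mirrors the f'bool({...})' evaluation above.
    is_required = eval(f'bool({required_expression})', {}, variable_map)
    assert is_required is True  # 'search_radius' must now have a value
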
RE:#1509 --- src/natcap/invest/urban_nature_access.py | 54 ++++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/natcap/invest/urban_nature_access.py b/src/natcap/invest/urban_nature_access.py index 5ba306e3f5..f856da5dda 100644 --- a/src/natcap/invest/urban_nature_access.py +++ b/src/natcap/invest/urban_nature_access.py @@ -92,7 +92,7 @@ 'type': 'number', 'units': u.meter, 'required': - f'search_radius_mode.value == "{RADIUS_OPT_URBAN_NATURE}"', + f'search_radius_mode == "{RADIUS_OPT_URBAN_NATURE}"', 'expression': 'value >= 0', 'about': ( 'The distance within which a LULC type is relevant ' @@ -125,7 +125,7 @@ "pop_[POP_GROUP]": { "type": "ratio", "required": ( - f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), "about": gettext( "The proportion of the population within each " @@ -248,7 +248,7 @@ 'name': 'uniform search radius', 'units': u.m, 'expression': 'value > 0', - 'required': f'search_radius_mode.value == "{RADIUS_OPT_UNIFORM}"', + 'required': f'search_radius_mode == "{RADIUS_OPT_UNIFORM}"', 'about': gettext( 'The search radius to use when running the model under a ' 'uniform search radius. Required when running the model ' @@ -257,7 +257,7 @@ 'population_group_radii_table': { 'name': 'population group radii table', 'type': 'csv', - 'required': f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"', + 'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"', 'index_col': 'pop_group', 'columns': { "pop_group": { @@ -273,7 +273,7 @@ 'type': 'number', 'units': u.meter, 'required': - f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"', + f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"', 'expression': 'value >= 0', 'about': gettext( "The search radius in meters to use " @@ -379,7 +379,7 @@ "group POP_GROUP within this administrative " "unit."), "created_if": ( - f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), }, "Pund_adm_[POP_GROUP]": { @@ -391,7 +391,7 @@ "administrative unit that are undersupplied " "with urban nature."), "created_if": ( - f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), }, "Povr_adm_[POP_GROUP]": { @@ -403,7 +403,7 @@ "administrative unit that is oversupplied " "with urban nature."), "created_if": ( - f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') " + f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') " "or aggregate_by_pop_group"), }, }, @@ -418,7 +418,7 @@ "to the stated urban nature demand."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, # when RADIUS_OPT_UNIFORM @@ -428,7 +428,7 @@ "radius, weighted by the selected decay function."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, # When RADIUS_OPT_URBAN_NATURE @@ -439,7 +439,7 @@ "by the selected decay function."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, # When RADIUS_OPT_POP_GROUP @@ -450,7 +450,7 @@ "selected decay function."), "bands": {1: {"type": 
"number", "units": u.m**2}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, }, }, @@ -492,8 +492,8 @@ "function."), "bands": {1: {'type': 'number', 'units': u.count}}, "created_if": ( - f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or " - f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'"), + f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or " + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'"), }, "urban_nature_area.tif": { "about": gettext( @@ -501,15 +501,15 @@ "represented in each pixel."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - (f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or " - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'"), + (f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or " + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'"), }, "urban_nature_population_ratio.tif": { "about": gettext( "The calculated urban nature/population ratio."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}'", + f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'", }, # When RADIUS_OPT_URBAN_NATURE @@ -521,7 +521,7 @@ "land cover code LUCODE."), "bands": {1: {"type": "number", "units": u.m**2}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, "urban_nature_supply_percapita_lucode_[LUCODE].tif": { "about": gettext( @@ -529,7 +529,7 @@ "land use land cover code LUCODE"), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, "urban_nature_population_ratio_lucode_[LUCODE].tif": { "about": gettext( @@ -538,7 +538,7 @@ "land cover code LUCODE."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'", + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, # When RADIUS_OPT_POP_GROUP @@ -549,7 +549,7 @@ "POP_GROUP."), "bands": {1: {"type": "number", "units": u.count}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, "proportion_of_population_in_[POP_GROUP].tif": { "about": gettext( @@ -558,7 +558,7 @@ "POP_GROUP."), "bands": {1: {"type": "number", "units": u.none}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, "distance_weighted_population_in_[POP_GROUP].tif": { "about": gettext( @@ -568,7 +568,7 @@ "decay function."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, "distance_weighted_population_all_groups.tif": { "about": gettext( @@ -576,7 +576,7 @@ "decay function."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, "urban_nature_supply_percapita_to_[POP_GROUP].tif": { "about": gettext( @@ -584,7 +584,7 @@ "group POP_GROUP."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, "undersupplied_population_[POP_GROUP].tif": { 
"about": gettext( @@ -592,7 +592,7 @@ "are experiencing an urban nature deficit."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, "oversupplied_population_[POP_GROUP].tif": { "about": gettext( @@ -600,7 +600,7 @@ "are experiencing an urban nature surplus."), "bands": {1: {"type": "number", "units": u.people}}, "created_if": - f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'", + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", } } }, From 0ce33037a4b1360567f5c6470b659ab6403b57f3 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 17:34:54 -0800 Subject: [PATCH 08/30] Removing the variable name rewriting. This ended up not being needed if we just work directly with truthiness. RE:#1509 --- src/natcap/invest/validation.py | 39 --------------------------- tests/test_validation.py | 48 --------------------------------- 2 files changed, 87 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 005636fe6e..9899e781d4 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -66,45 +66,6 @@ } -def _rewrite_name_dot_value(target_key, expression): - """Rewrite a ``name.value`` attribute as a single variable. - - This function uses python's ``tokenize`` library to tokenize the expression - before checking for the target key and presence of a ``value`` attribute. - This eliminates false-positives with similarly-named variables. - - Args: - target_key (string): The target symbol that we expect to have a - ``.value`` attribute. - expression (string): A string expression likely containing - ``{target_key}.value`` - - Returns: - A rewritten, valid python code string where ``{target_key}.value`` has - been rewritten as ``__{target_key}__value__``. - """ - tokens = [t for t in tokenize.generate_tokens( - io.StringIO(expression).readline)] - - replacement_name = f"__{target_key}__value__" - - output_tokens = [] - index = 0 - while index < (len(tokens) - 2): - if all([tokens[index].string == target_key, - tokens[index+1].string == '.', - tokens[index+2].string == 'value']): - # Only the type and string value are required for untokenization - output_tokens.append((token.NAME, replacement_name)) - index += 3 # skip the "." and "value" tokens. - else: - # We can just use the existing token if we're keeping it - output_tokens.append(tokens[index]) - index += 1 - - return tokenize.untokenize(output_tokens) - - def _evaluate_expression(expression, variable_map): """Evaluate a python expression. diff --git a/tests/test_validation.py b/tests/test_validation.py index ab7ff83115..432f69c617 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1973,51 +1973,3 @@ def test_get_headers_to_validate(self): patterns = validation.get_headers_to_validate(spec) # should only get the patterns that are static and always required self.assertEqual(sorted(patterns), ['a']) - - -class TestExpressionNameRewrite(unittest.TestCase): - def test_rewrite(self): - """Validation: test expression name rewriting.""" - from natcap.invest import validation - - target_key = "search_radius_mode" - expression = ( - 'search_radius_mode.value == "uniform radius" ' - 'and not my_search_radius_mode') - result = validation._rewrite_name_dot_value(target_key, expression) - - # The spacing is a little weird, but it should still evaluate. 
- self.assertEqual(result, ( - '__search_radius_mode__value__ =="uniform radius"' - 'and not my_search_radius_mode ')) - - # Make sure we can still evaluate the result if we simulate some - # objects for the local references. - eval_result = eval(result, __builtins__, { - "search_radius_mode": True, - "__search_radius_mode__value__": 1}) - self.assertEqual(eval_result, False) - - def test_rewrite_at_end_of_expression(self): - """Validation: Test varname rewriting at the end of an expression.""" - from natcap.invest import validation - - target_key = "search_radius_mode" - expression = ( - 'my_search_radius_mode.value == "uniform radius" ' - 'and not search_radius_mode.value') - result = validation._rewrite_name_dot_value(target_key, expression) - - # The spacing is a little weird, but it should still evaluate. - self.assertEqual(result, ( - 'my_search_radius_mode.value == "uniform radius" ' - 'and not__search_radius_mode__value__ ')) - - # Make sure we can still evaluate the result if we simulate some - # objects for the local references. - mode_obj_tpl = collections.namedtuple('mode_obj', ['value']) - mode_obj = mode_obj_tpl('foo') - eval_result = eval(result, __builtins__, { - "my_search_radius_mode": mode_obj, - "__search_radius_mode__value__": 1}) - self.assertEqual(eval_result, False) From 041f41100d2ced78d94a6f211b1dfa73cc4fbf67 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 18:00:45 -0800 Subject: [PATCH 09/30] Slight refactor for efficiency. RE:#1509 --- src/natcap/invest/validation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 9899e781d4..e39d31b9b2 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -958,14 +958,14 @@ def validate(args, spec, spatial_overlap_opts=None): # Step 3: evaluate whether conditionally required keys are required. # We also want to keep track of keys that are not required due to their # conditional requirement expression evaluating to False. + # + # An input is conditionally required when the expression given is + # truthy. + expression_values = { + input_key: args.get(input_key, False) for input_key in spec.keys() + if input_key in sufficient_inputs} excluded_keys = set() for key in conditionally_required_keys: - # An input is conditionally required when the expression given is - # truthy. - expression_values = { - input_key: args.get(input_key, False) for input_key in spec.keys() - if input_key in sufficient_inputs} - is_conditionally_required = _evaluate_expression( expression=f'bool({spec[key]["required"]})', variable_map=expression_values) From 4a1f548e19c15bccd042f1dbc8534adf8822c08d Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 18:01:12 -0800 Subject: [PATCH 10/30] First stab at rewriting nested cond. requirement. 
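A rough sketch of the nested rewrite introduced in the following patch, using a hypothetical CSV spec entry with illustrative column names. The change that follows walks the 'columns', 'rows', and 'fields' sub-specs of the parameter spec; this standalone version only shows the idea.

    # 'field_b' is only required when another arg ('some_number') equals 2.
    csv_spec = {
        'type': 'csv',
        'columns': {
            'field_a': {'type': 'ratio', 'required': True},
            'field_b': {'type': 'ratio', 'required': 'some_number == 2'},
        },
    }
    expression_values = {'some_number': 2}

    # Collapse any string-valued 'required' expression to a boolean before
    # the type-specific check function ever sees the nested spec.
    for column_spec in csv_spec['columns'].values():
        if isinstance(column_spec.get('required'), str):
            column_spec['required'] = bool(
                eval(column_spec['required'], {}, expression_values))

    assert csv_spec['columns']['field_b']['required'] is True
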
RE:#1509 --- src/natcap/invest/validation.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index e39d31b9b2..b657f47e6e 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -995,6 +995,22 @@ def validate(args, spec, spatial_overlap_opts=None): LOGGER.debug(f'Provided key {key} does not exist in MODEL_SPEC') continue + # rewrite parameter_spec for any nested, conditional validity + rewrite_keys = None + if parameter_spec['type'] == 'csv': + rewrite_keys = ['columns', 'rows'] + elif parameter_spec['type'] == 'vector': + rewrite_keys = ['fields'] + + if rewrite_keys: + for nested_key in rewrite_keys: + for nested_key, nested_spec in parameter_spec[nested_key].items(): + if ('required' in nested_spec + and isinstance(nested_spec['required'], str)): + parameter_spec[nested_key][nested_key]['required'] = ( + _evaluate_expression( + nested_spec['required'], expression_values)) + type_validation_func = _VALIDATION_FUNCS[parameter_spec['type']] if type_validation_func is None: From 416240d2fbc600faeaba8d21253456c12db49439 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 18:02:05 -0800 Subject: [PATCH 11/30] Removing unnecessary conditional requirement - table already required. RE:#1509 --- src/natcap/invest/urban_nature_access.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/natcap/invest/urban_nature_access.py b/src/natcap/invest/urban_nature_access.py index f856da5dda..6b9c719518 100644 --- a/src/natcap/invest/urban_nature_access.py +++ b/src/natcap/invest/urban_nature_access.py @@ -272,8 +272,6 @@ 'search_radius_m': { 'type': 'number', 'units': u.meter, - 'required': - f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"', 'expression': 'value >= 0', 'about': gettext( "The search radius in meters to use " From 21ae820b2477c3845eba1a670fd77f4cf9753cb4 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 13 Feb 2024 18:12:26 -0800 Subject: [PATCH 12/30] Clarifying varnames, copying spec to avoid modifying by reference, noting tests to write. RE:#1509 --- src/natcap/invest/validation.py | 23 +++++++++++++---------- tests/test_validation.py | 3 +++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index b657f47e6e..800b294e51 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -1,5 +1,6 @@ """Common validation utilities for InVEST models.""" import ast +import copy import functools import importlib import inspect @@ -962,8 +963,7 @@ def validate(args, spec, spatial_overlap_opts=None): # An input is conditionally required when the expression given is # truthy. 
expression_values = { - input_key: args.get(input_key, False) for input_key in spec.keys() - if input_key in sufficient_inputs} + input_key: args.get(input_key, False) for input_key in spec.keys()} excluded_keys = set() for key in conditionally_required_keys: is_conditionally_required = _evaluate_expression( @@ -990,24 +990,27 @@ def validate(args, spec, spatial_overlap_opts=None): # Extra args that don't exist in the MODEL_SPEC are okay # we don't need to try to validate them try: - parameter_spec = spec[key] + # Using deepcopy to make sure we don't modify the original spec + parameter_spec = copy.deepcopy(spec[key]) except KeyError: LOGGER.debug(f'Provided key {key} does not exist in MODEL_SPEC') continue # rewrite parameter_spec for any nested, conditional validity - rewrite_keys = None + axis_keys = None if parameter_spec['type'] == 'csv': - rewrite_keys = ['columns', 'rows'] + axis_keys = ['columns', 'rows'] elif parameter_spec['type'] == 'vector': - rewrite_keys = ['fields'] + axis_keys = ['fields'] - if rewrite_keys: - for nested_key in rewrite_keys: - for nested_key, nested_spec in parameter_spec[nested_key].items(): + if axis_keys: + for axis_key in axis_keys: + if axis_key not in parameter_spec: + continue + for nested_key, nested_spec in parameter_spec[axis_key].items(): if ('required' in nested_spec and isinstance(nested_spec['required'], str)): - parameter_spec[nested_key][nested_key]['required'] = ( + parameter_spec[axis_key][nested_key]['required'] = ( _evaluate_expression( nested_spec['required'], expression_values)) diff --git a/tests/test_validation.py b/tests/test_validation.py index 432f69c617..861d36fd20 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1973,3 +1973,6 @@ def test_get_headers_to_validate(self): patterns = validation.get_headers_to_validate(spec) # should only get the patterns that are static and always required self.assertEqual(sorted(patterns), ['a']) + +# TODO: assert that the target SPEC isn't altered +# TODO: assert this works with vectors and CSVs From 4135eec27c5a95ec58db0ea813b5bed1ad7738c9 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 09:54:36 -0800 Subject: [PATCH 13/30] Adding a test for vector field validity. 
RE:#1509 --- tests/test_validation.py | 73 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/test_validation.py b/tests/test_validation.py index 861d36fd20..b899bff8d9 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1625,6 +1625,79 @@ def test_conditionally_required_invalid(self): option_list=spec['string_a']['options']))], validation.validate(args, spec)) + def test_conditionally_required_vector_fields(self): + """Validation: conditionally required vector fields.""" + from natcap.invest import spec_utils + from natcap.invest import validation + spec = { + "some_number": { + "name": "A number", + "about": "About the number", + "type": "number", + "required": True, + "expression": "value > 0.5", + }, + "vector": { + "name": "A vector", + "about": "About the vector", + "type": "vector", + "required": True, + "geometries": spec_utils.POINTS, + "fields": { + "field_a": { + "type": "ratio", + "required": True, + }, + "field_b": { + "type": "ratio", + "required": "some_number == 2", + } + } + } + } + + def _create_vector(filepath, fields=[]): + gpkg_driver = gdal.GetDriverByName('GPKG') + vector = gpkg_driver.Create(filepath, 0, 0, 0, + gdal.GDT_Unknown) + vector_srs = osr.SpatialReference() + vector_srs.ImportFromEPSG(4326) # WGS84 + layer = vector.CreateLayer('layer', vector_srs, ogr.wkbPoint) + for fieldname in fields: + layer.CreateField(ogr.FieldDefn(fieldname, ogr.OFTReal)) + new_feature = ogr.Feature(layer.GetLayerDefn()) + new_feature.SetGeometry(ogr.CreateGeometryFromWkt('POINT (1 1)')) + layer = None + vector = None + + vector_path = os.path.join(self.workspace_dir, 'vector1.gpkg') + _create_vector(vector_path, ['field_a']) + args = { + 'some_number': 1, + 'vector': vector_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual(validation_warnings, []) + + args = { + 'some_number': 2, # trigger validation warning + 'vector': vector_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual( + validation_warnings, + [(['vector'], validation.MESSAGES['MATCHED_NO_HEADERS'].format( + header='field', header_name='field_b'))]) + + vector_path = os.path.join(self.workspace_dir, 'vector2.gpkg') + _create_vector(vector_path, ['field_a', 'field_b']) + args = { + 'some_number': 2, # field_b is present, no validation warning now + 'vector': vector_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual(validation_warnings, []) + def test_validation_exception(self): """Validation: Verify error when an unexpected exception occurs.""" from natcap.invest import validation From 0b9111f09bed20262be4cb3cd5440d7d99ab2ba4 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:04:43 -0800 Subject: [PATCH 14/30] Adding a test for csv column conditional validity. 
RE:#1509 --- tests/test_validation.py | 67 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/test_validation.py b/tests/test_validation.py index b899bff8d9..ee0b657711 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1698,6 +1698,73 @@ def _create_vector(filepath, fields=[]): validation_warnings = validation.validate(args, spec) self.assertEqual(validation_warnings, []) + def test_conditionally_required_csv_columns(self): + """Validation: conditionally required csv columns.""" + from natcap.invest import validation + spec = { + "some_number": { + "name": "A number", + "about": "About the number", + "type": "number", + "required": True, + "expression": "value > 0.5", + }, + "csv": { + "name": "A table", + "about": "About the table", + "type": "csv", + "required": True, + "columns": { + "field_a": { + "type": "ratio", + "required": True, + }, + "field_b": { + "type": "ratio", + "required": "some_number == 2", + } + } + } + } + # Create a CSV file with only field_a + csv_path = os.path.join(self.workspace_dir, 'table1.csv') + with open(csv_path, 'w') as csv_file: + csv_file.write(textwrap.dedent( + """\ + "field_a" + 1""")) + args = { + 'some_number': 1, + 'csv': csv_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual(validation_warnings, []) + + # trigger validation warning when some_number == 2 + args = { + 'some_number': 2, + 'csv': csv_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual( + validation_warnings, + [(['csv'], validation.MESSAGES['MATCHED_NO_HEADERS'].format( + header='column', header_name='field_b'))]) + + # Create a CSV file with both field_a and field_b + csv_path = os.path.join(self.workspace_dir, 'table2.csv') + with open(csv_path, 'w') as csv_file: + csv_file.write(textwrap.dedent( + """\ + "field_a","field_b" + 1,2""")) + args = { + 'some_number': 2, # field_b is present, no validation warning now + 'csv': csv_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual(validation_warnings, []) + def test_validation_exception(self): """Validation: Verify error when an unexpected exception occurs.""" from natcap.invest import validation From 5e6e35bd44519e6785e4b3bc7a4c39e288d415d3 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:08:23 -0800 Subject: [PATCH 15/30] Correcting WKT syntax. RE:#1509 --- tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_validation.py b/tests/test_validation.py index ee0b657711..4f891561a6 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1884,7 +1884,7 @@ def test_spatial_overlap_error(self): vector_srs.ImportFromEPSG(32731) # UTM 31N layer = vector.CreateLayer('layer', vector_srs, ogr.wkbPoint) new_feature = ogr.Feature(layer.GetLayerDefn()) - new_feature.SetGeometry(ogr.CreateGeometryFromWkt('POINT 1 1')) + new_feature.SetGeometry(ogr.CreateGeometryFromWkt('POINT (1 1)')) new_feature = None layer = None From b02b49c26493ce8d0a7ef717e685c9182f42795d Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:08:44 -0800 Subject: [PATCH 16/30] Adding a test for CSV row conditional validation. 
RE:#1509 --- tests/test_validation.py | 68 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/tests/test_validation.py b/tests/test_validation.py index 4f891561a6..0ebefc6d99 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1765,6 +1765,71 @@ def test_conditionally_required_csv_columns(self): validation_warnings = validation.validate(args, spec) self.assertEqual(validation_warnings, []) + def test_conditionally_required_csv_rows(self): + """Validation: conditionally required csv rows.""" + from natcap.invest import validation + spec = { + "some_number": { + "name": "A number", + "about": "About the number", + "type": "number", + "required": True, + "expression": "value > 0.5", + }, + "csv": { + "name": "A table", + "about": "About the table", + "type": "csv", + "required": True, + "rows": { + "field_a": { + "type": "ratio", + "required": True, + }, + "field_b": { + "type": "ratio", + "required": "some_number == 2", + } + } + } + } + # Create a CSV file with only field_a + csv_path = os.path.join(self.workspace_dir, 'table1.csv') + with open(csv_path, 'w') as csv_file: + csv_file.write(textwrap.dedent( + """"field_a",1""")) + args = { + 'some_number': 1, + 'csv': csv_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual(validation_warnings, []) + + # trigger validation warning when some_number == 2 + args = { + 'some_number': 2, + 'csv': csv_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual( + validation_warnings, + [(['csv'], validation.MESSAGES['MATCHED_NO_HEADERS'].format( + header='row', header_name='field_b'))]) + + # Create a CSV file with both field_a and field_b + csv_path = os.path.join(self.workspace_dir, 'table2.csv') + with open(csv_path, 'w') as csv_file: + csv_file.write(textwrap.dedent( + """\ + "field_a",1 + "field_b",2""")) + args = { + 'some_number': 2, # field_b is present, no validation warning now + 'csv': csv_path, + } + validation_warnings = validation.validate(args, spec) + self.assertEqual(validation_warnings, []) + def test_validation_exception(self): """Validation: Verify error when an unexpected exception occurs.""" from natcap.invest import validation @@ -2113,6 +2178,3 @@ def test_get_headers_to_validate(self): patterns = validation.get_headers_to_validate(spec) # should only get the patterns that are static and always required self.assertEqual(sorted(patterns), ['a']) - -# TODO: assert that the target SPEC isn't altered -# TODO: assert this works with vectors and CSVs From fec802a282b968d12d64a327d82cac119e946d51 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:10:52 -0800 Subject: [PATCH 17/30] Fixing expression in UCM. RE:#1509 --- src/natcap/invest/urban_cooling_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/natcap/invest/urban_cooling_model.py b/src/natcap/invest/urban_cooling_model.py index 1581d7521c..eea80d8a12 100644 --- a/src/natcap/invest/urban_cooling_model.py +++ b/src/natcap/invest/urban_cooling_model.py @@ -71,7 +71,7 @@ "considered a green area.")}, "shade": { "type": "ratio", - "required": "cc_method == factors", + "required": "cc_method == 'factors'", "about": gettext( "The proportion of area in this LULC class that is " "covered by tree canopy at least 2 meters high. 
" @@ -79,7 +79,7 @@ "the Cooling Capacity Calculation Method.")}, "albedo": { "type": "ratio", - "required": "cc_method == factors", + "required": "cc_method == 'factors'", "about": gettext( "The proportion of solar radiation that is directly " "reflected by this LULC class. Required if the " @@ -87,7 +87,7 @@ "Capacity Calculation Method.")}, "building_intensity": { "type": "ratio", - "required": "cc_method == intensity", + "required": "cc_method == 'intensity'", "about": gettext( "The ratio of building floor area to footprint " "area, with all values in this column normalized " From f8497f6ade7f5bff4d8688c0e06f3f63bad354e2 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:14:19 -0800 Subject: [PATCH 18/30] Fixing operands in wind energy expression. RE:#1509 --- src/natcap/invest/wind_energy.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/natcap/invest/wind_energy.py b/src/natcap/invest/wind_energy.py index f7fa4cc739..e0e8e679c8 100644 --- a/src/natcap/invest/wind_energy.py +++ b/src/natcap/invest/wind_energy.py @@ -7,27 +7,24 @@ import tempfile import numpy -from scipy import integrate - -import shapely.wkb -import shapely.wkt +import pygeoprocessing import shapely.ops import shapely.prepared -from shapely import speedups - +import shapely.wkb +import shapely.wkt +import taskgraph from osgeo import gdal from osgeo import ogr from osgeo import osr +from scipy import integrate +from shapely import speedups -import pygeoprocessing -import taskgraph -from . import utils +from . import gettext from . import spec_utils -from .unit_registry import u +from . import utils from . import validation from .model_metadata import MODEL_METADATA -from . import gettext - +from .unit_registry import u LOGGER = logging.getLogger(__name__) speedups.enable() @@ -117,7 +114,7 @@ **spec_utils.AOI, "projected": True, "projection_units": u.meter, - "required": "valuation_container & grid_points_path", + "required": "valuation_container and grid_points_path", "about": gettext( "Map of the area(s) of interest over which to run the model " "and aggregate valuation results. Required if Run Valuation " From cb70a12c7d380d31762fc3c73d863b159fff86d1 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:36:55 -0800 Subject: [PATCH 19/30] Removing a comment that is no longer relevant. RE:#1509 --- src/natcap/invest/validation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 800b294e51..ce9c03bc70 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -846,9 +846,6 @@ def get_headers_to_validate(spec): """ headers = [] for key, val in spec.items(): - # for now only check headers that are always required - # assume that any conditionally-required headers are validated by the - # model's validate function # if 'required' isn't a key, it defaults to True if ('required' not in val) or (val['required'] is True): # brackets are a special character for our args spec syntax From ecdc4985f2804755cba436266881698a4a70070f Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 10:43:56 -0800 Subject: [PATCH 20/30] Switching bitwise operations to python bools. 
RE:#1509 --- src/natcap/invest/wind_energy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/natcap/invest/wind_energy.py b/src/natcap/invest/wind_energy.py index e0e8e679c8..fb38535ae2 100644 --- a/src/natcap/invest/wind_energy.py +++ b/src/natcap/invest/wind_energy.py @@ -131,7 +131,7 @@ "type": "vector", "fields": {}, "geometries": {"POLYGON", "MULTIPOLYGON"}, - "required": "min_distance | max_distance | valuation_container", + "required": "min_distance and max_distance and valuation_container", "about": gettext( "Map of the coastlines of landmasses in the area of interest. " "Required if the Minimum Distance and Maximum Distance inputs " @@ -350,7 +350,7 @@ "about": gettext("Longitude of the connection point.") } }, - "required": "valuation_container & (not avg_grid_distance)", + "required": "valuation_container and (not avg_grid_distance)", "about": gettext( "Table of grid and land connection points to which cables " "will connect. Required if Run Valuation is selected and " @@ -361,7 +361,7 @@ "expression": "value > 0", "type": "number", "units": u.kilometer, - "required": "valuation_container & (not grid_points_path)", + "required": "valuation_container and (not grid_points_path)", "about": gettext( "Average distance to the onshore grid from coastal cable " "landing points. Required if Run Valuation is selected and " @@ -396,7 +396,7 @@ "about": gettext("Price of energy for each year.") } }, - "required": "valuation_container & price_table", + "required": "valuation_container and price_table", "about": gettext( "Table of yearly prices for wind energy. There must be a row " "for each year in the lifespan given in the 'time_period' " @@ -407,7 +407,7 @@ "wind_price": { "type": "number", "units": u.currency/u.kilowatt_hour, - "required": "valuation_container & (not price_table)", + "required": "valuation_container and (not price_table)", "about": gettext( "The initial price of wind energy, at the first year in the " "wind energy farm lifespan. Required if Run Valuation is " @@ -416,7 +416,7 @@ }, "rate_change": { "type": "ratio", - "required": "valuation_container & (not price_table)", + "required": "valuation_container and (not price_table)", "about": gettext( "The annual rate of change in the price of wind energy. " "Required if Run Valuation is selected and Use Price Table " From bf55a26ca013340a53208f965c330e86bd04c6c0 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 14 Feb 2024 14:11:29 -0800 Subject: [PATCH 21/30] Correcting boolean operator and->or. RE:#1509 --- src/natcap/invest/wind_energy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/natcap/invest/wind_energy.py b/src/natcap/invest/wind_energy.py index fb38535ae2..e3947dbf09 100644 --- a/src/natcap/invest/wind_energy.py +++ b/src/natcap/invest/wind_energy.py @@ -131,7 +131,7 @@ "type": "vector", "fields": {}, "geometries": {"POLYGON", "MULTIPOLYGON"}, - "required": "min_distance and max_distance and valuation_container", + "required": "min_distance or max_distance or valuation_container", "about": gettext( "Map of the coastlines of landmasses in the area of interest. " "Required if the Minimum Distance and Maximum Distance inputs " From 482b327625820df5e88df878a1a5b5a6678d40ca Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 21 Feb 2024 14:24:59 -0800 Subject: [PATCH 22/30] Light linting, mostly around line length. 
RE:#1509 --- src/natcap/invest/validation.py | 70 +++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index ce9c03bc70..7220f91f62 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -4,15 +4,12 @@ import functools import importlib import inspect -import io import logging import os import pprint import queue import re import threading -import token -import tokenize import warnings import numpy @@ -35,15 +32,19 @@ MESSAGES = { 'MISSING_KEY': gettext('Key is missing from the args dict'), 'MISSING_VALUE': gettext('Input is required but has no value'), - 'MATCHED_NO_HEADERS': gettext('Expected the {header} "{header_name}" but did ' - 'not find it'), - 'PATTERN_MATCHED_NONE': gettext('Expected to find at least one {header} matching ' - 'the pattern "{header_name}" but found none'), - 'DUPLICATE_HEADER': gettext('Expected the {header} "{header_name}" only once ' - 'but found it {number} times'), - 'NOT_A_NUMBER': gettext('Value "{value}" could not be interpreted as a number'), - 'WRONG_PROJECTION_UNIT': gettext('Layer must be projected in this unit: ' - '"{unit_a}" but found this unit: "{unit_b}"'), + 'MATCHED_NO_HEADERS': gettext( + 'Expected the {header} "{header_name}" but did not find it'), + 'PATTERN_MATCHED_NONE': gettext( + 'Expected to find at least one {header} matching ' + 'the pattern "{header_name}" but found none'), + 'DUPLICATE_HEADER': gettext( + 'Expected the {header} "{header_name}" only once ' + 'but found it {number} times'), + 'NOT_A_NUMBER': gettext( + 'Value "{value}" could not be interpreted as a number'), + 'WRONG_PROJECTION_UNIT': gettext( + 'Layer must be projected in this unit: ' + '"{unit_a}" but found this unit: "{unit_b}"'), 'UNEXPECTED_ERROR': gettext('An unexpected error occurred in validation'), 'DIR_NOT_FOUND': gettext('Directory not found'), 'NOT_A_DIR': gettext('Path must be a directory'), @@ -53,16 +54,19 @@ 'NOT_GDAL_RASTER': gettext('File could not be opened as a GDAL raster'), 'OVR_FILE': gettext('File found to be an overview ".ovr" file.'), 'NOT_GDAL_VECTOR': gettext('File could not be opened as a GDAL vector'), - 'REGEXP_MISMATCH': gettext("Value did not match expected pattern {regexp}"), + 'REGEXP_MISMATCH': gettext( + "Value did not match expected pattern {regexp}"), 'INVALID_OPTION': gettext("Value must be one of: {option_list}"), 'INVALID_VALUE': gettext('Value does not meet condition {condition}'), 'NOT_WITHIN_RANGE': gettext('Value {value} is not in the range {range}'), 'NOT_AN_INTEGER': gettext('Value "{value}" does not represent an integer'), 'NOT_BOOLEAN': gettext("Value must be either True or False, not {value}"), 'NO_PROJECTION': gettext('Spatial file {filepath} has no projection'), - 'BBOX_NOT_INTERSECT': gettext('Not all of the spatial layers overlap each ' + 'BBOX_NOT_INTERSECT': gettext( + 'Not all of the spatial layers overlap each ' 'other. 
All bounding boxes must intersect: {bboxes}'), - 'NEED_PERMISSION': gettext('You must have {permission} access to this file'), + 'NEED_PERMISSION': gettext( + 'You must have {permission} access to this file'), 'WRONG_GEOM_TYPE': gettext('Geometry type must be one of {allowed}') } @@ -376,7 +380,8 @@ def check_vector(filepath, geometries, fields=None, projected=False, gdal.PopErrorHandler() geom_map = { - 'POINT': [ogr.wkbPoint, ogr.wkbPointM, ogr.wkbPointZM, ogr.wkbPoint25D], + 'POINT': [ogr.wkbPoint, ogr.wkbPointM, ogr.wkbPointZM, + ogr.wkbPoint25D], 'LINESTRING': [ogr.wkbLineString, ogr.wkbLineStringM, ogr.wkbLineStringZM, ogr.wkbLineString25D], 'POLYGON': [ogr.wkbPolygon, ogr.wkbPolygonM, @@ -384,7 +389,8 @@ def check_vector(filepath, geometries, fields=None, projected=False, 'MULTIPOINT': [ogr.wkbMultiPoint, ogr.wkbMultiPointM, ogr.wkbMultiPointZM, ogr.wkbMultiPoint25D], 'MULTILINESTRING': [ogr.wkbMultiLineString, ogr.wkbMultiLineStringM, - ogr.wkbMultiLineStringZM, ogr.wkbMultiLineString25D], + ogr.wkbMultiLineStringZM, + ogr.wkbMultiLineString25D], 'MULTIPOLYGON': [ogr.wkbMultiPolygon, ogr.wkbMultiPolygonM, ogr.wkbMultiPolygonZM, ogr.wkbMultiPolygon25D] } @@ -396,11 +402,11 @@ def check_vector(filepath, geometries, fields=None, projected=False, if gdal_dataset is None: return MESSAGES['NOT_GDAL_VECTOR'] - # NOTE: this only checks the layer geometry type, not the types of the actual - # geometries (layer.GetGeometryTypes()). This is probably equivalent in most - # cases, and it's more efficient than checking every geometry, but we might - # need to change this in the future if it becomes a problem. Currently not - # supporting ogr.wkbUnknown which allows mixed types. + # NOTE: this only checks the layer geometry type, not the types of the + # actual geometries (layer.GetGeometryTypes()). This is probably equivalent + # in most cases, and it's more efficient than checking every geometry, but + # we might need to change this in the future if it becomes a problem. + # Currently not supporting ogr.wkbUnknown which allows mixed types. layer = gdal_dataset.GetLayer() if layer.GetGeomType() not in allowed_geom_types: return MESSAGES['WRONG_GEOM_TYPE'].format(allowed=geometries) @@ -576,7 +582,8 @@ def check_boolean(value, **kwargs): return MESSAGES['NOT_BOOLEAN'].format(value=value) -def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None, +def get_validated_dataframe( + csv_path, columns=None, rows=None, index_col=None, read_csv_kwargs={}, **kwargs): """Read a CSV into a dataframe that is guaranteed to match the spec.""" @@ -596,7 +603,8 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None, if rows: # swap rows and column - df = df.set_index(df.columns[0]).rename_axis(None, axis=0).T.reset_index(drop=True) + df = df.set_index(df.columns[0]).rename_axis( + None, axis=0).T.reset_index(drop=True) spec = columns if columns else rows @@ -659,7 +667,7 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None, header_name=expected, number=count) - # set the index column, if specified + # set the index column, if specified if index_col is not None: index_col = index_col.lower() try: @@ -818,9 +826,10 @@ def wrapper_func(): thread.join(timeout=timeout) if thread.is_alive(): # first arg to `check_csv`, `check_raster`, `check_vector` is the path - warnings.warn(f'Validation of file {args[0]} timed out. If this file ' - 'is stored in a file streaming service, it may be taking a long ' - 'time to download. 
Try storing it locally instead.') + warnings.warn( + f'Validation of file {args[0]} timed out. If this file ' + 'is stored in a file streaming service, it may be taking a long ' + 'time to download. Try storing it locally instead.') return None else: @@ -1116,8 +1125,9 @@ def _wrapped_validate_func(args, limit_to=None): try: model_module = importlib.import_module(validate_func.__module__) except Exception: - LOGGER.warning('Unable to import module %s: assuming no MODEL_SPEC.', - validate_func.__module__) + LOGGER.warning( + 'Unable to import module %s: assuming no MODEL_SPEC.', + validate_func.__module__) model_module = None # If the module has an MODEL_SPEC defined, validate against that. From b32fcbf963d2266a65e29a8f283ebf430b1932bd Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 21 Feb 2024 14:26:52 -0800 Subject: [PATCH 23/30] Removing a deprecated line. RE:#1509 --- src/natcap/invest/validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 7220f91f62..69336e1fb4 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -949,7 +949,6 @@ def validate(args, spec, spatial_overlap_opts=None): # Sufficiency: An input is sufficient when its key is present in args and # has a truthy value. If the input is missing from args or is falsy, it is # insufficient. - insufficient_keys = missing_keys.union(keys_with_no_value) sufficient_inputs = {} insufficient_keys = set() for key in spec.keys(): From 1deee3e55d50c9a6a7cc54cca1c56716b8a7a8f2 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 21 Feb 2024 14:58:54 -0800 Subject: [PATCH 24/30] Simplifying validate(). RE:#1509 --- src/natcap/invest/validation.py | 73 ++++++++------------------------- tests/test_validation.py | 7 +--- 2 files changed, 18 insertions(+), 62 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 69336e1fb4..1a7d426ffe 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -913,30 +913,31 @@ def validate(args, spec, spatial_overlap_opts=None): """ validation_warnings = [] - # step 1: check absolute requirement + # Phase 1: Check whether an input is required and has a value missing_keys = set() keys_with_no_value = set() - conditionally_required_keys = set() + expression_values = { + input_key: args.get(input_key, False) for input_key in spec.keys()} for key, parameter_spec in spec.items(): # Default required to True since this is the most common try: required = parameter_spec['required'] except KeyError: required = True - if required is True: # Might be an args key, can't rely on truthiness + + if isinstance(required, str): + required = _evaluate_expression( + expression=f'bool({spec[key]["required"]})', + variable_map=expression_values) + + # At this point, required is only True or False. + if required: if key not in args: missing_keys.add(key) else: if args[key] in ('', None): keys_with_no_value.add(key) - # If ``required`` is a string, it must represent an expression of - # conditional requirement based on the satisfaction of various args - # keys. We can only evaluate this later, after all other validation - # happens, so add this args key to a set for later. 
- elif isinstance(required, str): - conditionally_required_keys.add(key) - if missing_keys: validation_warnings.append( (sorted(missing_keys), MESSAGES['MISSING_KEY'])) @@ -945,53 +946,11 @@ def validate(args, spec, spatial_overlap_opts=None): validation_warnings.append( (sorted(keys_with_no_value), MESSAGES['MISSING_VALUE'])) - # step 2: evaluate sufficiency of keys/inputs - # Sufficiency: An input is sufficient when its key is present in args and - # has a truthy value. If the input is missing from args or is falsy, it is - # insufficient. - sufficient_inputs = {} - insufficient_keys = set() - for key in spec.keys(): - try: - sufficient_inputs[key] = bool(args[key]) - except KeyError: - # input is definitely insufficient if it's missing from args - sufficient_inputs[key] = False - - if sufficient_inputs[key] is False: - insufficient_keys.add(key) - - # Step 3: evaluate whether conditionally required keys are required. - # We also want to keep track of keys that are not required due to their - # conditional requirement expression evaluating to False. - # - # An input is conditionally required when the expression given is - # truthy. - expression_values = { - input_key: args.get(input_key, False) for input_key in spec.keys()} - excluded_keys = set() - for key in conditionally_required_keys: - is_conditionally_required = _evaluate_expression( - expression=f'bool({spec[key]["required"]})', - variable_map=expression_values) - if is_conditionally_required: - if key not in args: - validation_warnings.append(([key], MESSAGES['MISSING_KEY'])) - elif args[key] in ('', None): - validation_warnings.append(([key], MESSAGES['MISSING_VALUE'])) - else: - excluded_keys.add(key) - - # step 4: validate keys, but not conditionally excluded ones. - # Making a distinction between keys which are optional (required=False), - # and keys which are conditionally not required - # (required="condition that evaluates to False") - # We want to do validation on optional keys, like `n_workers`, - # but not on conditionally excluded keys, like fields that are greyed out - # because a checkbox is unchecked. + # Phase 2: Check whether any input with a value validates with its + # type-specific check function. 
invalid_keys = set() - sufficient_keys = set(args.keys()).difference(insufficient_keys) - for key in sufficient_keys.difference(excluded_keys): + insufficient_keys = (missing_keys | keys_with_no_value) + for key in set(args.keys()) - insufficient_keys: # Extra args that don't exist in the MODEL_SPEC are okay # we don't need to try to validate them try: @@ -1037,7 +996,7 @@ def validate(args, spec, spatial_overlap_opts=None): key, args[key]) validation_warnings.append(([key], MESSAGES['UNEXPECTED_ERROR'])) - # step 5: check spatial overlap if applicable + # Phase 3: Check spatial overlap if applicable if spatial_overlap_opts: spatial_keys = set(spatial_overlap_opts['spatial_keys']) diff --git a/tests/test_validation.py b/tests/test_validation.py index 0ebefc6d99..e1c2fe2ab3 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1420,8 +1420,7 @@ def test_conditional_requirement(self): } validation_warnings = validation.validate(args, spec) self.assertEqual(sorted(validation_warnings), [ - (['number_c'], validation.MESSAGES['MISSING_KEY']), - (['number_d'], validation.MESSAGES['MISSING_KEY']), + (['number_c', 'number_d'], validation.MESSAGES['MISSING_KEY']), ]) args = { @@ -1507,12 +1506,10 @@ def test_conditional_requirement_not_required(self): } } - # because condition = True, it shouldn't matter that the - # csv_b parameter wouldn't pass validation args = { "condition": True, "csv_a": csv_a_path, - "csv_b": 'x' + csv_b_path # introduce a typo + # csv_b is absent, which is okay because it's not required } validation_warnings = validation.validate(args, spec) From 880e31fbd477d7ac8af09f631d780c0f1d7946c0 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 21 Feb 2024 15:46:39 -0800 Subject: [PATCH 25/30] Adding directory axis. 
RE:#1509 --- src/natcap/invest/validation.py | 2 ++ tests/test_validation.py | 55 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 1a7d426ffe..dd3571f5ea 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -966,6 +966,8 @@ def validate(args, spec, spatial_overlap_opts=None): axis_keys = ['columns', 'rows'] elif parameter_spec['type'] == 'vector': axis_keys = ['fields'] + elif parameter_spec['type'] == 'directory': + axis_keys = ['contents'] if axis_keys: for axis_key in axis_keys: diff --git a/tests/test_validation.py b/tests/test_validation.py index e1c2fe2ab3..9d6d0d45d9 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1857,6 +1857,61 @@ def test_validation_exception(self): validation_warnings, [(['number_a'], validation.MESSAGES['UNEXPECTED_ERROR'])]) + def test_conditionally_required_directory_contents(self): + """Validation: conditionally required directory contents.""" + from natcap.invest import validation + spec = { + "some_number": { + "name": "A number", + "about": "About the number", + "type": "number", + "required": True, + "expression": "value > 0.5", + }, + "directory": { + "name": "A folder", + "about": "About the folder", + "type": "directory", + "required": True, + "contents": { + "file.1": { + "type": "csv", + "required": True, + }, + "file.2": { + "type": "csv", + "required": "some_number == 2", + } + } + } + } + path_1 = os.path.join(self.workspace_dir, 'file.1') + with open(path_1, 'w') as my_file: + my_file.write('col1,col2') + args = { + 'some_number': 1, + 'directory': self.workspace_dir, + } + self.assertEqual([], validation.validate(args, spec)) + + path_2 = os.path.join(self.workspace_dir, 'file.2') + with open(path_2, 'w') as my_file: + my_file.write('col1,col2') + args = { + 'some_number': 2, + 'directory': self.workspace_dir, + } + self.assertEqual([], validation.validate(args, spec)) + + os.remove(path_2) + self.assertFalse(os.path.exists(path_2)) + args = { + 'some_number': 2, + 'directory': self.workspace_dir, + } + # TODO: directory contents are not actually validated right now + self.assertEqual([], validation.validate(args, spec)) + def test_validation_other(self): """Validation: verify no error when 'other' type.""" from natcap.invest import validation From d70b15dae280d2f6bb34263f47a6d9a40754ec94 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 21 Feb 2024 16:01:09 -0800 Subject: [PATCH 26/30] Casting to bool outside of the expression. RE:#1509 --- src/natcap/invest/validation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index dd3571f5ea..7d3f5eb9bf 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -926,9 +926,9 @@ def validate(args, spec, spatial_overlap_opts=None): required = True if isinstance(required, str): - required = _evaluate_expression( - expression=f'bool({spec[key]["required"]})', - variable_map=expression_values) + required = bool(_evaluate_expression( + expression=f'{spec[key]["required"]}', + variable_map=expression_values)) # At this point, required is only True or False. 
if required: @@ -977,8 +977,8 @@ def validate(args, spec, spatial_overlap_opts=None): if ('required' in nested_spec and isinstance(nested_spec['required'], str)): parameter_spec[axis_key][nested_key]['required'] = ( - _evaluate_expression( - nested_spec['required'], expression_values)) + bool(_evaluate_expression( + nested_spec['required'], expression_values))) type_validation_func = _VALIDATION_FUNCS[parameter_spec['type']] From 831fd9947499bd318167fd37cbe3617f9ac0c665 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 22 Feb 2024 15:37:32 -0800 Subject: [PATCH 27/30] Small test fix from slight return value change. RE:#1509 --- tests/test_delineateit.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_delineateit.py b/tests/test_delineateit.py index a3df6a02ae..897a20661e 100644 --- a/tests/test_delineateit.py +++ b/tests/test_delineateit.py @@ -154,10 +154,9 @@ def test_delineateit_validate(self): from natcap.invest.delineateit import delineateit missing_keys = {} validation_warnings = delineateit.validate(missing_keys) - self.assertEqual(len(validation_warnings), 2) - self.assertTrue('dem_path' in validation_warnings[0][0]) - self.assertTrue('workspace_dir' in validation_warnings[0][0]) - self.assertEqual(validation_warnings[1][0], ['outlet_vector_path']) + self.assertEqual(len(validation_warnings), 1) + self.assertEqual(['dem_path', 'outlet_vector_path', 'workspace_dir'], + validation_warnings[0][0]) missing_values_args = { 'workspace_dir': '', From 4b6f93223ccf44fab45fce8f0dbb88f55f038d84 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 22 Feb 2024 15:53:55 -0800 Subject: [PATCH 28/30] HQ validation now partially covered by expressions. RE:#1509 --- src/natcap/invest/habitat_quality.py | 19 +++--------------- tests/test_habitat_quality.py | 30 ++++++++++++++++++---------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/src/natcap/invest/habitat_quality.py b/src/natcap/invest/habitat_quality.py index ff0847bf87..75c203294d 100644 --- a/src/natcap/invest/habitat_quality.py +++ b/src/natcap/invest/habitat_quality.py @@ -5,26 +5,22 @@ import os import numpy -from osgeo import gdal import pygeoprocessing import taskgraph +from osgeo import gdal -from .model_metadata import MODEL_METADATA +from . import gettext from . import spec_utils from . import utils from . import validation +from .model_metadata import MODEL_METADATA from .unit_registry import u -from . import gettext - LOGGER = logging.getLogger(__name__) MISSING_SENSITIVITY_TABLE_THREATS_MSG = gettext( 'Threats {threats} does not match any column in the sensitivity table. 
' 'Sensitivity columns: {column_names}') # (set of missing threats, set of found columns) -MISSING_COLUMN_MSG = gettext( - "The column '{column_name}' was not found in the Threat Data table for " - "the corresponding input LULC scenario.") MISSING_THREAT_RASTER_MSG = gettext( "A threat raster for threats: {threat_list} was not found or it " "could not be opened by GDAL.") @@ -1116,7 +1112,6 @@ def validate(args, limit_to=None): bad_threat_paths = [] duplicate_paths = [] threat_path_list = [] - bad_threat_columns = [] for lulc_key, lulc_arg in (('_c', 'lulc_cur_path'), ('_f', 'lulc_fut_path'), ('_b', 'lulc_bas_path')): @@ -1126,9 +1121,6 @@ def validate(args, limit_to=None): # threat_raster_folder for threat, row in threat_df.iterrows(): threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key] - if threat_table_path_col not in row: - bad_threat_columns.append(threat_table_path_col) - break # Threat path from threat CSV is relative to CSV threat_path = row[threat_table_path_col] @@ -1151,11 +1143,6 @@ def validate(args, limit_to=None): duplicate_paths.append( os.path.basename(threat_path)) - if bad_threat_columns: - validation_warnings.append(( - ['threats_table_path'], - MISSING_COLUMN_MSG.format(column_name=bad_threat_columns[0]))) - if bad_threat_paths: validation_warnings.append(( ['threats_table_path'], diff --git a/tests/test_habitat_quality.py b/tests/test_habitat_quality.py index 0e2bbebe36..24f7a7a17e 100644 --- a/tests/test_habitat_quality.py +++ b/tests/test_habitat_quality.py @@ -1,15 +1,15 @@ """Module for Regression Testing the InVEST Habitat Quality model.""" -import unittest -import tempfile -import shutil import os +import shutil +import tempfile +import unittest +import numpy +import pygeoprocessing from osgeo import gdal -from osgeo import osr from osgeo import ogr +from osgeo import osr from shapely.geometry import Polygon -import numpy -import pygeoprocessing def make_raster_from_array( @@ -1292,7 +1292,8 @@ def test_habitat_quality_validation_wrong_spatial_types(self): def test_habitat_quality_validation_missing_sens_header(self): """Habitat Quality: test validation for sens threat header.""" - from natcap.invest import habitat_quality, utils + from natcap.invest import habitat_quality + from natcap.invest import utils args = { 'half_saturation_constant': '0.5', @@ -1938,7 +1939,8 @@ def test_habitat_quality_argspec_spatial_overlap(self): def test_habitat_quality_argspec_missing_projection(self): """Habitat Quality: raise error on missing projection.""" - from natcap.invest import habitat_quality, validation + from natcap.invest import habitat_quality + from natcap.invest import validation args = { 'half_saturation_constant': '0.5', @@ -2014,7 +2016,8 @@ def test_habitat_quality_argspec_missing_projection(self): def test_habitat_quality_argspec_missing_threat_header(self): """Habitat Quality: test validate for a threat header.""" - from natcap.invest import habitat_quality, validation + from natcap.invest import habitat_quality + from natcap.invest import validation args = { 'half_saturation_constant': '0.5', @@ -2065,6 +2068,7 @@ def test_habitat_quality_argspec_missing_threat_header(self): def test_habitat_quality_validate_missing_base_column(self): """Habitat Quality: test validate for a missing base column.""" from natcap.invest import habitat_quality + from natcap.invest import validation args = { 'half_saturation_constant': '0.5', @@ -2109,13 +2113,15 @@ def test_habitat_quality_validate_missing_base_column(self): validate_result = habitat_quality.validate(args, 
limit_to=None) expected = [( ['threats_table_path'], - habitat_quality.MISSING_COLUMN_MSG.format(column_name='base_path') + validation.MESSAGES['MATCHED_NO_HEADERS'].format( + header="column", header_name="base_path") )] self.assertEqual(validate_result, expected) def test_habitat_quality_validate_missing_fut_column(self): """Habitat Quality: test validate for a missing fut column.""" from natcap.invest import habitat_quality + from natcap.invest import validation args = { 'half_saturation_constant': '0.5', @@ -2159,5 +2165,7 @@ def test_habitat_quality_validate_missing_fut_column(self): validate_result = habitat_quality.validate(args, limit_to=None) expected = [( ['threats_table_path'], - habitat_quality.MISSING_COLUMN_MSG.format(column_name='fut_path'))] + validation.MESSAGES['MATCHED_NO_HEADERS'].format( + header='column', header_name='fut_path') + )] self.assertEqual(validate_result, expected) From 617e76d9bbaaa2ecc66cf041b967d7fd2748a945 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 22 Feb 2024 15:58:45 -0800 Subject: [PATCH 29/30] Clarifying set names, restoring falsey exclusion. Set names are now clearer, and type-specific validation will no longer take place if the value is falsey. RE:#1509 --- src/natcap/invest/validation.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 7d3f5eb9bf..113fa7f304 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -915,9 +915,10 @@ def validate(args, spec, spatial_overlap_opts=None): # Phase 1: Check whether an input is required and has a value missing_keys = set() - keys_with_no_value = set() + required_keys_with_no_value = set() expression_values = { input_key: args.get(input_key, False) for input_key in spec.keys()} + keys_with_falsey_values = set() for key, parameter_spec in spec.items(): # Default required to True since this is the most common try: @@ -936,20 +937,24 @@ def validate(args, spec, spatial_overlap_opts=None): missing_keys.add(key) else: if args[key] in ('', None): - keys_with_no_value.add(key) + required_keys_with_no_value.add(key) + elif not expression_values[key]: + # Don't validate falsey values or missing (None, "") values. + keys_with_falsey_values.add(key) if missing_keys: validation_warnings.append( (sorted(missing_keys), MESSAGES['MISSING_KEY'])) - if keys_with_no_value: + if required_keys_with_no_value: validation_warnings.append( - (sorted(keys_with_no_value), MESSAGES['MISSING_VALUE'])) + (sorted(required_keys_with_no_value), MESSAGES['MISSING_VALUE'])) # Phase 2: Check whether any input with a value validates with its # type-specific check function. invalid_keys = set() - insufficient_keys = (missing_keys | keys_with_no_value) + insufficient_keys = ( + missing_keys | required_keys_with_no_value | keys_with_falsey_values) for key in set(args.keys()) - insufficient_keys: # Extra args that don't exist in the MODEL_SPEC are okay # we don't need to try to validate them From 03c7fb051b776dc87c25462ffdecb259c5384044 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 27 Feb 2024 15:37:01 -0800 Subject: [PATCH 30/30] Noting changes in history. 
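The user-facing upshot recorded in HISTORY.rst is that a spec can now require
an input based on the value of another input rather than only on its
presence. A minimal sketch of the pattern, loosely modelled on the Urban
Cooling Model entries touched earlier in this series (the spec below is
simplified, not the real one):

    from natcap.invest import validation

    spec = {
        "cc_method": {
            "name": "cooling capacity method",
            "about": "Which cooling capacity calculation to use.",
            # Stand-in type for brevity; the real spec restricts this
            # input to a fixed set of options.
            "type": "freestyle_string",
            "required": True,
        },
        "shade": {
            "name": "shade",
            "about": "Proportion of tree canopy cover.",
            "type": "ratio",
            # Required only when the user picked the 'factors' method.
            # If 'cc_method' were missing from args, the symbol would
            # evaluate to False and so would the comparison.
            "required": "cc_method == 'factors'",
        },
    }

    # Warns that 'shade' is missing, because cc_method's value is 'factors':
    validation.validate({"cc_method": "factors"}, spec)
    # No 'shade' warning here; a different method was selected:
    validation.validate({"cc_method": "intensity", "shade": 0.5}, spec)
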
RE:#1509 --- HISTORY.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 97871ef26f..da4607735f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -38,6 +38,28 @@ Unreleased Changes ------------------ +* General + * We have updated validation in several ways that will improve the + developer experience of working with InVEST models, and we hope will also + improve the user experience: + + * Symbols in conditional requirement expressions now represent the values + of parameters instead of whether the value of the parameter is + sufficient. If a symbol is not present in ``args`` as a key, the + symbol will have a value of ``False``. This allows for value-based + comparisons, which is useful in models that have overlapping modes of + operation. https://github.com/natcap/invest/issues/1509 + * Vector fields, CSV rows/columns and the 1st level of directory + contents may now all be conditionally required based on a python + expression. + * Under certain circumstances, validation may return more warnings than + before. This specifically applies to model inputs that have conditional + requirement expressions where their expression evaluates to ``False``, + and the user has provided a value for this parameter. Previous + versions of InVEST would skip these parameters' type-specific + validation. Now, these parameters will be validated with their + type-specific validation checks. + * Urban Nature Access * Fixed a ``NameError`` that occurred when running the model using search radii defined per population group with an exponential search