From 9f7d33ebb45eeb8483a5e159f6ed33b971a8d92b Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 30 Jan 2025 15:57:13 +0000 Subject: [PATCH 1/4] Allow config_merge to concatenate lists with matching keys --- tests/core/test_config.py | 14 ++++++++++++++ virtual_ecosystem/core/config.py | 21 +++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/tests/core/test_config.py b/tests/core/test_config.py index 291e97eb0..6338461ef 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -135,6 +135,20 @@ ("b.bb.bbb.bbba.bbbaa",), id="conflict_complex", ), + pytest.param( + {"d1": {"d2": [1, 2, 3]}}, + {"d1": {"d2": [4, 5]}}, + {"d1": {"d2": [1, 2, 3, 4, 5]}}, + (), + id="no_conflict_list_merge", + ), + pytest.param( + {"d1": {"d2": [1, 2, 3]}}, + {"d1": {"d2": "a"}}, + {"d1": {"d2": "a"}}, + ("d1.d2",), + id="conflict_list_and_not_list", + ), ], ) def test_config_merge(dest, source, exp_result, exp_conflicts): diff --git a/virtual_ecosystem/core/config.py b/virtual_ecosystem/core/config.py index 2181546a0..d2176a85f 100644 --- a/virtual_ecosystem/core/config.py +++ b/virtual_ecosystem/core/config.py @@ -34,10 +34,20 @@ def config_merge( """Recursively merge two dictionaries detecting duplicated key definitions. This function returns a copy of the input ``dest`` dictionary that has been extended - recursively with the entries from the input ``source`` dictionary. The two input - dictionaries must not share any key paths and when duplicated key paths are - found, the value from the source dictionary is used and the function extends the - returned ``conflicts`` tuple with the duplicated key path. + recursively with the entries from the input ``source`` dictionary. + + In general, if two input dictionaries share complete key paths (that is a set of + nested dictionary keys leading to a value) then that indicates a duplicated setting. + The values might be identical, but the configuration files should not duplicate + settings. When duplicated key paths are found, the value from the source dictionary + is used and the function extends the returned ``conflicts`` tuple with the + duplicated key path. + + However an exception is where both entries are lists - resulting from a TOML array + of tables (https://toml.io/en/v1.0.0#array-of-tables). In this case, it is + reasonable to append the source values to the destination values. The motivating + example here are `[[core.data.variable]]` entries, which can quite reasonably be + split across files. Args: dest: A dictionary to extend @@ -66,6 +76,9 @@ def config_merge( dest[src_key], conflicts = config_merge( dest_val, src_val, conflicts=conflicts, path=next_path ) + elif isinstance(dest_val, list) and isinstance(src_val, list): + # Both values for this key are lists, so merge the lists + dest[src_key] = [*dest_val, *src_val] elif dest_val is None: # The key is not currently in dest, so add the key value pair dest[src_key] = src_val From 7d4fee768e80a35d0cfe426793b8e72f825ead79 Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 30 Jan 2025 16:00:28 +0000 Subject: [PATCH 2/4] Note lack of validation --- virtual_ecosystem/core/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virtual_ecosystem/core/config.py b/virtual_ecosystem/core/config.py index d2176a85f..cd9733450 100644 --- a/virtual_ecosystem/core/config.py +++ b/virtual_ecosystem/core/config.py @@ -47,7 +47,9 @@ def config_merge( of tables (https://toml.io/en/v1.0.0#array-of-tables). In this case, it is reasonable to append the source values to the destination values. The motivating example here are `[[core.data.variable]]` entries, which can quite reasonably be - split across files. + split across configuration sources. Note that no attempt is made to check that the + combined values are congruent - this is deferred to error handling when the + configuration is used. Args: dest: A dictionary to extend From 23f8cca9c9488708586c36ec4ab5e0ca74aea4f7 Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 30 Jan 2025 16:03:52 +0000 Subject: [PATCH 3/4] Added test to explicitly note what config_merge does not do with lists --- tests/core/test_config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/core/test_config.py b/tests/core/test_config.py index 6338461ef..6642a6459 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -142,6 +142,15 @@ (), id="no_conflict_list_merge", ), + # The next example passes just fine, which is intentional, but the test is here + # to highlight the behaviour + pytest.param( + {"d1": {"d2": [1, 2, 3]}}, + {"d1": {"d2": [{"file": "a_path"}]}}, + {"d1": {"d2": [1, 2, 3, {"file": "a_path"}]}}, + (), + id="no_conflict_list_merge_dubious_content", + ), pytest.param( {"d1": {"d2": [1, 2, 3]}}, {"d1": {"d2": "a"}}, From e8d197c47016fc1378b6b4aedc8142468087923b Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 30 Jan 2025 16:21:00 +0000 Subject: [PATCH 4/4] Updating docs --- docs/source/using_the_ve/data/data.md | 7 +++---- virtual_ecosystem/core/data.py | 10 +++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/source/using_the_ve/data/data.md b/docs/source/using_the_ve/data/data.md index 2ca2ebb69..dccbaed8a 100644 --- a/docs/source/using_the_ve/data/data.md +++ b/docs/source/using_the_ve/data/data.md @@ -210,10 +210,9 @@ file="'../../data/xy_dim.nc'" var_name="temp" ``` -**NOTE**: At the moment, -`core.data.variable` tags cannot be used across multiple toml config files without -causing `ConfigurationError: Duplicated entries in config files: core.data.variable` to -be raised. This means that all variables need to be combined in one `config` file. +You can include `core.data.variable` tags in different files. This can be useful to +group model-specific data with other model configuration options, and allow +configuration files to be swapped in a more modular fashion. To load configuration data , you will typically use the `cfg_paths` argument to pass one or more TOML formatted configuration files to create a diff --git a/virtual_ecosystem/core/data.py b/virtual_ecosystem/core/data.py index f6c88c227..8d80808f2 100644 --- a/virtual_ecosystem/core/data.py +++ b/virtual_ecosystem/core/data.py @@ -107,11 +107,11 @@ [[core.data.variable]] var_name="elev" -Data configurations must not contain repeated data variable names. NOTE: At the moment, -```core.data.variable``` tags cannot be used across multiple toml config files without -causing ```ConfigurationError: Duplicated entries in config files: core.data.variable``` -to be raised. This means that all variables need to be combined in one ```config``` -file. +Data configurations must not contain repeated data variable names. + +You can include ```core.data.variable``` tags in different files. This can be useful to +group model-specific data with other model configuration options, and allow +configuration files to be swapped in a more modular fashion. .. code-block:: python