Implement the '~' merge operation

Co-authored-by: Cristian Le <[email protected]> Co-authored-by: Petr Šplíchal <[email protected]>
teemtee · Jun 5, 2024 · 7793f92 · 7793f92
1 parent 673c83c
commit 7793f92
Show file tree

Hide file tree

Showing 6 changed files with 168 additions and 0 deletions.
diff --git a/docs/features.rst b/docs/features.rst
@@ -121,6 +121,41 @@ attribute, matching regular expressions are replaced by an empty
 string. For dictionaries it's possible to provide list of keys
 which should be removed.
 
+Substitution of current values can be done by appending a ``~``
+suffix to the key name. The pattern and replacement parameters
+need to be provided as values in the form of
+``<d>PATTERN<d>REPLACEMENT<d>``, where ``<d>`` is a delimiter.
+The delimiter can be any character, but it cannot then be used
+within the PATTERN and REPLACEMENT text because escaping is not
+supported. The input can be either a string or a list of strings.
+
+`re.sub`__ is used to do the substitution, so all features of
+``re.Pattern`` can be used (named groups, backreferences...).
+
+In the fmf file it is better to use single quotes ``'`` as they do
+not need such intensive escaping::
+
+    require~: ';^foo;foo-ng;'
+    recommend~:
+      - '/python2-/python3-/'
+
+__ https://docs.python.org/3/library/re.html#re.sub
+
+Removing a parent value only if it matches a regular expression
+is done using the ``-~`` suffix. If the value matches any of the
+provided `regular expressions`__ it is removed. If the parent
+value is a list, the matching items are removed from this list.
+If the parent value is a string, the value is set to an empty
+string. If the parent value is a dictionary, the matching keys
+are removed. The regular expressions can be given as a single
+string or as a list of strings::
+
+    description-~: '.*'
+    require-~:
+      - 'python2.*'
+
+__ https://docs.python.org/3/library/re.html#regular-expression-syntax
+
 
 Elasticity
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/examples/merge/parent.fmf b/examples/merge/parent.fmf
@@ -36,3 +36,18 @@ very:
                 x+: 1
                 y-: 1
                 z: 0
+
+# Regexp used to change values - note that the second tag regexp cannot be applied
+# as Tier2 is already modified into 't2'
+/regexp:
+    description~: "/.en(.*) .*/gen\\1/"
+    tags~:
+    - '/Tier(.)/t\1/'
+    - /Tier2/t3/
+
+/minus-regexp:
+    description-~: '.*'
+    tags-~:
+    - '.ier1'
+    vars-~:
+    - 'y'
diff --git a/fmf/base.py b/fmf/base.py
@@ -191,6 +191,49 @@ def _merge_plus(self, data, key, value, prepend=False):
                 "MergeError: Key '{0}' in {1} ({2}).".format(
                     key, self.name, str(error)))
 
+    def _merge_regexp(self, data, key, value):
+        """ Handle substitution of current values """
+        if isinstance(value, str):
+            value = [value]
+        for pattern, replacement in [utils.split_pattern_replacement(v) for v in value]:
+            if isinstance(data[key], list):
+                try:
+                    data[key] = [re.sub(pattern, replacement, original) for original in data[key]]
+                except TypeError:
+                    raise utils.MergeError(
+                        "MergeError: Key '{0}' in {1} (not a string).".format(
+                            key, self.name))
+            elif isinstance(data[key], str):
+                data[key] = re.sub(pattern, replacement, data[key])
+            else:
+                raise utils.MergeError(
+                    "MergeError: Key '{0}' in {1} (wrong type).".format(
+                        key, self.name))
+
+    def _merge_minus_regexp(self, data, key, value):
+        """ Handle removing current values if they match regexp """
+        # A bit faster but essentially `any`
+        def lazy_any_search(item, patterns):
+            for p in patterns:
+                if re.search(p, str(item)):
+                    return True
+            return False
+        if isinstance(value, str):
+            value = [value]
+        if isinstance(data[key], list):
+            data[key] = [item for item in data[key] if not lazy_any_search(item, value)]
+        elif isinstance(data[key], str):
+            if lazy_any_search(data[key], value):
+                data[key] = ''
+        elif isinstance(data[key], dict):
+            for k in list(data[key].keys()):
+                if lazy_any_search(k, value):
+                    data[key].pop(k)
+        else:
+            raise utils.MergeError(
+                "MergeError: Key '{0}' in {1} (wrong type).".format(
+                    key, self.name))
+
     def _merge_minus(self, data, key, value):
         """ Handle reducing attributes using the '-' suffix """
         # Cannot reduce attribute if key is not present in parent
@@ -225,8 +268,12 @@ def _merge_special(self, data, source):
                 self._merge_plus(data, key.rstrip('+'), value)
             elif key.endswith('+<'):
                 self._merge_plus(data, key.rstrip('+<'), value, prepend=True)
+            elif key.endswith('-~'):
+                self._merge_minus_regexp(data, key.rstrip('-~'), value)
             elif key.endswith('-'):
                 self._merge_minus(data, key.rstrip('-'), value)
+            elif key.endswith('~'):
+                self._merge_regexp(data, key.rstrip('~'), value)
             # Otherwise just update the value
             else:
                 data[key] = value

diff --git a/fmf/utils.py b/fmf/utils.py
@@ -937,3 +937,37 @@ def validate_data(data, schema, schema_store=None):
             jsonschema.exceptions.UnknownType
             ) as error:
         raise JsonSchemaError(f'Errors found in provided schema: {error}')
+
+
+class PatternReplacement(NamedTuple):
+    """ Pattern and replacement pair, see split_pattern_replacement() """
+    # The PATTERN part of the input
+    pattern: str
+    # The REPLACEMENT part of the input
+    replacement: str
+
+
+def split_pattern_replacement(source):
+    """
+    Splits pattern/replacement string input into parts
+
+    Format of the input:
+        <delimiter><PATTERN><delimiter><REPLACEMENT><delimiter>
+
+    Delimiter set by the first character of the input and this character
+    cannot be used in the PATTERN or REPLACEMENT text. Escaping is not
+    supported.
+    """
+
+    try:
+        delimiter = source[0]
+        if source[-1] != delimiter:
+            raise FormatError("'{0}' has to end with '{1}'.".format(source, delimiter))
+    except IndexError:
+        raise FormatError("'{0}' has to start and end with the same delimiter.".format(source))
+
+    try:
+        pattern, replacement = source[1:-1].split(delimiter)
+    except ValueError:
+        raise FormatError("'{0}' can't be split in two parts".format(source))
+
+    if not pattern:
+        raise FormatError("Pattern cannot be empty: '{0}'.".format(source))
+    return PatternReplacement(pattern=pattern, replacement=replacement)
diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
@@ -178,6 +178,21 @@ def test_merge_minus(self):
             child.data["time-"] = "bad"
             child.inherit()
 
+    def test_merge_regexp(self):
+        """ Do re.sub during the merge """
+        child = self.merge.find('/parent/regexp')
+        assert 'general' == child.data['description']
+        # First rule changes the Tier2 into t2,
+        # thus /Tier2/t3/ no longer matches.
+        assert ['t1', 't2'] == child.data['tags']
+
+    def test_merge_minus_regexp(self):
+        """ Merging with '-~' operation """
+        child = self.merge.find('/parent/minus-regexp')
+        assert '' == child.data['description']
+        assert ['Tier2'] == child.data['tags']
+        assert {'x': 1} == child.data['vars']
+
     def test_merge_deep(self):
         """ Merging a deeply nested dictionary """
         child = self.merge.find('/parent/buried')

diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
@@ -456,3 +456,25 @@ def test_sort(self):
         data = dict(y=2, x=1)
         assert fmf.utils.dict_to_yaml(data) == "y: 2\nx: 1\n"
         assert fmf.utils.dict_to_yaml(data, sort=True) == "x: 1\ny: 2\n"
+
+
+class TestSplitPatternReplacement:
+    @pytest.mark.parametrize("src", [
+        "",  # Empty input
+        "/",  # Not long enough input
+        ";/;",  # Not long enough - missing 'REPL;'
+        ";;;",  # PATTERN and REPL parts empty
+        "/a/b/c",  # Input after trailing deliminer
+        "/a/b/c/d/",  # More than 3 delimiters
+        "/x;y/",  # No REPL part as delimiter is /
+        "/a/a;"  # No trailing delimiter
+        ])
+    def test_invalid(self, src):
+        with pytest.raises(utils.FormatError):
+            utils.split_pattern_replacement(src)
+
+    def test_simple(self):
+        assert utils.split_pattern_replacement(
+            '/a/b/') == utils.PatternReplacement('a', 'b')
+        assert utils.split_pattern_replacement(
+            ';ac/dc;rose;') == utils.PatternReplacement('ac/dc', 'rose')