Skip to content

Commit

Permalink
Implement the '~' merge operation
Browse files Browse the repository at this point in the history
Co-authored-by: Cristian Le <[email protected]>
Co-authored-by: Petr Šplíchal <[email protected]>
  • Loading branch information
3 people committed Jun 5, 2024
1 parent 673c83c commit 7793f92
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 0 deletions.
35 changes: 35 additions & 0 deletions docs/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,41 @@ attribute, matching regular expressions are replaced by an empty
string. For dictionaries it's possible to provide list of keys
which should be removed.

Substitution of current values can be done by appending a ``~``
suffix to the key name. The pattern and replacement parameters
need to be provided as values in the form of
``<d>PATTERN<d>REPLACEMENT<d>``, where ``<d>`` is a delimiter.
The delimiter can be any character, but that character cannot
then be used within the PATTERN or REPLACEMENT text because
escaping is not supported. The input can be either a string or
a list of strings.

The `re.sub`__ function is used to perform the substitution, so
all features of ``re.Pattern`` can be used (named groups,
backreferences...).

In the fmf file it is better to use single quotes ``'`` as they do
not need such intensive escaping::

require~: ';^foo;foo-ng;'
recommend~:
- '/python2-/python3-/'

__ https://docs.python.org/3/library/re.html#re.sub

Removing a parent value only if it matches a regular expression
is done using the ``-~`` suffix. If the value matches any of the
provided `regular expressions`__ it is removed. If the parent
value is a list, the matching items are removed from this list.
If the parent value is a string, the value is set to an empty
string. If the parent value is a dictionary, the matching keys
are removed. The regular expressions can be given as a single
string or as a list of strings::

description-~: '.*'
require-~:
- 'python2.*'

__ https://docs.python.org/3/library/re.html#regular-expression-syntax


Elasticity
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
15 changes: 15 additions & 0 deletions examples/merge/parent.fmf
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,18 @@ very:
x+: 1
y-: 1
z: 0

# Regexp used to change values - note that the second tag regexp cannot be applied
# as Tier2 is already modified into 't2'
/regexp:
description~: "/.en(.*) .*/gen\\1/"
tags~:
- '/Tier(.)/t\1/'
- /Tier2/t3/

/minus-regexp:
description-~: '.*'
tags-~:
- '.ier1'
vars-~:
- 'y'
47 changes: 47 additions & 0 deletions fmf/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,49 @@ def _merge_plus(self, data, key, value, prepend=False):
"MergeError: Key '{0}' in {1} ({2}).".format(
key, self.name, str(error)))

def _merge_regexp(self, data, key, value):
    """
    Handle substitution of current values

    Each rule in ``value`` has the ``<d>PATTERN<d>REPLACEMENT<d>``
    form and is applied to ``data[key]`` using ``re.sub``. Rules are
    applied one after another, so a later rule operates on the
    result of the earlier ones.

    :param data: dictionary with the merged data, updated in place
    :param key: name of the key to modify (without the ``~`` suffix)
    :param value: a single rule string or a list of rule strings
    :raises utils.MergeError: if the key is not present in ``data``,
        or the current value is neither a string nor a list of
        strings
    """
    # There is nothing to substitute in without an existing value;
    # report it as a merge problem instead of leaking a KeyError
    if key not in data:
        raise utils.MergeError(
            "MergeError: Key '{0}' in {1} (not inherited).".format(
                key, self.name))

    if isinstance(value, str):
        value = [value]

    # All rules are parsed before the first substitution is done, a
    # malformed rule thus never leaves the data partially modified
    rules = [utils.split_pattern_replacement(rule) for rule in value]

    for pattern, replacement in rules:
        current = data[key]
        if isinstance(current, list):
            try:
                data[key] = [
                    re.sub(pattern, replacement, original)
                    for original in current]
            # re.sub() refuses anything but strings
            except TypeError as error:
                raise utils.MergeError(
                    "MergeError: Key '{0}' in {1} (not a string).".format(
                        key, self.name)) from error
        elif isinstance(current, str):
            data[key] = re.sub(pattern, replacement, current)
        else:
            raise utils.MergeError(
                "MergeError: Key '{0}' in {1} (wrong type).".format(
                    key, self.name))

def _merge_minus_regexp(self, data, key, value):
""" Handle removing current values if they match regexp """
# A bit faster but essentially `any`
def lazy_any_search(item, patterns):
for p in patterns:
if re.search(p, str(item)):
return True
return False
if isinstance(value, str):
value = [value]
if isinstance(data[key], list):
data[key] = [item for item in data[key] if not lazy_any_search(item, value)]
elif isinstance(data[key], str):
if lazy_any_search(data[key], value):
data[key] = ''
elif isinstance(data[key], dict):
for k in list(data[key].keys()):
if lazy_any_search(k, value):
data[key].pop(k)
else:
raise utils.MergeError(
"MergeError: Key '{0}' in {1} (wrong type).".format(
key, self.name))

def _merge_minus(self, data, key, value):
""" Handle reducing attributes using the '-' suffix """
# Cannot reduce attribute if key is not present in parent
Expand Down Expand Up @@ -225,8 +268,12 @@ def _merge_special(self, data, source):
self._merge_plus(data, key.rstrip('+'), value)
elif key.endswith('+<'):
self._merge_plus(data, key.rstrip('+<'), value, prepend=True)
elif key.endswith('-~'):
self._merge_minus_regexp(data, key.rstrip('-~'), value)
elif key.endswith('-'):
self._merge_minus(data, key.rstrip('-'), value)
elif key.endswith('~'):
self._merge_regexp(data, key.rstrip('~'), value)
# Otherwise just update the value
else:
data[key] = value
Expand Down
34 changes: 34 additions & 0 deletions fmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,3 +937,37 @@ def validate_data(data, schema, schema_store=None):
jsonschema.exceptions.UnknownType
) as error:
raise JsonSchemaError(f'Errors found in provided schema: {error}')


class PatternReplacement(NamedTuple):
    """ Pattern and replacement pair parsed from a substitution rule """
    # Text found between the first and the second delimiter
    pattern: str
    # Text found between the second and the closing delimiter
    replacement: str


def split_pattern_replacement(source):
    """
    Splits pattern/replacement string input into parts

    Format of the input:
        <delimiter><PATTERN><delimiter><REPLACEMENT><delimiter>

    Delimiter set by the first character of the input and this character
    cannot be used in the PATTERN or REPLACEMENT text. Escaping is not
    supported.

    :param source: string in the format described above
    :returns: PatternReplacement with the pattern and replacement text
    :raises FormatError: if the input is not a string, does not start
        and end with the same delimiter, does not consist of exactly
        two parts or the pattern is empty
    """

    # Values come from user data and thus do not have to be strings,
    # e.g. a number in the list of rules cannot be parsed at all
    if not isinstance(source, str):
        raise FormatError("'{0}' has to be a string.".format(source))

    # Empty input does not even define the delimiter
    if not source:
        raise FormatError(
            "'{0}' has to start and end with the same delimiter.".format(source))

    delimiter = source[0]
    if source[-1] != delimiter:
        raise FormatError("'{0}' has to end with '{1}'.".format(source, delimiter))

    # With the enclosing delimiters stripped exactly one delimiter has
    # to remain, separating the pattern from the replacement
    parts = source[1:-1].split(delimiter)
    if len(parts) != 2:
        raise FormatError("'{0}' can't be split in two parts".format(source))
    pattern, replacement = parts

    # Replacement may be empty (removes the match), pattern may not
    if not pattern:
        raise FormatError("Pattern cannot be empty: '{0}'.".format(source))
    return PatternReplacement(pattern=pattern, replacement=replacement)
15 changes: 15 additions & 0 deletions tests/unit/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,21 @@ def test_merge_minus(self):
child.data["time-"] = "bad"
child.inherit()

def test_merge_regexp(self):
    """ Substitute inherited values using the '~' suffix """
    node = self.merge.find('/parent/regexp')
    assert node.data['description'] == 'general'
    # Rules are applied in the given order: once the first rule
    # turns Tier2 into t2 the /Tier2/t3/ rule has nothing to match.
    assert node.data['tags'] == ['t1', 't2']

def test_merge_minus_regexp(self):
    """ Remove inherited values matching a regexp using '-~' """
    node = self.merge.find('/parent/minus-regexp')
    expected = {
        'description': '',
        'tags': ['Tier2'],
        'vars': {'x': 1},
    }
    for key, value in expected.items():
        assert node.data[key] == value

def test_merge_deep(self):
""" Merging a deeply nested dictionary """
child = self.merge.find('/parent/buried')
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,25 @@ def test_sort(self):
data = dict(y=2, x=1)
assert fmf.utils.dict_to_yaml(data) == "y: 2\nx: 1\n"
assert fmf.utils.dict_to_yaml(data, sort=True) == "x: 1\ny: 2\n"


class TestSplitPatternReplacement:
    """ Parsing of the '<d>PATTERN<d>REPLACEMENT<d>' rules """

    @pytest.mark.parametrize("source", [
        "",  # Empty input
        "/",  # Not long enough input
        ";/;",  # Not long enough - missing 'REPL;'
        ";;;",  # PATTERN and REPL parts empty
        "/a/b/c",  # Input after trailing delimiter
        "/a/b/c/d/",  # More than 3 delimiters
        "/x;y/",  # No REPL part as delimiter is /
        "/a/a;",  # No trailing delimiter
        ])
    def test_invalid(self, source):
        """ Malformed rules are rejected """
        with pytest.raises(utils.FormatError):
            utils.split_pattern_replacement(source)

    def test_simple(self):
        """ Well-formed rules are split into pattern and replacement """
        expected = {
            '/a/b/': utils.PatternReplacement('a', 'b'),
            ';ac/dc;rose;': utils.PatternReplacement('ac/dc', 'rose'),
            }
        for source, result in expected.items():
            assert utils.split_pattern_replacement(source) == result

0 comments on commit 7793f92

Please sign in to comment.