Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the ~ merge operation #234

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions docs/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,41 @@ attribute, matching regular expressions are replaced by an empty
string. For dictionaries it's possible to provide list of keys
which should be removed.

Substitution of current values can be done by appending a ``~``
suffix to the key name. The pattern and replacement need to be
provided as a value in the form ``<d>PATTERN<d>REPLACEMENT<d>``,
where ``<d>`` is a delimiter. Any character can serve as the
delimiter, but it then cannot appear within the PATTERN or
REPLACEMENT text because escaping is not supported. The value can
be either a single string or a list of strings; items of a list
are applied one after another in the given order.

The substitution is performed using `re.sub`__, so all features of
Python regular expressions can be used (named groups, backreferences, ...).

In the fmf file it is better to use single quotes ``'`` as they do
not need such intensive escaping::

require~: ';^foo;foo-ng;'
recommend~:
- '/python2-/python3-/'

__ https://docs.python.org/3/library/re.html#re.sub

Removing a parent value only if it matches a regular expression is
done using the ``-~`` suffix. If the value matches any of the
provided `regular expressions`__, it is removed. If the parent
value is a list, the matching items are removed from the list. If
the parent value is a string, it is set to an empty string. If the
parent value is a dictionary, the matching keys are removed. The
regular expressions can be given as a single string or as a list
of strings::

description-~: '.*'
require-~:
- 'python2.*'

__ https://docs.python.org/3/library/re.html#regular-expression-syntax


Elasticity
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
15 changes: 15 additions & 0 deletions examples/merge/parent.fmf
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,18 @@ very:
x+: 1
y-: 1
z: 0

# Regexp used to change values - note that the second tags rule never
# matches because the first rule has already changed 'Tier2' into 't2'
/regexp:
description~: "/.en(.*) .*/gen\\1/"
tags~:
- '/Tier(.)/t\1/'
- /Tier2/t3/

/minus-regexp:
description-~: '.*'
tags-~:
- '.ier1'
vars-~:
- 'y'
47 changes: 47 additions & 0 deletions fmf/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,49 @@ def _merge_plus(self, data, key, value, prepend=False):
"MergeError: Key '{0}' in {1} ({2}).".format(
key, self.name, str(error)))

def _merge_regexp(self, data, key, value):
    """
    Handle substitution of current values using the '~' suffix

    Each rule in ``value`` has the ``<d>PATTERN<d>REPLACEMENT<d>``
    form, is parsed by ``utils.split_pattern_replacement`` and then
    applied to ``data[key]`` using ``re.sub``. Rules are applied one
    after another, each one working on the result of the previous one.

    :param data: dictionary holding the current values, updated in place
    :param key: name of the key to modify (without the suffix)
    :param value: a single rule (string) or a list of rules
    :raises utils.MergeError: if the key is not present in ``data``,
        if the current value is neither a string nor a list, or if
        the list contains an item which is not a string
    """
    # Nothing to substitute if the key is not present, report it as
    # a merge problem instead of leaking a bare KeyError
    if key not in data:
        raise utils.MergeError(
            "MergeError: Key '{0}' in {1} (not inherited).".format(
                key, self.name))
    if isinstance(value, str):
        value = [value]
    # Parse all rules first so that a malformed rule is reported
    # before any substitution modifies the data
    rules = [utils.split_pattern_replacement(rule) for rule in value]
    for pattern, replacement in rules:
        current = data[key]
        if isinstance(current, list):
            try:
                data[key] = [
                    re.sub(pattern, replacement, original)
                    for original in current]
            except TypeError as error:
                # re.sub refuses to work on non-string list items
                raise utils.MergeError(
                    "MergeError: Key '{0}' in {1} (not a string).".format(
                        key, self.name)) from error
        elif isinstance(current, str):
            data[key] = re.sub(pattern, replacement, current)
        else:
            raise utils.MergeError(
                "MergeError: Key '{0}' in {1} (wrong type).".format(
                    key, self.name))

def _merge_minus_regexp(self, data, key, value):
""" Handle removing current values if they match regexp """
# A bit faster but essentially `any`
def lazy_any_search(item, patterns):
for p in patterns:
if re.search(p, str(item)):
return True
return False
if isinstance(value, str):
value = [value]
if isinstance(data[key], list):
data[key] = [item for item in data[key] if not lazy_any_search(item, value)]
elif isinstance(data[key], str):
if lazy_any_search(data[key], value):
data[key] = ''
elif isinstance(data[key], dict):
for k in list(data[key].keys()):
if lazy_any_search(k, value):
data[key].pop(k)
else:
raise utils.MergeError(
"MergeError: Key '{0}' in {1} (wrong type).".format(
key, self.name))

def _merge_minus(self, data, key, value):
""" Handle reducing attributes using the '-' suffix """
# Cannot reduce attribute if key is not present in parent
Expand Down Expand Up @@ -225,8 +268,12 @@ def _merge_special(self, data, source):
self._merge_plus(data, key.rstrip('+'), value)
elif key.endswith('+<'):
self._merge_plus(data, key.rstrip('+<'), value, prepend=True)
elif key.endswith('-~'):
self._merge_minus_regexp(data, key.rstrip('-~'), value)
elif key.endswith('-'):
self._merge_minus(data, key.rstrip('-'), value)
elif key.endswith('~'):
self._merge_regexp(data, key.rstrip('~'), value)
# Otherwise just update the value
else:
data[key] = value
Expand Down
34 changes: 34 additions & 0 deletions fmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,3 +948,37 @@ def validate_data(data, schema, schema_store=None):
jsonschema.exceptions.UnknownType
) as error:
raise JsonSchemaError(f'Errors found in provided schema: {error}')


class PatternReplacement(NamedTuple):
    """ Pattern and replacement pair parsed from a substitution rule """

    # Regular expression to search for
    pattern: str
    # Text to put in place of the matched part
    replacement: str


def split_pattern_replacement(source):
    """
    Splits pattern/replacement string input into parts

    Format of the input:
    <delimiter><PATTERN><delimiter><REPLACEMENT><delimiter>

    Delimiter set by the first character of the input and this character
    cannot be used in the PATTERN or REPLACEMENT text. Escaping is not
    supported.

    Returns a PatternReplacement tuple. The REPLACEMENT part is allowed
    to be empty, the PATTERN part is not.

    Raises FormatError if the input is not a string, is empty, does not
    end with the delimiter, does not consist of exactly two parts or if
    the pattern is empty.
    """

    # Values come from user data and can be of any type (number,
    # dictionary...), report these as a format problem as well
    if not isinstance(source, str):
        raise FormatError(
            "'{0}' has to be a string in the form of "
            "'<d>PATTERN<d>REPLACEMENT<d>'.".format(source))
    if not source:
        raise FormatError(
            "'{0}' has to start and end with the same delimiter.".format(source))

    delimiter = source[0]
    if source[-1] != delimiter:
        raise FormatError(
            "'{0}' has to end with '{1}'.".format(source, delimiter))

    # Exactly one delimiter is expected between the outer ones
    parts = source[1:-1].split(delimiter)
    if len(parts) != 2:
        raise FormatError("'{0}' can't be split in two parts".format(source))
    pattern, replacement = parts

    if not pattern:
        raise FormatError("Pattern cannot be empty: '{0}'.".format(source))
    return PatternReplacement(pattern=pattern, replacement=replacement)
15 changes: 15 additions & 0 deletions tests/unit/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,21 @@ def test_merge_minus(self):
child.data["time-"] = "bad"
child.inherit()

def test_merge_regexp(self):
    """ Values are substituted using re.sub during the merge """
    merged = self.merge.find('/parent/regexp').data
    assert merged['description'] == 'general'
    # Tier2 has already been turned into t2 by the first rule,
    # so the following /Tier2/t3/ rule finds nothing to replace.
    assert merged['tags'] == ['t1', 't2']

def test_merge_minus_regexp(self):
    """ Values matching a regexp are removed by the '-~' operation """
    merged = self.merge.find('/parent/minus-regexp').data
    # String is emptied, list item and dictionary key are dropped
    assert merged['description'] == ''
    assert merged['tags'] == ['Tier2']
    assert merged['vars'] == {'x': 1}

def test_merge_deep(self):
""" Merging a deeply nested dictionary """
child = self.merge.find('/parent/buried')
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,3 +483,25 @@ def test_sort(self):
data = dict(y=2, x=1)
assert fmf.utils.dict_to_yaml(data) == "y: 2\nx: 1\n"
assert fmf.utils.dict_to_yaml(data, sort=True) == "x: 1\ny: 2\n"


class TestSplitPatternReplacement:
    """ Parsing of the '<d>PATTERN<d>REPLACEMENT<d>' input """

    @pytest.mark.parametrize("src", [
        "",  # Empty input
        "/",  # Not long enough input
        ";/;",  # Not long enough - missing 'REPL;'
        ";;;",  # PATTERN and REPL parts empty
        "/a/b/c",  # Input after trailing delimiter
        "/a/b/c/d/",  # More than 3 delimiters
        "/x;y/",  # No REPL part as delimiter is /
        "/a/a;",  # No trailing delimiter
        ])
    def test_invalid(self, src):
        """ Malformed input is rejected with FormatError """
        with pytest.raises(utils.FormatError):
            utils.split_pattern_replacement(src)

    def test_simple(self):
        """ Well-formed input is split into pattern and replacement """
        expected = {
            '/a/b/': utils.PatternReplacement('a', 'b'),
            ';ac/dc;rose;': utils.PatternReplacement('ac/dc', 'rose'),
            }
        for source, result in expected.items():
            assert utils.split_pattern_replacement(source) == result
Loading