From 734e0a85ced5654321b414c104c977871586315d Mon Sep 17 00:00:00 2001 From: Inessa Vasilevskaya Date: Wed, 8 Mar 2023 12:12:51 +0100 Subject: [PATCH 1/4] Fix false positive non-utf symlinks reported Because of a botched check, valid UTF-8 symlinks were reported as non-UTF ones when running under Python 2. OAMG-8629 --- .../system_upgrade/common/actors/rootscanner/actor.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/repos/system_upgrade/common/actors/rootscanner/actor.py b/repos/system_upgrade/common/actors/rootscanner/actor.py index 515fd7d790..dc02c7a271 100644 --- a/repos/system_upgrade/common/actors/rootscanner/actor.py +++ b/repos/system_upgrade/common/actors/rootscanner/actor.py @@ -28,8 +28,16 @@ def _create_a_subdir(subdir_cls, name, path): return subdir_cls(name=name) for subdir in os.listdir('/'): - # Note(ivasilev) non-utf encoded string will appear as byte strings + # Note(ivasilev) in py3 env non-utf encoded string will appear as byte strings + # However in py2 env subdir will be always of str type, so verification if this is a valid utf-8 string + # should be done differently than formerly suggested plain six.binary_type check + decoded = True if isinstance(subdir, six.binary_type): + try: + subdir.decode('utf-8') + except (AttributeError, UnicodeDecodeError): + decoded = False + if not decoded: invalid_subdirs.append(_create_a_subdir(InvalidRootSubdirectory, subdir, os.path.join(b'/', subdir))) else: subdirs.append(_create_a_subdir(RootSubdirectory, subdir, os.path.join('/', subdir))) From cd490a7ef9b7cf0be357571c8a269feb6982e964 Mon Sep 17 00:00:00 2001 From: Inessa Vasilevskaya Date: Thu, 23 Mar 2023 15:35:08 +0100 Subject: [PATCH 2/4] Refactor rootscanner to use library Also introduce tests for the nonutf symlinks --- commands/upgrade/breadcrumbs.py | 2 -- .../common/actors/rootscanner/actor.py | 32 ++--------------- .../rootscanner/libraries/rootscanner.py | 34 +++++++++++++++++++ .../rootscanner/tests/test_rootscanner.py | 31
+++++++++++++++++ 4 files changed, 68 insertions(+), 31 deletions(-) create mode 100644 repos/system_upgrade/common/actors/rootscanner/libraries/rootscanner.py create mode 100644 repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py diff --git a/commands/upgrade/breadcrumbs.py b/commands/upgrade/breadcrumbs.py index 61660fb1e2..16903ee00f 100644 --- a/commands/upgrade/breadcrumbs.py +++ b/commands/upgrade/breadcrumbs.py @@ -128,8 +128,6 @@ def _get_packages(self): def _verify_leapp_pkgs(self): if not os.environ.get('LEAPP_IPU_IN_PROGRESS'): - # NOTE(ivasilev) this can happen if LEAPP_DEVEL_TARGET_RELEASE is specified and pointing to an impossible - # version return [] upg_path = os.environ.get('LEAPP_IPU_IN_PROGRESS').split('to') cmd = ['/bin/bash', '-c', 'rpm -V leapp leapp-upgrade-el{}toel{}'.format(upg_path[0], upg_path[1])] diff --git a/repos/system_upgrade/common/actors/rootscanner/actor.py b/repos/system_upgrade/common/actors/rootscanner/actor.py index dc02c7a271..a3fbb55d72 100644 --- a/repos/system_upgrade/common/actors/rootscanner/actor.py +++ b/repos/system_upgrade/common/actors/rootscanner/actor.py @@ -1,9 +1,6 @@ -import os - -import six - from leapp.actors import Actor -from leapp.models import InvalidRootSubdirectory, RootDirectory, RootSubdirectory +from leapp.libraries.actor.rootscanner import scan_dir +from leapp.models import RootDirectory from leapp.tags import FactsPhaseTag, IPUWorkflowTag @@ -19,27 +16,4 @@ class RootScanner(Actor): tags = (IPUWorkflowTag, FactsPhaseTag) def process(self): - subdirs = [] - invalid_subdirs = [] - - def _create_a_subdir(subdir_cls, name, path): - if os.path.islink(path): - return subdir_cls(name=name, target=os.readlink(path)) - return subdir_cls(name=name) - - for subdir in os.listdir('/'): - # Note(ivasilev) in py3 env non-utf encoded string will appear as byte strings - # However in py2 env subdir will be always of str type, so verification if this is a valid utf-8 string - # should be done 
differently than formerly suggested plain six.binary_type check - decoded = True - if isinstance(subdir, six.binary_type): - try: - subdir.decode('utf-8') - except (AttributeError, UnicodeDecodeError): - decoded = False - if not decoded: - invalid_subdirs.append(_create_a_subdir(InvalidRootSubdirectory, subdir, os.path.join(b'/', subdir))) - else: - subdirs.append(_create_a_subdir(RootSubdirectory, subdir, os.path.join('/', subdir))) - - self.produce(RootDirectory(items=subdirs, invalid_items=invalid_subdirs)) + self.produce(scan_dir(b'/')) diff --git a/repos/system_upgrade/common/actors/rootscanner/libraries/rootscanner.py b/repos/system_upgrade/common/actors/rootscanner/libraries/rootscanner.py new file mode 100644 index 0000000000..3f29c065b0 --- /dev/null +++ b/repos/system_upgrade/common/actors/rootscanner/libraries/rootscanner.py @@ -0,0 +1,34 @@ +import os + +import six + +from leapp.models import InvalidRootSubdirectory, RootDirectory, RootSubdirectory + + +def scan_dir(root_dir=b'/'): + """ + Scan root directory and return a RootDirectory(subdirs, invalid_subdirs) model object + """ + subdirs = [] + invalid_subdirs = [] + + def _create_a_subdir(subdir_cls, name, path): + if os.path.islink(path): + return subdir_cls(name=name, target=os.readlink(path)) + return subdir_cls(name=name) + + for subdir in os.listdir(root_dir): + # Note(ivasilev) in py3 env non-utf encoded string will appear as byte strings + # However in py2 env subdir will be always of str type, so verification if this is a valid utf-8 string + # should be done differently than formerly suggested plain six.binary_type check + decoded = True + if isinstance(subdir, six.binary_type): + try: + subdir = subdir.decode('utf-8') + except (AttributeError, UnicodeDecodeError): + decoded = False + if not decoded: + invalid_subdirs.append(_create_a_subdir(InvalidRootSubdirectory, subdir, os.path.join(b'/', subdir))) + else: + subdirs.append(_create_a_subdir(RootSubdirectory, subdir, os.path.join('/', 
subdir))) + return RootDirectory(items=subdirs, invalid_items=invalid_subdirs) diff --git a/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py b/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py new file mode 100644 index 0000000000..d0e5626a61 --- /dev/null +++ b/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py @@ -0,0 +1,31 @@ +import os +import shutil +import tempfile + +import pytest + +from leapp.libraries.actor.rootscanner import scan_dir + + +@pytest.mark.parametrize("filename,symlink,count_invalid", + [(u'a_utf_file'.encode('utf-8'), u"utf8_symlink".encode('utf-8'), 0), + (u'простофайл'.encode('koi8-r'), u"этонеутф8".encode('koi8-r'), 2), + (u'a_utf_file'.encode('utf-8'), u"этонеутф8".encode('koi8-r'), 1)]) +def test_invalid_symlinks(filename, symlink, count_invalid): + # Let's create a directory with both valid utf-8 and non-utf symlinks + # NOTE(ivasilev) As this has to run for python2 as well can't use the nice tempfile.TemporaryDirectory way + tmpdirname = tempfile.mkdtemp() + # create the file in the temp directory + path_to_file = os.path.join(tmpdirname.encode('utf-8'), filename) + path_to_symlink = os.path.join(tmpdirname.encode('utf-8'), symlink) + with open(path_to_file, 'w') as f: + f.write('Some data here') + # create a symlink + os.symlink(path_to_file, path_to_symlink) + # run scan_dir + model = scan_dir(tmpdirname.encode('utf-8')) + # verify the results + assert len(model.items) == 2 - count_invalid + assert len(model.invalid_items) == count_invalid + # cleanup + shutil.rmtree(tmpdirname) From 4e8f9f8b8b388af6a6f98682cf70430d02afb524 Mon Sep 17 00:00:00 2001 From: Inessa Vasilevskaya Date: Thu, 23 Mar 2023 15:39:22 +0100 Subject: [PATCH 3/4] update .pylintrc --- .pylintrc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index f5da1f1f68..7ddb58d668 100644 --- a/.pylintrc +++ b/.pylintrc @@ -52,7 +52,8 @@ disable= consider-using-with, 
# on bunch spaces we cannot change that... duplicate-string-formatting-argument, # TMP: will be fixed in close future consider-using-f-string, # sorry, not gonna happen, still have to support py2 - use-dict-literal + use-dict-literal, + redundant-u-string-prefix # still have py2 to support [FORMAT] # Maximum number of characters on a single line. From b158f9c6f167b76f19eafc9114815e5c7179371b Mon Sep 17 00:00:00 2001 From: Inessa Vasilevskaya Date: Thu, 23 Mar 2023 17:11:33 +0100 Subject: [PATCH 4/4] Set encoding for tests --- .../common/actors/rootscanner/tests/test_rootscanner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py b/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py index d0e5626a61..659a3017e7 100644 --- a/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py +++ b/repos/system_upgrade/common/actors/rootscanner/tests/test_rootscanner.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import os import shutil import tempfile