Skip to content

Commit

Permalink
Simplify handling of '', '.' and '..' segments in patterns.
Browse files Browse the repository at this point in the history
  • Loading branch information
barneygale committed Jan 20, 2024
1 parent 3e71a3e commit 0c20f70
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 28 deletions.
4 changes: 3 additions & 1 deletion Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def iterdir(self):
def _scandir(self):
return os.scandir(self)

def _make_child_entry(self, entry, is_dir=False):
def _make_child_entry(self, entry):
# Transform an entry yielded from _scandir() into a path object.
path_str = entry.name if str(self) == '.' else entry.path
path = self.with_segments(path_str)
Expand All @@ -614,6 +614,8 @@ def _make_child_entry(self, entry, is_dir=False):
return path

def _make_child_relpath(self, name):
if not name:
return self
path_str = str(self)
tail = self._tail
if tail:
Expand Down
51 changes: 24 additions & 27 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def _compile_pattern(pat, sep, case_sensitive):
return re.compile(regex, flags=flags).match


def _select_special(paths, part):
"""Yield special literal children of the given paths."""
for path in paths:
yield path._make_child_relpath(part)


def _select_children(parent_paths, dir_only, follow_symlinks, match):
"""Yield direct children of given paths, filtering by name and type."""
if follow_symlinks is None:
Expand All @@ -84,7 +90,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
except OSError:
continue
if match(entry.name):
yield parent_path._make_child_entry(entry, dir_only)
yield parent_path._make_child_entry(entry)


def _select_recursive(parent_paths, dir_only, follow_symlinks):
Expand All @@ -107,7 +113,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
for entry in entries:
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
paths.append(path._make_child_entry(entry, dir_only))
paths.append(path._make_child_entry(entry))
continue
except OSError:
pass
Expand Down Expand Up @@ -721,10 +727,8 @@ def _scandir(self):
from contextlib import nullcontext
return nullcontext(self.iterdir())

def _make_child_entry(self, entry, is_dir=False):
def _make_child_entry(self, entry):
# Transform an entry yielded from _scandir() into a path object.
if is_dir:
return entry.joinpath('')
return entry

def _make_child_relpath(self, name):
Expand All @@ -740,39 +744,27 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
# TODO: evaluate case-sensitivity of each directory in _select_children().
case_sensitive = _is_case_sensitive(self.pathmod)

# If symlinks are handled consistently, and the pattern does not
# contain '..' components, then we can use a 'walk-and-match' strategy
# when expanding '**' wildcards. When a '**' wildcard is encountered,
# all following pattern parts are immediately consumed and used to
# build a `re.Pattern` object. This pattern is used to filter the
# recursive walk. As a result, pattern parts following a '**' wildcard
# do not perform any filesystem access, which can be much faster!
stack = pattern._pattern_stack
filter_paths = follow_symlinks is not None and '..' not in stack
specials = ('', '.', '..')
filter_paths = False
deduplicate_paths = False
sep = self.pathmod.sep
paths = iter([self.joinpath('')] if self.is_dir() else [])
while stack:
part = stack.pop()
if part == '':
# Trailing slash.
pass
elif part == '..':
paths = (path._make_child_relpath('..') for path in paths)
if part in specials:
paths = _select_special(paths, part)
elif part == '**':
# Consume adjacent '**' components.
while stack and stack[-1] == '**':
stack.pop()

if filter_paths and stack and stack[-1] != '':
dir_only = stack[0] == ''
paths = _select_recursive(paths, dir_only, follow_symlinks)

# Filter out paths that don't match pattern.
prefix_len = len(str(self._make_child_relpath('_'))) - 1
match = _compile_pattern(str(pattern), sep, case_sensitive)
paths = (path for path in paths if match(str(path), prefix_len))
return paths
# Consume adjacent non-special components and enable post-walk
# regex filtering, provided we're treating symlinks consistently.
if follow_symlinks is not None:
while stack and stack[-1] not in specials:
filter_paths = True
stack.pop()

dir_only = bool(stack)
paths = _select_recursive(paths, dir_only, follow_symlinks)
Expand All @@ -786,6 +778,11 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
dir_only = bool(stack)
match = _compile_pattern(part, sep, case_sensitive)
paths = _select_children(paths, dir_only, follow_symlinks, match)
if filter_paths:
# Filter out paths that don't match pattern.
prefix_len = len(str(self._make_child_relpath('_'))) - 1
match = _compile_pattern(str(pattern), sep, case_sensitive)
paths = (path for path in paths if match(str(path), prefix_len))
return paths

def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
Expand Down

0 comments on commit 0c20f70

Please sign in to comment.