Skip to content

Commit

Permalink
GH-113373: Speed up pathlib parsing using __init_subclass__()
Browse files Browse the repository at this point in the history
Set `_sep`, `_altsep` and `_case_sensitive` class attributes from
`_abc.PurePathBase.__init_subclass__()`, which reduces the number of
attribute accesses needed for several pathlib operations.
  • Loading branch information
barneygale committed Dec 22, 2023
1 parent a0d3d3e commit 8b34e4a
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 20 deletions.
6 changes: 3 additions & 3 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _str_normcase(self):
try:
return self._str_normcase_cached
except AttributeError:
if _abc._is_case_sensitive(self.pathmod):
if self._case_sensitive:
self._str_normcase_cached = str(self)
else:
self._str_normcase_cached = str(self).lower()
Expand All @@ -141,7 +141,7 @@ def _parts_normcase(self):
try:
return self._parts_normcase_cached
except AttributeError:
self._parts_normcase_cached = self._str_normcase.split(self.pathmod.sep)
self._parts_normcase_cached = self._str_normcase.split(self._sep)
return self._parts_normcase_cached

def __lt__(self, other):
Expand Down Expand Up @@ -309,7 +309,7 @@ def absolute(self):
drive, root, rel = os.path.splitroot(cwd)
if not rel:
return self._from_parsed_parts(drive, root, self._tail)
tail = rel.split(self.pathmod.sep)
tail = rel.split(self._sep)
tail.extend(self._tail)
return self._from_parsed_parts(drive, root, tail)

Expand Down
35 changes: 18 additions & 17 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ def _ignore_error(exception):
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)


@functools.cache
def _is_case_sensitive(pathmod):
return pathmod.normcase('Aa') == 'Aa'

#
# Globbing helpers
#
Expand Down Expand Up @@ -205,6 +201,12 @@ class PurePathBase:
)
pathmod = posixpath

def __init_subclass__(cls, **kwargs):
super().__init_subclass__(**kwargs)
cls._sep = cls.pathmod.sep
cls._altsep = cls.pathmod.altsep
cls._case_sensitive = cls.pathmod.normcase('Aa') == 'Aa'

def __init__(self, *paths):
self._raw_paths = paths
self._resolving = False
Expand All @@ -220,8 +222,8 @@ def with_segments(self, *pathsegments):
def _parse_path(cls, path):
if not path:
return '', '', []
sep = cls.pathmod.sep
altsep = cls.pathmod.altsep
sep = cls._sep
altsep = cls._altsep
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls.pathmod.splitroot(path)
Expand Down Expand Up @@ -261,10 +263,10 @@ def _from_parsed_parts(self, drv, root, tail):
@classmethod
def _format_parsed_parts(cls, drv, root, tail):
if drv or root:
return drv + root + cls.pathmod.sep.join(tail)
return drv + root + cls._sep.join(tail)
elif tail and cls.pathmod.splitdrive(tail[0])[0]:
tail = ['.'] + tail
return cls.pathmod.sep.join(tail)
return cls._sep.join(tail)

def __str__(self):
"""Return the string representation of the path, suitable for
Expand All @@ -279,7 +281,7 @@ def __str__(self):
def as_posix(self):
"""Return the string representation of the path with forward (/)
slashes."""
return str(self).replace(self.pathmod.sep, '/')
return str(self).replace(self._sep, '/')

@property
def drive(self):
Expand Down Expand Up @@ -360,8 +362,7 @@ def stem(self):

def with_name(self, name):
"""Return a new path with the file name changed."""
m = self.pathmod
if not name or m.sep in name or (m.altsep and m.altsep in name) or name == '.':
if not name or self._sep in name or (self._altsep and self._altsep in name) or name == '.':
raise ValueError(f"Invalid name {name!r}")
tail = self._tail.copy()
if not tail:
Expand Down Expand Up @@ -514,8 +515,8 @@ def match(self, path_pattern, *, case_sensitive=None):
if not isinstance(path_pattern, PurePathBase):
path_pattern = self.with_segments(path_pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.pathmod)
sep = path_pattern.pathmod.sep
case_sensitive = self._case_sensitive
sep = path_pattern._sep
pattern_str = str(path_pattern)
if path_pattern.drive or path_pattern.root:
pass
Expand Down Expand Up @@ -797,7 +798,7 @@ def _make_child_relpath(self, name):
path_str = str(self)
tail = self._tail
if tail:
path_str = f'{path_str}{self.pathmod.sep}{name}'
path_str = f'{path_str}{self._sep}{name}'
elif path_str != '.':
path_str = f'{path_str}{name}'
else:
Expand Down Expand Up @@ -832,7 +833,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
raise ValueError("Unacceptable pattern: {!r}".format(pattern))

pattern_parts = path_pattern._tail.copy()
if pattern[-1] in (self.pathmod.sep, self.pathmod.altsep):
if pattern[-1] in (self._sep, self._altsep):
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')
if pattern_parts[-1] == '**':
Expand All @@ -846,7 +847,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):

if case_sensitive is None:
# TODO: evaluate case-sensitivity of each directory in _select_children().
case_sensitive = _is_case_sensitive(self.pathmod)
case_sensitive = self._case_sensitive

# If symlinks are handled consistently, and the pattern does not
# contain '..' components, then we can use a 'walk-and-match' strategy
Expand All @@ -857,7 +858,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
# do not perform any filesystem access, which can be much faster!
filter_paths = follow_symlinks is not None and '..' not in pattern_parts
deduplicate_paths = False
sep = self.pathmod.sep
sep = self._sep
paths = iter([self] if self.is_dir() else [])
part_idx = 0
while part_idx < len(pattern_parts):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up path parsing in :mod:`pathlib` by storing path separators and case
sensitivity as private class attributes. Patch by Barney Gale.

0 comments on commit 8b34e4a

Please sign in to comment.