From 9d8e765789883a854326dfad7fe31aeb9e715200 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Tue, 9 Jul 2024 19:48:12 -0400 Subject: [PATCH] Consistently use `get_requirement` to cache `Requirement` construction (#12663) --- news/12663.feature.rst | 1 + src/pip/_internal/build_env.py | 4 +-- .../_internal/metadata/importlib/_dists.py | 3 +- src/pip/_internal/pyproject.py | 5 ++-- src/pip/_internal/req/constructors.py | 6 ++-- src/pip/_internal/req/req_install.py | 5 ++-- src/pip/_internal/utils/packaging.py | 2 +- tests/unit/test_req.py | 28 +++++++++++++------ 8 files changed, 34 insertions(+), 20 deletions(-) create mode 100644 news/12663.feature.rst diff --git a/news/12663.feature.rst b/news/12663.feature.rst new file mode 100644 index 00000000000..11300ae47d3 --- /dev/null +++ b/news/12663.feature.rst @@ -0,0 +1 @@ +Improve performance when the same requirement string appears many times during resolution, by consistently caching the parsed requirement string. diff --git a/src/pip/_internal/build_env.py b/src/pip/_internal/build_env.py index 5cb903fa531..be1e0ca85d2 100644 --- a/src/pip/_internal/build_env.py +++ b/src/pip/_internal/build_env.py @@ -12,7 +12,6 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union from pip._vendor.certifi import where -from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.version import Version from pip import __file__ as pip_location @@ -20,6 +19,7 @@ from pip._internal.locations import get_platlib, get_purelib, get_scheme from pip._internal.metadata import get_default_environment, get_environment from pip._internal.utils.logging import VERBOSE +from pip._internal.utils.packaging import get_requirement from pip._internal.utils.subprocess import call_subprocess from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds @@ -184,7 +184,7 @@ def check_requirements( else get_default_environment() ) for req_str in reqs: - req = Requirement(req_str) + req = get_requirement(req_str) # We're explicitly evaluating with an empty extra value, since build # environments are not provided any mechanism to select specific extras. if req.marker is not None and not req.marker.evaluate({"extra": ""}): diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index f65ccb1e706..9d3bedd8a10 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -27,6 +27,7 @@ Wheel, ) from pip._internal.utils.misc import normalize_path +from pip._internal.utils.packaging import get_requirement from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file @@ -219,7 +220,7 @@ def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requiremen for req_string in self.metadata.get_all("Requires-Dist", []): # strip() because email.message.Message.get_all() may return a leading \n # in case a long header was wrapped. - req = Requirement(req_string.strip()) + req = get_requirement(req_string.strip()) if not req.marker: yield req elif not extras and req.marker.evaluate({"extra": ""}): diff --git a/src/pip/_internal/pyproject.py b/src/pip/_internal/pyproject.py index b9b9e3f19d9..2a9cad4803e 100644 --- a/src/pip/_internal/pyproject.py +++ b/src/pip/_internal/pyproject.py @@ -9,13 +9,14 @@ else: from pip._vendor import tomli as tomllib -from pip._vendor.packaging.requirements import InvalidRequirement, Requirement +from pip._vendor.packaging.requirements import InvalidRequirement from pip._internal.exceptions import ( InstallationError, InvalidPyProjectBuildRequires, MissingPyProjectBuildRequires, ) +from pip._internal.utils.packaging import get_requirement def _is_list_of_str(obj: Any) -> bool: @@ -156,7 +157,7 @@ def load_pyproject_toml( # Each requirement must be valid as per PEP 508 for requirement in requires: try: - Requirement(requirement) + get_requirement(requirement) except InvalidRequirement as error: raise InvalidPyProjectBuildRequires( package=req_name, diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index b8e170f2a70..d73236e05c6 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -81,7 +81,7 @@ def _set_requirement_extras(req: Requirement, new_extras: Set[str]) -> Requireme pre is not None and post is not None ), f"regex group selection for requirement {req} failed, this should never happen" extras: str = "[%s]" % ",".join(sorted(new_extras)) if new_extras else "" - return Requirement(f"{pre}{extras}{post}") + return get_requirement(f"{pre}{extras}{post}") def parse_editable(editable_req: str) -> Tuple[Optional[str], str, Set[str]]: @@ -163,7 +163,7 @@ def check_first_requirement_in_file(filename: str) -> None: # If there is a line continuation, drop it, and append the next line. if line.endswith("\\"): line = line[:-2].strip() + next(lines, "") - Requirement(line) + get_requirement(line) return @@ -205,7 +205,7 @@ def parse_req_from_editable(editable_req: str) -> RequirementParts: if name is not None: try: - req: Optional[Requirement] = Requirement(name) + req: Optional[Requirement] = get_requirement(name) except InvalidRequirement as exc: raise InstallationError(f"Invalid requirement: {name!r}: {exc}") else: diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index afd0a9e5e82..1a35189621a 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -52,6 +52,7 @@ redact_auth_from_requirement, redact_auth_from_url, ) +from pip._internal.utils.packaging import get_requirement from pip._internal.utils.subprocess import runner_with_spinner_message from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds from pip._internal.utils.unpacking import unpack_file @@ -395,7 +396,7 @@ def _set_requirement(self) -> None: else: op = "===" - self.req = Requirement( + self.req = get_requirement( "".join( [ self.metadata["Name"], @@ -421,7 +422,7 @@ def warn_on_mismatching_name(self) -> None: metadata_name, self.name, ) - self.req = Requirement(metadata_name) + self.req = get_requirement(metadata_name) def check_if_exists(self, use_user_site: bool) -> None: """Find an installed distribution that satisfies or conflicts diff --git a/src/pip/_internal/utils/packaging.py b/src/pip/_internal/utils/packaging.py index b9f6af4d174..4b8fa0fe397 100644 --- a/src/pip/_internal/utils/packaging.py +++ b/src/pip/_internal/utils/packaging.py @@ -34,7 +34,7 @@ def check_requires_python( return python_version in requires_python_specifier -@functools.lru_cache(maxsize=512) +@functools.lru_cache(maxsize=2048) def get_requirement(req_string: str) -> Requirement: """Construct a packaging.Requirement object with caching""" # Parsing requirement strings is expensive, and is also expected to happen diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index dd0af289e7b..3b78ead3fe3 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -770,11 +770,16 @@ def test_install_req_drop_extras(self, inp: str, out: str) -> None: without_extras = install_req_drop_extras(req) assert not without_extras.extras assert str(without_extras.req) == out - # should always be a copy - assert req is not without_extras - assert req.req is not without_extras.req + + # if there are no extras they should be the same object, + # otherwise they may be a copy due to cache + if req.extras: + assert req is not without_extras + assert req.req is not without_extras.req + # comes_from should point to original assert without_extras.comes_from is req + # all else should be the same assert without_extras.link == req.link assert without_extras.markers == req.markers @@ -790,9 +795,9 @@ def test_install_req_drop_extras(self, inp: str, out: str) -> None: @pytest.mark.parametrize( "inp, extras, out", [ - ("pkg", {}, "pkg"), - ("pkg==1.0", {}, "pkg==1.0"), - ("pkg[ext]", {}, "pkg[ext]"), + ("pkg", set(), "pkg"), + ("pkg==1.0", set(), "pkg==1.0"), + ("pkg[ext]", set(), "pkg[ext]"), ("pkg", {"ext"}, "pkg[ext]"), ("pkg==1.0", {"ext"}, "pkg[ext]==1.0"), ("pkg==1.0", {"ext1", "ext2"}, "pkg[ext1,ext2]==1.0"), @@ -816,9 +821,14 @@ def test_install_req_extend_extras( assert str(extended.req) == out assert extended.req is not None assert set(extended.extras) == set(extended.req.extras) - # should always be a copy - assert req is not extended - assert req.req is not extended.req + + # if extras is not a subset of req.extras then the extended + # requirement object should not be the same, otherwise they + # might be a copy due to cache + if not extras.issubset(req.extras): + assert req is not extended + assert req.req is not extended.req + # all else should be the same assert extended.link == req.link assert extended.markers == req.markers