Skip to content

Commit

Permalink
Merge pull request #6699 from cjerdonek/issue-5874-hash-checking
Browse files Browse the repository at this point in the history
Address #5874: Prefer candidates with allowed hashes
  • Loading branch information
cjerdonek authored Jul 14, 2019
2 parents 34621bf + 74504ff commit 2c36f4d
Show file tree
Hide file tree
Showing 7 changed files with 253 additions and 34 deletions.
2 changes: 2 additions & 0 deletions news/5874.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
When choosing candidates to install, prefer candidates with a hash matching
one of the user-provided hashes.
79 changes: 70 additions & 9 deletions src/pip/_internal/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@
from pip._internal.req import InstallRequirement
from pip._internal.download import PipSession
from pip._internal.pep425tags import Pep425Tag
from pip._internal.utils.hashes import Hashes

BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str]
CandidateSortingKey = (
Tuple[int, int, _BaseVersion, BuildTag, Optional[int]]
Tuple[int, int, int, _BaseVersion, BuildTag, Optional[int]]
)
HTMLElement = xml.etree.ElementTree.Element
SecureOrigin = Tuple[str, str, Optional[str]]
Expand Down Expand Up @@ -440,6 +441,45 @@ def evaluate_link(self, link):
return (True, version)


def filter_unallowed_hashes(
candidates, # type: List[InstallationCandidate]
hashes, # type: Hashes
):
# type: (...) -> List[InstallationCandidate]
"""
Filter out candidates whose hashes aren't allowed, and return a new
list of candidates.
If at least one candidate has an allowed hash, then all candidates with
either an allowed hash or no hash specified are returned. Otherwise,
the given candidates are returned.
Including the candidates with no hash specified when there is a match
allows a warning to be logged if there is a more preferred candidate
with no hash specified. Returning all candidates in the case of no
matches lets pip report the hash of the candidate that would otherwise
have been installed (e.g. permitting the user to more easily update
their requirements file with the desired hash).
"""
applicable = []
found_allowed_hash = False
for candidate in candidates:
link = candidate.location
if not link.has_hash:
applicable.append(candidate)
continue

if link.is_hash_allowed(hashes=hashes):
found_allowed_hash = True
applicable.append(candidate)

if found_allowed_hash:
return applicable

# Make sure we're not returning back the given value.
return list(candidates)


class CandidatePreferences(object):

"""
Expand Down Expand Up @@ -473,13 +513,15 @@ def create(
target_python=None, # type: Optional[TargetPython]
prefer_binary=False, # type: bool
allow_all_prereleases=False, # type: bool
hashes=None, # type: Optional[Hashes]
):
# type: (...) -> CandidateEvaluator
"""Create a CandidateEvaluator object.
:param target_python: The target Python interpreter to use when
checking compatibility. If None (the default), a TargetPython
object will be constructed from the running Python.
:param hashes: An optional collection of allowed hashes.
"""
if target_python is None:
target_python = TargetPython()
Expand All @@ -490,20 +532,23 @@ def create(
supported_tags=supported_tags,
prefer_binary=prefer_binary,
allow_all_prereleases=allow_all_prereleases,
hashes=hashes,
)

def __init__(
self,
supported_tags, # type: List[Pep425Tag]
prefer_binary=False, # type: bool
allow_all_prereleases=False, # type: bool
hashes=None, # type: Optional[Hashes]
):
# type: (...) -> None
"""
:param supported_tags: The PEP 425 tags supported by the target
Python in order of preference (most preferred first).
"""
self._allow_all_prereleases = allow_all_prereleases
self._hashes = hashes
self._prefer_binary = prefer_binary
self._supported_tags = supported_tags

Expand Down Expand Up @@ -536,7 +581,10 @@ def get_applicable_candidates(
applicable_candidates = [
c for c in candidates if str(c.version) in versions
]
return applicable_candidates

return filter_unallowed_hashes(
candidates=applicable_candidates, hashes=self._hashes,
)

def make_found_candidates(
self,
Expand Down Expand Up @@ -576,8 +624,14 @@ def _sort_key(self, candidate):
The preference is as follows:
First and foremost, yanked candidates (in the sense of PEP 592) are
always less preferred than candidates that haven't been yanked. Then:
First and foremost, candidates with allowed (matching) hashes are
always preferred over candidates without matching hashes. This is
because e.g. if the only candidate with an allowed hash is yanked,
we still want to use that candidate.
Second, excepting hash considerations, candidates that have been
yanked (in the sense of PEP 592) are always less preferred than
candidates that haven't been yanked. Then:
If not finding wheels, they are sorted by version only.
If finding wheels, then the sort order is by version, then:
Expand Down Expand Up @@ -612,9 +666,11 @@ def _sort_key(self, candidate):
build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
else: # sdist
pri = -(support_num)
has_allowed_hash = int(link.is_hash_allowed(self._hashes))
yank_value = -1 * int(link.is_yanked) # -1 for yanked.
return (
yank_value, binary_preference, candidate.version, build_tag, pri,
has_allowed_hash, yank_value, binary_preference, candidate.version,
build_tag, pri,
)

def get_best_candidate(
Expand Down Expand Up @@ -1049,21 +1105,23 @@ def find_all_candidates(self, project_name):
# This is an intentional priority ordering
return file_versions + find_links_versions + page_versions

def make_candidate_evaluator(self):
# type: (...) -> CandidateEvaluator
def make_candidate_evaluator(self, hashes=None):
# type: (Optional[Hashes]) -> CandidateEvaluator
"""Create a CandidateEvaluator object to use.
"""
candidate_prefs = self._candidate_prefs
return CandidateEvaluator.create(
target_python=self._target_python,
prefer_binary=candidate_prefs.prefer_binary,
allow_all_prereleases=candidate_prefs.allow_all_prereleases,
hashes=hashes,
)

def find_candidates(
self,
project_name, # type: str
specifier=None, # type: Optional[specifiers.BaseSpecifier]
hashes=None, # type: Optional[Hashes]
):
# type: (...) -> FoundCandidates
"""Find matches for the given project and specifier.
Expand All @@ -1075,7 +1133,7 @@ def find_candidates(
:return: A `FoundCandidates` instance.
"""
candidates = self.find_all_candidates(project_name)
candidate_evaluator = self.make_candidate_evaluator()
candidate_evaluator = self.make_candidate_evaluator(hashes=hashes)
return candidate_evaluator.make_found_candidates(
candidates, specifier=specifier,
)
Expand All @@ -1088,7 +1146,10 @@ def find_requirement(self, req, upgrade):
Returns a Link if found,
Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise
"""
candidates = self.find_candidates(req.name, req.specifier)
hashes = req.hashes(trust_internet=False)
candidates = self.find_candidates(
req.name, specifier=req.specifier, hashes=hashes,
)
best_candidate = candidates.get_best()

installed_version = None # type: Optional[_BaseVersion]
Expand Down
15 changes: 15 additions & 0 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
if MYPY_CHECK_RUNNING:
from typing import Optional, Text, Tuple, Union
from pip._internal.index import HTMLPage
from pip._internal.utils.hashes import Hashes


class Link(KeyBasedCompareMixin):
Expand Down Expand Up @@ -193,3 +194,17 @@ def is_artifact(self):
def is_yanked(self):
# type: () -> bool
return self.yanked_reason is not None

@property
def has_hash(self):
return self.hash_name is not None

def is_hash_allowed(self, hashes):
# type: (Hashes) -> bool
"""
Return True if the link has a hash and it is allowed.
"""
if not self.has_hash:
return False

return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
8 changes: 8 additions & 0 deletions src/pip/_internal/utils/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ def __init__(self, hashes=None):
"""
self._allowed = {} if hashes is None else hashes

def is_hash_allowed(
self,
hash_name, # type: str
hex_digest, # type: str
):
"""Return whether the given hex digest is allowed."""
return hex_digest in self._allowed.get(hash_name, [])

def check_against_chunks(self, chunks):
# type: (Iterator[bytes]) -> None
"""Check good hashes against ones built from iterable of chunks of
Expand Down
Loading

0 comments on commit 2c36f4d

Please sign in to comment.