
Commit

add testing for uncachable marking
cosmicexplorer committed Mar 20, 2020
1 parent 08479d4 commit b19ca97
Showing 2 changed files with 19 additions and 6 deletions.
8 changes: 7 additions & 1 deletion src/pip/_internal/index/collector.py
@@ -335,6 +335,11 @@ def with_cached_html_pages(
     fn, # type: Callable[[HTMLPage], Iterable[Link]]
 ):
     # type: (...) -> Callable[[HTMLPage], List[Link]]
+    """
+    Given a function that parses an Iterable[Link] from an HTMLPage, cache the
+    function's result (keyed by CacheablePageContent), unless the HTMLPage
+    `page` has `page.uncacheable_links == True`.
+    """
 
     @_lru_cache(maxsize=None)
     def wrapper(cacheable_page):
@@ -345,7 +350,6 @@ def wrapper(cacheable_page):
     def wrapper_wrapper(page):
         # type: (HTMLPage) -> List[Link]
         if page.uncacheable_links:
-            # Avoid caching when requesting pypi indices.
             return list(fn(page))
         return wrapper(CacheablePageContent(page))
 
@@ -639,6 +643,8 @@ def collect_links(self, project_name):
         # We want to filter out anything that does not have a secure origin.
         url_locations = [
             link for link in itertools.chain(
+                # Mark PyPI indices as "uncacheable" -- this will avoid caching
+                # the result of parsing the page for links.
                 (Link(url, uncacheable=True) for url in index_url_loc),
                 (Link(url) for url in fl_url_loc),
             )
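The decorator above turns each HTMLPage into a hashable CacheablePageContent so that an LRU cache can memoize link parsing, and the new `uncacheable` flag lets index pages opt out. A minimal, self-contained sketch of that pattern follows; the class and function names come from the diff, but the bodies are simplified stand-ins (the real classes in pip._internal.index.collector carry more state), and it assumes the cache is keyed by the page's url, which the test below relies on.

import functools


class HTMLPage(object):
    """Stand-in for pip's HTMLPage: page content plus an uncacheable flag."""

    def __init__(self, content, url, uncacheable_links=False):
        self.content = content
        self.url = url
        self.uncacheable_links = uncacheable_links


class CacheablePageContent(object):
    """Hashable wrapper so an lru_cache can key parse results by page url."""

    def __init__(self, page):
        self.page = page

    def __eq__(self, other):
        return (isinstance(other, CacheablePageContent) and
                self.page.url == other.page.url)

    def __hash__(self):
        return hash(self.page.url)


def with_cached_html_pages(fn):
    """Cache fn's result per page, unless the page is marked uncacheable."""

    @functools.lru_cache(maxsize=None)
    def wrapper(cacheable_page):
        return list(fn(cacheable_page.page))

    @functools.wraps(fn)
    def wrapper_wrapper(page):
        if page.uncacheable_links:
            # Index pages are marked uncacheable, so parse them every time.
            return list(fn(page))
        return wrapper(CacheablePageContent(page))

    return wrapper_wrapper


@with_cached_html_pages
def parse_links(page):
    # Trivial stand-in for the real HTML anchor parsing.
    return [word for word in page.content.split() if word.startswith('pkg')]


cached_page = HTMLPage('pkg1 pkg2', url='https://example.com/simple/')
index_page = HTMLPage('pkg1 pkg2', url='https://pypi.org/simple/',
                      uncacheable_links=True)
# Cached pages return the exact same list object on repeated calls...
assert parse_links(cached_page) is parse_links(cached_page)
# ...while uncacheable pages are re-parsed into a fresh list every time.
assert parse_links(index_page) is not parse_links(index_page)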
17 changes: 12 additions & 5 deletions tests/unit/test_collector.py
@@ -376,25 +376,27 @@ def test_parse_links_caches_same_page_by_url():
     )
     html_bytes = html.encode('utf-8')
 
+    url = 'https://example.com/simple/'
+
     page_1 = HTMLPage(
         html_bytes,
         encoding=None,
-        url='https://example.com/simple/',
+        url=url,
     )
     # Make a second page with zero content, to ensure that it's not accessed,
     # because the page was cached by url.
     page_2 = HTMLPage(
         b'',
         encoding=None,
-        url='https://example.com/simple/',
+        url=url,
     )
     # Make a third page which represents an index url, which should not be
     # cached, even for the same url. We modify the page content slightly to
-    # ensure that the result is not cached.
+    # verify that the result is not cached.
     page_3 = HTMLPage(
         re.sub(b'pkg1', b'pkg2', html_bytes),
         encoding=None,
-        url='https://example.com/simple/',
+        url=url,
         uncacheable_links=True,
     )
 
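The assertions that follow this setup are truncated from the hunk. Purely as an illustration, not the commit's actual code, the three pages could be exercised roughly as below, assuming parse_links is the decorated parser under test and that Link objects expose a url attribute.

    # Hypothetical continuation (not shown in this hunk): with results cached
    # by url, page_2's empty body is never parsed, while page_3 bypasses the
    # cache and is parsed from its modified ('pkg2') content.
    links_1 = list(parse_links(page_1))
    links_2 = list(parse_links(page_2))
    links_3 = list(parse_links(page_3))

    assert [link.url for link in links_2] == [link.url for link in links_1]
    assert [link.url for link in links_3] != [link.url for link in links_1]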
@@ -576,13 +578,14 @@ def test_fetch_page(self, mock_get_html_response):
         fake_response = make_fake_html_response(url)
         mock_get_html_response.return_value = fake_response
 
-        location = Link(url)
+        location = Link(url, uncacheable=True)
         link_collector = make_test_link_collector()
         actual = link_collector.fetch_page(location)
 
         assert actual.content == fake_response.content
         assert actual.encoding is None
         assert actual.url == url
+        assert actual.uncacheable_links == location.uncacheable
 
         # Also check that the right session object was passed to
         # _get_html_response().
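The new assertion checks that the Link's `uncacheable` flag survives the fetch and ends up as the page's `uncacheable_links` flag. A self-contained sketch of that propagation, using stand-in classes and a stand-in fetch_page rather than pip's real Link, HTMLPage, and LinkCollector:

class Link(object):
    """Stand-in: a candidate URL plus the uncacheable marker."""

    def __init__(self, url, uncacheable=False):
        self.url = url
        self.uncacheable = uncacheable


class HTMLPage(object):
    """Stand-in: a fetched page that remembers whether to cache its links."""

    def __init__(self, content, encoding, url, uncacheable_links=False):
        self.content = content
        self.encoding = encoding
        self.url = url
        self.uncacheable_links = uncacheable_links


def fetch_page(location, download=lambda url: b'<html></html>'):
    # Stand-in for LinkCollector.fetch_page: fetch the content and carry the
    # Link's `uncacheable` flag over onto the resulting page.
    return HTMLPage(download(location.url), encoding=None, url=location.url,
                    uncacheable_links=location.uncacheable)


page = fetch_page(Link('https://pypi.org/simple/twine/', uncacheable=True))
assert page.uncacheable_links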
@@ -607,8 +610,12 @@ def test_collect_links(self, caplog, data):
 
         assert len(actual.find_links) == 1
         check_links_include(actual.find_links, names=['packages'])
+        # Check that find-links URLs are not marked as uncacheable.
+        assert not actual.find_links[0].uncacheable
 
         assert actual.project_urls == [Link('https://pypi.org/simple/twine/')]
+        # Check that index URLs are always marked as uncacheable.
+        assert actual.project_urls[0].uncacheable
 
         expected_message = dedent("""\
             1 location(s) to search for versions of twine:
