
Commit

add testing for uncachable marking
cosmicexplorer committed Mar 20, 2020
1 parent 08479d4 commit b19ca97
Showing 2 changed files with 19 additions and 6 deletions.
8 changes: 7 additions & 1 deletion src/pip/_internal/index/collector.py
@@ -335,6 +335,11 @@ def with_cached_html_pages(
     fn, # type: Callable[[HTMLPage], Iterable[Link]]
 ):
     # type: (...) -> Callable[[HTMLPage], List[Link]]
+    """
+    Given a function that parses an Iterable[Link] from an HTMLPage, cache the
+    function's result (keyed by CacheablePageContent), unless the HTMLPage
+    `page` has `page.uncacheable_links == True`.
+    """
 
     @_lru_cache(maxsize=None)
     def wrapper(cacheable_page):
@@ -345,7 +350,6 @@ def wrapper(cacheable_page):
     def wrapper_wrapper(page):
         # type: (HTMLPage) -> List[Link]
         if page.uncacheable_links:
-            # Avoid caching when requesting pypi indices.
             return list(fn(page))
         return wrapper(CacheablePageContent(page))
 
@@ -639,6 +643,8 @@ def collect_links(self, project_name):
         # We want to filter out anything that does not have a secure origin.
         url_locations = [
             link for link in itertools.chain(
+                # Mark PyPI indices as "uncacheable" -- this will avoid caching
+                # the result of parsing the page for links.
                 (Link(url, uncacheable=True) for url in index_url_loc),
                 (Link(url) for url in fl_url_loc),
             )
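The decorator above turns each HTMLPage into a hashable CacheablePageContent so that an LRU cache can memoize link parsing, and the new `uncacheable` flag lets index pages opt out. A minimal, self-contained sketch of that pattern follows; the class and function names come from the diff, but the bodies are simplified stand-ins (the real classes in pip._internal.index.collector carry more state), and it assumes the cache is keyed by the page's url, which the test below relies on.

import functools


class HTMLPage(object):
    """Stand-in for pip's HTMLPage: page content plus an uncacheable flag."""

    def __init__(self, content, url, uncacheable_links=False):
        self.content = content
        self.url = url
        self.uncacheable_links = uncacheable_links


class CacheablePageContent(object):
    """Hashable wrapper so an lru_cache can key parse results by page url."""

    def __init__(self, page):
        self.page = page

    def __eq__(self, other):
        return (isinstance(other, CacheablePageContent) and
                self.page.url == other.page.url)

    def __hash__(self):
        return hash(self.page.url)


def with_cached_html_pages(fn):
    """Cache fn's result per page, unless the page is marked uncacheable."""

    @functools.lru_cache(maxsize=None)
    def wrapper(cacheable_page):
        return list(fn(cacheable_page.page))

    @functools.wraps(fn)
    def wrapper_wrapper(page):
        if page.uncacheable_links:
            # Index pages are marked uncacheable, so parse them every time.
            return list(fn(page))
        return wrapper(CacheablePageContent(page))

    return wrapper_wrapper


@with_cached_html_pages
def parse_links(page):
    # Trivial stand-in for the real HTML anchor parsing.
    return [word for word in page.content.split() if word.startswith('pkg')]


cached_page = HTMLPage('pkg1 pkg2', url='https://example.com/simple/')
index_page = HTMLPage('pkg1 pkg2', url='https://pypi.org/simple/',
                      uncacheable_links=True)
# Cached pages return the exact same list object on repeated calls...
assert parse_links(cached_page) is parse_links(cached_page)
# ...while uncacheable pages are re-parsed into a fresh list every time.
assert parse_links(index_page) is not parse_links(index_page)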
17 changes: 12 additions & 5 deletions tests/unit/test_collector.py
@@ -376,25 +376,27 @@ def test_parse_links_caches_same_page_by_url():
     )
     html_bytes = html.encode('utf-8')
 
+    url = 'https://example.com/simple/'
+
     page_1 = HTMLPage(
         html_bytes,
         encoding=None,
-        url='https://example.com/simple/',
+        url=url,
     )
     # Make a second page with zero content, to ensure that it's not accessed,
     # because the page was cached by url.
     page_2 = HTMLPage(
         b'',
         encoding=None,
-        url='https://example.com/simple/',
+        url=url,
     )
     # Make a third page which represents an index url, which should not be
     # cached, even for the same url. We modify the page content slightly to
-    # ensure that the result is not cached.
+    # verify that the result is not cached.
     page_3 = HTMLPage(
         re.sub(b'pkg1', b'pkg2', html_bytes),
         encoding=None,
-        url='https://example.com/simple/',
+        url=url,
         uncacheable_links=True,
     )
 
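The assertions that follow this setup are truncated from the hunk. Purely as an illustration, not the commit's actual code, the three pages could be exercised roughly as below, assuming parse_links is the decorated parser under test and that Link objects expose a url attribute.

    # Hypothetical continuation (not shown in this hunk): with results cached
    # by url, page_2's empty body is never parsed, while page_3 bypasses the
    # cache and is parsed from its modified ('pkg2') content.
    links_1 = list(parse_links(page_1))
    links_2 = list(parse_links(page_2))
    links_3 = list(parse_links(page_3))

    assert [link.url for link in links_2] == [link.url for link in links_1]
    assert [link.url for link in links_3] != [link.url for link in links_1]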
@@ -576,13 +578,14 @@ def test_fetch_page(self, mock_get_html_response):
         fake_response = make_fake_html_response(url)
         mock_get_html_response.return_value = fake_response
 
-        location = Link(url)
+        location = Link(url, uncacheable=True)
         link_collector = make_test_link_collector()
         actual = link_collector.fetch_page(location)
 
         assert actual.content == fake_response.content
         assert actual.encoding is None
         assert actual.url == url
+        assert actual.uncacheable_links == location.uncacheable
 
         # Also check that the right session object was passed to
         # _get_html_response().
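The new assertion checks that the Link's `uncacheable` flag survives the fetch and ends up as the page's `uncacheable_links` flag. A self-contained sketch of that propagation, using stand-in classes and a stand-in fetch_page rather than pip's real Link, HTMLPage, and LinkCollector:

class Link(object):
    """Stand-in: a candidate URL plus the uncacheable marker."""

    def __init__(self, url, uncacheable=False):
        self.url = url
        self.uncacheable = uncacheable


class HTMLPage(object):
    """Stand-in: a fetched page that remembers whether to cache its links."""

    def __init__(self, content, encoding, url, uncacheable_links=False):
        self.content = content
        self.encoding = encoding
        self.url = url
        self.uncacheable_links = uncacheable_links


def fetch_page(location, download=lambda url: b'<html></html>'):
    # Stand-in for LinkCollector.fetch_page: fetch the content and carry the
    # Link's `uncacheable` flag over onto the resulting page.
    return HTMLPage(download(location.url), encoding=None, url=location.url,
                    uncacheable_links=location.uncacheable)


page = fetch_page(Link('https://pypi.org/simple/twine/', uncacheable=True))
assert page.uncacheable_links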
@@ -607,8 +610,12 @@ def test_collect_links(self, caplog, data):
 
         assert len(actual.find_links) == 1
         check_links_include(actual.find_links, names=['packages'])
+        # Check that find-links URLs are not marked as uncacheable.
+        assert not actual.find_links[0].uncacheable
 
         assert actual.project_urls == [Link('https://pypi.org/simple/twine/')]
+        # Check that index URLs are always marked as uncacheable.
+        assert actual.project_urls[0].uncacheable
 
         expected_message = dedent("""\
             1 location(s) to search for versions of twine:
