From bf5f4008e7fe421508950763a0101e78a340f33a Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Sun, 8 Aug 2021 07:57:06 -0700 Subject: [PATCH 1/2] Replace vendored html5lib with stdlib The html5lib library isn't strictly required as the same functionality can be achieved through the stdlib html.parser module. The html5lib library is one of the largest users of the six library. By dropping this unnecessary dependency, the pip project is closer to dropping the six library. Additionally, html5lib maintenance has slowed down and the project has rejected pull requests to drop Python 2 support. For now, the html5lib code remains, but is gated behind a command line option: `--use-deprecated=html5lib`. After a sufficient amount of time has passed without any reported bugs, the vendored library and this flag can be removed completely. --- news/10291.feature.rst | 5 + src/pip/_internal/cli/cmdoptions.py | 7 +- src/pip/_internal/cli/req_command.py | 1 + src/pip/_internal/commands/index.py | 1 + src/pip/_internal/commands/list.py | 1 + src/pip/_internal/index/collector.py | 117 +++++++++++++++--- src/pip/_internal/index/package_finder.py | 7 +- src/pip/_internal/self_outdated_check.py | 3 + .../datarequire/fakepackage/index.html | 1 + tests/data/indexes/dev/bar/index.html | 1 + tests/data/indexes/in dex/simple/index.html | 1 + tests/data/indexes/pre/bar/index.html | 1 + tests/data/indexes/simple/simple/index.html | 1 + tests/data/indexes/yanked/simple/index.html | 1 + .../data/indexes/yanked_all/simple/index.html | 1 + tests/data/packages3/dinner/index.html | 1 + tests/data/packages3/index.html | 1 + tests/data/packages3/requiredinner/index.html | 1 + tests/functional/test_build_env.py | 1 + tests/functional/test_new_resolver_hashes.py | 1 + tests/lib/__init__.py | 2 + tests/unit/resolution_resolvelib/conftest.py | 2 +- tests/unit/test_collector.py | 17 ++- tests/unit/test_finder.py | 19 ++- tests/unit/test_index.py | 9 ++ tests/unit/test_self_check_outdated.py | 1 + 26 files changed, 
176 insertions(+), 28 deletions(-) create mode 100644 news/10291.feature.rst diff --git a/news/10291.feature.rst b/news/10291.feature.rst new file mode 100644 index 00000000000..7291b629124 --- /dev/null +++ b/news/10291.feature.rst @@ -0,0 +1,5 @@ +Changed ``PackageFinder`` to parse HTML documents using the stdlib +:class:`html.parser.HTMLParser` class instead of the ``html5lib`` package. For +now, the deprecated ``html5lib`` code remains and can be used with the +``--use-deprecated=html5lib`` command line option, but it will be removed in a +future pip release. diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index e9806fd79d0..71b1d190691 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -964,7 +964,12 @@ def check_list_path_option(options: Values) -> None: metavar="feature", action="append", default=[], - choices=["legacy-resolver", "out-of-tree-build", "backtrack-on-build-failures"], + choices=[ + "legacy-resolver", + "out-of-tree-build", + "backtrack-on-build-failures", + "html5lib", + ], help=("Enable deprecated functionality, that will be removed in the future."), ) diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index 8dc00e32826..5d4d1f0f45b 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -502,4 +502,5 @@ def _build_package_finder( link_collector=link_collector, selection_prefs=selection_prefs, target_python=target_python, + use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled, ) diff --git a/src/pip/_internal/commands/index.py b/src/pip/_internal/commands/index.py index b4bf0ac06e1..9d8aae3b542 100644 --- a/src/pip/_internal/commands/index.py +++ b/src/pip/_internal/commands/index.py @@ -97,6 +97,7 @@ def _build_package_finder( link_collector=link_collector, selection_prefs=selection_prefs, target_python=target_python, + use_deprecated_html5lib="html5lib" in 
options.deprecated_features_enabled, ) def get_available_package_versions(self, options: Values, args: List[Any]) -> None: diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index adac51058ce..57f05e00829 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -149,6 +149,7 @@ def _build_package_finder( return PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled, ) def run(self, options: Values, args: List[str]) -> int: diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index d9412234eed..b2d7bec064a 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -12,15 +12,20 @@ import urllib.parse import urllib.request import xml.etree.ElementTree +from html.parser import HTMLParser from optparse import Values from typing import ( + TYPE_CHECKING, + Any, Callable, + Dict, Iterable, List, MutableMapping, NamedTuple, Optional, Sequence, + Tuple, Union, ) @@ -39,6 +44,11 @@ from .sources import CandidatesFromPage, LinkSource, build_source +if TYPE_CHECKING: + from typing import Protocol +else: + Protocol = object + logger = logging.getLogger(__name__) HTMLElement = xml.etree.ElementTree.Element @@ -163,6 +173,8 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str: :param document: An HTML document representation. The current implementation expects the result of ``html5lib.parse()``. :param page_url: The URL of the HTML document. + + TODO: Remove when `html5lib` is dropped. 
""" for base in document.findall(".//base"): href = base.get("href") @@ -234,20 +246,20 @@ def _clean_link(url: str) -> str: def _create_link_from_element( - anchor: HTMLElement, + element_attribs: Dict[str, Optional[str]], page_url: str, base_url: str, ) -> Optional[Link]: """ - Convert an anchor element in a simple repository page to a Link. + Convert an anchor element's attributes in a simple repository page to a Link. """ - href = anchor.get("href") + href = element_attribs.get("href") if not href: return None url = _clean_link(urllib.parse.urljoin(base_url, href)) - pyrequire = anchor.get("data-requires-python") - yanked_reason = anchor.get("data-yanked") + pyrequire = element_attribs.get("data-requires-python") + yanked_reason = element_attribs.get("data-yanked") link = Link( url, @@ -271,9 +283,14 @@ def __hash__(self) -> int: return hash(self.page.url) -def with_cached_html_pages( - fn: Callable[["HTMLPage"], Iterable[Link]], -) -> Callable[["HTMLPage"], List[Link]]: +class ParseLinks(Protocol): + def __call__( + self, page: "HTMLPage", use_deprecated_html5lib: bool + ) -> Iterable[Link]: + ... 
+ + +def with_cached_html_pages(fn: ParseLinks) -> ParseLinks: """ Given a function that parses an Iterable[Link] from an HTMLPage, cache the function's result (keyed by CacheablePageContent), unless the HTMLPage @@ -281,22 +298,25 @@ def with_cached_html_pages( """ @functools.lru_cache(maxsize=None) - def wrapper(cacheable_page: CacheablePageContent) -> List[Link]: - return list(fn(cacheable_page.page)) + def wrapper( + cacheable_page: CacheablePageContent, use_deprecated_html5lib: bool + ) -> List[Link]: + return list(fn(cacheable_page.page, use_deprecated_html5lib)) @functools.wraps(fn) - def wrapper_wrapper(page: "HTMLPage") -> List[Link]: + def wrapper_wrapper(page: "HTMLPage", use_deprecated_html5lib: bool) -> List[Link]: if page.cache_link_parsing: - return wrapper(CacheablePageContent(page)) - return list(fn(page)) + return wrapper(CacheablePageContent(page), use_deprecated_html5lib) + return list(fn(page, use_deprecated_html5lib)) return wrapper_wrapper -@with_cached_html_pages -def parse_links(page: "HTMLPage") -> Iterable[Link]: +def _parse_links_html5lib(page: "HTMLPage") -> Iterable[Link]: """ Parse an HTML document, and yield its anchor elements as Link objects. + + TODO: Remove when `html5lib` is dropped. """ document = html5lib.parse( page.content, @@ -307,6 +327,31 @@ def parse_links(page: "HTMLPage") -> Iterable[Link]: url = page.url base_url = _determine_base_url(document, url) for anchor in document.findall(".//a"): + link = _create_link_from_element( + anchor.attrib, + page_url=url, + base_url=base_url, + ) + if link is None: + continue + yield link + + +@with_cached_html_pages +def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Link]: + """ + Parse an HTML document, and yield its anchor elements as Link objects. 
+ """ + if use_deprecated_html5lib: + return _parse_links_html5lib(page) + + parser = HTMLLinkParser() + encoding = page.encoding or "utf-8" + parser.feed(page.content.decode(encoding)) + + url = page.url + base_url = parser.base_url or url + for anchor in parser.anchors: link = _create_link_from_element( anchor, page_url=url, @@ -343,6 +388,48 @@ def __str__(self) -> str: return redact_auth_from_url(self.url) +class HTMLLinkParser(HTMLParser): + """ + HTMLParser that keeps the first base HREF and a list of all anchor + elements' attributes. + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._seen_decl = False + self.base_url: Optional[str] = None + self.anchors: List[Dict[str, Optional[str]]] = [] + + def handle_decl(self, decl: str) -> None: + if decl != "DOCTYPE html": + self._raise_error() + self._seen_decl = True + + def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: + if not self._seen_decl: + self._raise_error() + + if tag == "base" and self.base_url is None: + href = self.get_href(attrs) + if href is not None: + self.base_url = href + elif tag == "a": + self.anchors.append(dict(attrs)) + + def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: + for name, value in attrs: + if name == "href": + return value + return None + + def _raise_error(self) -> None: + raise ValueError( + "HTML doctype missing or incorrect. Expected .\n\n" + "If you believe this error to be incorrect, try passing the " + "command line option --use-deprecated=html5lib." 
+ ) + + def _handle_get_page_fail( link: Link, reason: Union[str, Exception], diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 86b7b65dd8a..223d06df67e 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -580,6 +580,7 @@ def __init__( link_collector: LinkCollector, target_python: TargetPython, allow_yanked: bool, + use_deprecated_html5lib: bool, format_control: Optional[FormatControl] = None, candidate_prefs: Optional[CandidatePreferences] = None, ignore_requires_python: Optional[bool] = None, @@ -604,6 +605,7 @@ def __init__( self._ignore_requires_python = ignore_requires_python self._link_collector = link_collector self._target_python = target_python + self._use_deprecated_html5lib = use_deprecated_html5lib self.format_control = format_control @@ -620,6 +622,8 @@ def create( link_collector: LinkCollector, selection_prefs: SelectionPreferences, target_python: Optional[TargetPython] = None, + *, + use_deprecated_html5lib: bool, ) -> "PackageFinder": """Create a PackageFinder. 
@@ -644,6 +648,7 @@ def create( allow_yanked=selection_prefs.allow_yanked, format_control=selection_prefs.format_control, ignore_requires_python=selection_prefs.ignore_requires_python, + use_deprecated_html5lib=use_deprecated_html5lib, ) @property @@ -765,7 +770,7 @@ def process_project_url( if html_page is None: return [] - page_links = list(parse_links(html_page)) + page_links = list(parse_links(html_page, self._use_deprecated_html5lib)) with indent_log(): package_links = self.evaluate_links( diff --git a/src/pip/_internal/self_outdated_check.py b/src/pip/_internal/self_outdated_check.py index 19a5f280f60..7300e0ea4c0 100644 --- a/src/pip/_internal/self_outdated_check.py +++ b/src/pip/_internal/self_outdated_check.py @@ -141,6 +141,9 @@ def pip_self_version_check(session: PipSession, options: optparse.Values) -> Non finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib=( + "html5lib" in options.deprecated_features_enabled + ), ) best_candidate = finder.find_best_candidate("pip").best_candidate if best_candidate is None: diff --git a/tests/data/indexes/datarequire/fakepackage/index.html b/tests/data/indexes/datarequire/fakepackage/index.html index 0ca8b9dc3a2..25bf4aa21d5 100644 --- a/tests/data/indexes/datarequire/fakepackage/index.html +++ b/tests/data/indexes/datarequire/fakepackage/index.html @@ -1,3 +1,4 @@ + Links for fakepackage

Links for fakepackage

fakepackage-1.0.0.tar.gz
fakepackage-2.6.0.tar.gz
diff --git a/tests/data/indexes/dev/bar/index.html b/tests/data/indexes/dev/bar/index.html index bcee309212c..c0da6561310 100644 --- a/tests/data/indexes/dev/bar/index.html +++ b/tests/data/indexes/dev/bar/index.html @@ -1,3 +1,4 @@ + bar-1.0.tar.gz diff --git a/tests/data/indexes/in dex/simple/index.html b/tests/data/indexes/in dex/simple/index.html index dba6cc3ebd6..cb078ea7b19 100644 --- a/tests/data/indexes/in dex/simple/index.html +++ b/tests/data/indexes/in dex/simple/index.html @@ -1,3 +1,4 @@ + simple-1.0.tar.gz diff --git a/tests/data/indexes/pre/bar/index.html b/tests/data/indexes/pre/bar/index.html index c50d88bc863..da76454f604 100644 --- a/tests/data/indexes/pre/bar/index.html +++ b/tests/data/indexes/pre/bar/index.html @@ -1,3 +1,4 @@ + bar-1.0.tar.gz diff --git a/tests/data/indexes/simple/simple/index.html b/tests/data/indexes/simple/simple/index.html index dba6cc3ebd6..cb078ea7b19 100644 --- a/tests/data/indexes/simple/simple/index.html +++ b/tests/data/indexes/simple/simple/index.html @@ -1,3 +1,4 @@ + simple-1.0.tar.gz diff --git a/tests/data/indexes/yanked/simple/index.html b/tests/data/indexes/yanked/simple/index.html index bf4994310be..14181a3a0ad 100644 --- a/tests/data/indexes/yanked/simple/index.html +++ b/tests/data/indexes/yanked/simple/index.html @@ -1,3 +1,4 @@ + simple-1.0.tar.gz diff --git a/tests/data/indexes/yanked_all/simple/index.html b/tests/data/indexes/yanked_all/simple/index.html index 732340baa45..060f9904465 100644 --- a/tests/data/indexes/yanked_all/simple/index.html +++ b/tests/data/indexes/yanked_all/simple/index.html @@ -1,3 +1,4 @@ + simple-1.0.tar.gz diff --git a/tests/data/packages3/dinner/index.html b/tests/data/packages3/dinner/index.html index e258eb16b40..52a16b11686 100644 --- a/tests/data/packages3/dinner/index.html +++ b/tests/data/packages3/dinner/index.html @@ -1,3 +1,4 @@ + PyPI Mirror

PyPI Mirror

diff --git a/tests/data/packages3/index.html b/tests/data/packages3/index.html index d66e70ec631..262207b6a62 100644 --- a/tests/data/packages3/index.html +++ b/tests/data/packages3/index.html @@ -1,3 +1,4 @@ + PyPI Mirror

PyPI Mirror

diff --git a/tests/data/packages3/requiredinner/index.html b/tests/data/packages3/requiredinner/index.html index 0981c9c7246..52a4e66673c 100644 --- a/tests/data/packages3/requiredinner/index.html +++ b/tests/data/packages3/requiredinner/index.html @@ -1,3 +1,4 @@ + PyPI Mirror

PyPI Mirror

diff --git a/tests/functional/test_build_env.py b/tests/functional/test_build_env.py index d114e8d2359..285f21fda89 100644 --- a/tests/functional/test_build_env.py +++ b/tests/functional/test_build_env.py @@ -48,6 +48,7 @@ def run_with_build_env( finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib=False, ) with global_tempdir_manager(): diff --git a/tests/functional/test_new_resolver_hashes.py b/tests/functional/test_new_resolver_hashes.py index 39c1d012c65..4c4c2253e99 100644 --- a/tests/functional/test_new_resolver_hashes.py +++ b/tests/functional/test_new_resolver_hashes.py @@ -26,6 +26,7 @@ def _create_find_links(script: PipTestEnvironment) -> _FindLinks: index_html = script.scratch_path / "index.html" index_html.write_text( """ + {sdist_path.stem} {wheel_path.stem} """.format( diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index 06849d2d705..cb6364ebbb7 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -141,6 +141,7 @@ def make_test_finder( allow_all_prereleases: bool = False, session: Optional[PipSession] = None, target_python: Optional[TargetPython] = None, + use_deprecated_html5lib: bool = False, ) -> PackageFinder: """ Create a PackageFinder for testing purposes. 
@@ -159,6 +160,7 @@ def make_test_finder( link_collector=link_collector, selection_prefs=selection_prefs, target_python=target_python, + use_deprecated_html5lib=use_deprecated_html5lib, ) diff --git a/tests/unit/resolution_resolvelib/conftest.py b/tests/unit/resolution_resolvelib/conftest.py index cfd440570e6..545ed7c3506 100644 --- a/tests/unit/resolution_resolvelib/conftest.py +++ b/tests/unit/resolution_resolvelib/conftest.py @@ -26,7 +26,7 @@ def finder(data: TestData) -> Iterator[PackageFinder]: scope = SearchScope([str(data.packages)], []) collector = LinkCollector(session, scope) prefs = SelectionPreferences(allow_yanked=False) - finder = PackageFinder.create(collector, prefs) + finder = PackageFinder.create(collector, prefs, use_deprecated_html5lib=False) yield finder diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 8b60c302915..2225a32bf45 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -421,7 +421,11 @@ def test_clean_link(url: str, clean_url: str) -> None: def _test_parse_links_data_attribute( anchor_html: str, attr: str, expected: Optional[str] ) -> None: - html = f'{anchor_html}' + html = ( + "" + '' + "{}" + ).format(anchor_html) html_bytes = html.encode("utf-8") page = HTMLPage( html_bytes, @@ -430,7 +434,7 @@ def _test_parse_links_data_attribute( # the page content isn't cached. url=f"https://example.com/simple-{uuid.uuid4()}/", ) - links = list(parse_links(page)) + links = list(parse_links(page, use_deprecated_html5lib=False)) (link,) = links actual = getattr(link, attr) assert actual == expected @@ -467,7 +471,7 @@ def test_parse_links__requires_python( # Test not present. ('', None), # Test present with no value. - ('', ""), + ('', None), # Test the empty string. ('', ""), # Test a non-empty string. 
@@ -492,6 +496,7 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) - def test_parse_links_caches_same_page_by_url() -> None: html = ( + "" '' '' ) @@ -521,14 +526,14 @@ def test_parse_links_caches_same_page_by_url() -> None: cache_link_parsing=False, ) - parsed_links_1 = list(parse_links(page_1)) + parsed_links_1 = list(parse_links(page_1, use_deprecated_html5lib=False)) assert len(parsed_links_1) == 1 assert "pkg1" in parsed_links_1[0].url - parsed_links_2 = list(parse_links(page_2)) + parsed_links_2 = list(parse_links(page_2, use_deprecated_html5lib=False)) assert parsed_links_2 == parsed_links_1 - parsed_links_3 = list(parse_links(page_3)) + parsed_links_3 = list(parse_links(page_3, use_deprecated_html5lib=False)) assert len(parsed_links_3) == 1 assert parsed_links_3 != parsed_links_1 assert "pkg2" in parsed_links_3[0].url diff --git a/tests/unit/test_finder.py b/tests/unit/test_finder.py index f8646da4f5f..deff295828d 100644 --- a/tests/unit/test_finder.py +++ b/tests/unit/test_finder.py @@ -79,7 +79,10 @@ def test_incorrect_case_file_index(data: TestData) -> None: @pytest.mark.network -def test_finder_detects_latest_already_satisfied_find_links(data: TestData) -> None: +@pytest.mark.parametrize("use_deprecated_html5lib", [False, True]) +def test_finder_detects_latest_already_satisfied_find_links( + data: TestData, use_deprecated_html5lib: bool +) -> None: """Test PackageFinder detects latest already satisfied using find-links""" req = install_req_from_line("simple", None) # the latest simple in local pkgs is 3.0 @@ -89,14 +92,19 @@ def test_finder_detects_latest_already_satisfied_find_links(data: TestData) -> N version=parse_version(latest_version), ) req.satisfied_by = satisfied_by - finder = make_test_finder(find_links=[data.find_links]) + finder = make_test_finder( + find_links=[data.find_links], use_deprecated_html5lib=use_deprecated_html5lib + ) with pytest.raises(BestVersionAlreadyInstalled): finder.find_requirement(req, 
True) @pytest.mark.network -def test_finder_detects_latest_already_satisfied_pypi_links() -> None: +@pytest.mark.parametrize("use_deprecated_html5lib", [False, True]) +def test_finder_detects_latest_already_satisfied_pypi_links( + use_deprecated_html5lib: bool, +) -> None: """Test PackageFinder detects latest already satisfied using pypi links""" req = install_req_from_line("initools", None) # the latest initools on PyPI is 0.3.1 @@ -106,7 +114,10 @@ def test_finder_detects_latest_already_satisfied_pypi_links() -> None: version=parse_version(latest_version), ) req.satisfied_by = satisfied_by - finder = make_test_finder(index_urls=["http://pypi.org/simple/"]) + finder = make_test_finder( + index_urls=["http://pypi.org/simple/"], + use_deprecated_html5lib=use_deprecated_html5lib, + ) with pytest.raises(BestVersionAlreadyInstalled): finder.find_requirement(req, True) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 39106f63b23..fa98f28c89c 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -575,6 +575,7 @@ def test_create__candidate_prefs( finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib=False, ) candidate_prefs = finder._candidate_prefs assert candidate_prefs.allow_all_prereleases == allow_all_prereleases @@ -591,6 +592,7 @@ def test_create__link_collector(self) -> None: finder = PackageFinder.create( link_collector=link_collector, selection_prefs=SelectionPreferences(allow_yanked=True), + use_deprecated_html5lib=False, ) assert finder._link_collector is link_collector @@ -608,6 +610,7 @@ def test_create__target_python(self) -> None: link_collector=link_collector, selection_prefs=SelectionPreferences(allow_yanked=True), target_python=target_python, + use_deprecated_html5lib=False, ) actual_target_python = finder._target_python # The target_python attribute should be set as is. 
@@ -627,6 +630,7 @@ def test_create__target_python_none(self) -> None: link_collector=link_collector, selection_prefs=SelectionPreferences(allow_yanked=True), target_python=None, + use_deprecated_html5lib=False, ) # Spot-check the default TargetPython object. actual_target_python = finder._target_python @@ -646,6 +650,7 @@ def test_create__allow_yanked(self, allow_yanked: bool) -> None: finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib=False, ) assert finder._allow_yanked == allow_yanked @@ -665,6 +670,7 @@ def test_create__ignore_requires_python(self, ignore_requires_python: bool) -> N finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib=False, ) assert finder._ignore_requires_python == ignore_requires_python @@ -684,6 +690,7 @@ def test_create__format_control(self) -> None: finder = PackageFinder.create( link_collector=link_collector, selection_prefs=selection_prefs, + use_deprecated_html5lib=False, ) actual_format_control = finder.format_control assert actual_format_control is format_control @@ -724,6 +731,7 @@ def test_make_link_evaluator( allow_yanked=allow_yanked, format_control=format_control, ignore_requires_python=ignore_requires_python, + use_deprecated_html5lib=False, ) # Pass a project_name that will be different from canonical_name. 
@@ -772,6 +780,7 @@ def test_make_candidate_evaluator( target_python=target_python, allow_yanked=True, candidate_prefs=candidate_prefs, + use_deprecated_html5lib=False, ) specifier = SpecifierSet() diff --git a/tests/unit/test_self_check_outdated.py b/tests/unit/test_self_check_outdated.py index 22214fbcfbd..d313f3fd019 100644 --- a/tests/unit/test_self_check_outdated.py +++ b/tests/unit/test_self_check_outdated.py @@ -84,6 +84,7 @@ def _options() -> mock.Mock: no_index=False, pre=False, cache_dir="", + deprecated_features_enabled=[], ) From 20fe83fe95947c8292e39a5d56c3e196b91f13c8 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Tue, 25 Jan 2022 16:24:08 -0800 Subject: [PATCH 2/2] Point users to https://github.com/pypa/pip/issues/10825 --- src/pip/_internal/index/collector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index b2d7bec064a..c30c37661f5 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -426,7 +426,8 @@ def _raise_error(self) -> None: raise ValueError( "HTML doctype missing or incorrect. Expected .\n\n" "If you believe this error to be incorrect, try passing the " - "command line option --use-deprecated=html5lib." + "command line option --use-deprecated=html5lib and please leave " + "a comment on the pip issue at https://github.com/pypa/pip/issues/10825." )