diff --git a/CHANGES/5992.feature b/CHANGES/5992.feature new file mode 100644 index 00000000000..5667d2de24b --- /dev/null +++ b/CHANGES/5992.feature @@ -0,0 +1,3 @@ +Added support for HTTPS proxies to the extent CPython's +:py:mod:`asyncio` supports it -- by :user:`bmbouter`, +:user:`jborean93` and :user:`webknjaz`. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 8669ccd3b2c..45d9a1a5fb5 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -57,6 +57,7 @@ Boris Feld Borys Vorona Boyi Chen Brett Cannon +Brian Bouterse Brian C. Lane Brian Muller Bruce Merry @@ -165,6 +166,7 @@ Jonas Obrist Jonathan Wright Jonny Tan Joongi Kim +Jordan Borean Josep Cugat Josh Junon Joshu Coats diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index c63b73bcdf8..8c64db600e6 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -481,8 +481,6 @@ def update_proxy( proxy_auth: Optional[BasicAuth], proxy_headers: Optional[LooseHeaders], ) -> None: - if proxy and not proxy.scheme == "http": - raise ValueError("Only http proxies are supported") if proxy_auth and not isinstance(proxy_auth, helpers.BasicAuth): raise ValueError("proxy_auth must be None or BasicAuth() tuple") self.proxy = proxy diff --git a/aiohttp/connector.py b/aiohttp/connector.py index ab5124966b5..c1b0f89ce16 100644 --- a/aiohttp/connector.py +++ b/aiohttp/connector.py @@ -951,6 +951,100 @@ async def _wrap_create_connection( except OSError as exc: raise client_error(req.connection_key, exc) from exc + def _warn_about_tls_in_tls( + self, + underlying_transport: asyncio.Transport, + req: "ClientRequest", + ) -> None: + """Issue a warning if the requested URL has HTTPS scheme.""" + if req.request_info.url.scheme != "https": + return + + asyncio_supports_tls_in_tls = getattr( + underlying_transport, + "_start_tls_compatible", + False, + ) + + if asyncio_supports_tls_in_tls: + return + + warnings.warn( + "An HTTPS request is being sent through an HTTPS proxy. " + "This support for TLS in TLS is known to be disabled " + "in the stdlib asyncio. This is why you'll probably see " + "an error in the log below.\n\n" + "It is possible to enable it via monkeypatching under " + "Python 3.7 or higher. For more details, see:\n" + "* https://bugs.python.org/issue37179\n" + "* https://github.com/python/cpython/pull/28073\n\n" + "You can temporarily patch this as follows:\n" + "* https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support\n" + "* https://github.com/aio-libs/aiohttp/discussions/6044\n", + RuntimeWarning, + source=self, + # Why `4`? At least 3 of the calls in the stack originate + # from the methods in this class. + stacklevel=3, + ) + + async def _start_tls_connection( + self, + underlying_transport: asyncio.Transport, + req: "ClientRequest", + timeout: "ClientTimeout", + client_error: Type[Exception] = ClientConnectorError, + ) -> Tuple[asyncio.BaseTransport, ResponseHandler]: + """Wrap the raw TCP transport with TLS.""" + tls_proto = self._factory() # Create a brand new proto for TLS + + # Safety of the `cast()` call here is based on the fact that + # internally `_get_ssl_context()` only returns `None` when + # `req.is_ssl()` evaluates to `False` which is never gonna happen + # in this code path. Of course, it's rather fragile + # maintainability-wise but this is to be solved separately. + sslcontext = cast(ssl.SSLContext, self._get_ssl_context(req)) + + try: + async with ceil_timeout(timeout.sock_connect): + try: + tls_transport = await self._loop.start_tls( + underlying_transport, + tls_proto, + sslcontext, + server_hostname=req.host, + ssl_handshake_timeout=timeout.total, + ) + except BaseException: + # We need to close the underlying transport since + # `start_tls()` probably failed before it had a + # chance to do this: + underlying_transport.close() + raise + except cert_errors as exc: + raise ClientConnectorCertificateError(req.connection_key, exc) from exc + except ssl_errors as exc: + raise ClientConnectorSSLError(req.connection_key, exc) from exc + except OSError as exc: + raise client_error(req.connection_key, exc) from exc + except TypeError as type_err: + # Example cause looks like this: + # TypeError: transport is not supported by start_tls() + + raise ClientConnectionError( + "Cannot initialize a TLS-in-TLS connection to host " + f"{req.host!s}:{req.port:d} through an underlying connection " + f"to an HTTPS proxy {req.proxy!s} ssl:{req.ssl or 'default'} " + f"[{type_err!s}]" + ) from type_err + else: + tls_proto.connection_made( + tls_transport + ) # Kick the state machine of the new TLS protocol + + return tls_transport, tls_proto + async def _create_direct_connection( self, req: "ClientRequest", @@ -1028,7 +1122,7 @@ def drop_exception(fut: "asyncio.Future[List[Dict[str, Any]]]") -> None: async def _create_proxy_connection( self, req: "ClientRequest", traces: List["Trace"], timeout: "ClientTimeout" - ) -> Tuple[asyncio.Transport, ResponseHandler]: + ) -> Tuple[asyncio.BaseTransport, ResponseHandler]: headers = {} # type: Dict[str, str] if req.proxy_headers is not None: headers = req.proxy_headers # type: ignore[assignment] @@ -1063,7 +1157,8 @@ async def _create_proxy_connection( proxy_req.headers[hdrs.PROXY_AUTHORIZATION] = auth if req.is_ssl(): - sslcontext = self._get_ssl_context(req) + self._warn_about_tls_in_tls(transport, req) + # For HTTPS requests over HTTP proxy # we must notify proxy to tunnel connection # so we send CONNECT command: @@ -1083,7 +1178,11 @@ async def _create_proxy_connection( try: protocol = conn._protocol assert protocol is not None - protocol.set_response_params() + + # read_until_eof=True will ensure the connection isn't closed + # once the response is received and processed allowing + # START_TLS to work on the connection below. + protocol.set_response_params(read_until_eof=True) resp = await proxy_resp.start(conn) except BaseException: proxy_resp.close() @@ -1104,21 +1203,19 @@ async def _create_proxy_connection( message=message, headers=resp.headers, ) - rawsock = transport.get_extra_info("socket", default=None) - if rawsock is None: - raise RuntimeError("Transport does not expose socket instance") - # Duplicate the socket, so now we can close proxy transport - rawsock = rawsock.dup() - finally: + except BaseException: + # It shouldn't be closed in `finally` because it's fed to + # `loop.start_tls()` and the docs say not to touch it after + # passing there. transport.close() + raise - transport, proto = await self._wrap_create_connection( - self._factory, - timeout=timeout, - ssl=sslcontext, - sock=rawsock, - server_hostname=req.host, + return await self._start_tls_connection( + # Access the old transport for the last time before it's + # closed and forgotten forever: + transport, req=req, + timeout=timeout, ) finally: proxy_resp.close() diff --git a/docs/client_advanced.rst b/docs/client_advanced.rst index 7a2f4bef217..bfd2d72c17c 100644 --- a/docs/client_advanced.rst +++ b/docs/client_advanced.rst @@ -533,9 +533,11 @@ DER with e.g:: Proxy support ------------- -aiohttp supports plain HTTP proxies and HTTP proxies that can be upgraded to HTTPS -via the HTTP CONNECT method. aiohttp does not support proxies that must be -connected to via ``https://``. To connect, use the *proxy* parameter:: +aiohttp supports plain HTTP proxies and HTTP proxies that can be +upgraded to HTTPS via the HTTP CONNECT method. aiohttp has a limited +support for proxies that must be connected to via ``https://`` — see +the info box below for more details. +To connect, use the *proxy* parameter:: async with aiohttp.ClientSession() as session: async with session.get("http://python.org", @@ -570,6 +572,33 @@ variables* (all are case insensitive):: Proxy credentials are given from ``~/.netrc`` file if present (see :class:`aiohttp.ClientSession` for more details). +.. attention:: + + CPython has introduced the support for TLS in TLS around Python 3.7. + But, as of now (Python 3.10), it's disabled for the transports that + :py:mod:`asyncio` uses. If the further release of Python (say v3.11) + toggles one attribute, it'll *just work™*. + + aiohttp v3.8 and higher is ready for this to happen and has code in + place supports TLS-in-TLS, hence sending HTTPS requests over HTTPS + proxy tunnels. + + For as long as your Python runtime doesn't declare the support for + TLS-in-TLS, please don't file bugs with aiohttp but rather try to + help the CPython upstream enable this feature. Meanwhile, if you + *really* need this to work, there's a patch that may help you make + it happen, include it into your app's code base: + https://github.com/aio-libs/aiohttp/discussions/6044#discussioncomment-1432443. + +.. important:: + + When supplying a custom :py:class:`ssl.SSLContext` instance, bear in + mind that it will be used to not only establish a TLS session with + the HTTPS endpoint you're hitting but also to establish a TLS tunnel + to the HTTPS proxy. To avoid surprises, make sure to set up the trust + chain that would recognize TLS certificates used by both the endpoint + and the proxy. + .. _aiohttp-persistent-session: Persistent session diff --git a/tests/test_client_request.py b/tests/test_client_request.py index cfe2a45edc7..6ab8761778a 100644 --- a/tests/test_client_request.py +++ b/tests/test_client_request.py @@ -119,11 +119,6 @@ def test_version_err(make_request: Any) -> None: make_request("get", "http://python.org/", version="1.c") -def test_https_proxy(make_request: Any) -> None: - with pytest.raises(ValueError): - make_request("get", "http://python.org/", proxy=URL("https://proxy.org")) - - def test_keep_alive(make_request: Any) -> None: req = make_request("get", "http://python.org/", version=(0, 9)) assert not req.keep_alive() diff --git a/tests/test_proxy.py b/tests/test_proxy.py index c778f85f531..af869ee88f7 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy.py @@ -228,6 +228,7 @@ async def make_conn(): tr, proto = mock.Mock(), mock.Mock() self.loop.create_connection = make_mocked_coro((tr, proto)) + self.loop.start_tls = make_mocked_coro(mock.Mock()) req = ClientRequest( "GET", @@ -242,8 +243,6 @@ async def make_conn(): self.assertEqual(req.url.path, "/") self.assertEqual(proxy_req.method, "CONNECT") self.assertEqual(proxy_req.url, URL("https://www.python.org")) - tr.close.assert_called_once_with() - tr.get_extra_info.assert_called_with("socket", default=None) self.loop.run_until_complete(proxy_req.close()) proxy_resp.close() @@ -287,22 +286,10 @@ async def make_conn(): ] ) - seq = 0 - - async def create_connection(*args, **kwargs): - nonlocal seq - seq += 1 - - # connection to http://proxy.example.com - if seq == 1: - return mock.Mock(), mock.Mock() - # connection to https://www.python.org - elif seq == 2: - raise ssl.CertificateError - else: - assert False - - self.loop.create_connection = create_connection + # Called on connection to http://proxy.example.com + self.loop.create_connection = make_mocked_coro((mock.Mock(), mock.Mock())) + # Called on connection to https://www.python.org + self.loop.start_tls = make_mocked_coro(raise_exception=ssl.CertificateError) req = ClientRequest( "GET", @@ -353,75 +340,12 @@ async def make_conn(): ] ) - seq = 0 - - async def create_connection(*args, **kwargs): - nonlocal seq - seq += 1 - - # connection to http://proxy.example.com - if seq == 1: - return mock.Mock(), mock.Mock() - # connection to https://www.python.org - elif seq == 2: - raise ssl.SSLError - else: - assert False - - self.loop.create_connection = create_connection - - req = ClientRequest( - "GET", - URL("https://www.python.org"), - proxy=URL("http://proxy.example.com"), - loop=self.loop, + # Called on connection to http://proxy.example.com + self.loop.create_connection = make_mocked_coro( + (mock.Mock(), mock.Mock()), ) - with self.assertRaises(aiohttp.ClientConnectorSSLError): - self.loop.run_until_complete( - connector._create_connection(req, None, aiohttp.ClientTimeout()) - ) - - @mock.patch("aiohttp.connector.ClientRequest") - def test_https_connect_runtime_error(self, ClientRequestMock: Any) -> None: - proxy_req = ClientRequest( - "GET", URL("http://proxy.example.com"), loop=self.loop - ) - ClientRequestMock.return_value = proxy_req - - proxy_resp = ClientResponse( - "get", - URL("http://proxy.example.com"), - request_info=mock.Mock(), - writer=mock.Mock(), - continue100=None, - timer=TimerNoop(), - traces=[], - loop=self.loop, - session=mock.Mock(), - ) - proxy_req.send = make_mocked_coro(proxy_resp) - proxy_resp.start = make_mocked_coro(mock.Mock(status=200)) - - async def make_conn(): - return aiohttp.TCPConnector() - - connector = self.loop.run_until_complete(make_conn()) - connector._resolve_host = make_mocked_coro( - [ - { - "hostname": "hostname", - "host": "127.0.0.1", - "port": 80, - "family": socket.AF_INET, - "proto": 0, - "flags": 0, - } - ] - ) - - tr, proto = mock.Mock(), mock.Mock() - tr.get_extra_info.return_value = None - self.loop.create_connection = make_mocked_coro((tr, proto)) + # Called on connection to https://www.python.org + self.loop.start_tls = make_mocked_coro(raise_exception=ssl.SSLError) req = ClientRequest( "GET", @@ -429,17 +353,11 @@ async def make_conn(): proxy=URL("http://proxy.example.com"), loop=self.loop, ) - with self.assertRaisesRegex( - RuntimeError, "Transport does not expose socket instance" - ): + with self.assertRaises(aiohttp.ClientConnectorSSLError): self.loop.run_until_complete( connector._create_connection(req, None, aiohttp.ClientTimeout()) ) - self.loop.run_until_complete(proxy_req.close()) - proxy_resp.close() - self.loop.run_until_complete(req.close()) - @mock.patch("aiohttp.connector.ClientRequest") def test_https_connect_http_proxy_error(self, ClientRequestMock: Any) -> None: proxy_req = ClientRequest( @@ -650,6 +568,7 @@ async def make_conn(): tr, proto = mock.Mock(), mock.Mock() self.loop.create_connection = make_mocked_coro((tr, proto)) + self.loop.start_tls = make_mocked_coro(mock.Mock()) req = ClientRequest( "GET", @@ -661,18 +580,17 @@ async def make_conn(): connector._create_connection(req, None, aiohttp.ClientTimeout()) ) - self.loop.create_connection.assert_called_with( + self.loop.start_tls.assert_called_with( + mock.ANY, mock.ANY, - ssl=connector._make_ssl_context(True), - sock=mock.ANY, + connector._make_ssl_context(True), server_hostname="www.python.org", + ssl_handshake_timeout=mock.ANY, ) self.assertEqual(req.url.path, "/") self.assertEqual(proxy_req.method, "CONNECT") self.assertEqual(proxy_req.url, URL("https://www.python.org")) - tr.close.assert_called_once_with() - tr.get_extra_info.assert_called_with("socket", default=None) self.loop.run_until_complete(proxy_req.close()) proxy_resp.close() @@ -721,6 +639,7 @@ async def make_conn(): tr, proto = mock.Mock(), mock.Mock() self.loop.create_connection = make_mocked_coro((tr, proto)) + self.loop.start_tls = make_mocked_coro(mock.Mock()) self.assertIn("AUTHORIZATION", proxy_req.headers) self.assertNotIn("PROXY-AUTHORIZATION", proxy_req.headers) diff --git a/tests/test_proxy_functional.py b/tests/test_proxy_functional.py index a5091b0a827..52aaf119b5e 100644 --- a/tests/test_proxy_functional.py +++ b/tests/test_proxy_functional.py @@ -2,8 +2,10 @@ import asyncio import os import pathlib +from re import match as match_regex from typing import Any from unittest import mock +from uuid import uuid4 import proxy import pytest @@ -11,6 +13,7 @@ import aiohttp from aiohttp import web +from aiohttp.client_exceptions import ClientConnectionError ASYNCIO_SUPPORTS_TLS_IN_TLS = hasattr( asyncio.sslproto._SSLProtocolTransport, @@ -55,7 +58,7 @@ def listen(self): @pytest.fixture def web_server_endpoint_payload(): - return "Test message" + return str(uuid4()) @pytest.fixture(params=("http", "https")) @@ -96,8 +99,9 @@ async def handler(*args, **kwargs): def _pretend_asyncio_supports_tls_in_tls( monkeypatch, web_server_endpoint_type, -): - if web_server_endpoint_type != "https" or ASYNCIO_SUPPORTS_TLS_IN_TLS: +) -> None: + # return + if web_server_endpoint_type == "http" or ASYNCIO_SUPPORTS_TLS_IN_TLS: return # for https://github.com/python/cpython/pull/28073 @@ -110,10 +114,6 @@ def _pretend_asyncio_supports_tls_in_tls( ) -@pytest.mark.xfail( - reason="https://github.com/aio-libs/aiohttp/pull/5992", - raises=ValueError, -) @pytest.mark.parametrize("web_server_endpoint_type", ("http", "https")) @pytest.mark.usefixtures("_pretend_asyncio_supports_tls_in_tls", "loop") async def test_secure_https_proxy_absolute_path( @@ -122,7 +122,7 @@ async def test_secure_https_proxy_absolute_path( web_server_endpoint_url, web_server_endpoint_payload, ) -> None: - """Test urls can be requested through a secure proxy.""" + """Ensure HTTP(S) sites are accessible through a secure proxy.""" conn = aiohttp.TCPConnector() sess = aiohttp.ClientSession(connector=conn) @@ -140,6 +140,69 @@ async def test_secure_https_proxy_absolute_path( await conn.close() +@pytest.mark.parametrize("web_server_endpoint_type", ("https",)) +@pytest.mark.usefixtures("loop") +async def test_https_proxy_unsupported_tls_in_tls( + client_ssl_ctx, + secure_proxy_url, + web_server_endpoint_type, +) -> None: + """Ensure connecting to TLS endpoints w/ HTTPS proxy needs patching. + + This also checks that a helpful warning on how to patch the env + is displayed. + """ + url = URL.build(scheme=web_server_endpoint_type, host="python.org") + + escaped_host_port = ":".join((url.host.replace(".", r"\."), str(url.port))) + escaped_proxy_url = str(secure_proxy_url).replace(".", r"\.") + + conn = aiohttp.TCPConnector() + sess = aiohttp.ClientSession(connector=conn) + + expected_warning_text = ( + r"^" + r"An HTTPS request is being sent through an HTTPS proxy\. " + "This support for TLS in TLS is known to be disabled " + r"in the stdlib asyncio\. This is why you'll probably see " + r"an error in the log below.\n\n" + "It is possible to enable it via monkeypatching under " + r"Python 3\.7 or higher\. For more details, see:\n" + r"* https://bugs\.python\.org/issue37179\n" + r"* https://github\.com/python/cpython/pull/28073\n\n" + r"You can temporarily patch this as follows:\n" + r"* https://docs\.aiohttp\.org/en/stable/client_advanced\.html#proxy-support\n", + r"* https://github\.com/aio-libs/aiohttp/discussions/6044\n$", + ) + type_err = ( + r"transport is not supported by start_tls\(\)" + ) + expected_exception_reason = ( + r"^" + "Cannot initialize a TLS-in-TLS connection to host " + f"{escaped_host_port!s} through an underlying connection " + f"to an HTTPS proxy {escaped_proxy_url!s} ssl:{client_ssl_ctx!s} " + f"[{type_err!s}]" + r"$" + ) + + with pytest.raises( + ClientConnectionError, + match=expected_exception_reason, + ) as conn_err, pytest.warns( + RuntimeWarning, + match=expected_warning_text, + ): + await sess.get(url, proxy=secure_proxy_url, ssl=client_ssl_ctx) + + assert type(conn_err.value.__cause__) == TypeError + assert match_regex(f"^{type_err!s}$", str(conn_err.value.__cause__)) + + await sess.close() + await conn.close() + + @pytest.fixture def proxy_test_server(aiohttp_raw_server: Any, loop: Any, monkeypatch: Any): # Handle all proxy requests and imitate remote server response.