Skip to content

Commit fb119c7

Browse files
committed
http1connection: Stricter handling of transfer-encoding
Unexpected transfer-encoding values were previously ignored and treated as the HTTP/1.0 default of read-until-close. This can lead to framing issues with certain proxies. We now treat any unexpected value as an error.
1 parent 0efa9a4 commit fb119c7

File tree

3 files changed

+109
-20
lines changed

3 files changed

+109
-20
lines changed

tornado/http1connection.py

+38-19
Original file line numberDiff line numberDiff line change
@@ -389,14 +389,11 @@ def write_headers(
389389
self._request_start_line = start_line
390390
lines.append(utf8("%s %s HTTP/1.1" % (start_line[0], start_line[1])))
391391
# Client requests with a non-empty body must have either a
392-
# Content-Length or a Transfer-Encoding.
392+
# Content-Length or a Transfer-Encoding. If Content-Length is not
393+
# present we'll add our Transfer-Encoding below.
393394
self._chunking_output = (
394395
start_line.method in ("POST", "PUT", "PATCH")
395396
and "Content-Length" not in headers
396-
and (
397-
"Transfer-Encoding" not in headers
398-
or headers["Transfer-Encoding"] == "chunked"
399-
)
400397
)
401398
else:
402399
assert isinstance(start_line, httputil.ResponseStartLine)
@@ -418,9 +415,6 @@ def write_headers(
418415
and (start_line.code < 100 or start_line.code >= 200)
419416
# No need to chunk the output if a Content-Length is specified.
420417
and "Content-Length" not in headers
421-
# Applications are discouraged from touching Transfer-Encoding,
422-
# but if they do, leave it alone.
423-
and "Transfer-Encoding" not in headers
424418
)
425419
# If connection to a 1.1 client will be closed, inform client
426420
if (
@@ -560,7 +554,7 @@ def _can_keep_alive(
560554
return connection_header != "close"
561555
elif (
562556
"Content-Length" in headers
563-
or headers.get("Transfer-Encoding", "").lower() == "chunked"
557+
or is_transfer_encoding_chunked(headers)
564558
or getattr(start_line, "method", None) in ("HEAD", "GET")
565559
):
566560
# start_line may be a request or response start line; only
@@ -598,13 +592,6 @@ def _read_body(
598592
delegate: httputil.HTTPMessageDelegate,
599593
) -> Optional[Awaitable[None]]:
600594
if "Content-Length" in headers:
601-
if "Transfer-Encoding" in headers:
602-
# Response cannot contain both Content-Length and
603-
# Transfer-Encoding headers.
604-
# http://tools.ietf.org/html/rfc7230#section-3.3.3
605-
raise httputil.HTTPInputError(
606-
"Response with both Transfer-Encoding and Content-Length"
607-
)
608595
if "," in headers["Content-Length"]:
609596
# Proxies sometimes cause Content-Length headers to get
610597
# duplicated. If all the values are identical then we can
@@ -631,20 +618,22 @@ def _read_body(
631618
else:
632619
content_length = None
633620

621+
is_chunked = is_transfer_encoding_chunked(headers)
622+
634623
if code == 204:
635624
# This response code is not allowed to have a non-empty body,
636625
# and has an implicit length of zero instead of read-until-close.
637626
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3
638-
if "Transfer-Encoding" in headers or content_length not in (None, 0):
627+
if is_chunked or content_length not in (None, 0):
639628
raise httputil.HTTPInputError(
640629
"Response with code %d should not have body" % code
641630
)
642631
content_length = 0
643632

633+
if is_chunked:
634+
return self._read_chunked_body(delegate)
644635
if content_length is not None:
645636
return self._read_fixed_body(content_length, delegate)
646-
if headers.get("Transfer-Encoding", "").lower() == "chunked":
647-
return self._read_chunked_body(delegate)
648637
if self.is_client:
649638
return self._read_body_until_close(delegate)
650639
return None
@@ -863,3 +852,33 @@ def parse_hex_int(s: str) -> int:
863852
if HEXDIGITS.fullmatch(s) is None:
864853
raise ValueError("not a hexadecimal integer: %r" % s)
865854
return int(s, 16)
855+
856+
857+
def is_transfer_encoding_chunked(headers: httputil.HTTPHeaders) -> bool:
858+
"""Returns true if the headers specify Transfer-Encoding: chunked.
859+
860+
Raise httputil.HTTPInputError if any other transfer encoding is used.
861+
"""
862+
# Note that transfer-encoding is an area in which postel's law can lead
863+
# us astray. If a proxy and a backend server are liberal in what they accept,
864+
# but accept slightly different things, this can lead to mismatched framing
865+
# and request smuggling issues. Therefore we are as strict as possible here
866+
# (even technically going beyond the requirements of the RFCs: a value of
867+
# ",chunked" is legal but doesn't appear in practice for legitimate traffic)
868+
if "Transfer-Encoding" not in headers:
869+
return False
870+
if "Content-Length" in headers:
871+
# Message cannot contain both Content-Length and
872+
# Transfer-Encoding headers.
873+
# http://tools.ietf.org/html/rfc7230#section-3.3.3
874+
raise httputil.HTTPInputError(
875+
"Message with both Transfer-Encoding and Content-Length"
876+
)
877+
if headers["Transfer-Encoding"].lower() == "chunked":
878+
return True
879+
# We do not support any transfer-encodings other than chunked, and we do not
880+
# expect to add any support because the concept of transfer-encoding has
881+
# been removed in HTTP/2.
882+
raise httputil.HTTPInputError(
883+
"Unsupported Transfer-Encoding %s" % headers["Transfer-Encoding"]
884+
)

tornado/test/httpserver_test.py

+70
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,76 @@ def test_chunked_request_body_invalid_size(self):
581581
)
582582
self.assertEqual(400, start_line.code)
583583

584+
def test_chunked_request_body_duplicate_header(self):
585+
# Repeated Transfer-Encoding headers should be an error (and not confuse
586+
# the chunked-encoding detection to mess up framing).
587+
self.stream.write(
588+
b"""\
589+
POST /echo HTTP/1.1
590+
Transfer-Encoding: chunked
591+
Transfer-encoding: chunked
592+
593+
2
594+
ok
595+
0
596+
597+
"""
598+
)
599+
with ExpectLog(
600+
gen_log,
601+
".*Unsupported Transfer-Encoding chunked,chunked",
602+
level=logging.INFO,
603+
):
604+
start_line, headers, response = self.io_loop.run_sync(
605+
lambda: read_stream_body(self.stream)
606+
)
607+
self.assertEqual(400, start_line.code)
608+
609+
def test_chunked_request_body_unsupported_transfer_encoding(self):
610+
# We don't support transfer-encodings other than chunked.
611+
self.stream.write(
612+
b"""\
613+
POST /echo HTTP/1.1
614+
Transfer-Encoding: gzip, chunked
615+
616+
2
617+
ok
618+
0
619+
620+
"""
621+
)
622+
with ExpectLog(
623+
gen_log, ".*Unsupported Transfer-Encoding gzip, chunked", level=logging.INFO
624+
):
625+
start_line, headers, response = self.io_loop.run_sync(
626+
lambda: read_stream_body(self.stream)
627+
)
628+
self.assertEqual(400, start_line.code)
629+
630+
def test_chunked_request_body_transfer_encoding_and_content_length(self):
631+
# Transfer-encoding and content-length are mutually exclusive
632+
self.stream.write(
633+
b"""\
634+
POST /echo HTTP/1.1
635+
Transfer-Encoding: chunked
636+
Content-Length: 2
637+
638+
2
639+
ok
640+
0
641+
642+
"""
643+
)
644+
with ExpectLog(
645+
gen_log,
646+
".*Message with both Transfer-Encoding and Content-Length",
647+
level=logging.INFO,
648+
):
649+
start_line, headers, response = self.io_loop.run_sync(
650+
lambda: read_stream_body(self.stream)
651+
)
652+
self.assertEqual(400, start_line.code)
653+
584654
@gen_test
585655
def test_invalid_content_length(self):
586656
# HTTP only allows decimal digits in content-length. Make sure we don't

tornado/test/simple_httpclient_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ def test_chunked_with_content_length(self):
828828
with ExpectLog(
829829
gen_log,
830830
(
831-
"Malformed HTTP message from None: Response "
831+
"Malformed HTTP message from None: Message "
832832
"with both Transfer-Encoding and Content-Length"
833833
),
834834
level=logging.INFO,

0 commit comments

Comments
 (0)