From 689536f32a8183d0e64901029b3bd32c83f1c1e3 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:36:01 -1000 Subject: [PATCH 01/59] Separate http_websocket into multiple files There are no functional changes here. The goal is to be able to make it easier to build a Cython implementation for the WebSocketReader --- aiohttp/http_websocket.py | 799 +------------------------------------------------------------------------- 1 file changed, 10 insertions(+), 789 deletions(-) diff --git a/aiohttp/http_websocket.py b/aiohttp/http_websocket.py index 8093d811e20..100f77451ae 100644 --- a/aiohttp/http_websocket.py +++ b/aiohttp/http_websocket.py @@ -1,35 +1,16 @@ """WebSocket protocol versions 13 and 8.""" -import asyncio -import functools -import json -import random -import re -import sys -import zlib -from enum import IntEnum -from functools import partial -from struct import Struct -from typing import ( - Any, - Callable, - Final, - List, - Literal, - NamedTuple, - Optional, - Pattern, - Set, - Tuple, - Union, - cast, +from ._http_websocket_helpers import WS_KEY +from ._http_websocket_models import ( + WS_CLOSED_MESSAGE, + WS_CLOSING_MESSAGE, + WebSocketError, + WSCloseCode, + WSMessage, + WSMsgType, ) - -from .base_protocol import BaseProtocol -from .client_exceptions import ClientConnectionResetError -from .compression_utils import ZLibCompressor, ZLibDecompressor -from .helpers import NO_EXTENSIONS, set_exception -from .streams import DataQueue +from ._http_websocket_reader import WebSocketReader +from ._http_websocket_writer import WebSocketWriter __all__ = ( "WS_CLOSED_MESSAGE", @@ -42,763 +23,3 @@ "WSMsgType", "WSCloseCode", ) - - -class WSCloseCode(IntEnum): - OK = 1000 - GOING_AWAY = 1001 - PROTOCOL_ERROR = 1002 - UNSUPPORTED_DATA = 1003 - ABNORMAL_CLOSURE = 1006 - INVALID_TEXT = 1007 - POLICY_VIOLATION = 1008 - MESSAGE_TOO_BIG = 1009 - MANDATORY_EXTENSION = 1010 - INTERNAL_ERROR = 1011 - SERVICE_RESTART = 1012 - TRY_AGAIN_LATER = 1013 - BAD_GATEWAY = 
1014 - - -ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} - -# For websockets, keeping latency low is extremely important as implementations -# generally expect to be able to send and receive messages quickly. We use a -# larger chunk size than the default to reduce the number of executor calls -# since the executor is a significant source of latency and overhead when -# the chunks are small. A size of 5KiB was chosen because it is also the -# same value python-zlib-ng choose to use as the threshold to release the GIL. - -WEBSOCKET_MAX_SYNC_CHUNK_SIZE = 5 * 1024 - - -class WSMsgType(IntEnum): - # websocket spec types - CONTINUATION = 0x0 - TEXT = 0x1 - BINARY = 0x2 - PING = 0x9 - PONG = 0xA - CLOSE = 0x8 - - # aiohttp specific types - CLOSING = 0x100 - CLOSED = 0x101 - ERROR = 0x102 - - -MESSAGE_TYPES_WITH_CONTENT: Final = frozenset( - { - WSMsgType.BINARY, - WSMsgType.TEXT, - WSMsgType.CONTINUATION, - } -) - -WS_KEY: Final[bytes] = b"258EAFA5-E914-47DA-95CA-C5AB0DC85B11" - - -UNPACK_LEN2 = Struct("!H").unpack_from -UNPACK_LEN3 = Struct("!Q").unpack_from -UNPACK_CLOSE_CODE = Struct("!H").unpack -PACK_LEN1 = Struct("!BB").pack -PACK_LEN2 = Struct("!BBH").pack -PACK_LEN3 = Struct("!BBQ").pack -PACK_CLOSE_CODE = Struct("!H").pack -PACK_RANDBITS = Struct("!L").pack -MSG_SIZE: Final[int] = 2**14 -DEFAULT_LIMIT: Final[int] = 2**16 -MASK_LEN: Final[int] = 4 - - -class WSMessageContinuation(NamedTuple): - data: bytes - extra: Optional[str] = None - type: Literal[WSMsgType.CONTINUATION] = WSMsgType.CONTINUATION - - -class WSMessageText(NamedTuple): - data: str - extra: Optional[str] = None - type: Literal[WSMsgType.TEXT] = WSMsgType.TEXT - - def json( - self, *, loads: Callable[[Union[str, bytes, bytearray]], Any] = json.loads - ) -> Any: - """Return parsed JSON data.""" - return loads(self.data) - - -class WSMessageBinary(NamedTuple): - data: bytes - extra: Optional[str] = None - type: Literal[WSMsgType.BINARY] = WSMsgType.BINARY - - def json( - self, 
*, loads: Callable[[Union[str, bytes, bytearray]], Any] = json.loads - ) -> Any: - """Return parsed JSON data.""" - return loads(self.data) - - -class WSMessagePing(NamedTuple): - data: bytes - extra: Optional[str] = None - type: Literal[WSMsgType.PING] = WSMsgType.PING - - -class WSMessagePong(NamedTuple): - data: bytes - extra: Optional[str] = None - type: Literal[WSMsgType.PONG] = WSMsgType.PONG - - -class WSMessageClose(NamedTuple): - data: int - extra: Optional[str] = None - type: Literal[WSMsgType.CLOSE] = WSMsgType.CLOSE - - -class WSMessageClosing(NamedTuple): - data: None = None - extra: Optional[str] = None - type: Literal[WSMsgType.CLOSING] = WSMsgType.CLOSING - - -class WSMessageClosed(NamedTuple): - data: None = None - extra: Optional[str] = None - type: Literal[WSMsgType.CLOSED] = WSMsgType.CLOSED - - -class WSMessageError(NamedTuple): - data: BaseException - extra: Optional[str] = None - type: Literal[WSMsgType.ERROR] = WSMsgType.ERROR - - -WSMessage = Union[ - WSMessageContinuation, - WSMessageText, - WSMessageBinary, - WSMessagePing, - WSMessagePong, - WSMessageClose, - WSMessageClosing, - WSMessageClosed, - WSMessageError, -] - -WS_CLOSED_MESSAGE = WSMessageClosed() -WS_CLOSING_MESSAGE = WSMessageClosing() - - -class WebSocketError(Exception): - """WebSocket protocol parser error.""" - - def __init__(self, code: int, message: str) -> None: - self.code = code - super().__init__(code, message) - - def __str__(self) -> str: - return cast(str, self.args[1]) - - -class WSHandshakeError(Exception): - """WebSocket protocol handshake error.""" - - -native_byteorder: Final[str] = sys.byteorder - - -# Used by _websocket_mask_python -@functools.lru_cache -def _xor_table() -> List[bytes]: - return [bytes(a ^ b for a in range(256)) for b in range(256)] - - -def _websocket_mask_python(mask: bytes, data: bytearray) -> None: - """Websocket masking function. - - `mask` is a `bytes` object of length 4; `data` is a `bytearray` - object of any length. 
The contents of `data` are masked with `mask`, - as specified in section 5.3 of RFC 6455. - - Note that this function mutates the `data` argument. - - This pure-python implementation may be replaced by an optimized - version when available. - - """ - assert isinstance(data, bytearray), data - assert len(mask) == 4, mask - - if data: - _XOR_TABLE = _xor_table() - a, b, c, d = (_XOR_TABLE[n] for n in mask) - data[::4] = data[::4].translate(a) - data[1::4] = data[1::4].translate(b) - data[2::4] = data[2::4].translate(c) - data[3::4] = data[3::4].translate(d) - - -if NO_EXTENSIONS: # pragma: no cover - _websocket_mask = _websocket_mask_python -else: - try: - from ._websocket import _websocket_mask_cython # type: ignore[import-not-found] - - _websocket_mask = _websocket_mask_cython - except ImportError: # pragma: no cover - _websocket_mask = _websocket_mask_python - -_WS_DEFLATE_TRAILING: Final[bytes] = bytes([0x00, 0x00, 0xFF, 0xFF]) - - -_WS_EXT_RE: Final[Pattern[str]] = re.compile( - r"^(?:;\s*(?:" - r"(server_no_context_takeover)|" - r"(client_no_context_takeover)|" - r"(server_max_window_bits(?:=(\d+))?)|" - r"(client_max_window_bits(?:=(\d+))?)))*$" -) - -_WS_EXT_RE_SPLIT: Final[Pattern[str]] = re.compile(r"permessage-deflate([^,]+)?") - - -def ws_ext_parse(extstr: Optional[str], isserver: bool = False) -> Tuple[int, bool]: - if not extstr: - return 0, False - - compress = 0 - notakeover = False - for ext in _WS_EXT_RE_SPLIT.finditer(extstr): - defext = ext.group(1) - # Return compress = 15 when get `permessage-deflate` - if not defext: - compress = 15 - break - match = _WS_EXT_RE.match(defext) - if match: - compress = 15 - if isserver: - # Server never fail to detect compress handshake. 
- # Server does not need to send max wbit to client - if match.group(4): - compress = int(match.group(4)) - # Group3 must match if group4 matches - # Compress wbit 8 does not support in zlib - # If compress level not support, - # CONTINUE to next extension - if compress > 15 or compress < 9: - compress = 0 - continue - if match.group(1): - notakeover = True - # Ignore regex group 5 & 6 for client_max_window_bits - break - else: - if match.group(6): - compress = int(match.group(6)) - # Group5 must match if group6 matches - # Compress wbit 8 does not support in zlib - # If compress level not support, - # FAIL the parse progress - if compress > 15 or compress < 9: - raise WSHandshakeError("Invalid window size") - if match.group(2): - notakeover = True - # Ignore regex group 5 & 6 for client_max_window_bits - break - # Return Fail if client side and not match - elif not isserver: - raise WSHandshakeError("Extension for deflate not supported" + ext.group(1)) - - return compress, notakeover - - -def ws_ext_gen( - compress: int = 15, isserver: bool = False, server_notakeover: bool = False -) -> str: - # client_notakeover=False not used for server - # compress wbit 8 does not support in zlib - if compress < 9 or compress > 15: - raise ValueError( - "Compress wbits must between 9 and 15, zlib does not support wbits=8" - ) - enabledext = ["permessage-deflate"] - if not isserver: - enabledext.append("client_max_window_bits") - - if compress < 15: - enabledext.append("server_max_window_bits=" + str(compress)) - if server_notakeover: - enabledext.append("server_no_context_takeover") - # if client_notakeover: - # enabledext.append('client_no_context_takeover') - return "; ".join(enabledext) - - -class WSParserState(IntEnum): - READ_HEADER = 1 - READ_PAYLOAD_LENGTH = 2 - READ_PAYLOAD_MASK = 3 - READ_PAYLOAD = 4 - - -class WebSocketReader: - def __init__( - self, queue: DataQueue[WSMessage], max_msg_size: int, compress: bool = True - ) -> None: - self.queue = queue - 
self._max_msg_size = max_msg_size - - self._exc: Optional[BaseException] = None - self._partial = bytearray() - self._state = WSParserState.READ_HEADER - - self._opcode: Optional[int] = None - self._frame_fin = False - self._frame_opcode: Optional[int] = None - self._frame_payload = bytearray() - - self._tail: bytes = b"" - self._has_mask = False - self._frame_mask: Optional[bytes] = None - self._payload_length = 0 - self._payload_length_flag = 0 - self._compressed: Optional[bool] = None - self._decompressobj: Optional[ZLibDecompressor] = None - self._compress = compress - - def feed_eof(self) -> None: - self.queue.feed_eof() - - def feed_data(self, data: bytes) -> Tuple[bool, bytes]: - if self._exc: - return True, data - - try: - self._feed_data(data) - except Exception as exc: - self._exc = exc - set_exception(self.queue, exc) - return True, b"" - - return False, b"" - - def _feed_data(self, data: bytes) -> None: - msg: WSMessage - for fin, opcode, payload, compressed in self.parse_frame(data): - if opcode in MESSAGE_TYPES_WITH_CONTENT: - # load text/binary - is_continuation = opcode == WSMsgType.CONTINUATION - if not fin: - # got partial frame payload - if not is_continuation: - self._opcode = opcode - self._partial += payload - if self._max_msg_size and len(self._partial) >= self._max_msg_size: - raise WebSocketError( - WSCloseCode.MESSAGE_TOO_BIG, - "Message size {} exceeds limit {}".format( - len(self._partial), self._max_msg_size - ), - ) - continue - - has_partial = bool(self._partial) - if is_continuation: - if self._opcode is None: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Continuation frame for non started message", - ) - opcode = self._opcode - self._opcode = None - # previous frame was non finished - # we should get continuation opcode - elif has_partial: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "The opcode in non-fin frame is expected " - "to be zero, got {!r}".format(opcode), - ) - - if has_partial: - assembled_payload = 
self._partial + payload - self._partial.clear() - else: - assembled_payload = payload - - if self._max_msg_size and len(assembled_payload) >= self._max_msg_size: - raise WebSocketError( - WSCloseCode.MESSAGE_TOO_BIG, - "Message size {} exceeds limit {}".format( - len(assembled_payload), self._max_msg_size - ), - ) - - # Decompress process must to be done after all packets - # received. - if compressed: - if not self._decompressobj: - self._decompressobj = ZLibDecompressor( - suppress_deflate_header=True - ) - payload_merged = self._decompressobj.decompress_sync( - assembled_payload + _WS_DEFLATE_TRAILING, self._max_msg_size - ) - if self._decompressobj.unconsumed_tail: - left = len(self._decompressobj.unconsumed_tail) - raise WebSocketError( - WSCloseCode.MESSAGE_TOO_BIG, - "Decompressed message size {} exceeds limit {}".format( - self._max_msg_size + left, self._max_msg_size - ), - ) - else: - payload_merged = bytes(assembled_payload) - - if opcode == WSMsgType.TEXT: - try: - text = payload_merged.decode("utf-8") - except UnicodeDecodeError as exc: - raise WebSocketError( - WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" - ) from exc - - # XXX: The Text and Binary messages here can be a performance - # bottleneck, so we use tuple.__new__ to improve performance. - # This is not type safe, but many tests should fail in - # test_client_ws_functional.py if this is wrong. 
- msg = tuple.__new__(WSMessageText, (text, "", WSMsgType.TEXT)) - self.queue.feed_data(msg) - continue - - msg = tuple.__new__( - WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) - ) - self.queue.feed_data(msg) - elif opcode == WSMsgType.CLOSE: - if len(payload) >= 2: - close_code = UNPACK_CLOSE_CODE(payload[:2])[0] - if close_code < 3000 and close_code not in ALLOWED_CLOSE_CODES: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - f"Invalid close code: {close_code}", - ) - try: - close_message = payload[2:].decode("utf-8") - except UnicodeDecodeError as exc: - raise WebSocketError( - WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" - ) from exc - msg = WSMessageClose(data=close_code, extra=close_message) - elif payload: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - f"Invalid close frame: {fin} {opcode} {payload!r}", - ) - else: - msg = WSMessageClose(data=0, extra="") - - self.queue.feed_data(msg) - - elif opcode == WSMsgType.PING: - msg = WSMessagePing(data=payload, extra="") - self.queue.feed_data(msg) - - elif opcode == WSMsgType.PONG: - msg = WSMessagePong(data=payload, extra="") - self.queue.feed_data(msg) - - else: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, f"Unexpected opcode={opcode!r}" - ) - - def parse_frame( - self, buf: bytes - ) -> List[Tuple[bool, Optional[int], bytearray, Optional[bool]]]: - """Return the next frame from the socket.""" - frames: List[Tuple[bool, Optional[int], bytearray, Optional[bool]]] = [] - if self._tail: - buf, self._tail = self._tail + buf, b"" - - start_pos: int = 0 - buf_length = len(buf) - - while True: - # read header - if self._state is WSParserState.READ_HEADER: - if buf_length - start_pos < 2: - break - data = buf[start_pos : start_pos + 2] - start_pos += 2 - first_byte, second_byte = data - - fin = (first_byte >> 7) & 1 - rsv1 = (first_byte >> 6) & 1 - rsv2 = (first_byte >> 5) & 1 - rsv3 = (first_byte >> 4) & 1 - opcode = first_byte & 0xF - - # frame-fin = %x0 ; more frames of 
this message follow - # / %x1 ; final frame of this message - # frame-rsv1 = %x0 ; - # 1 bit, MUST be 0 unless negotiated otherwise - # frame-rsv2 = %x0 ; - # 1 bit, MUST be 0 unless negotiated otherwise - # frame-rsv3 = %x0 ; - # 1 bit, MUST be 0 unless negotiated otherwise - # - # Remove rsv1 from this test for deflate development - if rsv2 or rsv3 or (rsv1 and not self._compress): - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Received frame with non-zero reserved bits", - ) - - if opcode > 0x7 and fin == 0: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Received fragmented control frame", - ) - - has_mask = (second_byte >> 7) & 1 - length = second_byte & 0x7F - - # Control frames MUST have a payload - # length of 125 bytes or less - if opcode > 0x7 and length > 125: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Control frame payload cannot be larger than 125 bytes", - ) - - # Set compress status if last package is FIN - # OR set compress status if this is first fragment - # Raise error if not first fragment with rsv1 = 0x1 - if self._frame_fin or self._compressed is None: - self._compressed = True if rsv1 else False - elif rsv1: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Received frame with non-zero reserved bits", - ) - - self._frame_fin = bool(fin) - self._frame_opcode = opcode - self._has_mask = bool(has_mask) - self._payload_length_flag = length - self._state = WSParserState.READ_PAYLOAD_LENGTH - - # read payload length - if self._state is WSParserState.READ_PAYLOAD_LENGTH: - length_flag = self._payload_length_flag - if length_flag == 126: - if buf_length - start_pos < 2: - break - data = buf[start_pos : start_pos + 2] - start_pos += 2 - self._payload_length = UNPACK_LEN2(data)[0] - elif length_flag > 126: - if buf_length - start_pos < 8: - break - data = buf[start_pos : start_pos + 8] - start_pos += 8 - self._payload_length = UNPACK_LEN3(data)[0] - else: - self._payload_length = length_flag - - self._state = ( - 
WSParserState.READ_PAYLOAD_MASK - if self._has_mask - else WSParserState.READ_PAYLOAD - ) - - # read payload mask - if self._state is WSParserState.READ_PAYLOAD_MASK: - if buf_length - start_pos < 4: - break - self._frame_mask = buf[start_pos : start_pos + 4] - start_pos += 4 - self._state = WSParserState.READ_PAYLOAD - - if self._state is WSParserState.READ_PAYLOAD: - length = self._payload_length - payload = self._frame_payload - - chunk_len = buf_length - start_pos - if length >= chunk_len: - self._payload_length = length - chunk_len - payload += buf[start_pos:] - start_pos = buf_length - else: - self._payload_length = 0 - payload += buf[start_pos : start_pos + length] - start_pos = start_pos + length - - if self._payload_length != 0: - break - - if self._has_mask: - assert self._frame_mask is not None - _websocket_mask(self._frame_mask, payload) - - frames.append( - (self._frame_fin, self._frame_opcode, payload, self._compressed) - ) - self._frame_payload = bytearray() - self._state = WSParserState.READ_HEADER - - self._tail = buf[start_pos:] - - return frames - - -class WebSocketWriter: - def __init__( - self, - protocol: BaseProtocol, - transport: asyncio.Transport, - *, - use_mask: bool = False, - limit: int = DEFAULT_LIMIT, - random: random.Random = random.Random(), - compress: int = 0, - notakeover: bool = False, - ) -> None: - self.protocol = protocol - self.transport = transport - self.use_mask = use_mask - self.get_random_bits = partial(random.getrandbits, 32) - self.compress = compress - self.notakeover = notakeover - self._closing = False - self._limit = limit - self._output_size = 0 - self._compressobj: Any = None # actually compressobj - - async def send_frame( - self, message: bytes, opcode: int, compress: Optional[int] = None - ) -> None: - """Send a frame over the websocket with message as its payload.""" - if self._closing and not (opcode & WSMsgType.CLOSE): - raise ClientConnectionResetError("Cannot write to closing transport") - - # RSV are 
the reserved bits in the frame header. They are used to - # indicate that the frame is using an extension. - # https://datatracker.ietf.org/doc/html/rfc6455#section-5.2 - rsv = 0 - # Only compress larger packets (disabled) - # Does small packet needs to be compressed? - # if self.compress and opcode < 8 and len(message) > 124: - if (compress or self.compress) and opcode < 8: - # RSV1 (rsv = 0x40) is set for compressed frames - # https://datatracker.ietf.org/doc/html/rfc7692#section-7.2.3.1 - rsv = 0x40 - - if compress: - # Do not set self._compress if compressing is for this frame - compressobj = self._make_compress_obj(compress) - else: # self.compress - if not self._compressobj: - self._compressobj = self._make_compress_obj(self.compress) - compressobj = self._compressobj - - message = await compressobj.compress(message) - # Its critical that we do not return control to the event - # loop until we have finished sending all the compressed - # data. Otherwise we could end up mixing compressed frames - # if there are multiple coroutines compressing data. - message += compressobj.flush( - zlib.Z_FULL_FLUSH if self.notakeover else zlib.Z_SYNC_FLUSH - ) - if message.endswith(_WS_DEFLATE_TRAILING): - message = message[:-4] - - msg_length = len(message) - - use_mask = self.use_mask - mask_bit = 0x80 if use_mask else 0 - - # Depending on the message length, the header is assembled differently. - # The first byte is reserved for the opcode and the RSV bits. - first_byte = 0x80 | rsv | opcode - if msg_length < 126: - header = PACK_LEN1(first_byte, msg_length | mask_bit) - header_len = 2 - elif msg_length < (1 << 16): - header = PACK_LEN2(first_byte, 126 | mask_bit, msg_length) - header_len = 4 - else: - header = PACK_LEN3(first_byte, 127 | mask_bit, msg_length) - header_len = 10 - - # https://datatracker.ietf.org/doc/html/rfc6455#section-5.3 - # If we are using a mask, we need to generate it randomly - # and apply it to the message before sending it. 
A mask is - # a 32-bit value that is applied to the message using a - # bitwise XOR operation. It is used to prevent certain types - # of attacks on the websocket protocol. The mask is only used - # when aiohttp is acting as a client. Servers do not use a mask. - if use_mask: - mask = PACK_RANDBITS(self.get_random_bits()) - message = bytearray(message) - _websocket_mask(mask, message) - self._write(header + mask + message) - self._output_size += header_len + MASK_LEN + msg_length - - else: - if msg_length > MSG_SIZE: - self._write(header) - self._write(message) - else: - self._write(header + message) - - self._output_size += header_len + msg_length - - # It is safe to return control to the event loop when using compression - # after this point as we have already sent or buffered all the data. - - # Once we have written output_size up to the limit, we call the - # drain helper which waits for the transport to be ready to accept - # more data. This is a flow control mechanism to prevent the buffer - # from growing too large. The drain helper will return right away - # if the writer is not paused. 
- if self._output_size > self._limit: - self._output_size = 0 - await self.protocol._drain_helper() - - def _make_compress_obj(self, compress: int) -> ZLibCompressor: - return ZLibCompressor( - level=zlib.Z_BEST_SPEED, - wbits=-compress, - max_sync_chunk_size=WEBSOCKET_MAX_SYNC_CHUNK_SIZE, - ) - - def _write(self, data: bytes) -> None: - if self.transport.is_closing(): - raise ClientConnectionResetError("Cannot write to closing transport") - self.transport.write(data) - - async def pong(self, message: bytes = b"") -> None: - """Send pong message.""" - await self.send_frame(message, WSMsgType.PONG) - - async def ping(self, message: bytes = b"") -> None: - """Send ping message.""" - await self.send_frame(message, WSMsgType.PING) - - async def close(self, code: int = 1000, message: Union[bytes, str] = b"") -> None: - """Close the websocket, sending the specified code and message.""" - if isinstance(message, str): - message = message.encode("utf-8") - try: - await self.send_frame( - PACK_CLOSE_CODE(code) + message, opcode=WSMsgType.CLOSE - ) - finally: - self._closing = True From de38d0cd5ef1007186aa2779a6811352dd58a7da Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:36:13 -1000 Subject: [PATCH 02/59] Separate http_websocket into multiple files There are no functional changes here. 
The goal is to be able to make it easier to build a Cython implementation for the WebSocketReader --- aiohttp/_http_websocket_helpers.py | 148 ++++++++++++ aiohttp/_http_websocket_models.py | 145 ++++++++++++ aiohttp/_http_websocket_reader.py | 355 +++++++++++++++++++++++++++++ aiohttp/_http_websocket_writer.py | 174 ++++++++++++++ 4 files changed, 822 insertions(+) create mode 100644 aiohttp/_http_websocket_helpers.py create mode 100644 aiohttp/_http_websocket_models.py create mode 100644 aiohttp/_http_websocket_reader.py create mode 100644 aiohttp/_http_websocket_writer.py diff --git a/aiohttp/_http_websocket_helpers.py b/aiohttp/_http_websocket_helpers.py new file mode 100644 index 00000000000..252144dead3 --- /dev/null +++ b/aiohttp/_http_websocket_helpers.py @@ -0,0 +1,148 @@ +"""WebSocket protocol versions 13 and 8.""" + +import functools +import re +from struct import Struct +from typing import TYPE_CHECKING, Final, List, Optional, Pattern, Tuple + +from ._http_websocket_models import WSHandshakeError +from .helpers import NO_EXTENSIONS + +UNPACK_LEN2 = Struct("!H").unpack_from +UNPACK_LEN3 = Struct("!Q").unpack_from +UNPACK_CLOSE_CODE = Struct("!H").unpack +PACK_LEN1 = Struct("!BB").pack +PACK_LEN2 = Struct("!BBH").pack +PACK_LEN3 = Struct("!BBQ").pack +PACK_CLOSE_CODE = Struct("!H").pack +PACK_RANDBITS = Struct("!L").pack +MSG_SIZE: Final[int] = 2**14 +MASK_LEN: Final[int] = 4 + +WS_KEY: Final[bytes] = b"258EAFA5-E914-47DA-95CA-C5AB0DC85B11" + + +# Used by _websocket_mask_python +@functools.lru_cache +def _xor_table() -> List[bytes]: + return [bytes(a ^ b for a in range(256)) for b in range(256)] + + +def _websocket_mask_python(mask: bytes, data: bytearray) -> None: + """Websocket masking function. + + `mask` is a `bytes` object of length 4; `data` is a `bytearray` + object of any length. The contents of `data` are masked with `mask`, + as specified in section 5.3 of RFC 6455. + + Note that this function mutates the `data` argument. 
+ + This pure-python implementation may be replaced by an optimized + version when available. + + """ + assert isinstance(data, bytearray), data + assert len(mask) == 4, mask + + if data: + _XOR_TABLE = _xor_table() + a, b, c, d = (_XOR_TABLE[n] for n in mask) + data[::4] = data[::4].translate(a) + data[1::4] = data[1::4].translate(b) + data[2::4] = data[2::4].translate(c) + data[3::4] = data[3::4].translate(d) + + +if TYPE_CHECKING or NO_EXTENSIONS: # pragma: no cover + websocket_mask = _websocket_mask_python +else: + try: + from ._websocket import _websocket_mask_cython # type: ignore[import-not-found] + + websocket_mask = _websocket_mask_cython + except ImportError: # pragma: no cover + websocket_mask = _websocket_mask_python + + +_WS_EXT_RE: Final[Pattern[str]] = re.compile( + r"^(?:;\s*(?:" + r"(server_no_context_takeover)|" + r"(client_no_context_takeover)|" + r"(server_max_window_bits(?:=(\d+))?)|" + r"(client_max_window_bits(?:=(\d+))?)))*$" +) + +_WS_EXT_RE_SPLIT: Final[Pattern[str]] = re.compile(r"permessage-deflate([^,]+)?") + + +def ws_ext_parse(extstr: Optional[str], isserver: bool = False) -> Tuple[int, bool]: + if not extstr: + return 0, False + + compress = 0 + notakeover = False + for ext in _WS_EXT_RE_SPLIT.finditer(extstr): + defext = ext.group(1) + # Return compress = 15 when get `permessage-deflate` + if not defext: + compress = 15 + break + match = _WS_EXT_RE.match(defext) + if match: + compress = 15 + if isserver: + # Server never fail to detect compress handshake. 
+ # Server does not need to send max wbit to client + if match.group(4): + compress = int(match.group(4)) + # Group3 must match if group4 matches + # Compress wbit 8 does not support in zlib + # If compress level not support, + # CONTINUE to next extension + if compress > 15 or compress < 9: + compress = 0 + continue + if match.group(1): + notakeover = True + # Ignore regex group 5 & 6 for client_max_window_bits + break + else: + if match.group(6): + compress = int(match.group(6)) + # Group5 must match if group6 matches + # Compress wbit 8 does not support in zlib + # If compress level not support, + # FAIL the parse progress + if compress > 15 or compress < 9: + raise WSHandshakeError("Invalid window size") + if match.group(2): + notakeover = True + # Ignore regex group 5 & 6 for client_max_window_bits + break + # Return Fail if client side and not match + elif not isserver: + raise WSHandshakeError("Extension for deflate not supported" + ext.group(1)) + + return compress, notakeover + + +def ws_ext_gen( + compress: int = 15, isserver: bool = False, server_notakeover: bool = False +) -> str: + # client_notakeover=False not used for server + # compress wbit 8 does not support in zlib + if compress < 9 or compress > 15: + raise ValueError( + "Compress wbits must between 9 and 15, zlib does not support wbits=8" + ) + enabledext = ["permessage-deflate"] + if not isserver: + enabledext.append("client_max_window_bits") + + if compress < 15: + enabledext.append("server_max_window_bits=" + str(compress)) + if server_notakeover: + enabledext.append("server_no_context_takeover") + # if client_notakeover: + # enabledext.append('client_no_context_takeover') + return "; ".join(enabledext) diff --git a/aiohttp/_http_websocket_models.py b/aiohttp/_http_websocket_models.py new file mode 100644 index 00000000000..8e80aa057f1 --- /dev/null +++ b/aiohttp/_http_websocket_models.py @@ -0,0 +1,145 @@ +"""WebSocket protocol versions 13 and 8.""" + +import json +from enum import IntEnum 
+from typing import Any, Callable, Final, Literal, NamedTuple, Optional, Union, cast + +WS_DEFLATE_TRAILING: Final[bytes] = bytes([0x00, 0x00, 0xFF, 0xFF]) + + +class WSCloseCode(IntEnum): + OK = 1000 + GOING_AWAY = 1001 + PROTOCOL_ERROR = 1002 + UNSUPPORTED_DATA = 1003 + ABNORMAL_CLOSURE = 1006 + INVALID_TEXT = 1007 + POLICY_VIOLATION = 1008 + MESSAGE_TOO_BIG = 1009 + MANDATORY_EXTENSION = 1010 + INTERNAL_ERROR = 1011 + SERVICE_RESTART = 1012 + TRY_AGAIN_LATER = 1013 + BAD_GATEWAY = 1014 + + +# For websockets, keeping latency low is extremely important as implementations +# generally expect to be able to send and receive messages quickly. We use a +# larger chunk size than the default to reduce the number of executor calls +# since the executor is a significant source of latency and overhead when +# the chunks are small. A size of 5KiB was chosen because it is also the +# same value python-zlib-ng choose to use as the threshold to release the GIL. + +WEBSOCKET_MAX_SYNC_CHUNK_SIZE = 5 * 1024 + + +class WSMsgType(IntEnum): + # websocket spec types + CONTINUATION = 0x0 + TEXT = 0x1 + BINARY = 0x2 + PING = 0x9 + PONG = 0xA + CLOSE = 0x8 + + # aiohttp specific types + CLOSING = 0x100 + CLOSED = 0x101 + ERROR = 0x102 + + +class WSMessageContinuation(NamedTuple): + data: bytes + extra: Optional[str] = None + type: Literal[WSMsgType.CONTINUATION] = WSMsgType.CONTINUATION + + +class WSMessageText(NamedTuple): + data: str + extra: Optional[str] = None + type: Literal[WSMsgType.TEXT] = WSMsgType.TEXT + + def json( + self, *, loads: Callable[[Union[str, bytes, bytearray]], Any] = json.loads + ) -> Any: + """Return parsed JSON data.""" + return loads(self.data) + + +class WSMessageBinary(NamedTuple): + data: bytes + extra: Optional[str] = None + type: Literal[WSMsgType.BINARY] = WSMsgType.BINARY + + def json( + self, *, loads: Callable[[Union[str, bytes, bytearray]], Any] = json.loads + ) -> Any: + """Return parsed JSON data.""" + return loads(self.data) + + +class 
WSMessagePing(NamedTuple): + data: bytes + extra: Optional[str] = None + type: Literal[WSMsgType.PING] = WSMsgType.PING + + +class WSMessagePong(NamedTuple): + data: bytes + extra: Optional[str] = None + type: Literal[WSMsgType.PONG] = WSMsgType.PONG + + +class WSMessageClose(NamedTuple): + data: int + extra: Optional[str] = None + type: Literal[WSMsgType.CLOSE] = WSMsgType.CLOSE + + +class WSMessageClosing(NamedTuple): + data: None = None + extra: Optional[str] = None + type: Literal[WSMsgType.CLOSING] = WSMsgType.CLOSING + + +class WSMessageClosed(NamedTuple): + data: None = None + extra: Optional[str] = None + type: Literal[WSMsgType.CLOSED] = WSMsgType.CLOSED + + +class WSMessageError(NamedTuple): + data: BaseException + extra: Optional[str] = None + type: Literal[WSMsgType.ERROR] = WSMsgType.ERROR + + +WSMessage = Union[ + WSMessageContinuation, + WSMessageText, + WSMessageBinary, + WSMessagePing, + WSMessagePong, + WSMessageClose, + WSMessageClosing, + WSMessageClosed, + WSMessageError, +] + +WS_CLOSED_MESSAGE = WSMessageClosed() +WS_CLOSING_MESSAGE = WSMessageClosing() + + +class WebSocketError(Exception): + """WebSocket protocol parser error.""" + + def __init__(self, code: int, message: str) -> None: + self.code = code + super().__init__(code, message) + + def __str__(self) -> str: + return cast(str, self.args[1]) + + +class WSHandshakeError(Exception): + """WebSocket protocol handshake error.""" diff --git a/aiohttp/_http_websocket_reader.py b/aiohttp/_http_websocket_reader.py new file mode 100644 index 00000000000..bc92c8fdf8b --- /dev/null +++ b/aiohttp/_http_websocket_reader.py @@ -0,0 +1,355 @@ +"""WebSocket protocol versions 13 and 8.""" + +from enum import IntEnum +from typing import Final, List, Optional, Set, Tuple + +from ._http_websocket_helpers import ( + UNPACK_CLOSE_CODE, + UNPACK_LEN2, + UNPACK_LEN3, + WSCloseCode, + websocket_mask, +) +from ._http_websocket_models import ( + WS_DEFLATE_TRAILING, + WebSocketError, + WSMessage, + 
WSMessageBinary, + WSMessageClose, + WSMessagePing, + WSMessagePong, + WSMessageText, + WSMsgType, +) +from .compression_utils import ZLibDecompressor +from .helpers import set_exception +from .streams import DataQueue + +MESSAGE_TYPES_WITH_CONTENT: Final = frozenset( + { + WSMsgType.BINARY, + WSMsgType.TEXT, + WSMsgType.CONTINUATION, + } +) + +ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} + + +class WSParserState(IntEnum): + READ_HEADER = 1 + READ_PAYLOAD_LENGTH = 2 + READ_PAYLOAD_MASK = 3 + READ_PAYLOAD = 4 + + +class WebSocketReader: + def __init__( + self, queue: DataQueue[WSMessage], max_msg_size: int, compress: bool = True + ) -> None: + self.queue = queue + self._max_msg_size = max_msg_size + + self._exc: Optional[BaseException] = None + self._partial = bytearray() + self._state = WSParserState.READ_HEADER + + self._opcode: Optional[int] = None + self._frame_fin = False + self._frame_opcode: Optional[int] = None + self._frame_payload = bytearray() + + self._tail: bytes = b"" + self._has_mask = False + self._frame_mask: Optional[bytes] = None + self._payload_length = 0 + self._payload_length_flag = 0 + self._compressed: Optional[bool] = None + self._decompressobj: Optional[ZLibDecompressor] = None + self._compress = compress + + def feed_eof(self) -> None: + self.queue.feed_eof() + + def feed_data(self, data: bytes) -> Tuple[bool, bytes]: + if self._exc: + return True, data + + try: + self._feed_data(data) + except Exception as exc: + self._exc = exc + set_exception(self.queue, exc) + return True, b"" + + return False, b"" + + def _feed_data(self, data: bytes) -> None: + msg: WSMessage + for fin, opcode, payload, compressed in self.parse_frame(data): + if opcode in MESSAGE_TYPES_WITH_CONTENT: + # load text/binary + is_continuation = opcode == WSMsgType.CONTINUATION + if not fin: + # got partial frame payload + if not is_continuation: + self._opcode = opcode + self._partial += payload + if self._max_msg_size and len(self._partial) >= 
self._max_msg_size: + raise WebSocketError( + WSCloseCode.MESSAGE_TOO_BIG, + "Message size {} exceeds limit {}".format( + len(self._partial), self._max_msg_size + ), + ) + continue + + has_partial = bool(self._partial) + if is_continuation: + if self._opcode is None: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Continuation frame for non started message", + ) + opcode = self._opcode + self._opcode = None + # previous frame was non finished + # we should get continuation opcode + elif has_partial: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "The opcode in non-fin frame is expected " + "to be zero, got {!r}".format(opcode), + ) + + if has_partial: + assembled_payload = self._partial + payload + self._partial.clear() + else: + assembled_payload = payload + + if self._max_msg_size and len(assembled_payload) >= self._max_msg_size: + raise WebSocketError( + WSCloseCode.MESSAGE_TOO_BIG, + "Message size {} exceeds limit {}".format( + len(assembled_payload), self._max_msg_size + ), + ) + + # Decompress process must to be done after all packets + # received. + if compressed: + if not self._decompressobj: + self._decompressobj = ZLibDecompressor( + suppress_deflate_header=True + ) + payload_merged = self._decompressobj.decompress_sync( + assembled_payload + WS_DEFLATE_TRAILING, self._max_msg_size + ) + if self._decompressobj.unconsumed_tail: + left = len(self._decompressobj.unconsumed_tail) + raise WebSocketError( + WSCloseCode.MESSAGE_TOO_BIG, + "Decompressed message size {} exceeds limit {}".format( + self._max_msg_size + left, self._max_msg_size + ), + ) + else: + payload_merged = bytes(assembled_payload) + + if opcode == WSMsgType.TEXT: + try: + text = payload_merged.decode("utf-8") + except UnicodeDecodeError as exc: + raise WebSocketError( + WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" + ) from exc + + # XXX: The Text and Binary messages here can be a performance + # bottleneck, so we use tuple.__new__ to improve performance. 
+ # This is not type safe, but many tests should fail in + # test_client_ws_functional.py if this is wrong. + msg = tuple.__new__(WSMessageText, (text, "", WSMsgType.TEXT)) + self.queue.feed_data(msg) + continue + + msg = tuple.__new__( + WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) + ) + self.queue.feed_data(msg) + elif opcode == WSMsgType.CLOSE: + if len(payload) >= 2: + close_code = UNPACK_CLOSE_CODE(payload[:2])[0] + if close_code < 3000 and close_code not in ALLOWED_CLOSE_CODES: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + f"Invalid close code: {close_code}", + ) + try: + close_message = payload[2:].decode("utf-8") + except UnicodeDecodeError as exc: + raise WebSocketError( + WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" + ) from exc + msg = WSMessageClose(data=close_code, extra=close_message) + elif payload: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + f"Invalid close frame: {fin} {opcode} {payload!r}", + ) + else: + msg = WSMessageClose(data=0, extra="") + + self.queue.feed_data(msg) + + elif opcode == WSMsgType.PING: + msg = WSMessagePing(data=payload, extra="") + self.queue.feed_data(msg) + + elif opcode == WSMsgType.PONG: + msg = WSMessagePong(data=payload, extra="") + self.queue.feed_data(msg) + + else: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, f"Unexpected opcode={opcode!r}" + ) + + def parse_frame( + self, buf: bytes + ) -> List[Tuple[bool, Optional[int], bytearray, Optional[bool]]]: + """Return the next frame from the socket.""" + frames: List[Tuple[bool, Optional[int], bytearray, Optional[bool]]] = [] + if self._tail: + buf, self._tail = self._tail + buf, b"" + + start_pos: int = 0 + buf_length = len(buf) + + while True: + # read header + if self._state is WSParserState.READ_HEADER: + if buf_length - start_pos < 2: + break + data = buf[start_pos : start_pos + 2] + start_pos += 2 + first_byte, second_byte = data + + fin = (first_byte >> 7) & 1 + rsv1 = (first_byte >> 6) & 1 + rsv2 = (first_byte >> 
5) & 1 + rsv3 = (first_byte >> 4) & 1 + opcode = first_byte & 0xF + + # frame-fin = %x0 ; more frames of this message follow + # / %x1 ; final frame of this message + # frame-rsv1 = %x0 ; + # 1 bit, MUST be 0 unless negotiated otherwise + # frame-rsv2 = %x0 ; + # 1 bit, MUST be 0 unless negotiated otherwise + # frame-rsv3 = %x0 ; + # 1 bit, MUST be 0 unless negotiated otherwise + # + # Remove rsv1 from this test for deflate development + if rsv2 or rsv3 or (rsv1 and not self._compress): + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Received frame with non-zero reserved bits", + ) + + if opcode > 0x7 and fin == 0: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Received fragmented control frame", + ) + + has_mask = (second_byte >> 7) & 1 + length = second_byte & 0x7F + + # Control frames MUST have a payload + # length of 125 bytes or less + if opcode > 0x7 and length > 125: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Control frame payload cannot be larger than 125 bytes", + ) + + # Set compress status if last package is FIN + # OR set compress status if this is first fragment + # Raise error if not first fragment with rsv1 = 0x1 + if self._frame_fin or self._compressed is None: + self._compressed = True if rsv1 else False + elif rsv1: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Received frame with non-zero reserved bits", + ) + + self._frame_fin = bool(fin) + self._frame_opcode = opcode + self._has_mask = bool(has_mask) + self._payload_length_flag = length + self._state = WSParserState.READ_PAYLOAD_LENGTH + + # read payload length + if self._state is WSParserState.READ_PAYLOAD_LENGTH: + length_flag = self._payload_length_flag + if length_flag == 126: + if buf_length - start_pos < 2: + break + data = buf[start_pos : start_pos + 2] + start_pos += 2 + self._payload_length = UNPACK_LEN2(data)[0] + elif length_flag > 126: + if buf_length - start_pos < 8: + break + data = buf[start_pos : start_pos + 8] + start_pos += 8 + 
self._payload_length = UNPACK_LEN3(data)[0] + else: + self._payload_length = length_flag + + self._state = ( + WSParserState.READ_PAYLOAD_MASK + if self._has_mask + else WSParserState.READ_PAYLOAD + ) + + # read payload mask + if self._state is WSParserState.READ_PAYLOAD_MASK: + if buf_length - start_pos < 4: + break + self._frame_mask = buf[start_pos : start_pos + 4] + start_pos += 4 + self._state = WSParserState.READ_PAYLOAD + + if self._state is WSParserState.READ_PAYLOAD: + length = self._payload_length + payload = self._frame_payload + + chunk_len = buf_length - start_pos + if length >= chunk_len: + self._payload_length = length - chunk_len + payload += buf[start_pos:] + start_pos = buf_length + else: + self._payload_length = 0 + payload += buf[start_pos : start_pos + length] + start_pos = start_pos + length + + if self._payload_length != 0: + break + + if self._has_mask: + assert self._frame_mask is not None + websocket_mask(self._frame_mask, payload) + + frames.append( + (self._frame_fin, self._frame_opcode, payload, self._compressed) + ) + self._frame_payload = bytearray() + self._state = WSParserState.READ_HEADER + + self._tail = buf[start_pos:] + + return frames diff --git a/aiohttp/_http_websocket_writer.py b/aiohttp/_http_websocket_writer.py new file mode 100644 index 00000000000..88ea4e35f21 --- /dev/null +++ b/aiohttp/_http_websocket_writer.py @@ -0,0 +1,174 @@ +"""WebSocket protocol versions 13 and 8.""" + +import asyncio +import random +import zlib +from functools import partial +from typing import Any, Final, Optional, Union + +from ._http_websocket_helpers import ( + MASK_LEN, + MSG_SIZE, + PACK_CLOSE_CODE, + PACK_LEN1, + PACK_LEN2, + PACK_LEN3, + PACK_RANDBITS, + websocket_mask, +) +from ._http_websocket_models import ( + WEBSOCKET_MAX_SYNC_CHUNK_SIZE, + WS_DEFLATE_TRAILING, + WSMsgType, +) +from .base_protocol import BaseProtocol +from .client_exceptions import ClientConnectionResetError +from .compression_utils import ZLibCompressor + 
+DEFAULT_LIMIT: Final[int] = 2**16 + + +class WebSocketWriter: + def __init__( + self, + protocol: BaseProtocol, + transport: asyncio.Transport, + *, + use_mask: bool = False, + limit: int = DEFAULT_LIMIT, + random: random.Random = random.Random(), + compress: int = 0, + notakeover: bool = False, + ) -> None: + self.protocol = protocol + self.transport = transport + self.use_mask = use_mask + self.get_random_bits = partial(random.getrandbits, 32) + self.compress = compress + self.notakeover = notakeover + self._closing = False + self._limit = limit + self._output_size = 0 + self._compressobj: Any = None # actually compressobj + + async def send_frame( + self, message: bytes, opcode: int, compress: Optional[int] = None + ) -> None: + """Send a frame over the websocket with message as its payload.""" + if self._closing and not (opcode & WSMsgType.CLOSE): + raise ClientConnectionResetError("Cannot write to closing transport") + + # RSV are the reserved bits in the frame header. They are used to + # indicate that the frame is using an extension. + # https://datatracker.ietf.org/doc/html/rfc6455#section-5.2 + rsv = 0 + # Only compress larger packets (disabled) + # Does small packet needs to be compressed? + # if self.compress and opcode < 8 and len(message) > 124: + if (compress or self.compress) and opcode < 8: + # RSV1 (rsv = 0x40) is set for compressed frames + # https://datatracker.ietf.org/doc/html/rfc7692#section-7.2.3.1 + rsv = 0x40 + + if compress: + # Do not set self._compress if compressing is for this frame + compressobj = self._make_compress_obj(compress) + else: # self.compress + if not self._compressobj: + self._compressobj = self._make_compress_obj(self.compress) + compressobj = self._compressobj + + message = await compressobj.compress(message) + # Its critical that we do not return control to the event + # loop until we have finished sending all the compressed + # data. 
Otherwise we could end up mixing compressed frames + # if there are multiple coroutines compressing data. + message += compressobj.flush( + zlib.Z_FULL_FLUSH if self.notakeover else zlib.Z_SYNC_FLUSH + ) + if message.endswith(WS_DEFLATE_TRAILING): + message = message[:-4] + + msg_length = len(message) + + use_mask = self.use_mask + mask_bit = 0x80 if use_mask else 0 + + # Depending on the message length, the header is assembled differently. + # The first byte is reserved for the opcode and the RSV bits. + first_byte = 0x80 | rsv | opcode + if msg_length < 126: + header = PACK_LEN1(first_byte, msg_length | mask_bit) + header_len = 2 + elif msg_length < (1 << 16): + header = PACK_LEN2(first_byte, 126 | mask_bit, msg_length) + header_len = 4 + else: + header = PACK_LEN3(first_byte, 127 | mask_bit, msg_length) + header_len = 10 + + # https://datatracker.ietf.org/doc/html/rfc6455#section-5.3 + # If we are using a mask, we need to generate it randomly + # and apply it to the message before sending it. A mask is + # a 32-bit value that is applied to the message using a + # bitwise XOR operation. It is used to prevent certain types + # of attacks on the websocket protocol. The mask is only used + # when aiohttp is acting as a client. Servers do not use a mask. + if use_mask: + mask = PACK_RANDBITS(self.get_random_bits()) + message = bytearray(message) + websocket_mask(mask, message) + self._write(header + mask + message) + self._output_size += header_len + MASK_LEN + msg_length + + else: + if msg_length > MSG_SIZE: + self._write(header) + self._write(message) + else: + self._write(header + message) + + self._output_size += header_len + msg_length + + # It is safe to return control to the event loop when using compression + # after this point as we have already sent or buffered all the data. + + # Once we have written output_size up to the limit, we call the + # drain helper which waits for the transport to be ready to accept + # more data. 
This is a flow control mechanism to prevent the buffer + # from growing too large. The drain helper will return right away + # if the writer is not paused. + if self._output_size > self._limit: + self._output_size = 0 + await self.protocol._drain_helper() + + def _make_compress_obj(self, compress: int) -> ZLibCompressor: + return ZLibCompressor( + level=zlib.Z_BEST_SPEED, + wbits=-compress, + max_sync_chunk_size=WEBSOCKET_MAX_SYNC_CHUNK_SIZE, + ) + + def _write(self, data: bytes) -> None: + if self.transport.is_closing(): + raise ClientConnectionResetError("Cannot write to closing transport") + self.transport.write(data) + + async def pong(self, message: bytes = b"") -> None: + """Send pong message.""" + await self.send_frame(message, WSMsgType.PONG) + + async def ping(self, message: bytes = b"") -> None: + """Send ping message.""" + await self.send_frame(message, WSMsgType.PING) + + async def close(self, code: int = 1000, message: Union[bytes, str] = b"") -> None: + """Close the websocket, sending the specified code and message.""" + if isinstance(message, str): + message = message.encode("utf-8") + try: + await self.send_frame( + PACK_CLOSE_CODE(code) + message, opcode=WSMsgType.CLOSE + ) + finally: + self._closing = True From 49150fec2c16df86a417701e242f010280ccdc17 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:36:42 -1000 Subject: [PATCH 03/59] Seperate http_websocket into multiple files There are not functional changes here. 
The goal is to make it easier to build a Cython implementation for the WebSocketReader --- aiohttp/_http_websocket_helpers.py | 2 +- aiohttp/_http_websocket_models.py | 2 +- aiohttp/_http_websocket_reader.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aiohttp/_http_websocket_helpers.py b/aiohttp/_http_websocket_helpers.py index 252144dead3..c5839c526ba 100644 --- a/aiohttp/_http_websocket_helpers.py +++ b/aiohttp/_http_websocket_helpers.py @@ -1,4 +1,4 @@ -"""WebSocket protocol versions 13 and 8.""" +"""Helpers for WebSocket protocol versions 13 and 8.""" import functools import re diff --git a/aiohttp/_http_websocket_models.py b/aiohttp/_http_websocket_models.py index 8e80aa057f1..5c2db09c32a 100644 --- a/aiohttp/_http_websocket_models.py +++ b/aiohttp/_http_websocket_models.py @@ -1,4 +1,4 @@ -"""WebSocket protocol versions 13 and 8.""" +"""Models for WebSocket protocol versions 13 and 8.""" import json from enum import IntEnum diff --git a/aiohttp/_http_websocket_reader.py b/aiohttp/_http_websocket_reader.py index bc92c8fdf8b..d0dcd145207 100644 --- a/aiohttp/_http_websocket_reader.py +++ b/aiohttp/_http_websocket_reader.py @@ -1,4 +1,4 @@ -"""WebSocket protocol versions 13 and 8.""" +"""Reader for WebSocket protocol versions 13 and 8.""" from enum import IntEnum from typing import Final, List, Optional, Set, Tuple From 3121b6df15488aa701e95f0757bd82e8ebd7cf38 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:38:08 -1000 Subject: [PATCH 04/59] Separate http_websocket into multiple files There are no functional changes here.
The goal is to make it easier to build a Cython implementation for the WebSocketReader --- .../{_http_websocket_helpers.py => _websocket_helpers.py} | 2 +- .../{_http_websocket_models.py => _websocket_models.py} | 0 .../{_http_websocket_reader.py => _websocket_reader.py} | 4 ++-- .../{_http_websocket_writer.py => _websocket_writer.py} | 4 ++-- aiohttp/http_websocket.py | 8 ++++---- 5 files changed, 9 insertions(+), 9 deletions(-) rename aiohttp/{_http_websocket_helpers.py => _websocket_helpers.py} (98%) rename aiohttp/{_http_websocket_models.py => _websocket_models.py} (100%) rename aiohttp/{_http_websocket_reader.py => _websocket_reader.py} (99%) rename aiohttp/{_http_websocket_writer.py => _websocket_writer.py} (98%) diff --git a/aiohttp/_http_websocket_helpers.py b/aiohttp/_websocket_helpers.py similarity index 98% rename from aiohttp/_http_websocket_helpers.py rename to aiohttp/_websocket_helpers.py index c5839c526ba..0ae98c2dbb3 100644 --- a/aiohttp/_http_websocket_helpers.py +++ b/aiohttp/_websocket_helpers.py @@ -5,7 +5,7 @@ from struct import Struct from typing import TYPE_CHECKING, Final, List, Optional, Pattern, Tuple -from ._http_websocket_models import WSHandshakeError +from ._websocket_models import WSHandshakeError from .helpers import NO_EXTENSIONS UNPACK_LEN2 = Struct("!H").unpack_from diff --git a/aiohttp/_http_websocket_models.py b/aiohttp/_websocket_models.py similarity index 100% rename from aiohttp/_http_websocket_models.py rename to aiohttp/_websocket_models.py diff --git a/aiohttp/_http_websocket_reader.py b/aiohttp/_websocket_reader.py similarity index 99% rename from aiohttp/_http_websocket_reader.py rename to aiohttp/_websocket_reader.py index d0dcd145207..da082b4da16 100644 --- a/aiohttp/_http_websocket_reader.py +++ b/aiohttp/_websocket_reader.py @@ -3,14 +3,14 @@ from enum import IntEnum from typing import Final, List, Optional, Set, Tuple -from ._http_websocket_helpers import ( +from ._websocket_helpers import (
UNPACK_CLOSE_CODE, UNPACK_LEN2, UNPACK_LEN3, WSCloseCode, websocket_mask, ) -from ._http_websocket_models import ( +from ._websocket_models import ( WS_DEFLATE_TRAILING, WebSocketError, WSMessage, diff --git a/aiohttp/_http_websocket_writer.py b/aiohttp/_websocket_writer.py similarity index 98% rename from aiohttp/_http_websocket_writer.py rename to aiohttp/_websocket_writer.py index 88ea4e35f21..70f879eba35 100644 --- a/aiohttp/_http_websocket_writer.py +++ b/aiohttp/_websocket_writer.py @@ -6,7 +6,7 @@ from functools import partial from typing import Any, Final, Optional, Union -from ._http_websocket_helpers import ( +from ._websocket_helpers import ( MASK_LEN, MSG_SIZE, PACK_CLOSE_CODE, @@ -16,7 +16,7 @@ PACK_RANDBITS, websocket_mask, ) -from ._http_websocket_models import ( +from ._websocket_models import ( WEBSOCKET_MAX_SYNC_CHUNK_SIZE, WS_DEFLATE_TRAILING, WSMsgType, diff --git a/aiohttp/http_websocket.py b/aiohttp/http_websocket.py index 100f77451ae..c66d536f9c3 100644 --- a/aiohttp/http_websocket.py +++ b/aiohttp/http_websocket.py @@ -1,7 +1,7 @@ """WebSocket protocol versions 13 and 8.""" -from ._http_websocket_helpers import WS_KEY -from ._http_websocket_models import ( +from ._websocket_helpers import WS_KEY +from ._websocket_models import ( WS_CLOSED_MESSAGE, WS_CLOSING_MESSAGE, WebSocketError, @@ -9,8 +9,8 @@ WSMessage, WSMsgType, ) -from ._http_websocket_reader import WebSocketReader -from ._http_websocket_writer import WebSocketWriter +from ._websocket_reader import WebSocketReader +from ._websocket_writer import WebSocketWriter __all__ = ( "WS_CLOSED_MESSAGE", From fc598bc7a9ebf0d1ce6ae647f50fa1affe0f5d30 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:42:39 -1000 Subject: [PATCH 05/59] fix location --- aiohttp/_websocket_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket_reader.py b/aiohttp/_websocket_reader.py index da082b4da16..e0171126cab 100644 --- a/aiohttp/_websocket_reader.py +++ b/aiohttp/_websocket_reader.py @@ -7,12 +7,12 @@ UNPACK_CLOSE_CODE, UNPACK_LEN2, UNPACK_LEN3, - WSCloseCode, websocket_mask, ) from ._websocket_models import ( WS_DEFLATE_TRAILING, WebSocketError, + WSCloseCode, WSMessage, WSMessageBinary, WSMessageClose, From ae26d085cea4806842bf4cb51a43bd47444be57e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:50:56 -1000 Subject: [PATCH 06/59] more cleanups --- aiohttp/_websocket_models.py | 10 ---------- aiohttp/_websocket_writer.py | 15 ++++++++++----- aiohttp/http_websocket.py | 8 +++++++- tests/test_websocket_parser.py | 30 ++++++++++++++++-------------- tests/test_websocket_writer.py | 2 +- 5 files changed, 34 insertions(+), 31 deletions(-) diff --git a/aiohttp/_websocket_models.py b/aiohttp/_websocket_models.py index 5c2db09c32a..14a4d52c638 100644 --- a/aiohttp/_websocket_models.py +++ b/aiohttp/_websocket_models.py @@ -23,16 +23,6 @@ class WSCloseCode(IntEnum): BAD_GATEWAY = 1014 -# For websockets, keeping latency low is extremely important as implementations -# generally expect to be able to send and receive messages quickly. We use a -# larger chunk size than the default to reduce the number of executor calls -# since the executor is a significant source of latency and overhead when -# the chunks are small. A size of 5KiB was chosen because it is also the -# same value python-zlib-ng choose to use as the threshold to release the GIL. 
- -WEBSOCKET_MAX_SYNC_CHUNK_SIZE = 5 * 1024 - - class WSMsgType(IntEnum): # websocket spec types CONTINUATION = 0x0 diff --git a/aiohttp/_websocket_writer.py b/aiohttp/_websocket_writer.py index 70f879eba35..eb356eda938 100644 --- a/aiohttp/_websocket_writer.py +++ b/aiohttp/_websocket_writer.py @@ -16,17 +16,22 @@ PACK_RANDBITS, websocket_mask, ) -from ._websocket_models import ( - WEBSOCKET_MAX_SYNC_CHUNK_SIZE, - WS_DEFLATE_TRAILING, - WSMsgType, -) +from ._websocket_models import WS_DEFLATE_TRAILING, WSMsgType from .base_protocol import BaseProtocol from .client_exceptions import ClientConnectionResetError from .compression_utils import ZLibCompressor DEFAULT_LIMIT: Final[int] = 2**16 +# For websockets, keeping latency low is extremely important as implementations +# generally expect to be able to send and receive messages quickly. We use a +# larger chunk size than the default to reduce the number of executor calls +# since the executor is a significant source of latency and overhead when +# the chunks are small. A size of 5KiB was chosen because it is also the +# same value python-zlib-ng choose to use as the threshold to release the GIL. 
+ +WEBSOCKET_MAX_SYNC_CHUNK_SIZE = 5 * 1024 + class WebSocketWriter: def __init__( diff --git a/aiohttp/http_websocket.py b/aiohttp/http_websocket.py index c66d536f9c3..353e4f01e78 100644 --- a/aiohttp/http_websocket.py +++ b/aiohttp/http_websocket.py @@ -1,12 +1,14 @@ """WebSocket protocol versions 13 and 8.""" -from ._websocket_helpers import WS_KEY +from ._websocket_helpers import WS_KEY, ws_ext_gen, ws_ext_parse from ._websocket_models import ( WS_CLOSED_MESSAGE, WS_CLOSING_MESSAGE, WebSocketError, WSCloseCode, + WSHandshakeError, WSMessage, + WSMessageError, WSMsgType, ) from ._websocket_reader import WebSocketReader @@ -22,4 +24,8 @@ "WebSocketError", "WSMsgType", "WSCloseCode", + "ws_ext_gen", + "ws_ext_parse", + "WSMessageError", + "WSHandshakeError", ) diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index 79aa9196157..956988a5e2a 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -8,22 +8,24 @@ import pytest import aiohttp -from aiohttp import http_websocket -from aiohttp.http import WebSocketError, WSCloseCode, WSMessage, WSMsgType -from aiohttp.http_websocket import ( - _WS_DEFLATE_TRAILING, +from aiohttp import _websocket_helpers +from aiohttp._websocket_helpers import ( PACK_CLOSE_CODE, PACK_LEN1, PACK_LEN2, PACK_LEN3, - WebSocketReader, + websocket_mask, +) +from aiohttp._websocket_models import ( + WS_DEFLATE_TRAILING, WSMessageBinary, WSMessageClose, WSMessagePing, WSMessagePong, WSMessageText, - _websocket_mask, ) +from aiohttp.http import WebSocketError, WSCloseCode, WSMessage, WSMsgType +from aiohttp.http_websocket import WebSocketReader def build_frame( @@ -39,7 +41,7 @@ def build_frame( compressobj = zlib.compressobj(wbits=-9) message = compressobj.compress(message) message = message + compressobj.flush(zlib.Z_SYNC_FLUSH) - if message.endswith(_WS_DEFLATE_TRAILING): + if message.endswith(WS_DEFLATE_TRAILING): message = message[:-4] msg_length = len(message) if use_mask: # pragma: no 
cover @@ -66,7 +68,7 @@ def build_frame( maski = random.randrange(0, 0xFFFFFFFF) mask = maski.to_bytes(4, "big") message = bytearray(message) - _websocket_mask(mask, message) + websocket_mask(mask, message) if noheader: return message else: @@ -443,31 +445,31 @@ def test_continuation_with_close_empty( def test_websocket_mask_python() -> None: message = bytearray(websocket_mask_data) - http_websocket._websocket_mask_python(websocket_mask_mask, message) + _websocket_helpers._websocket_mask_python(websocket_mask_mask, message) assert message == websocket_mask_masked @pytest.mark.skipif( - not hasattr(http_websocket, "_websocket_mask_cython"), reason="Requires Cython" + not hasattr(_websocket_helpers, "_websocket_mask_cython"), reason="Requires Cython" ) def test_websocket_mask_cython() -> None: message = bytearray(websocket_mask_data) - http_websocket._websocket_mask_cython(websocket_mask_mask, message) # type: ignore[attr-defined] + _websocket_helpers._websocket_mask_cython(websocket_mask_mask, message) # type: ignore[attr-defined] assert message == websocket_mask_masked def test_websocket_mask_python_empty() -> None: message = bytearray() - http_websocket._websocket_mask_python(websocket_mask_mask, message) + _websocket_helpers._websocket_mask_python(websocket_mask_mask, message) assert message == bytearray() @pytest.mark.skipif( - not hasattr(http_websocket, "_websocket_mask_cython"), reason="Requires Cython" + not hasattr(_websocket_helpers, "_websocket_mask_cython"), reason="Requires Cython" ) def test_websocket_mask_cython_empty() -> None: message = bytearray() - http_websocket._websocket_mask_cython(websocket_mask_mask, message) # type: ignore[attr-defined] + _websocket_helpers._websocket_mask_cython(websocket_mask_mask, message) # type: ignore[attr-defined] assert message == bytearray() diff --git a/tests/test_websocket_writer.py b/tests/test_websocket_writer.py index ba1229f107b..96ef8a87dd2 100644 --- a/tests/test_websocket_writer.py +++ 
b/tests/test_websocket_writer.py @@ -150,7 +150,7 @@ async def test_concurrent_messages( and in the executor """ with mock.patch( - "aiohttp.http_websocket.WEBSOCKET_MAX_SYNC_CHUNK_SIZE", max_sync_chunk_size + "aiohttp._websocket_writer.WEBSOCKET_MAX_SYNC_CHUNK_SIZE", max_sync_chunk_size ): writer = WebSocketWriter(protocol, transport, compress=15) queue: DataQueue[WSMessage] = DataQueue(asyncio.get_running_loop()) From dcc924e0f846b42c4d5ead14192bd6c68de83f03 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 09:53:32 -1000 Subject: [PATCH 07/59] keep compat --- aiohttp/http_websocket.py | 16 ++++++++++++++++ tests/test_websocket_parser.py | 8 ++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/aiohttp/http_websocket.py b/aiohttp/http_websocket.py index 353e4f01e78..cafe4eeeca2 100644 --- a/aiohttp/http_websocket.py +++ b/aiohttp/http_websocket.py @@ -8,7 +8,15 @@ WSCloseCode, WSHandshakeError, WSMessage, + WSMessageBinary, + WSMessageClose, + WSMessageClosed, + WSMessageClosing, + WSMessageContinuation, WSMessageError, + WSMessagePing, + WSMessagePong, + WSMessageText, WSMsgType, ) from ._websocket_reader import WebSocketReader @@ -28,4 +36,12 @@ "ws_ext_parse", "WSMessageError", "WSHandshakeError", + "WSMessageClose", + "WSMessageClosed", + "WSMessageClosing", + "WSMessagePong", + "WSMessageBinary", + "WSMessageText", + "WSMessagePing", + "WSMessageContinuation", ) diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index 956988a5e2a..16ba7507532 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -16,16 +16,16 @@ PACK_LEN3, websocket_mask, ) -from aiohttp._websocket_models import ( - WS_DEFLATE_TRAILING, +from aiohttp._websocket_models import WS_DEFLATE_TRAILING +from aiohttp.http import WebSocketError, WSCloseCode, WSMessage, WSMsgType +from aiohttp.http_websocket import ( + WebSocketReader, WSMessageBinary, WSMessageClose, WSMessagePing, 
WSMessagePong, WSMessageText, ) -from aiohttp.http import WebSocketError, WSCloseCode, WSMessage, WSMsgType -from aiohttp.http_websocket import WebSocketReader def build_frame( From 18752e8cb4115f29d350ef42be2607cbbe052be0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:13:18 -1000 Subject: [PATCH 08/59] Add pxd file for _websocket_reader --- aiohttp/_websocket_reader.pxd | 40 +++++++++++++++++++++++++++++++++++ aiohttp/_websocket_reader.py | 35 +++++++++++++----------------- 2 files changed, 55 insertions(+), 20 deletions(-) create mode 100644 aiohttp/_websocket_reader.pxd diff --git a/aiohttp/_websocket_reader.pxd b/aiohttp/_websocket_reader.pxd new file mode 100644 index 00000000000..bd0fdf2ac62 --- /dev/null +++ b/aiohttp/_websocket_reader.pxd @@ -0,0 +1,40 @@ + +cdef unsigned int READ_HEADER +cdef unsigned int READ_PAYLOAD_LENGTH +cdef unsigned int READ_PAYLOAD_MASK +cdef unsigned int READ_PAYLOAD + +cdef class WebSocketReader: + + cdef object queue + cdef unsigned int _max_msg_size + + cdef BaseException _exc + cdef bytearray _partial + cdef unsigned int _state + + cdef object _opcode + cdef bint _frame_fin + cdef object _frame_opcode + cdef bytearray _frame_payload + + cdef bytes _tail + cdef bint _has_mask + cdef bytes _frame_mask + cdef unsigned int _payload_length + cdef unsigned int _payload_length_flag + cdef object _compressed + cdef object _decompressobj + cdef bint _compress + + @cython.locals( + start_pos=unsigned int, + buf_len=unsigned int, + length=unsigned int, + chunk_size=unsigned int, + data=bytes, + payload=bytearray, + first_byte=char + second_byte=char + ) + cpdef parse_frame(self, bytes buf) diff --git a/aiohttp/_websocket_reader.py b/aiohttp/_websocket_reader.py index e0171126cab..5b4c85b2d83 100644 --- a/aiohttp/_websocket_reader.py +++ b/aiohttp/_websocket_reader.py @@ -1,6 +1,5 @@ """Reader for WebSocket protocol versions 13 and 8.""" -from enum import IntEnum from typing import Final, List, 
Optional, Set, Tuple from ._websocket_helpers import ( @@ -36,11 +35,10 @@ ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} -class WSParserState(IntEnum): - READ_HEADER = 1 - READ_PAYLOAD_LENGTH = 2 - READ_PAYLOAD_MASK = 3 - READ_PAYLOAD = 4 +READ_HEADER = 1 +READ_PAYLOAD_LENGTH = 2 +READ_PAYLOAD_MASK = 3 +READ_PAYLOAD = 4 class WebSocketReader: @@ -52,7 +50,7 @@ def __init__( self._exc: Optional[BaseException] = None self._partial = bytearray() - self._state = WSParserState.READ_HEADER + self._state = READ_HEADER self._opcode: Optional[int] = None self._frame_fin = False @@ -228,12 +226,13 @@ def parse_frame( while True: # read header - if self._state is WSParserState.READ_HEADER: + if self._state == READ_HEADER: if buf_length - start_pos < 2: break data = buf[start_pos : start_pos + 2] start_pos += 2 - first_byte, second_byte = data + first_byte = data[0] + second_byte = data[1] fin = (first_byte >> 7) & 1 rsv1 = (first_byte >> 6) & 1 @@ -289,10 +288,10 @@ def parse_frame( self._frame_opcode = opcode self._has_mask = bool(has_mask) self._payload_length_flag = length - self._state = WSParserState.READ_PAYLOAD_LENGTH + self._state = READ_PAYLOAD_LENGTH # read payload length - if self._state is WSParserState.READ_PAYLOAD_LENGTH: + if self._state == READ_PAYLOAD_LENGTH: length_flag = self._payload_length_flag if length_flag == 126: if buf_length - start_pos < 2: @@ -309,21 +308,17 @@ def parse_frame( else: self._payload_length = length_flag - self._state = ( - WSParserState.READ_PAYLOAD_MASK - if self._has_mask - else WSParserState.READ_PAYLOAD - ) + self._state = READ_PAYLOAD_MASK if self._has_mask else READ_PAYLOAD # read payload mask - if self._state is WSParserState.READ_PAYLOAD_MASK: + if self._state == READ_PAYLOAD_MASK: if buf_length - start_pos < 4: break self._frame_mask = buf[start_pos : start_pos + 4] start_pos += 4 - self._state = WSParserState.READ_PAYLOAD + self._state = READ_PAYLOAD - if self._state is WSParserState.READ_PAYLOAD: + 
if self._state == READ_PAYLOAD: length = self._payload_length payload = self._frame_payload @@ -348,7 +343,7 @@ def parse_frame( (self._frame_fin, self._frame_opcode, payload, self._compressed) ) self._frame_payload = bytearray() - self._state = WSParserState.READ_HEADER + self._state = READ_HEADER self._tail = buf[start_pos:] From 05696fb5ef8da93aed979b267a7b5b8c304a4e74 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:24:21 -1000 Subject: [PATCH 09/59] Test building from pxd --- Makefile | 7 ++++++- aiohttp/_websocket_reader.pxd | 12 ++++++------ setup.py | 1 + 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 78538bea09b..d7662c494d6 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ to-hash-one = $(dir $1).hash/$(addsuffix .hash,$(notdir $1)) to-hash = $(foreach fname,$1,$(call to-hash-one,$(fname))) CYS := $(wildcard aiohttp/*.pyx) $(wildcard aiohttp/*.pyi) $(wildcard aiohttp/*.pxd) +PY_WITH_PXD := aiohttp/_websocket_reader.py PYXS := $(wildcard aiohttp/*.pyx) CS := $(wildcard aiohttp/*.c) PYS := $(wildcard aiohttp/*.py) @@ -56,6 +57,9 @@ endif aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) ./tools/gen.py +aiohttp/_websocket_reader.c: aiohttp/_websocket_reader.py + cython -3 -o $@ $< -I aiohttp -Werror + # _find_headers generator creates _headers.pyi as well aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c cython -3 -o $@ $< -I aiohttp -Werror @@ -71,7 +75,7 @@ vendor/llhttp/node_modules: vendor/llhttp/package.json generate-llhttp: .llhttp-gen .PHONY: cythonize -cythonize: .install-cython $(PYXS:.pyx=.c) +cythonize: .install-cython $(PYXS:.pyx=.c) $(PY_WITH_PXD:.py=.c) .install-deps: .install-cython $(PYXS:.pyx=.c) $(call to-hash,$(CYS) $(REQS)) @python -m pip install -r requirements/dev.in -c requirements/dev.txt @@ -154,6 +158,7 @@ clean: @rm -f aiohttp/_http_parser.c @rm -f aiohttp/_http_writer.c @rm -f aiohttp/_websocket.c + 
@rm -f aiohttp/_websocket_reader.c @rm -rf .tox @rm -f .develop @rm -f .flake diff --git a/aiohttp/_websocket_reader.pxd b/aiohttp/_websocket_reader.pxd index bd0fdf2ac62..5a0b937cd43 100644 --- a/aiohttp/_websocket_reader.pxd +++ b/aiohttp/_websocket_reader.pxd @@ -9,7 +9,7 @@ cdef class WebSocketReader: cdef object queue cdef unsigned int _max_msg_size - cdef BaseException _exc + cdef object _exc cdef bytearray _partial cdef unsigned int _state @@ -28,13 +28,13 @@ cdef class WebSocketReader: cdef bint _compress @cython.locals( - start_pos=unsigned int, - buf_len=unsigned int, - length=unsigned int, - chunk_size=unsigned int, + start_pos="unsigned int", + buf_len="unsigned int", + length="unsigned int", + chunk_size="unsigned int", data=bytes, payload=bytearray, - first_byte=char + first_byte=char, second_byte=char ) cpdef parse_frame(self, bytes buf) diff --git a/setup.py b/setup.py index cc66fe214ca..9f58adbdf27 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ include_dirs=["vendor/llhttp/build"], ), Extension("aiohttp._http_writer", ["aiohttp/_http_writer.c"]), + Extension("aiohttp._websocket_reader", ["aiohttp/_websocket_reader.c"]), ] From 975e80b3d4d611248759f4ef4fb7e33d535122d6 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:26:56 -1000 Subject: [PATCH 10/59] subclass to allow patching --- tests/test_websocket_parser.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index 16ba7507532..7b1a6423391 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -28,6 +28,10 @@ ) +class PatchableWebsocketReader(WebSocketReader): + """WebSocketReader subclass that allows for patching parse_frame.""" + + def build_frame( message: bytes, opcode: int, @@ -97,8 +101,8 @@ def out(loop: asyncio.AbstractEventLoop) -> aiohttp.DataQueue[WSMessage]: @pytest.fixture() -def parser(out: aiohttp.DataQueue[WSMessage]) -> WebSocketReader: - return WebSocketReader(out, 4 * 1024 * 1024) +def parser(out: aiohttp.DataQueue[WSMessage]) -> PatchableWebsocketReader: + return PatchableWebsocketReader(out, 4 * 1024 * 1024) def test_parse_frame(parser: WebSocketReader) -> None: From 9164179aaeae30bbbd6ee283906564698cb6714a Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:39:25 -1000 Subject: [PATCH 11/59] more cleanups --- aiohttp/_websocket_reader.pxd | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/aiohttp/_websocket_reader.pxd b/aiohttp/_websocket_reader.pxd index 5a0b937cd43..42543d9aa17 100644 --- a/aiohttp/_websocket_reader.pxd +++ b/aiohttp/_websocket_reader.pxd @@ -1,9 +1,19 @@ +import cython + cdef unsigned int READ_HEADER cdef unsigned int READ_PAYLOAD_LENGTH cdef unsigned int READ_PAYLOAD_MASK cdef unsigned int READ_PAYLOAD +cdef object UNPACK_LEN2 +cdef object UNPACK_LEN3 + +cdef object WSMsgType + +cdef object WSMessageText +cdef object WSMessageBinary + cdef class WebSocketReader: cdef object queue @@ -27,11 +37,22 @@ cdef class WebSocketReader: cdef object _decompressobj cdef bint _compress + cpdef feed_data(self, bytes data) + + @cython.locals( + is_continuation=bint, + fin=bint, + has_partial=bint, + payload_merged=bytes + ) + cpdef _feed_data(self, bytes data) + @cython.locals( start_pos="unsigned int", buf_len="unsigned int", length="unsigned int", chunk_size="unsigned int", + buf_length="unsigned int", data=bytes, payload=bytearray, first_byte=char, From 106adcb029478312f0906eec477709e44967e011 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:40:17 -1000 Subject: [PATCH 12/59] more types --- aiohttp/_websocket_reader.pxd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aiohttp/_websocket_reader.pxd b/aiohttp/_websocket_reader.pxd index 42543d9aa17..04cef83fe38 100644 --- a/aiohttp/_websocket_reader.pxd +++ b/aiohttp/_websocket_reader.pxd @@ -14,6 +14,9 @@ cdef object WSMsgType cdef object WSMessageText cdef object WSMessageBinary +cdef set ALLOWED_CLOSE_CODES +cdef set MESSAGE_TYPES_WITH_CONTENT + cdef class WebSocketReader: cdef object queue From 4717a06d52e500736b7bc0a0e60e5136fa893377 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:43:03 -1000 Subject: [PATCH 13/59] more types --- aiohttp/_websocket_reader.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/aiohttp/_websocket_reader.py b/aiohttp/_websocket_reader.py index 5b4c85b2d83..43e9e55a610 100644 --- a/aiohttp/_websocket_reader.py +++ b/aiohttp/_websocket_reader.py @@ -24,14 +24,11 @@ from .helpers import set_exception from .streams import DataQueue -MESSAGE_TYPES_WITH_CONTENT: Final = frozenset( - { - WSMsgType.BINARY, - WSMsgType.TEXT, - WSMsgType.CONTINUATION, - } -) - +MESSAGE_TYPES_WITH_CONTENT: Final[Set[WSMsgType]] = { + WSMsgType.BINARY, + WSMsgType.TEXT, + WSMsgType.CONTINUATION, +} ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} From 68acafc05a9b961610eae00a35a5e88f2ae3294b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 10:55:06 -1000 Subject: [PATCH 14/59] type cleanups --- aiohttp/_websocket_reader.pxd | 15 ++++++++++++++- aiohttp/_websocket_reader.py | 34 +++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/aiohttp/_websocket_reader.pxd b/aiohttp/_websocket_reader.pxd index 04cef83fe38..82c1fca8dc8 100644 --- a/aiohttp/_websocket_reader.pxd +++ b/aiohttp/_websocket_reader.pxd @@ -6,13 +6,24 @@ cdef unsigned int READ_PAYLOAD_LENGTH cdef unsigned int READ_PAYLOAD_MASK cdef unsigned int READ_PAYLOAD +cdef unsigned int OP_CODE_CONTINUATION +cdef unsigned int OP_CODE_TEXT +cdef unsigned int OP_CODE_BINARY +cdef unsigned int OP_CODE_CLOSE +cdef unsigned int OP_CODE_PING +cdef unsigned int OP_CODE_PONG + cdef object UNPACK_LEN2 cdef object UNPACK_LEN3 +cdef object UNPACK_CLOSE_CODE cdef object WSMsgType cdef object WSMessageText cdef object WSMessageBinary +cdef object WSMessagePing +cdef object WSMessagePong +cdef object WSMessageClose cdef set ALLOWED_CLOSE_CODES cdef set MESSAGE_TYPES_WITH_CONTENT @@ -46,7 +57,8 @@ cdef class 
WebSocketReader: is_continuation=bint, fin=bint, has_partial=bint, - payload_merged=bytes + payload_merged=bytes, + opcode="unsigned int", ) cpdef _feed_data(self, bytes data) @@ -55,6 +67,7 @@ cdef class WebSocketReader: buf_len="unsigned int", length="unsigned int", chunk_size="unsigned int", + chunk_len="unsigned int", buf_length="unsigned int", data=bytes, payload=bytearray, diff --git a/aiohttp/_websocket_reader.py b/aiohttp/_websocket_reader.py index 43e9e55a610..41eccf6e852 100644 --- a/aiohttp/_websocket_reader.py +++ b/aiohttp/_websocket_reader.py @@ -24,19 +24,22 @@ from .helpers import set_exception from .streams import DataQueue -MESSAGE_TYPES_WITH_CONTENT: Final[Set[WSMsgType]] = { - WSMsgType.BINARY, - WSMsgType.TEXT, - WSMsgType.CONTINUATION, -} ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} - READ_HEADER = 1 READ_PAYLOAD_LENGTH = 2 READ_PAYLOAD_MASK = 3 READ_PAYLOAD = 4 +# WSMsgType values unpacked so they can by +# cythonized to unsigned int +OP_CODE_CONTINUATION = WSMsgType.CONTINUATION.value +OP_CODE_TEXT = WSMsgType.TEXT.value +OP_CODE_BINARY = WSMsgType.BINARY.value +OP_CODE_CLOSE = WSMsgType.CLOSE.value +OP_CODE_PING = WSMsgType.PING.value +OP_CODE_PONG = WSMsgType.PONG.value + class WebSocketReader: def __init__( @@ -81,10 +84,15 @@ def feed_data(self, data: bytes) -> Tuple[bool, bytes]: def _feed_data(self, data: bytes) -> None: msg: WSMessage - for fin, opcode, payload, compressed in self.parse_frame(data): - if opcode in MESSAGE_TYPES_WITH_CONTENT: + for frame in self.parse_frame(data): + fin = frame[0] + opcode = frame[1] + payload = frame[2] + compressed = frame[3] + + is_continuation = opcode == OP_CODE_CONTINUATION + if opcode == OP_CODE_TEXT or opcode == OP_CODE_BINARY or is_continuation: # load text/binary - is_continuation = opcode == WSMsgType.CONTINUATION if not fin: # got partial frame payload if not is_continuation: @@ -152,7 +160,7 @@ def _feed_data(self, data: bytes) -> None: else: payload_merged = 
bytes(assembled_payload) - if opcode == WSMsgType.TEXT: + if opcode == OP_CODE_TEXT: try: text = payload_merged.decode("utf-8") except UnicodeDecodeError as exc: @@ -172,7 +180,7 @@ def _feed_data(self, data: bytes) -> None: WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) ) self.queue.feed_data(msg) - elif opcode == WSMsgType.CLOSE: + elif opcode == OP_CODE_CLOSE: if len(payload) >= 2: close_code = UNPACK_CLOSE_CODE(payload[:2])[0] if close_code < 3000 and close_code not in ALLOWED_CLOSE_CODES: @@ -197,11 +205,11 @@ def _feed_data(self, data: bytes) -> None: self.queue.feed_data(msg) - elif opcode == WSMsgType.PING: + elif opcode == OP_CODE_PING: msg = WSMessagePing(data=payload, extra="") self.queue.feed_data(msg) - elif opcode == WSMsgType.PONG: + elif opcode == OP_CODE_PONG: msg = WSMessagePong(data=payload, extra="") self.queue.feed_data(msg) From 133b50c89a62bf68a5b5e20837479e2119078b57 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sat, 26 Oct 2024 11:09:27 -1000 Subject: [PATCH 15/59] changelog --- CHANGES/9542.packaging.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/9542.packaging.rst diff --git a/CHANGES/9542.packaging.rst b/CHANGES/9542.packaging.rst new file mode 100644 index 00000000000..c77b962994f --- /dev/null +++ b/CHANGES/9542.packaging.rst @@ -0,0 +1 @@ +Separated ``aiohttp.http_websocket`` into multiple files to make it easier to maintain -- by :user:`bdraco`. From 0fc268e606e531af0120cde7cf9804edcdbd8000 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:13:39 -1000 Subject: [PATCH 16/59] Relocate to _websocket --- Makefile | 11 +++++++---- aiohttp/_websocket/__init__.py | 1 + .../helpers.py} | 4 ++-- aiohttp/{_websocket.pyx => _websocket/mask.pyx} | 0 .../models.py} | 0 .../reader.py} | 15 +++++---------- .../writer.py} | 10 +++++----- aiohttp/http_websocket.py | 8 ++++---- setup.py | 2 +- 9 files changed, 25 insertions(+), 26 deletions(-) create mode 100644 aiohttp/_websocket/__init__.py rename aiohttp/{_websocket_helpers.py => _websocket/helpers.py} (98%) rename aiohttp/{_websocket.pyx => _websocket/mask.pyx} (100%) rename aiohttp/{_websocket_models.py => _websocket/models.py} (100%) rename aiohttp/{_websocket_reader.py => _websocket/reader.py} (98%) rename aiohttp/{_websocket_writer.py => _websocket/writer.py} (96%) diff --git a/Makefile b/Makefile index 78538bea09b..3a8803756ba 100644 --- a/Makefile +++ b/Makefile @@ -3,10 +3,10 @@ to-hash-one = $(dir $1).hash/$(addsuffix .hash,$(notdir $1)) to-hash = $(foreach fname,$1,$(call to-hash-one,$(fname))) -CYS := $(wildcard aiohttp/*.pyx) $(wildcard aiohttp/*.pyi) $(wildcard aiohttp/*.pxd) -PYXS := $(wildcard aiohttp/*.pyx) -CS := $(wildcard aiohttp/*.c) -PYS := $(wildcard aiohttp/*.py) +CYS := $(wildcard aiohttp/*.pyx) $(wildcard aiohttp/*.pyi) $(wildcard aiohttp/*.pxd) $(wildcard aiohttp/_websocket/*.pyx) $(wildcard aiohttp/_websocket/*.pyi) $(wildcard aiohttp/_websocket/*.pxd) +PYXS := $(wildcard aiohttp/*.pyx) $(wildcard aiohttp/_websocket/*.pyx) +CS := $(wildcard aiohttp/*.c) $(wildcard aiohttp/_websocket/*.c) +PYS := $(wildcard aiohttp/*.py) $(wildcard aiohttp/_websocket/*.py) IN := doc-spelling lint cython dev ALLS := $(sort $(CYS) $(CS) $(PYS) $(REQS)) @@ -60,6 +60,9 @@ aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c cython -3 -o $@ $< -I aiohttp -Werror +aiohttp/_websocket/%.c: aiohttp/_websocket/%.pyx 
$(call to-hash,$(CYS)) + cython -3 -o $@ $< -I aiohttp -Werror + vendor/llhttp/node_modules: vendor/llhttp/package.json cd vendor/llhttp; npm ci diff --git a/aiohttp/_websocket/__init__.py b/aiohttp/_websocket/__init__.py new file mode 100644 index 00000000000..836257cc47a --- /dev/null +++ b/aiohttp/_websocket/__init__.py @@ -0,0 +1 @@ +"""WebSocket protocol versions 13 and 8.""" diff --git a/aiohttp/_websocket_helpers.py b/aiohttp/_websocket/helpers.py similarity index 98% rename from aiohttp/_websocket_helpers.py rename to aiohttp/_websocket/helpers.py index 0ae98c2dbb3..41273dd3230 100644 --- a/aiohttp/_websocket_helpers.py +++ b/aiohttp/_websocket/helpers.py @@ -5,8 +5,8 @@ from struct import Struct from typing import TYPE_CHECKING, Final, List, Optional, Pattern, Tuple -from ._websocket_models import WSHandshakeError -from .helpers import NO_EXTENSIONS +from ..helpers import NO_EXTENSIONS +from .models import WSHandshakeError UNPACK_LEN2 = Struct("!H").unpack_from UNPACK_LEN3 = Struct("!Q").unpack_from diff --git a/aiohttp/_websocket.pyx b/aiohttp/_websocket/mask.pyx similarity index 100% rename from aiohttp/_websocket.pyx rename to aiohttp/_websocket/mask.pyx diff --git a/aiohttp/_websocket_models.py b/aiohttp/_websocket/models.py similarity index 100% rename from aiohttp/_websocket_models.py rename to aiohttp/_websocket/models.py diff --git a/aiohttp/_websocket_reader.py b/aiohttp/_websocket/reader.py similarity index 98% rename from aiohttp/_websocket_reader.py rename to aiohttp/_websocket/reader.py index e0171126cab..bd6d29e9a77 100644 --- a/aiohttp/_websocket_reader.py +++ b/aiohttp/_websocket/reader.py @@ -3,13 +3,11 @@ from enum import IntEnum from typing import Final, List, Optional, Set, Tuple -from ._websocket_helpers import ( - UNPACK_CLOSE_CODE, - UNPACK_LEN2, - UNPACK_LEN3, - websocket_mask, -) -from ._websocket_models import ( +from ..compression_utils import ZLibDecompressor +from ..helpers import set_exception +from ..streams import DataQueue 
+from .helpers import UNPACK_CLOSE_CODE, UNPACK_LEN2, UNPACK_LEN3, websocket_mask +from .models import ( WS_DEFLATE_TRAILING, WebSocketError, WSCloseCode, @@ -21,9 +19,6 @@ WSMessageText, WSMsgType, ) -from .compression_utils import ZLibDecompressor -from .helpers import set_exception -from .streams import DataQueue MESSAGE_TYPES_WITH_CONTENT: Final = frozenset( { diff --git a/aiohttp/_websocket_writer.py b/aiohttp/_websocket/writer.py similarity index 96% rename from aiohttp/_websocket_writer.py rename to aiohttp/_websocket/writer.py index eb356eda938..f3c03144e2e 100644 --- a/aiohttp/_websocket_writer.py +++ b/aiohttp/_websocket/writer.py @@ -6,7 +6,10 @@ from functools import partial from typing import Any, Final, Optional, Union -from ._websocket_helpers import ( +from ..base_protocol import BaseProtocol +from ..client_exceptions import ClientConnectionResetError +from ..compression_utils import ZLibCompressor +from .helpers import ( MASK_LEN, MSG_SIZE, PACK_CLOSE_CODE, @@ -16,10 +19,7 @@ PACK_RANDBITS, websocket_mask, ) -from ._websocket_models import WS_DEFLATE_TRAILING, WSMsgType -from .base_protocol import BaseProtocol -from .client_exceptions import ClientConnectionResetError -from .compression_utils import ZLibCompressor +from .models import WS_DEFLATE_TRAILING, WSMsgType DEFAULT_LIMIT: Final[int] = 2**16 diff --git a/aiohttp/http_websocket.py b/aiohttp/http_websocket.py index cafe4eeeca2..54c9bfa8979 100644 --- a/aiohttp/http_websocket.py +++ b/aiohttp/http_websocket.py @@ -1,7 +1,7 @@ """WebSocket protocol versions 13 and 8.""" -from ._websocket_helpers import WS_KEY, ws_ext_gen, ws_ext_parse -from ._websocket_models import ( +from ._websocket.helpers import WS_KEY, ws_ext_gen, ws_ext_parse +from ._websocket.models import ( WS_CLOSED_MESSAGE, WS_CLOSING_MESSAGE, WebSocketError, @@ -19,8 +19,8 @@ WSMessageText, WSMsgType, ) -from ._websocket_reader import WebSocketReader -from ._websocket_writer import WebSocketWriter +from ._websocket.reader import 
WebSocketReader +from ._websocket.writer import WebSocketWriter __all__ = ( "WS_CLOSED_MESSAGE", diff --git a/setup.py b/setup.py index cc66fe214ca..cbb1944762a 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ # NOTE: makefile cythonizes all Cython modules extensions = [ - Extension("aiohttp._websocket", ["aiohttp/_websocket.c"]), + Extension("aiohttp._websocket.mask", ["aiohttp/_websocket/mask.c"]), Extension( "aiohttp._http_parser", [ From 33149b071a21f7735c174969a04da5fb7e5f2f87 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:17:04 -1000 Subject: [PATCH 17/59] fix path --- tests/test_websocket_parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index 16ba7507532..767c1843076 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -8,15 +8,15 @@ import pytest import aiohttp -from aiohttp import _websocket_helpers -from aiohttp._websocket_helpers import ( +from aiohttp._websocket import helpers as _websocket_helpers +from aiohttp._websocket.helpers import ( PACK_CLOSE_CODE, PACK_LEN1, PACK_LEN2, PACK_LEN3, websocket_mask, ) -from aiohttp._websocket_models import WS_DEFLATE_TRAILING +from aiohttp._websocket.models import WS_DEFLATE_TRAILING from aiohttp.http import WebSocketError, WSCloseCode, WSMessage, WSMsgType from aiohttp.http_websocket import ( WebSocketReader, From ddd73dbdc696134693ae31207bf68709e15367d6 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:20:54 -1000 Subject: [PATCH 18/59] fix patch target --- tests/test_websocket_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_websocket_writer.py b/tests/test_websocket_writer.py index 96ef8a87dd2..c8bd7c119a6 100644 --- a/tests/test_websocket_writer.py +++ b/tests/test_websocket_writer.py @@ -150,7 +150,7 @@ async def test_concurrent_messages( and in the executor """ with mock.patch( - "aiohttp._websocket_writer.WEBSOCKET_MAX_SYNC_CHUNK_SIZE", max_sync_chunk_size + "aiohttp._websocket.writer.WEBSOCKET_MAX_SYNC_CHUNK_SIZE", max_sync_chunk_size ): writer = WebSocketWriter(protocol, transport, compress=15) queue: DataQueue[WSMessage] = DataQueue(asyncio.get_running_loop()) From e077a36ce68b22abc36377549e0ae68a4d4f54aa Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:29:43 -1000 Subject: [PATCH 19/59] rework so NO_EXTENSIONS is considered --- Makefile | 11 +- aiohttp/_websocket/reader.py | 357 +----------------- .../reader_py.pxd} | 0 aiohttp/_websocket/reader_py.py | 350 +++++++++++++++++ setup.py | 2 +- 5 files changed, 371 insertions(+), 349 deletions(-) rename aiohttp/{_websocket_reader.pxd => _websocket/reader_py.pxd} (100%) create mode 100644 aiohttp/_websocket/reader_py.py diff --git a/Makefile b/Makefile index a9caae451fa..fb707449b25 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,6 @@ to-hash-one = $(dir $1).hash/$(addsuffix .hash,$(notdir $1)) to-hash = $(foreach fname,$1,$(call to-hash-one,$(fname))) CYS := $(wildcard aiohttp/*.pyx) $(wildcard aiohttp/*.pyi) $(wildcard aiohttp/*.pxd) $(wildcard aiohttp/_websocket/*.pyx) $(wildcard aiohttp/_websocket/*.pyi) $(wildcard aiohttp/_websocket/*.pxd) -PY_WITH_PXD := aiohttp/_websocket/reader.py PYXS := $(wildcard aiohttp/*.pyx) $(wildcard aiohttp/_websocket/*.pyx) CS := $(wildcard aiohttp/*.c) $(wildcard aiohttp/_websocket/*.c) PYS := $(wildcard aiohttp/*.py) $(wildcard 
aiohttp/_websocket/*.py) @@ -57,8 +56,10 @@ endif aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) ./tools/gen.py -aiohttp/_websocket_reader.c: aiohttp/_websocket_reader.py - cython -3 -o $@ $< -I aiohttp -Werror +# Special case for reader since we want to be able to disable +# the extension with AIOHTTP_NO_EXTENSIONS +aiohttp/_websocket/reader_c.c: aiohttp/_websocket/reader_py.py + cython -3 -o aiohttp/_websocket/reader_c.c aiohttp/_websocket/reader_py.py -I aiohttp -Werror # _find_headers generator creates _headers.pyi as well aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c @@ -78,7 +79,7 @@ vendor/llhttp/node_modules: vendor/llhttp/package.json generate-llhttp: .llhttp-gen .PHONY: cythonize -cythonize: .install-cython $(PYXS:.pyx=.c) $(PY_WITH_PXD:.py=.c) +cythonize: .install-cython $(PYXS:.pyx=.c) aiohttp/_websocket/reader_c.c .install-deps: .install-cython $(PYXS:.pyx=.c) $(call to-hash,$(CYS) $(REQS)) @python -m pip install -r requirements/dev.in -c requirements/dev.txt @@ -161,7 +162,7 @@ clean: @rm -f aiohttp/_http_parser.c @rm -f aiohttp/_http_writer.c @rm -f aiohttp/_websocket.c - @rm -f aiohttp/_websocket_reader.c + @rm -f aiohttp/_websocket/reader_c.c @rm -rf .tox @rm -f .develop @rm -f .flake diff --git a/aiohttp/_websocket/reader.py b/aiohttp/_websocket/reader.py index 211b1a316a8..503d863f49b 100644 --- a/aiohttp/_websocket/reader.py +++ b/aiohttp/_websocket/reader.py @@ -1,350 +1,21 @@ """Reader for WebSocket protocol versions 13 and 8.""" -from typing import Final, List, Optional, Set, Tuple +from typing import TYPE_CHECKING -from ..compression_utils import ZLibDecompressor -from ..helpers import set_exception -from ..streams import DataQueue -from .helpers import UNPACK_CLOSE_CODE, UNPACK_LEN2, UNPACK_LEN3, websocket_mask -from .models import ( - WS_DEFLATE_TRAILING, - WebSocketError, - WSCloseCode, - WSMessage, - WSMessageBinary, - WSMessageClose, - WSMessagePing, - WSMessagePong, - WSMessageText, - 
WSMsgType, -) +from ..helpers import NO_EXTENSIONS -ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} +if TYPE_CHECKING or NO_EXTENSIONS: # pragma: no cover + from .reader_py import WebSocketReader as WebSocketReaderPython -READ_HEADER = 1 -READ_PAYLOAD_LENGTH = 2 -READ_PAYLOAD_MASK = 3 -READ_PAYLOAD = 4 + WebsocketReader = WebSocketReaderPython +else: + try: + from ._reader_c import ( # type: ignore[import-not-found] + WebSocketReader as WebSocketReaderCython, + ) -# WSMsgType values unpacked so they can by -# cythonized to unsigned int -OP_CODE_CONTINUATION = WSMsgType.CONTINUATION.value -OP_CODE_TEXT = WSMsgType.TEXT.value -OP_CODE_BINARY = WSMsgType.BINARY.value -OP_CODE_CLOSE = WSMsgType.CLOSE.value -OP_CODE_PING = WSMsgType.PING.value -OP_CODE_PONG = WSMsgType.PONG.value + WebsocketReader = WebSocketReaderCython + except ImportError: # pragma: no cover + from .reader_py import WebSocketReader as WebSocketReaderPython - -class WebSocketReader: - def __init__( - self, queue: DataQueue[WSMessage], max_msg_size: int, compress: bool = True - ) -> None: - self.queue = queue - self._max_msg_size = max_msg_size - - self._exc: Optional[BaseException] = None - self._partial = bytearray() - self._state = READ_HEADER - - self._opcode: Optional[int] = None - self._frame_fin = False - self._frame_opcode: Optional[int] = None - self._frame_payload = bytearray() - - self._tail: bytes = b"" - self._has_mask = False - self._frame_mask: Optional[bytes] = None - self._payload_length = 0 - self._payload_length_flag = 0 - self._compressed: Optional[bool] = None - self._decompressobj: Optional[ZLibDecompressor] = None - self._compress = compress - - def feed_eof(self) -> None: - self.queue.feed_eof() - - def feed_data(self, data: bytes) -> Tuple[bool, bytes]: - if self._exc: - return True, data - - try: - self._feed_data(data) - except Exception as exc: - self._exc = exc - set_exception(self.queue, exc) - return True, b"" - - return False, b"" - - def 
_feed_data(self, data: bytes) -> None: - msg: WSMessage - for frame in self.parse_frame(data): - fin = frame[0] - opcode = frame[1] - payload = frame[2] - compressed = frame[3] - - is_continuation = opcode == OP_CODE_CONTINUATION - if opcode == OP_CODE_TEXT or opcode == OP_CODE_BINARY or is_continuation: - # load text/binary - if not fin: - # got partial frame payload - if not is_continuation: - self._opcode = opcode - self._partial += payload - if self._max_msg_size and len(self._partial) >= self._max_msg_size: - raise WebSocketError( - WSCloseCode.MESSAGE_TOO_BIG, - "Message size {} exceeds limit {}".format( - len(self._partial), self._max_msg_size - ), - ) - continue - - has_partial = bool(self._partial) - if is_continuation: - if self._opcode is None: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Continuation frame for non started message", - ) - opcode = self._opcode - self._opcode = None - # previous frame was non finished - # we should get continuation opcode - elif has_partial: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "The opcode in non-fin frame is expected " - "to be zero, got {!r}".format(opcode), - ) - - if has_partial: - assembled_payload = self._partial + payload - self._partial.clear() - else: - assembled_payload = payload - - if self._max_msg_size and len(assembled_payload) >= self._max_msg_size: - raise WebSocketError( - WSCloseCode.MESSAGE_TOO_BIG, - "Message size {} exceeds limit {}".format( - len(assembled_payload), self._max_msg_size - ), - ) - - # Decompress process must to be done after all packets - # received. 
- if compressed: - if not self._decompressobj: - self._decompressobj = ZLibDecompressor( - suppress_deflate_header=True - ) - payload_merged = self._decompressobj.decompress_sync( - assembled_payload + WS_DEFLATE_TRAILING, self._max_msg_size - ) - if self._decompressobj.unconsumed_tail: - left = len(self._decompressobj.unconsumed_tail) - raise WebSocketError( - WSCloseCode.MESSAGE_TOO_BIG, - "Decompressed message size {} exceeds limit {}".format( - self._max_msg_size + left, self._max_msg_size - ), - ) - else: - payload_merged = bytes(assembled_payload) - - if opcode == OP_CODE_TEXT: - try: - text = payload_merged.decode("utf-8") - except UnicodeDecodeError as exc: - raise WebSocketError( - WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" - ) from exc - - # XXX: The Text and Binary messages here can be a performance - # bottleneck, so we use tuple.__new__ to improve performance. - # This is not type safe, but many tests should fail in - # test_client_ws_functional.py if this is wrong. 
- msg = tuple.__new__(WSMessageText, (text, "", WSMsgType.TEXT)) - self.queue.feed_data(msg) - continue - - msg = tuple.__new__( - WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) - ) - self.queue.feed_data(msg) - elif opcode == OP_CODE_CLOSE: - if len(payload) >= 2: - close_code = UNPACK_CLOSE_CODE(payload[:2])[0] - if close_code < 3000 and close_code not in ALLOWED_CLOSE_CODES: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - f"Invalid close code: {close_code}", - ) - try: - close_message = payload[2:].decode("utf-8") - except UnicodeDecodeError as exc: - raise WebSocketError( - WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" - ) from exc - msg = WSMessageClose(data=close_code, extra=close_message) - elif payload: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - f"Invalid close frame: {fin} {opcode} {payload!r}", - ) - else: - msg = WSMessageClose(data=0, extra="") - - self.queue.feed_data(msg) - - elif opcode == OP_CODE_PING: - msg = WSMessagePing(data=payload, extra="") - self.queue.feed_data(msg) - - elif opcode == OP_CODE_PONG: - msg = WSMessagePong(data=payload, extra="") - self.queue.feed_data(msg) - - else: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, f"Unexpected opcode={opcode!r}" - ) - - def parse_frame( - self, buf: bytes - ) -> List[Tuple[bool, Optional[int], bytearray, Optional[bool]]]: - """Return the next frame from the socket.""" - frames: List[Tuple[bool, Optional[int], bytearray, Optional[bool]]] = [] - if self._tail: - buf, self._tail = self._tail + buf, b"" - - start_pos: int = 0 - buf_length = len(buf) - - while True: - # read header - if self._state == READ_HEADER: - if buf_length - start_pos < 2: - break - data = buf[start_pos : start_pos + 2] - start_pos += 2 - first_byte = data[0] - second_byte = data[1] - - fin = (first_byte >> 7) & 1 - rsv1 = (first_byte >> 6) & 1 - rsv2 = (first_byte >> 5) & 1 - rsv3 = (first_byte >> 4) & 1 - opcode = first_byte & 0xF - - # frame-fin = %x0 ; more frames of this 
message follow - # / %x1 ; final frame of this message - # frame-rsv1 = %x0 ; - # 1 bit, MUST be 0 unless negotiated otherwise - # frame-rsv2 = %x0 ; - # 1 bit, MUST be 0 unless negotiated otherwise - # frame-rsv3 = %x0 ; - # 1 bit, MUST be 0 unless negotiated otherwise - # - # Remove rsv1 from this test for deflate development - if rsv2 or rsv3 or (rsv1 and not self._compress): - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Received frame with non-zero reserved bits", - ) - - if opcode > 0x7 and fin == 0: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Received fragmented control frame", - ) - - has_mask = (second_byte >> 7) & 1 - length = second_byte & 0x7F - - # Control frames MUST have a payload - # length of 125 bytes or less - if opcode > 0x7 and length > 125: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Control frame payload cannot be larger than 125 bytes", - ) - - # Set compress status if last package is FIN - # OR set compress status if this is first fragment - # Raise error if not first fragment with rsv1 = 0x1 - if self._frame_fin or self._compressed is None: - self._compressed = True if rsv1 else False - elif rsv1: - raise WebSocketError( - WSCloseCode.PROTOCOL_ERROR, - "Received frame with non-zero reserved bits", - ) - - self._frame_fin = bool(fin) - self._frame_opcode = opcode - self._has_mask = bool(has_mask) - self._payload_length_flag = length - self._state = READ_PAYLOAD_LENGTH - - # read payload length - if self._state == READ_PAYLOAD_LENGTH: - length_flag = self._payload_length_flag - if length_flag == 126: - if buf_length - start_pos < 2: - break - data = buf[start_pos : start_pos + 2] - start_pos += 2 - self._payload_length = UNPACK_LEN2(data)[0] - elif length_flag > 126: - if buf_length - start_pos < 8: - break - data = buf[start_pos : start_pos + 8] - start_pos += 8 - self._payload_length = UNPACK_LEN3(data)[0] - else: - self._payload_length = length_flag - - self._state = READ_PAYLOAD_MASK if self._has_mask 
else READ_PAYLOAD - - # read payload mask - if self._state == READ_PAYLOAD_MASK: - if buf_length - start_pos < 4: - break - self._frame_mask = buf[start_pos : start_pos + 4] - start_pos += 4 - self._state = READ_PAYLOAD - - if self._state == READ_PAYLOAD: - length = self._payload_length - payload = self._frame_payload - - chunk_len = buf_length - start_pos - if length >= chunk_len: - self._payload_length = length - chunk_len - payload += buf[start_pos:] - start_pos = buf_length - else: - self._payload_length = 0 - payload += buf[start_pos : start_pos + length] - start_pos = start_pos + length - - if self._payload_length != 0: - break - - if self._has_mask: - assert self._frame_mask is not None - websocket_mask(self._frame_mask, payload) - - frames.append( - (self._frame_fin, self._frame_opcode, payload, self._compressed) - ) - self._frame_payload = bytearray() - self._state = READ_HEADER - - self._tail = buf[start_pos:] - - return frames + WebsocketReader = WebSocketReaderPython diff --git a/aiohttp/_websocket_reader.pxd b/aiohttp/_websocket/reader_py.pxd similarity index 100% rename from aiohttp/_websocket_reader.pxd rename to aiohttp/_websocket/reader_py.pxd diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py new file mode 100644 index 00000000000..211b1a316a8 --- /dev/null +++ b/aiohttp/_websocket/reader_py.py @@ -0,0 +1,350 @@ +"""Reader for WebSocket protocol versions 13 and 8.""" + +from typing import Final, List, Optional, Set, Tuple + +from ..compression_utils import ZLibDecompressor +from ..helpers import set_exception +from ..streams import DataQueue +from .helpers import UNPACK_CLOSE_CODE, UNPACK_LEN2, UNPACK_LEN3, websocket_mask +from .models import ( + WS_DEFLATE_TRAILING, + WebSocketError, + WSCloseCode, + WSMessage, + WSMessageBinary, + WSMessageClose, + WSMessagePing, + WSMessagePong, + WSMessageText, + WSMsgType, +) + +ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} + +READ_HEADER = 1 
+READ_PAYLOAD_LENGTH = 2 +READ_PAYLOAD_MASK = 3 +READ_PAYLOAD = 4 + +# WSMsgType values unpacked so they can by +# cythonized to unsigned int +OP_CODE_CONTINUATION = WSMsgType.CONTINUATION.value +OP_CODE_TEXT = WSMsgType.TEXT.value +OP_CODE_BINARY = WSMsgType.BINARY.value +OP_CODE_CLOSE = WSMsgType.CLOSE.value +OP_CODE_PING = WSMsgType.PING.value +OP_CODE_PONG = WSMsgType.PONG.value + + +class WebSocketReader: + def __init__( + self, queue: DataQueue[WSMessage], max_msg_size: int, compress: bool = True + ) -> None: + self.queue = queue + self._max_msg_size = max_msg_size + + self._exc: Optional[BaseException] = None + self._partial = bytearray() + self._state = READ_HEADER + + self._opcode: Optional[int] = None + self._frame_fin = False + self._frame_opcode: Optional[int] = None + self._frame_payload = bytearray() + + self._tail: bytes = b"" + self._has_mask = False + self._frame_mask: Optional[bytes] = None + self._payload_length = 0 + self._payload_length_flag = 0 + self._compressed: Optional[bool] = None + self._decompressobj: Optional[ZLibDecompressor] = None + self._compress = compress + + def feed_eof(self) -> None: + self.queue.feed_eof() + + def feed_data(self, data: bytes) -> Tuple[bool, bytes]: + if self._exc: + return True, data + + try: + self._feed_data(data) + except Exception as exc: + self._exc = exc + set_exception(self.queue, exc) + return True, b"" + + return False, b"" + + def _feed_data(self, data: bytes) -> None: + msg: WSMessage + for frame in self.parse_frame(data): + fin = frame[0] + opcode = frame[1] + payload = frame[2] + compressed = frame[3] + + is_continuation = opcode == OP_CODE_CONTINUATION + if opcode == OP_CODE_TEXT or opcode == OP_CODE_BINARY or is_continuation: + # load text/binary + if not fin: + # got partial frame payload + if not is_continuation: + self._opcode = opcode + self._partial += payload + if self._max_msg_size and len(self._partial) >= self._max_msg_size: + raise WebSocketError( + WSCloseCode.MESSAGE_TOO_BIG, + 
"Message size {} exceeds limit {}".format( + len(self._partial), self._max_msg_size + ), + ) + continue + + has_partial = bool(self._partial) + if is_continuation: + if self._opcode is None: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Continuation frame for non started message", + ) + opcode = self._opcode + self._opcode = None + # previous frame was non finished + # we should get continuation opcode + elif has_partial: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "The opcode in non-fin frame is expected " + "to be zero, got {!r}".format(opcode), + ) + + if has_partial: + assembled_payload = self._partial + payload + self._partial.clear() + else: + assembled_payload = payload + + if self._max_msg_size and len(assembled_payload) >= self._max_msg_size: + raise WebSocketError( + WSCloseCode.MESSAGE_TOO_BIG, + "Message size {} exceeds limit {}".format( + len(assembled_payload), self._max_msg_size + ), + ) + + # Decompress process must to be done after all packets + # received. + if compressed: + if not self._decompressobj: + self._decompressobj = ZLibDecompressor( + suppress_deflate_header=True + ) + payload_merged = self._decompressobj.decompress_sync( + assembled_payload + WS_DEFLATE_TRAILING, self._max_msg_size + ) + if self._decompressobj.unconsumed_tail: + left = len(self._decompressobj.unconsumed_tail) + raise WebSocketError( + WSCloseCode.MESSAGE_TOO_BIG, + "Decompressed message size {} exceeds limit {}".format( + self._max_msg_size + left, self._max_msg_size + ), + ) + else: + payload_merged = bytes(assembled_payload) + + if opcode == OP_CODE_TEXT: + try: + text = payload_merged.decode("utf-8") + except UnicodeDecodeError as exc: + raise WebSocketError( + WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" + ) from exc + + # XXX: The Text and Binary messages here can be a performance + # bottleneck, so we use tuple.__new__ to improve performance. 
+ # This is not type safe, but many tests should fail in + # test_client_ws_functional.py if this is wrong. + msg = tuple.__new__(WSMessageText, (text, "", WSMsgType.TEXT)) + self.queue.feed_data(msg) + continue + + msg = tuple.__new__( + WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) + ) + self.queue.feed_data(msg) + elif opcode == OP_CODE_CLOSE: + if len(payload) >= 2: + close_code = UNPACK_CLOSE_CODE(payload[:2])[0] + if close_code < 3000 and close_code not in ALLOWED_CLOSE_CODES: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + f"Invalid close code: {close_code}", + ) + try: + close_message = payload[2:].decode("utf-8") + except UnicodeDecodeError as exc: + raise WebSocketError( + WSCloseCode.INVALID_TEXT, "Invalid UTF-8 text message" + ) from exc + msg = WSMessageClose(data=close_code, extra=close_message) + elif payload: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + f"Invalid close frame: {fin} {opcode} {payload!r}", + ) + else: + msg = WSMessageClose(data=0, extra="") + + self.queue.feed_data(msg) + + elif opcode == OP_CODE_PING: + msg = WSMessagePing(data=payload, extra="") + self.queue.feed_data(msg) + + elif opcode == OP_CODE_PONG: + msg = WSMessagePong(data=payload, extra="") + self.queue.feed_data(msg) + + else: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, f"Unexpected opcode={opcode!r}" + ) + + def parse_frame( + self, buf: bytes + ) -> List[Tuple[bool, Optional[int], bytearray, Optional[bool]]]: + """Return the next frame from the socket.""" + frames: List[Tuple[bool, Optional[int], bytearray, Optional[bool]]] = [] + if self._tail: + buf, self._tail = self._tail + buf, b"" + + start_pos: int = 0 + buf_length = len(buf) + + while True: + # read header + if self._state == READ_HEADER: + if buf_length - start_pos < 2: + break + data = buf[start_pos : start_pos + 2] + start_pos += 2 + first_byte = data[0] + second_byte = data[1] + + fin = (first_byte >> 7) & 1 + rsv1 = (first_byte >> 6) & 1 + rsv2 = (first_byte >> 5) & 1 
+ rsv3 = (first_byte >> 4) & 1 + opcode = first_byte & 0xF + + # frame-fin = %x0 ; more frames of this message follow + # / %x1 ; final frame of this message + # frame-rsv1 = %x0 ; + # 1 bit, MUST be 0 unless negotiated otherwise + # frame-rsv2 = %x0 ; + # 1 bit, MUST be 0 unless negotiated otherwise + # frame-rsv3 = %x0 ; + # 1 bit, MUST be 0 unless negotiated otherwise + # + # Remove rsv1 from this test for deflate development + if rsv2 or rsv3 or (rsv1 and not self._compress): + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Received frame with non-zero reserved bits", + ) + + if opcode > 0x7 and fin == 0: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Received fragmented control frame", + ) + + has_mask = (second_byte >> 7) & 1 + length = second_byte & 0x7F + + # Control frames MUST have a payload + # length of 125 bytes or less + if opcode > 0x7 and length > 125: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Control frame payload cannot be larger than 125 bytes", + ) + + # Set compress status if last package is FIN + # OR set compress status if this is first fragment + # Raise error if not first fragment with rsv1 = 0x1 + if self._frame_fin or self._compressed is None: + self._compressed = True if rsv1 else False + elif rsv1: + raise WebSocketError( + WSCloseCode.PROTOCOL_ERROR, + "Received frame with non-zero reserved bits", + ) + + self._frame_fin = bool(fin) + self._frame_opcode = opcode + self._has_mask = bool(has_mask) + self._payload_length_flag = length + self._state = READ_PAYLOAD_LENGTH + + # read payload length + if self._state == READ_PAYLOAD_LENGTH: + length_flag = self._payload_length_flag + if length_flag == 126: + if buf_length - start_pos < 2: + break + data = buf[start_pos : start_pos + 2] + start_pos += 2 + self._payload_length = UNPACK_LEN2(data)[0] + elif length_flag > 126: + if buf_length - start_pos < 8: + break + data = buf[start_pos : start_pos + 8] + start_pos += 8 + self._payload_length = 
UNPACK_LEN3(data)[0] + else: + self._payload_length = length_flag + + self._state = READ_PAYLOAD_MASK if self._has_mask else READ_PAYLOAD + + # read payload mask + if self._state == READ_PAYLOAD_MASK: + if buf_length - start_pos < 4: + break + self._frame_mask = buf[start_pos : start_pos + 4] + start_pos += 4 + self._state = READ_PAYLOAD + + if self._state == READ_PAYLOAD: + length = self._payload_length + payload = self._frame_payload + + chunk_len = buf_length - start_pos + if length >= chunk_len: + self._payload_length = length - chunk_len + payload += buf[start_pos:] + start_pos = buf_length + else: + self._payload_length = 0 + payload += buf[start_pos : start_pos + length] + start_pos = start_pos + length + + if self._payload_length != 0: + break + + if self._has_mask: + assert self._frame_mask is not None + websocket_mask(self._frame_mask, payload) + + frames.append( + (self._frame_fin, self._frame_opcode, payload, self._compressed) + ) + self._frame_payload = bytearray() + self._state = READ_HEADER + + self._tail = buf[start_pos:] + + return frames diff --git a/setup.py b/setup.py index 50114659463..aa53eb2e6de 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ include_dirs=["vendor/llhttp/build"], ), Extension("aiohttp._http_writer", ["aiohttp/_http_writer.c"]), - Extension("aiohttp._websocket_reader", ["aiohttp/_websocket_reader.c"]), + Extension("aiohttp._websocket.reader_c", ["aiohttp/_websocket.reader_c.c"]), ] From b74b874255e45b44a93b8e5df1f5405a281936fc Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:32:19 -1000 Subject: [PATCH 20/59] namespace cleanups --- aiohttp/_websocket/helpers.py | 10 +++++----- aiohttp/_websocket/mask.pyx | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aiohttp/_websocket/helpers.py b/aiohttp/_websocket/helpers.py index 41273dd3230..022b2b7cb92 100644 --- a/aiohttp/_websocket/helpers.py +++ b/aiohttp/_websocket/helpers.py @@ -28,7 +28,7 @@ def _xor_table() -> List[bytes]: return [bytes(a ^ b for a in range(256)) for b in range(256)] -def _websocket_mask_python(mask: bytes, data: bytearray) -> None: +def _mask_python(mask: bytes, data: bytearray) -> None: """Websocket masking function. `mask` is a `bytes` object of length 4; `data` is a `bytearray` @@ -54,14 +54,14 @@ def _websocket_mask_python(mask: bytes, data: bytearray) -> None: if TYPE_CHECKING or NO_EXTENSIONS: # pragma: no cover - websocket_mask = _websocket_mask_python + websocket_mask = _mask_python else: try: - from ._websocket import _websocket_mask_cython # type: ignore[import-not-found] + from .mask import _mask_cython # type: ignore[import-not-found] - websocket_mask = _websocket_mask_cython + websocket_mask = _mask_cython except ImportError: # pragma: no cover - websocket_mask = _websocket_mask_python + websocket_mask = _mask_python _WS_EXT_RE: Final[Pattern[str]] = re.compile( diff --git a/aiohttp/_websocket/mask.pyx b/aiohttp/_websocket/mask.pyx index 94318d2b1be..355fc7a4dbf 100644 --- a/aiohttp/_websocket/mask.pyx +++ b/aiohttp/_websocket/mask.pyx @@ -8,7 +8,7 @@ cdef extern from "Python.h": from libc.stdint cimport uint32_t, uint64_t, uintmax_t -def _websocket_mask_cython(object mask, object data): +def _mask_cython(object mask, object data): """Note, this function mutates its `data` argument """ cdef: From 6e0fdcbfb986a0cce0090ffeb55333cdb277eb86 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:33:21 -1000 Subject: [PATCH 21/59] Revert "namespace cleanups" This reverts commit b74b874255e45b44a93b8e5df1f5405a281936fc. --- aiohttp/_websocket/helpers.py | 10 +++++----- aiohttp/_websocket/mask.pyx | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aiohttp/_websocket/helpers.py b/aiohttp/_websocket/helpers.py index 022b2b7cb92..41273dd3230 100644 --- a/aiohttp/_websocket/helpers.py +++ b/aiohttp/_websocket/helpers.py @@ -28,7 +28,7 @@ def _xor_table() -> List[bytes]: return [bytes(a ^ b for a in range(256)) for b in range(256)] -def _mask_python(mask: bytes, data: bytearray) -> None: +def _websocket_mask_python(mask: bytes, data: bytearray) -> None: """Websocket masking function. `mask` is a `bytes` object of length 4; `data` is a `bytearray` @@ -54,14 +54,14 @@ def _mask_python(mask: bytes, data: bytearray) -> None: if TYPE_CHECKING or NO_EXTENSIONS: # pragma: no cover - websocket_mask = _mask_python + websocket_mask = _websocket_mask_python else: try: - from .mask import _mask_cython # type: ignore[import-not-found] + from ._websocket import _websocket_mask_cython # type: ignore[import-not-found] - websocket_mask = _mask_cython + websocket_mask = _websocket_mask_cython except ImportError: # pragma: no cover - websocket_mask = _mask_python + websocket_mask = _websocket_mask_python _WS_EXT_RE: Final[Pattern[str]] = re.compile( diff --git a/aiohttp/_websocket/mask.pyx b/aiohttp/_websocket/mask.pyx index 355fc7a4dbf..94318d2b1be 100644 --- a/aiohttp/_websocket/mask.pyx +++ b/aiohttp/_websocket/mask.pyx @@ -8,7 +8,7 @@ cdef extern from "Python.h": from libc.stdint cimport uint32_t, uint64_t, uintmax_t -def _mask_cython(object mask, object data): +def _websocket_mask_cython(object mask, object data): """Note, this function mutates its `data` argument """ cdef: From 439101c52df08ff0ab63f5506a856b72b44b5170 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:37:15 -1000 Subject: [PATCH 22/59] fix build path --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index aa53eb2e6de..c9a2c5c856c 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ include_dirs=["vendor/llhttp/build"], ), Extension("aiohttp._http_writer", ["aiohttp/_http_writer.c"]), - Extension("aiohttp._websocket.reader_c", ["aiohttp/_websocket.reader_c.c"]), + Extension("aiohttp._websocket.reader_c", ["aiohttp/_websocket/reader_c.c"]), ] From 8cda99aff1987564e4b06f7c13763e2f627dd0f2 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:39:34 -1000 Subject: [PATCH 23/59] add missing pxd file for mask --- aiohttp/_websocket/mask.pxd | 3 +++ aiohttp/_websocket/reader_py.pxd | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 aiohttp/_websocket/mask.pxd diff --git a/aiohttp/_websocket/mask.pxd b/aiohttp/_websocket/mask.pxd new file mode 100644 index 00000000000..4a6f51d9452 --- /dev/null +++ b/aiohttp/_websocket/mask.pxd @@ -0,0 +1,3 @@ +"""Cython declarations for websocket masking.""" + +cpdef _websocket_mask_cython(object mask, object data) diff --git a/aiohttp/_websocket/reader_py.pxd b/aiohttp/_websocket/reader_py.pxd index 82c1fca8dc8..cdce723a02d 100644 --- a/aiohttp/_websocket/reader_py.pxd +++ b/aiohttp/_websocket/reader_py.pxd @@ -1,5 +1,7 @@ import cython +from .mask cimport _websocket_mask_cython as websocket_mask + cdef unsigned int READ_HEADER cdef unsigned int READ_PAYLOAD_LENGTH From 3b690f5e17cfaaa956498c096a52e68082ee3f97 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:42:14 -1000 Subject: [PATCH 24/59] fix some double conversion --- aiohttp/_websocket/reader_py.pxd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader_py.pxd b/aiohttp/_websocket/reader_py.pxd index cdce723a02d..107ed20cbc8 100644 --- a/aiohttp/_websocket/reader_py.pxd +++ b/aiohttp/_websocket/reader_py.pxd @@ -74,6 +74,8 @@ cdef class WebSocketReader: data=bytes, payload=bytearray, first_byte=char, - second_byte=char + second_byte=char, + has_mask=bint, + fin=bint, ) cpdef parse_frame(self, bytes buf) From b6d256a5cf46589646dddb5edd415c4bd43d3f3a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:44:51 -1000 Subject: [PATCH 25/59] objects are immutable --- aiohttp/_websocket/reader_py.pxd | 3 +++ aiohttp/_websocket/reader_py.py | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/aiohttp/_websocket/reader_py.pxd b/aiohttp/_websocket/reader_py.pxd index 107ed20cbc8..8e6ee46dd0e 100644 --- a/aiohttp/_websocket/reader_py.pxd +++ b/aiohttp/_websocket/reader_py.pxd @@ -30,6 +30,9 @@ cdef object WSMessageClose cdef set ALLOWED_CLOSE_CODES cdef set MESSAGE_TYPES_WITH_CONTENT +cdef object EMPTY_FRAME +cdef object EMPTY_FRAME_ERROR + cdef class WebSocketReader: cdef object queue diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 211b1a316a8..84119de718e 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -35,6 +35,9 @@ OP_CODE_PING = WSMsgType.PING.value OP_CODE_PONG = WSMsgType.PONG.value +EMPTY_FRAME_ERROR = (True, b"") +EMPTY_FRAME = (False, b"") + class WebSocketReader: def __init__( @@ -73,9 +76,9 @@ def feed_data(self, data: bytes) -> Tuple[bool, bytes]: except Exception as exc: self._exc = exc set_exception(self.queue, exc) - return True, b"" + return EMPTY_FRAME_ERROR - return False, b"" + return EMPTY_FRAME def _feed_data(self, data: 
bytes) -> None: msg: WSMessage From d8d3c3a87162368da06a104671af3f96627f6b04 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:47:30 -1000 Subject: [PATCH 26/59] match defs since we use the cython code insidepython code --- aiohttp/_websocket/mask.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/mask.pyx b/aiohttp/_websocket/mask.pyx index 94318d2b1be..7870392397d 100644 --- a/aiohttp/_websocket/mask.pyx +++ b/aiohttp/_websocket/mask.pyx @@ -8,7 +8,7 @@ cdef extern from "Python.h": from libc.stdint cimport uint32_t, uint64_t, uintmax_t -def _websocket_mask_cython(object mask, object data): +cpdef _websocket_mask_cython(object mask, object data): """Note, this function mutates its `data` argument """ cdef: From f05f3bfa1df03787d5ddbde29b3f52a1c74db9ae Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 08:50:45 -1000 Subject: [PATCH 27/59] naming --- aiohttp/_websocket/reader.py | 6 +++--- tests/test_websocket_parser.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aiohttp/_websocket/reader.py b/aiohttp/_websocket/reader.py index 503d863f49b..1f5f10c4219 100644 --- a/aiohttp/_websocket/reader.py +++ b/aiohttp/_websocket/reader.py @@ -7,15 +7,15 @@ if TYPE_CHECKING or NO_EXTENSIONS: # pragma: no cover from .reader_py import WebSocketReader as WebSocketReaderPython - WebsocketReader = WebSocketReaderPython + WebSocketReader = WebSocketReaderPython else: try: from ._reader_c import ( # type: ignore[import-not-found] WebSocketReader as WebSocketReaderCython, ) - WebsocketReader = WebSocketReaderCython + WebSocketReader = WebSocketReaderCython except ImportError: # pragma: no cover from .reader_py import WebSocketReader as WebSocketReaderPython - WebsocketReader = WebSocketReaderPython + WebSocketReader = WebSocketReaderPython diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index 
2865e5eb334..594c6a94d94 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -28,7 +28,7 @@ ) -class PatchableWebsocketReader(WebSocketReader): +class PatchableWebSocketReader(WebSocketReader): """WebSocketReader subclass that allows for patching parse_frame.""" @@ -101,8 +101,8 @@ def out(loop: asyncio.AbstractEventLoop) -> aiohttp.DataQueue[WSMessage]: @pytest.fixture() -def parser(out: aiohttp.DataQueue[WSMessage]) -> PatchableWebsocketReader: - return PatchableWebsocketReader(out, 4 * 1024 * 1024) +def parser(out: aiohttp.DataQueue[WSMessage]) -> PatchableWebSocketReader: + return PatchableWebSocketReader(out, 4 * 1024 * 1024) def test_parse_frame(parser: WebSocketReader) -> None: From 9fb530d8e59df5637f260c9f40054fe70569cef7 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:11:13 -1000 Subject: [PATCH 28/59] must be named reader_c because PyInit is named --- Makefile | 4 ++-- aiohttp/_websocket/reader_c.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) create mode 120000 aiohttp/_websocket/reader_c.py diff --git a/Makefile b/Makefile index fb707449b25..66f9f6046b1 100644 --- a/Makefile +++ b/Makefile @@ -58,8 +58,8 @@ aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) # Special case for reader since we want to be able to disable # the extension with AIOHTTP_NO_EXTENSIONS -aiohttp/_websocket/reader_c.c: aiohttp/_websocket/reader_py.py - cython -3 -o aiohttp/_websocket/reader_c.c aiohttp/_websocket/reader_py.py -I aiohttp -Werror +aiohttp/_websocket/reader_c.c: aiohttp/_websocket/reader_c.py + cython -3 -o aiohttp/_websocket/reader_c.c aiohttp/_websocket/reader_c.py -I aiohttp -Werror # _find_headers generator creates _headers.pyi as well aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c diff --git a/aiohttp/_websocket/reader_c.py b/aiohttp/_websocket/reader_c.py new file mode 120000 index 00000000000..083cbb4331f --- /dev/null +++ 
b/aiohttp/_websocket/reader_c.py @@ -0,0 +1 @@ +reader_py.py \ No newline at end of file From 7fed6562f99b30dbc75814da688687fd12e589c4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:12:00 -1000 Subject: [PATCH 29/59] reduce makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 66f9f6046b1..4876f999cde 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) # Special case for reader since we want to be able to disable # the extension with AIOHTTP_NO_EXTENSIONS aiohttp/_websocket/reader_c.c: aiohttp/_websocket/reader_c.py - cython -3 -o aiohttp/_websocket/reader_c.c aiohttp/_websocket/reader_c.py -I aiohttp -Werror + cython -3 -o $@ $< -I aiohttp -Werror # _find_headers generator creates _headers.pyi as well aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c From 34acf92ea676d09471a8b22ec0a29335f250478f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:18:37 -1000 Subject: [PATCH 30/59] fix namespace --- aiohttp/_websocket/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader.py b/aiohttp/_websocket/reader.py index 1f5f10c4219..254288ac7e7 100644 --- a/aiohttp/_websocket/reader.py +++ b/aiohttp/_websocket/reader.py @@ -10,7 +10,7 @@ WebSocketReader = WebSocketReaderPython else: try: - from ._reader_c import ( # type: ignore[import-not-found] + from .reader_c import ( # type: ignore[import-not-found] WebSocketReader as WebSocketReaderCython, ) From df1e21997496b4c7c4fdec4c22740fd8f1f4e0f2 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:21:06 -1000 Subject: [PATCH 31/59] namespace fix --- aiohttp/_websocket/{reader_py.pxd => reader_c.pxd} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename aiohttp/_websocket/{reader_py.pxd => reader_c.pxd} (100%) diff --git a/aiohttp/_websocket/reader_py.pxd b/aiohttp/_websocket/reader_c.pxd similarity index 100% rename from aiohttp/_websocket/reader_py.pxd rename to aiohttp/_websocket/reader_c.pxd From 097899ff98feb38a1f9e21b59a9811ca6f5997e8 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:25:47 -1000 Subject: [PATCH 32/59] fix type, its not base exception --- aiohttp/_websocket/reader_c.pxd | 2 +- aiohttp/_websocket/reader_py.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 8e6ee46dd0e..eee61176ff2 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -38,7 +38,7 @@ cdef class WebSocketReader: cdef object queue cdef unsigned int _max_msg_size - cdef object _exc + cdef Exception _exc cdef bytearray _partial cdef unsigned int _state diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 84119de718e..8870f36b2a5 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -46,7 +46,7 @@ def __init__( self.queue = queue self._max_msg_size = max_msg_size - self._exc: Optional[BaseException] = None + self._exc: Optional[Exception] = None self._partial = bytearray() self._state = READ_HEADER @@ -68,7 +68,7 @@ def feed_eof(self) -> None: self.queue.feed_eof() def feed_data(self, data: bytes) -> Tuple[bool, bytes]: - if self._exc: + if self._exc is not None: return True, data try: From 2be16097e403088db57998f43fb622ce1e2e35b6 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:26:41 -1000 Subject: [PATCH 33/59] reduce opcode type to unsigned int --- aiohttp/_websocket/reader_c.pxd | 4 ++-- aiohttp/_websocket/reader_py.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index eee61176ff2..781f3056657 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -42,9 +42,9 @@ cdef class WebSocketReader: cdef bytearray _partial cdef unsigned int _state - cdef object _opcode + cdef unsigned int _opcode cdef bint _frame_fin - cdef object _frame_opcode + cdef unsigned int _frame_opcode cdef bytearray _frame_payload cdef bytes _tail diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 8870f36b2a5..4a78581ed18 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -50,9 +50,9 @@ def __init__( self._partial = bytearray() self._state = READ_HEADER - self._opcode: Optional[int] = None + self._opcode = -1 self._frame_fin = False - self._frame_opcode: Optional[int] = None + self._frame_opcode = -1 self._frame_payload = bytearray() self._tail: bytes = b"" @@ -113,7 +113,7 @@ def _feed_data(self, data: bytes) -> None: "Continuation frame for non started message", ) opcode = self._opcode - self._opcode = None + self._opcode = -1 # previous frame was non finished # we should get continuation opcode elif has_partial: From 3697714d5c748562698de8be9da062a5dbc8e59b Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:28:01 -1000 Subject: [PATCH 34/59] reduce opcode type to unsigned int --- aiohttp/_websocket/reader_py.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 4a78581ed18..ac5919580d7 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -107,7 +107,7 @@ def _feed_data(self, data: bytes) -> None: has_partial = bool(self._partial) if is_continuation: - if self._opcode is None: + if self._opcode == -1: raise WebSocketError( WSCloseCode.PROTOCOL_ERROR, "Continuation frame for non started message", From 54dab0ad4275ad91d0c61e88f7e3f0ad0ae85356 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:33:55 -1000 Subject: [PATCH 35/59] lint --- aiohttp/_websocket/reader_c.pxd | 20 ++++++++++---------- aiohttp/_websocket/reader_py.py | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 781f3056657..0906e051d22 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -8,12 +8,12 @@ cdef unsigned int READ_PAYLOAD_LENGTH cdef unsigned int READ_PAYLOAD_MASK cdef unsigned int READ_PAYLOAD -cdef unsigned int OP_CODE_CONTINUATION -cdef unsigned int OP_CODE_TEXT -cdef unsigned int OP_CODE_BINARY -cdef unsigned int OP_CODE_CLOSE -cdef unsigned int OP_CODE_PING -cdef unsigned int OP_CODE_PONG +cdef int OP_CODE_CONTINUATION +cdef int OP_CODE_TEXT +cdef int OP_CODE_BINARY +cdef int OP_CODE_CLOSE +cdef int OP_CODE_PING +cdef int OP_CODE_PONG cdef object UNPACK_LEN2 cdef object UNPACK_LEN3 @@ -42,9 +42,9 @@ cdef class WebSocketReader: cdef bytearray _partial cdef unsigned int _state - cdef unsigned int _opcode + cdef int _opcode cdef bint _frame_fin - cdef unsigned int _frame_opcode + cdef int _frame_opcode cdef bytearray _frame_payload cdef bytes _tail @@ 
-52,7 +52,7 @@ cdef class WebSocketReader: cdef bytes _frame_mask cdef unsigned int _payload_length cdef unsigned int _payload_length_flag - cdef object _compressed + cdef int _compressed cdef object _decompressobj cdef bint _compress @@ -63,7 +63,7 @@ cdef class WebSocketReader: fin=bint, has_partial=bint, payload_merged=bytes, - opcode="unsigned int", + opcode=int, ) cpdef _feed_data(self, bytes data) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index ac5919580d7..c9da9b371bb 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -60,7 +60,7 @@ def __init__( self._frame_mask: Optional[bytes] = None self._payload_length = 0 self._payload_length_flag = 0 - self._compressed: Optional[bool] = None + self._compressed: int = -1 self._decompressobj: Optional[ZLibDecompressor] = None self._compress = compress @@ -218,9 +218,9 @@ def _feed_data(self, data: bytes) -> None: def parse_frame( self, buf: bytes - ) -> List[Tuple[bool, Optional[int], bytearray, Optional[bool]]]: + ) -> List[Tuple[bool, int, bytearray, Optional[bool]]]: """Return the next frame from the socket.""" - frames: List[Tuple[bool, Optional[int], bytearray, Optional[bool]]] = [] + frames: List[Tuple[bool, int, bytearray, Optional[bool]]] = [] if self._tail: buf, self._tail = self._tail + buf, b"" @@ -279,7 +279,7 @@ def parse_frame( # Set compress status if last package is FIN # OR set compress status if this is first fragment # Raise error if not first fragment with rsv1 = 0x1 - if self._frame_fin or self._compressed is None: + if self._frame_fin or self._compressed == -1: self._compressed = True if rsv1 else False elif rsv1: raise WebSocketError( From c03fde4a85b0d7f2429e00c47b8779d2479512be Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:36:19 -1000 Subject: [PATCH 36/59] fix type --- aiohttp/_websocket/reader_py.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index c9da9b371bb..9b8a1194c16 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -216,11 +216,9 @@ def _feed_data(self, data: bytes) -> None: WSCloseCode.PROTOCOL_ERROR, f"Unexpected opcode={opcode!r}" ) - def parse_frame( - self, buf: bytes - ) -> List[Tuple[bool, int, bytearray, Optional[bool]]]: + def parse_frame(self, buf: bytes) -> List[Tuple[bool, int, bytearray, int]]: """Return the next frame from the socket.""" - frames: List[Tuple[bool, int, bytearray, Optional[bool]]] = [] + frames: List[Tuple[bool, int, bytearray, int]] = [] if self._tail: buf, self._tail = self._tail + buf, b"" @@ -280,7 +278,7 @@ def parse_frame( # OR set compress status if this is first fragment # Raise error if not first fragment with rsv1 = 0x1 if self._frame_fin or self._compressed == -1: - self._compressed = True if rsv1 else False + self._compressed = 1 if rsv1 else 0 elif rsv1: raise WebSocketError( WSCloseCode.PROTOCOL_ERROR, From e3ecce833f03a397b747bc7af24f77c7ed7ad2a0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:46:02 -1000 Subject: [PATCH 37/59] changelog --- CHANGES/9543.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/9543.feature.rst diff --git a/CHANGES/9543.feature.rst b/CHANGES/9543.feature.rst new file mode 100644 index 00000000000..ee624ddc48d --- /dev/null +++ b/CHANGES/9543.feature.rst @@ -0,0 +1 @@ +Improved performance of reading WebSocket messages with a Cython implementation -- by :user:`bdraco`. From 55b6ad20365ad2dd024e6747e8c158cb333309e0 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 09:47:16 -1000 Subject: [PATCH 38/59] Update aiohttp/_websocket/reader_py.py --- aiohttp/_websocket/reader_py.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 9b8a1194c16..5ddfac84b33 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -26,8 +26,7 @@ READ_PAYLOAD_MASK = 3 READ_PAYLOAD = 4 -# WSMsgType values unpacked so they can by -# cythonized to unsigned int +# WSMsgType values unpacked so they can by cythonized to ints OP_CODE_CONTINUATION = WSMsgType.CONTINUATION.value OP_CODE_TEXT = WSMsgType.TEXT.value OP_CODE_BINARY = WSMsgType.BINARY.value From ffba3db10075b3160ab6433355a16f766350e9cd Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:00:50 -1000 Subject: [PATCH 39/59] Revert "fix type" This reverts commit c03fde4a85b0d7f2429e00c47b8779d2479512be. --- aiohttp/_websocket/reader_py.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 9b8a1194c16..c9da9b371bb 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -216,9 +216,11 @@ def _feed_data(self, data: bytes) -> None: WSCloseCode.PROTOCOL_ERROR, f"Unexpected opcode={opcode!r}" ) - def parse_frame(self, buf: bytes) -> List[Tuple[bool, int, bytearray, int]]: + def parse_frame( + self, buf: bytes + ) -> List[Tuple[bool, int, bytearray, Optional[bool]]]: """Return the next frame from the socket.""" - frames: List[Tuple[bool, int, bytearray, int]] = [] + frames: List[Tuple[bool, int, bytearray, Optional[bool]]] = [] if self._tail: buf, self._tail = self._tail + buf, b"" @@ -278,7 +280,7 @@ def parse_frame(self, buf: bytes) -> List[Tuple[bool, int, bytearray, int]]: # OR set compress status if this is first fragment # Raise error if not first fragment with rsv1 = 0x1 
if self._frame_fin or self._compressed == -1: - self._compressed = 1 if rsv1 else 0 + self._compressed = True if rsv1 else False elif rsv1: raise WebSocketError( WSCloseCode.PROTOCOL_ERROR, From 0f9bea9e08fd560de664bae40343acb1fb496310 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:00:51 -1000 Subject: [PATCH 40/59] Revert "lint" This reverts commit 54dab0ad4275ad91d0c61e88f7e3f0ad0ae85356. --- aiohttp/_websocket/reader_c.pxd | 20 ++++++++++---------- aiohttp/_websocket/reader_py.py | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 0906e051d22..781f3056657 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -8,12 +8,12 @@ cdef unsigned int READ_PAYLOAD_LENGTH cdef unsigned int READ_PAYLOAD_MASK cdef unsigned int READ_PAYLOAD -cdef int OP_CODE_CONTINUATION -cdef int OP_CODE_TEXT -cdef int OP_CODE_BINARY -cdef int OP_CODE_CLOSE -cdef int OP_CODE_PING -cdef int OP_CODE_PONG +cdef unsigned int OP_CODE_CONTINUATION +cdef unsigned int OP_CODE_TEXT +cdef unsigned int OP_CODE_BINARY +cdef unsigned int OP_CODE_CLOSE +cdef unsigned int OP_CODE_PING +cdef unsigned int OP_CODE_PONG cdef object UNPACK_LEN2 cdef object UNPACK_LEN3 @@ -42,9 +42,9 @@ cdef class WebSocketReader: cdef bytearray _partial cdef unsigned int _state - cdef int _opcode + cdef unsigned int _opcode cdef bint _frame_fin - cdef int _frame_opcode + cdef unsigned int _frame_opcode cdef bytearray _frame_payload cdef bytes _tail @@ -52,7 +52,7 @@ cdef class WebSocketReader: cdef bytes _frame_mask cdef unsigned int _payload_length cdef unsigned int _payload_length_flag - cdef int _compressed + cdef object _compressed cdef object _decompressobj cdef bint _compress @@ -63,7 +63,7 @@ cdef class WebSocketReader: fin=bint, has_partial=bint, payload_merged=bytes, - opcode=int, + opcode="unsigned int", ) cpdef _feed_data(self, bytes data) diff 
--git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index c9da9b371bb..ac5919580d7 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -60,7 +60,7 @@ def __init__( self._frame_mask: Optional[bytes] = None self._payload_length = 0 self._payload_length_flag = 0 - self._compressed: int = -1 + self._compressed: Optional[bool] = None self._decompressobj: Optional[ZLibDecompressor] = None self._compress = compress @@ -218,9 +218,9 @@ def _feed_data(self, data: bytes) -> None: def parse_frame( self, buf: bytes - ) -> List[Tuple[bool, int, bytearray, Optional[bool]]]: + ) -> List[Tuple[bool, Optional[int], bytearray, Optional[bool]]]: """Return the next frame from the socket.""" - frames: List[Tuple[bool, int, bytearray, Optional[bool]]] = [] + frames: List[Tuple[bool, Optional[int], bytearray, Optional[bool]]] = [] if self._tail: buf, self._tail = self._tail + buf, b"" @@ -279,7 +279,7 @@ def parse_frame( # Set compress status if last package is FIN # OR set compress status if this is first fragment # Raise error if not first fragment with rsv1 = 0x1 - if self._frame_fin or self._compressed == -1: + if self._frame_fin or self._compressed is None: self._compressed = True if rsv1 else False elif rsv1: raise WebSocketError( From 4ae06ce74b0cf790a069c0d9a17dd080a2f27f72 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:00:53 -1000 Subject: [PATCH 41/59] Revert "reduce opcode type to unsigned int" This reverts commit 3697714d5c748562698de8be9da062a5dbc8e59b. 
--- aiohttp/_websocket/reader_py.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index ac5919580d7..4a78581ed18 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -107,7 +107,7 @@ def _feed_data(self, data: bytes) -> None: has_partial = bool(self._partial) if is_continuation: - if self._opcode == -1: + if self._opcode is None: raise WebSocketError( WSCloseCode.PROTOCOL_ERROR, "Continuation frame for non started message", From f1c83a04d737ab2f7808c6ca619bd681b3fe7837 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:00:54 -1000 Subject: [PATCH 42/59] Revert "reduce opcode type to unsigned int" This reverts commit 2be16097e403088db57998f43fb622ce1e2e35b6. --- aiohttp/_websocket/reader_c.pxd | 4 ++-- aiohttp/_websocket/reader_py.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 781f3056657..eee61176ff2 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -42,9 +42,9 @@ cdef class WebSocketReader: cdef bytearray _partial cdef unsigned int _state - cdef unsigned int _opcode + cdef object _opcode cdef bint _frame_fin - cdef unsigned int _frame_opcode + cdef object _frame_opcode cdef bytearray _frame_payload cdef bytes _tail diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 4a78581ed18..8870f36b2a5 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -50,9 +50,9 @@ def __init__( self._partial = bytearray() self._state = READ_HEADER - self._opcode = -1 + self._opcode: Optional[int] = None self._frame_fin = False - self._frame_opcode = -1 + self._frame_opcode: Optional[int] = None self._frame_payload = bytearray() self._tail: bytes = b"" @@ -113,7 +113,7 @@ def _feed_data(self, data: bytes) -> None: "Continuation 
frame for non started message", ) opcode = self._opcode - self._opcode = -1 + self._opcode = None # previous frame was non finished # we should get continuation opcode elif has_partial: From 6432c934d5eee785cd31c8cf7917126d8d4bfda1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:07:53 -1000 Subject: [PATCH 43/59] no need to convert it since its being packed into a tuple --- aiohttp/_websocket/reader_c.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index eee61176ff2..07e02e0852b 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -43,7 +43,7 @@ cdef class WebSocketReader: cdef unsigned int _state cdef object _opcode - cdef bint _frame_fin + cdef object _frame_fin cdef object _frame_opcode cdef bytearray _frame_payload From d19429537e387e6ba6f5a04315b925e00fe957af Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:12:00 -1000 Subject: [PATCH 44/59] cleanup type on masking --- aiohttp/_websocket/mask.pxd | 2 +- aiohttp/_websocket/mask.pyx | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/aiohttp/_websocket/mask.pxd b/aiohttp/_websocket/mask.pxd index 4a6f51d9452..3abbcc6750d 100644 --- a/aiohttp/_websocket/mask.pxd +++ b/aiohttp/_websocket/mask.pxd @@ -1,3 +1,3 @@ """Cython declarations for websocket masking.""" -cpdef _websocket_mask_cython(object mask, object data) +cpdef _websocket_mask_cython(bytes mask, bytearray data) diff --git a/aiohttp/_websocket/mask.pyx b/aiohttp/_websocket/mask.pyx index 7870392397d..4b8cec7716d 100644 --- a/aiohttp/_websocket/mask.pyx +++ b/aiohttp/_websocket/mask.pyx @@ -8,7 +8,7 @@ cdef extern from "Python.h": from libc.stdint cimport uint32_t, uint64_t, uintmax_t -cpdef _websocket_mask_cython(object mask, object data): +cpdef _websocket_mask_cython(bytes mask, bytearray data): """Note, this function 
mutates its `data` argument """ cdef: @@ -21,14 +21,6 @@ cpdef _websocket_mask_cython(object mask, object data): assert len(mask) == 4 - if not isinstance(mask, bytes): - mask = bytes(mask) - - if isinstance(data, bytearray): - data = <bytearray>data - else: - data = bytearray(data) - data_len = len(data) in_buf = <unsigned char*>PyByteArray_AsString(data) mask_buf = <const unsigned char*>PyBytes_AsString(mask) From f101ef1b337f14305de50a275e4fe236677a5ca2 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:12:46 -1000 Subject: [PATCH 45/59] cleanup signature on mask --- aiohttp/_websocket/mask.pxd | 2 +- aiohttp/_websocket/mask.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/_websocket/mask.pxd b/aiohttp/_websocket/mask.pxd index 3abbcc6750d..90983de9ac7 100644 --- a/aiohttp/_websocket/mask.pxd +++ b/aiohttp/_websocket/mask.pxd @@ -1,3 +1,3 @@ """Cython declarations for websocket masking.""" -cpdef _websocket_mask_cython(bytes mask, bytearray data) +cpdef void _websocket_mask_cython(bytes mask, bytearray data) diff --git a/aiohttp/_websocket/mask.pyx b/aiohttp/_websocket/mask.pyx index 4b8cec7716d..2d956c88996 100644 --- a/aiohttp/_websocket/mask.pyx +++ b/aiohttp/_websocket/mask.pyx @@ -8,7 +8,7 @@ cdef extern from "Python.h": from libc.stdint cimport uint32_t, uint64_t, uintmax_t -cpdef _websocket_mask_cython(bytes mask, bytearray data): +cpdef void _websocket_mask_cython(bytes mask, bytearray data): """Note, this function mutates its `data` argument """ cdef: From 35100b66b6dcc7c5e4e14916c3d46da308699bc1 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 10:13:18 -1000 Subject: [PATCH 46/59] cleanup signature on _feed_data --- aiohttp/_websocket/reader_c.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 07e02e0852b..f3c0bfa50ea 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -65,7 +65,7 @@ cdef class WebSocketReader: payload_merged=bytes, opcode="unsigned int", ) - cpdef _feed_data(self, bytes data) + cpdef void _feed_data(self, bytes data) @cython.locals( start_pos="unsigned int", From 3eb183edb09b92dd866fab259741849086565006 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 11:45:10 -1000 Subject: [PATCH 47/59] handle data not being bytes --- aiohttp/_websocket/reader_c.pxd | 2 +- aiohttp/_websocket/reader_py.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index f3c0bfa50ea..c36a0052b18 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -56,7 +56,7 @@ cdef class WebSocketReader: cdef object _decompressobj cdef bint _compress - cpdef feed_data(self, bytes data) + cpdef feed_data(self, object data) @cython.locals( is_continuation=bint, diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 59bc919ea17..ed3ec0eced5 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -1,6 +1,6 @@ """Reader for WebSocket protocol versions 13 and 8.""" -from typing import Final, List, Optional, Set, Tuple +from typing import Final, List, Optional, Set, Tuple, Union from ..compression_utils import ZLibDecompressor from ..helpers import set_exception @@ -66,7 +66,11 @@ def __init__( def feed_eof(self) -> None: self.queue.feed_eof() - def feed_data(self, data: bytes) -> Tuple[bool, bytes]: + # data can be bytearray on Windows 
because proactor event loop uses bytearray + def feed_data(self, data: Union[bytes, bytearray]) -> Tuple[bool, bytes]: + if type(data) is bytearray: + data = bytes(data) + if self._exc is not None: return True, data From 107fb266262ba868a0f393c6af657f0dfa096a98 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 11:47:01 -1000 Subject: [PATCH 48/59] return types --- aiohttp/_websocket/reader_c.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index c36a0052b18..80c239af5e3 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -56,7 +56,7 @@ cdef class WebSocketReader: cdef object _decompressobj cdef bint _compress - cpdef feed_data(self, object data) + cpdef tuple feed_data(self, object data) @cython.locals( is_continuation=bint, @@ -81,4 +81,4 @@ cdef class WebSocketReader: has_mask=bint, fin=bint, ) - cpdef parse_frame(self, bytes buf) + cpdef list parse_frame(self, bytes buf) From c1ccb7bd481e04dd7551929437b094aa6a7f8a6b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 11:50:51 -1000 Subject: [PATCH 49/59] type tuple --- aiohttp/_websocket/reader_c.pxd | 1 + 1 file changed, 1 insertion(+) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 80c239af5e3..452fe66400f 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -64,6 +64,7 @@ cdef class WebSocketReader: has_partial=bint, payload_merged=bytes, opcode="unsigned int", + frame=tuple, ) cpdef void _feed_data(self, bytes data) From c30ac29d09f042b138bccb74f554b9604ee9cdf0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 11:54:46 -1000 Subject: [PATCH 50/59] Revert "type tuple" This reverts commit c1ccb7bd481e04dd7551929437b094aa6a7f8a6b. 
--- aiohttp/_websocket/reader_c.pxd | 1 - 1 file changed, 1 deletion(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 452fe66400f..80c239af5e3 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -64,7 +64,6 @@ cdef class WebSocketReader: has_partial=bint, payload_merged=bytes, opcode="unsigned int", - frame=tuple, ) cpdef void _feed_data(self, bytes data) From 4e44c85dc3022f377a220d6b86dcae53a32ecb7d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:09:54 -1000 Subject: [PATCH 51/59] coverage --- tests/test_websocket_parser.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index 594c6a94d94..feb88665263 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -175,6 +175,21 @@ def test_parse_frame_header_payload_size( parser.parse_frame(struct.pack("!BB", 0b10001000, 0b01111110)) +def test_ping_frame_bytearray( + out: aiohttp.DataQueue[WSMessage], parser: WebSocketReader +) -> None: + """Verify feed_data with bytearray. + + Protractor event loop will call feed_data with bytearray. + """ + with mock.patch.object(parser, "parse_frame", autospec=True) as m: + m.return_value = [(1, WSMsgType.PING, b"data", False)] + + parser.feed_data(bytearray(b"")) + res = out._buffer[0] + assert res == WSMessagePing(data=b"data", extra="") + + def test_ping_frame(out: aiohttp.DataQueue[WSMessage], parser: WebSocketReader) -> None: with mock.patch.object(parser, "parse_frame", autospec=True) as m: m.return_value = [(1, WSMsgType.PING, b"data", False)] From 1ea8caeaca51cd9a70d0242e48a5c9bd3b4f6f4c Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:15:11 -1000 Subject: [PATCH 52/59] add the missing coverage --- tests/test_websocket_parser.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index feb88665263..ac3a7b8d408 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -105,6 +105,17 @@ def parser(out: aiohttp.DataQueue[WSMessage]) -> PatchableWebSocketReader: return PatchableWebSocketReader(out, 4 * 1024 * 1024) +def test_feed_data_remembers_exception(parser: WebSocketReader) -> None: + """Verify that feed_data remembers an exception was already raised internally.""" + error, data = parser.feed_data(struct.pack("!BB", 0b01100000, 0b00000000)) + assert error is True + assert data == b"" + + error, data = parser.feed_data(b"") + assert error is True + assert data == b"" + + def test_parse_frame(parser: WebSocketReader) -> None: parser.parse_frame(struct.pack("!BB", 0b00000001, 0b00000001)) res = parser.parse_frame(b"1") From a8322b5495a5baad89b03e20bae8be70172f29e1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:17:19 -1000 Subject: [PATCH 53/59] fix missing type --- aiohttp/_websocket/reader_c.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 80c239af5e3..02599379ce7 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -30,8 +30,8 @@ cdef object WSMessageClose cdef set ALLOWED_CLOSE_CODES cdef set MESSAGE_TYPES_WITH_CONTENT -cdef object EMPTY_FRAME -cdef object EMPTY_FRAME_ERROR +cdef tuple EMPTY_FRAME +cdef tuple EMPTY_FRAME_ERROR cdef class WebSocketReader: From 3d0dd116189dffe9928ddefd706c48b558dfacca Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:19:35 -1000 Subject: [PATCH 54/59] use fastcall --- aiohttp/_websocket/reader_c.pxd | 1 + aiohttp/_websocket/reader_py.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index 02599379ce7..a9ace1eeb72 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -36,6 +36,7 @@ cdef tuple EMPTY_FRAME_ERROR cdef class WebSocketReader: cdef object queue + cdef object _queue_feed_data cdef unsigned int _max_msg_size cdef Exception _exc diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index ed3ec0eced5..c352507bc5c 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -43,6 +43,7 @@ def __init__( self, queue: DataQueue[WSMessage], max_msg_size: int, compress: bool = True ) -> None: self.queue = queue + self._queue_feed_data = queue.feed_data self._max_msg_size = max_msg_size self._exc: Optional[Exception] = None @@ -174,13 +175,12 @@ def _feed_data(self, data: bytes) -> None: # This is not type safe, but many tests should fail in # test_client_ws_functional.py if this is wrong. 
msg = tuple.__new__(WSMessageText, (text, "", WSMsgType.TEXT)) - self.queue.feed_data(msg) - continue + else: + msg = tuple.__new__( + WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) + ) - msg = tuple.__new__( - WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) - ) - self.queue.feed_data(msg) + self._queue_feed_data(msg) elif opcode == OP_CODE_CLOSE: if len(payload) >= 2: close_code = UNPACK_CLOSE_CODE(payload[:2])[0] @@ -204,15 +204,15 @@ def _feed_data(self, data: bytes) -> None: else: msg = WSMessageClose(data=0, extra="") - self.queue.feed_data(msg) + self._queue_feed_data(msg) elif opcode == OP_CODE_PING: msg = WSMessagePing(data=payload, extra="") - self.queue.feed_data(msg) + self._queue_feed_data(msg) elif opcode == OP_CODE_PONG: msg = WSMessagePong(data=payload, extra="") - self.queue.feed_data(msg) + self._queue_feed_data(msg) else: raise WebSocketError( From 26d0e8cd45b11293bd7fbea39c6578b2641196f6 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:30:53 -1000 Subject: [PATCH 55/59] Make sure memoryview works as well --- aiohttp/_websocket/reader_py.py | 8 ++++++-- tests/test_websocket_parser.py | 29 +++++++++++++---------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index c352507bc5c..6cb0475e023 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -68,8 +68,12 @@ def feed_eof(self) -> None: self.queue.feed_eof() # data can be bytearray on Windows because proactor event loop uses bytearray - def feed_data(self, data: Union[bytes, bytearray]) -> Tuple[bool, bytes]: - if type(data) is bytearray: + # and asyncio types this to Union[bytes, bytearray, memoryview] so we need + # verify that data is bytes + def feed_data( + self, data: Union[bytes, bytearray, memoryview] + ) -> Tuple[bool, bytes]: + if type(data) is not bytes: data = bytes(data) if self._exc is not None: diff --git 
a/tests/test_websocket_parser.py b/tests/test_websocket_parser.py index ac3a7b8d408..6d490fd15e1 100644 --- a/tests/test_websocket_parser.py +++ b/tests/test_websocket_parser.py @@ -3,6 +3,7 @@ import random import struct import zlib +from typing import Union from unittest import mock import pytest @@ -186,26 +187,22 @@ def test_parse_frame_header_payload_size( parser.parse_frame(struct.pack("!BB", 0b10001000, 0b01111110)) -def test_ping_frame_bytearray( - out: aiohttp.DataQueue[WSMessage], parser: WebSocketReader +# Proactor event loop will call feed_data with bytearray. Since +# asyncio technically supports memoryview as well, we should test that. +@pytest.mark.parametrize( + argnames="data", + argvalues=[b"", bytearray(b""), memoryview(b"")], + ids=["bytes", "bytearray", "memoryview"], +) +def test_ping_frame( + out: aiohttp.DataQueue[WSMessage], + parser: WebSocketReader, + data: Union[bytes, bytearray, memoryview], ) -> None: - """Verify feed_data with bytearray. - - Protractor event loop will call feed_data with bytearray. - """ with mock.patch.object(parser, "parse_frame", autospec=True) as m: m.return_value = [(1, WSMsgType.PING, b"data", False)] - parser.feed_data(bytearray(b"")) - res = out._buffer[0] - assert res == WSMessagePing(data=b"data", extra="") - - -def test_ping_frame(out: aiohttp.DataQueue[WSMessage], parser: WebSocketReader) -> None: - with mock.patch.object(parser, "parse_frame", autospec=True) as m: - m.return_value = [(1, WSMsgType.PING, b"data", False)] - - parser.feed_data(b"") + parser.feed_data(data) res = out._buffer[0] assert res == WSMessagePing(data=b"data", extra="") From 5947443b1b0b015a1aa0a3a5380d7f4cb437743c Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:32:41 -1000 Subject: [PATCH 56/59] Update aiohttp/_websocket/reader_py.py --- aiohttp/_websocket/reader_py.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 6cb0475e023..cd4247e160d 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -69,7 +69,7 @@ def feed_eof(self) -> None: # data can be bytearray on Windows because proactor event loop uses bytearray # and asyncio types this to Union[bytes, bytearray, memoryview] so we need - # verify that data is bytes + # to coerce data to bytes if it is not def feed_data( self, data: Union[bytes, bytearray, memoryview] ) -> Tuple[bool, bytes]: From a15e2eb0816bcb3dffc976b409a7c08847db61cd Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:44:58 -1000 Subject: [PATCH 57/59] cleanup refs --- aiohttp/_websocket/reader_c.pxd | 5 +++++ aiohttp/_websocket/reader_py.py | 13 +++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd index a9ace1eeb72..d4a82397f92 100644 --- a/aiohttp/_websocket/reader_c.pxd +++ b/aiohttp/_websocket/reader_c.pxd @@ -18,6 +18,7 @@ cdef unsigned int OP_CODE_PONG cdef object UNPACK_LEN2 cdef object UNPACK_LEN3 cdef object UNPACK_CLOSE_CODE +cdef object TUPLE_NEW cdef object WSMsgType @@ -27,12 +28,16 @@ cdef object WSMessagePing cdef object WSMessagePong cdef object WSMessageClose +cdef object WS_MSG_TYPE_TEXT +cdef object WS_MSG_TYPE_BINARY + cdef set ALLOWED_CLOSE_CODES cdef set MESSAGE_TYPES_WITH_CONTENT cdef tuple EMPTY_FRAME cdef tuple EMPTY_FRAME_ERROR + cdef class WebSocketReader: cdef object queue diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index cd4247e160d..07d1735163d 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -26,6 
+26,9 @@ READ_PAYLOAD_MASK = 3 READ_PAYLOAD = 4 +WS_MSG_TYPE_BINARY = WSMsgType.BINARY +WS_MSG_TYPE_TEXT = WSMsgType.TEXT + # WSMsgType values unpacked so they can by cythonized to ints OP_CODE_CONTINUATION = WSMsgType.CONTINUATION.value OP_CODE_TEXT = WSMsgType.TEXT.value @@ -37,6 +40,8 @@ EMPTY_FRAME_ERROR = (True, b"") EMPTY_FRAME = (False, b"") +TUPLE_NEW = tuple.__new__ + class WebSocketReader: def __init__( @@ -178,11 +183,11 @@ def _feed_data(self, data: bytes) -> None: # bottleneck, so we use tuple.__new__ to improve performance. # This is not type safe, but many tests should fail in # test_client_ws_functional.py if this is wrong. - msg = tuple.__new__(WSMessageText, (text, "", WSMsgType.TEXT)) + tup = (text, "", WS_MSG_TYPE_TEXT) + msg = TUPLE_NEW(WSMessageText, tup) else: - msg = tuple.__new__( - WSMessageBinary, (payload_merged, "", WSMsgType.BINARY) - ) + tup = (payload_merged, "", WS_MSG_TYPE_BINARY) + msg = TUPLE_NEW(WSMessageBinary, tup) self._queue_feed_data(msg) elif opcode == OP_CODE_CLOSE: From cc025d39c0b37beae75b67d4ec9572c0dbc69343 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:47:23 -1000 Subject: [PATCH 58/59] cleanup refs --- aiohttp/_websocket/reader_py.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index 07d1735163d..be7246eb988 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -183,11 +183,11 @@ def _feed_data(self, data: bytes) -> None: # bottleneck, so we use tuple.__new__ to improve performance. # This is not type safe, but many tests should fail in # test_client_ws_functional.py if this is wrong. 
- tup = (text, "", WS_MSG_TYPE_TEXT) - msg = TUPLE_NEW(WSMessageText, tup) + msg = TUPLE_NEW(WSMessageText, (text, "", WS_MSG_TYPE_TEXT)) else: - tup = (payload_merged, "", WS_MSG_TYPE_BINARY) - msg = TUPLE_NEW(WSMessageBinary, tup) + msg = TUPLE_NEW( + WSMessageBinary, (payload_merged, "", WS_MSG_TYPE_BINARY) + ) self._queue_feed_data(msg) elif opcode == OP_CODE_CLOSE: From 2165de1aa51818ac1320c94dfcc79de344da07e1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" <nick@koston.org> Date: Sun, 27 Oct 2024 12:53:32 -1000 Subject: [PATCH 59/59] comments --- aiohttp/_websocket/reader_py.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py index be7246eb988..f7136365129 100644 --- a/aiohttp/_websocket/reader_py.py +++ b/aiohttp/_websocket/reader_py.py @@ -21,6 +21,8 @@ ALLOWED_CLOSE_CODES: Final[Set[int]] = {int(i) for i in WSCloseCode} +# States for the reader, used to parse the WebSocket frame +# integer values are used so they can be cythonized READ_HEADER = 1 READ_PAYLOAD_LENGTH = 2 READ_PAYLOAD_MASK = 3