From 319b8b6bef1a8666b9ef57dfea5030da0e1effc2 Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Tue, 14 Sep 2021 11:25:05 +0100 Subject: [PATCH 01/74] Name the type of token in "Invalid token" messages (#10815) I had one of these error messages yesterday and assumed it was an invalid auth token (because that was an HTTP query parameter in the test) I was working on. In fact, it was an invalid next batch token for syncing. --- changelog.d/10815.misc | 1 + synapse/handlers/auth.py | 2 +- synapse/storage/relations.py | 4 ++-- synapse/types.py | 6 +++--- 4 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/10815.misc diff --git a/changelog.d/10815.misc b/changelog.d/10815.misc new file mode 100644 index 000000000..fc2534dc1 --- /dev/null +++ b/changelog.d/10815.misc @@ -0,0 +1 @@ +Specify the type of token in generic "Invalid token" error messages. \ No newline at end of file diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index fbbf6fd83..3ea627008 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1347,7 +1347,7 @@ async def validate_short_term_login_token( try: res = self.macaroon_gen.verify_short_term_login_token(login_token) except Exception: - raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) + raise AuthError(403, "Invalid login token", errcode=Codes.FORBIDDEN) await self.auth.check_auth_blocking(res.user_id) return res diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py index c552dbf04..10a46b5e8 100644 --- a/synapse/storage/relations.py +++ b/synapse/storage/relations.py @@ -73,7 +73,7 @@ def from_string(string: str) -> "RelationPaginationToken": t, s = string.split("-") return RelationPaginationToken(int(t), int(s)) except ValueError: - raise SynapseError(400, "Invalid token") + raise SynapseError(400, "Invalid relation pagination token") def to_string(self) -> str: return "%d-%d" % (self.topological, self.stream) @@ -103,7 +103,7 @@ def 
from_string(string: str) -> "AggregationPaginationToken": c, s = string.split("-") return AggregationPaginationToken(int(c), int(s)) except ValueError: - raise SynapseError(400, "Invalid token") + raise SynapseError(400, "Invalid aggregation pagination token") def to_string(self) -> str: return "%d-%d" % (self.count, self.stream) diff --git a/synapse/types.py b/synapse/types.py index d4759b2df..90168ce8f 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -511,7 +511,7 @@ async def parse(cls, store: "DataStore", string: str) -> "RoomStreamToken": ) except Exception: pass - raise SynapseError(400, "Invalid token %r" % (string,)) + raise SynapseError(400, "Invalid room stream token %r" % (string,)) @classmethod def parse_stream_token(cls, string: str) -> "RoomStreamToken": @@ -520,7 +520,7 @@ def parse_stream_token(cls, string: str) -> "RoomStreamToken": return cls(topological=None, stream=int(string[1:])) except Exception: pass - raise SynapseError(400, "Invalid token %r" % (string,)) + raise SynapseError(400, "Invalid room stream token %r" % (string,)) def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": """Return a new token such that if an event is after both this token and @@ -619,7 +619,7 @@ async def from_string(cls, store: "DataStore", string: str) -> "StreamToken": await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:]) ) except Exception: - raise SynapseError(400, "Invalid Token") + raise SynapseError(400, "Invalid stream token") async def to_string(self, store: "DataStore") -> str: return self._SEPARATOR.join( From b996782df51eaa5dd30635a7c59c93994d3a735e Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Tue, 14 Sep 2021 07:09:38 -0400 Subject: [PATCH 02/74] Convert media repo's FileInfo to attrs. (#10785) This is mostly an internal change, but improves type hints in the media code. 
--- changelog.d/10785.misc | 1 + synapse/rest/media/v1/_base.py | 94 +++++++++++++-------- synapse/rest/media/v1/media_repository.py | 40 +++++---- synapse/rest/media/v1/media_storage.py | 36 ++++---- synapse/rest/media/v1/thumbnail_resource.py | 77 +++++++++-------- 5 files changed, 140 insertions(+), 108 deletions(-) create mode 100644 changelog.d/10785.misc diff --git a/changelog.d/10785.misc b/changelog.d/10785.misc new file mode 100644 index 000000000..3d7f91d51 --- /dev/null +++ b/changelog.d/10785.misc @@ -0,0 +1 @@ +Convert the internal `FileInfo` class to attrs and add type hints. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 90364ebcf..814f4309f 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -18,6 +18,8 @@ import urllib from typing import Awaitable, Dict, Generator, List, Optional, Tuple +import attr + from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender from twisted.web.server import Request @@ -287,44 +289,62 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass -class FileInfo: - """Details about a requested/uploaded file. - - Attributes: - server_name (str): The server name where the media originated from, - or None if local. - file_id (str): The local ID of the file. For local files this is the - same as the media_id - url_cache (bool): If the file is for the url preview cache - thumbnail (bool): Whether the file is a thumbnail or not. - thumbnail_width (int) - thumbnail_height (int) - thumbnail_method (str) - thumbnail_type (str): Content type of thumbnail, e.g. image/png - thumbnail_length (int): The size of the media file, in bytes. 
- """ +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThumbnailInfo: + """Details about a generated thumbnail.""" - def __init__( - self, - server_name, - file_id, - url_cache=False, - thumbnail=False, - thumbnail_width=None, - thumbnail_height=None, - thumbnail_method=None, - thumbnail_type=None, - thumbnail_length=None, - ): - self.server_name = server_name - self.file_id = file_id - self.url_cache = url_cache - self.thumbnail = thumbnail - self.thumbnail_width = thumbnail_width - self.thumbnail_height = thumbnail_height - self.thumbnail_method = thumbnail_method - self.thumbnail_type = thumbnail_type - self.thumbnail_length = thumbnail_length + width: int + height: int + method: str + # Content type of thumbnail, e.g. image/png + type: str + # The size of the media file, in bytes. + length: Optional[int] = None + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class FileInfo: + """Details about a requested/uploaded file.""" + + # The server name where the media originated from, or None if local. + server_name: Optional[str] + # The local ID of the file. For local files this is the same as the media_id + file_id: str + # If the file is for the url preview cache + url_cache: bool = False + # Whether the file is a thumbnail or not. + thumbnail: Optional[ThumbnailInfo] = None + + # The below properties exist to maintain compatibility with third-party modules. 
+ @property + def thumbnail_width(self): + if not self.thumbnail: + return None + return self.thumbnail.width + + @property + def thumbnail_height(self): + if not self.thumbnail: + return None + return self.thumbnail.height + + @property + def thumbnail_method(self): + if not self.thumbnail: + return None + return self.thumbnail.method + + @property + def thumbnail_type(self): + if not self.thumbnail: + return None + return self.thumbnail.type + + @property + def thumbnail_length(self): + if not self.thumbnail: + return None + return self.thumbnail.length def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0f5ce41ff..40ce8d2bc 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -42,6 +42,7 @@ from ._base import ( FileInfo, Responder, + ThumbnailInfo, get_filename_from_headers, respond_404, respond_with_responder, @@ -210,7 +211,7 @@ async def get_local_media( upload_name = name if name else media_info["upload_name"] url_cache = media_info["url_cache"] - file_info = FileInfo(None, media_id, url_cache=url_cache) + file_info = FileInfo(None, media_id, url_cache=bool(url_cache)) responder = await self.media_storage.fetch_media(file_info) await respond_with_responder( @@ -514,7 +515,7 @@ async def generate_local_exact_thumbnail( t_height: int, t_method: str, t_type: str, - url_cache: Optional[str], + url_cache: bool, ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( FileInfo(None, media_id, url_cache=url_cache) @@ -548,11 +549,12 @@ async def generate_local_exact_thumbnail( server_name=None, file_id=media_id, url_cache=url_cache, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + 
type=t_type, + ), ) output_path = await self.media_storage.store_file( @@ -585,7 +587,7 @@ async def generate_remote_exact_thumbnail( t_type: str, ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( - FileInfo(server_name, file_id, url_cache=False) + FileInfo(server_name, file_id) ) try: @@ -616,11 +618,12 @@ async def generate_remote_exact_thumbnail( file_info = FileInfo( server_name=server_name, file_id=file_id, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) output_path = await self.media_storage.store_file( @@ -742,12 +745,13 @@ async def _generate_thumbnails( file_info = FileInfo( server_name=server_name, file_id=file_id, - thumbnail=True, - thumbnail_width=t_width, - thumbnail_height=t_height, - thumbnail_method=t_method, - thumbnail_type=t_type, url_cache=url_cache, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) with self.media_storage.store_into_file(file_info) as (f, fname, finish): diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 56cdc1b4e..c0bb40c11 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -176,9 +176,9 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]: self.filepaths.remote_media_thumbnail_rel_legacy( server_name=file_info.server_name, file_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, ) ) @@ -220,9 +220,9 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy( 
server_name=file_info.server_name, file_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, ) legacy_local_path = os.path.join(self.local_media_directory, legacy_path) if os.path.exists(legacy_local_path): @@ -255,10 +255,10 @@ def _file_info_to_path(self, file_info: FileInfo) -> str: if file_info.thumbnail: return self.filepaths.url_cache_thumbnail_rel( media_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, - method=file_info.thumbnail_method, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, ) return self.filepaths.url_cache_filepath_rel(file_info.file_id) @@ -267,10 +267,10 @@ def _file_info_to_path(self, file_info: FileInfo) -> str: return self.filepaths.remote_media_thumbnail_rel( server_name=file_info.server_name, file_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, - method=file_info.thumbnail_method, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + method=file_info.thumbnail.method, ) return self.filepaths.remote_media_filepath_rel( file_info.server_name, file_info.file_id @@ -279,10 +279,10 @@ def _file_info_to_path(self, file_info: FileInfo) -> str: if file_info.thumbnail: return self.filepaths.local_media_thumbnail_rel( media_id=file_info.file_id, - width=file_info.thumbnail_width, - height=file_info.thumbnail_height, - content_type=file_info.thumbnail_type, - method=file_info.thumbnail_method, + width=file_info.thumbnail.width, + height=file_info.thumbnail.height, + content_type=file_info.thumbnail.type, + 
method=file_info.thumbnail.method, ) return self.filepaths.local_media_filepath_rel(file_info.file_id) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 12bd745cb..22f43d853 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -26,6 +26,7 @@ from ._base import ( FileInfo, + ThumbnailInfo, parse_media_id, respond_404, respond_with_file, @@ -114,7 +115,7 @@ async def _respond_local_thumbnail( thumbnail_infos, media_id, media_id, - url_cache=media_info["url_cache"], + url_cache=bool(media_info["url_cache"]), server_name=None, ) @@ -149,11 +150,12 @@ async def _select_or_generate_local_thumbnail( server_name=None, file_id=media_id, url_cache=media_info["url_cache"], - thumbnail=True, - thumbnail_width=info["thumbnail_width"], - thumbnail_height=info["thumbnail_height"], - thumbnail_type=info["thumbnail_type"], - thumbnail_method=info["thumbnail_method"], + thumbnail=ThumbnailInfo( + width=info["thumbnail_width"], + height=info["thumbnail_height"], + type=info["thumbnail_type"], + method=info["thumbnail_method"], + ), ) t_type = file_info.thumbnail_type @@ -173,7 +175,7 @@ async def _select_or_generate_local_thumbnail( desired_height, desired_method, desired_type, - url_cache=media_info["url_cache"], + url_cache=bool(media_info["url_cache"]), ) if file_path: @@ -210,11 +212,12 @@ async def _select_or_generate_remote_thumbnail( file_info = FileInfo( server_name=server_name, file_id=media_info["filesystem_id"], - thumbnail=True, - thumbnail_width=info["thumbnail_width"], - thumbnail_height=info["thumbnail_height"], - thumbnail_type=info["thumbnail_type"], - thumbnail_method=info["thumbnail_method"], + thumbnail=ThumbnailInfo( + width=info["thumbnail_width"], + height=info["thumbnail_height"], + type=info["thumbnail_type"], + method=info["thumbnail_method"], + ), ) t_type = file_info.thumbnail_type @@ -271,7 +274,7 @@ async def _respond_remote_thumbnail( 
thumbnail_infos, media_id, media_info["filesystem_id"], - url_cache=None, + url_cache=False, server_name=server_name, ) @@ -285,7 +288,7 @@ async def _select_and_respond_with_thumbnail( thumbnail_infos: List[Dict[str, Any]], media_id: str, file_id: str, - url_cache: Optional[str] = None, + url_cache: bool, server_name: Optional[str] = None, ) -> None: """ @@ -299,7 +302,7 @@ async def _select_and_respond_with_thumbnail( desired_type: The desired content-type of the thumbnail. thumbnail_infos: A list of dictionaries of candidate thumbnails. file_id: The ID of the media that a thumbnail is being requested for. - url_cache: The URL cache value. + url_cache: True if this is from a URL cache. server_name: The server name, if this is a remote thumbnail. """ if thumbnail_infos: @@ -318,13 +321,16 @@ async def _select_and_respond_with_thumbnail( respond_404(request) return + # The thumbnail property must exist. + assert file_info.thumbnail is not None + responder = await self.media_storage.fetch_media(file_info) if responder: await respond_with_responder( request, responder, - file_info.thumbnail_type, - file_info.thumbnail_length, + file_info.thumbnail.type, + file_info.thumbnail.length, ) return @@ -351,18 +357,18 @@ async def _select_and_respond_with_thumbnail( server_name, file_id=file_id, media_id=media_id, - t_width=file_info.thumbnail_width, - t_height=file_info.thumbnail_height, - t_method=file_info.thumbnail_method, - t_type=file_info.thumbnail_type, + t_width=file_info.thumbnail.width, + t_height=file_info.thumbnail.height, + t_method=file_info.thumbnail.method, + t_type=file_info.thumbnail.type, ) else: await self.media_repo.generate_local_exact_thumbnail( media_id=media_id, - t_width=file_info.thumbnail_width, - t_height=file_info.thumbnail_height, - t_method=file_info.thumbnail_method, - t_type=file_info.thumbnail_type, + t_width=file_info.thumbnail.width, + t_height=file_info.thumbnail.height, + t_method=file_info.thumbnail.method, + 
t_type=file_info.thumbnail.type, url_cache=url_cache, ) @@ -370,8 +376,8 @@ async def _select_and_respond_with_thumbnail( await respond_with_responder( request, responder, - file_info.thumbnail_type, - file_info.thumbnail_length, + file_info.thumbnail.type, + file_info.thumbnail.length, ) else: logger.info("Failed to find any generated thumbnails") @@ -385,7 +391,7 @@ def _select_thumbnail( desired_type: str, thumbnail_infos: List[Dict[str, Any]], file_id: str, - url_cache: Optional[str], + url_cache: bool, server_name: Optional[str], ) -> Optional[FileInfo]: """ @@ -398,7 +404,7 @@ def _select_thumbnail( desired_type: The desired content-type of the thumbnail. thumbnail_infos: A list of dictionaries of candidate thumbnails. file_id: The ID of the media that a thumbnail is being requested for. - url_cache: The URL cache value. + url_cache: True if this is from a URL cache. server_name: The server name, if this is a remote thumbnail. Returns: @@ -495,12 +501,13 @@ def _select_thumbnail( file_id=file_id, url_cache=url_cache, server_name=server_name, - thumbnail=True, - thumbnail_width=thumbnail_info["thumbnail_width"], - thumbnail_height=thumbnail_info["thumbnail_height"], - thumbnail_type=thumbnail_info["thumbnail_type"], - thumbnail_method=thumbnail_info["thumbnail_method"], - thumbnail_length=thumbnail_info["thumbnail_length"], + thumbnail=ThumbnailInfo( + width=thumbnail_info["thumbnail_width"], + height=thumbnail_info["thumbnail_height"], + type=thumbnail_info["thumbnail_type"], + method=thumbnail_info["thumbnail_method"], + length=thumbnail_info["thumbnail_length"], + ), ) # No matching thumbnail was found. 
From 14b8c0476f93ea2ed3134e75733e45aa0ab6f5a5 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 14 Sep 2021 13:01:30 +0100 Subject: [PATCH 03/74] Prevent logging context going missing on federation request timeout (#10810) In `MatrixFederationHttpClient._send_request()`, we make an HTTP request using an `Agent`, wrap that request in a timeout and await the resulting `Deferred`. On its own, the `Agent` performing the HTTP request correctly stashes and restores the logging context while waiting. The addition of the timeout introduces a path where the logging context is not restored when execution resumes. To address this, we wrap the timeout `Deferred` in a `make_deferred_yieldable()` to stash the logging context and restore it on completion of the `await`. However, this is not sufficient, since by the time we construct the timeout `Deferred`, the `Agent` has already stashed and cleared the logging context when using `make_deferred_yieldable()` to produce its `Deferred` for the request. Hence, we wrap the `Agent` request in a `run_in_background()` to "fork" and preserve the logging context so that we can stash and restore it when `await`ing the timeout `Deferred`. This approach is similar to the one used with `defer.gatherResults`. Note that the code is still not fully correct. When a timeout occurs, the request remains running in the background (existing behavior which has nothing to do with the new call to `run_in_background`) and may re-start the logging context after it has finished.
--- changelog.d/10810.bugfix | 1 + synapse/http/matrixfederationclient.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 changelog.d/10810.bugfix diff --git a/changelog.d/10810.bugfix b/changelog.d/10810.bugfix new file mode 100644 index 000000000..43e91f1f5 --- /dev/null +++ b/changelog.d/10810.bugfix @@ -0,0 +1 @@ +Fix a case where logging contexts would go missing when federation requests time out. diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 2e9898997..ef10ec093 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -66,7 +66,7 @@ ) from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging import opentracing -from synapse.logging.context import make_deferred_yieldable +from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags from synapse.types import JsonDict from synapse.util import json_decoder @@ -553,20 +553,29 @@ async def _send_request( with Measure(self.clock, "outbound_request"): # we don't want all the fancy cookie and redirect handling # that treq.request gives: just use the raw Agent. - request_deferred = self.agent.request( + + # To preserve the logging context, the timeout is treated + # in a similar way to `defer.gatherResults`: + # * Each logging context-preserving fork is wrapped in + # `run_in_background`. In this case there is only one, + # since the timeout fork is not logging-context aware. + # * The `Deferred` that joins the forks back together is + # wrapped in `make_deferred_yieldable` to restore the + # logging context regardless of the path taken. 
+ request_deferred = run_in_background( + self.agent.request, method_bytes, url_bytes, headers=Headers(headers_dict), bodyProducer=producer, ) - request_deferred = timeout_deferred( request_deferred, timeout=_sec_timeout, reactor=self.reactor, ) - response = await request_deferred + response = await make_deferred_yieldable(request_deferred) except DNSLookupError as e: raise RequestSendFailed(e, can_retry=retry_on_dns_fail) from e except Exception as e: From 8eb7cb2e0dd66d2eb350c1822fb448e09148cd7e Mon Sep 17 00:00:00 2001 From: reivilibre <38398653+reivilibre@users.noreply.github.com> Date: Tue, 14 Sep 2021 16:35:53 +0100 Subject: [PATCH 04/74] Make StateFilter frozen so we can hash it (#10816) Also enables Mypy for related tests. --- changelog.d/10816.misc | 1 + mypy.ini | 1 + synapse/storage/state.py | 45 +++++++++++++++++++++++++----------- tests/storage/test_state.py | 46 +++++++++++++++++++++++-------------- 4 files changed, 63 insertions(+), 30 deletions(-) create mode 100644 changelog.d/10816.misc diff --git a/changelog.d/10816.misc b/changelog.d/10816.misc new file mode 100644 index 000000000..2ca55b334 --- /dev/null +++ b/changelog.d/10816.misc @@ -0,0 +1 @@ +Make `StateFilter` frozen so it is hashable. 
diff --git a/mypy.ini b/mypy.ini index 09ffdda1b..60dadc478 100644 --- a/mypy.ini +++ b/mypy.ini @@ -86,6 +86,7 @@ files = tests/handlers/test_sync.py, tests/rest/client/test_login.py, tests/rest/client/test_auth.py, + tests/storage/test_state.py, tests/util/test_itertools.py, tests/util/test_stream_change_cache.py diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e5400d681..c76529cb5 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -25,12 +25,15 @@ ) import attr +from frozendict import frozendict from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.types import MutableStateMap, StateMap if TYPE_CHECKING: + from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad + from synapse.server import HomeServer from synapse.storage.databases import Databases @@ -40,7 +43,7 @@ T = TypeVar("T") -@attr.s(slots=True) +@attr.s(slots=True, frozen=True) class StateFilter: """A filter used when querying for state. @@ -53,14 +56,19 @@ class StateFilter: appear in `types`. """ - types = attr.ib(type=Dict[str, Optional[Set[str]]]) + types = attr.ib(type="frozendict[str, Optional[FrozenSet[str]]]") include_others = attr.ib(default=False, type=bool) def __attrs_post_init__(self): # If `include_others` is set we canonicalise the filter by removing # wildcards from the types dictionary if self.include_others: - self.types = {k: v for k, v in self.types.items() if v is not None} + # this is needed to work around the fact that StateFilter is frozen + object.__setattr__( + self, + "types", + frozendict({k: v for k, v in self.types.items() if v is not None}), + ) @staticmethod def all() -> "StateFilter": @@ -69,7 +77,7 @@ def all() -> "StateFilter": Returns: The new state filter. 
""" - return StateFilter(types={}, include_others=True) + return StateFilter(types=frozendict(), include_others=True) @staticmethod def none() -> "StateFilter": @@ -78,7 +86,7 @@ def none() -> "StateFilter": Returns: The new state filter. """ - return StateFilter(types={}, include_others=False) + return StateFilter(types=frozendict(), include_others=False) @staticmethod def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter": @@ -103,7 +111,12 @@ def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter": type_dict.setdefault(typ, set()).add(s) # type: ignore - return StateFilter(types=type_dict) + return StateFilter( + types=frozendict( + (k, frozenset(v) if v is not None else None) + for k, v in type_dict.items() + ) + ) @staticmethod def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": @@ -116,7 +129,10 @@ def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": Returns: The new state filter """ - return StateFilter(types={EventTypes.Member: set(members)}, include_others=True) + return StateFilter( + types=frozendict({EventTypes.Member: frozenset(members)}), + include_others=True, + ) def return_expanded(self) -> "StateFilter": """Creates a new StateFilter where type wild cards have been removed @@ -173,7 +189,7 @@ def return_expanded(self) -> "StateFilter": # We want to return all non-members, but only particular # memberships return StateFilter( - types={EventTypes.Member: self.types[EventTypes.Member]}, + types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}), include_others=True, ) @@ -245,14 +261,15 @@ def max_entries_returned(self) -> Optional[int]: return len(self.concrete_types()) - def filter_state(self, state_dict: StateMap[T]) -> StateMap[T]: - """Returns the state filtered with by this StateFilter + def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]: + """Returns the state filtered with by this StateFilter. 
Args: state: The state map to filter Returns: - The filtered state map + The filtered state map. + This is a copy, so it's safe to mutate. """ if self.is_full(): return dict(state_dict) @@ -324,14 +341,16 @@ def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]: if state_keys is None: member_filter = StateFilter.all() else: - member_filter = StateFilter({EventTypes.Member: state_keys}) + member_filter = StateFilter(frozendict({EventTypes.Member: state_keys})) elif self.include_others: member_filter = StateFilter.all() else: member_filter = StateFilter.none() non_member_filter = StateFilter( - types={k: v for k, v in self.types.items() if k != EventTypes.Member}, + types=frozendict( + {k: v for k, v in self.types.items() if k != EventTypes.Member} + ), include_others=self.include_others, ) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 869526459..32060f2ab 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -14,6 +14,8 @@ import logging +from frozendict import frozendict + from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.storage.state import StateFilter @@ -183,7 +185,9 @@ def test_get_state_for_event(self): self.storage.state.get_state_for_event( e5.event_id, state_filter=StateFilter( - types={EventTypes.Member: {self.u_alice.to_string()}}, + types=frozendict( + {EventTypes.Member: frozenset({self.u_alice.to_string()})} + ), include_others=True, ), ) @@ -203,7 +207,8 @@ def test_get_state_for_event(self): self.storage.state.get_state_for_event( e5.event_id, state_filter=StateFilter( - types={EventTypes.Member: set()}, include_others=True + types=frozendict({EventTypes.Member: frozenset()}), + include_others=True, ), ) ) @@ -228,7 +233,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: set()}, include_others=True + 
types=frozendict({EventTypes.Member: frozenset()}), include_others=True ), ) @@ -245,7 +250,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: set()}, include_others=True + types=frozendict({EventTypes.Member: frozenset()}), include_others=True ), ) @@ -258,7 +263,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: None}, include_others=True + types=frozendict({EventTypes.Member: None}), include_others=True ), ) @@ -275,7 +280,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: None}, include_others=True + types=frozendict({EventTypes.Member: None}), include_others=True ), ) @@ -295,7 +300,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=True + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + include_others=True, ), ) @@ -312,7 +318,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=True + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + include_others=True, ), ) @@ -325,7 +332,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=False + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + include_others=False, ), ) @@ -375,7 +383,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: set()}, include_others=True + types=frozendict({EventTypes.Member: 
frozenset()}), include_others=True ), ) @@ -387,7 +395,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: set()}, include_others=True + types=frozendict({EventTypes.Member: frozenset()}), include_others=True ), ) @@ -400,7 +408,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: None}, include_others=True + types=frozendict({EventTypes.Member: None}), include_others=True ), ) @@ -411,7 +419,7 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: None}, include_others=True + types=frozendict({EventTypes.Member: None}), include_others=True ), ) @@ -430,7 +438,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=True + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + include_others=True, ), ) @@ -441,7 +450,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=True + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + include_others=True, ), ) @@ -454,7 +464,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=False + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + include_others=False, ), ) @@ -465,7 +476,8 @@ def test_get_state_for_event(self): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types={EventTypes.Member: {e5.state_key}}, include_others=False + types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + 
include_others=False, ), ) From 1c555527b351a8b0dcdf54ba7091141347af2a73 Mon Sep 17 00:00:00 2001 From: Eric Eastwood <erice@element.io> Date: Wed, 15 Sep 2021 03:30:58 -0500 Subject: [PATCH 05/74] Split out `/batch_send` meta events to their own fields (MSC2716) (#10777) --- changelog.d/10777.misc | 1 + synapse/rest/client/room_batch.py | 29 ++++++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) create mode 100644 changelog.d/10777.misc diff --git a/changelog.d/10777.misc b/changelog.d/10777.misc new file mode 100644 index 000000000..aed78a16f --- /dev/null +++ b/changelog.d/10777.misc @@ -0,0 +1 @@ +Split out [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) meta events to their own fields in the `/batch_send` response. diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index ed9697844..783fecf19 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -14,6 +14,7 @@ import logging import re +from http import HTTPStatus from typing import TYPE_CHECKING, Awaitable, List, Tuple from twisted.web.server import Request @@ -179,7 +180,7 @@ async def on_POST( if not requester.app_service: raise AuthError( - 403, + HTTPStatus.FORBIDDEN, "Only application services can use the /batchsend endpoint", ) @@ -192,7 +193,7 @@ async def on_POST( if prev_events_from_query is None: raise SynapseError( - 400, + HTTPStatus.BAD_REQUEST, "prev_event query parameter is required when inserting historical messages back in time", errcode=Codes.MISSING_PARAM, ) @@ -213,7 +214,7 @@ async def on_POST( prev_state_ids = list(prev_state_map.values()) auth_event_ids = prev_state_ids - state_events_at_start = [] + state_event_ids_at_start = [] for state_event in body["state_events_at_start"]: assert_params_in_dict( state_event, ["type", "origin_server_ts", "content", "sender"] @@ -279,7 +280,7 @@ async def on_POST( ) event_id = event.event_id - state_events_at_start.append(event_id) + 
state_event_ids_at_start.append(event_id) auth_event_ids.append(event_id) events_to_create = body["events"] @@ -424,20 +425,26 @@ async def on_POST( context=context, ) - # Add the base_insertion_event to the bottom of the list we return - if base_insertion_event is not None: - event_ids.append(base_insertion_event.event_id) + insertion_event_id = event_ids[0] + chunk_event_id = event_ids[-1] + historical_event_ids = event_ids[1:-1] - return 200, { - "state_events": state_events_at_start, - "events": event_ids, + response_dict = { + "state_event_ids": state_event_ids_at_start, + "event_ids": historical_event_ids, "next_chunk_id": insertion_event["content"][ EventContentFields.MSC2716_NEXT_CHUNK_ID ], + "insertion_event_id": insertion_event_id, + "chunk_event_id": chunk_event_id, } + if base_insertion_event is not None: + response_dict["base_insertion_event_id"] = base_insertion_event.event_id + + return HTTPStatus.OK, response_dict def on_GET(self, request: Request, room_id: str) -> Tuple[int, str]: - return 501, "Not implemented" + return HTTPStatus.NOT_IMPLEMENTED, "Not implemented" def on_PUT( self, request: SynapseRequest, room_id: str From 145c006ef76ab3955fb8294203cb8e6e61372cd1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood <erice@element.io> Date: Wed, 15 Sep 2021 03:34:30 -0500 Subject: [PATCH 06/74] Verify `?chunk_id` actually corresponds to an insertion event that exists (MSC2716) (#10776) --- changelog.d/10776.feature | 1 + synapse/rest/client/room_batch.py | 13 ++++++- synapse/storage/databases/main/__init__.py | 2 ++ synapse/storage/databases/main/room_batch.py | 36 ++++++++++++++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10776.feature create mode 100644 synapse/storage/databases/main/room_batch.py diff --git a/changelog.d/10776.feature b/changelog.d/10776.feature new file mode 100644 index 000000000..aec0685a3 --- /dev/null +++ b/changelog.d/10776.feature @@ -0,0 +1 @@ +Only allow the 
[MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send?chunk_id=xxx` endpoint to connect to an already existing insertion event. diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 783fecf19..d466edeec 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -300,7 +300,18 @@ async def on_POST( # event, which causes the HS to ask for the state at the start of # the chunk later. prev_event_ids = [fake_prev_event_id] - # TODO: Verify the chunk_id_from_query corresponds to an insertion event + + # Verify the chunk_id_from_query corresponds to an actual insertion event + # and have the chunk connected. + corresponding_insertion_event_id = ( + await self.store.get_insertion_event_by_chunk_id(chunk_id_from_query) + ) + if corresponding_insertion_event_id is None: + raise SynapseError( + 400, + "No insertion event corresponds to the given ?chunk_id", + errcode=Codes.INVALID_PARAM, + ) pass # Otherwise, create an insertion event to act as a starting point. 
# diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 1dc347f0c..5c21402de 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -61,6 +61,7 @@ from .rejections import RejectionsStore from .relations import RelationsStore from .room import RoomStore +from .room_batch import RoomBatchStore from .roommember import RoomMemberStore from .search import SearchStore from .session import SessionStore @@ -81,6 +82,7 @@ class DataStore( EventsBackgroundUpdatesStore, RoomMemberStore, RoomStore, + RoomBatchStore, RegistrationStore, StreamStore, ProfileStore, diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py new file mode 100644 index 000000000..54fa361d3 --- /dev/null +++ b/synapse/storage/databases/main/room_batch.py @@ -0,0 +1,36 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +from synapse.storage._base import SQLBaseStore + + +class RoomBatchStore(SQLBaseStore): + async def get_insertion_event_by_chunk_id(self, chunk_id: str) -> Optional[str]: + """Retrieve an insertion event ID. + + Args: + chunk_id: The chunk ID of the insertion event to retrieve. + + Returns: + The event_id of an insertion event, or None if there is no known + insertion event for the given chunk ID. 
+ """ + return await self.db_pool.simple_select_one_onecol( + table="insertion_events", + keyvalues={"next_chunk_id": chunk_id}, + retcol="event_id", + allow_none=True, + ) From 8c7a531e277f98ac6b7981b9738649f3a70feb94 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Wed, 15 Sep 2021 08:34:52 -0400 Subject: [PATCH 07/74] Use direct references for some configuration variables (part 2) (#10812) --- changelog.d/10812.misc | 1 + synapse/api/auth.py | 4 ++-- synapse/api/auth_blocking.py | 16 +++++++++------- synapse/crypto/context_factory.py | 8 ++++---- synapse/crypto/keyring.py | 2 +- synapse/federation/federation_server.py | 2 +- synapse/federation/sender/__init__.py | 2 +- synapse/handlers/initial_sync.py | 2 +- synapse/handlers/presence.py | 12 ++++++------ synapse/handlers/sync.py | 2 +- synapse/http/client.py | 7 +++++-- synapse/push/httppusher.py | 2 +- synapse/push/mailer.py | 10 +++++----- synapse/push/pusher.py | 8 ++++---- synapse/push/pusherpool.py | 2 +- synapse/server.py | 16 ++++++++-------- 16 files changed, 51 insertions(+), 45 deletions(-) create mode 100644 changelog.d/10812.misc diff --git a/changelog.d/10812.misc b/changelog.d/10812.misc new file mode 100644 index 000000000..586a0b3a9 --- /dev/null +++ b/changelog.d/10812.misc @@ -0,0 +1 @@ +Use direct references to config flags. 
diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 05699714e..e6ca9232e 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -70,8 +70,8 @@ def __init__(self, hs: "HomeServer"): self._auth_blocking = AuthBlocking(self.hs) - self._track_appservice_user_ips = hs.config.track_appservice_user_ips - self._macaroon_secret_key = hs.config.macaroon_secret_key + self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips + self._macaroon_secret_key = hs.config.key.macaroon_secret_key self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users async def check_user_in_room( diff --git a/synapse/api/auth_blocking.py b/synapse/api/auth_blocking.py index e6bced93d..a3b95f4de 100644 --- a/synapse/api/auth_blocking.py +++ b/synapse/api/auth_blocking.py @@ -30,13 +30,15 @@ class AuthBlocking: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() - self._server_notices_mxid = hs.config.server_notices_mxid - self._hs_disabled = hs.config.hs_disabled - self._hs_disabled_message = hs.config.hs_disabled_message - self._admin_contact = hs.config.admin_contact - self._max_mau_value = hs.config.max_mau_value - self._limit_usage_by_mau = hs.config.limit_usage_by_mau - self._mau_limits_reserved_threepids = hs.config.mau_limits_reserved_threepids + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid + self._hs_disabled = hs.config.server.hs_disabled + self._hs_disabled_message = hs.config.server.hs_disabled_message + self._admin_contact = hs.config.server.admin_contact + self._max_mau_value = hs.config.server.max_mau_value + self._limit_usage_by_mau = hs.config.server.limit_usage_by_mau + self._mau_limits_reserved_threepids = ( + hs.config.server.mau_limits_reserved_threepids + ) self._server_name = hs.hostname self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index c644b4dfc..d310976fe 
100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -102,7 +102,7 @@ def __init__(self, config): self._config = config # Check if we're using a custom list of a CA certificates - trust_root = config.federation_ca_trust_root + trust_root = config.tls.federation_ca_trust_root if trust_root is None: # Use CA root certs provided by OpenSSL trust_root = platformTrust() @@ -113,7 +113,7 @@ def __init__(self, config): # moving to TLS 1.2 by default, we want to respect the config option if # it is set to 1.0 (which the alternate option, raiseMinimumTo, will not # let us do). - minTLS = _TLS_VERSION_MAP[config.federation_client_minimum_tls_version] + minTLS = _TLS_VERSION_MAP[config.tls.federation_client_minimum_tls_version] _verify_ssl = CertificateOptions( trustRoot=trust_root, insecurelyLowerMinimumTo=minTLS @@ -125,10 +125,10 @@ def __init__(self, config): self._no_verify_ssl_context = _no_verify_ssl.getContext() self._no_verify_ssl_context.set_info_callback(_context_info_cb) - self._should_verify = self._config.federation_verify_certificates + self._should_verify = self._config.tls.federation_verify_certificates self._federation_certificate_verification_whitelist = ( - self._config.federation_certificate_verification_whitelist + self._config.tls.federation_certificate_verification_whitelist ) def get_options(self, host: bytes): diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 9e9b1c1c8..e1e13a241 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -572,7 +572,7 @@ def __init__(self, hs: "HomeServer"): super().__init__(hs) self.clock = hs.get_clock() self.client = hs.get_federation_http_client() - self.key_servers = self.config.key_servers + self.key_servers = self.config.key.key_servers async def _fetch_keys( self, keys_to_fetch: List[_FetchKeyRequest] diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 214ee948f..638959cbe 100644 --- 
a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -1237,7 +1237,7 @@ def register_instances_for_edu( self._edu_type_to_instance[edu_type] = instance_names async def on_edu(self, edu_type: str, origin: str, content: dict) -> None: - if not self.config.use_presence and edu_type == EduTypes.Presence: + if not self.config.server.use_presence and edu_type == EduTypes.Presence: return # Check if we have a handler on this instance diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 4671ac024..720d7bd74 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -594,7 +594,7 @@ def send_presence_to_destinations( destinations (list[str]) """ - if not states or not self.hs.config.use_presence: + if not states or not self.hs.config.server.use_presence: # No-op if presence is disabled. return diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 4e8f7f1d8..0b24b40eb 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -413,7 +413,7 @@ async def _room_initial_sync_joined( async def get_presence(): # If presence is disabled, return an empty list - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return [] states = await presence_handler.get_states( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 39b39cd3e..4ab962a84 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -374,7 +374,7 @@ def __init__(self, hs: "HomeServer"): self._presence_writer_instance = hs.config.worker.writers.presence[0] - self._presence_enabled = hs.config.use_presence + self._presence_enabled = hs.config.server.use_presence # Route presence EDUs to the right worker hs.get_federation_registry().register_instances_for_edu( @@ -584,7 +584,7 @@ async def set_state( user_id = target_user.to_string() # If presence is disabled, no-op - 
if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return # Proxy request to instance that writes presence @@ -601,7 +601,7 @@ async def bump_presence_active_time(self, user: UserID) -> None: with the app. """ # If presence is disabled, no-op - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return # Proxy request to instance that writes presence @@ -618,7 +618,7 @@ def __init__(self, hs: "HomeServer"): self.server_name = hs.hostname self.wheel_timer: WheelTimer[str] = WheelTimer() self.notifier = hs.get_notifier() - self._presence_enabled = hs.config.use_presence + self._presence_enabled = hs.config.server.use_presence federation_registry = hs.get_federation_registry() @@ -916,7 +916,7 @@ async def bump_presence_active_time(self, user: UserID) -> None: with the app. """ # If presence is disabled, no-op - if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: return user_id = user.to_string() @@ -949,7 +949,7 @@ async def user_syncing( """ # Override if it should affect the user's presence, if presence is # disabled. 
- if not self.hs.config.use_presence: + if not self.hs.config.server.use_presence: affect_presence = False if affect_presence: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e017b28cd..91d24534e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1090,7 +1090,7 @@ async def generate_sync_result( block_all_presence_data = ( since_token is None and sync_config.filter_collection.blocks_all_presence() ) - if self.hs_config.use_presence and not block_all_presence_data: + if self.hs_config.server.use_presence and not block_all_presence_data: logger.debug("Fetching presence data") await self._generate_sync_entry_for_presence( sync_result_builder, diff --git a/synapse/http/client.py b/synapse/http/client.py index c2ea51ee1..5204c3d08 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -321,8 +321,11 @@ def __init__( self.user_agent = hs.version_string self.clock = hs.get_clock() - if hs.config.user_agent_suffix: - self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix) + if hs.config.server.user_agent_suffix: + self.user_agent = "%s %s" % ( + self.user_agent, + hs.config.server.user_agent_suffix, + ) # We use this for our body producers to ensure that they use the correct # reactor. 
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 36aabd842..065948f98 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -365,7 +365,7 @@ async def _build_notification_dict( if event.type == "m.room.member" and event.is_state(): d["notification"]["membership"] = event.content["membership"] d["notification"]["user_is_target"] = event.state_key == self.user_id - if self.hs.config.push_include_content and event.content: + if self.hs.config.push.push_include_content and event.content: d["notification"]["content"] = event.content # We no longer send aliases separately, instead, we send the human diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index b89c6e6f2..e38e3c5d4 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -110,7 +110,7 @@ def __init__( self.state_handler = self.hs.get_state_handler() self.storage = hs.get_storage() self.app_name = app_name - self.email_subjects: EmailSubjectConfig = hs.config.email_subjects + self.email_subjects: EmailSubjectConfig = hs.config.email.email_subjects logger.info("Created Mailer for app_name %s" % app_name) @@ -796,8 +796,8 @@ def _make_room_link(self, room_id: str) -> str: Returns: A link to open a room in the web client. """ - if self.hs.config.email_riot_base_url: - base_url = "%s/#/room" % (self.hs.config.email_riot_base_url) + if self.hs.config.email.email_riot_base_url: + base_url = "%s/#/room" % (self.hs.config.email.email_riot_base_url) elif self.app_name == "Vector": # need /beta for Universal Links to work on iOS base_url = "https://vector.im/beta/#/room" @@ -815,9 +815,9 @@ def _make_notif_link(self, notif: Dict[str, str]) -> str: Returns: A link to open the notification in the web client. 
""" - if self.hs.config.email_riot_base_url: + if self.hs.config.email.email_riot_base_url: return "%s/#/room/%s/%s" % ( - self.hs.config.email_riot_base_url, + self.hs.config.email.email_riot_base_url, notif["room_id"], notif["event_id"], ) diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index 021275437..29ed346d3 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -35,12 +35,12 @@ def __init__(self, hs: "HomeServer"): "http": HttpPusher } - logger.info("email enable notifs: %r", hs.config.email_enable_notifs) - if hs.config.email_enable_notifs: + logger.info("email enable notifs: %r", hs.config.email.email_enable_notifs) + if hs.config.email.email_enable_notifs: self.mailers: Dict[str, Mailer] = {} - self._notif_template_html = hs.config.email_notif_template_html - self._notif_template_text = hs.config.email_notif_template_text + self._notif_template_html = hs.config.email.email_notif_template_html + self._notif_template_text = hs.config.email.email_notif_template_text self.pusher_types["email"] = self._create_email_pusher diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index a1436f393..26735447a 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -62,7 +62,7 @@ def __init__(self, hs: "HomeServer"): self.clock = self.hs.get_clock() # We shard the handling of push notifications by user ID. 
- self._pusher_shard_config = hs.config.push.pusher_shard_config + self._pusher_shard_config = hs.config.worker.pusher_shard_config self._instance_name = hs.get_instance_name() self._should_start_pushers = ( self._instance_name in self._pusher_shard_config.instances diff --git a/synapse/server.py b/synapse/server.py index 4777ef585..637eb15b7 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -392,7 +392,7 @@ def get_auth(self) -> Auth: @cache_in_self def get_http_client_context_factory(self) -> IPolicyForHTTPS: - if self.config.use_insecure_ssl_client_just_for_testing_do_not_use: + if self.config.tls.use_insecure_ssl_client_just_for_testing_do_not_use: return InsecureInterceptableContextFactory() return RegularPolicyForHTTPS() @@ -418,8 +418,8 @@ def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient: """ return SimpleHttpClient( self, - ip_whitelist=self.config.ip_range_whitelist, - ip_blacklist=self.config.ip_range_blacklist, + ip_whitelist=self.config.server.ip_range_whitelist, + ip_blacklist=self.config.server.ip_range_blacklist, use_proxy=True, ) @@ -801,18 +801,18 @@ def get_outbound_redis_connection(self) -> Optional["RedisProtocol"]: logger.info( "Connecting to redis (host=%r port=%r) for external cache", - self.config.redis_host, - self.config.redis_port, + self.config.redis.redis_host, + self.config.redis.redis_port, ) return lazyConnection( hs=self, - host=self.config.redis_host, - port=self.config.redis_port, + host=self.config.redis.redis_host, + port=self.config.redis.redis_port, password=self.config.redis.redis_password, reconnect=True, ) def should_send_federation(self) -> bool: "Should this server be sending federation traffic directly?" 
- return self.config.send_federation + return self.config.worker.send_federation From b93259082c7d8d3fe8376a646e130213d90069dc Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Wed, 15 Sep 2021 08:45:32 -0400 Subject: [PATCH 08/74] Add missing type hints to non-client REST servlets. (#10817) Including admin, consent, key, synapse, and media. All REST servlets (the synapse.rest module) now require typed method definitions. --- changelog.d/10785.misc | 2 +- changelog.d/10817.misc | 1 + mypy.ini | 2 +- synapse/rest/__init__.py | 11 ++++-- synapse/rest/admin/devices.py | 2 +- synapse/rest/admin/server_notice_servlet.py | 2 +- synapse/rest/admin/users.py | 2 +- synapse/rest/consent/consent_resource.py | 39 ++++++++----------- synapse/rest/health.py | 3 +- synapse/rest/key/v2/__init__.py | 7 +++- synapse/rest/key/v2/local_key_resource.py | 15 ++++--- synapse/rest/key/v2/remote_key_resource.py | 30 +++++++++----- synapse/rest/media/v1/_base.py | 24 +++++++----- synapse/rest/media/v1/filepath.py | 6 +-- synapse/rest/media/v1/media_repository.py | 8 +++- synapse/rest/media/v1/media_storage.py | 32 ++++++++++++--- synapse/rest/media/v1/preview_url_resource.py | 5 ++- synapse/rest/media/v1/storage_provider.py | 4 +- synapse/rest/media/v1/thumbnailer.py | 2 +- .../rest/synapse/client/new_user_consent.py | 6 +-- synapse/rest/synapse/client/oidc/__init__.py | 6 ++- .../synapse/client/oidc/callback_resource.py | 5 ++- synapse/rest/synapse/client/pick_username.py | 9 +++-- synapse/rest/synapse/client/saml2/__init__.py | 6 ++- .../synapse/client/saml2/metadata_resource.py | 9 ++++- .../synapse/client/saml2/response_resource.py | 7 +++- synapse/rest/well_known.py | 20 +++++----- 27 files changed, 169 insertions(+), 96 deletions(-) create mode 100644 changelog.d/10817.misc diff --git a/changelog.d/10785.misc b/changelog.d/10785.misc index 3d7f91d51..39a37b90b 100644 --- a/changelog.d/10785.misc +++ b/changelog.d/10785.misc @@ -1 +1 @@ -Convert the 
internal `FileInfo` class to attrs and add type hints. +Add missing type hints to REST servlets. diff --git a/changelog.d/10817.misc b/changelog.d/10817.misc new file mode 100644 index 000000000..39a37b90b --- /dev/null +++ b/changelog.d/10817.misc @@ -0,0 +1 @@ +Add missing type hints to REST servlets. diff --git a/mypy.ini b/mypy.ini index 60dadc478..e9052fa01 100644 --- a/mypy.ini +++ b/mypy.ini @@ -90,7 +90,7 @@ files = tests/util/test_itertools.py, tests/util/test_stream_change_cache.py -[mypy-synapse.rest.client.*] +[mypy-synapse.rest.*] disallow_untyped_defs = True [mypy-synapse.util.batching_queue] diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 3adc57612..e04af705e 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -12,7 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import JsonResource +from typing import TYPE_CHECKING + +from synapse.http.server import HttpServer, JsonResource from synapse.rest import admin from synapse.rest.client import ( account, @@ -57,6 +59,9 @@ voip, ) +if TYPE_CHECKING: + from synapse.server import HomeServer + class ClientRestResource(JsonResource): """Matrix Client API REST resource. 
@@ -68,12 +73,12 @@ class ClientRestResource(JsonResource): * etc """ - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): JsonResource.__init__(self, hs, canonical_json=False) self.register_servlets(self, hs) @staticmethod - def register_servlets(client_resource, hs): + def register_servlets(client_resource: HttpServer, hs: "HomeServer") -> None: versions.register_servlets(hs, client_resource) # Deprecated in r0 diff --git a/synapse/rest/admin/devices.py b/synapse/rest/admin/devices.py index 5715190a7..a6fa03c90 100644 --- a/synapse/rest/admin/devices.py +++ b/synapse/rest/admin/devices.py @@ -47,7 +47,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() async def on_GET( - self, request: SynapseRequest, user_id, device_id: str + self, request: SynapseRequest, user_id: str, device_id: str ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) diff --git a/synapse/rest/admin/server_notice_servlet.py b/synapse/rest/admin/server_notice_servlet.py index f5a38c267..19f84f33f 100644 --- a/synapse/rest/admin/server_notice_servlet.py +++ b/synapse/rest/admin/server_notice_servlet.py @@ -57,7 +57,7 @@ def __init__(self, hs: "HomeServer"): self.admin_handler = hs.get_admin_handler() self.txns = HttpTransactionCache(hs) - def register(self, json_resource: HttpServer): + def register(self, json_resource: HttpServer) -> None: PATTERN = "/send_server_notice" json_resource.register_paths( "POST", admin_patterns(PATTERN + "$"), self.on_POST, self.__class__.__name__ diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index c1a1ba645..681e49182 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -419,7 +419,7 @@ def __init__(self, hs: "HomeServer"): self.nonces: Dict[str, int] = {} self.hs = hs - def _clear_old_nonces(self): + def _clear_old_nonces(self) -> None: """ Clear out old nonces that are older than NONCE_TIMEOUT. 
""" diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index 11f732083..06e0fbde2 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -17,17 +17,22 @@ from hashlib import sha256 from http import HTTPStatus from os import path -from typing import Dict, List +from typing import TYPE_CHECKING, Any, Dict, List import jinja2 from jinja2 import TemplateNotFound +from twisted.web.server import Request + from synapse.api.errors import NotFoundError, StoreError, SynapseError from synapse.config import ConfigError from synapse.http.server import DirectServeHtmlResource, respond_with_html from synapse.http.servlet import parse_bytes_from_args, parse_string from synapse.types import UserID +if TYPE_CHECKING: + from synapse.server import HomeServer + # language to use for the templates. TODO: figure this out from Accept-Language TEMPLATE_LANGUAGE = "en" @@ -69,11 +74,7 @@ class ConsentResource(DirectServeHtmlResource): against the user. 
""" - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): homeserver - """ + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs @@ -106,18 +107,14 @@ def __init__(self, hs): self._hmac_secret = hs.config.form_secret.encode("utf-8") - async def _async_render_GET(self, request): - """ - Args: - request (twisted.web.http.Request): - """ + async def _async_render_GET(self, request: Request) -> None: version = parse_string(request, "v", default=self._default_consent_version) username = parse_string(request, "u", default="") userhmac = None has_consented = False public_version = username == "" if not public_version: - args: Dict[bytes, List[bytes]] = request.args + args: Dict[bytes, List[bytes]] = request.args # type: ignore userhmac_bytes = parse_bytes_from_args(args, "h", required=True) self._check_hash(username, userhmac_bytes) @@ -147,14 +144,10 @@ async def _async_render_GET(self, request): except TemplateNotFound: raise NotFoundError("Unknown policy version") - async def _async_render_POST(self, request): - """ - Args: - request (twisted.web.http.Request): - """ + async def _async_render_POST(self, request: Request) -> None: version = parse_string(request, "v", required=True) username = parse_string(request, "u", required=True) - args: Dict[bytes, List[bytes]] = request.args + args: Dict[bytes, List[bytes]] = request.args # type: ignore userhmac = parse_bytes_from_args(args, "h", required=True) self._check_hash(username, userhmac) @@ -177,7 +170,9 @@ async def _async_render_POST(self, request): except TemplateNotFound: raise NotFoundError("success.html not found") - def _render_template(self, request, template_name, **template_args): + def _render_template( + self, request: Request, template_name: str, **template_args: Any + ) -> None: # get_template checks for ".." so we don't need to worry too much # about path traversal here. 
template_html = self._jinja_env.get_template( @@ -186,11 +181,11 @@ def _render_template(self, request, template_name, **template_args): html = template_html.render(**template_args) respond_with_html(request, 200, html) - def _check_hash(self, userid, userhmac): + def _check_hash(self, userid: str, userhmac: bytes) -> None: """ Args: - userid (unicode): - userhmac (bytes): + userid: + userhmac: Raises: SynapseError if the hash doesn't match diff --git a/synapse/rest/health.py b/synapse/rest/health.py index 4487b54ab..78df7af2c 100644 --- a/synapse/rest/health.py +++ b/synapse/rest/health.py @@ -13,6 +13,7 @@ # limitations under the License. from twisted.web.resource import Resource +from twisted.web.server import Request class HealthResource(Resource): @@ -25,6 +26,6 @@ class HealthResource(Resource): isLeaf = 1 - def render_GET(self, request): + def render_GET(self, request: Request) -> bytes: request.setHeader(b"Content-Type", b"text/plain") return b"OK" diff --git a/synapse/rest/key/v2/__init__.py b/synapse/rest/key/v2/__init__.py index c6c63073e..7f8c1de1f 100644 --- a/synapse/rest/key/v2/__init__.py +++ b/synapse/rest/key/v2/__init__.py @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import TYPE_CHECKING + from twisted.web.resource import Resource from .local_key_resource import LocalKey from .remote_key_resource import RemoteKey +if TYPE_CHECKING: + from synapse.server import HomeServer + class KeyApiV2Resource(Resource): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): Resource.__init__(self) self.putChild(b"server", LocalKey(hs)) self.putChild(b"query", RemoteKey(hs)) diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py index 25f6eb842..ebe243bcf 100644 --- a/synapse/rest/key/v2/local_key_resource.py +++ b/synapse/rest/key/v2/local_key_resource.py @@ -12,16 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. - import logging +from typing import TYPE_CHECKING from canonicaljson import encode_canonical_json from signedjson.sign import sign_json from unpaddedbase64 import encode_base64 from twisted.web.resource import Resource +from twisted.web.server import Request from synapse.http.server import respond_with_json_bytes +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -58,18 +63,18 @@ class LocalKey(Resource): isLeaf = True - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.config = hs.config self.clock = hs.get_clock() self.update_response_body(self.clock.time_msec()) Resource.__init__(self) - def update_response_body(self, time_now_msec): + def update_response_body(self, time_now_msec: int) -> None: refresh_interval = self.config.key_refresh_interval self.valid_until_ts = int(time_now_msec + refresh_interval) self.response_body = encode_canonical_json(self.response_json_object()) - def response_json_object(self): + def response_json_object(self) -> JsonDict: verify_keys = {} for key in self.config.signing_key: verify_key_bytes = key.verify_key.encode() @@ -94,7 +99,7 @@ def response_json_object(self): 
json_object = sign_json(json_object, self.config.server.server_name, key) return json_object - def render_GET(self, request): + def render_GET(self, request: Request) -> int: time_now = self.clock.time_msec() # Update the expiry time if less than half the interval remains. if time_now + self.config.key_refresh_interval / 2 > self.valid_until_ts: diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index 744360e5f..d8fd7938a 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -13,17 +13,23 @@ # limitations under the License. import logging -from typing import Dict +from typing import TYPE_CHECKING, Dict from signedjson.sign import sign_json +from twisted.web.server import Request + from synapse.api.errors import Codes, SynapseError from synapse.crypto.keyring import ServerKeyFetcher from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.servlet import parse_integer, parse_json_object_from_request +from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import yieldable_gather_results +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -85,7 +91,7 @@ class RemoteKey(DirectServeJsonResource): isLeaf = True - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__() self.fetcher = ServerKeyFetcher(hs) @@ -94,7 +100,8 @@ def __init__(self, hs): self.federation_domain_whitelist = hs.config.federation_domain_whitelist self.config = hs.config - async def _async_render_GET(self, request): + async def _async_render_GET(self, request: Request) -> None: + assert request.postpath is not None if len(request.postpath) == 1: (server,) = request.postpath query: dict = {server.decode("ascii"): {}} @@ -110,14 +117,19 @@ async def _async_render_GET(self, request): await self.query_keys(request, query, 
query_remote_on_cache_miss=True) - async def _async_render_POST(self, request): + async def _async_render_POST(self, request: Request) -> None: content = parse_json_object_from_request(request) query = content["server_keys"] await self.query_keys(request, query, query_remote_on_cache_miss=True) - async def query_keys(self, request, query, query_remote_on_cache_miss=False): + async def query_keys( + self, + request: Request, + query: JsonDict, + query_remote_on_cache_miss: bool = False, + ) -> None: logger.info("Handling query for keys %r", query) store_queries = [] @@ -142,8 +154,8 @@ async def query_keys(self, request, query, query_remote_on_cache_miss=False): # Note that the value is unused. cache_misses: Dict[str, Dict[str, int]] = {} - for (server_name, key_id, _), results in cached.items(): - results = [(result["ts_added_ms"], result) for result in results] + for (server_name, key_id, _), key_results in cached.items(): + results = [(result["ts_added_ms"], result) for result in key_results] if not results and key_id is not None: cache_misses.setdefault(server_name, {})[key_id] = 0 @@ -230,6 +242,6 @@ async def query_keys(self, request, query, query_remote_on_cache_miss=False): signed_keys.append(key_json) - results = {"server_keys": signed_keys} + response = {"server_keys": signed_keys} - respond_with_json(request, 200, results, canonical_json=True) + respond_with_json(request, 200, response, canonical_json=True) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 814f4309f..7c881f2bd 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,7 +16,8 @@ import logging import os import urllib -from typing import Awaitable, Dict, Generator, List, Optional, Tuple +from types import TracebackType +from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type import attr @@ -122,7 +123,7 @@ def add_file_headers( upload_name: The name of the requested file, if any. 
""" - def _quote(x): + def _quote(x: str) -> str: return urllib.parse.quote(x.encode("utf-8")) # Default to a UTF-8 charset for text content types. @@ -282,10 +283,15 @@ def write_to_consumer(self, consumer: IConsumer) -> Awaitable: """ pass - def __enter__(self): + def __enter__(self) -> None: pass - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: pass @@ -317,31 +323,31 @@ class FileInfo: # The below properties exist to maintain compatibility with third-party modules. @property - def thumbnail_width(self): + def thumbnail_width(self) -> Optional[int]: if not self.thumbnail: return None return self.thumbnail.width @property - def thumbnail_height(self): + def thumbnail_height(self) -> Optional[int]: if not self.thumbnail: return None return self.thumbnail.height @property - def thumbnail_method(self): + def thumbnail_method(self) -> Optional[str]: if not self.thumbnail: return None return self.thumbnail.method @property - def thumbnail_type(self): + def thumbnail_type(self) -> Optional[str]: if not self.thumbnail: return None return self.thumbnail.type @property - def thumbnail_length(self): + def thumbnail_length(self) -> Optional[int]: if not self.thumbnail: return None return self.thumbnail.length diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 09531ebf5..39bbe4e87 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -16,7 +16,7 @@ import functools import os import re -from typing import Callable, List +from typing import Any, Callable, List NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") @@ -27,7 +27,7 @@ def _wrap_in_base_path(func: Callable[..., str]) -> Callable[..., str]: """ @functools.wraps(func) - def _wrapped(self, *args, **kwargs): + def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str: path = func(self, 
*args, **kwargs) return os.path.join(self.base_path, path) @@ -129,7 +129,7 @@ def remote_media_thumbnail_rel( # using the new path. def remote_media_thumbnail_rel_legacy( self, server_name: str, file_id: str, width: int, height: int, content_type: str - ): + ) -> str: top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 40ce8d2bc..50e4c9e29 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -21,6 +21,7 @@ import twisted.internet.error import twisted.web.http +from twisted.internet.defer import Deferred from twisted.web.resource import Resource from twisted.web.server import Request @@ -32,6 +33,7 @@ SynapseError, ) from synapse.config._base import ConfigError +from synapse.config.repository import ThumbnailRequirement from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import UserID @@ -114,7 +116,7 @@ def __init__(self, hs: "HomeServer"): self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS ) - def _start_update_recently_accessed(self): + def _start_update_recently_accessed(self) -> Deferred: return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed ) @@ -469,7 +471,9 @@ async def _download_remote_file( return media_info - def _get_thumbnail_requirements(self, media_type): + def _get_thumbnail_requirements( + self, media_type: str + ) -> Tuple[ThumbnailRequirement, ...]: scpos = media_type.find(";") if scpos > 0: media_type = media_type[:scpos] diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index c0bb40c11..01fada8fb 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,7 
+15,20 @@ import logging import os import shutil -from typing import IO, TYPE_CHECKING, Any, Callable, Optional, Sequence +from types import TracebackType +from typing import ( + IO, + TYPE_CHECKING, + Any, + Awaitable, + BinaryIO, + Callable, + Generator, + Optional, + Sequence, + Tuple, + Type, +) import attr @@ -83,12 +96,14 @@ async def store_file(self, source: IO, file_info: FileInfo) -> str: return fname - async def write_to_file(self, source: IO, output: IO): + async def write_to_file(self, source: IO, output: IO) -> None: """Asynchronously write the `source` to `output`.""" await defer_to_thread(self.reactor, _write_file_synchronously, source, output) @contextlib.contextmanager - def store_into_file(self, file_info: FileInfo): + def store_into_file( + self, file_info: FileInfo + ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]: """Context manager used to get a file like object to write into, as described by file_info. @@ -125,7 +140,7 @@ def store_into_file(self, file_info: FileInfo): try: with open(fname, "wb") as f: - async def finish(): + async def finish() -> None: # Ensure that all writes have been flushed and close the # file. 
f.flush() @@ -315,7 +330,12 @@ def write_to_consumer(self, consumer: IConsumer) -> Deferred: FileSender().beginFileTransfer(self.open_file, consumer) ) - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: self.open_file.close() @@ -339,7 +359,7 @@ class ReadableFileWrapper: clock = attr.ib(type=Clock) path = attr.ib(type=str) - async def write_chunks_to(self, callback: Callable[[bytes], None]): + async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None: """Reads the file in chunks and calls the callback with each chunk.""" with open(self.path, "rb") as file: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f108da05d..fe0627d9b 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -27,6 +27,7 @@ import attr +from twisted.internet.defer import Deferred from twisted.internet.error import DNSLookupError from twisted.web.server import Request @@ -473,7 +474,7 @@ async def _download_url(self, url: str, user: str) -> MediaInfo: etag=etag, ) - def _start_expire_url_cache_data(self): + def _start_expire_url_cache_data(self) -> Deferred: return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data ) @@ -782,7 +783,7 @@ def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: def _iterate_over_text( - tree, *tags_to_ignore: Iterable[Union[str, "etree.Comment"]] + tree: "etree.Element", *tags_to_ignore: Iterable[Union[str, "etree.Comment"]] ) -> Generator[str, None, None]: """Iterate over the tree returning text nodes in a depth first fashion, skipping text nodes inside certain tags. 
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0ff6ad3c0..6c9969e55 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -99,7 +99,7 @@ async def store_file(self, path: str, file_info: FileInfo) -> None: await maybe_awaitable(self.backend.store_file(path, file_info)) # type: ignore else: # TODO: Handle errors. - async def store(): + async def store() -> None: try: return await maybe_awaitable( self.backend.store_file(path, file_info) @@ -128,7 +128,7 @@ def __init__(self, hs: "HomeServer", config: str): self.cache_directory = hs.config.media_store_path self.base_directory = config - def __str__(self): + def __str__(self) -> str: return "FileStorageProviderBackend[%s]" % (self.base_directory,) async def store_file(self, path: str, file_info: FileInfo) -> None: diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index a65e9e180..df54a4064 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -41,7 +41,7 @@ class Thumbnailer: FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"} @staticmethod - def set_limits(max_image_pixels: int): + def set_limits(max_image_pixels: int) -> None: Image.MAX_IMAGE_PIXELS = max_image_pixels def __init__(self, input_path: str): diff --git a/synapse/rest/synapse/client/new_user_consent.py b/synapse/rest/synapse/client/new_user_consent.py index 67c1ed1f5..1c1c7b361 100644 --- a/synapse/rest/synapse/client/new_user_consent.py +++ b/synapse/rest/synapse/client/new_user_consent.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Generator from twisted.web.server import Request @@ -45,7 +45,7 @@ def __init__(self, hs: "HomeServer"): self._server_name = hs.hostname self._consent_version = hs.config.consent.user_consent_version - def template_search_dirs(): + def template_search_dirs() -> Generator[str, None, None]: if hs.config.server.custom_template_directory: yield hs.config.server.custom_template_directory if hs.config.sso.sso_template_dir: @@ -88,7 +88,7 @@ async def _async_render_GET(self, request: Request) -> None: html = template.render(template_params) respond_with_html(request, 200, html) - async def _async_render_POST(self, request: Request): + async def _async_render_POST(self, request: Request) -> None: try: session_id = get_username_mapping_session_cookie_from_request(request) except SynapseError as e: diff --git a/synapse/rest/synapse/client/oidc/__init__.py b/synapse/rest/synapse/client/oidc/__init__.py index 36ba40165..81fec3965 100644 --- a/synapse/rest/synapse/client/oidc/__init__.py +++ b/synapse/rest/synapse/client/oidc/__init__.py @@ -13,16 +13,20 @@ # limitations under the License. 
import logging +from typing import TYPE_CHECKING from twisted.web.resource import Resource from synapse.rest.synapse.client.oidc.callback_resource import OIDCCallbackResource +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class OIDCResource(Resource): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): Resource.__init__(self) self.putChild(b"callback", OIDCCallbackResource(hs)) diff --git a/synapse/rest/synapse/client/oidc/callback_resource.py b/synapse/rest/synapse/client/oidc/callback_resource.py index 7785f17e9..4f375cb74 100644 --- a/synapse/rest/synapse/client/oidc/callback_resource.py +++ b/synapse/rest/synapse/client/oidc/callback_resource.py @@ -16,6 +16,7 @@ from typing import TYPE_CHECKING from synapse.http.server import DirectServeHtmlResource +from synapse.http.site import SynapseRequest if TYPE_CHECKING: from synapse.server import HomeServer @@ -30,10 +31,10 @@ def __init__(self, hs: "HomeServer"): super().__init__() self._oidc_handler = hs.get_oidc_handler() - async def _async_render_GET(self, request): + async def _async_render_GET(self, request: SynapseRequest) -> None: await self._oidc_handler.handle_oidc_callback(request) - async def _async_render_POST(self, request): + async def _async_render_POST(self, request: SynapseRequest) -> None: # the auth response can be returned via an x-www-form-urlencoded form instead # of GET params, as per # https://openid.net/specs/oauth-v2-form-post-response-mode-1_0.html. diff --git a/synapse/rest/synapse/client/pick_username.py b/synapse/rest/synapse/client/pick_username.py index d30b478b9..28ae08349 100644 --- a/synapse/rest/synapse/client/pick_username.py +++ b/synapse/rest/synapse/client/pick_username.py @@ -13,7 +13,7 @@ # limitations under the License. 
import logging -from typing import TYPE_CHECKING, List +from typing import TYPE_CHECKING, Generator, List, Tuple from twisted.web.resource import Resource from twisted.web.server import Request @@ -27,6 +27,7 @@ ) from synapse.http.servlet import parse_boolean, parse_string from synapse.http.site import SynapseRequest +from synapse.types import JsonDict from synapse.util.templates import build_jinja_env if TYPE_CHECKING: @@ -57,7 +58,7 @@ def __init__(self, hs: "HomeServer"): super().__init__() self._sso_handler = hs.get_sso_handler() - async def _async_render_GET(self, request: Request): + async def _async_render_GET(self, request: Request) -> Tuple[int, JsonDict]: localpart = parse_string(request, "username", required=True) session_id = get_username_mapping_session_cookie_from_request(request) @@ -73,7 +74,7 @@ def __init__(self, hs: "HomeServer"): super().__init__() self._sso_handler = hs.get_sso_handler() - def template_search_dirs(): + def template_search_dirs() -> Generator[str, None, None]: if hs.config.server.custom_template_directory: yield hs.config.server.custom_template_directory if hs.config.sso.sso_template_dir: @@ -104,7 +105,7 @@ async def _async_render_GET(self, request: Request) -> None: html = template.render(template_params) respond_with_html(request, 200, html) - async def _async_render_POST(self, request: SynapseRequest): + async def _async_render_POST(self, request: SynapseRequest) -> None: # This will always be set by the time Twisted calls us. assert request.args is not None diff --git a/synapse/rest/synapse/client/saml2/__init__.py b/synapse/rest/synapse/client/saml2/__init__.py index 781ccb237..3f247e6a2 100644 --- a/synapse/rest/synapse/client/saml2/__init__.py +++ b/synapse/rest/synapse/client/saml2/__init__.py @@ -13,17 +13,21 @@ # limitations under the License. 
import logging +from typing import TYPE_CHECKING from twisted.web.resource import Resource from synapse.rest.synapse.client.saml2.metadata_resource import SAML2MetadataResource from synapse.rest.synapse.client.saml2.response_resource import SAML2ResponseResource +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class SAML2Resource(Resource): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): Resource.__init__(self) self.putChild(b"metadata.xml", SAML2MetadataResource(hs)) self.putChild(b"authn_response", SAML2ResponseResource(hs)) diff --git a/synapse/rest/synapse/client/saml2/metadata_resource.py b/synapse/rest/synapse/client/saml2/metadata_resource.py index b37c7083d..64378ed57 100644 --- a/synapse/rest/synapse/client/saml2/metadata_resource.py +++ b/synapse/rest/synapse/client/saml2/metadata_resource.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import TYPE_CHECKING import saml2.metadata from twisted.web.resource import Resource +from twisted.web.server import Request + +if TYPE_CHECKING: + from synapse.server import HomeServer class SAML2MetadataResource(Resource): @@ -23,11 +28,11 @@ class SAML2MetadataResource(Resource): isLeaf = 1 - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): Resource.__init__(self) self.sp_config = hs.config.saml2_sp_config - def render_GET(self, request): + def render_GET(self, request: Request) -> bytes: metadata_xml = saml2.metadata.create_metadata_string( configfile=None, config=self.sp_config ) diff --git a/synapse/rest/synapse/client/saml2/response_resource.py b/synapse/rest/synapse/client/saml2/response_resource.py index 774ccd870..47d2a6a22 100644 --- a/synapse/rest/synapse/client/saml2/response_resource.py +++ b/synapse/rest/synapse/client/saml2/response_resource.py @@ -15,7 +15,10 @@ from typing import TYPE_CHECKING +from twisted.web.server import Request + from synapse.http.server import DirectServeHtmlResource +from synapse.http.site import SynapseRequest if TYPE_CHECKING: from synapse.server import HomeServer @@ -31,7 +34,7 @@ def __init__(self, hs: "HomeServer"): self._saml_handler = hs.get_saml_handler() self._sso_handler = hs.get_sso_handler() - async def _async_render_GET(self, request): + async def _async_render_GET(self, request: Request) -> None: # We're not expecting any GET request on that resource if everything goes right, # but some IdPs sometimes end up responding with a 302 redirect on this endpoint. 
# In this case, just tell the user that something went wrong and they should @@ -40,5 +43,5 @@ async def _async_render_GET(self, request): request, "unexpected_get", "Unexpected GET request on /saml2/authn_response" ) - async def _async_render_POST(self, request): + async def _async_render_POST(self, request: SynapseRequest) -> None: await self._saml_handler.handle_saml_response(request) diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py index 6a66a88c5..c80a3a99a 100644 --- a/synapse/rest/well_known.py +++ b/synapse/rest/well_known.py @@ -13,26 +13,26 @@ # limitations under the License. import logging +from typing import TYPE_CHECKING, Optional from twisted.web.resource import Resource +from twisted.web.server import Request from synapse.http.server import set_cors_headers +from synapse.types import JsonDict from synapse.util import json_encoder +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class WellKnownBuilder: - """Utility to construct the well-known response - - Args: - hs (synapse.server.HomeServer): - """ - - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self._config = hs.config - def get_well_known(self): + def get_well_known(self) -> Optional[JsonDict]: # if we don't have a public_baseurl, we can't help much here. if self._config.server.public_baseurl is None: return None @@ -52,11 +52,11 @@ class WellKnownResource(Resource): isLeaf = 1 - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): Resource.__init__(self) self._well_known_builder = WellKnownBuilder(hs) - def render_GET(self, request): + def render_GET(self, request: Request) -> bytes: set_cors_headers(request) r = self._well_known_builder.get_well_known() if not r: From 3eba047d388fd0d798229a0779f343dbda8a2887 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Wed, 15 Sep 2021 09:54:13 -0400 Subject: [PATCH 09/74] Add type hints to state database module. 
(#10823) --- changelog.d/10823.misc | 1 + mypy.ini | 1 + synapse/storage/databases/state/bg_updates.py | 60 +++++--- synapse/storage/databases/state/store.py | 136 +++++++++++------- synapse/storage/state.py | 3 +- synapse/util/caches/dictionary_cache.py | 4 +- 6 files changed, 133 insertions(+), 72 deletions(-) create mode 100644 changelog.d/10823.misc diff --git a/changelog.d/10823.misc b/changelog.d/10823.misc new file mode 100644 index 000000000..053296990 --- /dev/null +++ b/changelog.d/10823.misc @@ -0,0 +1 @@ +Add type hints to the state database. diff --git a/mypy.ini b/mypy.ini index e9052fa01..b21e1555a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -60,6 +60,7 @@ files = synapse/storage/databases/main/session.py, synapse/storage/databases/main/stream.py, synapse/storage/databases/main/ui_auth.py, + synapse/storage/databases/state, synapse/storage/database.py, synapse/storage/engines, synapse/storage/keys.py, diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index c2891cb07..eb1118d2c 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -13,12 +13,20 @@ # limitations under the License. import logging -from typing import Optional +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.engines import PostgresEngine from synapse.storage.state import StateFilter +from synapse.types import MutableStateMap, StateMap + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -31,7 +39,9 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): updates. 
""" - def _count_state_group_hops_txn(self, txn, state_group): + def _count_state_group_hops_txn( + self, txn: LoggingTransaction, state_group: int + ) -> int: """Given a state group, count how many hops there are in the tree. This is used to ensure the delta chains don't get too long. @@ -56,7 +66,7 @@ def _count_state_group_hops_txn(self, txn, state_group): else: # We don't use WITH RECURSIVE on sqlite3 as there are distributions # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - next_group = state_group + next_group: Optional[int] = state_group count = 0 while next_group: @@ -73,11 +83,14 @@ def _count_state_group_hops_txn(self, txn, state_group): return count def _get_state_groups_from_groups_txn( - self, txn, groups, state_filter: Optional[StateFilter] = None - ): + self, + txn: LoggingTransaction, + groups: List[int], + state_filter: Optional[StateFilter] = None, + ) -> Mapping[int, StateMap[str]]: state_filter = state_filter or StateFilter.all() - results = {group: {} for group in groups} + results: Dict[int, MutableStateMap[str]] = {group: {} for group in groups} where_clause, where_args = state_filter.make_sql_filter_clause() @@ -117,7 +130,7 @@ def _get_state_groups_from_groups_txn( """ for group in groups: - args = [group] + args: List[Union[int, str]] = [group] args.extend(where_args) txn.execute(sql % (where_clause,), args) @@ -131,7 +144,7 @@ def _get_state_groups_from_groups_txn( # We don't use WITH RECURSIVE on sqlite3 as there are distributions # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) for group in groups: - next_group = group + next_group: Optional[int] = group while next_group: # We did this before by getting the list of group ids, and @@ -173,6 +186,7 @@ def _get_state_groups_from_groups_txn( allow_none=True, ) + # The results shouldn't be considered mutable. 
return results @@ -182,7 +196,12 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self.db_pool.updates.register_background_update_handler( self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, @@ -198,7 +217,9 @@ def __init__(self, database: DatabasePool, db_conn, hs): columns=["room_id"], ) - async def _background_deduplicate_state(self, progress, batch_size): + async def _background_deduplicate_state( + self, progress: dict, batch_size: int + ) -> int: """This background update will slowly deduplicate state by reencoding them as deltas. """ @@ -218,7 +239,7 @@ async def _background_deduplicate_state(self, progress, batch_size): ) max_group = rows[0][0] - def reindex_txn(txn): + def reindex_txn(txn: LoggingTransaction) -> Tuple[bool, int]: new_last_state_group = last_state_group for count in range(batch_size): txn.execute( @@ -251,7 +272,8 @@ def reindex_txn(txn): " WHERE id < ? AND room_id = ?", (state_group, room_id), ) - (prev_group,) = txn.fetchone() + # There will be a result due to the coalesce. + (prev_group,) = txn.fetchone() # type: ignore new_last_state_group = state_group if prev_group: @@ -261,15 +283,15 @@ def reindex_txn(txn): # otherwise read performance degrades. 
continue - prev_state = self._get_state_groups_from_groups_txn( + prev_state_by_group = self._get_state_groups_from_groups_txn( txn, [prev_group] ) - prev_state = prev_state[prev_group] + prev_state = prev_state_by_group[prev_group] - curr_state = self._get_state_groups_from_groups_txn( + curr_state_by_group = self._get_state_groups_from_groups_txn( txn, [state_group] ) - curr_state = curr_state[state_group] + curr_state = curr_state_by_group[state_group] if not set(prev_state.keys()) - set(curr_state.keys()): # We can only do a delta if the current has a strict super set @@ -340,8 +362,8 @@ def reindex_txn(txn): return result * BATCH_SIZE_SCALE_FACTOR - async def _background_index_state(self, progress, batch_size): - def reindex_txn(conn): + async def _background_index_state(self, progress: dict, batch_size: int) -> int: + def reindex_txn(conn: LoggingDatabaseConnection) -> None: conn.rollback() if isinstance(self.database_engine, PostgresEngine): # postgres insists on autocommit for the index diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f839c0c24..f1e3a27e6 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -13,43 +13,56 @@ # limitations under the License. 
import logging -from collections import namedtuple -from typing import Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple + +import attr from synapse.api.constants import EventTypes from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStore from synapse.storage.state import StateFilter from synapse.storage.types import Cursor from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import MutableStateMap, StateMap +from synapse.types import MutableStateMap, StateKey, StateMap from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) MAX_STATE_DELTA_HOPS = 100 -class _GetStateGroupDelta( - namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids")) -): +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _GetStateGroupDelta: """Return type of get_state_group_delta that implements __len__, which lets - us use the itrable flag when caching + us use the iterable flag when caching """ - __slots__ = [] + prev_group: Optional[int] + delta_ids: Optional[StateMap[str]] - def __len__(self): + def __len__(self) -> int: return len(self.delta_ids) if self.delta_ids else 0 class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): """A data store for fetching/storing state groups.""" - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) # Originally the state store used a single DictionaryCache to cache the @@ 
-81,19 +94,21 @@ def __init__(self, database: DatabasePool, db_conn, hs): # We size the non-members cache to be smaller than the members cache as the # vast majority of state in Matrix (today) is member events. - self._state_group_cache = DictionaryCache( + self._state_group_cache: DictionaryCache[int, StateKey, str] = DictionaryCache( "*stateGroupCache*", # TODO: this hasn't been tuned yet 50000, ) - self._state_group_members_cache = DictionaryCache( + self._state_group_members_cache: DictionaryCache[ + int, StateKey, str + ] = DictionaryCache( "*stateGroupMembersCache*", 500000, ) - def get_max_state_group_txn(txn: Cursor): + def get_max_state_group_txn(txn: Cursor) -> int: txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") - return txn.fetchone()[0] + return txn.fetchone()[0] # type: ignore self._state_group_seq_gen = build_sequence_generator( db_conn, @@ -105,15 +120,15 @@ def get_max_state_group_txn(txn: Cursor): ) @cached(max_entries=10000, iterable=True) - async def get_state_group_delta(self, state_group): + async def get_state_group_delta(self, state_group: int) -> _GetStateGroupDelta: """Given a state group try to return a previous group and a delta between the old and the new. Returns: - (prev_group, delta_ids), where both may be None. + _GetStateGroupDelta containing prev_group and delta_ids, where both may be None. """ - def _get_state_group_delta_txn(txn): + def _get_state_group_delta_txn(txn: LoggingTransaction) -> _GetStateGroupDelta: prev_group = self.db_pool.simple_select_one_onecol_txn( txn, table="state_group_edges", @@ -154,7 +169,7 @@ async def _get_state_groups_from_groups( Returns: Dict of state group to state map. 
""" - results = {} + results: Dict[int, StateMap[str]] = {} chunks = [groups[i : i + 100] for i in range(0, len(groups), 100)] for chunk in chunks: @@ -168,19 +183,24 @@ async def _get_state_groups_from_groups( return results - def _get_state_for_group_using_cache(self, cache, group, state_filter): + def _get_state_for_group_using_cache( + self, + cache: DictionaryCache[int, StateKey, str], + group: int, + state_filter: StateFilter, + ) -> Tuple[MutableStateMap[str], bool]: """Checks if group is in cache. See `_get_state_for_groups` Args: - cache(DictionaryCache): the state group cache to use - group(int): The state group to lookup - state_filter (StateFilter): The state filter used to fetch state - from the database. + cache: the state group cache to use + group: The state group to lookup + state_filter: The state filter used to fetch state from the database. - Returns 2-tuple (`state_dict`, `got_all`). - `got_all` is a bool indicating if we successfully retrieved all - requests state from the cache, if False we need to query the DB for the - missing state. + Returns: + 2-tuple (`state_dict`, `got_all`). + `got_all` is a bool indicating if we successfully retrieved all + requests state from the cache, if False we need to query the DB for the + missing state. """ cache_entry = cache.get(group) state_dict_ids = cache_entry.value @@ -277,8 +297,11 @@ async def _get_state_for_groups( return state def _get_state_for_groups_using_cache( - self, groups: Iterable[int], cache: DictionaryCache, state_filter: StateFilter - ) -> Tuple[Dict[int, StateMap[str]], Set[int]]: + self, + groups: Iterable[int], + cache: DictionaryCache[int, StateKey, str], + state_filter: StateFilter, + ) -> Tuple[Dict[int, MutableStateMap[str]], Set[int]]: """Gets the state at each of a list of state groups, optionally filtering by type/state_key, querying from a specific cache. 
@@ -310,21 +333,21 @@ def _get_state_for_groups_using_cache( def _insert_into_cache( self, - group_to_state_dict, - state_filter, - cache_seq_num_members, - cache_seq_num_non_members, - ): + group_to_state_dict: Dict[int, StateMap[str]], + state_filter: StateFilter, + cache_seq_num_members: int, + cache_seq_num_non_members: int, + ) -> None: """Inserts results from querying the database into the relevant cache. Args: - group_to_state_dict (dict): The new entries pulled from database. + group_to_state_dict: The new entries pulled from database. Map from state group to state dict - state_filter (StateFilter): The state filter used to fetch state + state_filter: The state filter used to fetch state from the database. - cache_seq_num_members (int): Sequence number of member cache since + cache_seq_num_members: Sequence number of member cache since last lookup in cache - cache_seq_num_non_members (int): Sequence number of member cache since + cache_seq_num_non_members: Sequence number of member cache since last lookup in cache """ @@ -395,7 +418,7 @@ async def store_state_group( The state group ID """ - def _store_state_group_txn(txn): + def _store_state_group_txn(txn: LoggingTransaction) -> int: if current_state_ids is None: # AFAIK, this can never happen raise Exception("current_state_ids cannot be None") @@ -426,6 +449,8 @@ def _store_state_group_txn(txn): potential_hops = self._count_state_group_hops_txn(txn, prev_group) if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + assert delta_ids is not None + self.db_pool.simple_insert_txn( txn, table="state_group_edges", @@ -498,7 +523,7 @@ def _store_state_group_txn(txn): ) async def purge_unreferenced_state_groups( - self, room_id: str, state_groups_to_delete + self, room_id: str, state_groups_to_delete: Collection[int] ) -> None: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. 
@@ -506,8 +531,7 @@ async def purge_unreferenced_state_groups( Args: room_id: The room the state groups belong to (must all be in the same room). - state_groups_to_delete (Collection[int]): Set of all state groups - to delete. + state_groups_to_delete: Set of all state groups to delete. """ await self.db_pool.runInteraction( @@ -517,7 +541,12 @@ async def purge_unreferenced_state_groups( state_groups_to_delete, ) - def _purge_unreferenced_state_groups(self, txn, room_id, state_groups_to_delete): + def _purge_unreferenced_state_groups( + self, + txn: LoggingTransaction, + room_id: str, + state_groups_to_delete: Collection[int], + ) -> None: logger.info( "[purge] found %i state groups to delete", len(state_groups_to_delete) ) @@ -546,8 +575,8 @@ def _purge_unreferenced_state_groups(self, txn, room_id, state_groups_to_delete) # groups to non delta versions. for sg in remaining_state_groups: logger.info("[purge] de-delta-ing remaining state group %s", sg) - curr_state = self._get_state_groups_from_groups_txn(txn, [sg]) - curr_state = curr_state[sg] + curr_state_by_group = self._get_state_groups_from_groups_txn(txn, [sg]) + curr_state = curr_state_by_group[sg] self.db_pool.simple_delete_txn( txn, table="state_groups_state", keyvalues={"state_group": sg} @@ -605,12 +634,14 @@ async def get_previous_state_groups( return {row["state_group"]: row["prev_state_group"] for row in rows} - async def purge_room_state(self, room_id, state_groups_to_delete): + async def purge_room_state( + self, room_id: str, state_groups_to_delete: Collection[int] + ) -> None: """Deletes all record of a room from state tables Args: - room_id (str): - state_groups_to_delete (list[int]): State groups to delete + room_id: + state_groups_to_delete: State groups to delete """ await self.db_pool.runInteraction( @@ -620,7 +651,12 @@ async def purge_room_state(self, room_id, state_groups_to_delete): state_groups_to_delete, ) - def _purge_room_state_txn(self, txn, room_id, state_groups_to_delete): + def 
_purge_room_state_txn( + self, + txn: LoggingTransaction, + room_id: str, + state_groups_to_delete: Collection[int], + ) -> None: # first we have to delete the state groups states logger.info("[purge] removing %s from state_groups_state", room_id) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c76529cb5..5e86befde 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -377,7 +377,8 @@ async def get_state_group_delta( make up the delta between the old and new state groups. """ - return await self.stores.state.get_state_group_delta(state_group) + state_group_delta = await self.stores.state.get_state_group_delta(state_group) + return state_group_delta.prev_group, state_group_delta.delta_ids async def get_state_groups_ids( self, _room_id: str, event_ids: Iterable[str] diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index ade088aae..485ddb189 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -130,7 +130,7 @@ def update( sequence: int, key: KT, value: Dict[DKT, DV], - fetched_keys: Optional[Set[DKT]] = None, + fetched_keys: Optional[Iterable[DKT]] = None, ) -> None: """Updates the entry in the cache @@ -155,7 +155,7 @@ def update( self._update_or_insert(key, value, fetched_keys) def _update_or_insert( - self, key: KT, value: Dict[DKT, DV], known_absent: Set[DKT] + self, key: KT, value: Dict[DKT, DV], known_absent: Iterable[DKT] ) -> None: # We pop and reinsert as we need to tell the cache the size may have # changed From bfb4b858a999684ba2459ee4c3aa20270d13062d Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Thu, 16 Sep 2021 12:01:14 -0400 Subject: [PATCH 10/74] Create a constant for a small png image in tests. (#10834) To avoid duplicating it between a few tests. 
--- changelog.d/10834.misc | 1 + tests/replication/test_multi_media_repo.py | 18 ++++-------- tests/rest/admin/test_admin.py | 23 ++++++--------- tests/rest/admin/test_media.py | 34 ++++------------------ tests/rest/admin/test_statistics.py | 12 ++------ tests/rest/admin/test_user.py | 19 +++--------- tests/rest/media/v1/test_media_storage.py | 18 +++--------- tests/test_utils/__init__.py | 14 +++++++-- 8 files changed, 45 insertions(+), 94 deletions(-) create mode 100644 changelog.d/10834.misc diff --git a/changelog.d/10834.misc b/changelog.d/10834.misc new file mode 100644 index 000000000..037695e6e --- /dev/null +++ b/changelog.d/10834.misc @@ -0,0 +1 @@ +Factor out PNG image data to a constant to be used in several tests. diff --git a/tests/replication/test_multi_media_repo.py b/tests/replication/test_multi_media_repo.py index ac419f0db..01b1b0d4a 100644 --- a/tests/replication/test_multi_media_repo.py +++ b/tests/replication/test_multi_media_repo.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Matrix.org Foundation C.I.C. +# Copyright 2020-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # limitations under the License. 
import logging import os -from binascii import unhexlify from typing import Optional, Tuple from twisted.internet.protocol import Factory @@ -28,6 +27,7 @@ from tests.http import TestServerTLSConnectionFactory, get_test_ca_cert_file from tests.replication._base import BaseMultiWorkerStreamTestCase from tests.server import FakeChannel, FakeSite, FakeTransport, make_request +from tests.test_utils import SMALL_PNG logger = logging.getLogger(__name__) @@ -190,31 +190,25 @@ def test_download_image_race(self): channel1, request1 = self._get_media_req(hs1, "example.com:443", "PIC1") channel2, request2 = self._get_media_req(hs2, "example.com:443", "PIC1") - png_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) - request1.setResponseCode(200) request1.responseHeaders.setRawHeaders(b"Content-Type", [b"image/png"]) - request1.write(png_data) + request1.write(SMALL_PNG) request1.finish() self.pump(0.1) self.assertEqual(channel1.code, 200, channel1.result["body"]) - self.assertEqual(channel1.result["body"], png_data) + self.assertEqual(channel1.result["body"], SMALL_PNG) request2.setResponseCode(200) request2.responseHeaders.setRawHeaders(b"Content-Type", [b"image/png"]) - request2.write(png_data) + request2.write(SMALL_PNG) request2.finish() self.pump(0.1) self.assertEqual(channel2.code, 200, channel2.result["body"]) - self.assertEqual(channel2.result["body"], png_data) + self.assertEqual(channel2.result["body"], SMALL_PNG) # We expect only three new thumbnails to have been persisted. self.assertEqual(start_count + 3, self._count_remote_thumbnails()) diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index bfa638fb4..febd40b65 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -1,4 +1,4 @@ -# Copyright 2018 New Vector Ltd +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ import json import os import urllib.parse -from binascii import unhexlify from unittest.mock import Mock from twisted.internet.defer import Deferred @@ -28,6 +27,7 @@ from tests import unittest from tests.server import FakeSite, make_request +from tests.test_utils import SMALL_PNG class VersionTestCase(unittest.HomeserverTestCase): @@ -150,11 +150,6 @@ def prepare(self, reactor, clock, hs): self.media_repo = hs.get_media_repository_resource() self.download_resource = self.media_repo.children[b"download"] self.upload_resource = self.media_repo.children[b"upload"] - self.image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) def make_homeserver(self, reactor, clock): @@ -266,7 +261,7 @@ def test_quarantine_media_by_id(self): # Upload some media into the room response = self.helper.upload_media( - self.upload_resource, self.image_data, tok=admin_user_tok + self.upload_resource, SMALL_PNG, tok=admin_user_tok ) # Extract media ID from the response @@ -314,10 +309,10 @@ def test_quarantine_all_media_in_room(self, override_url_template=None): # Upload some media response_1 = self.helper.upload_media( - self.upload_resource, self.image_data, tok=non_admin_user_tok + self.upload_resource, SMALL_PNG, tok=non_admin_user_tok ) response_2 = self.helper.upload_media( - self.upload_resource, self.image_data, tok=non_admin_user_tok + self.upload_resource, SMALL_PNG, tok=non_admin_user_tok ) # Extract mxcs @@ -381,10 +376,10 @@ def test_quarantine_all_media_by_user(self): # Upload some media response_1 = self.helper.upload_media( - self.upload_resource, self.image_data, tok=non_admin_user_tok + self.upload_resource, SMALL_PNG, tok=non_admin_user_tok ) response_2 = self.helper.upload_media( - 
self.upload_resource, self.image_data, tok=non_admin_user_tok + self.upload_resource, SMALL_PNG, tok=non_admin_user_tok ) # Extract media IDs @@ -421,10 +416,10 @@ def test_cannot_quarantine_safe_media(self): # Upload some media response_1 = self.helper.upload_media( - self.upload_resource, self.image_data, tok=non_admin_user_tok + self.upload_resource, SMALL_PNG, tok=non_admin_user_tok ) response_2 = self.helper.upload_media( - self.upload_resource, self.image_data, tok=non_admin_user_tok + self.upload_resource, SMALL_PNG, tok=non_admin_user_tok ) # Extract media IDs diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 972d60570..2f02934e7 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -1,4 +1,5 @@ # Copyright 2020 Dirk Klimpel +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +15,6 @@ import json import os -from binascii import unhexlify from parameterized import parameterized @@ -25,6 +25,7 @@ from tests import unittest from tests.server import FakeSite, make_request +from tests.test_utils import SMALL_PNG class DeleteMediaByIDTestCase(unittest.HomeserverTestCase): @@ -110,15 +111,10 @@ def test_delete_media(self): download_resource = self.media_repo.children[b"download"] upload_resource = self.media_repo.children[b"upload"] - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) # Upload some media into the room response = self.helper.upload_media( - upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + upload_resource, SMALL_PNG, tok=self.admin_user_tok, expect_code=200 ) # Extract media ID from the response server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' @@ -504,16 +500,10 @@ def 
_create_media(self): Create a media and return media_id and server_and_media_id """ upload_resource = self.media_repo.children[b"upload"] - # file size is 67 Byte - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) # Upload some media into the room response = self.helper.upload_media( - upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + upload_resource, SMALL_PNG, tok=self.admin_user_tok, expect_code=200 ) # Extract media ID from the response server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' @@ -584,16 +574,10 @@ def prepare(self, reactor, clock, hs): # Create media upload_resource = media_repo.children[b"upload"] - # file size is 67 Byte - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) # Upload some media into the room response = self.helper.upload_media( - upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + upload_resource, SMALL_PNG, tok=self.admin_user_tok, expect_code=200 ) # Extract media ID from the response server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' @@ -711,16 +695,10 @@ def prepare(self, reactor, clock, hs): # Create media upload_resource = media_repo.children[b"upload"] - # file size is 67 Byte - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) # Upload some media into the room response = self.helper.upload_media( - upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + upload_resource, SMALL_PNG, tok=self.admin_user_tok, expect_code=200 ) # Extract media ID from the response server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' diff --git 
a/tests/rest/admin/test_statistics.py b/tests/rest/admin/test_statistics.py index 5cd82209c..ece89a65a 100644 --- a/tests/rest/admin/test_statistics.py +++ b/tests/rest/admin/test_statistics.py @@ -1,4 +1,5 @@ # Copyright 2020 Dirk Klimpel +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +14,6 @@ # limitations under the License. import json -from binascii import unhexlify from typing import Any, Dict, List, Optional import synapse.rest.admin @@ -21,6 +21,7 @@ from synapse.rest.client import login from tests import unittest +from tests.test_utils import SMALL_PNG class UserMediaStatisticsTestCase(unittest.HomeserverTestCase): @@ -468,16 +469,9 @@ def _create_media(self, user_token: str, number_media: int): """ upload_resource = self.media_repo.children[b"upload"] for _ in range(number_media): - # file size is 67 Byte - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) - # Upload some media into the room self.helper.upload_media( - upload_resource, image_data, tok=user_token, expect_code=200 + upload_resource, SMALL_PNG, tok=user_token, expect_code=200 ) def _check_fields(self, content: List[Dict[str, Any]]): diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index ee204c404..cc3f16c62 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1,4 +1,4 @@ -# Copyright 2018 New Vector Ltd +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -33,7 +33,7 @@ from tests import unittest from tests.server import FakeSite, make_request -from tests.test_utils import make_awaitable +from tests.test_utils import SMALL_PNG, make_awaitable from tests.unittest import override_config @@ -2835,11 +2835,7 @@ def test_order_by(self): other_user_tok = self.login("user", "pass") # Resolution: 1×1, MIME type: image/png, Extension: png, Size: 67 B - image_data1 = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) + image_data1 = SMALL_PNG # Resolution: 1×1, MIME type: image/gif, Extension: gif, Size: 35 B image_data2 = unhexlify( b"47494638376101000100800100000000" @@ -2943,14 +2939,7 @@ def _create_media_for_user(self, user_token: str, number_media: int) -> List[str """ media_ids = [] for _ in range(number_media): - # file size is 67 Byte - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) - - media_ids.append(self._create_media_and_access(user_token, image_data)) + media_ids.append(self._create_media_and_access(user_token, SMALL_PNG)) return media_ids diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 2f7eebfe6..9ea1c2bf2 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -1,4 +1,4 @@ -# Copyright 2018 New Vector Ltd +# Copyright 2018-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -38,6 +38,7 @@ from tests import unittest from tests.server import FakeSite, make_request +from tests.test_utils import SMALL_PNG from tests.utils import default_config @@ -134,11 +135,7 @@ class _TestImage: # smoll png ( _TestImage( - unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ), + SMALL_PNG, b"image/png", b".png", unhexlify( @@ -593,15 +590,8 @@ def default_config(self): def test_upload_innocent(self): """Attempt to upload some innocent data that should be allowed.""" - - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) - self.helper.upload_media( - self.upload_resource, image_data, tok=self.tok, expect_code=200 + self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200 ) def test_upload_ban(self): diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index be6302d17..15ac2bfeb 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2019 New Vector Ltd -# Copyright 2020 The Matrix.org Foundation C.I.C +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +18,7 @@ import sys import warnings from asyncio import Future +from binascii import unhexlify from typing import Any, Awaitable, Callable, TypeVar from unittest.mock import Mock @@ -117,3 +117,13 @@ class FakeResponse: def deliverBody(self, protocol): protocol.dataReceived(self.body) protocol.connectionLost(Failure(ResponseDone())) + + +# A small image used in some tests. 
+# +# Resolution: 1×1, MIME type: image/png, Extension: png, Size: 67 B +SMALL_PNG = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" +) From 6b6bb81b23425cf4f8e0c739946783b98ad056b8 Mon Sep 17 00:00:00 2001 From: Charles Wright <cvwright@kombucha.systems> Date: Fri, 17 Sep 2021 12:04:37 -0500 Subject: [PATCH 11/74] =?UTF-8?q?Fix=20#10837=20by=20adding=20JSON=20encod?= =?UTF-8?q?ing/decoding=20to=20the=20Module=20API=20example=E2=80=A6=20(#1?= =?UTF-8?q?0845)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- changelog.d/10845.doc | 1 + docs/modules/spam_checker_callbacks.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10845.doc diff --git a/changelog.d/10845.doc b/changelog.d/10845.doc new file mode 100644 index 000000000..a13c845ae --- /dev/null +++ b/changelog.d/10845.doc @@ -0,0 +1 @@ +Fix some crashes in the Module API example code, by adding JSON encoding/decoding. diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index c45eafcc4..81574a015 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -136,9 +136,9 @@ class IsUserEvilResource(Resource): self.evil_users = config.get("evil_users") or [] def render_GET(self, request: Request): - user = request.args.get(b"user")[0] + user = request.args.get(b"user")[0].decode() request.setHeader(b"Content-Type", b"application/json") - return json.dumps({"evil": user in self.evil_users}) + return json.dumps({"evil": user in self.evil_users}).encode() class ListSpamChecker: From 437961744c6c8761e6483bb215e5e779123ffd97 Mon Sep 17 00:00:00 2001 From: reivilibre <38398653+reivilibre@users.noreply.github.com> Date: Mon, 20 Sep 2021 10:26:13 +0100 Subject: [PATCH 12/74] Fix remove_stale_pushers job on SQLite. 
(#10843) --- changelog.d/10843.bugfix | 1 + synapse/storage/database.py | 21 +++++++++++-------- .../storage/databases/main/account_data.py | 2 +- synapse/storage/databases/main/events.py | 2 +- .../databases/main/events_bg_updates.py | 4 ++-- synapse/storage/databases/main/pusher.py | 4 ++-- synapse/storage/databases/main/state.py | 4 ++-- synapse/storage/databases/main/ui_auth.py | 6 +++--- synapse/storage/databases/state/store.py | 6 +++--- 9 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 changelog.d/10843.bugfix diff --git a/changelog.d/10843.bugfix b/changelog.d/10843.bugfix new file mode 100644 index 000000000..5027a1dbe --- /dev/null +++ b/changelog.d/10843.bugfix @@ -0,0 +1 @@ +Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 0084d9f96..f5a8f90a0 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1632,7 +1632,7 @@ def simple_select_many_txn( txn: LoggingTransaction, table: str, column: str, - iterable: Iterable[Any], + iterable: Collection[Any], keyvalues: Dict[str, Any], retcols: Iterable[str], ) -> List[Dict[str, Any]]: @@ -1891,29 +1891,32 @@ def simple_delete_many_txn( txn: LoggingTransaction, table: str, column: str, - iterable: Iterable[Any], + values: Collection[Any], keyvalues: Dict[str, Any], ) -> int: """Executes a DELETE query on the named table. - Filters rows by if value of `column` is in `iterable`. + Deletes the rows: + - whose value of `column` is in `values`; AND + - that match extra column-value pairs specified in `keyvalues`. 
Args: txn: Transaction object table: string giving the table name - column: column name to test for inclusion against `iterable` - iterable: list - keyvalues: dict of column names and values to select the rows with + column: column name to test for inclusion against `values` + values: values of `column` which choose rows to delete + keyvalues: dict of extra column names and values to select the rows + with. They will be ANDed together with the main predicate. Returns: Number rows deleted """ - if not iterable: + if not values: return 0 sql = "DELETE FROM %s" % table - clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable) + clause, values = make_in_list_sql_clause(txn.database_engine, column, values) clauses = [clause] for key, value in keyvalues.items(): @@ -2098,7 +2101,7 @@ def simple_search_list_txn( def make_in_list_sql_clause( - database_engine: BaseDatabaseEngine, column: str, iterable: Iterable + database_engine: BaseDatabaseEngine, column: str, iterable: Collection[Any] ) -> Tuple[str, list]: """Returns an SQL clause that checks the given column is in the iterable. 
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 1d02795f4..d0cf3460d 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -494,7 +494,7 @@ def _add_account_data_for_user( txn, table="ignored_users", column="ignored_user_id", - iterable=previously_ignored_users - currently_ignored_users, + values=previously_ignored_users - currently_ignored_users, keyvalues={"ignorer_user_id": user_id}, ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8e691678e..dec7e8594 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -667,7 +667,7 @@ def _add_chain_cover_index( table="event_auth_chain_to_calculate", keyvalues={}, column="event_id", - iterable=new_chain_tuples, + values=new_chain_tuples, ) # Now we need to calculate any new links between chains caused by diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 6fcb2b835..1afc59faf 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -490,7 +490,7 @@ def _cleanup_extremities_bg_update_txn(txn): txn=txn, table="event_forward_extremities", column="event_id", - iterable=to_delete, + values=to_delete, keyvalues={}, ) @@ -520,7 +520,7 @@ def _cleanup_extremities_bg_update_txn(txn): txn=txn, table="_extremities_to_check", column="event_id", - iterable=original_set, + values=original_set, keyvalues={}, ) diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 63ac09c61..a93caae8d 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -324,7 +324,7 @@ def _delete_pushers(txn) -> int: txn, table="pushers", column="user_name", - iterable=users, + values=users, 
keyvalues={}, ) @@ -373,7 +373,7 @@ def _delete_pushers(txn) -> int: txn, table="pushers", column="id", - iterable=(pusher_id for pusher_id, token in pushers if token is None), + values=[pusher_id for pusher_id, token in pushers if token is None], keyvalues={}, ) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 8e22da99a..a8e8dd457 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -473,7 +473,7 @@ def _background_remove_left_rooms_txn(txn): txn, table="current_state_events", column="room_id", - iterable=to_delete, + values=to_delete, keyvalues={}, ) @@ -481,7 +481,7 @@ def _background_remove_left_rooms_txn(txn): txn, table="event_forward_extremities", column="room_id", - iterable=to_delete, + values=to_delete, keyvalues={}, ) diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py index 4d6bbc94c..340ca9e47 100644 --- a/synapse/storage/databases/main/ui_auth.py +++ b/synapse/storage/databases/main/ui_auth.py @@ -326,7 +326,7 @@ def _delete_old_ui_auth_sessions_txn( txn, table="ui_auth_sessions_ips", column="session_id", - iterable=session_ids, + values=session_ids, keyvalues={}, ) @@ -377,7 +377,7 @@ def _delete_old_ui_auth_sessions_txn( txn, table="ui_auth_sessions_credentials", column="session_id", - iterable=session_ids, + values=session_ids, keyvalues={}, ) @@ -386,7 +386,7 @@ def _delete_old_ui_auth_sessions_txn( txn, table="ui_auth_sessions", column="session_id", - iterable=session_ids, + values=session_ids, keyvalues={}, ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f1e3a27e6..c4c8c0021 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -664,7 +664,7 @@ def _purge_room_state_txn( txn, table="state_groups_state", column="state_group", - iterable=state_groups_to_delete, + values=state_groups_to_delete, 
keyvalues={}, ) @@ -675,7 +675,7 @@ def _purge_room_state_txn( txn, table="state_group_edges", column="state_group", - iterable=state_groups_to_delete, + values=state_groups_to_delete, keyvalues={}, ) @@ -686,6 +686,6 @@ def _purge_room_state_txn( txn, table="state_groups", column="id", - iterable=state_groups_to_delete, + values=state_groups_to_delete, keyvalues={}, ) From b3590614da7e3e17e75530a9d4808df17be9b127 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Mon, 20 Sep 2021 08:56:23 -0400 Subject: [PATCH 13/74] Require type hints in the handlers module. (#10831) Adds missing type hints to methods in the synapse.handlers module and requires all methods to have type hints there. This also removes the unused construct_auth_difference method from the FederationHandler. --- changelog.d/10831.misc | 1 + mypy.ini | 3 + synapse/config/password_auth_providers.py | 4 +- synapse/handlers/_base.py | 14 ++- synapse/handlers/account_data.py | 4 +- synapse/handlers/account_validity.py | 4 +- synapse/handlers/appservice.py | 18 +-- synapse/handlers/auth.py | 45 ++++---- synapse/handlers/cas.py | 18 ++- synapse/handlers/device.py | 2 +- synapse/handlers/e2e_keys.py | 4 +- synapse/handlers/event_auth.py | 7 +- synapse/handlers/federation.py | 130 ---------------------- synapse/handlers/federation_event.py | 8 +- synapse/handlers/groups_local.py | 8 +- synapse/handlers/initial_sync.py | 8 +- synapse/handlers/message.py | 20 ++-- synapse/handlers/oidc.py | 34 +++--- synapse/handlers/pagination.py | 19 ++-- synapse/handlers/presence.py | 45 +++++--- synapse/handlers/profile.py | 4 +- synapse/handlers/receipts.py | 4 +- synapse/handlers/register.py | 2 +- synapse/handlers/room.py | 16 ++- synapse/handlers/room_list.py | 12 +- synapse/handlers/room_member.py | 8 +- synapse/handlers/room_summary.py | 2 +- synapse/handlers/saml.py | 14 +-- synapse/handlers/send_email.py | 4 +- synapse/handlers/sso.py | 6 +- synapse/handlers/stats.py | 2 +- 
synapse/handlers/sync.py | 11 +- synapse/handlers/typing.py | 4 +- synapse/handlers/ui_auth/checkers.py | 2 +- synapse/handlers/user_directory.py | 2 +- 35 files changed, 194 insertions(+), 295 deletions(-) create mode 100644 changelog.d/10831.misc diff --git a/changelog.d/10831.misc b/changelog.d/10831.misc new file mode 100644 index 000000000..f09af2e00 --- /dev/null +++ b/changelog.d/10831.misc @@ -0,0 +1 @@ +Add missing type hints to handlers. diff --git a/mypy.ini b/mypy.ini index b21e1555a..3cb6cecd7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -91,6 +91,9 @@ files = tests/util/test_itertools.py, tests/util/test_stream_change_cache.py +[mypy-synapse.handlers.*] +disallow_untyped_defs = True + [mypy-synapse.rest.*] disallow_untyped_defs = True diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 0f5b2b397..83994df79 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, List +from typing import Any, List, Tuple, Type from synapse.util.module_loader import load_module @@ -25,7 +25,7 @@ class PasswordAuthProviderConfig(Config): section = "authproviders" def read_config(self, config, **kwargs): - self.password_providers: List[Any] = [] + self.password_providers: List[Tuple[Type, Any]] = [] providers = [] # We want to be backwards compatible with the old `ldap_config` diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index c23ccd6dd..0ccef884e 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Optional from synapse.api.ratelimiting import Ratelimiter +from synapse.types import Requester if TYPE_CHECKING: from synapse.server import HomeServer @@ -63,16 +64,21 @@ def __init__(self, hs: "HomeServer"): self.event_builder_factory = hs.get_event_builder_factory() - async def ratelimit(self, requester, update=True, is_admin_redaction=False): + async def ratelimit( + self, + requester: Requester, + update: bool = True, + is_admin_redaction: bool = False, + ) -> None: """Ratelimits requests. Args: - requester (Requester) - update (bool): Whether to record that a request is being processed. + requester + update: Whether to record that a request is being processed. Set to False when doing multiple checks for one request (e.g. to check up front if we would reject the request), and set to True for the last call for a given request. - is_admin_redaction (bool): Whether this is a room admin/moderator + is_admin_redaction: Whether this is a room admin/moderator redacting an event. If so then we may apply different ratelimits depending on config. 
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index affb54e0e..e9e7a7854 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import random -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING, Any, List, Tuple from synapse.replication.http.account_data import ( ReplicationAddTagRestServlet, @@ -171,7 +171,7 @@ def get_current_key(self, direction: str = "f") -> int: return self.store.get_max_account_data_stream_id() async def get_new_events( - self, user: UserID, from_key: int, **kwargs + self, user: UserID, from_key: int, **kwargs: Any ) -> Tuple[List[JsonDict], int]: user_id = user.to_string() last_stream_id = from_key diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index a9c2222f4..4724565ba 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -99,7 +99,7 @@ def register_account_validity_callbacks( on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None, on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None, on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None, - ): + ) -> None: """Register callbacks from module for each hook.""" if is_user_expired is not None: self._is_user_expired_callbacks.append(is_user_expired) @@ -165,7 +165,7 @@ async def is_user_expired(self, user_id: str) -> bool: return False - async def on_user_registration(self, user_id: str): + async def on_user_registration(self, user_id: str) -> None: """Tell third-party modules about a user's registration. 
Args: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index a7b5a4e9c..8bde9ed66 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Union from prometheus_client import Counter @@ -58,7 +58,7 @@ def __init__(self, hs: "HomeServer"): self.current_max = 0 self.is_processing = False - def notify_interested_services(self, max_token: RoomStreamToken): + def notify_interested_services(self, max_token: RoomStreamToken) -> None: """Notifies (pushes) all application services interested in this event. Pushing is done asynchronously, so this method won't block for any @@ -82,7 +82,7 @@ def notify_interested_services(self, max_token: RoomStreamToken): self._notify_interested_services(max_token) @wrap_as_background_process("notify_interested_services") - async def _notify_interested_services(self, max_token: RoomStreamToken): + async def _notify_interested_services(self, max_token: RoomStreamToken) -> None: with Measure(self.clock, "notify_interested_services"): self.is_processing = True try: @@ -100,7 +100,7 @@ async def _notify_interested_services(self, max_token: RoomStreamToken): for event in events: events_by_room.setdefault(event.room_id, []).append(event) - async def handle_event(event): + async def handle_event(event: EventBase) -> None: # Gather interested services services = await self._get_services_for_event(event) if len(services) == 0: @@ -116,9 +116,9 @@ async def handle_event(event): if not self.started_scheduler: - async def start_scheduler(): + async def start_scheduler() -> None: try: - return await self.scheduler.start() + await self.scheduler.start() except Exception: logger.error("Application Services Failure") @@ 
-137,7 +137,7 @@ async def start_scheduler(): "appservice_sender" ).observe((now - ts) / 1000) - async def handle_room_events(events): + async def handle_room_events(events: Iterable[EventBase]) -> None: for event in events: await handle_event(event) @@ -184,7 +184,7 @@ def notify_interested_services_ephemeral( stream_key: str, new_token: Optional[int], users: Optional[Collection[Union[str, UserID]]] = None, - ): + ) -> None: """This is called by the notifier in the background when a ephemeral event handled by the homeserver. @@ -226,7 +226,7 @@ async def _notify_interested_services_ephemeral( stream_key: str, new_token: Optional[int], users: Collection[Union[str, UserID]], - ): + ) -> None: logger.debug("Checking interested services for %s" % (stream_key)) with Measure(self.clock, "notify_interested_services_ephemeral"): for service in services: diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 3ea627008..bcd4249e0 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -29,6 +29,7 @@ Mapping, Optional, Tuple, + Type, Union, cast, ) @@ -439,7 +440,7 @@ async def _get_available_ui_auth_types(self, user: UserID) -> Iterable[str]: return ui_auth_types - def get_enabled_auth_types(self): + def get_enabled_auth_types(self) -> Iterable[str]: """Return the enabled user-interactive authentication types Returns the UI-Auth types which are supported by the homeserver's current @@ -702,7 +703,7 @@ async def get_session_data( except StoreError: raise SynapseError(400, "Unknown session ID: %s" % (session_id,)) - async def _expire_old_sessions(self): + async def _expire_old_sessions(self) -> None: """ Invalidate any user interactive authentication sessions that have expired. 
""" @@ -1352,7 +1353,7 @@ async def validate_short_term_login_token( await self.auth.check_auth_blocking(res.user_id) return res - async def delete_access_token(self, access_token: str): + async def delete_access_token(self, access_token: str) -> None: """Invalidate a single access token Args: @@ -1381,7 +1382,7 @@ async def delete_access_tokens_for_user( user_id: str, except_token_id: Optional[int] = None, device_id: Optional[str] = None, - ): + ) -> None: """Invalidate access tokens belonging to a user Args: @@ -1409,7 +1410,7 @@ async def delete_access_tokens_for_user( async def add_threepid( self, user_id: str, medium: str, address: str, validated_at: int - ): + ) -> None: # check if medium has a valid value if medium not in ["email", "msisdn"]: raise SynapseError( @@ -1480,7 +1481,7 @@ async def hash(self, password: str) -> str: Hashed password. """ - def _do_hash(): + def _do_hash() -> str: # Normalise the Unicode in the password pw = unicodedata.normalize("NFKC", password) @@ -1504,7 +1505,7 @@ async def validate_hash( Whether self.hash(password) == stored_hash. """ - def _do_validate_hash(checked_hash: bytes): + def _do_validate_hash(checked_hash: bytes) -> bool: # Normalise the Unicode in the password pw = unicodedata.normalize("NFKC", password) @@ -1581,7 +1582,7 @@ async def complete_sso_login( client_redirect_url: str, extra_attributes: Optional[JsonDict] = None, new_user: bool = False, - ): + ) -> None: """Having figured out a mxid for this user, complete the HTTP request Args: @@ -1627,7 +1628,7 @@ def _complete_sso_login( extra_attributes: Optional[JsonDict] = None, new_user: bool = False, user_profile_data: Optional[ProfileInfo] = None, - ): + ) -> None: """ The synchronous portion of complete_sso_login. 
@@ -1726,7 +1727,7 @@ def _expire_sso_extra_attributes(self) -> None: del self._extra_attributes[user_id] @staticmethod - def add_query_param_to_url(url: str, param_name: str, param: Any): + def add_query_param_to_url(url: str, param_name: str, param: Any) -> str: url_parts = list(urllib.parse.urlparse(url)) query = urllib.parse.parse_qsl(url_parts[4], keep_blank_values=True) query.append((param_name, param)) @@ -1734,9 +1735,9 @@ def add_query_param_to_url(url: str, param_name: str, param: Any): return urllib.parse.urlunparse(url_parts) -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class MacaroonGenerator: - hs = attr.ib() + hs: "HomeServer" def generate_guest_access_token(self, user_id: str) -> str: macaroon = self._generate_base_macaroon(user_id) @@ -1816,7 +1817,9 @@ class PasswordProvider: """ @classmethod - def load(cls, module, config, module_api: ModuleApi) -> "PasswordProvider": + def load( + cls, module: Type, config: JsonDict, module_api: ModuleApi + ) -> "PasswordProvider": try: pp = module(config=config, account_handler=module_api) except Exception as e: @@ -1824,7 +1827,7 @@ def load(cls, module, config, module_api: ModuleApi) -> "PasswordProvider": raise return cls(pp, module_api) - def __init__(self, pp, module_api: ModuleApi): + def __init__(self, pp: "PasswordProvider", module_api: ModuleApi): self._pp = pp self._module_api = module_api @@ -1838,7 +1841,7 @@ def __init__(self, pp, module_api: ModuleApi): if g: self._supported_login_types.update(g()) - def __str__(self): + def __str__(self) -> str: return str(self._pp) def get_supported_login_types(self) -> Mapping[str, Iterable[str]]: @@ -1876,19 +1879,19 @@ async def check_auth( """ # first grandfather in a call to check_password if login_type == LoginType.PASSWORD: - g = getattr(self._pp, "check_password", None) - if g: + check_password = getattr(self._pp, "check_password", None) + if check_password: qualified_user_id = self._module_api.get_qualified_user_id(username) - is_valid = 
await self._pp.check_password( + is_valid = await check_password( qualified_user_id, login_dict["password"] ) if is_valid: return qualified_user_id, None - g = getattr(self._pp, "check_auth", None) - if not g: + check_auth = getattr(self._pp, "check_auth", None) + if not check_auth: return None - result = await g(username, login_type, login_dict) + result = await check_auth(username, login_type, login_dict) # Check if the return value is a str or a tuple if isinstance(result, str): diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py index 47ddabbe4..b0b188dc7 100644 --- a/synapse/handlers/cas.py +++ b/synapse/handlers/cas.py @@ -34,20 +34,20 @@ class CasError(Exception): """Used to catch errors when validating the CAS ticket.""" - def __init__(self, error, error_description=None): + def __init__(self, error: str, error_description: Optional[str] = None): self.error = error self.error_description = error_description - def __str__(self): + def __str__(self) -> str: if self.error_description: return f"{self.error}: {self.error_description}" return self.error -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class CasResponse: - username = attr.ib(type=str) - attributes = attr.ib(type=Dict[str, List[Optional[str]]]) + username: str + attributes: Dict[str, List[Optional[str]]] class CasHandler: @@ -133,11 +133,9 @@ async def _validate_ticket( body = pde.response except HttpResponseException as e: description = ( - ( - 'Authorization server responded with a "{status}" error ' - "while exchanging the authorization code." - ).format(status=e.code), - ) + 'Authorization server responded with a "{status}" error ' + "while exchanging the authorization code." 
+ ).format(status=e.code) raise CasError("server_error", description) from e return self._parse_cas_response(body) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 46ee83440..35334725d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -267,7 +267,7 @@ def __init__(self, hs: "HomeServer"): hs.get_distributor().observe("user_left_room", self.user_left_room) - def _check_device_name_length(self, name: Optional[str]): + def _check_device_name_length(self, name: Optional[str]) -> None: """ Checks whether a device name is longer than the maximum allowed length. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 08a137561..d0fb2fc7d 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -202,7 +202,7 @@ async def query_devices( # Now fetch any devices that we don't have in our cache @trace - async def do_remote_query(destination): + async def do_remote_query(destination: str) -> None: """This is called when we are querying the device list of a user on a remote homeserver and their device list is not in the device list cache. 
If we share a room with this user and we're not querying for @@ -447,7 +447,7 @@ async def claim_one_time_keys( } @trace - async def claim_client_keys(destination): + async def claim_client_keys(destination: str) -> None: set_tag("destination", destination) device_keys = remote_queries[destination] try: diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index 4288ffff0..cb81fa098 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -25,6 +25,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.events import EventBase from synapse.events.builder import EventBuilder +from synapse.events.snapshot import EventContext from synapse.types import StateMap, get_domain_from_id from synapse.util.metrics import Measure @@ -45,7 +46,11 @@ def __init__(self, hs: "HomeServer"): self._server_name = hs.hostname async def check_from_context( - self, room_version: str, event, context, do_sig_check=True + self, + room_version: str, + event: EventBase, + context: EventContext, + do_sig_check: bool = True, ) -> None: auth_event_ids = event.auth_event_ids() auth_events_by_id = await self._store.get_events(auth_event_ids) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6754c64c3..8e2cf3387 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1221,136 +1221,6 @@ async def on_get_missing_events( return missing_events - async def construct_auth_difference( - self, local_auth: Iterable[EventBase], remote_auth: Iterable[EventBase] - ) -> Dict: - """Given a local and remote auth chain, find the differences. This - assumes that we have already processed all events in remote_auth - - Params: - local_auth - remote_auth - - Returns: - dict - """ - - logger.debug("construct_auth_difference Start!") - - # TODO: Make sure we are OK with local_auth or remote_auth having more - # auth events in them than strictly necessary. 
- - def sort_fun(ev): - return ev.depth, ev.event_id - - logger.debug("construct_auth_difference after sort_fun!") - - # We find the differences by starting at the "bottom" of each list - # and iterating up on both lists. The lists are ordered by depth and - # then event_id, we iterate up both lists until we find the event ids - # don't match. Then we look at depth/event_id to see which side is - # missing that event, and iterate only up that list. Repeat. - - remote_list = list(remote_auth) - remote_list.sort(key=sort_fun) - - local_list = list(local_auth) - local_list.sort(key=sort_fun) - - local_iter = iter(local_list) - remote_iter = iter(remote_list) - - logger.debug("construct_auth_difference before get_next!") - - def get_next(it, opt=None): - try: - return next(it) - except Exception: - return opt - - current_local = get_next(local_iter) - current_remote = get_next(remote_iter) - - logger.debug("construct_auth_difference before while") - - missing_remotes = [] - missing_locals = [] - while current_local or current_remote: - if current_remote is None: - missing_locals.append(current_local) - current_local = get_next(local_iter) - continue - - if current_local is None: - missing_remotes.append(current_remote) - current_remote = get_next(remote_iter) - continue - - if current_local.event_id == current_remote.event_id: - current_local = get_next(local_iter) - current_remote = get_next(remote_iter) - continue - - if current_local.depth < current_remote.depth: - missing_locals.append(current_local) - current_local = get_next(local_iter) - continue - - if current_local.depth > current_remote.depth: - missing_remotes.append(current_remote) - current_remote = get_next(remote_iter) - continue - - # They have the same depth, so we fall back to the event_id order - if current_local.event_id < current_remote.event_id: - missing_locals.append(current_local) - current_local = get_next(local_iter) - - if current_local.event_id > current_remote.event_id: - 
missing_remotes.append(current_remote) - current_remote = get_next(remote_iter) - continue - - logger.debug("construct_auth_difference after while") - - # missing locals should be sent to the server - # We should find why we are missing remotes, as they will have been - # rejected. - - # Remove events from missing_remotes if they are referencing a missing - # remote. We only care about the "root" rejected ones. - missing_remote_ids = [e.event_id for e in missing_remotes] - base_remote_rejected = list(missing_remotes) - for e in missing_remotes: - for e_id in e.auth_event_ids(): - if e_id in missing_remote_ids: - try: - base_remote_rejected.remove(e) - except ValueError: - pass - - reason_map = {} - - for e in base_remote_rejected: - reason = await self.store.get_rejection_reason(e.event_id) - if reason is None: - # TODO: e is not in the current state, so we should - # construct some proof of that. - continue - - reason_map[e.event_id] = reason - - logger.debug("construct_auth_difference returning") - - return { - "auth_chain": local_auth, - "rejects": { - e.event_id: {"reason": reason_map[e.event_id], "proof": None} - for e in base_remote_rejected - }, - "missing": [e.event_id for e in missing_locals], - } - @log_function async def exchange_third_party_invite( self, sender_user_id: str, target_user_id: str, room_id: str, signed: JsonDict diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 946343fa2..3b95beeb0 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1016,7 +1016,7 @@ async def _resync_device(self, sender: str) -> None: except Exception: logger.exception("Failed to resync device for %s", sender) - async def _handle_marker_event(self, origin: str, marker_event: EventBase): + async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None: """Handles backfilling the insertion event when we receive a marker event that points to one. 
@@ -1109,7 +1109,7 @@ async def _get_events_and_persist( event_map: Dict[str, EventBase] = {} - async def get_event(event_id: str): + async def get_event(event_id: str) -> None: with nested_logging_context(event_id): try: event = await self._federation_client.get_pdu( @@ -1218,7 +1218,7 @@ async def _auth_and_persist_fetched_events( if not event_infos: return - async def prep(ev_info: _NewEventInfo): + async def prep(ev_info: _NewEventInfo) -> EventContext: event = ev_info.event with nested_logging_context(suffix=event.event_id): res = await self._state_handler.compute_event_context(event) @@ -1692,7 +1692,7 @@ async def _update_context_for_auth_events( async def _run_push_actions_and_persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False - ): + ) -> None: """Run the push actions for a received event, and persist it. Args: diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 1a6c5c64a..9e270d461 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -14,7 +14,7 @@ # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Dict, Iterable, List, Set +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, Iterable, List, Set from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError from synapse.types import GroupID, JsonDict, get_domain_from_id @@ -25,12 +25,14 @@ logger = logging.getLogger(__name__) -def _create_rerouter(func_name): +def _create_rerouter(func_name: str) -> Callable[..., Awaitable[JsonDict]]: """Returns an async function that looks at the group id and calls the function on federation or the local group server if the group is local """ - async def f(self, group_id, *args, **kwargs): + async def f( + self: "GroupsLocalWorkerHandler", group_id: str, *args: Any, **kwargs: Any + ) -> JsonDict: if not GroupID.is_valid(group_id): raise SynapseError(400, "%s is not a legal group ID" % (group_id,)) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 0b24b40eb..c942086e7 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -13,7 +13,7 @@ # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, List, Optional, Tuple from twisted.internet import defer @@ -150,7 +150,7 @@ async def _snapshot_all_rooms( if limit is None: limit = 10 - async def handle_room(event: RoomsForUser): + async def handle_room(event: RoomsForUser) -> None: d: JsonDict = { "room_id": event.room_id, "membership": event.membership, @@ -411,7 +411,7 @@ async def _room_initial_sync_joined( presence_handler = self.hs.get_presence_handler() - async def get_presence(): + async def get_presence() -> List[JsonDict]: # If presence is disabled, return an empty list if not self.hs.config.server.use_presence: return [] @@ -428,7 +428,7 @@ async def get_presence(): for s in states ] - async def get_receipts(): + async def get_receipts() -> List[JsonDict]: receipts = await self.store.get_linearized_receipts_for_room( room_id, to_key=now_token.receipt_key ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 10f1584a0..bf4853630 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -46,6 +46,7 @@ from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext from synapse.events.validator import EventValidator +from synapse.handlers.directory import DirectoryHandler from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet @@ -298,7 +299,7 @@ async def get_joined_members(self, requester: Requester, room_id: str) -> dict: for user_id, profile in users_with_profile.items() } - def maybe_schedule_expiry(self, event: EventBase): + def maybe_schedule_expiry(self, event: EventBase) -> None: """Schedule the expiry of an event if there's not already one scheduled, or if the one running is for an event that will expire after the provided timestamp. 
@@ -318,7 +319,7 @@ def maybe_schedule_expiry(self, event: EventBase): # a task scheduled for a timestamp that's sooner than the provided one. self._schedule_expiry_for_event(event.event_id, expiry_ts) - async def _schedule_next_expiry(self): + async def _schedule_next_expiry(self) -> None: """Retrieve the ID and the expiry timestamp of the next event to be expired, and schedule an expiry task for it. @@ -331,7 +332,7 @@ async def _schedule_next_expiry(self): event_id, expiry_ts = res self._schedule_expiry_for_event(event_id, expiry_ts) - def _schedule_expiry_for_event(self, event_id: str, expiry_ts: int): + def _schedule_expiry_for_event(self, event_id: str, expiry_ts: int) -> None: """Schedule an expiry task for the provided event if there's not already one scheduled at a timestamp that's sooner than the provided one. @@ -367,7 +368,7 @@ def _schedule_expiry_for_event(self, event_id: str, expiry_ts: int): event_id, ) - async def _expire_event(self, event_id: str): + async def _expire_event(self, event_id: str) -> None: """Retrieve and expire an event that needs to be expired from the database. If the event doesn't exist in the database, log it and delete the expiry date @@ -1229,7 +1230,10 @@ async def cache_joined_hosts_for_event( self._external_cache_joined_hosts_updates[state_entry.state_group] = None async def _validate_canonical_alias( - self, directory_handler, room_alias_str: str, expected_room_id: str + self, + directory_handler: DirectoryHandler, + room_alias_str: str, + expected_room_id: str, ) -> None: """ Ensure that the given room alias points to the expected room ID. @@ -1477,7 +1481,7 @@ async def persist_and_notify_client_event( # If there's an expiry timestamp on the event, schedule its expiry. 
self._message_handler.maybe_schedule_expiry(event) - def _notify(): + def _notify() -> None: try: self.notifier.on_new_room_event( event, event_pos, max_stream_token, extra_users=extra_users @@ -1523,7 +1527,7 @@ async def _bump_active_time(self, user: UserID) -> None: except Exception: logger.exception("Error bumping presence active time") - async def _send_dummy_events_to_fill_extremities(self): + async def _send_dummy_events_to_fill_extremities(self) -> None: """Background task to send dummy events into rooms that have a large number of extremities """ @@ -1600,7 +1604,7 @@ async def _send_dummy_event_for_room(self, room_id: str) -> bool: ) return False - def _expire_rooms_to_exclude_from_dummy_event_insertion(self): + def _expire_rooms_to_exclude_from_dummy_event_insertion(self) -> None: expire_before = self.clock.time_msec() - _DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY to_expire = set() for room_id, time in self._rooms_to_exclude_from_dummy_event_insertion.items(): diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py index dfc251b2a..aed5a40a7 100644 --- a/synapse/handlers/oidc.py +++ b/synapse/handlers/oidc.py @@ -14,7 +14,7 @@ # limitations under the License. 
import inspect import logging -from typing import TYPE_CHECKING, Dict, Generic, List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, Generic, List, Optional, TypeVar, Union from urllib.parse import urlencode, urlparse import attr @@ -249,11 +249,11 @@ async def handle_oidc_callback(self, request: SynapseRequest) -> None: class OidcError(Exception): """Used to catch errors when calling the token_endpoint""" - def __init__(self, error, error_description=None): + def __init__(self, error: str, error_description: Optional[str] = None): self.error = error self.error_description = error_description - def __str__(self): + def __str__(self) -> str: if self.error_description: return f"{self.error}: {self.error_description}" return self.error @@ -1057,13 +1057,13 @@ def __init__( self._cached_secret = b"" self._cached_secret_replacement_time = 0 - def __str__(self): + def __str__(self) -> str: # if client_auth_method is client_secret_basic, then ClientAuth.prepare calls # encode_client_secret_basic, which calls "{}".format(secret), which ends up # here. return self._get_secret().decode("ascii") - def __bytes__(self): + def __bytes__(self) -> bytes: # if client_auth_method is client_secret_post, then ClientAuth.prepare calls # encode_client_secret_post, which ends up here. return self._get_secret() @@ -1197,21 +1197,21 @@ def verify_oidc_session_token( ) -@attr.s(frozen=True, slots=True) +@attr.s(frozen=True, slots=True, auto_attribs=True) class OidcSessionData: """The attributes which are stored in a OIDC session cookie""" # the Identity Provider being used - idp_id = attr.ib(type=str) + idp_id: str # The `nonce` parameter passed to the OIDC provider. - nonce = attr.ib(type=str) + nonce: str # The URL the client gave when it initiated the flow. 
("" if this is a UI Auth) - client_redirect_url = attr.ib(type=str) + client_redirect_url: str # The session ID of the ongoing UI Auth ("" if this is a login) - ui_auth_session_id = attr.ib(type=str) + ui_auth_session_id: str class UserAttributeDict(TypedDict): @@ -1290,20 +1290,20 @@ async def get_extra_attributes(self, userinfo: UserInfo, token: Token) -> JsonDi # Used to clear out "None" values in templates -def jinja_finalize(thing): +def jinja_finalize(thing: Any) -> Any: return thing if thing is not None else "" env = Environment(finalize=jinja_finalize) -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class JinjaOidcMappingConfig: - subject_claim = attr.ib(type=str) - localpart_template = attr.ib(type=Optional[Template]) - display_name_template = attr.ib(type=Optional[Template]) - email_template = attr.ib(type=Optional[Template]) - extra_attributes = attr.ib(type=Dict[str, Template]) + subject_claim: str + localpart_template: Optional[Template] + display_name_template: Optional[Template] + email_template: Optional[Template] + extra_attributes: Dict[str, Template] class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]): diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 7dc0ee4be..08b93b3ec 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -15,6 +15,8 @@ import logging from typing import TYPE_CHECKING, Any, Dict, Optional, Set +import attr + from twisted.python.failure import Failure from synapse.api.constants import EventTypes, Membership @@ -24,7 +26,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig -from synapse.types import Requester +from synapse.types import JsonDict, Requester from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from 
synapse.visibility import filter_events_for_client @@ -36,15 +38,12 @@ logger = logging.getLogger(__name__) +@attr.s(slots=True, auto_attribs=True) class PurgeStatus: """Object tracking the status of a purge request This class contains information on the progress of a purge request, for return by get_purge_status. - - Attributes: - status (int): Tracks whether this request has completed. One of - STATUS_{ACTIVE,COMPLETE,FAILED} """ STATUS_ACTIVE = 0 @@ -57,10 +56,10 @@ class PurgeStatus: STATUS_FAILED: "failed", } - def __init__(self): - self.status = PurgeStatus.STATUS_ACTIVE + # Tracks whether this request has completed. One of STATUS_{ACTIVE,COMPLETE,FAILED}. + status: int = STATUS_ACTIVE - def asdict(self): + def asdict(self) -> JsonDict: return {"status": PurgeStatus.STATUS_TEXT[self.status]} @@ -107,7 +106,7 @@ def __init__(self, hs: "HomeServer"): async def purge_history_for_rooms_in_range( self, min_ms: Optional[int], max_ms: Optional[int] - ): + ) -> None: """Purge outdated events from rooms within the given retention range. 
If a default retention policy is defined in the server's configuration and its @@ -291,7 +290,7 @@ async def _purge_history( self._purges_in_progress_by_room.discard(room_id) # remove the purge from the list 24 hours after it completes - def clear_purge(): + def clear_purge() -> None: del self._purges_by_id[purge_id] self.hs.get_reactor().callLater(24 * 3600, clear_purge) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 4ab962a84..841c8815b 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -26,18 +26,22 @@ import logging from bisect import bisect from contextlib import contextmanager +from types import TracebackType from typing import ( TYPE_CHECKING, Any, + Awaitable, Callable, Collection, Dict, FrozenSet, + Generator, Iterable, List, Optional, Set, Tuple, + Type, Union, ) @@ -240,7 +244,7 @@ async def set_state( """ @abc.abstractmethod - async def bump_presence_active_time(self, user: UserID): + async def bump_presence_active_time(self, user: UserID) -> None: """We've seen the user do something that indicates they're interacting with the app. """ @@ -274,7 +278,7 @@ async def update_external_syncs_clear(self, process_id: str) -> None: async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list - ): + ) -> None: """Process streams received over replication.""" await self._federation_queue.process_replication_rows( stream_name, instance_name, token, rows @@ -286,7 +290,7 @@ def get_federation_queue(self) -> "PresenceFederationQueue": async def maybe_send_presence_to_interested_destinations( self, states: List[UserPresenceState] - ): + ) -> None: """If this instance is a federation sender, send the states to all destinations that are interested. Filters out any states for remote users. 
@@ -309,7 +313,7 @@ async def maybe_send_presence_to_interested_destinations( for destination, host_states in hosts_to_states.items(): self._federation.send_presence_to_destinations(host_states, [destination]) - async def send_full_presence_to_users(self, user_ids: Collection[str]): + async def send_full_presence_to_users(self, user_ids: Collection[str]) -> None: """ Adds to the list of users who should receive a full snapshot of presence upon their next sync. Note that this only works for local users. @@ -363,7 +367,12 @@ async def is_visible(self, observed_user: UserID, observer_user: UserID) -> bool class _NullContextManager(ContextManager[None]): """A context manager which does nothing.""" - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: pass @@ -468,7 +477,7 @@ async def user_syncing( if self._user_to_num_current_syncs[user_id] == 1: self.mark_as_coming_online(user_id) - def _end(): + def _end() -> None: # We check that the user_id is in user_to_num_current_syncs because # user_to_num_current_syncs may have been cleared if we are # shutting down. @@ -480,7 +489,7 @@ def _end(): self.mark_as_going_offline(user_id) @contextlib.contextmanager - def _user_syncing(): + def _user_syncing() -> Generator[None, None, None]: try: yield finally: @@ -503,7 +512,7 @@ async def notify_from_replication( async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list - ): + ) -> None: await super().process_replication_rows(stream_name, instance_name, token, rows) if stream_name != PresenceStream.NAME: @@ -689,7 +698,7 @@ def __init__(self, hs: "HomeServer"): # Start a LoopingCall in 30s that fires every 5s. # The initial delay is to allow disconnected clients a chance to # reconnect before we treat them as offline. 
- def run_timeout_handler(): + def run_timeout_handler() -> Awaitable[None]: return run_as_background_process( "handle_presence_timeouts", self._handle_timeouts ) @@ -698,7 +707,7 @@ def run_timeout_handler(): 30, self.clock.looping_call, run_timeout_handler, 5000 ) - def run_persister(): + def run_persister() -> Awaitable[None]: return run_as_background_process( "persist_presence_changes", self._persist_unpersisted_changes ) @@ -942,8 +951,8 @@ async def user_syncing( when users disconnect/reconnect. Args: - user_id (str) - affect_presence (bool): If false this function will be a no-op. + user_id + affect_presence: If false this function will be a no-op. Useful for streams that are not associated with an actual client that is being used by a user. """ @@ -978,7 +987,7 @@ async def user_syncing( ] ) - async def _end(): + async def _end() -> None: try: self.user_to_num_current_syncs[user_id] -= 1 @@ -994,7 +1003,7 @@ async def _end(): logger.exception("Error updating presence after sync") @contextmanager - def _user_syncing(): + def _user_syncing() -> Generator[None, None, None]: try: yield finally: @@ -1264,7 +1273,7 @@ def notify_new_event(self) -> None: if self._event_processing: return - async def _process_presence(): + async def _process_presence() -> None: assert not self._event_processing self._event_processing = True @@ -1513,7 +1522,7 @@ async def get_new_events( room_ids: Optional[List[str]] = None, include_offline: bool = True, explicit_room_id: Optional[str] = None, - **kwargs, + **kwargs: Any, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: # 1. Get the rooms the user is in. 
@@ -2074,7 +2083,7 @@ def __init__(self, hs: "HomeServer", presence_handler: BasePresenceHandler): if self._queue_presence_updates: self._clock.looping_call(self._clear_queue, self._CLEAR_ITEMS_EVERY_MS) - def _clear_queue(self): + def _clear_queue(self) -> None: """Clear out older entries from the queue.""" clear_before = self._clock.time_msec() - self._KEEP_ITEMS_IN_QUEUE_FOR_MS @@ -2205,7 +2214,7 @@ async def get_replication_rows( async def process_replication_rows( self, stream_name: str, instance_name: str, token: int, rows: list - ): + ) -> None: if stream_name != PresenceFederationStream.NAME: return diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 51adf8762..246eb9828 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -254,7 +254,7 @@ async def set_avatar_url( requester: Requester, new_avatar_url: str, by_admin: bool = False, - ): + ) -> None: """Set a new avatar URL for a user. Args: @@ -425,7 +425,7 @@ async def check_profile_query_allowed( raise @wrap_as_background_process("Update remote profile") - async def _update_remote_profile_cache(self): + async def _update_remote_profile_cache(self) -> None: """Called periodically to check profiles of remote users we haven't checked in a while. """ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index a49b8ee4b..c7567ac05 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import TYPE_CHECKING, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, List, Optional, Tuple from synapse.api.constants import ReadReceiptEventFields from synapse.appservice import ApplicationService @@ -216,7 +216,7 @@ def filter_out_hidden(events: List[JsonDict], user_id: str) -> List[JsonDict]: return visible_events async def get_new_events( - self, from_key: int, room_ids: List[str], user: UserID, **kwargs + self, from_key: int, room_ids: List[str], user: UserID, **kwargs: Any ) -> Tuple[List[JsonDict], int]: from_key = int(from_key) to_key = self.get_current_key() diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 38c4993da..efb7d2676 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -125,7 +125,7 @@ async def check_username( localpart: str, guest_access_token: Optional[str] = None, assigned_user_id: Optional[str] = None, - ): + ) -> None: if types.contains_invalid_mxid_characters(localpart): raise SynapseError( 400, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 9345ae02e..abdd50616 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1,6 +1,4 @@ -# Copyright 2014 - 2016 OpenMarket Ltd -# Copyright 2018-2019 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2016-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -186,7 +184,7 @@ async def upgrade_room( async def _upgrade_room( self, requester: Requester, old_room_id: str, new_version: RoomVersion - ): + ) -> str: """ Args: requester: the user requesting the upgrade @@ -512,7 +510,7 @@ async def _move_aliases_to_new_room( old_room_id: str, new_room_id: str, old_room_state: StateMap[str], - ): + ) -> None: # check to see if we have a canonical alias. 
canonical_alias_event = None canonical_alias_event_id = old_room_state.get((EventTypes.CanonicalAlias, "")) @@ -902,7 +900,7 @@ async def _send_events_for_new_room( event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} - def create(etype: str, content: JsonDict, **kwargs) -> JsonDict: + def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} e.update(event_keys) @@ -910,7 +908,7 @@ def create(etype: str, content: JsonDict, **kwargs) -> JsonDict: return e - async def send(etype: str, content: JsonDict, **kwargs) -> int: + async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) # Allow these events to be sent even if the user is shadow-banned to @@ -1033,7 +1031,7 @@ async def _generate_room_id( creator_id: str, is_public: bool, room_version: RoomVersion, - ): + ) -> str: # autogen room IDs and try to create it. We may clash, so just # try a few times till one goes through, giving up eventually. 
attempts = 0 @@ -1097,7 +1095,7 @@ async def get_event_context( users = await self.store.get_users_in_room(room_id) is_peeking = user.to_string() not in users - async def filter_evts(events): + async def filter_evts(events: List[EventBase]) -> List[EventBase]: if use_admin_priviledge: return events return await filter_events_for_client( diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 81680b8df..c83ff585e 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -14,7 +14,7 @@ import logging from collections import namedtuple -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Any, Optional, Tuple import msgpack from unpaddedbase64 import decode_base64, encode_base64 @@ -33,7 +33,7 @@ SynapseError, ) from synapse.types import JsonDict, ThirdPartyInstanceID -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.caches.response_cache import ResponseCache from ._base import BaseHandler @@ -169,7 +169,7 @@ async def _get_public_room_list( ignore_non_federatable=from_federation, ) - def build_room_entry(room): + def build_room_entry(room: JsonDict) -> JsonDict: entry = { "room_id": room["room_id"], "name": room["name"], @@ -249,10 +249,10 @@ async def generate_room_entry( self, room_id: str, num_joined_users: int, - cache_context, + cache_context: _CacheContext, with_alias: bool = True, allow_private: bool = False, - ) -> Optional[dict]: + ) -> Optional[JsonDict]: """Returns the entry for a room Args: @@ -507,7 +507,7 @@ def to_token(self) -> str: ) ) - def copy_and_replace(self, **kwds) -> "RoomListNextBatch": + def copy_and_replace(self, **kwds: Any) -> "RoomListNextBatch": return self._replace(**kwds) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 439020164..a3e13c227 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -225,7 
+225,7 @@ async def ratelimit_multiple_invites( room_id: Optional[str], n_invites: int, update: bool = True, - ): + ) -> None: """Ratelimit more than one invite sent by the given requester in the given room. Args: @@ -249,7 +249,7 @@ async def ratelimit_invite( requester: Optional[Requester], room_id: Optional[str], invitee_user_id: str, - ): + ) -> None: """Ratelimit invites by room and by target user. If room ID is missing then we just rate limit by target user. @@ -386,7 +386,7 @@ async def _local_membership_update( return result_event.event_id, result_event.internal_metadata.stream_ordering async def copy_room_tags_and_direct_to_room( - self, old_room_id, new_room_id, user_id + self, old_room_id: str, new_room_id: str, user_id: str ) -> None: """Copies the tags and direct room state from one room to another. @@ -1030,7 +1030,7 @@ async def send_membership_event( event: EventBase, context: EventContext, ratelimit: bool = True, - ): + ) -> None: """ Change the membership status of a user in a room. diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 781da9e81..4e28fb968 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -541,7 +541,7 @@ async def get_federation_hierarchy( origin: str, requested_room_id: str, suggested_only: bool, - ): + ) -> JsonDict: """ Implementation of the room hierarchy Federation API. diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py index 0066d570c..185befbe9 100644 --- a/synapse/handlers/saml.py +++ b/synapse/handlers/saml.py @@ -40,15 +40,15 @@ logger = logging.getLogger(__name__) -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class Saml2SessionData: """Data we track about SAML2 sessions""" # time the session was created, in milliseconds - creation_time = attr.ib() + creation_time: int # The user interactive authentication session ID associated with this SAML # session (or None if this SAML session is for an initial login). 
- ui_auth_session_id = attr.ib(type=Optional[str], default=None) + ui_auth_session_id: Optional[str] = None class SamlHandler(BaseHandler): @@ -359,7 +359,7 @@ def _remote_id_from_saml_response( return remote_user_id - def expire_sessions(self): + def expire_sessions(self) -> None: expire_before = self.clock.time_msec() - self._saml2_session_lifetime to_expire = set() for reqid, data in self._outstanding_requests_dict.items(): @@ -391,10 +391,10 @@ def dot_replace_for_mxid(username: str) -> str: } -@attr.s +@attr.s(auto_attribs=True) class SamlConfig: - mxid_source_attribute = attr.ib() - mxid_mapper = attr.ib() + mxid_source_attribute: str + mxid_mapper: Callable[[str], str] class DefaultSamlMappingProvider: diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py index a31fe3e3c..25e6b012b 100644 --- a/synapse/handlers/send_email.py +++ b/synapse/handlers/send_email.py @@ -17,7 +17,7 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from io import BytesIO -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional from pkg_resources import parse_version @@ -79,7 +79,7 @@ async def _sendmail( msg = BytesIO(msg_bytes) d: "Deferred[object]" = Deferred() - def build_sender_factory(**kwargs) -> ESMTPSenderFactory: + def build_sender_factory(**kwargs: Any) -> ESMTPSenderFactory: return ESMTPSenderFactory( username, password, diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 05aa76d6a..e044251a1 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -205,7 +205,7 @@ def __init__(self, hs: "HomeServer"): self._consent_at_registration = hs.config.consent.user_consent_at_registration - def register_identity_provider(self, p: SsoIdentityProvider): + def register_identity_provider(self, p: SsoIdentityProvider) -> None: p_id = p.idp_id assert p_id not in self._identity_providers self._identity_providers[p_id] = p @@ -856,7 +856,7 @@ async def 
handle_submit_username_request( async def handle_terms_accepted( self, request: Request, session_id: str, terms_version: str - ): + ) -> None: """Handle a request to the new-user 'consent' endpoint Will serve an HTTP response to the request. @@ -959,7 +959,7 @@ async def register_sso_user(self, request: Request, session_id: str) -> None: new_user=True, ) - def _expire_old_sessions(self): + def _expire_old_sessions(self) -> None: to_expire = [] now = int(self._clock.time_msec()) diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index b64ce8cab..9fc53333f 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -68,7 +68,7 @@ def notify_new_event(self) -> None: self._is_processing = True - async def process(): + async def process() -> None: try: await self._unsafe_process() finally: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 7523d8e83..e93db4bdc 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -364,7 +364,9 @@ async def _wait_for_sync_for_user( ) else: - async def current_sync_callback(before_token, after_token) -> SyncResult: + async def current_sync_callback( + before_token: StreamToken, after_token: StreamToken + ) -> SyncResult: return await self.current_sync_for_user(sync_config, since_token) result = await self.notifier.wait_for_events( @@ -1532,9 +1534,9 @@ async def _generate_sync_entry_for_rooms( newly_joined_rooms = room_changes.newly_joined_rooms newly_left_rooms = room_changes.newly_left_rooms - async def handle_room_entries(room_entry: "RoomSyncResultBuilder"): + async def handle_room_entries(room_entry: "RoomSyncResultBuilder") -> None: logger.debug("Generating room entry for %s", room_entry.room_id) - res = await self._generate_room_entry( + await self._generate_room_entry( sync_result_builder, ignored_users, room_entry, @@ -1544,7 +1546,6 @@ async def handle_room_entries(room_entry: "RoomSyncResultBuilder"): always_include=sync_result_builder.full_state, ) 
logger.debug("Generated room entry for %s", room_entry.room_id) - return res await concurrently_execute(handle_room_entries, room_entries, 10) @@ -1925,7 +1926,7 @@ async def _generate_room_entry( tags: Optional[Dict[str, Dict[str, Any]]], account_data: Dict[str, JsonDict], always_include: bool = False, - ): + ) -> None: """Populates the `joined` and `archived` section of `sync_result_builder` based on the `room_builder`. diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 9cea011e6..4492c8567 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -14,7 +14,7 @@ import logging import random from collections import namedtuple -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple from synapse.api.errors import AuthError, ShadowBanError, SynapseError from synapse.appservice import ApplicationService @@ -485,7 +485,7 @@ async def get_new_events_as( return (events, handler._latest_room_serial) async def get_new_events( - self, from_key: int, room_ids: Iterable[str], **kwargs + self, from_key: int, room_ids: Iterable[str], **kwargs: Any ) -> Tuple[List[JsonDict], int]: with Measure(self.clock, "typing.get_new_events"): from_key = int(from_key) diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index d3828dec6..ea9325e96 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -70,7 +70,7 @@ async def check_auth(self, authdict: dict, clientip: str) -> Any: class TermsAuthChecker(UserInteractiveAuthChecker): AUTH_TYPE = LoginType.TERMS - def is_enabled(self): + def is_enabled(self) -> bool: return True async def check_auth(self, authdict: dict, clientip: str) -> Any: diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 6faa1d84b..8dc46d767 100644 --- a/synapse/handlers/user_directory.py +++ 
b/synapse/handlers/user_directory.py @@ -114,7 +114,7 @@ def notify_new_event(self) -> None: if self._is_processing: return - async def process(): + async def process() -> None: try: await self._unsafe_process() finally: From f455b0e4209cd581ea7b75436b178d2843cc6e0d Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Mon, 20 Sep 2021 17:35:16 +0100 Subject: [PATCH 14/74] GHA: reintroduce an env var for `$GITHUB_HEAD_REF` (#10659) This should ensure GHA runs synapse against the same-named sytest branch --- .github/workflows/tests.yml | 1 + changelog.d/10659.misc | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/10659.misc diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8736699ad..fa9c5e036 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -192,6 +192,7 @@ jobs: volumes: - ${{ github.workspace }}:/src env: + SYTEST_BRANCH: ${{ github.head_ref }} POSTGRES: ${{ matrix.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.postgres == 'multi-postgres') && 1}} WORKERS: ${{ matrix.workers && 1 }} diff --git a/changelog.d/10659.misc b/changelog.d/10659.misc new file mode 100644 index 000000000..d677a521c --- /dev/null +++ b/changelog.d/10659.misc @@ -0,0 +1 @@ +Fix GitHub Actions config so we can run sytest on synapse from parallel branches. 
\ No newline at end of file From 6a751ff5e064bbb1fae2915e533031531c9d74e7 Mon Sep 17 00:00:00 2001 From: Aaron Raimist <aaron@raim.ist> Date: Tue, 21 Sep 2021 05:23:34 -0500 Subject: [PATCH 15/74] Allow sending a membership event to unban a user (#10807) * Allow membership event to unban user Signed-off-by: Aaron Raimist <aaron@raim.ist> --- changelog.d/10807.bugfix | 1 + synapse/handlers/room_member.py | 2 +- tests/rest/client/test_rooms.py | 87 ++++++++++++++++++++++++++++++++- tests/rest/client/utils.py | 11 +++++ 4 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10807.bugfix diff --git a/changelog.d/10807.bugfix b/changelog.d/10807.bugfix new file mode 100644 index 000000000..be03f5c73 --- /dev/null +++ b/changelog.d/10807.bugfix @@ -0,0 +1 @@ +Allow sending a membership event to unban a user. Contributed by @aaronraimist. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a3e13c227..7bb3f0bc4 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -668,7 +668,7 @@ async def update_membership_locked( " (membership=%s)" % old_membership, errcode=Codes.BAD_STATE, ) - if old_membership == "ban" and action != "unban": + if old_membership == "ban" and action not in ["ban", "unban", "leave"]: raise SynapseError( 403, "Cannot %s user who was banned" % (action,), diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 50100a5ae..5a01765f4 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -26,7 +26,7 @@ import synapse.rest.admin from synapse.api.constants import EventContentFields, EventTypes, Membership -from synapse.api.errors import HttpResponseException +from synapse.api.errors import Codes, HttpResponseException from synapse.handlers.pagination import PurgeStatus from synapse.rest import admin from synapse.rest.client import account, directory, login, profile, room, sync @@ -377,6 
+377,91 @@ def test_leave_permissions(self): expect_code=403, ) + # tests the "from banned" line from the table in https://spec.matrix.org/unstable/client-server-api/#mroommember + def test_member_event_from_ban(self): + room = self.created_rmid + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self.helper.join(room=room, user=self.user_id) + + other = "@burgundy:red" + + # User cannot ban other since they do not have required power level + self.helper.change_membership( + room=room, + src=self.user_id, + targ=other, + membership=Membership.BAN, + expect_code=403, # expect failure + expect_errcode=Codes.FORBIDDEN, + ) + + # Admin bans other + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.BAN, + expect_code=200, + ) + + # from ban to invite: Must never happen. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.INVITE, + expect_code=403, # expect failure + expect_errcode=Codes.BAD_STATE, + ) + + # from ban to join: Must never happen. + self.helper.change_membership( + room=room, + src=other, + targ=other, + membership=Membership.JOIN, + expect_code=403, # expect failure + expect_errcode=Codes.BAD_STATE, + ) + + # from ban to ban: No change. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.BAN, + expect_code=200, + ) + + # from ban to knock: Must never happen. + self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.KNOCK, + expect_code=403, # expect failure + expect_errcode=Codes.BAD_STATE, + ) + + # User cannot unban other since they do not have required power level + self.helper.change_membership( + room=room, + src=self.user_id, + targ=other, + membership=Membership.LEAVE, + expect_code=403, # expect failure + expect_errcode=Codes.FORBIDDEN, + ) + + # from ban to leave: User was unbanned. 
+ self.helper.change_membership( + room=room, + src=self.rmcreator_id, + targ=other, + membership=Membership.LEAVE, + expect_code=200, + ) + class RoomsMemberListTestCase(RoomBase): """Tests /rooms/$room_id/members/list REST events.""" diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index 954ad1a1f..c56e45fc1 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -138,6 +138,7 @@ def change_membership( extra_data: Optional[dict] = None, tok: Optional[str] = None, expect_code: int = 200, + expect_errcode: str = None, ) -> None: """ Send a membership state event into a room. @@ -150,6 +151,7 @@ def change_membership( extra_data: Extra information to include in the content of the event tok: The user access token to use expect_code: The expected HTTP response code + expect_errcode: The expected Matrix error code """ temp_id = self.auth_user_id self.auth_user_id = src @@ -177,6 +179,15 @@ def change_membership( channel.result["body"], ) + if expect_errcode: + assert ( + str(channel.json_body["errcode"]) == expect_errcode + ), "Expected: %r, got: %r, resp: %r" % ( + expect_errcode, + channel.json_body["errcode"], + channel.result["body"], + ) + self.auth_user_id = temp_id def send( From 60453315bdbbbd364f13ca386de965e015f1062f Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Tue, 21 Sep 2021 13:02:34 +0100 Subject: [PATCH 16/74] Always add local users to the user directory (#10796) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a simplification, but one that'll help make the user directory logic easier to follow with the other changes upcoming. It's not strictly required for those changes, but this will help simplify the resulting logic that listens for `m.room.member` events and generally make the logic easier to follow. This means the config option `search_all_users` ends up controlling the search query only, and not the data we store. 
The cost of doing so is an extra row in the `user_directory` and `user_directory_search` tables for each local user which - belongs to no public rooms - belongs to no private rooms of size ≥ 2 I think the cost of this will be marginal (since they'll already have entries in `users` and `profiles` anyway). As a small upside, a homeserver whose directory was built with this change can toggle `search_all_users` without having to rebuild their directory. Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/10796.misc | 1 + docs/sample_config.yaml | 14 ++++++---- synapse/config/user_directory.py | 14 ++++++---- synapse/handlers/deactivate_account.py | 7 ++--- synapse/handlers/profile.py | 18 ++++++------- synapse/handlers/register.py | 9 +++---- .../storage/databases/main/user_directory.py | 27 +++++++------------ tests/handlers/test_profile.py | 7 +++-- tests/rest/client/test_rooms.py | 12 ++++----- 9 files changed, 54 insertions(+), 55 deletions(-) create mode 100644 changelog.d/10796.misc diff --git a/changelog.d/10796.misc b/changelog.d/10796.misc new file mode 100644 index 000000000..1873b2386 --- /dev/null +++ b/changelog.d/10796.misc @@ -0,0 +1 @@ +Simplify the internal logic which maintains the user directory database tables. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 95cca1655..166cec38d 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2362,12 +2362,16 @@ user_directory: #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. 
# - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. + # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild following the instructions at + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index b10df8a23..2552f688d 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -45,12 +45,16 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. 
+ # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild following the instructions at + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index dcd320c55..a03ff9842 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -257,11 +257,8 @@ async def activate_account(self, user_id: str) -> None: """ # Add the user to the directory, if necessary. user = UserID.from_string(user_id) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(user.localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(user.localpart) + await self.user_directory_handler.handle_local_profile_change(user_id, profile) # Ensure the user is not marked as erased. 
await self.store.mark_user_not_erased(user_id) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 246eb9828..f06070bfc 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -214,11 +214,10 @@ async def set_displayname( target_user.localpart, displayname_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) @@ -300,11 +299,10 @@ async def set_avatar_url( target_user.localpart, avatar_url_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index efb7d2676..1c195c65d 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -295,11 +295,10 @@ async def register_user( shadow_banned=shadow_banned, ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(localpart) + await self.user_directory_handler.handle_local_profile_change( + user_id, profile + ) else: # autogen a sequential user ID diff --git 
a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 8aebdc281..718f3e997 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -85,19 +85,17 @@ def _make_staging_area(txn): self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) del rooms - # If search all users is on, get all the users we want to add. - if self.hs.config.user_directory_search_all_users: - sql = ( - "CREATE TABLE IF NOT EXISTS " - + TEMP_TABLE - + "_users(user_id TEXT NOT NULL)" - ) - txn.execute(sql) + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) - txn.execute("SELECT name FROM users") - users = [{"user_id": x[0]} for x in txn.fetchall()] + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] - self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) new_pos = await self.get_max_stream_id_in_current_state_deltas() await self.db_pool.runInteraction( @@ -265,13 +263,8 @@ def _get_next_batch(txn): async def _populate_user_directory_process_users(self, progress, batch_size): """ - If search_all_users is enabled, add all of the users to the user directory. + Add all local users to the user directory. 
""" - if not self.hs.config.user_directory_search_all_users: - await self.db_pool.updates._end_background_update( - "populate_user_directory_process_users" - ) - return 1 def _get_next_batch(txn): sql = "SELECT user_id FROM %s LIMIT %s" % ( diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 2928c4f48..57cc3e264 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -16,6 +16,7 @@ import synapse.types from synapse.api.errors import AuthError, SynapseError +from synapse.rest import admin from synapse.types import UserID from tests import unittest @@ -25,6 +26,8 @@ class ProfileTestCase(unittest.HomeserverTestCase): """Tests profile management.""" + servlets = [admin.register_servlets] + def make_homeserver(self, reactor, clock): self.mock_federation = Mock() self.mock_registry = Mock() @@ -46,11 +49,11 @@ def register_query_handler(query_type, handler): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() - self.frank = UserID.from_string("@1234ABCD:test") + self.frank = UserID.from_string("@1234abcd:test") self.bob = UserID.from_string("@4567:test") self.alice = UserID.from_string("@alice:remote") - self.get_success(self.store.create_profile(self.frank.localpart)) + self.get_success(self.register_user(self.frank.localpart, "frankpassword")) self.handler = hs.get_profile_handler() diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 5a01765f4..ef847f0f5 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -869,6 +869,12 @@ class RoomJoinRatelimitTestCase(RoomBase): room.register_servlets, ] + def prepare(self, reactor, clock, homeserver): + super().prepare(reactor, clock, homeserver) + # profile changes expect that the user is actually registered + user = UserID.from_string(self.user_id) + self.get_success(self.register_user(user.localpart, "supersecretpassword")) + @unittest.override_config( {"rc_joins": {"local": 
{"per_second": 0.5, "burst_count": 3}}} ) @@ -898,12 +904,6 @@ def test_join_local_ratelimit_profile_change(self): # join in a second. room_ids.append(self.helper.create_room_as(self.user_id)) - # Create a profile for the user, since it hasn't been done on registration. - store = self.hs.get_datastore() - self.get_success( - store.create_profile(UserID.from_string(self.user_id).localpart) - ) - # Update the display name for the user. path = "/_matrix/client/r0/profile/%s/displayname" % self.user_id channel = self.make_request("PUT", path, {"displayname": "John Doe"}) From ee557b5375e376e5664f6a3e372c946f7a754f75 Mon Sep 17 00:00:00 2001 From: Eric Eastwood <erice@element.io> Date: Tue, 21 Sep 2021 08:10:01 -0500 Subject: [PATCH 17/74] Rename `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id` (MSC2716) (#10839) As mentioned in https://github.com/matrix-org/matrix-doc/pull/2716#discussion_r705872887 and https://github.com/matrix-org/synapse/issues/10737 --- changelog.d/10839.misc | 1 + synapse/rest/client/room_batch.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 changelog.d/10839.misc diff --git a/changelog.d/10839.misc b/changelog.d/10839.misc new file mode 100644 index 000000000..d0e10f31d --- /dev/null +++ b/changelog.d/10839.misc @@ -0,0 +1 @@ +Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id`. diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index d466edeec..f73ccc7f6 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -61,7 +61,7 @@ class RoomBatchSendEventRestServlet(RestServlet): some messages, you can only insert older ones after that. tldr; Insert chunks from your most recent history -> oldest history. 
- POST /_matrix/client/unstable/org.matrix.msc2716/rooms/<roomID>/batch_send?prev_event=<eventID>&chunk_id=<chunkID> + POST /_matrix/client/unstable/org.matrix.msc2716/rooms/<roomID>/batch_send?prev_event_id=<eventID>&chunk_id=<chunkID> { "events": [ ... ], "state_events_at_start": [ ... ] @@ -188,24 +188,26 @@ async def on_POST( assert_params_in_dict(body, ["state_events_at_start", "events"]) assert request.args is not None - prev_events_from_query = parse_strings_from_args(request.args, "prev_event") + prev_event_ids_from_query = parse_strings_from_args( + request.args, "prev_event_id" + ) chunk_id_from_query = parse_string(request, "chunk_id") - if prev_events_from_query is None: + if prev_event_ids_from_query is None: raise SynapseError( HTTPStatus.BAD_REQUEST, "prev_event query parameter is required when inserting historical messages back in time", errcode=Codes.MISSING_PARAM, ) - # For the event we are inserting next to (`prev_events_from_query`), + # For the event we are inserting next to (`prev_event_ids_from_query`), # find the most recent auth events (derived from state events) that # allowed that message to be sent. We will use that as a base # to auth our historical messages against. ( most_recent_prev_event_id, _, - ) = await self.store.get_max_depth_of(prev_events_from_query) + ) = await self.store.get_max_depth_of(prev_event_ids_from_query) # mapping from (type, state_key) -> state_event_id prev_state_map = await self.state_store.get_state_ids_for_event( most_recent_prev_event_id @@ -286,7 +288,7 @@ async def on_POST( events_to_create = body["events"] inherited_depth = await self._inherit_depth_from_prev_ids( - prev_events_from_query + prev_event_ids_from_query ) # Figure out which chunk to connect to. If they passed in @@ -321,7 +323,7 @@ async def on_POST( # an insertion event), in which case we just create a new insertion event # that can then get pointed to by a "marker" event later. 
else: - prev_event_ids = prev_events_from_query + prev_event_ids = prev_event_ids_from_query base_insertion_event_dict = self._create_insertion_event_dict( sender=requester.user.to_string(), From 5fca3c8ae62c66a1777bcb85c98679669369b061 Mon Sep 17 00:00:00 2001 From: Hillery Shay <shaysquared@gmail.com> Date: Tue, 21 Sep 2021 08:04:35 -0700 Subject: [PATCH 18/74] Allow Synapse Admin API's Room Search to accept non-ASCII characters (#10859) * add tests for checking if room search works with non-ascii char * change encoding on parse_string to UTF-8 * lints * properly encode search term * lints * add changelog file * update changelog number * set changelog entry filetype to .bugfix * Revert "set changelog entry filetype to .bugfix" This reverts commit be8e5a314251438ec4ec7dbc59ba32162c93e550. * update changelog message and file type * change parse_string default encoding back to ascii and update room search admin api call to parse string * refactor tests * Update tests/rest/admin/test_room.py Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> --- changelog.d/10859.bugfix | 1 + synapse/rest/admin/rooms.py | 2 +- tests/rest/admin/test_room.py | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10859.bugfix diff --git a/changelog.d/10859.bugfix b/changelog.d/10859.bugfix new file mode 100644 index 000000000..c1bfe22d5 --- /dev/null +++ b/changelog.d/10859.bugfix @@ -0,0 +1 @@ +Fix a bug in Unicode support of the room search admin API. It is now possible to search for rooms with non-ASCII characters.
\ No newline at end of file diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index ad83d4b54..8f781f745 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -125,7 +125,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: errcode=Codes.INVALID_PARAM, ) - search_term = parse_string(request, "search_term") + search_term = parse_string(request, "search_term", encoding="utf-8") if search_term == "": raise SynapseError( 400, diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 40e032df7..e798513ac 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -941,6 +941,33 @@ def _search_test( _search_test(None, "bar") _search_test(None, "", expected_http_code=400) + def test_search_term_non_ascii(self): + """Test that searching for a room with non-ASCII characters works correctly""" + + # Create test room + room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + room_name = "ж" + + # Set the name for the room + self.helper.send_state( + room_id, + "m.room.name", + {"name": room_name}, + tok=self.admin_user_tok, + ) + + # make the request and test that the response is what we wanted + search_term = urllib.parse.quote("ж", "utf-8") + url = "/_synapse/admin/v1/rooms?search_term=%s" % (search_term,) + channel = self.make_request( + "GET", + url.encode("ascii"), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(room_id, channel.json_body.get("rooms")[0].get("room_id")) + self.assertEqual("ж", channel.json_body.get("rooms")[0].get("name")) + def test_single_room(self): """Test that a single room can be requested correctly""" # Create two test rooms From 2843058a8b5456dd63b83ad39a992d5d1a285eb6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier <babolivier@matrix.org> Date: Tue, 21 Sep 2021 17:40:20 +0200 Subject: [PATCH 19/74] Test that state events sent by modules 
correctly end up in the room's state (#10835) Test for #10830 Ideally the test would also make sure the new state event comes down sync, but this is probably good enough. --- changelog.d/10835.misc | 1 + tests/rest/client/test_third_party_rules.py | 84 +++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 changelog.d/10835.misc diff --git a/changelog.d/10835.misc b/changelog.d/10835.misc new file mode 100644 index 000000000..0c3d13477 --- /dev/null +++ b/changelog.d/10835.misc @@ -0,0 +1 @@ +Add a test to ensure state events sent by modules get persisted correctly. diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 0ae402964..38ac9be11 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -15,6 +15,7 @@ from typing import Dict from unittest.mock import Mock +from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.module_api import ModuleApi @@ -327,3 +328,86 @@ def test_legacy_on_create_room(self): correctly. """ self.helper.create_room_as(self.user_id, tok=self.tok, expect_code=403) + + def test_sent_event_end_up_in_room_state(self): + """Tests that a state event sent by a module while processing another state event + doesn't get dropped from the state of the room. This is to guard against a bug + where Synapse has been observed doing so, see https://github.com/matrix-org/synapse/issues/10830 + """ + event_type = "org.matrix.test_state" + + # This content will be updated later on, and since we actually use a reference on + # the dict it does the right thing. It's a bit hacky but a handy way of making + # sure the state actually gets updated. + event_content = {"i": -1} + + api = self.hs.get_module_api() + + # Define a callback that sends a custom event on power levels update. 
+ async def test_fn(event: EventBase, state_events): + if event.is_state and event.type == EventTypes.PowerLevels: + await api.create_and_send_event_into_room( + { + "room_id": event.room_id, + "sender": event.sender, + "type": event_type, + "content": event_content, + "state_key": "", + } + ) + return True, None + + self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [test_fn] + + # Sometimes the bug might not happen the first time the event type is added + # to the state but might happen when an event updates the state of the room for + # that type, so we test updating the state several times. + for i in range(5): + # Update the content of the custom state event to be sent by the callback. + event_content["i"] = i + + # Update the room's power levels with a different value each time so Synapse + # doesn't consider an update redundant. + self._update_power_levels(event_default=i) + + # Check that the new event made it to the room's state. + channel = self.make_request( + method="GET", + path="/rooms/" + self.room_id + "/state/" + event_type, + access_token=self.tok, + ) + + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["i"], i) + + def _update_power_levels(self, event_default: int = 0): + """Updates the room's power levels. + + Args: + event_default: Value to use for 'events_default'. 
+ """ + self.helper.send_state( + room_id=self.room_id, + event_type=EventTypes.PowerLevels, + body={ + "ban": 50, + "events": { + "m.room.avatar": 50, + "m.room.canonical_alias": 50, + "m.room.encryption": 100, + "m.room.history_visibility": 100, + "m.room.name": 50, + "m.room.power_levels": 100, + "m.room.server_acl": 100, + "m.room.tombstone": 100, + }, + "events_default": event_default, + "invite": 0, + "kick": 50, + "redact": 50, + "state_default": 50, + "users": {self.user_id: 100}, + "users_default": 0, + }, + tok=self.tok, + ) From ba7a91aea5fd624bf048f0fda0dca80da7a1945e Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Tue, 21 Sep 2021 12:09:57 -0400 Subject: [PATCH 20/74] Refactor oEmbed previews (#10814) The major change is moving the decision of whether to use oEmbed further up the call-stack. This reverts the _download_url method to being a "dumb" function which takes a single URL and downloads it (as it was before #7920). This also makes more minor refactorings: * Renames internal variables for clarity. * Factors out shared code between the HTML and rich oEmbed previews. * Fixes tests to preview an oEmbed image. --- changelog.d/10814.feature | 1 + docs/development/url_previews.md | 21 +- synapse/rest/media/v1/oembed.py | 145 +++++--- synapse/rest/media/v1/preview_url_resource.py | 326 ++++++++++-------- tests/rest/media/v1/test_url_preview.py | 26 +- 5 files changed, 299 insertions(+), 220 deletions(-) create mode 100644 changelog.d/10814.feature diff --git a/changelog.d/10814.feature b/changelog.d/10814.feature new file mode 100644 index 000000000..4fa95a6cc --- /dev/null +++ b/changelog.d/10814.feature @@ -0,0 +1 @@ +Improve oEmbed previews by processing the author name, photo, and video information.
diff --git a/docs/development/url_previews.md b/docs/development/url_previews.md index bbe05e281..aff381360 100644 --- a/docs/development/url_previews.md +++ b/docs/development/url_previews.md @@ -25,16 +25,14 @@ When Synapse is asked to preview a URL it does the following: 3. Kicks off a background process to generate a preview: 1. Checks the database cache by URL and timestamp and returns the result if it has not expired and was successful (a 2xx return code). - 2. Checks if the URL matches an oEmbed pattern. If it does, fetch the oEmbed - response. If this is an image, replace the URL to fetch and continue. If - if it is HTML content, use the HTML as the document and continue. - 3. If it doesn't match an oEmbed pattern, downloads the URL and stores it - into a file via the media storage provider and saves the local media - metadata. - 5. If the media is an image: + 2. Checks if the URL matches an [oEmbed](https://oembed.com/) pattern. If it + does, update the URL to download. + 3. Downloads the URL and stores it into a file via the media storage provider + and saves the local media metadata. + 4. If the media is an image: 1. Generates thumbnails. 2. Generates an Open Graph response based on image properties. - 6. If the media is HTML: + 5. If the media is HTML: 1. Decodes the HTML via the stored file. 2. Generates an Open Graph response from the HTML. 3. If an image exists in the Open Graph response: @@ -42,6 +40,13 @@ When Synapse is asked to preview a URL it does the following: provider and saves the local media metadata. 2. Generates thumbnails. 3. Updates the Open Graph response based on image properties. + 6. If the media is JSON and an oEmbed URL was found: + 1. Convert the oEmbed response to an Open Graph response. + 2. If a thumbnail or image is in the oEmbed response: + 1. Downloads the URL and stores it into a file via the media storage + provider and saves the local media metadata. + 2. Generates thumbnails. + 3. 
Updates the Open Graph response based on image properties. 7. Stores the result in the database cache. 4. Returns the result. diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 2e6706dbf..8b74e7265 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import urllib.parse from typing import TYPE_CHECKING, Optional import attr from synapse.http.client import SimpleHttpClient +from synapse.types import JsonDict +from synapse.util import json_decoder if TYPE_CHECKING: from synapse.server import HomeServer @@ -24,18 +27,15 @@ logger = logging.getLogger(__name__) -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class OEmbedResult: - # Either HTML content or URL must be provided. - html: Optional[str] - url: Optional[str] - title: Optional[str] - # Number of seconds to cache the content. - cache_age: int - - -class OEmbedError(Exception): - """An error occurred processing the oEmbed object.""" + # The Open Graph result (converted from the oEmbed result). + open_graph_result: JsonDict + # Number of seconds to cache the content, according to the oEmbed response. + # + # This will be None if no cache-age is provided in the oEmbed response (or + # if the oEmbed response cannot be turned into an Open Graph response). + cache_age: Optional[int] class OEmbedProvider: @@ -81,75 +81,106 @@ def get_oembed_url(self, url: str) -> Optional[str]: """ for url_pattern, endpoint in self._oembed_patterns.items(): if url_pattern.fullmatch(url): - return endpoint + # TODO Specify max height / width. + + # Note that only the JSON format is supported, some endpoints want + # this in the URL, others want it as an argument. 
+ endpoint = endpoint.replace("{format}", "json") + + args = {"url": url, "format": "json"} + query_str = urllib.parse.urlencode(args, True) + return f"{endpoint}?{query_str}" # No match. return None - async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult: + def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: """ - Request content from an oEmbed endpoint. + Parse the oEmbed response into an Open Graph response. Args: - endpoint: The oEmbed API endpoint. - url: The URL to pass to the API. + url: The URL which is being previewed (not the one which was + requested). + raw_body: The oEmbed response as JSON encoded as bytes. Returns: - An object representing the metadata returned. - - Raises: - OEmbedError if fetching or parsing of the oEmbed information fails. + json-encoded Open Graph data """ - try: - logger.debug("Trying to get oEmbed content for url '%s'", url) - # Note that only the JSON format is supported, some endpoints want - # this in the URL, others want it as an argument. - endpoint = endpoint.replace("{format}", "json") - - result = await self._client.get_json( - endpoint, - # TODO Specify max height / width. - args={"url": url, "format": "json"}, - ) + try: + # oEmbed responses *must* be UTF-8 according to the spec. + oembed = json_decoder.decode(raw_body.decode("utf-8")) # Ensure there's a version of 1.0. - if result.get("version") != "1.0": - raise OEmbedError("Invalid version: %s" % (result.get("version"),)) - - oembed_type = result.get("type") + oembed_version = oembed["version"] + if oembed_version != "1.0": + raise RuntimeError(f"Invalid version: {oembed_version}") # Ensure the cache age is None or an int. - cache_age = result.get("cache_age") + cache_age = oembed.get("cache_age") if cache_age: cache_age = int(cache_age) - oembed_result = OEmbedResult(None, None, result.get("title"), cache_age) + # The results. + open_graph_response = {"og:title": oembed.get("title")} - # HTML content. 
+ # If a thumbnail exists, use it. Note that dimensions will be calculated later. + if "thumbnail_url" in oembed: + open_graph_response["og:image"] = oembed["thumbnail_url"] + + # Process each type separately. + oembed_type = oembed["type"] if oembed_type == "rich": - oembed_result.html = result.get("html") - return oembed_result + calc_description_and_urls(open_graph_response, oembed["html"]) - if oembed_type == "photo": - oembed_result.url = result.get("url") - return oembed_result + elif oembed_type == "photo": + # If this is a photo, use the full image, not the thumbnail. + open_graph_response["og:image"] = oembed["url"] - # TODO Handle link and video types. + else: + raise RuntimeError(f"Unknown oEmbed type: {oembed_type}") - if "thumbnail_url" in result: - oembed_result.url = result.get("thumbnail_url") - return oembed_result + except Exception as e: + # Trap any exception and let the code follow as usual. + logger.warning(f"Error parsing oEmbed metadata from {url}: {e!r}") + open_graph_response = {} + cache_age = None - raise OEmbedError("Incompatible oEmbed information.") + return OEmbedResult(open_graph_response, cache_age) - except OEmbedError as e: - # Trap OEmbedErrors first so we can directly re-raise them. - logger.warning("Error parsing oEmbed metadata from %s: %r", url, e) - raise - except Exception as e: - # Trap any exception and let the code follow as usual. - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading oEmbed metadata from %s: %r", url, e) - raise OEmbedError() from e +def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: + """ + Calculate description for an HTML document. + + This uses lxml to convert the HTML document into plaintext. If errors + occur during processing of the document, an empty response is returned. + + Args: + open_graph_response: The current Open Graph summary. This is updated with additional fields. + html_body: The HTML document, as a string.
+ + Returns: + The summary + """ + # If there's no body, nothing useful is going to be found. + if not html_body: + return + + from lxml import etree + + # Create an HTML parser. If this fails, log and return no metadata. + parser = etree.HTMLParser(recover=True, encoding="utf-8") + + # Attempt to parse the body. If this fails, log and return no metadata. + tree = etree.fromstring(html_body, parser) + + # The data was successfully parsed, but no tree was found. + if tree is None: + return + + from synapse.rest.media.v1.preview_url_resource import _calc_description + + description = _calc_description(tree) + if description: + open_graph_response["og:description"] = description diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index fe0627d9b..0a0b476d2 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -44,7 +44,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.media.v1._base import get_filename_from_headers from synapse.rest.media.v1.media_storage import MediaStorage -from synapse.rest.media.v1.oembed import OEmbedError, OEmbedProvider +from synapse.rest.media.v1.oembed import OEmbedProvider from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.async_helpers import ObservableDeferred @@ -73,6 +73,7 @@ OG_TAG_VALUE_MAXLEN = 1000 ONE_HOUR = 60 * 60 * 1000 +ONE_DAY = 24 * ONE_HOUR @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -255,10 +256,19 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: og = og.encode("utf8") return og - media_info = await self._download_url(url, user) + # If this URL can be accessed via oEmbed, use that instead. 
+ url_to_download = url + oembed_url = self._oembed.get_oembed_url(url) + if oembed_url: + url_to_download = oembed_url + + media_info = await self._download_url(url_to_download, user) logger.debug("got media_info of '%s'", media_info) + # The number of milliseconds that the response should be considered valid. + expiration_ms = media_info.expires + if _is_media(media_info.media_type): file_id = media_info.filesystem_id dims = await self.media_repo._generate_thumbnails( @@ -288,34 +298,22 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: encoding = get_html_media_encoding(body, media_info.media_type) og = decode_and_calc_og(body, media_info.uri, encoding) - # pre-cache the image for posterity - # FIXME: it might be cleaner to use the same flow as the main /preview_url - # request itself and benefit from the same caching etc. But for now we - # just rely on the caching on the master request to speed things up. - if "og:image" in og and og["og:image"]: - image_info = await self._download_url( - _rebase_url(og["og:image"], media_info.uri), user - ) + await self._precache_image_url(user, media_info, og) + + elif oembed_url and _is_json(media_info.media_type): + # Handle an oEmbed response. + with open(media_info.filename, "rb") as file: + body = file.read() + + oembed_response = self._oembed.parse_oembed_response(media_info.uri, body) + og = oembed_response.open_graph_result + + # Use the cache age from the oEmbed result, instead of the HTTP response. 
+ if oembed_response.cache_age is not None: + expiration_ms = oembed_response.cache_age + + await self._precache_image_url(user, media_info, og) - if _is_media(image_info.media_type): - # TODO: make sure we don't choke on white-on-transparent images - file_id = image_info.filesystem_id - dims = await self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info.media_type, url_cache=True - ) - if dims: - og["og:image:width"] = dims["width"] - og["og:image:height"] = dims["height"] - else: - logger.warning("Couldn't get dims for %s", og["og:image"]) - - og[ - "og:image" - ] = f"mxc://{self.server_name}/{image_info.filesystem_id}" - og["og:image:type"] = image_info.media_type - og["matrix:image:size"] = image_info.media_length - else: - del og["og:image"] else: logger.warning("Failed to find any OG data in %s", url) og = {} @@ -336,12 +334,15 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: jsonog = json_encoder.encode(og) + # Cap the amount of time to consider a response valid. + expiration_ms = min(expiration_ms, ONE_DAY) + # store OG in history-aware DB cache await self.store.store_url_cache( url, media_info.response_code, media_info.etag, - media_info.expires + media_info.created_ts_ms, + media_info.created_ts_ms + expiration_ms, jsonog, media_info.filesystem_id, media_info.created_ts_ms, @@ -358,88 +359,52 @@ async def _download_url(self, url: str, user: str) -> MediaInfo: file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True) - # If this URL can be accessed via oEmbed, use that instead. - url_to_download: Optional[str] = url - oembed_url = self._oembed.get_oembed_url(url) - if oembed_url: - # The result might be a new URL to download, or it might be HTML content. 
+ with self.media_storage.store_into_file(file_info) as (f, fname, finish): try: - oembed_result = await self._oembed.get_oembed_content(oembed_url, url) - if oembed_result.url: - url_to_download = oembed_result.url - elif oembed_result.html: - url_to_download = None - except OEmbedError: - # If an error occurs, try doing a normal preview. - pass + logger.debug("Trying to get preview for url '%s'", url) + length, headers, uri, code = await self.client.get_file( + url, + output_stream=f, + max_size=self.max_spider_size, + headers={"Accept-Language": self.url_preview_accept_language}, + ) + except SynapseError: + # Pass SynapseErrors through directly, so that the servlet + # handler will return a SynapseError to the client instead of + # blank data or a 500. + raise + except DNSLookupError: + # DNS lookup returned no results + # Note: This will also be the case if one of the resolved IP + # addresses is blacklisted + raise SynapseError( + 502, + "DNS resolution failure during URL preview generation", + Codes.UNKNOWN, + ) + except Exception as e: + # FIXME: pass through 404s and other error messages nicely + logger.warning("Error downloading %s: %r", url, e) - if url_to_download: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - try: - logger.debug("Trying to get preview for url '%s'", url_to_download) - length, headers, uri, code = await self.client.get_file( - url_to_download, - output_stream=f, - max_size=self.max_spider_size, - headers={"Accept-Language": self.url_preview_accept_language}, - ) - except SynapseError: - # Pass SynapseErrors through directly, so that the servlet - # handler will return a SynapseError to the client instead of - # blank data or a 500. 
- raise - except DNSLookupError: - # DNS lookup returned no results - # Note: This will also be the case if one of the resolved IP - # addresses is blacklisted - raise SynapseError( - 502, - "DNS resolution failure during URL preview generation", - Codes.UNKNOWN, - ) - except Exception as e: - # FIXME: pass through 404s and other error messages nicely - logger.warning("Error downloading %s: %r", url_to_download, e) - - raise SynapseError( - 500, - "Failed to download content: %s" - % (traceback.format_exception_only(sys.exc_info()[0], e),), - Codes.UNKNOWN, - ) - await finish() - - if b"Content-Type" in headers: - media_type = headers[b"Content-Type"][0].decode("ascii") - else: - media_type = "application/octet-stream" + raise SynapseError( + 500, + "Failed to download content: %s" + % (traceback.format_exception_only(sys.exc_info()[0], e),), + Codes.UNKNOWN, + ) + await finish() - download_name = get_filename_from_headers(headers) + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" - # FIXME: we should calculate a proper expiration based on the - # Cache-Control and Expire headers. But for now, assume 1 hour. - expires = ONE_HOUR - etag = ( - headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None - ) - else: - # we can only get here if we did an oembed request and have an oembed_result.html - assert oembed_result.html is not None - assert oembed_url is not None - - html_bytes = oembed_result.html.encode("utf-8") - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - f.write(html_bytes) - await finish() - - media_type = "text/html" - download_name = oembed_result.title - length = len(html_bytes) - # If a specific cache age was not given, assume 1 hour. 
- expires = oembed_result.cache_age or ONE_HOUR - uri = oembed_url - code = 200 - etag = None + download_name = get_filename_from_headers(headers) + + # FIXME: we should calculate a proper expiration based on the + # Cache-Control and Expire headers. But for now, assume 1 hour. + expires = ONE_HOUR + etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None try: time_now_ms = self.clock.time_msec() @@ -474,6 +439,46 @@ async def _download_url(self, url: str, user: str) -> MediaInfo: etag=etag, ) + async def _precache_image_url( + self, user: str, media_info: MediaInfo, og: JsonDict + ) -> None: + """ + Pre-cache the image (if one exists) for posterity + + Args: + user: The user requesting the preview. + media_info: The media being previewed. + og: The Open Graph dictionary. This is modified with image information. + """ + # If there's no image or it is blank, there's nothing to do. + if "og:image" not in og or not og["og:image"]: + return + + # FIXME: it might be cleaner to use the same flow as the main /preview_url + # request itself and benefit from the same caching etc. But for now we + # just rely on the caching on the master request to speed things up. 
+ image_info = await self._download_url( + _rebase_url(og["og:image"], media_info.uri), user + ) + + if _is_media(image_info.media_type): + # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info.filesystem_id + dims = await self.media_repo._generate_thumbnails( + None, file_id, file_id, image_info.media_type, url_cache=True + ) + if dims: + og["og:image:width"] = dims["width"] + og["og:image:height"] = dims["height"] + else: + logger.warning("Couldn't get dims for %s", og["og:image"]) + + og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}" + og["og:image:type"] = image_info.media_type + og["matrix:image:size"] = image_info.media_length + else: + del og["og:image"] + def _start_expire_url_cache_data(self) -> Deferred: return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data @@ -527,7 +532,7 @@ async def _expire_url_cache_data(self) -> None: # These may be cached for a bit on the client (i.e., they # may have a room open with a preview url thing open). # So we wait a couple of days before deleting, just in case. - expire_before = now - 2 * 24 * ONE_HOUR + expire_before = now - 2 * ONE_DAY media_ids = await self.store.get_url_cache_media_before(expire_before) removed_media = [] @@ -669,7 +674,18 @@ def _attempt_calc_og(body_attempt: Union[bytes, str]) -> Dict[str, Optional[str] def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: - # suck our tree into lxml and define our OG response. + """ + Calculate metadata for an HTML document. + + This uses lxml to search the HTML document for Open Graph data. + + Args: + tree: The parsed HTML document. + media_url: The URI used to download the body. + + Returns: + The Open Graph response as a dictionary. 
+ """ # if we see any image URLs in the OG response, then spider them # (although the client could choose to do this by asking for previews of those @@ -743,35 +759,7 @@ def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: if meta_description: og["og:description"] = meta_description[0] else: - # grab any text nodes which are inside the <body/> tag... - # unless they are within an HTML5 semantic markup tag... - # <header/>, <nav/>, <aside/>, <footer/> - # ...or if they are within a <script/> or <style/> tag. - # This is a very very very coarse approximation to a plain text - # render of the page. - - # We don't just use XPATH here as that is slow on some machines. - - from lxml import etree - - TAGS_TO_REMOVE = ( - "header", - "nav", - "aside", - "footer", - "script", - "noscript", - "style", - etree.Comment, - ) - - # Split all the text nodes into paragraphs (by splitting on new - # lines) - text_nodes = ( - re.sub(r"\s+", "\n", el).strip() - for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) - ) - og["og:description"] = summarize_paragraphs(text_nodes) + og["og:description"] = _calc_description(tree) elif og["og:description"]: # This must be a non-empty string at this point. assert isinstance(og["og:description"], str) @@ -782,6 +770,46 @@ def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: return og +def _calc_description(tree: "etree.Element") -> Optional[str]: + """ + Calculate a text description based on an HTML document. + + Grabs any text nodes which are inside the <body/> tag, unless they are within + an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or + if they are within a <script/> or <style/> tag. + + This is a very very very coarse approximation to a plain text render of the page. + + Args: + tree: The parsed HTML document. + + Returns: + The plain text description, or None if one cannot be generated. 
+ """ + # We don't just use XPATH here as that is slow on some machines. + + from lxml import etree + + TAGS_TO_REMOVE = ( + "header", + "nav", + "aside", + "footer", + "script", + "noscript", + "style", + etree.Comment, + ) + + # Split all the text nodes into paragraphs (by splitting on new + # lines) + text_nodes = ( + re.sub(r"\s+", "\n", el).strip() + for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) + ) + return summarize_paragraphs(text_nodes) + + def _iterate_over_text( tree: "etree.Element", *tags_to_ignore: Iterable[Union[str, "etree.Comment"]] ) -> Generator[str, None, None]: @@ -841,11 +869,25 @@ def _is_html(content_type: str) -> bool: ) +def _is_json(content_type: str) -> bool: + return content_type.lower().startswith("application/json") + + def summarize_paragraphs( text_nodes: Iterable[str], min_size: int = 200, max_size: int = 500 ) -> Optional[str]: - # Try to get a summary of between 200 and 500 words, respecting - # first paragraph and then word boundaries. + """ + Try to get a summary respecting first paragraph and then word boundaries. + + Args: + text_nodes: The paragraphs to summarize. + min_size: The minimum number of words to include. + max_size: The maximum number of words to include. + + Returns: + A summary of the text nodes, or None if that was not possible. + """ + # TODO: Respect sentences? description = "" @@ -868,7 +910,7 @@ def summarize_paragraphs( new_desc = "" # This splits the paragraph into words, but keeping the - # (preceeding) whitespace intact so we can easily concat + # (preceding) whitespace intact so we can easily concat # words back together. 
for match in re.finditer(r"\s*\S+", description): word = match.group() diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 9f6fbfe6d..9d1389958 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -24,6 +24,7 @@ from tests import unittest from tests.server import FakeTransport +from tests.test_utils import SMALL_PNG try: import lxml @@ -576,13 +577,6 @@ def test_oembed_photo(self): } oembed_content = json.dumps(result).encode("utf-8") - end_content = ( - b"<html><head>" - b"<title>Some Title</title>" - b'<meta property="og:description" content="hi" />' - b"</head></html>" - ) - channel = self.make_request( "GET", "preview_url?url=http://twitter.com/matrixdotorg/status/12345", @@ -606,6 +600,7 @@ def test_oembed_photo(self): self.pump() + # Ensure a second request is made to the photo URL. client = self.reactor.tcpClients[1][2].buildProtocol(None) server = AccumulatingProtocol() server.makeConnection(FakeTransport(client, self.reactor)) @@ -613,18 +608,23 @@ def test_oembed_photo(self): client.dataReceived( ( b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" - b'Content-Type: text/html; charset="utf8"\r\n\r\n' + b"Content-Type: image/png\r\n\r\n" ) - % (len(end_content),) - + end_content + % (len(SMALL_PNG),) + + SMALL_PNG ) self.pump() + # Ensure the URL is what was requested. 
+ self.assertIn(b"/matrixdotorg", server.data) + self.assertEqual(channel.code, 200) - self.assertEqual( - channel.json_body, {"og:title": "Some Title", "og:description": "hi"} - ) + self.assertIsNone(channel.json_body["og:title"]) + self.assertTrue(channel.json_body["og:image"].startswith("mxc://")) + self.assertEqual(channel.json_body["og:image:height"], 1) + self.assertEqual(channel.json_body["og:image:width"], 1) + self.assertEqual(channel.json_body["og:image:type"], "image/png") def test_oembed_rich(self): """Test an oEmbed endpoint which returns HTML content via the 'rich' type.""" From ebd8baf61ff8e00f8de3b63c00531765672000c8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Tue, 21 Sep 2021 12:32:46 -0400 Subject: [PATCH 21/74] Clear our destination directories before copying files to GitHub pages. (#10869) This should fix stale deleted files being still accessible. --- .github/workflows/docs.yaml | 1 - changelog.d/10869.doc | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/10869.doc diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 808f82533..2bf32e376 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -61,6 +61,5 @@ jobs: uses: peaceiris/actions-gh-pages@068dc23d9710f1ba62e86896f84735d869951305 # v3.8.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} - keep_files: true publish_dir: ./book destination_dir: ./${{ steps.vars.outputs.branch-version }} diff --git a/changelog.d/10869.doc b/changelog.d/10869.doc new file mode 100644 index 000000000..c11738607 --- /dev/null +++ b/changelog.d/10869.doc @@ -0,0 +1 @@ +Properly remove deleted files from GitHub pages when generating the documentation. 
From b25a494779e7c86181c4b61f9bbb56c63ba529ed Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 21 Sep 2021 17:41:27 +0100 Subject: [PATCH 22/74] Add types to http.site (#10867) --- changelog.d/10867.misc | 1 + synapse/http/site.py | 40 ++++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 18 deletions(-) create mode 100644 changelog.d/10867.misc diff --git a/changelog.d/10867.misc b/changelog.d/10867.misc new file mode 100644 index 000000000..01e51fbc6 --- /dev/null +++ b/changelog.d/10867.misc @@ -0,0 +1 @@ +Add type hints to `synapse.http.site`. diff --git a/synapse/http/site.py b/synapse/http/site.py index c665a9d5d..dd4c749e1 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -21,7 +21,7 @@ from twisted.internet.interfaces import IAddress, IReactorTime from twisted.python.failure import Failure -from twisted.web.resource import IResource +from twisted.web.resource import IResource, Resource from twisted.web.server import Request, Site from synapse.config.server import ListenerConfig @@ -61,7 +61,7 @@ class SynapseRequest(Request): logcontext: the log context for this request """ - def __init__(self, channel, *args, max_request_body_size=1024, **kw): + def __init__(self, channel, *args, max_request_body_size: int = 1024, **kw): Request.__init__(self, channel, *args, **kw) self._max_request_body_size = max_request_body_size self.site: SynapseSite = channel.site @@ -83,13 +83,13 @@ def __init__(self, channel, *args, max_request_body_size=1024, **kw): self._is_processing = False # the time when the asynchronous request handler completed its processing - self._processing_finished_time = None + self._processing_finished_time: Optional[float] = None # what time we finished sending the response to the client (or the connection # dropped) - self.finish_time = None + self.finish_time: Optional[float] = None - def __repr__(self): + def __repr__(self) -> str: # We overwrite this so that we don't log 
``access_token`` return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % ( self.__class__.__name__, @@ -100,7 +100,7 @@ def __repr__(self): self.site.site_tag, ) - def handleContentChunk(self, data): + def handleContentChunk(self, data: bytes) -> None: # we should have a `content` by now. assert self.content, "handleContentChunk() called before gotLength()" if self.content.tell() + len(data) > self._max_request_body_size: @@ -139,7 +139,7 @@ def requester(self, value: Union[Requester, str]) -> None: # If there's no authenticated entity, it was the requester. self.logcontext.request.authenticated_entity = authenticated_entity or requester - def get_request_id(self): + def get_request_id(self) -> str: return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self) -> str: @@ -205,7 +205,7 @@ def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]: return None, None - def render(self, resrc): + def render(self, resrc: Resource) -> None: # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. @@ -282,7 +282,7 @@ async def handle_request(request): if self.finish_time is not None: self._finished_processing() - def finish(self): + def finish(self) -> None: """Called when all response data has been written to this Request. Overrides twisted.web.server.Request.finish to record the finish time and do @@ -295,7 +295,7 @@ def finish(self): with PreserveLoggingContext(self.logcontext): self._finished_processing() - def connectionLost(self, reason): + def connectionLost(self, reason: Union[Failure, Exception]) -> None: """Called when the client connection is closed before the response is written. 
Overrides twisted.web.server.Request.connectionLost to record the finish time and @@ -327,7 +327,7 @@ def connectionLost(self, reason): if not self._is_processing: self._finished_processing() - def _started_processing(self, servlet_name): + def _started_processing(self, servlet_name: str) -> None: """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, @@ -354,9 +354,11 @@ def _started_processing(self, servlet_name): self.get_redacted_uri(), ) - def _finished_processing(self): + def _finished_processing(self) -> None: """Log the completion of this request and update the metrics""" assert self.logcontext is not None + assert self.finish_time is not None + usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: @@ -437,7 +439,7 @@ class XForwardedForRequest(SynapseRequest): _forwarded_for: "Optional[_XForwardedForAddress]" = None _forwarded_https: bool = False - def requestReceived(self, command, path, version): + def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None: # this method is called by the Channel once the full request has been # received, to dispatch the request to a resource. 
# We can use it to set the IP address and protocol according to the @@ -445,7 +447,7 @@ def requestReceived(self, command, path, version): self._process_forwarded_headers() return super().requestReceived(command, path, version) - def _process_forwarded_headers(self): + def _process_forwarded_headers(self) -> None: headers = self.requestHeaders.getRawHeaders(b"x-forwarded-for") if not headers: return @@ -470,7 +472,7 @@ def _process_forwarded_headers(self): ) self._forwarded_https = True - def isSecure(self): + def isSecure(self) -> bool: if self._forwarded_https: return True return super().isSecure() @@ -545,14 +547,16 @@ def __init__( proxied = config.http_options.x_forwarded request_class = XForwardedForRequest if proxied else SynapseRequest - def request_factory(channel, queued) -> Request: + def request_factory(channel, queued: bool) -> Request: return request_class( - channel, max_request_body_size=max_request_body_size, queued=queued + channel, + max_request_body_size=max_request_body_size, + queued=queued, ) self.requestFactory = request_factory # type: ignore self.access_logger = logging.getLogger(logger_name) self.server_version_string = server_version_string.encode("ascii") - def log(self, request): + def log(self, request: SynapseRequest) -> None: pass From 4054dfa409fa17b45ab8f265813994956ed97bae Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Tue, 21 Sep 2021 13:34:26 -0400 Subject: [PATCH 23/74] Add type hints for event streams. 
(#10856) --- changelog.d/10856.misc | 1 + synapse/handlers/account_data.py | 13 ++++-- synapse/handlers/appservice.py | 6 +-- synapse/handlers/initial_sync.py | 2 +- synapse/handlers/presence.py | 8 ++-- synapse/handlers/receipts.py | 13 ++++-- synapse/handlers/room.py | 18 ++++++-- synapse/handlers/sync.py | 6 +-- synapse/handlers/typing.py | 13 ++++-- synapse/module_api/__init__.py | 2 +- synapse/notifier.py | 2 +- synapse/storage/databases/main/receipts.py | 6 +-- synapse/streams/__init__.py | 22 ++++++++++ synapse/streams/events.py | 49 +++++++++++++--------- tests/handlers/test_receipts.py | 2 +- tests/handlers/test_typing.py | 46 ++++++++++++++++---- tests/rest/client/test_shadow_banned.py | 10 ++++- tests/rest/client/test_typing.py | 10 ++++- 18 files changed, 169 insertions(+), 60 deletions(-) create mode 100644 changelog.d/10856.misc diff --git a/changelog.d/10856.misc b/changelog.d/10856.misc new file mode 100644 index 000000000..f09af2e00 --- /dev/null +++ b/changelog.d/10856.misc @@ -0,0 +1 @@ +Add missing type hints to handlers. diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index e9e7a7854..96273e2f8 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import random -from typing import TYPE_CHECKING, Any, List, Tuple +from typing import TYPE_CHECKING, Collection, List, Optional, Tuple from synapse.replication.http.account_data import ( ReplicationAddTagRestServlet, @@ -21,6 +21,7 @@ ReplicationRoomAccountDataRestServlet, ReplicationUserAccountDataRestServlet, ) +from synapse.streams import EventSource from synapse.types import JsonDict, UserID if TYPE_CHECKING: @@ -163,7 +164,7 @@ async def remove_tag_from_room(self, user_id: str, room_id: str, tag: str) -> in return response["max_stream_id"] -class AccountDataEventSource: +class AccountDataEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -171,7 +172,13 @@ def get_current_key(self, direction: str = "f") -> int: return self.store.get_max_account_data_stream_id() async def get_new_events( - self, user: UserID, from_key: int, **kwargs: Any + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Collection[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: user_id = user.to_string() last_stream_id = from_key diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 8bde9ed66..b7213b67a 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -254,7 +254,7 @@ async def _notify_interested_services_ephemeral( async def _handle_typing( self, service: ApplicationService, new_token: int ) -> List[JsonDict]: - typing_source = self.event_sources.sources["typing"] + typing_source = self.event_sources.sources.typing # Get the typing events from just before current typing, _ = await typing_source.get_new_events_as( service=service, @@ -269,7 +269,7 @@ async def _handle_receipts(self, service: ApplicationService) -> List[JsonDict]: from_key = await self.store.get_type_stream_id_for_appservice( service, "read_receipt" ) - receipts_source = self.event_sources.sources["receipt"] + receipts_source = 
self.event_sources.sources.receipt receipts, _ = await receipts_source.get_new_events_as( service=service, from_key=from_key ) @@ -279,7 +279,7 @@ async def _handle_presence( self, service: ApplicationService, users: Collection[Union[str, UserID]] ) -> List[JsonDict]: events: List[JsonDict] = [] - presence_source = self.event_sources.sources["presence"] + presence_source = self.event_sources.sources.presence from_key = await self.store.get_type_stream_id_for_appservice( service, "presence" ) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index c942086e7..9ad39a65d 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -125,7 +125,7 @@ async def _snapshot_all_rooms( now_token = self.hs.get_event_sources().get_current_token() - presence_stream = self.hs.get_event_sources().sources["presence"] + presence_stream = self.hs.get_event_sources().sources.presence presence, _ = await presence_stream.get_new_events( user, from_key=None, include_offline=False ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 841c8815b..983c837c6 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -65,6 +65,7 @@ from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.storage.databases.main import DataStore +from synapse.streams import EventSource from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches.descriptors import _CacheContext, cached @@ -1500,7 +1501,7 @@ def format_user_presence_state( return content -class PresenceEventSource: +class PresenceEventSource(EventSource[int, UserPresenceState]): def __init__(self, hs: "HomeServer"): # We can't call get_presence_handler here because there's a cycle: # @@ -1519,10 +1520,11 @@ async def get_new_events( self, user: UserID, from_key: 
Optional[int], + limit: Optional[int] = None, room_ids: Optional[List[str]] = None, - include_offline: bool = True, + is_guest: bool = False, explicit_room_id: Optional[str] = None, - **kwargs: Any, + include_offline: bool = True, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: # 1. Get the rooms the user is in. diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index c7567ac05..5881f09eb 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, List, Optional, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple from synapse.api.constants import ReadReceiptEventFields from synapse.appservice import ApplicationService from synapse.handlers._base import BaseHandler +from synapse.streams import EventSource from synapse.types import JsonDict, ReadReceipt, UserID, get_domain_from_id if TYPE_CHECKING: @@ -162,7 +163,7 @@ async def received_client_receipt( await self.federation_sender.send_read_receipt(receipt) -class ReceiptEventSource: +class ReceiptEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self.config = hs.config @@ -216,7 +217,13 @@ def filter_out_hidden(events: List[JsonDict], user_id: str) -> List[JsonDict]: return visible_events async def get_new_events( - self, from_key: int, room_ids: List[str], user: UserID, **kwargs: Any + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Iterable[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: from_key = int(from_key) to_key = self.get_current_key() diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index abdd50616..287ea2fd0 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py 
@@ -20,7 +20,16 @@ import random import string from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Collection, + Dict, + List, + Optional, + Tuple, +) from synapse.api.constants import ( EventContentFields, @@ -47,6 +56,7 @@ from synapse.events.utils import copy_power_levels_contents from synapse.rest.admin._base import assert_user_is_admin from synapse.storage.state import StateFilter +from synapse.streams import EventSource from synapse.types import ( JsonDict, MutableStateMap, @@ -1173,7 +1183,7 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: return results -class RoomEventSource: +class RoomEventSource(EventSource[RoomStreamToken, EventBase]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -1181,8 +1191,8 @@ async def get_new_events( self, user: UserID, from_key: RoomStreamToken, - limit: int, - room_ids: List[str], + limit: Optional[int], + room_ids: Collection[str], is_guest: bool, explicit_room_id: Optional[str] = None, ) -> Tuple[List[EventBase], RoomStreamToken]: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e93db4bdc..2c7c6d63a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -443,7 +443,7 @@ async def ephemeral_by_room( room_ids = sync_result_builder.joined_room_ids - typing_source = self.event_sources.sources["typing"] + typing_source = self.event_sources.sources.typing typing, typing_key = await typing_source.get_new_events( user=sync_config.user, from_key=typing_key, @@ -465,7 +465,7 @@ async def ephemeral_by_room( receipt_key = since_token.receipt_key if since_token else 0 - receipt_source = self.event_sources.sources["receipt"] + receipt_source = self.event_sources.sources.receipt receipts, receipt_key = await receipt_source.get_new_events( user=sync_config.user, from_key=receipt_key, @@ -1415,7 +1415,7 @@ async def 
_generate_sync_entry_for_presence( sync_config = sync_result_builder.sync_config user = sync_result_builder.sync_config.user - presence_source = self.event_sources.sources["presence"] + presence_source = self.event_sources.sources.presence since_token = sync_result_builder.since_token presence_key = None diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 4492c8567..9326330c9 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -14,7 +14,7 @@ import logging import random from collections import namedtuple -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple from synapse.api.errors import AuthError, ShadowBanError, SynapseError from synapse.appservice import ApplicationService @@ -23,6 +23,7 @@ wrap_as_background_process, ) from synapse.replication.tcp.streams import TypingStream +from synapse.streams import EventSource from synapse.types import JsonDict, Requester, UserID, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.metrics import Measure @@ -439,7 +440,7 @@ def process_replication_rows( raise Exception("Typing writer instance got typing info over replication") -class TypingNotificationEventSource: +class TypingNotificationEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.hs = hs self.clock = hs.get_clock() @@ -485,7 +486,13 @@ async def get_new_events_as( return (events, handler._latest_room_serial) async def get_new_events( - self, from_key: int, room_ids: Iterable[str], **kwargs: Any + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Iterable[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: with Measure(self.clock, "typing.get_new_events"): from_key = int(from_key) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py 
index 2d403532f..3196c2bec 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -91,7 +91,7 @@ def __init__(self, hs: "HomeServer", auth_handler): self._auth = hs.get_auth() self._auth_handler = auth_handler self._server_name = hs.hostname - self._presence_stream = hs.get_event_sources().sources["presence"] + self._presence_stream = hs.get_event_sources().sources.presence self._state = hs.get_state_handler() self._clock: Clock = hs.get_clock() self._send_email_handler = hs.get_send_email_handler() diff --git a/synapse/notifier.py b/synapse/notifier.py index bbe337949..1a9f84ba4 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -584,7 +584,7 @@ async def check_for_updates( events: List[EventBase] = [] end_token = from_token - for name, source in self.event_sources.sources.items(): + for name, source in self.event_sources.sources.get_sources(): keyname = "%s_key" % name before_id = getattr(before_token, keyname) after_id = getattr(after_token, keyname) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index edeaacd7a..01a428130 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple from twisted.internet import defer @@ -153,12 +153,12 @@ def f(txn): } async def get_linearized_receipts_for_rooms( - self, room_ids: List[str], to_key: int, from_key: Optional[int] = None + self, room_ids: Iterable[str], to_key: int, from_key: Optional[int] = None ) -> List[dict]: """Get receipts for multiple rooms for sending to clients. Args: - room_id: List of room_ids. + room_id: The room IDs to fetch receipts of. to_key: Max stream id to fetch receipts up to. from_key: Min stream id to fetch receipts from. None fetches from the start. 
diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py index 5e83dba2e..806b67130 100644 --- a/synapse/streams/__init__.py +++ b/synapse/streams/__init__.py @@ -11,3 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from typing import Collection, Generic, List, Optional, Tuple, TypeVar + +from synapse.types import UserID + +# The key, this is either a stream token or int. +K = TypeVar("K") +# The return type. +R = TypeVar("R") + + +class EventSource(Generic[K, R]): + async def get_new_events( + self, + user: UserID, + from_key: K, + limit: Optional[int], + room_ids: Collection[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, + ) -> Tuple[List[R], K]: + ... diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 99b0aac2f..21591d0bf 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -12,29 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Dict +from typing import TYPE_CHECKING, Iterator, Tuple + +import attr from synapse.handlers.account_data import AccountDataEventSource from synapse.handlers.presence import PresenceEventSource from synapse.handlers.receipts import ReceiptEventSource from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource +from synapse.streams import EventSource from synapse.types import StreamToken +if TYPE_CHECKING: + from synapse.server import HomeServer -class EventSources: - SOURCE_TYPES = { - "room": RoomEventSource, - "presence": PresenceEventSource, - "typing": TypingNotificationEventSource, - "receipt": ReceiptEventSource, - "account_data": AccountDataEventSource, - } - def __init__(self, hs): - self.sources: Dict[str, Any] = { - name: cls(hs) for name, cls in EventSources.SOURCE_TYPES.items() - } +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _EventSourcesInner: + room: RoomEventSource + presence: PresenceEventSource + typing: TypingNotificationEventSource + receipt: ReceiptEventSource + account_data: AccountDataEventSource + + def get_sources(self) -> Iterator[Tuple[str, EventSource]]: + for attribute in _EventSourcesInner.__attrs_attrs__: # type: ignore[attr-defined] + yield attribute.name, getattr(self, attribute.name) + + +class EventSources: + def __init__(self, hs: "HomeServer"): + self.sources = _EventSourcesInner( + *(attribute.type(hs) for attribute in _EventSourcesInner.__attrs_attrs__) # type: ignore[attr-defined] + ) self.store = hs.get_datastore() def get_current_token(self) -> StreamToken: @@ -44,11 +55,11 @@ def get_current_token(self) -> StreamToken: groups_key = self.store.get_group_stream_token() token = StreamToken( - room_key=self.sources["room"].get_current_key(), - presence_key=self.sources["presence"].get_current_key(), - typing_key=self.sources["typing"].get_current_key(), - receipt_key=self.sources["receipt"].get_current_key(), - 
account_data_key=self.sources["account_data"].get_current_key(), + room_key=self.sources.room.get_current_key(), + presence_key=self.sources.presence.get_current_key(), + typing_key=self.sources.typing.get_current_key(), + receipt_key=self.sources.receipt.get_current_key(), + account_data_key=self.sources.account_data.get_current_key(), push_rules_key=push_rules_key, to_device_key=to_device_key, device_list_key=device_list_key, @@ -67,7 +78,7 @@ def get_current_token_for_pagination(self) -> StreamToken: The current token for pagination. """ token = StreamToken( - room_key=self.sources["room"].get_current_key(), + room_key=self.sources.room.get_current_key(), presence_key=0, typing_key=0, receipt_key=0, diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 732a12c9b..5de89c873 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -23,7 +23,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): - self.event_source = hs.get_event_sources().sources["receipt"] + self.event_source = hs.get_event_sources().sources.receipt # In the first param of _test_filters_hidden we use "hidden" instead of # ReadReceiptEventFields.MSC2285_HIDDEN. 
We do this because we're mocking diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index fa3cff598..000f9b9fd 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -89,7 +89,7 @@ def prepare(self, reactor, clock, hs): self.handler = hs.get_typing_handler() - self.event_source = hs.get_event_sources().sources["typing"] + self.event_source = hs.get_event_sources().sources.typing self.datastore = hs.get_datastore() self.datastore.get_destination_retry_timings = Mock( @@ -171,7 +171,9 @@ def test_started_typing_local(self): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, from_key=0, limit=None, room_ids=[ROOM_ID], is_guest=False + ) ) self.assertEquals( events[0], @@ -239,7 +241,9 @@ def test_started_typing_remote_recv(self): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, from_key=0, limit=None, room_ids=[ROOM_ID], is_guest=False + ) ) self.assertEquals( events[0], @@ -276,7 +280,13 @@ def test_started_typing_remote_recv_not_in_room(self): self.assertEquals(self.event_source.get_current_key(), 0) events = self.get_success( - self.event_source.get_new_events(room_ids=[OTHER_ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, + from_key=0, + limit=None, + room_ids=[OTHER_ROOM_ID], + is_guest=False, + ) ) self.assertEquals(events[0], []) self.assertEquals(events[1], 0) @@ -324,7 +334,9 @@ def test_stopped_typing(self): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, from_key=0, limit=None, room_ids=[ROOM_ID], is_guest=False + ) ) 
self.assertEquals( events[0], @@ -350,7 +362,13 @@ def test_typing_timeout(self): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, + from_key=0, + limit=None, + room_ids=[ROOM_ID], + is_guest=False, + ) ) self.assertEquals( events[0], @@ -369,7 +387,13 @@ def test_typing_timeout(self): self.assertEquals(self.event_source.get_current_key(), 2) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=1) + self.event_source.get_new_events( + user=U_APPLE, + from_key=1, + limit=None, + room_ids=[ROOM_ID], + is_guest=False, + ) ) self.assertEquals( events[0], @@ -392,7 +416,13 @@ def test_typing_timeout(self): self.assertEquals(self.event_source.get_current_key(), 3) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, + from_key=0, + limit=None, + room_ids=[ROOM_ID], + is_guest=False, + ) ) self.assertEquals( events[0], diff --git a/tests/rest/client/test_shadow_banned.py b/tests/rest/client/test_shadow_banned.py index 6a0d9a82b..b0c44af03 100644 --- a/tests/rest/client/test_shadow_banned.py +++ b/tests/rest/client/test_shadow_banned.py @@ -193,7 +193,7 @@ def test_typing(self): self.assertEquals(200, channel.code) # There should be no typing events. - event_source = self.hs.get_event_sources().sources["typing"] + event_source = self.hs.get_event_sources().sources.typing self.assertEquals(event_source.get_current_key(), 0) # The other user can join and send typing events. @@ -210,7 +210,13 @@ def test_typing(self): # These appear in the room. 
self.assertEquals(event_source.get_current_key(), 1) events = self.get_success( - event_source.get_new_events(from_key=0, room_ids=[room_id]) + event_source.get_new_events( + user=UserID.from_string(self.other_user_id), + from_key=0, + limit=None, + room_ids=[room_id], + is_guest=False, + ) ) self.assertEquals( events[0], diff --git a/tests/rest/client/test_typing.py b/tests/rest/client/test_typing.py index b54b00473..ee0abd529 100644 --- a/tests/rest/client/test_typing.py +++ b/tests/rest/client/test_typing.py @@ -41,7 +41,7 @@ def make_homeserver(self, reactor, clock): federation_client=Mock(), ) - self.event_source = hs.get_event_sources().sources["typing"] + self.event_source = hs.get_event_sources().sources.typing hs.get_federation_handler = Mock() @@ -76,7 +76,13 @@ def test_set_typing(self): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(from_key=0, room_ids=[self.room_id]) + self.event_source.get_new_events( + user=UserID.from_string(self.user_id), + from_key=0, + limit=None, + room_ids=[self.room_id], + is_guest=False, + ) ) self.assertEquals( events[0], From 51e2db35983953b13e536331ec2f6ad4cae7e0f1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood <erice@element.io> Date: Tue, 21 Sep 2021 15:06:28 -0500 Subject: [PATCH 24/74] Rename MSC2716 things from `chunk` to `batch` to match `/batch_send` endpoint (#10838) See https://github.com/matrix-org/matrix-doc/pull/2716#discussion_r684574497 Dropping support for older MSC2716 room versions so we don't have to worry about supporting both chunk and batch events. 
--- changelog.d/10838.misc | 1 + synapse/api/constants.py | 10 +-- synapse/api/room_versions.py | 22 +---- synapse/event_auth.py | 8 +- synapse/events/utils.py | 6 +- synapse/handlers/message.py | 2 +- synapse/rest/client/room_batch.py | 86 +++++++++---------- .../databases/main/event_federation.py | 30 +++---- synapse/storage/databases/main/events.py | 46 +++++----- synapse/storage/databases/main/room_batch.py | 6 +- synapse/storage/schema/__init__.py | 2 +- ...msc2716_chunk_to_batch_rename.sql.postgres | 23 +++++ ...01msc2716_chunk_to_batch_rename.sql.sqlite | 37 ++++++++ 13 files changed, 162 insertions(+), 117 deletions(-) create mode 100644 changelog.d/10838.misc create mode 100644 synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres create mode 100644 synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite diff --git a/changelog.d/10838.misc b/changelog.d/10838.misc new file mode 100644 index 000000000..b1977d0a2 --- /dev/null +++ b/changelog.d/10838.misc @@ -0,0 +1 @@ +Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) fields and event types from `chunk` to `batch` to match the `/batch_send` endpoint. 
diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 236f0c7f9..39fd9954d 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -121,7 +121,7 @@ class EventTypes: SpaceParent = "m.space.parent" MSC2716_INSERTION = "org.matrix.msc2716.insertion" - MSC2716_CHUNK = "org.matrix.msc2716.chunk" + MSC2716_BATCH = "org.matrix.msc2716.batch" MSC2716_MARKER = "org.matrix.msc2716.marker" @@ -209,11 +209,11 @@ class EventContentFields: # Used on normal messages to indicate they were historically imported after the fact MSC2716_HISTORICAL = "org.matrix.msc2716.historical" - # For "insertion" events to indicate what the next chunk ID should be in + # For "insertion" events to indicate what the next batch ID should be in # order to connect to it - MSC2716_NEXT_CHUNK_ID = "org.matrix.msc2716.next_chunk_id" - # Used on "chunk" events to indicate which insertion event it connects to - MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id" + MSC2716_NEXT_BATCH_ID = "org.matrix.msc2716.next_batch_id" + # Used on "batch" events to indicate which insertion event it connects to + MSC2716_BATCH_ID = "org.matrix.msc2716.batch_id" # For "marker" events MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion" diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 61d9c658a..0a895bba4 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -244,24 +244,8 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, ) - MSC2716 = RoomVersion( - "org.matrix.msc2716", - RoomDisposition.UNSTABLE, - EventFormatVersions.V3, - StateResolutionVersions.V2, - enforce_key_validity=True, - special_case_aliases_auth=False, - strict_canonicaljson=True, - limit_notifications_power_levels=True, - msc2176_redaction_rules=False, - msc3083_join_rules=False, - msc3375_redaction_rules=False, - msc2403_knocking=True, - msc2716_historical=True, - msc2716_redactions=False, - ) - MSC2716v2 = RoomVersion( - 
"org.matrix.msc2716v2", + MSC2716v3 = RoomVersion( + "org.matrix.msc2716v3", RoomDisposition.UNSTABLE, EventFormatVersions.V3, StateResolutionVersions.V2, @@ -289,9 +273,9 @@ class RoomVersions: RoomVersions.V6, RoomVersions.MSC2176, RoomVersions.V7, - RoomVersions.MSC2716, RoomVersions.V8, RoomVersions.V9, + RoomVersions.MSC2716v3, ) } diff --git a/synapse/event_auth.py b/synapse/event_auth.py index cb133f3f8..fc50a0e71 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -213,7 +213,7 @@ def check( if ( event.type == EventTypes.MSC2716_INSERTION - or event.type == EventTypes.MSC2716_CHUNK + or event.type == EventTypes.MSC2716_BATCH or event.type == EventTypes.MSC2716_MARKER ): check_historical(room_version_obj, event, auth_events) @@ -552,14 +552,14 @@ def check_historical( auth_events: StateMap[EventBase], ) -> None: """Check whether the event sender is allowed to send historical related - events like "insertion", "chunk", and "marker". + events like "insertion", "batch", and "marker". Returns: None Raises: AuthError if the event sender is not allowed to send historical related events - ("insertion", "chunk", and "marker"). + ("insertion", "batch", and "marker"). 
""" # Ignore the auth checks in room versions that do not support historical # events @@ -573,7 +573,7 @@ def check_historical( if user_level < historical_level: raise AuthError( 403, - 'You don\'t have permission to send send historical related events ("insertion", "chunk", and "marker")', + 'You don\'t have permission to send send historical related events ("insertion", "batch", and "marker")', ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index fb22337e2..f86113a44 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -141,9 +141,9 @@ def add_fields(*fields): elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules: add_fields("redacts") elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_INSERTION: - add_fields(EventContentFields.MSC2716_NEXT_CHUNK_ID) - elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_CHUNK: - add_fields(EventContentFields.MSC2716_CHUNK_ID) + add_fields(EventContentFields.MSC2716_NEXT_BATCH_ID) + elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH: + add_fields(EventContentFields.MSC2716_BATCH_ID) elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER: add_fields(EventContentFields.MSC2716_MARKER_INSERTION) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index bf4853630..6cd694b2d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1425,7 +1425,7 @@ async def persist_and_notify_client_event( # structural protocol level). 
is_msc2716_event = ( original_event.type == EventTypes.MSC2716_INSERTION - or original_event.type == EventTypes.MSC2716_CHUNK + or original_event.type == EventTypes.MSC2716_BATCH or original_event.type == EventTypes.MSC2716_MARKER ) if not room_version_obj.msc2716_historical and is_msc2716_event: diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index f73ccc7f6..bf14ec384 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -43,25 +43,25 @@ class RoomBatchSendEventRestServlet(RestServlet): """ - API endpoint which can insert a chunk of events historically back in time + API endpoint which can insert a batch of events historically back in time next to the given `prev_event`. - `chunk_id` comes from `next_chunk_id `in the response of the batch send - endpoint and is derived from the "insertion" events added to each chunk. + `batch_id` comes from `next_batch_id `in the response of the batch send + endpoint and is derived from the "insertion" events added to each batch. It's not required for the first batch send. `state_events_at_start` is used to define the historical state events needed to auth the events like join events. These events will float outside of the normal DAG as outlier's and won't be visible in the chat - history which also allows us to insert multiple chunks without having a bunch - of `@mxid joined the room` noise between each chunk. + history which also allows us to insert multiple batches without having a bunch + of `@mxid joined the room` noise between each batch. - `events` is chronological chunk/list of events you want to insert. - There is a reverse-chronological constraint on chunks so once you insert + `events` is chronological list of events you want to insert. + There is a reverse-chronological constraint on batches so once you insert some messages, you can only insert older ones after that. - tldr; Insert chunks from your most recent history -> oldest history. 
+ tldr; Insert batches from your most recent history -> oldest history. - POST /_matrix/client/unstable/org.matrix.msc2716/rooms/<roomID>/batch_send?prev_event_id=<eventID>&chunk_id=<chunkID> + POST /_matrix/client/unstable/org.matrix.msc2716/rooms/<roomID>/batch_send?prev_event_id=<eventID>&batch_id=<batchID> { "events": [ ... ], "state_events_at_start": [ ... ] @@ -129,7 +129,7 @@ def _create_insertion_event_dict( self, sender: str, room_id: str, origin_server_ts: int ) -> JsonDict: """Creates an event dict for an "insertion" event with the proper fields - and a random chunk ID. + and a random batch ID. Args: sender: The event author MXID @@ -140,13 +140,13 @@ def _create_insertion_event_dict( The new event dictionary to insert. """ - next_chunk_id = random_string(8) + next_batch_id = random_string(8) insertion_event = { "type": EventTypes.MSC2716_INSERTION, "sender": sender, "room_id": room_id, "content": { - EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, + EventContentFields.MSC2716_NEXT_BATCH_ID: next_batch_id, EventContentFields.MSC2716_HISTORICAL: True, }, "origin_server_ts": origin_server_ts, @@ -191,7 +191,7 @@ async def on_POST( prev_event_ids_from_query = parse_strings_from_args( request.args, "prev_event_id" ) - chunk_id_from_query = parse_string(request, "chunk_id") + batch_id_from_query = parse_string(request, "batch_id") if prev_event_ids_from_query is None: raise SynapseError( @@ -291,27 +291,27 @@ async def on_POST( prev_event_ids_from_query ) - # Figure out which chunk to connect to. If they passed in - # chunk_id_from_query let's use it. The chunk ID passed in comes - # from the chunk_id in the "insertion" event from the previous chunk. - last_event_in_chunk = events_to_create[-1] - chunk_id_to_connect_to = chunk_id_from_query + # Figure out which batch to connect to. If they passed in + # batch_id_from_query let's use it. The batch ID passed in comes + # from the batch_id in the "insertion" event from the previous batch. 
+ last_event_in_batch = events_to_create[-1] + batch_id_to_connect_to = batch_id_from_query base_insertion_event = None - if chunk_id_from_query: + if batch_id_from_query: # All but the first base insertion event should point at a fake # event, which causes the HS to ask for the state at the start of - # the chunk later. + # the batch later. prev_event_ids = [fake_prev_event_id] - # Verify the chunk_id_from_query corresponds to an actual insertion event - # and have the chunk connected. + # Verify the batch_id_from_query corresponds to an actual insertion event + # and have the batch connected. corresponding_insertion_event_id = ( - await self.store.get_insertion_event_by_chunk_id(chunk_id_from_query) + await self.store.get_insertion_event_by_batch_id(batch_id_from_query) ) if corresponding_insertion_event_id is None: raise SynapseError( 400, - "No insertion event corresponds to the given ?chunk_id", + "No insertion event corresponds to the given ?batch_id", errcode=Codes.INVALID_PARAM, ) pass @@ -328,7 +328,7 @@ async def on_POST( base_insertion_event_dict = self._create_insertion_event_dict( sender=requester.user.to_string(), room_id=room_id, - origin_server_ts=last_event_in_chunk["origin_server_ts"], + origin_server_ts=last_event_in_batch["origin_server_ts"], ) base_insertion_event_dict["prev_events"] = prev_event_ids.copy() @@ -347,38 +347,38 @@ async def on_POST( depth=inherited_depth, ) - chunk_id_to_connect_to = base_insertion_event["content"][ - EventContentFields.MSC2716_NEXT_CHUNK_ID + batch_id_to_connect_to = base_insertion_event["content"][ + EventContentFields.MSC2716_NEXT_BATCH_ID ] - # Connect this current chunk to the insertion event from the previous chunk - chunk_event = { - "type": EventTypes.MSC2716_CHUNK, + # Connect this current batch to the insertion event from the previous batch + batch_event = { + "type": EventTypes.MSC2716_BATCH, "sender": requester.user.to_string(), "room_id": room_id, "content": { - EventContentFields.MSC2716_CHUNK_ID: 
chunk_id_to_connect_to, + EventContentFields.MSC2716_BATCH_ID: batch_id_to_connect_to, EventContentFields.MSC2716_HISTORICAL: True, }, - # Since the chunk event is put at the end of the chunk, + # Since the batch event is put at the end of the batch, # where the newest-in-time event is, copy the origin_server_ts from # the last event we're inserting - "origin_server_ts": last_event_in_chunk["origin_server_ts"], + "origin_server_ts": last_event_in_batch["origin_server_ts"], } - # Add the chunk event to the end of the chunk (newest-in-time) - events_to_create.append(chunk_event) + # Add the batch event to the end of the batch (newest-in-time) + events_to_create.append(batch_event) - # Add an "insertion" event to the start of each chunk (next to the oldest-in-time - # event in the chunk) so the next chunk can be connected to this one. + # Add an "insertion" event to the start of each batch (next to the oldest-in-time + # event in the batch) so the next batch can be connected to this one. insertion_event = self._create_insertion_event_dict( sender=requester.user.to_string(), room_id=room_id, - # Since the insertion event is put at the start of the chunk, + # Since the insertion event is put at the start of the batch, # where the oldest-in-time event is, copy the origin_server_ts from # the first event we're inserting origin_server_ts=events_to_create[0]["origin_server_ts"], ) - # Prepend the insertion event to the start of the chunk (oldest-in-time) + # Prepend the insertion event to the start of the batch (oldest-in-time) events_to_create = [insertion_event] + events_to_create event_ids = [] @@ -439,17 +439,17 @@ async def on_POST( ) insertion_event_id = event_ids[0] - chunk_event_id = event_ids[-1] + batch_event_id = event_ids[-1] historical_event_ids = event_ids[1:-1] response_dict = { "state_event_ids": state_event_ids_at_start, "event_ids": historical_event_ids, - "next_chunk_id": insertion_event["content"][ - EventContentFields.MSC2716_NEXT_CHUNK_ID + 
"next_batch_id": insertion_event["content"][ + EventContentFields.MSC2716_NEXT_BATCH_ID ], "insertion_event_id": insertion_event_id, - "chunk_event_id": chunk_event_id, + "batch_event_id": batch_event_id, } if base_insertion_event is not None: response_dict["base_insertion_event_id"] = base_insertion_event.event_id diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 047782eb0..10184d6ae 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1034,13 +1034,13 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): LIMIT ? """ - # Find any chunk connections of a given insertion event - chunk_connection_query = """ + # Find any batch connections of a given insertion event + batch_connection_query = """ SELECT e.depth, c.event_id FROM insertion_events AS i - /* Find the chunk that connects to the given insertion event */ - INNER JOIN chunk_events AS c - ON i.next_chunk_id = c.chunk_id - /* Get the depth of the chunk start event from the events table */ + /* Find the batch that connects to the given insertion event */ + INNER JOIN batch_events AS c + ON i.next_batch_id = c.batch_id + /* Get the depth of the batch start event from the events table */ INNER JOIN events AS e USING (event_id) /* Find an insertion event which matches the given event_id */ WHERE i.event_id = ? @@ -1077,12 +1077,12 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): event_results.add(event_id) - # Try and find any potential historical chunks of message history. + # Try and find any potential historical batches of message history. # # First we look for an insertion event connected to the current # event (by prev_event). If we find any, we need to go and try to - # find any chunk events connected to the insertion event (by - # chunk_id). 
If we find any, we'll add them to the queue and + # find any batch events connected to the insertion event (by + # batch_id). If we find any, we'll add them to the queue and # navigate up the DAG like normal in the next iteration of the loop. txn.execute( connected_insertion_event_query, (event_id, limit - len(event_results)) @@ -1097,17 +1097,17 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event = row[1] queue.put((-connected_insertion_event_depth, connected_insertion_event)) - # Find any chunk connections for the given insertion event + # Find any batch connections for the given insertion event txn.execute( - chunk_connection_query, + batch_connection_query, (connected_insertion_event, limit - len(event_results)), ) - chunk_start_event_id_results = txn.fetchall() + batch_start_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events: chunk_start_event_id_results %s", - chunk_start_event_id_results, + "_get_backfill_events: batch_start_event_id_results %s", + batch_start_event_id_results, ) - for row in chunk_start_event_id_results: + for row in batch_start_event_id_results: if row[1] not in event_results: queue.put((-row[0], row[1])) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index dec7e8594..584f818ff 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1509,7 +1509,7 @@ def _update_metadata_tables_txn( self._handle_event_relations(txn, event) self._handle_insertion_event(txn, event) - self._handle_chunk_event(txn, event) + self._handle_batch_event(txn, event) # Store the labels for this event. 
labels = event.content.get(EventContentFields.LABELS) @@ -1790,23 +1790,23 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): ): return - next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) - if next_chunk_id is None: - # Invalid insertion event without next chunk ID + next_batch_id = event.content.get(EventContentFields.MSC2716_NEXT_BATCH_ID) + if next_batch_id is None: + # Invalid insertion event without next batch ID return logger.debug( - "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event + "_handle_insertion_event (next_batch_id=%s) %s", next_batch_id, event ) - # Keep track of the insertion event and the chunk ID + # Keep track of the insertion event and the batch ID self.db_pool.simple_insert_txn( txn, table="insertion_events", values={ "event_id": event.event_id, "room_id": event.room_id, - "next_chunk_id": next_chunk_id, + "next_batch_id": next_batch_id, }, ) @@ -1822,8 +1822,8 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): }, ) - def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): - """Handles inserting the chunk edges/connections between the chunk event + def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase): + """Handles inserting the batch edges/connections between the batch event and an insertion event. Part of MSC2716. Args: @@ -1831,11 +1831,11 @@ def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): event: The event to process """ - if event.type != EventTypes.MSC2716_CHUNK: - # Not a chunk event + if event.type != EventTypes.MSC2716_BATCH: + # Not a batch event return - # Skip processing a chunk event if the room version doesn't + # Skip processing a batch event if the room version doesn't # support it or the event is not from the room creator. 
room_version = self.store.get_room_version_txn(txn, event.room_id) room_creator = self.db_pool.simple_select_one_onecol_txn( @@ -1852,35 +1852,35 @@ def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): ): return - chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) - if chunk_id is None: - # Invalid chunk event without a chunk ID + batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID) + if batch_id is None: + # Invalid batch event without a batch ID return - logger.debug("_handle_chunk_event chunk_id=%s %s", chunk_id, event) + logger.debug("_handle_batch_event batch_id=%s %s", batch_id, event) - # Keep track of the insertion event and the chunk ID + # Keep track of the insertion event and the batch ID self.db_pool.simple_insert_txn( txn, - table="chunk_events", + table="batch_events", values={ "event_id": event.event_id, "room_id": event.room_id, - "chunk_id": chunk_id, + "batch_id": batch_id, }, ) - # When we receive an event with a `chunk_id` referencing the - # `next_chunk_id` of the insertion event, we can remove it from the + # When we receive an event with a `batch_id` referencing the + # `next_batch_id` of the insertion event, we can remove it from the # `insertion_event_extremities` table. sql = """ DELETE FROM insertion_event_extremities WHERE event_id IN ( SELECT event_id FROM insertion_events - WHERE next_chunk_id = ? + WHERE next_batch_id = ? 
) """ - txn.execute(sql, (chunk_id,)) + txn.execute(sql, (batch_id,)) def _handle_redaction(self, txn, redacted_event_id): """Handles receiving a redaction and checking whether we need to remove diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 54fa361d3..a38338875 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -18,11 +18,11 @@ class RoomBatchStore(SQLBaseStore): - async def get_insertion_event_by_chunk_id(self, chunk_id: str) -> Optional[str]: + async def get_insertion_event_by_batch_id(self, batch_id: str) -> Optional[str]: """Retrieve a insertion event ID. Args: - chunk_id: The chunk ID of the insertion event to retrieve. + batch_id: The batch ID of the insertion event to retrieve. Returns: The event_id of an insertion event, or None if there is no known @@ -30,7 +30,7 @@ async def get_insertion_event_by_chunk_id(self, chunk_id: str) -> Optional[str]: """ return await self.db_pool.simple_select_one_onecol( table="insertion_events", - keyvalues={"next_chunk_id": chunk_id}, + keyvalues={"next_batch_id": batch_id}, retcol="event_id", allow_none=True, ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index af9cc6994..aa2ce44c6 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -14,7 +14,7 @@ # When updating these values, please leave a short summary of the changes below. 
-SCHEMA_VERSION = 63 +SCHEMA_VERSION = 64 """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the diff --git a/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres new file mode 100644 index 000000000..5f3899320 --- /dev/null +++ b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres @@ -0,0 +1,23 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +ALTER TABLE insertion_events RENAME COLUMN next_chunk_id TO next_batch_id; +DROP INDEX insertion_events_next_chunk_id; +CREATE INDEX IF NOT EXISTS insertion_events_next_batch_id ON insertion_events(next_batch_id); + +ALTER TABLE chunk_events RENAME TO batch_events; +ALTER TABLE batch_events RENAME COLUMN chunk_id TO batch_id; +DROP INDEX chunk_events_chunk_id; +CREATE INDEX IF NOT EXISTS batch_events_batch_id ON batch_events(batch_id); diff --git a/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite new file mode 100644 index 000000000..498956399 --- /dev/null +++ b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite @@ -0,0 +1,37 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- Re-create the insertion_events table since SQLite doesn't support better +-- renames for columns (next_chunk_id -> next_batch_id) +DROP TABLE insertion_events; +CREATE TABLE IF NOT EXISTS insertion_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + next_batch_id TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS insertion_events_event_id ON insertion_events(event_id); +CREATE INDEX IF NOT EXISTS insertion_events_next_batch_id ON insertion_events(next_batch_id); + +-- Re-create the chunk_events table since SQLite doesn't support better renames +-- for columns (chunk_id -> batch_id) +DROP TABLE chunk_events; +CREATE TABLE IF NOT EXISTS batch_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + batch_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX IF NOT EXISTS batch_events_event_id ON batch_events(event_id); +CREATE INDEX IF NOT EXISTS batch_events_batch_id ON batch_events(batch_id); From a2d7195e0111ee6b2fedaabb0f02cfae648cd347 Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Wed, 22 Sep 2021 10:59:52 +0100 Subject: [PATCH 25/74] Track why we're evicting from caches (#10829) So we can distinguish between "evicting because the cache is too big" and "evicting because the cache entries haven't been recently used". --- changelog.d/10829.misc | 1 + synapse/util/caches/__init__.py | 31 +++++++++++++++++++++------- synapse/util/caches/expiringcache.py | 10 ++++----- synapse/util/caches/lrucache.py | 4 ++-- 4 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 changelog.d/10829.misc diff --git a/changelog.d/10829.misc b/changelog.d/10829.misc new file mode 100644 index 000000000..ac5fd6b04 --- /dev/null +++ b/changelog.d/10829.misc @@ -0,0 +1 @@ +Track cache eviction rates more finely in Prometheus' monitoring.
\ No newline at end of file diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index cab1bf0c1..df4d61e4b 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -12,8 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import collections import logging +import typing +from enum import Enum, auto from sys import intern from typing import Callable, Dict, Optional, Sized @@ -34,7 +36,7 @@ cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"]) cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) -cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"]) +cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name", "reason"]) cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"]) cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) cache_memory_usage = Gauge( @@ -46,11 +48,16 @@ response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"]) response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"]) response_cache_evicted = Gauge( - "synapse_util_caches_response_cache:evicted_size", "", ["name"] + "synapse_util_caches_response_cache:evicted_size", "", ["name", "reason"] ) response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"]) +class EvictionReason(Enum): + size = auto() + time = auto() + + @attr.s(slots=True) class CacheMetric: @@ -61,7 +68,9 @@ class CacheMetric: hits = attr.ib(default=0) misses = attr.ib(default=0) - evicted_size = attr.ib(default=0) + eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib( + factory=collections.Counter + ) memory_usage = attr.ib(default=None) def inc_hits(self) -> None: @@ -70,8 +79,8 @@ def inc_hits(self) -> None: def inc_misses(self) -> None: 
self.misses += 1 - def inc_evictions(self, size: int = 1) -> None: - self.evicted_size += size + def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None: + self.eviction_size_by_reason[reason] += size def inc_memory_usage(self, memory: int) -> None: if self.memory_usage is None: @@ -94,14 +103,20 @@ def collect(self) -> None: if self._cache_type == "response_cache": response_cache_size.labels(self._cache_name).set(len(self._cache)) response_cache_hits.labels(self._cache_name).set(self.hits) - response_cache_evicted.labels(self._cache_name).set(self.evicted_size) + for reason in EvictionReason: + response_cache_evicted.labels(self._cache_name, reason.name).set( + self.eviction_size_by_reason[reason] + ) response_cache_total.labels(self._cache_name).set( self.hits + self.misses ) else: cache_size.labels(self._cache_name).set(len(self._cache)) cache_hits.labels(self._cache_name).set(self.hits) - cache_evicted.labels(self._cache_name).set(self.evicted_size) + for reason in EvictionReason: + cache_evicted.labels(self._cache_name, reason.name).set( + self.eviction_size_by_reason[reason] + ) cache_total.labels(self._cache_name).set(self.hits + self.misses) if getattr(self._cache, "max_size", None): cache_max_size.labels(self._cache_name).set(self._cache.max_size) diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index bde16b857..c3f72aa06 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -22,7 +22,7 @@ from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import Clock -from synapse.util.caches import register_cache +from synapse.util.caches import EvictionReason, register_cache logger = logging.getLogger(__name__) @@ -98,9 +98,9 @@ def evict(self) -> None: while self._max_size and len(self) > self._max_size: _key, value = self._cache.popitem(last=False) if self.iterable: - 
self.metrics.inc_evictions(len(value.value)) + self.metrics.inc_evictions(EvictionReason.size, len(value.value)) else: - self.metrics.inc_evictions() + self.metrics.inc_evictions(EvictionReason.size) def __getitem__(self, key: KT) -> VT: try: @@ -175,9 +175,9 @@ def _prune_cache(self) -> None: for k in keys_to_delete: value = self._cache.pop(k) if self.iterable: - self.metrics.inc_evictions(len(value.value)) + self.metrics.inc_evictions(EvictionReason.time, len(value.value)) else: - self.metrics.inc_evictions() + self.metrics.inc_evictions(EvictionReason.time) logger.debug( "[%s] _prune_cache before: %d, after len: %d", diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 39dce9dd4..ea6e8dc8d 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -40,7 +40,7 @@ from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.util import Clock, caches -from synapse.util.caches import CacheMetric, register_cache +from synapse.util.caches import CacheMetric, EvictionReason, register_cache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry from synapse.util.linked_list import ListNode @@ -403,7 +403,7 @@ def evict() -> None: evicted_len = delete_node(node) cache.pop(node.key, None) if metrics: - metrics.inc_evictions(evicted_len) + metrics.inc_evictions(EvictionReason.size, evicted_len) def synchronized(f: FT) -> FT: @wraps(f) From 4ecf51812ebf4cbacd3c6042aa29cb37b7855da3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 22 Sep 2021 12:30:59 +0100 Subject: [PATCH 26/74] Include outlier status in `str(event)` for V2/V3 events (#10879) I meant to do this before, in #10591, but because I'm stupid I forgot to do it for V2 and V3 events. I've factored the common code out to `EventBase` to save us having two copies of it. 
This means that for `FrozenEvent` we replace `self.get("event_id", None)` with `self.event_id`, which I think is safe. `get()` is an alias for `self._dict.get()`, whereas `event_id()` is an `@property` method which looks up `self._event_id`, which is populated during construction from the same dict. We don't seem to rely on the fallback, because if the `event_id` key is absent from the dict then construction of the `EventBase` object will fail. Long story short, the only way this could change behaviour is if `event_dict["event_id"]` is changed *after* the `EventBase` object is constructed without updating the `_event_id` field, or vice versa - either of which would be very problematic anyway and the behavior of `str(event)` is the least of our worries. --- changelog.d/10879.misc | 1 + synapse/events/__init__.py | 34 ++++++++++++---------------------- 2 files changed, 13 insertions(+), 22 deletions(-) create mode 100644 changelog.d/10879.misc diff --git a/changelog.d/10879.misc b/changelog.d/10879.misc new file mode 100644 index 000000000..acc04930f --- /dev/null +++ b/changelog.d/10879.misc @@ -0,0 +1 @@ +Include outlier status when we log V2 or V3 events. diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index a730c1719..49190459c 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -344,6 +344,18 @@ def freeze(self): # this will be a no-op if the event dict is already frozen. 
self._dict = freeze(self._dict) + def __str__(self): + return self.__repr__() + + def __repr__(self): + return "<%s event_id=%r, type=%r, state_key=%r, outlier=%s>" % ( + self.__class__.__name__, + self.event_id, + self.get("type", None), + self.get("state_key", None), + self.internal_metadata.is_outlier(), + ) + class FrozenEvent(EventBase): format_version = EventFormatVersions.V1 # All events of this type are V1 @@ -392,17 +404,6 @@ def __init__( def event_id(self) -> str: return self._event_id - def __str__(self): - return self.__repr__() - - def __repr__(self): - return "<FrozenEvent event_id=%r, type=%r, state_key=%r, outlier=%s>" % ( - self.get("event_id", None), - self.get("type", None), - self.get("state_key", None), - self.internal_metadata.is_outlier(), - ) - class FrozenEventV2(EventBase): format_version = EventFormatVersions.V2 # All events of this type are V2 @@ -478,17 +479,6 @@ def auth_event_ids(self): """ return self.auth_events - def __str__(self): - return self.__repr__() - - def __repr__(self): - return "<%s event_id=%r, type=%r, state_key=%r>" % ( - self.__class__.__name__, - self.event_id, - self.get("type", None), - self.get("state_key", None), - ) - class FrozenEventV3(FrozenEventV2): """FrozenEventV3, which differs from FrozenEventV2 only in the event_id format""" From 80828eda06f8e3d6a930c9fa45204ad6fef1d411 Mon Sep 17 00:00:00 2001 From: David Teller <D.O.Teller@gmail.com> Date: Wed, 22 Sep 2021 15:09:43 +0200 Subject: [PATCH 27/74] =?UTF-8?q?Extend=20ModuleApi=20with=20the=20methods?= =?UTF-8?q?=20we'll=20need=20to=20reject=20spam=20based=20on=20=E2=80=A6IP?= =?UTF-8?q?=20-=20resolves=20#10832=20(#10833)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend ModuleApi with the methods we'll need to reject spam based on IP - resolves #10832 Signed-off-by: David Teller <davidt@element.io> --- changelog.d/10833.misc | 1 + synapse/module_api/__init__.py | 82 +++++++++++++++++++- 
synapse/storage/databases/main/client_ips.py | 27 +++++-- tests/module_api/test_api.py | 72 +++++++++++++++++ 4 files changed, 174 insertions(+), 8 deletions(-) create mode 100644 changelog.d/10833.misc diff --git a/changelog.d/10833.misc b/changelog.d/10833.misc new file mode 100644 index 000000000..f23c0a1a0 --- /dev/null +++ b/changelog.d/10833.misc @@ -0,0 +1 @@ +Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data. diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 3196c2bec..174e6934a 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -24,8 +24,10 @@ List, Optional, Tuple, + Union, ) +import attr import jinja2 from twisted.internet import defer @@ -46,7 +48,14 @@ from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter -from synapse.types import JsonDict, Requester, UserID, UserInfo, create_requester +from synapse.types import ( + DomainSpecificString, + JsonDict, + Requester, + UserID, + UserInfo, + create_requester, +) from synapse.util import Clock from synapse.util.caches.descriptors import cached @@ -79,6 +88,18 @@ logger = logging.getLogger(__name__) +@attr.s(auto_attribs=True) +class UserIpAndAgent: + """ + An IP address and user agent used by a user to connect to this homeserver. + """ + + ip: str + user_agent: str + # The time at which this user agent/ip was last seen. + last_seen: int + + class ModuleApi: """A proxy object that gets passed to various plugin modules so they can register new users etc if necessary. @@ -700,6 +721,65 @@ def read_templates( (td for td in (self.custom_template_dir, custom_template_directory) if td), ) + def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: + """ + Checks whether an ID (user id, room, ...) comes from this homeserver. + + Args: + id: any Matrix id (e.g. 
user id, room id, ...), either as a raw id, + e.g. string "@user:example.com" or as a parsed UserID, RoomID, ... + Returns: + True if id comes from this homeserver, False otherwise. + + Added in Synapse v1.44.0. + """ + if isinstance(id, DomainSpecificString): + return self._hs.is_mine(id) + else: + return self._hs.is_mine_id(id) + + async def get_user_ip_and_agents( + self, user_id: str, since_ts: int = 0 + ) -> List[UserIpAndAgent]: + """ + Return the list of user IPs and agents for a user. + + Args: + user_id: the id of a user, local or remote + since_ts: a timestamp in seconds since the epoch, + or the epoch itself if not specified. + Returns: + The list of all UserIpAndAgent that the user has + used to connect to this homeserver since `since_ts`. + If the user is remote, this list is empty. + + Added in Synapse v1.44.0. + """ + # Don't hit the db if this is not a local user. + is_mine = False + try: + # Let's be defensive against ill-formed strings. + if self.is_mine(user_id): + is_mine = True + except Exception: + pass + + if is_mine: + raw_data = await self._store.get_user_ip_and_agents( + UserID.from_string(user_id), since_ts + ) + # Sanitize some of the data. We don't want to return tokens. 
+ return [ + UserIpAndAgent( + ip=str(data["ip"]), + user_agent=str(data["user_agent"]), + last_seen=int(data["last_seen"]), + ) + for data in raw_data + ] + else: + return [] + class PublicRoomListManager: """Contains methods for adding to, removing from and querying whether a room diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 7a98275d9..7e33ae578 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -555,8 +555,11 @@ async def get_last_client_ip_by_device( return ret async def get_user_ip_and_agents( - self, user: UserID + self, user: UserID, since_ts: int = 0 ) -> List[Dict[str, Union[str, int]]]: + """ + Fetch IP/User Agent connection since a given timestamp. + """ user_id = user.to_string() results = {} @@ -568,13 +571,23 @@ async def get_user_ip_and_agents( ) = key if uid == user_id: user_agent, _, last_seen = self._batch_row_update[key] - results[(access_token, ip)] = (user_agent, last_seen) + if last_seen >= since_ts: + results[(access_token, ip)] = (user_agent, last_seen) - rows = await self.db_pool.simple_select_list( - table="user_ips", - keyvalues={"user_id": user_id}, - retcols=["access_token", "ip", "user_agent", "last_seen"], - desc="get_user_ip_and_agents", + def get_recent(txn): + txn.execute( + """ + SELECT access_token, ip, user_agent, last_seen FROM user_ips + WHERE last_seen >= ? AND user_id = ? 
+ ORDER BY last_seen + DESC + """, + (since_ts, user_id), + ) + return txn.fetchall() + + rows = await self.db_pool.runInteraction( + desc="get_user_ip_and_agents", func=get_recent ) results.update( diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py index 7dd519cd4..9d38974fb 100644 --- a/tests/module_api/test_api.py +++ b/tests/module_api/test_api.py @@ -43,6 +43,7 @@ def prepare(self, reactor, clock, homeserver): self.module_api = homeserver.get_module_api() self.event_creation_handler = homeserver.get_event_creation_handler() self.sync_handler = homeserver.get_sync_handler() + self.auth_handler = homeserver.get_auth_handler() def make_homeserver(self, reactor, clock): return self.setup_test_homeserver( @@ -89,6 +90,77 @@ def test_get_userinfo_by_id__no_user_found(self): found_user = self.get_success(self.module_api.get_userinfo_by_id("@alice:test")) self.assertIsNone(found_user) + def test_get_user_ip_and_agents(self): + user_id = self.register_user("test_get_user_ip_and_agents_user", "1234") + + # Initially, we should have no ip/agent for our user. + info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) + self.assertEqual(info, []) + + # Insert a first ip, agent. We should be able to retrieve it. + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip_1", "user_agent_1", "device_1", None + ) + ) + info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) + + self.assertEqual(len(info), 1) + last_seen_1 = info[0].last_seen + + # Insert a second ip, agent at a later date. We should be able to retrieve it. 
+ last_seen_2 = last_seen_1 + 10000 + print("%s => %s" % (last_seen_1, last_seen_2)) + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip_2", "user_agent_2", "device_2", last_seen_2 + ) + ) + info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) + + self.assertEqual(len(info), 2) + ip_1_seen = False + ip_2_seen = False + + for i in info: + if i.ip == "ip_1": + ip_1_seen = True + self.assertEqual(i.user_agent, "user_agent_1") + self.assertEqual(i.last_seen, last_seen_1) + elif i.ip == "ip_2": + ip_2_seen = True + self.assertEqual(i.user_agent, "user_agent_2") + self.assertEqual(i.last_seen, last_seen_2) + self.assertTrue(ip_1_seen) + self.assertTrue(ip_2_seen) + + # If we fetch from a midpoint between last_seen_1 and last_seen_2, + # we should only find the second ip, agent. + info = self.get_success( + self.module_api.get_user_ip_and_agents( + user_id, (last_seen_1 + last_seen_2) / 2 + ) + ) + self.assertEqual(len(info), 1) + self.assertEqual(info[0].ip, "ip_2") + self.assertEqual(info[0].user_agent, "user_agent_2") + self.assertEqual(info[0].last_seen, last_seen_2) + + # If we fetch from a point later than last_seen_2, we shouldn't + # find anything. 
+ info = self.get_success( + self.module_api.get_user_ip_and_agents(user_id, last_seen_2 + 10000) + ) + self.assertEqual(info, []) + + def test_get_user_ip_and_agents__no_user_found(self): + info = self.get_success( + self.module_api.get_user_ip_and_agents( + "@test_get_user_ip_and_agents_user_nonexistent:example.com" + ) + ) + self.assertEqual(info, []) + def test_sending_events_into_room(self): """Tests that a module can send events into a room""" # Mock out create_and_send_nonmember_event to check whether events are being sent From 724aef9a878cebc137c81f3b261bafb9302fb592 Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Wed, 22 Sep 2021 14:21:58 +0100 Subject: [PATCH 28/74] Opt out of cache expiry for `get_users_who_share_room_with_user` (#10826) * Allow LruCaches to opt out of time-based expiry * Don't expire `get_users_who_share_room` & friends --- changelog.d/10826.misc | 2 ++ synapse/storage/databases/main/roommember.py | 11 ++++++++--- synapse/util/caches/deferred_cache.py | 2 ++ synapse/util/caches/descriptors.py | 5 +++++ synapse/util/caches/lrucache.py | 16 +++++++++++++--- 5 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 changelog.d/10826.misc diff --git a/changelog.d/10826.misc b/changelog.d/10826.misc new file mode 100644 index 000000000..53e56fc36 --- /dev/null +++ b/changelog.d/10826.misc @@ -0,0 +1,2 @@ +Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you +haven't synced recently. 
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 9beeb96aa..a4ec6bc32 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -162,7 +162,7 @@ def _check_safe_current_state_events_membership_updated_txn(self, txn): self._check_safe_current_state_events_membership_updated_txn, ) - @cached(max_entries=100000, iterable=True) + @cached(max_entries=100000, iterable=True, prune_unread_entries=False) async def get_users_in_room(self, room_id: str) -> List[str]: return await self.db_pool.runInteraction( "get_users_in_room", self.get_users_in_room_txn, room_id @@ -439,7 +439,7 @@ async def get_local_current_membership_for_user_in_room( return results_dict.get("membership"), results_dict.get("event_id") - @cached(max_entries=500000, iterable=True) + @cached(max_entries=500000, iterable=True, prune_unread_entries=False) async def get_rooms_for_user_with_stream_ordering( self, user_id: str ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]: @@ -544,7 +544,12 @@ async def get_rooms_for_user( ) return frozenset(r.room_id for r in rooms) - @cached(max_entries=500000, cache_context=True, iterable=True) + @cached( + max_entries=500000, + cache_context=True, + iterable=True, + prune_unread_entries=False, + ) async def get_users_who_share_room_with_user( self, user_id: str, cache_context: _CacheContext ) -> Set[str]: diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index f05590da0..6262efe07 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -73,6 +73,7 @@ def __init__( tree: bool = False, iterable: bool = False, apply_cache_factor_from_config: bool = True, + prune_unread_entries: bool = True, ): """ Args: @@ -105,6 +106,7 @@ def metrics_cb() -> None: size_callback=(lambda d: len(d) or 1) if iterable else None, metrics_collection_callback=metrics_cb, 
apply_cache_factor_from_config=apply_cache_factor_from_config, + prune_unread_entries=prune_unread_entries, ) self.thread: Optional[threading.Thread] = None diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 1ca31e41a..b9dcca17f 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -258,6 +258,7 @@ def __init__( tree=False, cache_context=False, iterable=False, + prune_unread_entries: bool = True, ): super().__init__(orig, num_args=num_args, cache_context=cache_context) @@ -269,6 +270,7 @@ def __init__( self.max_entries = max_entries self.tree = tree self.iterable = iterable + self.prune_unread_entries = prune_unread_entries def __get__(self, obj, owner): cache: DeferredCache[CacheKey, Any] = DeferredCache( @@ -276,6 +278,7 @@ def __get__(self, obj, owner): max_entries=self.max_entries, tree=self.tree, iterable=self.iterable, + prune_unread_entries=self.prune_unread_entries, ) get_cache_key = self.cache_key_builder @@ -507,6 +510,7 @@ def cached( tree: bool = False, cache_context: bool = False, iterable: bool = False, + prune_unread_entries: bool = True, ) -> Callable[[F], _CachedFunction[F]]: func = lambda orig: DeferredCacheDescriptor( orig, @@ -515,6 +519,7 @@ def cached( tree=tree, cache_context=cache_context, iterable=iterable, + prune_unread_entries=prune_unread_entries, ) return cast(Callable[[F], _CachedFunction[F]], func) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index ea6e8dc8d..4ff62b403 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -202,10 +202,11 @@ def __init__( cache: "weakref.ReferenceType[LruCache]", clock: Clock, callbacks: Collection[Callable[[], None]] = (), + prune_unread_entries: bool = True, ): self._list_node = ListNode.insert_after(self, root) - self._global_list_node = None - if USE_GLOBAL_LIST: + self._global_list_node: Optional[_TimedListNode] = None + if USE_GLOBAL_LIST and 
prune_unread_entries: self._global_list_node = _TimedListNode.insert_after(self, GLOBAL_ROOT) self._global_list_node.update_last_access(clock) @@ -314,6 +315,7 @@ def __init__( metrics_collection_callback: Optional[Callable[[], None]] = None, apply_cache_factor_from_config: bool = True, clock: Optional[Clock] = None, + prune_unread_entries: bool = True, ): """ Args: @@ -427,7 +429,15 @@ def cache_len(): self.len = synchronized(cache_len) def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()): - node = _Node(list_root, key, value, weak_ref_to_self, real_clock, callbacks) + node = _Node( + list_root, + key, + value, + weak_ref_to_self, + real_clock, + callbacks, + prune_unread_entries, + ) cache[key] = node if size_callback: From 52913d56a5a2b07106774d97f4e188148d85a900 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Wed, 22 Sep 2021 09:41:42 -0400 Subject: [PATCH 29/74] Add documentation for experimental feature flags. (#10865) --- changelog.d/10865.doc | 1 + docs/SUMMARY.md | 1 + docs/development/experimental_features.md | 37 +++++++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 changelog.d/10865.doc create mode 100644 docs/development/experimental_features.md diff --git a/changelog.d/10865.doc b/changelog.d/10865.doc new file mode 100644 index 000000000..deeb0eedf --- /dev/null +++ b/changelog.d/10865.doc @@ -0,0 +1 @@ +Add developer documentation about experimental configuration flags. 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index fd0045e1e..bdb44543b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -74,6 +74,7 @@ - [Testing]() - [OpenTracing](opentracing.md) - [Database Schemas](development/database_schema.md) + - [Experimental features](development/experimental_features.md) - [Synapse Architecture]() - [Log Contexts](log_contexts.md) - [Replication](replication.md) diff --git a/docs/development/experimental_features.md b/docs/development/experimental_features.md new file mode 100644 index 000000000..d6b11496c --- /dev/null +++ b/docs/development/experimental_features.md @@ -0,0 +1,37 @@ +# Implementing experimental features in Synapse + +It can be desirable to implement "experimental" features which are disabled by +default and must be explicitly enabled via the Synapse configuration. This is +applicable for features which: + +* Are unstable in the Matrix spec (e.g. those defined by an MSC that has not yet been merged). +* Are not yet considered ready by developers for use by general Synapse administrators/users + (e.g. a feature is incomplete, buggy, performs poorly, or needs further testing). + +Note that this only really applies to features which are expected to be desirable +to a broad audience. The [module infrastructure](../modules/index.md) should +instead be investigated for non-standard features. + +Guarding experimental features behind configuration flags should help with some +of the following scenarios: + +* Ensure that clients do not assume that unstable features exist (failing + gracefully if they do not). +* Unstable features do not become de-facto standards and can be removed + aggressively (since only those who have opted in will be affected). +* Ease finding the implementation of unstable features in Synapse (for future + removal or stabilization). +* Ease testing a feature (or removal of a feature) due to enabling/disabling without + code changes. It also becomes possible to ask for wider testing, if desired. 
+ +Experimental configuration flags should be disabled by default (requiring Synapse +administrators to explicitly opt-in), although there are situations where it makes +sense (from a product point-of-view) to enable features by default. This is +expected and not an issue. + +It is not a requirement for experimental features to be behind a configuration flag, +but one should be used if unsure. + +New experimental configuration flags should be added under the `experimental` +configuration key (see the `synapse.config.experimental` file) and either explain +(briefly) what is being enabled, or include the MSC number. From 9391de3f373454aeec5b5c2f01b3c576528e76fe Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 22 Sep 2021 14:43:26 +0100 Subject: [PATCH 30/74] Fix /initialSync error due to unhashable `RoomStreamToken` (#10827) The deprecated /initialSync endpoint maintains a cache of responses, using parameter values as part of the cache key. When a `from` or `to` parameter is specified, it gets converted into a `StreamToken`, which contains a `RoomStreamToken` and forms part of the cache key. `RoomStreamToken`s need to be made hashable for this to work. --- changelog.d/10827.bugfix | 1 + synapse/storage/databases/main/stream.py | 4 +++- synapse/types.py | 20 +++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 changelog.d/10827.bugfix diff --git a/changelog.d/10827.bugfix b/changelog.d/10827.bugfix new file mode 100644 index 000000000..11a618bf8 --- /dev/null +++ b/changelog.d/10827.bugfix @@ -0,0 +1 @@ +Fix error in deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. 
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 959f13de4..9a3b6f4ac 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -39,6 +39,8 @@ from collections import namedtuple from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Set, Tuple +from frozendict import frozendict + from twisted.internet import defer from synapse.api.filtering import Filter @@ -379,7 +381,7 @@ def get_room_max_token(self) -> RoomStreamToken: if p > min_pos } - return RoomStreamToken(None, min_pos, positions) + return RoomStreamToken(None, min_pos, frozendict(positions)) async def get_room_events_stream_for_rooms( self, diff --git a/synapse/types.py b/synapse/types.py index 90168ce8f..ed831a5c1 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -30,6 +30,7 @@ ) import attr +from frozendict import frozendict from signedjson.key import decode_verify_key_bytes from unpaddedbase64 import decode_base64 from zope.interface import Interface @@ -457,6 +458,9 @@ class RoomStreamToken: Note: The `RoomStreamToken` cannot have both a topological part and an instance map. + + For caching purposes, `RoomStreamToken`s and by extension, all their + attributes, must be hashable. 
""" topological = attr.ib( @@ -466,12 +470,12 @@ class RoomStreamToken: stream = attr.ib(type=int, validator=attr.validators.instance_of(int)) instance_map = attr.ib( - type=Dict[str, int], - factory=dict, + type="frozendict[str, int]", + factory=frozendict, validator=attr.validators.deep_mapping( key_validator=attr.validators.instance_of(str), value_validator=attr.validators.instance_of(int), - mapping_validator=attr.validators.instance_of(dict), + mapping_validator=attr.validators.instance_of(frozendict), ), ) @@ -507,7 +511,7 @@ async def parse(cls, store: "DataStore", string: str) -> "RoomStreamToken": return cls( topological=None, stream=stream, - instance_map=instance_map, + instance_map=frozendict(instance_map), ) except Exception: pass @@ -540,7 +544,7 @@ def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": for instance in set(self.instance_map).union(other.instance_map) } - return RoomStreamToken(None, max_stream, instance_map) + return RoomStreamToken(None, max_stream, frozendict(instance_map)) def as_historical_tuple(self) -> Tuple[int, int]: """Returns a tuple of `(topological, stream)` for historical tokens. @@ -593,6 +597,12 @@ async def to_string(self, store: "DataStore") -> str: @attr.s(slots=True, frozen=True) class StreamToken: + """A collection of positions within multiple streams. + + For caching purposes, `StreamToken`s and by extension, all their attributes, + must be hashable. + """ + room_key = attr.ib( type=RoomStreamToken, validator=attr.validators.instance_of(RoomStreamToken) ) From 6fc8be9a1b2046e69e8c6f731442887e3addeec0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Wed, 22 Sep 2021 09:45:20 -0400 Subject: [PATCH 31/74] Include more information in oEmbed previews. (#10819) * Improved titles (fall back to the author name if there's not title) and include the site name. * Handle photo/video payloads. * Include the original URL in the Open Graph response. 
* Fix the expiration time (by properly converting from seconds to milliseconds). --- changelog.d/10819.feature | 1 + synapse/rest/media/v1/oembed.py | 49 +++++++++++++++++-- synapse/rest/media/v1/preview_url_resource.py | 2 +- tests/rest/media/v1/test_url_preview.py | 30 ++++++++---- 4 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 changelog.d/10819.feature diff --git a/changelog.d/10819.feature b/changelog.d/10819.feature new file mode 100644 index 000000000..4fa95a6cc --- /dev/null +++ b/changelog.d/10819.feature @@ -0,0 +1 @@ +Improve oEmbed previews by processing the author name, photo, and video information. diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 8b74e7265..e04671fb9 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -13,7 +13,7 @@ # limitations under the License. import logging import urllib.parse -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional import attr @@ -22,6 +22,8 @@ from synapse.util import json_decoder if TYPE_CHECKING: + from lxml import etree + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -31,7 +33,7 @@ class OEmbedResult: # The Open Graph result (converted from the oEmbed result). open_graph_result: JsonDict - # Number of seconds to cache the content, according to the oEmbed response. + # Number of milliseconds to cache the content, according to the oEmbed response. # # This will be None if no cache-age is provided in the oEmbed response (or # if the oEmbed response cannot be turned into an Open Graph response). @@ -119,10 +121,22 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: # Ensure the cache age is None or an int. cache_age = oembed.get("cache_age") if cache_age: - cache_age = int(cache_age) + cache_age = int(cache_age) * 1000 # The results. 
- open_graph_response = {"og:title": oembed.get("title")} + open_graph_response = { + "og:url": url, + } + + # Use either title or author's name as the title. + title = oembed.get("title") or oembed.get("author_name") + if title: + open_graph_response["og:title"] = title + + # Use the provider name as the site. + provider_name = oembed.get("provider_name") + if provider_name: + open_graph_response["og:site_name"] = provider_name # If a thumbnail exists, use it. Note that dimensions will be calculated later. if "thumbnail_url" in oembed: @@ -137,6 +151,15 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: # If this is a photo, use the full image, not the thumbnail. open_graph_response["og:image"] = oembed["url"] + elif oembed_type == "video": + open_graph_response["og:type"] = "video.other" + calc_description_and_urls(open_graph_response, oembed["html"]) + open_graph_response["og:video:width"] = oembed["width"] + open_graph_response["og:video:height"] = oembed["height"] + + elif oembed_type == "link": + open_graph_response["og:type"] = "website" + else: raise RuntimeError(f"Unknown oEmbed type: {oembed_type}") @@ -149,6 +172,14 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: return OEmbedResult(open_graph_response, cache_age) +def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]: + results = [] + for tag in tree.xpath("//*/" + tag_name): + if "src" in tag.attrib: + results.append(tag.attrib["src"]) + return results + + def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: """ Calculate description for an HTML document. @@ -179,6 +210,16 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> if tree is None: return + # Attempt to find interesting URLs (images, videos, embeds). 
+ if "og:image" not in open_graph_response: + image_urls = _fetch_urls(tree, "img") + if image_urls: + open_graph_response["og:image"] = image_urls[0] + + video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed") + if video_urls: + open_graph_response["og:video"] = video_urls[0] + from synapse.rest.media.v1.preview_url_resource import _calc_description description = _calc_description(tree) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 0a0b476d2..9ffa983fb 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -305,7 +305,7 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes: with open(media_info.filename, "rb") as file: body = file.read() - oembed_response = self._oembed.parse_oembed_response(media_info.uri, body) + oembed_response = self._oembed.parse_oembed_response(url, body) og = oembed_response.open_graph_result # Use the cache age from the oEmbed result, instead of the HTTP response. 
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 9d1389958..d83dfacfe 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -620,11 +620,12 @@ def test_oembed_photo(self): self.assertIn(b"/matrixdotorg", server.data) self.assertEqual(channel.code, 200) - self.assertIsNone(channel.json_body["og:title"]) - self.assertTrue(channel.json_body["og:image"].startswith("mxc://")) - self.assertEqual(channel.json_body["og:image:height"], 1) - self.assertEqual(channel.json_body["og:image:width"], 1) - self.assertEqual(channel.json_body["og:image:type"], "image/png") + body = channel.json_body + self.assertEqual(body["og:url"], "http://twitter.com/matrixdotorg/status/12345") + self.assertTrue(body["og:image"].startswith("mxc://")) + self.assertEqual(body["og:image:height"], 1) + self.assertEqual(body["og:image:width"], 1) + self.assertEqual(body["og:image:type"], "image/png") def test_oembed_rich(self): """Test an oEmbed endpoint which returns HTML content via the 'rich' type.""" @@ -633,6 +634,8 @@ def test_oembed_rich(self): result = { "version": "1.0", "type": "rich", + # Note that this provides the author, not the title. 
+ "author_name": "Alice", "html": "<div>Content Preview</div>", } end_content = json.dumps(result).encode("utf-8") @@ -660,9 +663,14 @@ def test_oembed_rich(self): self.pump() self.assertEqual(channel.code, 200) + body = channel.json_body self.assertEqual( - channel.json_body, - {"og:title": None, "og:description": "Content Preview"}, + body, + { + "og:url": "http://twitter.com/matrixdotorg/status/12345", + "og:title": "Alice", + "og:description": "Content Preview", + }, ) def test_oembed_format(self): @@ -705,7 +713,11 @@ def test_oembed_format(self): self.assertIn(b"format=json", server.data) self.assertEqual(channel.code, 200) + body = channel.json_body self.assertEqual( - channel.json_body, - {"og:title": None, "og:description": "Content Preview"}, + body, + { + "og:url": "http://www.hulu.com/watch/12345", + "og:description": "Content Preview", + }, ) From 8f2a52766bc242c02a309f45406f827e670311e7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 22 Sep 2021 15:20:18 +0100 Subject: [PATCH 32/74] Ensure we mark sent knocks as outliers (#10873) --- changelog.d/10873.bugfix | 1 + synapse/handlers/federation.py | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 changelog.d/10873.bugfix diff --git a/changelog.d/10873.bugfix b/changelog.d/10873.bugfix new file mode 100644 index 000000000..32b2e50fd --- /dev/null +++ b/changelog.d/10873.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.37.0 which caused `knock` events which we sent to remote servers to be incorrectly stored in the local database. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8e2cf3387..a03d77dff 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -593,6 +593,13 @@ async def do_knock( target_hosts, room_id, knockee, Membership.KNOCK, content, params=params ) + # Mark the knock as an outlier as we don't yet have the state at this point in + # the DAG. 
+ event.internal_metadata.outlier = True + + # ... but tell /sync to send it to clients anyway. + event.internal_metadata.out_of_band_membership = True + # Record the room ID and its version so that we have a record of the room await self._maybe_store_room_on_outlier_membership( room_id=event.room_id, room_version=event_format_version From 03db6701d5379f4aa05037bd9ce23942c501874e Mon Sep 17 00:00:00 2001 From: Tulir Asokan <tulir@beeper.com> Date: Wed, 22 Sep 2021 10:31:05 -0400 Subject: [PATCH 33/74] Fix invalidating OTK count cache after claim (#10875) The invalidation was missing in `_claim_e2e_one_time_key_returning`, which is used on SQLite 3.24+ and Postgres. This could break e2ee if nothing else happened to invalidate the caches before the keys ran out. Signed-off-by: Tulir Asokan <tulir@beeper.com> --- changelog.d/10875.bugfix | 1 + synapse/storage/databases/main/end_to_end_keys.py | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 changelog.d/10875.bugfix diff --git a/changelog.d/10875.bugfix b/changelog.d/10875.bugfix new file mode 100644 index 000000000..6f370da5c --- /dev/null +++ b/changelog.d/10875.bugfix @@ -0,0 +1 @@ +Fix invalidating one-time key count cache after claiming keys. Contributed by Tulir at Beeper. 
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 1f0a39eac..a95ac34f0 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -824,6 +824,10 @@ def _claim_e2e_one_time_key_returning( if otk_row is None: return None + self._invalidate_cache_and_stream( + txn, self.count_e2e_one_time_keys, (user_id, device_id) + ) + key_id, key_json = otk_row return f"{algorithm}:{key_id}", key_json From f78b68a96b1f179043b38b4109e09fa0a315643d Mon Sep 17 00:00:00 2001 From: Hillery Shay <shaysquared@gmail.com> Date: Wed, 22 Sep 2021 08:25:26 -0700 Subject: [PATCH 34/74] Treat "\u0000" as "\u0020" for the purposes of message search (message indexing) (#10820) * add test to check if null code points are being inserted * add logic to detect and replace null code points before insertion into db * lints * add license to test * change approach to null substitution * add type hint for SearchEntry * Add changelog entry Signed-off-by: H.Shay <shaysquared@gmail.com> * updated changelog * update chanelog message * remove duplicate changelog * Update synapse/storage/databases/main/events.py remove extra space Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> * rename and move test file, update tests, delete old test file * fix typo in comments * update _find_highlights_in_postgres to replace null byte with space * replace null byte in sqlite search insertion * beef up and reorganize test for this pr * update changelog * add type hints and update docstring * check db engine directly vs using env variable * refactor tests to be less repetetive * move rplace logic into seperate function * requested changes * Fix typo. 
* Update synapse/storage/databases/main/search.py Co-authored-by: reivilibre <olivier@librepush.net> * Update changelog.d/10820.misc Co-authored-by: Aaron Raimist <aaron@raim.ist> Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> Co-authored-by: reivilibre <olivier@librepush.net> Co-authored-by: Aaron Raimist <aaron@raim.ist> --- changelog.d/10820.misc | 1 + synapse/storage/databases/main/search.py | 34 ++++++++--- tests/storage/test_room_search.py | 74 ++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 9 deletions(-) create mode 100644 changelog.d/10820.misc create mode 100644 tests/storage/test_room_search.py diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc new file mode 100644 index 000000000..4373bf6f6 --- /dev/null +++ b/changelog.d/10820.misc @@ -0,0 +1 @@ +Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error. \ No newline at end of file diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 6480d5a9f..2a1e99e17 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -15,12 +15,12 @@ import logging import re from collections import namedtuple -from typing import Collection, List, Optional, Set +from typing import Collection, Iterable, List, Optional, Set from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.engines import PostgresEngine, Sqlite3Engine @@ -32,14 +32,24 @@ ) +def _clean_value_for_search(value: str) -> str: + """ + Replaces any null code points in the string with spaces as + Postgres and SQLite do not like the insertion of 
strings with + null code points into the full-text search tables. + """ + return value.replace("\u0000", " ") + + class SearchWorkerStore(SQLBaseStore): - def store_search_entries_txn(self, txn, entries): + def store_search_entries_txn( + self, txn: LoggingTransaction, entries: Iterable[SearchEntry] + ) -> None: """Add entries to the search table Args: - txn (cursor): - entries (iterable[SearchEntry]): - entries to be added to the table + txn: + entries: entries to be added to the table """ if not self.hs.config.enable_search: return @@ -55,7 +65,7 @@ def store_search_entries_txn(self, txn, entries): entry.event_id, entry.room_id, entry.key, - entry.value, + _clean_value_for_search(entry.value), entry.stream_ordering, entry.origin_server_ts, ) @@ -70,11 +80,16 @@ def store_search_entries_txn(self, txn, entries): " VALUES (?,?,?,?)" ) args = ( - (entry.event_id, entry.room_id, entry.key, entry.value) + ( + entry.event_id, + entry.room_id, + entry.key, + _clean_value_for_search(entry.value), + ) for entry in entries ) - txn.execute_batch(sql, args) + else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -646,6 +661,7 @@ def f(txn): for key in ("body", "name", "topic"): v = event.content.get(key, None) if v: + v = _clean_value_for_search(v) values.append(v) if not values: diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py new file mode 100644 index 000000000..8971ecccb --- /dev/null +++ b/tests/storage/test_room_search.py @@ -0,0 +1,74 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import synapse.rest.admin +from synapse.rest.client import login, room +from synapse.storage.engines import PostgresEngine + +from tests.unittest import HomeserverTestCase + + +class NullByteInsertionTest(HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + + def test_null_byte(self): + """ + Postgres/SQLite don't like null bytes going into the search tables. Internally + we replace those with a space. + + Ensure this doesn't break anything. + """ + + # Register a user and create a room, create some messages + self.register_user("alice", "password") + access_token = self.login("alice", "password") + room_id = self.helper.create_room_as("alice", tok=access_token) + + # Send messages and ensure they don't cause an internal server + # error + for body in ["hi\u0000bob", "another message", "hi alice"]: + response = self.helper.send(room_id, body, tok=access_token) + self.assertIn("event_id", response) + + # Check that search works for the message where the null byte was replaced + store = self.hs.get_datastore() + result = self.get_success( + store.search_msgs([room_id], "hi bob", ["content.body"]) + ) + self.assertEquals(result.get("count"), 1) + if isinstance(store.database_engine, PostgresEngine): + self.assertIn("hi", result.get("highlights")) + self.assertIn("bob", result.get("highlights")) + + # Check that search works for an unrelated message + result = self.get_success( + store.search_msgs([room_id], "another", ["content.body"]) + ) + 
self.assertEquals(result.get("count"), 1) + if isinstance(store.database_engine, PostgresEngine): + self.assertIn("another", result.get("highlights")) + + # Check that search works for a search term that overlaps with the message + # containing a null byte and an unrelated message. + result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"])) + self.assertEquals(result.get("count"), 2) + result = self.get_success( + store.search_msgs([room_id], "hi alice", ["content.body"]) + ) + if isinstance(store.database_engine, PostgresEngine): + self.assertIn("alice", result.get("highlights")) From 26f2bfedbf5493d8a69d1b38147b6236e7606cd3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 22 Sep 2021 17:58:57 +0100 Subject: [PATCH 35/74] Factor out a separate `EventContext.for_outlier` (#10883) Constructing an EventContext for an outlier is actually really simple, and there's no sense in going via an `async` method in the `StateHandler`. This also means that we can resolve a bunch of FIXMEs. --- changelog.d/10883.misc | 1 + synapse/events/snapshot.py | 14 ++++++++---- synapse/handlers/federation.py | 9 ++++---- synapse/handlers/federation_event.py | 7 ++---- synapse/state/__init__.py | 34 ++++------------------------ 5 files changed, 21 insertions(+), 44 deletions(-) create mode 100644 changelog.d/10883.misc diff --git a/changelog.d/10883.misc b/changelog.d/10883.misc new file mode 100644 index 000000000..9a765435d --- /dev/null +++ b/changelog.d/10883.misc @@ -0,0 +1 @@ +Clean up some of the federation event authentication code for clarity. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index f8d898c3b..5ba01eeef 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -80,9 +80,7 @@ class EventContext: (type, state_key) -> event_id - FIXME: what is this for an outlier? it seems ill-defined. 
It seems like - it could be either {}, or the state we were given by the remote - server, depending on $THINGS + For an outlier, this is {} Note that this is a private attribute: it should be accessed via ``get_current_state_ids``. _AsyncEventContext impl calculates this @@ -96,7 +94,7 @@ class EventContext: (type, state_key) -> event_id - FIXME: again, what is this for an outlier? + For an outlier, this is {} As with _current_state_ids, this is a private attribute. It should be accessed via get_prev_state_ids. @@ -130,6 +128,14 @@ def with_state( delta_ids=delta_ids, ) + @staticmethod + def for_outlier(): + """Return an EventContext instance suitable for persisting an outlier event""" + return EventContext( + current_state_ids={}, + prev_state_ids={}, + ) + async def serialize(self, event: EventBase, store: "DataStore") -> dict: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a03d77dff..0befe9ce4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -624,7 +624,7 @@ async def do_knock( # in the invitee's sync stream. It is stripped out for all other local users. 
event.unsigned["knock_room_state"] = stripped_room_state["knock_state_events"] - context = await self.state_handler.compute_event_context(event) + context = EventContext.for_outlier() stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -814,7 +814,7 @@ async def on_invite_request( ) ) - context = await self.state_handler.compute_event_context(event) + context = EventContext.for_outlier() await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -843,7 +843,7 @@ async def do_remotely_reject_invite( await self.federation_client.send_leave(host_list, event) - context = await self.state_handler.compute_event_context(event) + context = EventContext.for_outlier() stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -1115,8 +1115,7 @@ async def _persist_auth_tree( events_to_context = {} for e in itertools.chain(auth_events, state): e.internal_metadata.outlier = True - ctx = await self.state_handler.compute_event_context(e) - events_to_context[e.event_id] = ctx + events_to_context[e.event_id] = EventContext.for_outlier() event_map = { e.event_id: e for e in itertools.chain(auth_events, state, [event]) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 3b95beeb0..10b3fdc22 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1221,7 +1221,7 @@ async def _auth_and_persist_fetched_events( async def prep(ev_info: _NewEventInfo) -> EventContext: event = ev_info.event with nested_logging_context(suffix=event.event_id): - res = await self._state_handler.compute_event_context(event) + res = EventContext.for_outlier() res = await self._check_event_auth( origin, event, @@ -1540,10 +1540,7 @@ async def _update_auth_events_and_context_for_auth( event.event_id, auth_event.event_id, ) - missing_auth_event_context = ( - await 
self._state_handler.compute_event_context(auth_event) - ) - + missing_auth_event_context = EventContext.for_outlier() missing_auth_event_context = await self._check_event_auth( origin, auth_event, diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 463ce58da..c981df3f1 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -263,7 +263,9 @@ async def get_hosts_in_room_at_events( async def compute_event_context( self, event: EventBase, old_state: Optional[Iterable[EventBase]] = None ) -> EventContext: - """Build an EventContext structure for the event. + """Build an EventContext structure for a non-outlier event. + + (for an outlier, call EventContext.for_outlier directly) This works out what the current state should be for the event, and generates a new state group if necessary. @@ -278,35 +280,7 @@ async def compute_event_context( The event context. """ - if event.internal_metadata.is_outlier(): - # If this is an outlier, then we know it shouldn't have any current - # state. Certainly store.get_current_state won't return any, and - # persisting the event won't store the state group. - - # FIXME: why do we populate current_state_ids? I thought the point was - # that we weren't supposed to have any state for outliers? - if old_state: - prev_state_ids = {(s.type, s.state_key): s.event_id for s in old_state} - if event.is_state(): - current_state_ids = dict(prev_state_ids) - key = (event.type, event.state_key) - current_state_ids[key] = event.event_id - else: - current_state_ids = prev_state_ids - else: - current_state_ids = {} - prev_state_ids = {} - - # We don't store state for outliers, so we don't generate a state - # group for it. 
- context = EventContext.with_state( - state_group=None, - state_group_before_event=None, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - ) - - return context + assert not event.internal_metadata.is_outlier() # # first of all, figure out the state before the event From aa2c027792d04c36b17866710e95a41d31f5d99c Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 23 Sep 2021 11:59:07 +0100 Subject: [PATCH 36/74] Remove unnecessary parentheses around tuples returned from methods (#10889) --- changelog.d/10889.misc | 1 + synapse/config/server.py | 2 +- synapse/federation/sender/per_destination_queue.py | 4 ++-- synapse/handlers/federation.py | 2 +- synapse/handlers/message.py | 4 ++-- synapse/handlers/receipts.py | 4 ++-- synapse/handlers/room.py | 2 +- synapse/handlers/room_summary.py | 2 +- synapse/handlers/typing.py | 4 ++-- synapse/http/matrixfederationclient.py | 2 +- synapse/rest/admin/rooms.py | 4 ++-- synapse/rest/client/devices.py | 4 ++-- synapse/rest/client/password_policy.py | 4 ++-- synapse/storage/databases/main/account_data.py | 2 +- synapse/storage/databases/main/deviceinbox.py | 6 +++--- synapse/storage/databases/main/events_worker.py | 2 +- synapse/storage/databases/main/state_deltas.py | 2 +- synapse/storage/databases/main/stream.py | 4 ++-- synapse/streams/config.py | 2 +- synapse/types.py | 4 ++-- tests/test_state.py | 2 +- tests/utils.py | 2 +- 22 files changed, 33 insertions(+), 32 deletions(-) create mode 100644 changelog.d/10889.misc diff --git a/changelog.d/10889.misc b/changelog.d/10889.misc new file mode 100644 index 000000000..6d60188f5 --- /dev/null +++ b/changelog.d/10889.misc @@ -0,0 +1 @@ +Clean up some unnecessary parentheses in places around the codebase. 
\ No newline at end of file diff --git a/synapse/config/server.py b/synapse/config/server.py index 7b9109a59..ad8715da2 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -1447,7 +1447,7 @@ def read_gc_thresholds(thresholds): return None try: assert len(thresholds) == 3 - return (int(thresholds[0]), int(thresholds[1]), int(thresholds[2])) + return int(thresholds[0]), int(thresholds[1]), int(thresholds[2]) except Exception: raise ConfigError( "Value of `gc_threshold` must be a list of three integers if set" diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index c11d1f6d3..afe35e72b 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -560,7 +560,7 @@ async def _get_device_update_edus(self, limit: int) -> Tuple[List[Edu], int]: assert len(edus) <= limit, "get_device_updates_by_remote returned too many EDUs" - return (edus, now_stream_id) + return edus, now_stream_id async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int]: last_device_stream_id = self._last_device_stream_id @@ -593,7 +593,7 @@ async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int] stream_id, ) - return (edus, stream_id) + return edus, stream_id def _start_catching_up(self) -> None: """ diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0befe9ce4..4523b2563 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1369,7 +1369,7 @@ async def add_display_name_to_third_party_invite( builder=builder ) EventValidator().validate_new(event, self.config) - return (event, context) + return event, context async def _check_signature(self, event: EventBase, context: EventContext) -> None: """ diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6cd694b2d..7a5d8e6f4 100644 --- a/synapse/handlers/message.py +++ 
b/synapse/handlers/message.py @@ -666,7 +666,7 @@ async def create_event( self.validator.validate_new(event, self.config) - return (event, context) + return event, context async def _is_exempt_from_privacy_policy( self, builder: EventBuilder, requester: Requester @@ -1004,7 +1004,7 @@ async def create_new_client_event( logger.debug("Created event %s", event.event_id) - return (event, context) + return event, context @measure_func("handle_new_client_event") async def handle_new_client_event( diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 5881f09eb..f21f33ada 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -238,7 +238,7 @@ async def get_new_events( if self.config.experimental.msc2285_enabled: events = ReceiptEventSource.filter_out_hidden(events, user.to_string()) - return (events, to_key) + return events, to_key async def get_new_events_as( self, from_key: int, service: ApplicationService @@ -270,7 +270,7 @@ async def get_new_events_as( events.append(event) - return (events, to_key) + return events, to_key def get_current_key(self, direction: str = "f") -> int: return self.store.get_max_receipt_stream_id() diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 287ea2fd0..b5768220d 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1235,7 +1235,7 @@ async def get_new_events( else: end_key = to_key - return (events, end_key) + return events, end_key def get_current_key(self) -> RoomStreamToken: return self.store.get_room_max_token() diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 4e28fb968..fb26ee7ad 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -1179,4 +1179,4 @@ def _child_events_comparison_key( order = None # Items without an order come last. 
- return (order is None, order, child.origin_server_ts, child.room_id) + return order is None, order, child.origin_server_ts, child.room_id diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 9326330c9..d10e9b8ec 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -483,7 +483,7 @@ async def get_new_events_as( events.append(self._make_event_for(room_id)) - return (events, handler._latest_room_serial) + return events, handler._latest_room_serial async def get_new_events( self, @@ -507,7 +507,7 @@ async def get_new_events( events.append(self._make_event_for(room_id)) - return (events, handler._latest_room_serial) + return events, handler._latest_room_serial def get_current_key(self) -> int: return self.get_typing_handler()._latest_room_serial diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index ef10ec093..e56fa477b 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1186,7 +1186,7 @@ async def get_file( request.method, request.uri.decode("ascii"), ) - return (length, headers) + return length, headers def _flatten_response_never_received(e): diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 8f781f745..a4823ca6e 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -213,7 +213,7 @@ async def on_GET( members = await self.store.get_users_in_room(room_id) ret["joined_local_devices"] = await self.store.count_devices_by_users(members) - return (200, ret) + return 200, ret async def on_DELETE( self, request: SynapseRequest, room_id: str @@ -668,4 +668,4 @@ async def _delete_room( if purge: await pagination_handler.purge_room(room_id, force=force_purge) - return (200, ret) + return 200, ret diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py index 25bc3c8f4..8566dc5cb 100644 --- a/synapse/rest/client/devices.py +++ b/synapse/rest/client/devices.py @@ -211,7 
+211,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if dehydrated_device is not None: (device_id, device_data) = dehydrated_device result = {"device_id": device_id, "device_data": device_data} - return (200, result) + return 200, result else: raise errors.NotFoundError("No dehydrated device available") @@ -293,7 +293,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: submission["device_id"], ) - return (200, result) + return 200, result def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: diff --git a/synapse/rest/client/password_policy.py b/synapse/rest/client/password_policy.py index 6d64efb16..0465fd229 100644 --- a/synapse/rest/client/password_policy.py +++ b/synapse/rest/client/password_policy.py @@ -40,7 +40,7 @@ def __init__(self, hs: "HomeServer"): def on_GET(self, request: Request) -> Tuple[int, JsonDict]: if not self.enabled or not self.policy: - return (200, {}) + return 200, {} policy = {} @@ -54,7 +54,7 @@ def on_GET(self, request: Request) -> Tuple[int, JsonDict]: if param in self.policy: policy["m.%s" % param] = self.policy[param] - return (200, policy) + return 200, policy def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index d0cf3460d..70ca3e09f 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -324,7 +324,7 @@ def get_updated_account_data_for_user_txn(txn): user_id, int(stream_id) ) if not changed: - return ({}, {}) + return {}, {} return await self.db_pool.runInteraction( "get_updated_account_data_for_user", get_updated_account_data_for_user_txn diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index c55508867..3154906d4 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ 
b/synapse/storage/databases/main/deviceinbox.py @@ -136,7 +136,7 @@ async def get_new_messages_for_device( user_id, last_stream_id ) if not has_changed: - return ([], current_stream_id) + return [], current_stream_id def get_new_messages_for_device_txn(txn): sql = ( @@ -240,11 +240,11 @@ async def get_new_device_msgs_for_remote( ) if not has_changed or last_stream_id == current_stream_id: log_kv({"message": "No new messages in stream"}) - return ([], current_stream_id) + return [], current_stream_id if limit <= 0: # This can happen if we run out of room for EDUs in the transaction. - return ([], last_stream_id) + return [], last_stream_id @trace def get_new_messages_for_remote_destination_txn(txn): diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index d72e716b5..4a1a2f4a6 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1495,7 +1495,7 @@ async def get_event_ordering(self, event_id): if not res: raise SynapseError(404, "Could not find event %s" % (event_id,)) - return (int(res["topological_ordering"]), int(res["stream_ordering"])) + return int(res["topological_ordering"]), int(res["stream_ordering"]) async def get_next_event_to_expire(self) -> Optional[Tuple[str, int]]: """Retrieve the entry with the lowest expiry timestamp in the event_expiry diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py index bff7d0404..a89747d74 100644 --- a/synapse/storage/databases/main/state_deltas.py +++ b/synapse/storage/databases/main/state_deltas.py @@ -58,7 +58,7 @@ async def get_current_state_deltas( # if the CSDs haven't changed between prev_stream_id and now, we # know for certain that they haven't changed between prev_stream_id and # max_stream_id. 
- return (max_stream_id, []) + return max_stream_id, [] def get_current_state_deltas_txn(txn): # First we calculate the max stream id that will give us less than diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 9a3b6f4ac..dc7884b1c 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -624,7 +624,7 @@ async def get_recent_events_for_room( self._set_before_and_after(events, rows) - return (events, token) + return events, token async def get_recent_event_ids_for_room( self, room_id: str, limit: int, end_token: RoomStreamToken @@ -1242,7 +1242,7 @@ async def paginate_room_events( self._set_before_and_after(events, rows) - return (events, token) + return events, token @cached() async def get_id_for_instance(self, instance_name: str) -> int: diff --git a/synapse/streams/config.py b/synapse/streams/config.py index cf4005984..c08d591f2 100644 --- a/synapse/streams/config.py +++ b/synapse/streams/config.py @@ -81,7 +81,7 @@ async def from_request( raise SynapseError(400, "Invalid request.") def __repr__(self) -> str: - return ("PaginationConfig(from_tok=%r, to_tok=%r, direction=%r, limit=%r)") % ( + return "PaginationConfig(from_tok=%r, to_tok=%r, direction=%r, limit=%r)" % ( self.from_token, self.to_token, self.direction, diff --git a/synapse/types.py b/synapse/types.py index ed831a5c1..364ecf7d4 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -556,7 +556,7 @@ def as_historical_tuple(self) -> Tuple[int, int]: "Cannot call `RoomStreamToken.as_historical_tuple` on live token" ) - return (self.topological, self.stream) + return self.topological, self.stream def get_stream_pos_for_instance(self, instance_name: str) -> int: """Get the stream position that the given writer was at at this token. 
@@ -766,7 +766,7 @@ def get_verify_key_from_cross_signing_key(key_info): raise ValueError("Invalid key") # and return that one key for key_id, key_data in keys.items(): - return (key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))) + return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)) @attr.s(auto_attribs=True, frozen=True, slots=True) diff --git a/tests/test_state.py b/tests/test_state.py index e5488df1a..76e0e8ca7 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -106,7 +106,7 @@ async def get_events(self, event_ids, **kwargs): } async def get_state_group_delta(self, name): - return (None, None) + return None, None def register_events(self, events): for e in events: diff --git a/tests/utils.py b/tests/utils.py index f3458ca88..cf8ba5c5d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -434,7 +434,7 @@ def trigger( ) return code, response except CodeMessageException as e: - return (e.code, cs_error(e.msg, code=e.errcode)) + return e.code, cs_error(e.msg, code=e.errcode) raise KeyError("No event can handle %s" % path) From e584534403b55ad3f250f92592e30b15b01f0201 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Thu, 23 Sep 2021 07:13:34 -0400 Subject: [PATCH 37/74] Use direct references for some configuration variables (part 3) (#10885) This avoids the overhead of searching through the various configuration classes by directly referencing the class that the attributes are in. It also improves type hints since mypy can now resolve the types of the configuration variables. 
--- changelog.d/10885.misc | 1 + synapse/app/homeserver.py | 2 +- synapse/config/consent.py | 9 +++-- synapse/handlers/account_validity.py | 2 +- synapse/handlers/appservice.py | 2 +- synapse/handlers/auth.py | 22 +++++----- synapse/handlers/cas.py | 8 ++-- synapse/handlers/identity.py | 12 +++--- synapse/handlers/message.py | 4 +- synapse/handlers/password_policy.py | 4 +- synapse/handlers/register.py | 11 +++-- synapse/handlers/ui_auth/checkers.py | 17 +++++--- synapse/module_api/__init__.py | 8 ++-- synapse/push/pusher.py | 2 +- synapse/rest/admin/users.py | 4 +- synapse/rest/client/account.py | 40 +++++++++---------- synapse/rest/client/auth.py | 10 ++--- synapse/rest/client/login.py | 4 +- synapse/rest/client/password_policy.py | 4 +- synapse/rest/client/register.py | 30 +++++++------- synapse/rest/consent/consent_resource.py | 9 +++-- synapse/rest/synapse/client/password_reset.py | 10 ++--- .../server_notices/consent_server_notices.py | 11 +++-- synapse/storage/databases/main/appservice.py | 2 +- .../databases/main/monthly_active_users.py | 2 +- .../storage/databases/main/registration.py | 2 +- synapse/storage/prepare_database.py | 2 +- .../storage/schema/main/delta/30/as_users.py | 2 +- tests/rest/admin/test_room.py | 2 +- tests/rest/client/test_login.py | 2 +- tests/storage/test_appservice.py | 14 +++---- tests/storage/test_cleanup_extrems.py | 2 +- 32 files changed, 137 insertions(+), 119 deletions(-) create mode 100644 changelog.d/10885.misc diff --git a/changelog.d/10885.misc b/changelog.d/10885.misc new file mode 100644 index 000000000..586a0b3a9 --- /dev/null +++ b/changelog.d/10885.misc @@ -0,0 +1 @@ +Use direct references to config flags. 
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b909f8db8..886e291e4 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -195,7 +195,7 @@ def _configure_named_resource(self, name, compress=False): } ) - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: from synapse.rest.synapse.client.password_reset import ( PasswordResetSubmitTokenResource, ) diff --git a/synapse/config/consent.py b/synapse/config/consent.py index b05a9bd97..ecc43b08b 100644 --- a/synapse/config/consent.py +++ b/synapse/config/consent.py @@ -13,6 +13,7 @@ # limitations under the License. from os import path +from typing import Optional from synapse.config import ConfigError @@ -78,8 +79,8 @@ class ConsentConfig(Config): def __init__(self, *args): super().__init__(*args) - self.user_consent_version = None - self.user_consent_template_dir = None + self.user_consent_version: Optional[str] = None + self.user_consent_template_dir: Optional[str] = None self.user_consent_server_notice_content = None self.user_consent_server_notice_to_guests = False self.block_events_without_consent_error = None @@ -94,7 +95,9 @@ def read_config(self, config, **kwargs): return self.user_consent_version = str(consent_config["version"]) self.user_consent_template_dir = self.abspath(consent_config["template_dir"]) - if not path.isdir(self.user_consent_template_dir): + if not isinstance(self.user_consent_template_dir, str) or not path.isdir( + self.user_consent_template_dir + ): raise ConfigError( "Could not find template directory '%s'" % (self.user_consent_template_dir,) diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index 4724565ba..5a5f124dd 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -47,7 +47,7 @@ def __init__(self, hs: "HomeServer"): self.send_email_handler = 
self.hs.get_send_email_handler() self.clock = self.hs.get_clock() - self._app_name = self.hs.config.email_app_name + self._app_name = self.hs.config.email.email_app_name self._account_validity_enabled = ( hs.config.account_validity.account_validity_enabled diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index b7213b67a..163278708 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -52,7 +52,7 @@ def __init__(self, hs: "HomeServer"): self.scheduler = hs.get_application_service_scheduler() self.started_scheduler = False self.clock = hs.get_clock() - self.notify_appservices = hs.config.notify_appservices + self.notify_appservices = hs.config.appservice.notify_appservices self.event_sources = hs.get_event_sources() self.current_max = 0 diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index bcd4249e0..b747f80bc 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -210,15 +210,15 @@ def __init__(self, hs: "HomeServer"): self.password_providers = [ PasswordProvider.load(module, config, account_handler) - for module, config in hs.config.password_providers + for module, config in hs.config.authproviders.password_providers ] logger.info("Extra password_providers: %s", self.password_providers) self.hs = hs # FIXME better possibility to access registrationHandler later? self.macaroon_gen = hs.get_macaroon_generator() - self._password_enabled = hs.config.password_enabled - self._password_localdb_enabled = hs.config.password_localdb_enabled + self._password_enabled = hs.config.auth.password_enabled + self._password_localdb_enabled = hs.config.auth.password_localdb_enabled # start out by assuming PASSWORD is enabled; we will remove it later if not. login_types = set() @@ -250,7 +250,7 @@ def __init__(self, hs: "HomeServer"): ) # The number of seconds to keep a UI auth session active. 
- self._ui_auth_session_timeout = hs.config.ui_auth_session_timeout + self._ui_auth_session_timeout = hs.config.auth.ui_auth_session_timeout # Ratelimitier for failed /login attempts self._failed_login_attempts_ratelimiter = Ratelimiter( @@ -739,19 +739,19 @@ async def _check_auth_dict( return canonical_id def _get_params_recaptcha(self) -> dict: - return {"public_key": self.hs.config.recaptcha_public_key} + return {"public_key": self.hs.config.captcha.recaptcha_public_key} def _get_params_terms(self) -> dict: return { "policies": { "privacy_policy": { - "version": self.hs.config.user_consent_version, + "version": self.hs.config.consent.user_consent_version, "en": { - "name": self.hs.config.user_consent_policy_name, + "name": self.hs.config.consent.user_consent_policy_name, "url": "%s_matrix/consent?v=%s" % ( self.hs.config.server.public_baseurl, - self.hs.config.user_consent_version, + self.hs.config.consent.user_consent_version, ), }, } @@ -1016,7 +1016,7 @@ async def _find_user_id_and_pwd_hash( def can_change_password(self) -> bool: """Get whether users on this server are allowed to change or set a password. - Both `config.password_enabled` and `config.password_localdb_enabled` must be true. + Both `config.auth.password_enabled` and `config.auth.password_localdb_enabled` must be true. Note that any account (even SSO accounts) are allowed to add passwords if the above is true. 
@@ -1486,7 +1486,7 @@ def _do_hash() -> str: pw = unicodedata.normalize("NFKC", password) return bcrypt.hashpw( - pw.encode("utf8") + self.hs.config.password_pepper.encode("utf8"), + pw.encode("utf8") + self.hs.config.auth.password_pepper.encode("utf8"), bcrypt.gensalt(self.bcrypt_rounds), ).decode("ascii") @@ -1510,7 +1510,7 @@ def _do_validate_hash(checked_hash: bytes) -> bool: pw = unicodedata.normalize("NFKC", password) return bcrypt.checkpw( - pw.encode("utf8") + self.hs.config.password_pepper.encode("utf8"), + pw.encode("utf8") + self.hs.config.auth.password_pepper.encode("utf8"), checked_hash, ) diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py index b0b188dc7..5d8f6c50a 100644 --- a/synapse/handlers/cas.py +++ b/synapse/handlers/cas.py @@ -65,10 +65,10 @@ def __init__(self, hs: "HomeServer"): self._auth_handler = hs.get_auth_handler() self._registration_handler = hs.get_registration_handler() - self._cas_server_url = hs.config.cas_server_url - self._cas_service_url = hs.config.cas_service_url - self._cas_displayname_attribute = hs.config.cas_displayname_attribute - self._cas_required_attributes = hs.config.cas_required_attributes + self._cas_server_url = hs.config.cas.cas_server_url + self._cas_service_url = hs.config.cas.cas_service_url + self._cas_displayname_attribute = hs.config.cas.cas_displayname_attribute + self._cas_required_attributes = hs.config.cas.cas_required_attributes self._http_client = hs.get_proxied_http_client() diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 8b8f1f41c..fe8a99589 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -62,7 +62,7 @@ def __init__(self, hs: "HomeServer"): self.federation_http_client = hs.get_federation_http_client() self.hs = hs - self._web_client_location = hs.config.invite_client_location + self._web_client_location = hs.config.email.invite_client_location # Ratelimiters for `/requestToken` endpoints. 
self._3pid_validation_ratelimiter_ip = Ratelimiter( @@ -419,7 +419,7 @@ async def send_threepid_validation( token_expires = ( self.hs.get_clock().time_msec() - + self.hs.config.email_validation_token_lifetime + + self.hs.config.email.email_validation_token_lifetime ) await self.store.start_or_continue_validation_session( @@ -465,7 +465,7 @@ async def requestEmailToken( if next_link: params["next_link"] = next_link - if self.hs.config.using_identity_server_from_trusted_list: + if self.hs.config.email.using_identity_server_from_trusted_list: # Warn that a deprecated config option is in use logger.warning( 'The config option "trust_identity_server_for_password_resets" ' @@ -518,7 +518,7 @@ async def requestMsisdnToken( if next_link: params["next_link"] = next_link - if self.hs.config.using_identity_server_from_trusted_list: + if self.hs.config.email.using_identity_server_from_trusted_list: # Warn that a deprecated config option is in use logger.warning( 'The config option "trust_identity_server_for_password_resets" ' @@ -572,12 +572,12 @@ async def validate_threepid_session( validation_session = None # Try to validate as email - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: # Ask our delegated email identity server validation_session = await self.threepid_from_creds( self.hs.config.account_threepid_delegate_email, threepid_creds ) - elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + elif self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: # Get a validated session matching these details validation_session = await self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7a5d8e6f4..ad4e4a3d6 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -443,7 +443,7 @@ def __init__(self, hs: 
"HomeServer"): ) self._block_events_without_consent_error = ( - self.config.block_events_without_consent_error + self.config.consent.block_events_without_consent_error ) # we need to construct a ConsentURIBuilder here, as it checks that the necessary @@ -744,7 +744,7 @@ async def assert_accepted_privacy_policy(self, requester: Requester) -> None: if u["appservice_id"] is not None: # users registered by an appservice are exempt return - if u["consent_version"] == self.config.user_consent_version: + if u["consent_version"] == self.config.consent.user_consent_version: return consent_uri = self._consent_uri_builder.build_user_consent_uri(user.localpart) diff --git a/synapse/handlers/password_policy.py b/synapse/handlers/password_policy.py index cd21efdcc..eadd7ced0 100644 --- a/synapse/handlers/password_policy.py +++ b/synapse/handlers/password_policy.py @@ -27,8 +27,8 @@ class PasswordPolicyHandler: def __init__(self, hs: "HomeServer"): - self.policy = hs.config.password_policy - self.enabled = hs.config.password_policy_enabled + self.policy = hs.config.auth.password_policy + self.enabled = hs.config.auth.password_policy_enabled # Regexps for the spec'd policy parameters. 
self.regexp_digit = re.compile("[0-9]") diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 1c195c65d..01c5e1385 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -97,6 +97,7 @@ def __init__(self, hs: "HomeServer"): self.ratelimiter = hs.get_registration_ratelimiter() self.macaroon_gen = hs.get_macaroon_generator() self._account_validity_handler = hs.get_account_validity_handler() + self._user_consent_version = self.hs.config.consent.user_consent_version self._server_notices_mxid = hs.config.server_notices_mxid self._server_name = hs.hostname @@ -339,7 +340,7 @@ async def register_user( auth_provider=(auth_provider_id or ""), ).inc() - if not self.hs.config.user_consent_at_registration: + if not self.hs.config.consent.user_consent_at_registration: if not self.hs.config.auto_join_rooms_for_guests and make_guest: logger.info( "Skipping auto-join for %s because auto-join for guests is disabled", @@ -864,7 +865,9 @@ async def post_registration_actions( await self._register_msisdn_threepid(user_id, threepid) if auth_result and LoginType.TERMS in auth_result: - await self._on_user_consented(user_id, self.hs.config.user_consent_version) + # The terms type should only exist if consent is enabled. 
+ assert self._user_consent_version is not None + await self._on_user_consented(user_id, self._user_consent_version) async def _on_user_consented(self, user_id: str, consent_version: str) -> None: """A user consented to the terms on registration @@ -910,8 +913,8 @@ async def _register_email_threepid( # getting mail spam where they weren't before if email # notifs are set up on a homeserver) if ( - self.hs.config.email_enable_notifs - and self.hs.config.email_notif_for_new_users + self.hs.config.email.email_enable_notifs + and self.hs.config.email.email_notif_for_new_users and token ): # Pull the ID of the access token back out of the db diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index ea9325e96..8f5d465fa 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -82,10 +82,10 @@ class RecaptchaAuthChecker(UserInteractiveAuthChecker): def __init__(self, hs: "HomeServer"): super().__init__(hs) - self._enabled = bool(hs.config.recaptcha_private_key) + self._enabled = bool(hs.config.captcha.recaptcha_private_key) self._http_client = hs.get_proxied_http_client() - self._url = hs.config.recaptcha_siteverify_api - self._secret = hs.config.recaptcha_private_key + self._url = hs.config.captcha.recaptcha_siteverify_api + self._secret = hs.config.captcha.recaptcha_private_key def is_enabled(self) -> bool: return self._enabled @@ -161,12 +161,17 @@ async def _check_threepid(self, medium: str, authdict: dict) -> dict: self.hs.config.account_threepid_delegate_msisdn, threepid_creds ) elif medium == "email": - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if ( + self.hs.config.email.threepid_behaviour_email + == ThreepidBehaviour.REMOTE + ): assert self.hs.config.account_threepid_delegate_email threepid = await identity_handler.threepid_from_creds( self.hs.config.account_threepid_delegate_email, threepid_creds ) - elif self.hs.config.threepid_behaviour_email == 
ThreepidBehaviour.LOCAL: + elif ( + self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL + ): threepid = None row = await self.store.get_threepid_validation_session( medium, @@ -218,7 +223,7 @@ def __init__(self, hs: "HomeServer"): _BaseThreepidAuthChecker.__init__(self, hs) def is_enabled(self) -> bool: - return self.hs.config.threepid_behaviour_email in ( + return self.hs.config.email.threepid_behaviour_email in ( ThreepidBehaviour.REMOTE, ThreepidBehaviour.LOCAL, ) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 174e6934a..8ae21bc43 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -119,14 +119,16 @@ def __init__(self, hs: "HomeServer", auth_handler): self.custom_template_dir = hs.config.server.custom_template_directory try: - app_name = self._hs.config.email_app_name + app_name = self._hs.config.email.email_app_name - self._from_string = self._hs.config.email_notif_from % {"app": app_name} + self._from_string = self._hs.config.email.email_notif_from % { + "app": app_name + } except (KeyError, TypeError): # If substitution failed (which can happen if the string contains # placeholders other than just "app", or if the type of the placeholder is # not a string), fall back to the bare strings. 
- self._from_string = self._hs.config.email_notif_from + self._from_string = self._hs.config.email.email_notif_from self._raw_from = email.utils.parseaddr(self._from_string)[1] diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py index 29ed346d3..b57e09409 100644 --- a/synapse/push/pusher.py +++ b/synapse/push/pusher.py @@ -77,4 +77,4 @@ def _app_name_from_pusherdict(self, pusher_config: PusherConfig) -> str: if isinstance(brand, str): return brand - return self.config.email_app_name + return self.config.email.email_app_name diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 681e49182..46bfec462 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -368,8 +368,8 @@ async def on_PUT( user_id, medium, address, current_time ) if ( - self.hs.config.email_enable_notifs - and self.hs.config.email_notif_for_new_users + self.hs.config.email.email_enable_notifs + and self.hs.config.email.email_notif_for_new_users ): await self.pusher_pool.add_pusher( user_id=user_id, diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index aefaaa8ae..6a7608d60 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -64,17 +64,17 @@ def __init__(self, hs: "HomeServer"): self.config = hs.config self.identity_handler = hs.get_identity_handler() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self.mailer = Mailer( hs=self.hs, - app_name=self.config.email_app_name, - template_html=self.config.email_password_reset_template_html, - template_text=self.config.email_password_reset_template_text, + app_name=self.config.email.email_app_name, + template_html=self.config.email.email_password_reset_template_html, + template_text=self.config.email.email_password_reset_template_text, ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.threepid_behaviour_email 
== ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "User password resets have been disabled due to lack of email config" ) @@ -129,7 +129,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: raise SynapseError(400, "Email not found", Codes.THREEPID_NOT_FOUND) - if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: assert self.hs.config.account_threepid_delegate_email # Have the configured identity server handle the request @@ -349,17 +349,17 @@ def __init__(self, hs: "HomeServer"): self.identity_handler = hs.get_identity_handler() self.store = self.hs.get_datastore() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self.mailer = Mailer( hs=self.hs, - app_name=self.config.email_app_name, - template_html=self.config.email_add_threepid_template_html, - template_text=self.config.email_add_threepid_template_text, + app_name=self.config.email.email_app_name, + template_html=self.config.email.email_add_threepid_template_html, + template_text=self.config.email.email_add_threepid_template_text, ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "Adding emails have been disabled due to lack of an email config" ) @@ -413,7 +413,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: raise SynapseError(400, 
"Email is already in use", Codes.THREEPID_IN_USE) - if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: assert self.hs.config.account_threepid_delegate_email # Have the configured identity server handle the request @@ -534,21 +534,21 @@ def __init__(self, hs: "HomeServer"): self.config = hs.config self.clock = hs.get_clock() self.store = hs.get_datastore() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self._failure_email_template = ( - self.config.email_add_threepid_template_failure_html + self.config.email.email_add_threepid_template_failure_html ) async def on_GET(self, request: Request) -> None: - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "Adding emails have been disabled due to lack of an email config" ) raise SynapseError( 400, "Adding an email to your account is disabled on this server" ) - elif self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + elif self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: raise SynapseError( 400, "This homeserver is not validating threepids. 
Use an identity server " @@ -575,7 +575,7 @@ async def on_GET(self, request: Request) -> None: return None # Otherwise show the success template - html = self.config.email_add_threepid_template_success_html_content + html = self.config.email.email_add_threepid_template_success_html_content status_code = 200 except ThreepidValidationError as e: status_code = e.code diff --git a/synapse/rest/client/auth.py b/synapse/rest/client/auth.py index 7bb780147..282861fae 100644 --- a/synapse/rest/client/auth.py +++ b/synapse/rest/client/auth.py @@ -47,7 +47,7 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_registration_handler() - self.recaptcha_template = hs.config.recaptcha_template + self.recaptcha_template = hs.config.captcha.recaptcha_template self.terms_template = hs.config.terms_template self.registration_token_template = hs.config.registration_token_template self.success_template = hs.config.fallback_success_template @@ -62,7 +62,7 @@ async def on_GET(self, request: SynapseRequest, stagetype: str) -> None: session=session, myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.RECAPTCHA), - sitekey=self.hs.config.recaptcha_public_key, + sitekey=self.hs.config.captcha.recaptcha_public_key, ) elif stagetype == LoginType.TERMS: html = self.terms_template.render( @@ -70,7 +70,7 @@ async def on_GET(self, request: SynapseRequest, stagetype: str) -> None: terms_url="%s_matrix/consent?v=%s" % ( self.hs.config.server.public_baseurl, - self.hs.config.user_consent_version, + self.hs.config.consent.user_consent_version, ), myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.TERMS), @@ -118,7 +118,7 @@ async def on_POST(self, request: Request, stagetype: str) -> None: session=session, myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.RECAPTCHA), - sitekey=self.hs.config.recaptcha_public_key, + 
sitekey=self.hs.config.captcha.recaptcha_public_key, error=e.msg, ) else: @@ -139,7 +139,7 @@ async def on_POST(self, request: Request, stagetype: str) -> None: terms_url="%s_matrix/consent?v=%s" % ( self.hs.config.server.public_baseurl, - self.hs.config.user_consent_version, + self.hs.config.consent.user_consent_version, ), myurl="%s/r0/auth/%s/fallback/web" % (CLIENT_API_PREFIX, LoginType.TERMS), diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index a6ede7e2f..d766e98dc 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -77,7 +77,7 @@ def __init__(self, hs: "HomeServer"): # SSO configuration. self.saml2_enabled = hs.config.saml2_enabled - self.cas_enabled = hs.config.cas_enabled + self.cas_enabled = hs.config.cas.cas_enabled self.oidc_enabled = hs.config.oidc_enabled self._msc2918_enabled = hs.config.access_token_lifetime is not None @@ -559,7 +559,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: if hs.config.access_token_lifetime is not None: RefreshTokenServlet(hs).register(http_server) SsoRedirectServlet(hs).register(http_server) - if hs.config.cas_enabled: + if hs.config.cas.cas_enabled: CasTicketServlet(hs).register(http_server) diff --git a/synapse/rest/client/password_policy.py b/synapse/rest/client/password_policy.py index 0465fd229..9f1908004 100644 --- a/synapse/rest/client/password_policy.py +++ b/synapse/rest/client/password_policy.py @@ -35,8 +35,8 @@ class PasswordPolicyServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() - self.policy = hs.config.password_policy - self.enabled = hs.config.password_policy_enabled + self.policy = hs.config.auth.password_policy + self.enabled = hs.config.auth.password_policy_enabled def on_GET(self, request: Request) -> Tuple[int, JsonDict]: if not self.enabled or not self.policy: diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index abe4d7e20..48b0062cf 100644 --- 
a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -75,17 +75,19 @@ def __init__(self, hs: "HomeServer"): self.identity_handler = hs.get_identity_handler() self.config = hs.config - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self.mailer = Mailer( hs=self.hs, - app_name=self.config.email_app_name, - template_html=self.config.email_registration_template_html, - template_text=self.config.email_registration_template_text, + app_name=self.config.email.email_app_name, + template_html=self.config.email.email_registration_template_html, + template_text=self.config.email.email_registration_template_text, ) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.hs.config.local_threepid_handling_disabled_due_to_email_config: + if self.hs.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if ( + self.hs.config.email.local_threepid_handling_disabled_due_to_email_config + ): logger.warning( "Email registration has been disabled due to lack of email config" ) @@ -137,7 +139,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: raise SynapseError(400, "Email is already in use", Codes.THREEPID_IN_USE) - if self.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.REMOTE: assert self.hs.config.account_threepid_delegate_email # Have the configured identity server handle the request @@ -259,9 +261,9 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self.store = hs.get_datastore() - if self.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL: self._failure_email_template = ( - self.config.email_registration_template_failure_html + 
self.config.email.email_registration_template_failure_html ) async def on_GET(self, request: Request, medium: str) -> None: @@ -269,8 +271,8 @@ async def on_GET(self, request: Request, medium: str) -> None: raise SynapseError( 400, "This medium is currently not supported for registration" ) - if self.config.threepid_behaviour_email == ThreepidBehaviour.OFF: - if self.config.local_threepid_handling_disabled_due_to_email_config: + if self.config.email.threepid_behaviour_email == ThreepidBehaviour.OFF: + if self.config.email.local_threepid_handling_disabled_due_to_email_config: logger.warning( "User registration via email has been disabled due to lack of email config" ) @@ -303,7 +305,7 @@ async def on_GET(self, request: Request, medium: str) -> None: return None # Otherwise show the success template - html = self.config.email_registration_template_success_html_content + html = self.config.email.email_registration_template_success_html_content status_code = 200 except ThreepidValidationError as e: status_code = e.code @@ -897,12 +899,12 @@ def _calculate_registration_flows( flows.append([LoginType.MSISDN, LoginType.EMAIL_IDENTITY]) # Prepend m.login.terms to all flows if we're requiring consent - if config.user_consent_at_registration: + if config.consent.user_consent_at_registration: for flow in flows: flow.insert(0, LoginType.TERMS) # Prepend recaptcha to all flows if we're requiring captcha - if config.enable_registration_captcha: + if config.captcha.enable_registration_captcha: for flow in flows: flow.insert(0, LoginType.RECAPTCHA) diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index 06e0fbde2..fc634a492 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -84,14 +84,15 @@ def __init__(self, hs: "HomeServer"): # this is required by the request_handler wrapper self.clock = hs.get_clock() - self._default_consent_version = hs.config.user_consent_version - if 
self._default_consent_version is None: + # Consent must be configured to create this resource. + default_consent_version = hs.config.consent.user_consent_version + consent_template_directory = hs.config.consent.user_consent_template_dir + if default_consent_version is None or consent_template_directory is None: raise ConfigError( "Consent resource is enabled but user_consent section is " "missing in config file." ) - - consent_template_directory = hs.config.user_consent_template_dir + self._default_consent_version = default_consent_version # TODO: switch to synapse.util.templates.build_jinja_env loader = jinja2.FileSystemLoader(consent_template_directory) diff --git a/synapse/rest/synapse/client/password_reset.py b/synapse/rest/synapse/client/password_reset.py index f2800bf2d..28a67f04e 100644 --- a/synapse/rest/synapse/client/password_reset.py +++ b/synapse/rest/synapse/client/password_reset.py @@ -47,20 +47,20 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self._local_threepid_handling_disabled_due_to_email_config = ( - hs.config.local_threepid_handling_disabled_due_to_email_config + hs.config.email.local_threepid_handling_disabled_due_to_email_config ) self._confirmation_email_template = ( - hs.config.email_password_reset_template_confirmation_html + hs.config.email.email_password_reset_template_confirmation_html ) self._email_password_reset_template_success_html = ( - hs.config.email_password_reset_template_success_html_content + hs.config.email.email_password_reset_template_success_html_content ) self._failure_email_template = ( - hs.config.email_password_reset_template_failure_html + hs.config.email.email_password_reset_template_failure_html ) # This resource should not be mounted if threepid behaviour is not LOCAL - assert hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL + assert hs.config.email.threepid_behaviour_email == ThreepidBehaviour.LOCAL async def _async_render_GET(self, request: Request) -> Tuple[int, bytes]: 
sid = parse_string(request, "sid", required=True) diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py index 4e0f81403..e09a25591 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py @@ -36,9 +36,11 @@ def __init__(self, hs: "HomeServer"): self._users_in_progress: Set[str] = set() - self._current_consent_version = hs.config.user_consent_version - self._server_notice_content = hs.config.user_consent_server_notice_content - self._send_to_guests = hs.config.user_consent_server_notice_to_guests + self._current_consent_version = hs.config.consent.user_consent_version + self._server_notice_content = ( + hs.config.consent.user_consent_server_notice_content + ) + self._send_to_guests = hs.config.consent.user_consent_server_notice_to_guests if self._server_notice_content is not None: if not self._server_notices_manager.is_enabled(): @@ -63,6 +65,9 @@ async def maybe_send_server_notice_to_user(self, user_id: str) -> None: # not enabled return + # A consent version must be given. 
+ assert self._current_consent_version is not None + # make sure we don't send two messages to the same user at once if user_id in self._users_in_progress: return diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index e2d1b758b..2da2659f4 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -60,7 +60,7 @@ def _make_exclusive_regex( class ApplicationServiceWorkerStore(SQLBaseStore): def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): self.services_cache = load_appservices( - hs.hostname, hs.config.app_service_config_files + hs.hostname, hs.config.appservice.app_service_config_files ) self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index d213b2670..b76ee51a9 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -63,7 +63,7 @@ async def get_monthly_active_count_by_service(self) -> Dict[str, int]: """Generates current count of monthly active users broken down by service. A service is typically an appservice but also includes native matrix users. Since the `monthly_active_users` table is populated from the `user_ips` table - `config.track_appservice_user_ips` must be set to `true` for this + `config.appservice.track_appservice_user_ips` must be set to `true` for this method to return anything other than native matrix users. 
Returns: diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index fafadb88f..52ef9deed 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -388,7 +388,7 @@ def select_users_txn(txn, now_ms, renew_at): "get_users_expiring_soon", select_users_txn, self._clock.time_msec(), - self.config.account_validity_renew_at, + self.config.account_validity.account_validity_renew_at, ) async def set_renewal_mail_status(self, user_id: str, email_sent: bool) -> None: diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d4754c904..f31880b8e 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -545,7 +545,7 @@ def _apply_module_schemas( database_engine: config: application config """ - for (mod, _config) in config.password_providers: + for (mod, _config) in config.authproviders.password_providers: if not hasattr(mod, "get_db_schema_files"): continue modname = ".".join((mod.__module__, mod.__name__)) diff --git a/synapse/storage/schema/main/delta/30/as_users.py b/synapse/storage/schema/main/delta/30/as_users.py index 8a1f34008..22a7901e1 100644 --- a/synapse/storage/schema/main/delta/30/as_users.py +++ b/synapse/storage/schema/main/delta/30/as_users.py @@ -33,7 +33,7 @@ def run_upgrade(cur, database_engine, config, *args, **kwargs): config_files = [] try: - config_files = config.app_service_config_files + config_files = config.appservice.app_service_config_files except AttributeError: logger.warning("Could not get app_service_config_files from config") pass diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index e798513ac..0fa55e03b 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -47,7 +47,7 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.event_creation_handler = 
hs.get_event_creation_handler() - hs.config.user_consent_version = "1" + hs.config.consent.user_consent_version = "1" consent_uri_builder = Mock() consent_uri_builder.build_user_consent_uri.return_value = "http://example.com" diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index f5c195a07..414c8781a 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -97,7 +97,7 @@ def make_homeserver(self, reactor, clock): self.hs.config.enable_registration = True self.hs.config.registrations_require_3pid = [] self.hs.config.auto_join_rooms = [] - self.hs.config.enable_registration_captcha = False + self.hs.config.captcha.enable_registration_captcha = False return self.hs diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 666bffe25..ebadf4794 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -41,9 +41,8 @@ def setUp(self): self.addCleanup, federation_sender=Mock(), federation_client=Mock() ) - hs.config.app_service_config_files = self.as_yaml_files + hs.config.appservice.app_service_config_files = self.as_yaml_files hs.config.caches.event_cache_size = 1 - hs.config.password_providers = [] self.as_token = "token1" self.as_url = "some_url" @@ -108,9 +107,8 @@ def setUp(self): self.addCleanup, federation_sender=Mock(), federation_client=Mock() ) - hs.config.app_service_config_files = self.as_yaml_files + hs.config.appservice.app_service_config_files = self.as_yaml_files hs.config.caches.event_cache_size = 1 - hs.config.password_providers = [] self.as_list = [ {"token": "token1", "url": "https://matrix-as.org", "id": "id_1"}, @@ -496,9 +494,8 @@ def test_unique_works(self): self.addCleanup, federation_sender=Mock(), federation_client=Mock() ) - hs.config.app_service_config_files = [f1, f2] + hs.config.appservice.app_service_config_files = [f1, f2] hs.config.caches.event_cache_size = 1 - hs.config.password_providers = [] database = 
hs.get_datastores().databases[0] ApplicationServiceStore( @@ -514,7 +511,7 @@ def test_duplicate_ids(self): self.addCleanup, federation_sender=Mock(), federation_client=Mock() ) - hs.config.app_service_config_files = [f1, f2] + hs.config.appservice.app_service_config_files = [f1, f2] hs.config.caches.event_cache_size = 1 hs.config.password_providers = [] @@ -540,9 +537,8 @@ def test_duplicate_as_tokens(self): self.addCleanup, federation_sender=Mock(), federation_client=Mock() ) - hs.config.app_service_config_files = [f1, f2] + hs.config.appservice.app_service_config_files = [f1, f2] hs.config.caches.event_cache_size = 1 - hs.config.password_providers = [] with self.assertRaises(ConfigError) as cm: database = hs.get_datastores().databases[0] diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py index da98733ce..7cc5e621b 100644 --- a/tests/storage/test_cleanup_extrems.py +++ b/tests/storage/test_cleanup_extrems.py @@ -258,7 +258,7 @@ def prepare(self, reactor, clock, homeserver): info, _ = self.get_success(self.room_creator.create_room(self.requester, {})) self.room_id = info["room_id"] self.event_creator = homeserver.get_event_creation_handler() - homeserver.config.user_consent_version = self.CONSENT_VERSION + homeserver.config.consent.user_consent_version = self.CONSENT_VERSION def test_send_dummy_event(self): self._create_extremity_rich_graph() From dcfd8649704bd0a05bfbffdd96d60fc2b1913a2f Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Thu, 23 Sep 2021 13:02:13 +0100 Subject: [PATCH 38/74] Fix reactivated users not being added to the user directory (#10782) Co-authored-by: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Co-authored-by: reivilibre <olivier@librepush.net> Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/10782.bugfix | 1 + synapse/handlers/deactivate_account.py | 9 ++++-- tests/handlers/test_user_directory.py | 42 
+++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 changelog.d/10782.bugfix diff --git a/changelog.d/10782.bugfix b/changelog.d/10782.bugfix new file mode 100644 index 000000000..3e410447c --- /dev/null +++ b/changelog.d/10782.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which caused deactivated users that were later reactivated to be missing from the user directory. \ No newline at end of file diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index a03ff9842..9ae5b7750 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -255,13 +255,16 @@ async def activate_account(self, user_id: str) -> None: Args: user_id: ID of user to be re-activated """ - # Add the user to the directory, if necessary. user = UserID.from_string(user_id) - profile = await self.store.get_profileinfo(user.localpart) - await self.user_directory_handler.handle_local_profile_change(user_id, profile) # Ensure the user is not marked as erased. await self.store.mark_user_not_erased(user_id) # Mark the user as active. await self.store.set_user_deactivated_status(user_id, False) + + # Add the user to the directory, if necessary. Note that + # this must be done after the user is re-activated, because + # deactivated users are excluded from the user directory. + profile = await self.store.get_profileinfo(user.localpart) + await self.user_directory_handler.handle_local_profile_change(user_id, profile) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index ae88ed89a..f3684c34a 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from unittest.mock import Mock +from urllib.parse import quote from twisted.internet import defer @@ -20,6 +21,7 @@ from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.rest.client import login, room, user_directory from synapse.storage.roommember import ProfileInfo +from synapse.types import create_requester from tests import unittest from tests.unittest import override_config @@ -32,7 +34,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): servlets = [ login.register_servlets, - synapse.rest.admin.register_servlets_for_client_rest_resource, + synapse.rest.admin.register_servlets, room.register_servlets, ] @@ -130,6 +132,44 @@ def test_handle_user_deactivated_regular_user(self): self.get_success(self.handler.handle_local_user_deactivated(r_user_id)) self.store.remove_from_user_dir.called_once_with(r_user_id) + def test_reactivation_makes_regular_user_searchable(self): + user = self.register_user("regular", "pass") + user_token = self.login(user, "pass") + admin_user = self.register_user("admin", "pass", admin=True) + admin_token = self.login(admin_user, "pass") + + # Ensure the regular user is publicly visible and searchable. + self.helper.create_room_as(user, is_public=True, tok=user_token) + s = self.get_success(self.handler.search_users(admin_user, user, 10)) + self.assertEqual(len(s["results"]), 1) + self.assertEqual(s["results"][0]["user_id"], user) + + # Deactivate the user and check they're not searchable. 
+ deactivate_handler = self.hs.get_deactivate_account_handler() + self.get_success( + deactivate_handler.deactivate_account( + user, erase_data=False, requester=create_requester(admin_user) + ) + ) + s = self.get_success(self.handler.search_users(admin_user, user, 10)) + self.assertEqual(s["results"], []) + + # Reactivate the user + channel = self.make_request( + "PUT", + f"/_synapse/admin/v2/users/{quote(user)}", + access_token=admin_token, + content={"deactivated": False, "password": "pass"}, + ) + self.assertEqual(channel.code, 200) + user_token = self.login(user, "pass") + self.helper.create_room_as(user, is_public=True, tok=user_token) + + # Check they're searchable. + s = self.get_success(self.handler.search_users(admin_user, user, 10)) + self.assertEqual(len(s["results"]), 1) + self.assertEqual(s["results"][0]["user_id"], user) + def test_private_room(self): """ A user can be searched for only by people that are either in a public From a10988983a1cd145fc5ae57c9a00ea95fbaece61 Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Thu, 23 Sep 2021 14:45:32 +0100 Subject: [PATCH 39/74] Break down cache expiry reasons in grafana (#10880) A follow-up to #10829 --- changelog.d/10880.misc | 1 + contrib/grafana/synapse.json | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10880.misc diff --git a/changelog.d/10880.misc b/changelog.d/10880.misc new file mode 100644 index 000000000..5f58d6198 --- /dev/null +++ b/changelog.d/10880.misc @@ -0,0 +1 @@ +Break down Grafana's cache expiry time series based on reason for eviction---see #10829. 
\ No newline at end of file diff --git a/contrib/grafana/synapse.json b/contrib/grafana/synapse.json index ed1e8ba7f..2c839c30d 100644 --- a/contrib/grafana/synapse.json +++ b/contrib/grafana/synapse.json @@ -6785,7 +6785,7 @@ "expr": "rate(synapse_util_caches_cache:evicted_size{instance=\"$instance\",job=~\"$job\",index=~\"$index\"}[$bucket_size])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{name}} {{job}}-{{index}}", + "legendFormat": "{{name}} ({{reason}}) {{job}}-{{index}}", "refId": "A" } ], @@ -10888,5 +10888,5 @@ "timezone": "", "title": "Synapse", "uid": "000000012", - "version": 99 + "version": 100 } \ No newline at end of file From 47854c71e9bded2c446a251f3ef16f4d5da96ebe Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Thu, 23 Sep 2021 12:03:01 -0400 Subject: [PATCH 40/74] Use direct references for configuration variables (part 4). (#10893) --- changelog.d/10893.misc | 1 + synapse/api/urls.py | 4 ++-- synapse/app/_base.py | 6 ++++-- synapse/app/admin_cmd.py | 2 +- synapse/app/generic_worker.py | 4 ++-- synapse/app/homeserver.py | 10 +++++----- synapse/app/phone_stats_home.py | 8 +++++--- synapse/config/logger.py | 2 +- synapse/federation/transport/server/_base.py | 4 +++- synapse/groups/groups_server.py | 6 +++--- synapse/handlers/auth.py | 2 +- synapse/handlers/oidc.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/http/matrixfederationclient.py | 5 +++-- synapse/push/httppusher.py | 4 +++- synapse/rest/client/login.py | 12 ++++++------ synapse/rest/consent/consent_resource.py | 4 ++-- synapse/rest/key/v2/local_key_resource.py | 10 +++++----- synapse/rest/key/v2/remote_key_resource.py | 6 ++++-- synapse/rest/media/v1/media_repository.py | 4 +++- synapse/rest/synapse/client/__init__.py | 2 +- synapse/storage/databases/main/roommember.py | 2 +- tests/api/test_auth.py | 4 ++-- tests/app/test_phone_stats_home.py | 2 +- tests/config/test_load.py | 10 +++++----- tests/config/test_ratelimiting.py | 
2 +- tests/handlers/test_auth.py | 2 +- tests/replication/_base.py | 2 +- tests/rest/client/test_login.py | 12 ++++++------ tests/rest/client/test_register.py | 2 +- tests/storage/test_appservice.py | 1 - tests/util/test_ratelimitutils.py | 2 +- 32 files changed, 77 insertions(+), 64 deletions(-) create mode 100644 changelog.d/10893.misc diff --git a/changelog.d/10893.misc b/changelog.d/10893.misc new file mode 100644 index 000000000..586a0b3a9 --- /dev/null +++ b/changelog.d/10893.misc @@ -0,0 +1 @@ +Use direct references to config flags. diff --git a/synapse/api/urls.py b/synapse/api/urls.py index d3270cd6d..032c69b21 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -39,12 +39,12 @@ def __init__(self, hs_config): Args: hs_config (synapse.config.homeserver.HomeServerConfig): """ - if hs_config.form_secret is None: + if hs_config.key.form_secret is None: raise ConfigError("form_secret not set in config") if hs_config.server.public_baseurl is None: raise ConfigError("public_baseurl not set in config") - self._hmac_secret = hs_config.form_secret.encode("utf-8") + self._hmac_secret = hs_config.key.form_secret.encode("utf-8") self._public_baseurl = hs_config.server.public_baseurl def build_user_consent_uri(self, user_id): diff --git a/synapse/app/_base.py b/synapse/app/_base.py index d1aa2e7fb..f657f11f7 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -424,12 +424,14 @@ def setup_sentry(hs): hs (synapse.server.HomeServer) """ - if not hs.config.sentry_enabled: + if not hs.config.metrics.sentry_enabled: return import sentry_sdk - sentry_sdk.init(dsn=hs.config.sentry_dsn, release=get_version_string(synapse)) + sentry_sdk.init( + dsn=hs.config.metrics.sentry_dsn, release=get_version_string(synapse) + ) # We set some default tags that give some context to this instance with sentry_sdk.configure_scope() as scope: diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 5e956b1e2..259d5ec7c 100644 --- a/synapse/app/admin_cmd.py +++ 
b/synapse/app/admin_cmd.py @@ -192,7 +192,7 @@ def start(config_options): ): # Since we're meant to be run as a "command" let's not redirect stdio # unless we've actually set log config. - config.no_redirect_stdio = True + config.logging.no_redirect_stdio = True # Explicitly disable background processes config.update_user_directory = False diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 33afd59c7..e0776689c 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -395,7 +395,7 @@ def start_listening(self): manhole_globals={"hs": self}, ) elif listener.type == "metrics": - if not self.config.enable_metrics: + if not self.config.metrics.enable_metrics: logger.warning( "Metrics listener configured, but " "enable_metrics is not True!" @@ -488,7 +488,7 @@ def start(config_options): register_start(_base.start, hs) # redirect stdio to the logs, if configured. - if not hs.config.no_redirect_stdio: + if not hs.config.logging.no_redirect_stdio: redirect_stdio_to_logs() _base.start_worker_reactor("synapse-generic-worker", config) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 886e291e4..f1769f146 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -269,7 +269,7 @@ def _configure_named_resource(self, name, compress=False): # https://twistedmatrix.com/trac/ticket/7678 resources[WEB_CLIENT_PREFIX] = File(webclient_loc) - if name == "metrics" and self.config.enable_metrics: + if name == "metrics" and self.config.metrics.enable_metrics: resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) if name == "replication": @@ -278,7 +278,7 @@ def _configure_named_resource(self, name, compress=False): return resources def start_listening(self): - if self.config.redis_enabled: + if self.config.redis.redis_enabled: # If redis is enabled we connect via the replication command handler # in the same way as the workers (since we're effectively a client # rather than a server). 
@@ -305,7 +305,7 @@ def start_listening(self): for s in services: reactor.addSystemEventTrigger("before", "shutdown", s.stopListening) elif listener.type == "metrics": - if not self.config.enable_metrics: + if not self.config.metrics.enable_metrics: logger.warning( "Metrics listener configured, but " "enable_metrics is not True!" @@ -366,7 +366,7 @@ def setup(config_options): async def start(): # Load the OIDC provider metadatas, if OIDC is enabled. - if hs.config.oidc_enabled: + if hs.config.oidc.oidc_enabled: oidc = hs.get_oidc_handler() # Loading the provider metadata also ensures the provider config is valid. await oidc.load_metadata() @@ -455,7 +455,7 @@ def main(): hs = setup(sys.argv[1:]) # redirect stdio to the logs, if configured. - if not hs.config.no_redirect_stdio: + if not hs.config.logging.no_redirect_stdio: redirect_stdio_to_logs() run(hs) diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 4a95da90f..49e7a45e5 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -131,10 +131,12 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process): log_level = synapse_logger.getEffectiveLevel() stats["log_level"] = logging.getLevelName(log_level) - logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats)) + logger.info( + "Reporting stats to %s: %s" % (hs.config.metrics.report_stats_endpoint, stats) + ) try: await hs.get_proxied_http_client().put_json( - hs.config.report_stats_endpoint, stats + hs.config.metrics.report_stats_endpoint, stats ) except Exception as e: logger.warning("Error reporting stats: %s", e) @@ -188,7 +190,7 @@ async def generate_monthly_active_users(): clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000) # End of monthly active user settings - if hs.config.report_stats: + if hs.config.metrics.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, 
hs, stats) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index aca9d467e..bf8ca7d5f 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -322,7 +322,7 @@ def setup_logging( """ log_config_path = ( - config.worker_log_config if use_worker_options else config.log_config + config.worker_log_config if use_worker_options else config.logging.log_config ) # Perform one-time logging configuration. diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 624c859f1..cef65929c 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -49,7 +49,9 @@ def __init__(self, hs: HomeServer): self.keyring = hs.get_keyring() self.server_name = hs.hostname self.store = hs.get_datastore() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) self.notifier = hs.get_notifier() self.replication_client = None diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index d6b75ac27..449bbc700 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -847,16 +847,16 @@ async def create_group( UserID.from_string(requester_user_id) ) if not is_admin: - if not self.hs.config.enable_group_creation: + if not self.hs.config.groups.enable_group_creation: raise SynapseError( 403, "Only a server admin can create groups on this server" ) localpart = group_id_obj.localpart - if not localpart.startswith(self.hs.config.group_creation_prefix): + if not localpart.startswith(self.hs.config.groups.group_creation_prefix): raise SynapseError( 400, "Can only create groups with prefix %r on this server" - % (self.hs.config.group_creation_prefix,), + % (self.hs.config.groups.group_creation_prefix,), ) profile = content.get("profile", {}) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 
b747f80bc..0f80dfdc4 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1802,7 +1802,7 @@ def _generate_base_macaroon(self, user_id: str) -> pymacaroons.Macaroon: macaroon = pymacaroons.Macaroon( location=self.hs.config.server.server_name, identifier="key", - key=self.hs.config.macaroon_secret_key, + key=self.hs.config.key.macaroon_secret_key, ) macaroon.add_first_party_caveat("gen = 1") macaroon.add_first_party_caveat("user_id = %s" % (user_id,)) diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py index aed5a40a7..3665d9151 100644 --- a/synapse/handlers/oidc.py +++ b/synapse/handlers/oidc.py @@ -277,7 +277,7 @@ def __init__( self._token_generator = token_generator self._config = provider - self._callback_url: str = hs.config.oidc_callback_url + self._callback_url: str = hs.config.oidc.oidc_callback_url # Calculate the prefix for OIDC callback paths based on the public_baseurl. # We'll insert this into the Path= parameter of any session cookies we set. 
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index f06070bfc..b23a1541b 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -309,7 +309,7 @@ async def set_avatar_url( async def on_profile_query(self, args: JsonDict) -> JsonDict: """Handles federation profile query requests.""" - if not self.hs.config.allow_profile_lookup_over_federation: + if not self.hs.config.federation.allow_profile_lookup_over_federation: raise SynapseError( 403, "Profile lookup over federation is disabled on this homeserver", diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index e56fa477b..cdc36b8d2 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -465,8 +465,9 @@ async def _send_request( _sec_timeout = self.default_timeout if ( - self.hs.config.federation_domain_whitelist is not None - and request.destination not in self.hs.config.federation_domain_whitelist + self.hs.config.federation.federation_domain_whitelist is not None + and request.destination + not in self.hs.config.federation.federation_domain_whitelist ): raise FederationDeniedError(request.destination) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 065948f98..eac65572b 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -73,7 +73,9 @@ def __init__(self, hs: "HomeServer", pusher_config: PusherConfig): self.failing_since = pusher_config.failing_since self.timed_call: Optional[IDelayedCall] = None self._is_processing = False - self._group_unread_count_by_room = hs.config.push_group_unread_count_by_room + self._group_unread_count_by_room = ( + hs.config.push.push_group_unread_count_by_room + ) self._pusherpool = hs.get_pusherpool() self.data = pusher_config.data diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index d766e98dc..64446fc48 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ 
-69,16 +69,16 @@ def __init__(self, hs: "HomeServer"): self.hs = hs # JWT configuration variables. - self.jwt_enabled = hs.config.jwt_enabled - self.jwt_secret = hs.config.jwt_secret - self.jwt_algorithm = hs.config.jwt_algorithm - self.jwt_issuer = hs.config.jwt_issuer - self.jwt_audiences = hs.config.jwt_audiences + self.jwt_enabled = hs.config.jwt.jwt_enabled + self.jwt_secret = hs.config.jwt.jwt_secret + self.jwt_algorithm = hs.config.jwt.jwt_algorithm + self.jwt_issuer = hs.config.jwt.jwt_issuer + self.jwt_audiences = hs.config.jwt.jwt_audiences # SSO configuration. self.saml2_enabled = hs.config.saml2_enabled self.cas_enabled = hs.config.cas.cas_enabled - self.oidc_enabled = hs.config.oidc_enabled + self.oidc_enabled = hs.config.oidc.oidc_enabled self._msc2918_enabled = hs.config.access_token_lifetime is not None self.auth = hs.get_auth() diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py index fc634a492..3d2afacc5 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py @@ -100,13 +100,13 @@ def __init__(self, hs: "HomeServer"): loader=loader, autoescape=jinja2.select_autoescape(["html", "htm", "xml"]) ) - if hs.config.form_secret is None: + if hs.config.key.form_secret is None: raise ConfigError( "Consent resource is enabled but form_secret is not set in " "config file. It should be set to an arbitrary secret string." 
) - self._hmac_secret = hs.config.form_secret.encode("utf-8") + self._hmac_secret = hs.config.key.form_secret.encode("utf-8") async def _async_render_GET(self, request: Request) -> None: version = parse_string(request, "v", default=self._default_consent_version) diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py index ebe243bcf..12b3ae120 100644 --- a/synapse/rest/key/v2/local_key_resource.py +++ b/synapse/rest/key/v2/local_key_resource.py @@ -70,19 +70,19 @@ def __init__(self, hs: "HomeServer"): Resource.__init__(self) def update_response_body(self, time_now_msec: int) -> None: - refresh_interval = self.config.key_refresh_interval + refresh_interval = self.config.key.key_refresh_interval self.valid_until_ts = int(time_now_msec + refresh_interval) self.response_body = encode_canonical_json(self.response_json_object()) def response_json_object(self) -> JsonDict: verify_keys = {} - for key in self.config.signing_key: + for key in self.config.key.signing_key: verify_key_bytes = key.verify_key.encode() key_id = "%s:%s" % (key.alg, key.version) verify_keys[key_id] = {"key": encode_base64(verify_key_bytes)} old_verify_keys = {} - for key_id, key in self.config.old_signing_keys.items(): + for key_id, key in self.config.key.old_signing_keys.items(): verify_key_bytes = key.encode() old_verify_keys[key_id] = { "key": encode_base64(verify_key_bytes), @@ -95,13 +95,13 @@ def response_json_object(self) -> JsonDict: "verify_keys": verify_keys, "old_verify_keys": old_verify_keys, } - for key in self.config.signing_key: + for key in self.config.key.signing_key: json_object = sign_json(json_object, self.config.server.server_name, key) return json_object def render_GET(self, request: Request) -> int: time_now = self.clock.time_msec() # Update the expiry time if less than half the interval remains. 
- if time_now + self.config.key_refresh_interval / 2 > self.valid_until_ts: + if time_now + self.config.key.key_refresh_interval / 2 > self.valid_until_ts: self.update_response_body(time_now) return respond_with_json_bytes(request, 200, self.response_body) diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index d8fd7938a..c111a9d20 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -97,7 +97,9 @@ def __init__(self, hs: "HomeServer"): self.fetcher = ServerKeyFetcher(hs) self.store = hs.get_datastore() self.clock = hs.get_clock() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) self.config = hs.config async def _async_render_GET(self, request: Request) -> None: @@ -235,7 +237,7 @@ async def query_keys( signed_keys = [] for key_json in json_results: key_json = json_decoder.decode(key_json.decode("utf-8")) - for signing_key in self.config.key_server_signing_keys: + for signing_key in self.config.key.key_server_signing_keys: key_json = sign_json( key_json, self.config.server.server_name, signing_key ) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 50e4c9e29..a30007a1e 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -92,7 +92,9 @@ def __init__(self, hs: "HomeServer"): self.recently_accessed_remotes: Set[Tuple[str, str]] = set() self.recently_accessed_locals: Set[str] = set() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) # List of StorageProviders where we should search for media and # potentially upload to. 
diff --git a/synapse/rest/synapse/client/__init__.py b/synapse/rest/synapse/client/__init__.py index 47a2f72b3..086c80b72 100644 --- a/synapse/rest/synapse/client/__init__.py +++ b/synapse/rest/synapse/client/__init__.py @@ -45,7 +45,7 @@ def build_synapse_client_resource_tree(hs: "HomeServer") -> Mapping[str, Resourc # provider-specific SSO bits. Only load these if they are enabled, since they # rely on optional dependencies. - if hs.config.oidc_enabled: + if hs.config.oidc.oidc_enabled: from synapse.rest.synapse.client.oidc import OIDCResource resources["/_synapse/client/oidc"] = OIDCResource(hs) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index a4ec6bc32..ddb162a4f 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -82,7 +82,7 @@ def __init__(self, database: DatabasePool, db_conn, hs): if ( self.hs.config.worker.run_background_tasks - and self.hs.config.metrics_flags.known_servers + and self.hs.config.metrics.metrics_flags.known_servers ): self._known_servers_count = 1 self.hs.get_clock().looping_call( diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index f76fea4f6..8a4ef1305 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -217,7 +217,7 @@ def test_get_user_from_macaroon(self): macaroon = pymacaroons.Macaroon( location=self.hs.config.server_name, identifier="key", - key=self.hs.config.macaroon_secret_key, + key=self.hs.config.key.macaroon_secret_key, ) macaroon.add_first_party_caveat("gen = 1") macaroon.add_first_party_caveat("type = access") @@ -239,7 +239,7 @@ def test_get_guest_user_from_macaroon(self): macaroon = pymacaroons.Macaroon( location=self.hs.config.server_name, identifier="key", - key=self.hs.config.macaroon_secret_key, + key=self.hs.config.key.macaroon_secret_key, ) macaroon.add_first_party_caveat("gen = 1") macaroon.add_first_party_caveat("type = access") diff --git 
a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py index d66aeb00e..19eb4c79d 100644 --- a/tests/app/test_phone_stats_home.py +++ b/tests/app/test_phone_stats_home.py @@ -172,7 +172,7 @@ def make_homeserver(self, reactor, clock): # We don't want our tests to actually report statistics, so check # that it's not enabled - assert not hs.config.report_stats + assert not hs.config.metrics.report_stats # This starts the needed data collection that we rely on to calculate # R30v2 metrics. diff --git a/tests/config/test_load.py b/tests/config/test_load.py index 903c69127..ef6c2beec 100644 --- a/tests/config/test_load.py +++ b/tests/config/test_load.py @@ -52,10 +52,10 @@ def test_generates_and_loads_macaroon_secret_key(self): hasattr(config, "macaroon_secret_key"), "Want config to have attr macaroon_secret_key", ) - if len(config.macaroon_secret_key) < 5: + if len(config.key.macaroon_secret_key) < 5: self.fail( "Want macaroon secret key to be string of at least length 5," - "was: %r" % (config.macaroon_secret_key,) + "was: %r" % (config.key.macaroon_secret_key,) ) config = HomeServerConfig.load_or_generate_config("", ["-c", self.file]) @@ -63,10 +63,10 @@ def test_generates_and_loads_macaroon_secret_key(self): hasattr(config, "macaroon_secret_key"), "Want config to have attr macaroon_secret_key", ) - if len(config.macaroon_secret_key) < 5: + if len(config.key.macaroon_secret_key) < 5: self.fail( "Want macaroon secret key to be string of at least length 5," - "was: %r" % (config.macaroon_secret_key,) + "was: %r" % (config.key.macaroon_secret_key,) ) def test_load_succeeds_if_macaroon_secret_key_missing(self): @@ -101,7 +101,7 @@ def test_stats_enabled(self): # The default Metrics Flags are off by default. 
config = HomeServerConfig.load_config("", ["-c", self.file]) - self.assertFalse(config.metrics_flags.known_servers) + self.assertFalse(config.metrics.metrics_flags.known_servers) def generate_config(self): with redirect_stdout(StringIO()): diff --git a/tests/config/test_ratelimiting.py b/tests/config/test_ratelimiting.py index 3c7bb32e0..1b63e1adf 100644 --- a/tests/config/test_ratelimiting.py +++ b/tests/config/test_ratelimiting.py @@ -30,7 +30,7 @@ def test_parse_rc_federation(self): config = HomeServerConfig() config.parse_config_dict(config_dict, "", "") - config_obj = config.rc_federation + config_obj = config.ratelimiting.rc_federation self.assertEqual(config_obj.window_size, 20000) self.assertEqual(config_obj.sleep_limit, 693) diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index 5f3350e49..12857053e 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -67,7 +67,7 @@ def verify_guest(caveat): v.satisfy_general(verify_type) v.satisfy_general(verify_nonce) v.satisfy_general(verify_guest) - v.verify(macaroon, self.hs.config.macaroon_secret_key) + v.verify(macaroon, self.hs.config.key.macaroon_secret_key) def test_short_term_login_token_gives_user_id(self): token = self.macaroon_generator.generate_short_term_login_token( diff --git a/tests/replication/_base.py b/tests/replication/_base.py index e9fd99171..c7555c26d 100644 --- a/tests/replication/_base.py +++ b/tests/replication/_base.py @@ -328,7 +328,7 @@ def make_worker_hs( # Set up TCP replication between master and the new worker if we don't # have Redis support enabled. 
- if not worker_hs.config.redis_enabled: + if not worker_hs.config.redis.redis_enabled: repl_handler = ReplicationCommandHandler(worker_hs) client = ClientReplicationStreamProtocol( worker_hs, diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index 414c8781a..371615a01 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -815,9 +815,9 @@ class JWTTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): self.hs = self.setup_test_homeserver() - self.hs.config.jwt_enabled = True - self.hs.config.jwt_secret = self.jwt_secret - self.hs.config.jwt_algorithm = self.jwt_algorithm + self.hs.config.jwt.jwt_enabled = True + self.hs.config.jwt.jwt_secret = self.jwt_secret + self.hs.config.jwt.jwt_algorithm = self.jwt_algorithm return self.hs def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_secret) -> str: @@ -1023,9 +1023,9 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): self.hs = self.setup_test_homeserver() - self.hs.config.jwt_enabled = True - self.hs.config.jwt_secret = self.jwt_pubkey - self.hs.config.jwt_algorithm = "RS256" + self.hs.config.jwt.jwt_enabled = True + self.hs.config.jwt.jwt_secret = self.jwt_pubkey + self.hs.config.jwt.jwt_algorithm = "RS256" return self.hs def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str: diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index 9f3ab2c98..72a5a11b4 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -146,7 +146,7 @@ def test_POST_disabled_registration(self): self.assertEquals(channel.json_body["errcode"], "M_FORBIDDEN") def test_POST_guest_registration(self): - self.hs.config.macaroon_secret_key = "test" + self.hs.config.key.macaroon_secret_key = "test" self.hs.config.allow_guest_access = True channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") 
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index ebadf4794..cf9748f21 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -513,7 +513,6 @@ def test_duplicate_ids(self): hs.config.appservice.app_service_config_files = [f1, f2] hs.config.caches.event_cache_size = 1 - hs.config.password_providers = [] with self.assertRaises(ConfigError) as cm: database = hs.get_datastores().databases[0] diff --git a/tests/util/test_ratelimitutils.py b/tests/util/test_ratelimitutils.py index 34aaffe85..89d865663 100644 --- a/tests/util/test_ratelimitutils.py +++ b/tests/util/test_ratelimitutils.py @@ -95,4 +95,4 @@ def build_rc_config(settings: Optional[dict] = None): config_dict.update(settings or {}) config = HomeServerConfig() config.parse_config_dict(config_dict, "", "") - return config.rc_federation + return config.ratelimiting.rc_federation From a7304adc7d383caad1b3f83fa707b1090323ecca Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 23 Sep 2021 17:34:33 +0100 Subject: [PATCH 41/74] Factor out `_get_remote_auth_chain_for_event` from `_update_auth_events_and_context_for_auth` (#10884) * Reload auth events from db after fetching and persisting In `_update_auth_events_and_context_for_auth`, when we fetch the remote auth tree and persist the returned events: load the missing events from the database rather than using the copies we got from the remote server. This is mostly in preparation for additional refactors, but does have an advantage in that if we later get around to checking the rejected status, we'll be able to make use of it. 
* Factor out `_get_remote_auth_chain_for_event` from `_update_auth_events_and_context_for_auth` * changelog --- changelog.d/10884.misc | 1 + synapse/handlers/federation_event.py | 124 ++++++++++++++++----------- 2 files changed, 73 insertions(+), 52 deletions(-) create mode 100644 changelog.d/10884.misc diff --git a/changelog.d/10884.misc b/changelog.d/10884.misc new file mode 100644 index 000000000..9a765435d --- /dev/null +++ b/changelog.d/10884.misc @@ -0,0 +1 @@ +Clean up some of the federation event authentication code for clarity. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 10b3fdc22..7d468bd2d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1505,61 +1505,22 @@ async def _update_auth_events_and_context_for_auth( # If we don't have all the auth events, we need to get them. logger.info("auth_events contains unknown events: %s", missing_auth) try: - try: - remote_auth_chain = await self._federation_client.get_event_auth( - origin, event.room_id, event.event_id - ) - except RequestSendFailed as e1: - # The other side isn't around or doesn't implement the - # endpoint, so lets just bail out. 
- logger.info("Failed to get event auth from remote: %s", e1) - return context, auth_events - - seen_remotes = await self._store.have_seen_events( - event.room_id, [e.event_id for e in remote_auth_chain] + await self._get_remote_auth_chain_for_event( + origin, event.room_id, event.event_id ) - - for auth_event in remote_auth_chain: - if auth_event.event_id in seen_remotes: - continue - - if auth_event.event_id == event.event_id: - continue - - try: - auth_ids = auth_event.auth_event_ids() - auth = { - (e.type, e.state_key): e - for e in remote_auth_chain - if e.event_id in auth_ids or e.type == EventTypes.Create - } - auth_event.internal_metadata.outlier = True - - logger.debug( - "_check_event_auth %s missing_auth: %s", - event.event_id, - auth_event.event_id, - ) - missing_auth_event_context = EventContext.for_outlier() - missing_auth_event_context = await self._check_event_auth( - origin, - auth_event, - missing_auth_event_context, - claimed_auth_event_map=auth, - ) - await self.persist_events_and_notify( - event.room_id, [(auth_event, missing_auth_event_context)] - ) - - if auth_event.event_id in event_auth_events: - auth_events[ - (auth_event.type, auth_event.state_key) - ] = auth_event - except AuthError: - pass - except Exception: logger.exception("Failed to get auth chain") + else: + # load any auth events we might have persisted from the database. This + # has the side-effect of correctly setting the rejected_reason on them. 
+ auth_events.update( + { + (ae.type, ae.state_key): ae + for ae in await self._store.get_events_as_list( + missing_auth, allow_rejected=True + ) + } + ) if event.internal_metadata.is_outlier(): # XXX: given that, for an outlier, we'll be working with the @@ -1633,6 +1594,65 @@ async def _update_auth_events_and_context_for_auth( return context, auth_events + async def _get_remote_auth_chain_for_event( + self, destination: str, room_id: str, event_id: str + ) -> None: + """If we are missing some of an event's auth events, attempt to request them + + Args: + destination: where to fetch the auth tree from + room_id: the room in which we are lacking auth events + event_id: the event for which we are lacking auth events + """ + try: + remote_auth_chain = await self._federation_client.get_event_auth( + destination, room_id, event_id + ) + except RequestSendFailed as e1: + # The other side isn't around or doesn't implement the + # endpoint, so lets just bail out. + logger.info("Failed to get event auth from remote: %s", e1) + return + + seen_remotes = await self._store.have_seen_events( + room_id, [e.event_id for e in remote_auth_chain] + ) + + for auth_event in remote_auth_chain: + if auth_event.event_id in seen_remotes: + continue + + if auth_event.event_id == event_id: + continue + + try: + auth_ids = auth_event.auth_event_ids() + auth = { + (e.type, e.state_key): e + for e in remote_auth_chain + if e.event_id in auth_ids or e.type == EventTypes.Create + } + auth_event.internal_metadata.outlier = True + + logger.debug( + "_check_event_auth %s missing_auth: %s", + event_id, + auth_event.event_id, + ) + missing_auth_event_context = EventContext.for_outlier() + missing_auth_event_context = await self._check_event_auth( + destination, + auth_event, + missing_auth_event_context, + claimed_auth_event_map=auth, + ) + await self.persist_events_and_notify( + room_id, + [(auth_event, missing_auth_event_context)], + ) + except AuthError: + pass + async def 
_update_context_for_auth_events( self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] ) -> EventContext: From 90d9fc750514b1ede327f1dfe6e0a1c09b281d6d Mon Sep 17 00:00:00 2001 From: Callum Brown <callum@calcuode.com> Date: Thu, 23 Sep 2021 18:58:12 +0100 Subject: [PATCH 42/74] Allow `.` and `~` chars in registration tokens (#10887) Per updates to MSC3231 in order to use the same grammar as other identifiers. --- changelog.d/10887.bugfix | 1 + synapse/rest/admin/registration_tokens.py | 2 +- tests/rest/admin/test_registration_tokens.py | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 changelog.d/10887.bugfix diff --git a/changelog.d/10887.bugfix b/changelog.d/10887.bugfix new file mode 100644 index 000000000..2d1f67489 --- /dev/null +++ b/changelog.d/10887.bugfix @@ -0,0 +1 @@ +Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). diff --git a/synapse/rest/admin/registration_tokens.py b/synapse/rest/admin/registration_tokens.py index 5a1c929d8..aba48f6e7 100644 --- a/synapse/rest/admin/registration_tokens.py +++ b/synapse/rest/admin/registration_tokens.py @@ -113,7 +113,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self.clock = hs.get_clock() # A string of all the characters allowed to be in a registration_token - self.allowed_chars = string.ascii_letters + string.digits + "-_" + self.allowed_chars = string.ascii_letters + string.digits + "._~-" self.allowed_chars_set = set(self.allowed_chars) async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: diff --git a/tests/rest/admin/test_registration_tokens.py b/tests/rest/admin/test_registration_tokens.py index 4927321e5..9bac423ae 100644 --- a/tests/rest/admin/test_registration_tokens.py +++ b/tests/rest/admin/test_registration_tokens.py @@ -95,8 +95,10 @@ def test_create_using_defaults(self): def 
test_create_specifying_fields(self): """Create a token specifying the value of all fields.""" + # As many of the allowed characters as possible with length <= 64 + token = "adefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._~-" data = { - "token": "abcd", + "token": token, "uses_allowed": 1, "expiry_time": self.clock.time_msec() + 1000000, } @@ -109,7 +111,7 @@ def test_create_specifying_fields(self): ) self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) - self.assertEqual(channel.json_body["token"], "abcd") + self.assertEqual(channel.json_body["token"], token) self.assertEqual(channel.json_body["uses_allowed"], 1) self.assertEqual(channel.json_body["expiry_time"], data["expiry_time"]) self.assertEqual(channel.json_body["pending"], 0) @@ -193,7 +195,7 @@ def test_create_unable_to_generate_token(self): """Check right error is raised when server can't generate unique token.""" # Create all possible single character tokens tokens = [] - for c in string.ascii_letters + string.digits + "-_": + for c in string.ascii_letters + string.digits + "._~-": tokens.append( { "token": c, From e704cc2a48c6adc5d3da79a49ed02961edfc3b4a Mon Sep 17 00:00:00 2001 From: Kokokokoka <vasiliy.boytsov@gmail.com> Date: Fri, 24 Sep 2021 12:19:51 +0300 Subject: [PATCH 43/74] In `_purge_history_txn`, ensure that txn.fetchall has elements before accessing rows (#10690) This change adds a check for row existence before accessing row element, this should fix issue #10669 Signed-off-by: Vasya Boytsov vasiliy.boytsov@phystech.edu --- changelog.d/10690.bugfix | 1 + .../storage/databases/main/purge_events.py | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 changelog.d/10690.bugfix diff --git a/changelog.d/10690.bugfix b/changelog.d/10690.bugfix new file mode 100644 index 000000000..059eea746 --- /dev/null +++ b/changelog.d/10690.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug that caused an `AssertionError` when purging 
history in certain rooms. Contributed by @Kokokokoka. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index bccff5e5b..3eb30944b 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -102,15 +102,19 @@ def _purge_history_txn( (room_id,), ) rows = txn.fetchall() - max_depth = max(row[1] for row in rows) - - if max_depth < token.topological: - # We need to ensure we don't delete all the events from the database - # otherwise we wouldn't be able to send any events (due to not - # having any backwards extremities) - raise SynapseError( - 400, "topological_ordering is greater than forward extremeties" - ) + # if we already have no forwards extremities (for example because they were + # cleared out by the `delete_old_current_state_events` background database + # update), then we may as well carry on. + if rows: + max_depth = max(row[1] for row in rows) + + if max_depth < token.topological: + # We need to ensure we don't delete all the events from the database + # otherwise we wouldn't be able to send any events (due to not + # having any backwards extremities) + raise SynapseError( + 400, "topological_ordering is greater than forward extremities" + ) logger.info("[purge] looking for events to delete") From 7f3352743e02e0d02ec00eb3a50fd0ceb422286c Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Fri, 24 Sep 2021 10:38:22 +0100 Subject: [PATCH 44/74] Improve typing in user_directory files (#10891) * Improve typing in user_directory files This makes the user_directory.py in storage pass most of mypy's checks (including `no-untyped-defs`). Unfortunately that file is in the tangled web of Store class inheritance so doesn't pass mypy at the moment. The handlers directory has already been mypyed. 
Co-authored-by: reivilibre <olivier@librepush.net> --- changelog.d/10891.misc | 1 + mypy.ini | 2 + .../storage/databases/main/user_directory.py | 124 +++++++++++++----- tests/handlers/test_user_directory.py | 5 +- 4 files changed, 95 insertions(+), 37 deletions(-) create mode 100644 changelog.d/10891.misc diff --git a/changelog.d/10891.misc b/changelog.d/10891.misc new file mode 100644 index 000000000..6eecea406 --- /dev/null +++ b/changelog.d/10891.misc @@ -0,0 +1 @@ +Improve type hinting in the user directory code. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 3cb6cecd7..437d0a46a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -85,9 +85,11 @@ files = tests/handlers/test_room_summary.py, tests/handlers/test_send_email.py, tests/handlers/test_sync.py, + tests/handlers/test_user_directory.py, tests/rest/client/test_login.py, tests/rest/client/test_auth.py, tests/storage/test_state.py, + tests/storage/test_user_directory.py, tests/util/test_itertools.py, tests/util/test_stream_change_cache.py diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 718f3e997..7ca04237a 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -14,14 +14,28 @@ import logging import re -from typing import Any, Dict, Iterable, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Dict, + Iterable, + List, + Optional, + Sequence, + Set, + Tuple, + cast, +) + +if TYPE_CHECKING: + from synapse.server import HomeServer from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.state import StateFilter from synapse.storage.databases.main.state_deltas import StateDeltasStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine -from synapse.types import 
get_domain_from_id, get_localpart_from_id +from synapse.storage.types import Connection +from synapse.types import JsonDict, get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) @@ -36,7 +50,12 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): # add_users_who_share_private_rooms? SHARE_PRIVATE_WORKING_SET = 500 - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: Connection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self.server_name = hs.hostname @@ -57,10 +76,12 @@ def __init__(self, database: DatabasePool, db_conn, hs): "populate_user_directory_cleanup", self._populate_user_directory_cleanup ) - async def _populate_user_directory_createtables(self, progress, batch_size): + async def _populate_user_directory_createtables( + self, progress: JsonDict, batch_size: int + ) -> int: # Get all the rooms that we want to process. - def _make_staging_area(txn): + def _make_staging_area(txn: LoggingTransaction) -> None: sql = ( "CREATE TABLE IF NOT EXISTS " + TEMP_TABLE @@ -110,16 +131,20 @@ def _make_staging_area(txn): ) return 1 - async def _populate_user_directory_cleanup(self, progress, batch_size): + async def _populate_user_directory_cleanup( + self, + progress: JsonDict, + batch_size: int, + ) -> int: """ Update the user directory stream position, then clean up the old tables. 
""" position = await self.db_pool.simple_select_one_onecol( - TEMP_TABLE + "_position", None, "position" + TEMP_TABLE + "_position", {}, "position" ) await self.update_user_directory_stream_pos(position) - def _delete_staging_area(txn): + def _delete_staging_area(txn: LoggingTransaction) -> None: txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms") txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users") txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position") @@ -133,18 +158,32 @@ def _delete_staging_area(txn): ) return 1 - async def _populate_user_directory_process_rooms(self, progress, batch_size): + async def _populate_user_directory_process_rooms( + self, progress: JsonDict, batch_size: int + ) -> int: """ + Rescan the state of all rooms so we can track + + - who's in a public room; + - which local users share a private room with other users (local + and remote); and + - who should be in the user_directory. + Args: progress (dict) batch_size (int): Maximum number of state events to process per cycle. + + Returns: + number of events processed. """ # If we don't have progress filed, delete everything. if not progress: await self.delete_all_from_user_dir() - def _get_next_batch(txn): + def _get_next_batch( + txn: LoggingTransaction, + ) -> Optional[Sequence[Tuple[str, int]]]: # Only fetch 250 rooms, so we don't fetch too many at once, even # if those 250 rooms have less than batch_size state events. 
sql = """ @@ -155,7 +194,7 @@ def _get_next_batch(txn): TEMP_TABLE + "_rooms", ) txn.execute(sql) - rooms_to_work_on = txn.fetchall() + rooms_to_work_on = cast(List[Tuple[str, int]], txn.fetchall()) if not rooms_to_work_on: return None @@ -163,7 +202,9 @@ def _get_next_batch(txn): # Get how many are left to process, so we can give status on how # far we are in processing txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") - progress["remaining"] = txn.fetchone()[0] + result = txn.fetchone() + assert result is not None + progress["remaining"] = result[0] return rooms_to_work_on @@ -261,29 +302,33 @@ def _get_next_batch(txn): return processed_event_count - async def _populate_user_directory_process_users(self, progress, batch_size): + async def _populate_user_directory_process_users( + self, progress: JsonDict, batch_size: int + ) -> int: """ Add all local users to the user directory. """ - def _get_next_batch(txn): + def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]: sql = "SELECT user_id FROM %s LIMIT %s" % ( TEMP_TABLE + "_users", str(batch_size), ) txn.execute(sql) - users_to_work_on = txn.fetchall() + user_result = cast(List[Tuple[str]], txn.fetchall()) - if not users_to_work_on: + if not user_result: return None - users_to_work_on = [x[0] for x in users_to_work_on] + users_to_work_on = [x[0] for x in user_result] # Get how many are left to process, so we can give status on how # far we are in processing sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users" txn.execute(sql) - progress["remaining"] = txn.fetchone()[0] + count_result = txn.fetchone() + assert count_result is not None + progress["remaining"] = count_result[0] return users_to_work_on @@ -324,7 +369,7 @@ def _get_next_batch(txn): return len(users_to_work_on) - async def is_room_world_readable_or_publicly_joinable(self, room_id): + async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool: """Check if the room is either world_readable or publically 
joinable""" # Create a state filter that only queries join and history state event @@ -368,7 +413,7 @@ async def update_profile_in_user_dir( if not isinstance(avatar_url, str): avatar_url = None - def _update_profile_in_user_dir_txn(txn): + def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None: self.db_pool.simple_upsert_txn( txn, table="user_directory", @@ -435,7 +480,7 @@ async def add_users_who_share_private_room( for user_id, other_user_id in user_id_tuples ], value_names=(), - value_values=None, + value_values=(), desc="add_users_who_share_room", ) @@ -454,14 +499,14 @@ async def add_users_in_public_rooms( key_names=["user_id", "room_id"], key_values=[(user_id, room_id) for user_id in user_ids], value_names=(), - value_values=None, + value_values=(), desc="add_users_in_public_rooms", ) async def delete_all_from_user_dir(self) -> None: """Delete the entire user directory""" - def _delete_all_from_user_dir_txn(txn): + def _delete_all_from_user_dir_txn(txn: LoggingTransaction) -> None: txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") @@ -473,7 +518,7 @@ def _delete_all_from_user_dir_txn(txn): ) @cached() - async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, Any]]: + async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, str]]: return await self.db_pool.simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, @@ -497,7 +542,12 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): # add_users_who_share_private_rooms? 
SHARE_PRIVATE_WORKING_SET = 500 - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: Connection, + hs: "HomeServer", + ) -> None: super().__init__(database, db_conn, hs) self._prefer_local_users_in_search = ( @@ -506,7 +556,7 @@ def __init__(self, database: DatabasePool, db_conn, hs): self._server_name = hs.config.server.server_name async def remove_from_user_dir(self, user_id: str) -> None: - def _remove_from_user_dir_txn(txn): + def _remove_from_user_dir_txn(txn: LoggingTransaction) -> None: self.db_pool.simple_delete_txn( txn, table="user_directory", keyvalues={"user_id": user_id} ) @@ -532,7 +582,7 @@ def _remove_from_user_dir_txn(txn): "remove_from_user_dir", _remove_from_user_dir_txn ) - async def get_users_in_dir_due_to_room(self, room_id): + async def get_users_in_dir_due_to_room(self, room_id: str) -> Set[str]: """Get all user_ids that are in the room directory because they're in the given room_id """ @@ -565,7 +615,7 @@ async def remove_user_who_share_room(self, user_id: str, room_id: str) -> None: room_id """ - def _remove_user_who_share_room_txn(txn): + def _remove_user_who_share_room_txn(txn: LoggingTransaction) -> None: self.db_pool.simple_delete_txn( txn, table="users_who_share_private_rooms", @@ -586,7 +636,7 @@ def _remove_user_who_share_room_txn(txn): "remove_user_who_share_room", _remove_user_who_share_room_txn ) - async def get_user_dir_rooms_user_is_in(self, user_id): + async def get_user_dir_rooms_user_is_in(self, user_id: str) -> List[str]: """ Returns the rooms that a user is in. @@ -628,7 +678,9 @@ async def get_shared_rooms_for_users( A set of room ID's that the users share. 
""" - def _get_shared_rooms_for_users_txn(txn): + def _get_shared_rooms_for_users_txn( + txn: LoggingTransaction, + ) -> List[Dict[str, str]]: txn.execute( """ SELECT p1.room_id @@ -669,7 +721,9 @@ async def get_user_directory_stream_pos(self) -> Optional[int]: desc="get_user_directory_stream_pos", ) - async def search_user_dir(self, user_id, search_term, limit): + async def search_user_dir( + self, user_id: str, search_term: str, limit: int + ) -> JsonDict: """Searches for users in directory Returns: @@ -705,7 +759,7 @@ async def search_user_dir(self, user_id, search_term, limit): # We allow manipulating the ranking algorithm by injecting statements # based on config options. additional_ordering_statements = [] - ordering_arguments = () + ordering_arguments: Tuple[str, ...] = () if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -811,7 +865,7 @@ async def search_user_dir(self, user_id, search_term, limit): return {"limited": limited, "results": results} -def _parse_query_sqlite(search_term): +def _parse_query_sqlite(search_term: str) -> str: """Takes a plain unicode string from the user and converts it into a form that can be passed to database. We use this so that we can add prefix matching, which isn't something @@ -826,7 +880,7 @@ def _parse_query_sqlite(search_term): return " & ".join("(%s* OR %s)" % (result, result) for result in results) -def _parse_query_postgres(search_term): +def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]: """Takes a plain unicode string from the user and converts it into a form that can be passed to database. 
We use this so that we can add prefix matching, which isn't something diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index f3684c34a..ba32585a1 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import List, Tuple from unittest.mock import Mock from urllib.parse import quote @@ -325,7 +326,7 @@ def _compress_shared(self, shared): r.add((i["user_id"], i["other_user_id"], i["room_id"])) return r - def get_users_in_public_rooms(self): + def get_users_in_public_rooms(self) -> List[Tuple[str, str]]: r = self.get_success( self.store.db_pool.simple_select_list( "users_in_public_rooms", None, ("user_id", "room_id") @@ -336,7 +337,7 @@ def get_users_in_public_rooms(self): retval.append((i["user_id"], i["room_id"])) return retval - def get_users_who_share_private_rooms(self): + def get_users_who_share_private_rooms(self) -> List[Tuple[str, str, str]]: return self.get_success( self.store.db_pool.simple_select_list( "users_who_share_private_rooms", From fa7453638408c2c55fade2d20dba362ff23226e5 Mon Sep 17 00:00:00 2001 From: Jason Robinson <jasonr@element.io> Date: Fri, 24 Sep 2021 12:41:18 +0300 Subject: [PATCH 45/74] Fix AuthBlocking check when requester is appservice (#10881) If the MAU count had been reached, Synapse incorrectly blocked appservice users even though they've been explicitly configured not to be tracked (the default). This was due to bypassing the relevant if as it was chained behind another earlier hit if as an elif. 
Signed-off-by: Jason Robinson <jasonr@matrix.org> --- changelog.d/10881.bugfix | 1 + synapse/api/auth_blocking.py | 2 +- tests/api/test_auth.py | 62 ++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10881.bugfix diff --git a/changelog.d/10881.bugfix b/changelog.d/10881.bugfix new file mode 100644 index 000000000..0a8905cc4 --- /dev/null +++ b/changelog.d/10881.bugfix @@ -0,0 +1 @@ +Fix application service users being subject to MAU blocking if MAU had been reached, even if configured not to be blocked. diff --git a/synapse/api/auth_blocking.py b/synapse/api/auth_blocking.py index a3b95f4de..08fe160c9 100644 --- a/synapse/api/auth_blocking.py +++ b/synapse/api/auth_blocking.py @@ -81,7 +81,7 @@ async def check_auth_blocking( # We never block the server from doing actions on behalf of # users. return - elif requester.app_service and not self._track_appservice_user_ips: + if requester.app_service and not self._track_appservice_user_ips: # If we're authenticated as an appservice then we only block # auth if `track_appservice_user_ips` is set, as that option # implicitly means that application services are part of MAU diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index 8a4ef1305..cccff7af2 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -25,7 +25,9 @@ MissingClientTokenError, ResourceLimitError, ) +from synapse.appservice import ApplicationService from synapse.storage.databases.main.registration import TokenLookupResult +from synapse.types import Requester from tests import unittest from tests.test_utils import simple_async_mock @@ -290,6 +292,66 @@ def test_blocking_mau__depending_on_user_type(self): # Real users not allowed self.get_failure(self.auth.check_auth_blocking(), ResourceLimitError) + def test_blocking_mau__appservice_requester_allowed_when_not_tracking_ips(self): + self.auth_blocking._max_mau_value = 50 + self.auth_blocking._limit_usage_by_mau = True + 
self.auth_blocking._track_appservice_user_ips = False + + self.store.get_monthly_active_count = simple_async_mock(100) + self.store.user_last_seen_monthly_active = simple_async_mock() + self.store.is_trial_user = simple_async_mock() + + appservice = ApplicationService( + "abcd", + self.hs.config.server_name, + id="1234", + namespaces={ + "users": [{"regex": "@_appservice.*:sender", "exclusive": True}] + }, + sender="@appservice:sender", + ) + requester = Requester( + user="@appservice:server", + access_token_id=None, + device_id="FOOBAR", + is_guest=False, + shadow_banned=False, + app_service=appservice, + authenticated_entity="@appservice:server", + ) + self.get_success(self.auth.check_auth_blocking(requester=requester)) + + def test_blocking_mau__appservice_requester_disallowed_when_tracking_ips(self): + self.auth_blocking._max_mau_value = 50 + self.auth_blocking._limit_usage_by_mau = True + self.auth_blocking._track_appservice_user_ips = True + + self.store.get_monthly_active_count = simple_async_mock(100) + self.store.user_last_seen_monthly_active = simple_async_mock() + self.store.is_trial_user = simple_async_mock() + + appservice = ApplicationService( + "abcd", + self.hs.config.server_name, + id="1234", + namespaces={ + "users": [{"regex": "@_appservice.*:sender", "exclusive": True}] + }, + sender="@appservice:sender", + ) + requester = Requester( + user="@appservice:server", + access_token_id=None, + device_id="FOOBAR", + is_guest=False, + shadow_banned=False, + app_service=appservice, + authenticated_entity="@appservice:server", + ) + self.get_failure( + self.auth.check_auth_blocking(requester=requester), ResourceLimitError + ) + def test_reserved_threepid(self): self.auth_blocking._limit_usage_by_mau = True self.auth_blocking._max_mau_value = 1 From 50022cff966a3991fbd8a1e5c98f490d9b335442 Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Fri, 24 Sep 2021 11:01:25 +0100 Subject: [PATCH 46/74] Add reactor to `SynapseRequest` and fix up 
types. (#10868) --- changelog.d/10868.feature | 1 + synapse/http/server.py | 4 +- synapse/http/site.py | 37 ++++++++++------ synapse/rest/key/v2/remote_key_resource.py | 9 ++-- synapse/rest/media/v1/_base.py | 7 +-- synapse/rest/media/v1/config_resource.py | 4 +- synapse/rest/media/v1/download_resource.py | 5 +-- synapse/rest/media/v1/media_repository.py | 10 +++-- synapse/rest/media/v1/preview_url_resource.py | 3 +- synapse/rest/media/v1/thumbnail_resource.py | 15 +++---- synapse/rest/media/v1/upload_resource.py | 4 +- tests/http/test_additional_resource.py | 8 +++- tests/logging/test_terse_json.py | 3 +- tests/replication/test_multi_media_repo.py | 2 +- tests/rest/admin/test_admin.py | 6 +-- tests/rest/admin/test_media.py | 6 +-- tests/rest/admin/test_user.py | 2 +- tests/rest/client/test_account.py | 4 +- tests/rest/client/test_consent.py | 12 ++++-- tests/rest/client/utils.py | 2 +- tests/rest/key/v2/test_remote_key_resource.py | 4 +- tests/rest/media/v1/test_media_storage.py | 8 ++-- tests/server.py | 6 ++- tests/test_server.py | 43 ++++++++++++++----- 24 files changed, 123 insertions(+), 82 deletions(-) create mode 100644 changelog.d/10868.feature diff --git a/changelog.d/10868.feature b/changelog.d/10868.feature new file mode 100644 index 000000000..07e7b2c6a --- /dev/null +++ b/changelog.d/10868.feature @@ -0,0 +1 @@ +Speed up responding with large JSON objects to requests. 
diff --git a/synapse/http/server.py b/synapse/http/server.py index b79fa722e..e28b56abb 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -320,7 +320,7 @@ def __init__(self, canonical_json=False, extract_context=False): def _send_response( self, - request: Request, + request: SynapseRequest, code: int, response_object: Any, ): @@ -629,7 +629,7 @@ def _encode_json_bytes(json_object: Any) -> Iterator[bytes]: def respond_with_json( - request: Request, + request: SynapseRequest, code: int, json_object: Any, send_cors: bool = False, diff --git a/synapse/http/site.py b/synapse/http/site.py index dd4c749e1..755ad5663 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -14,13 +14,14 @@ import contextlib import logging import time -from typing import Optional, Tuple, Union +from typing import Generator, Optional, Tuple, Union import attr from zope.interface import implementer from twisted.internet.interfaces import IAddress, IReactorTime from twisted.python.failure import Failure +from twisted.web.http import HTTPChannel from twisted.web.resource import IResource, Resource from twisted.web.server import Request, Site @@ -61,10 +62,18 @@ class SynapseRequest(Request): logcontext: the log context for this request """ - def __init__(self, channel, *args, max_request_body_size: int = 1024, **kw): - Request.__init__(self, channel, *args, **kw) + def __init__( + self, + channel: HTTPChannel, + site: "SynapseSite", + *args, + max_request_body_size: int = 1024, + **kw, + ): + super().__init__(channel, *args, **kw) self._max_request_body_size = max_request_body_size - self.site: SynapseSite = channel.site + self.synapse_site = site + self.reactor = site.reactor self._channel = channel # this is used by the tests self.start_time = 0.0 @@ -97,7 +106,7 @@ def __repr__(self) -> str: self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), - self.site.site_tag, + self.synapse_site.site_tag, ) def 
handleContentChunk(self, data: bytes) -> None: @@ -216,7 +225,7 @@ def render(self, resrc: Resource) -> None: request=ContextRequest( request_id=request_id, ip_address=self.getClientIP(), - site_tag=self.site.site_tag, + site_tag=self.synapse_site.site_tag, # The requester is going to be unknown at this point. requester=None, authenticated_entity=None, @@ -228,7 +237,7 @@ def render(self, resrc: Resource) -> None: ) # override the Server header which is set by twisted - self.setHeader("Server", self.site.server_version_string) + self.setHeader("Server", self.synapse_site.server_version_string) with PreserveLoggingContext(self.logcontext): # we start the request metrics timer here with an initial stab @@ -247,7 +256,7 @@ def render(self, resrc: Resource) -> None: requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager - def processing(self): + def processing(self) -> Generator[None, None, None]: """Record the fact that we are processing this request. 
Returns a context manager; the correct way to use this is: @@ -346,10 +355,10 @@ def _started_processing(self, servlet_name: str) -> None: self.start_time, name=servlet_name, method=self.get_method() ) - self.site.access_logger.debug( + self.synapse_site.access_logger.debug( "%s - %s - Received request: %s %s", self.getClientIP(), - self.site.site_tag, + self.synapse_site.site_tag, self.get_method(), self.get_redacted_uri(), ) @@ -388,13 +397,13 @@ def _finished_processing(self) -> None: if authenticated_entity: requester = f"{authenticated_entity}|{requester}" - self.site.access_logger.log( + self.synapse_site.access_logger.log( log_level, "%s - %s - {%s}" " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" ' %sB %s "%s %s %s" "%s" [%d dbevts]', self.getClientIP(), - self.site.site_tag, + self.synapse_site.site_tag, requester, processing_time, response_send_time, @@ -522,7 +531,7 @@ def __init__( site_tag: str, config: ListenerConfig, resource: IResource, - server_version_string, + server_version_string: str, max_request_body_size: int, reactor: IReactorTime, ): @@ -542,6 +551,7 @@ def __init__( Site.__init__(self, resource, reactor=reactor) self.site_tag = site_tag + self.reactor = reactor assert config.http_options is not None proxied = config.http_options.x_forwarded @@ -550,6 +560,7 @@ def __init__( def request_factory(channel, queued: bool) -> Request: return request_class( channel, + self, max_request_body_size=max_request_body_size, queued=queued, ) diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index c111a9d20..3923ba843 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -17,12 +17,11 @@ from signedjson.sign import sign_json -from twisted.web.server import Request - from synapse.api.errors import Codes, SynapseError from synapse.crypto.keyring import ServerKeyFetcher from synapse.http.server import 
DirectServeJsonResource, respond_with_json from synapse.http.servlet import parse_integer, parse_json_object_from_request +from synapse.http.site import SynapseRequest from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import yieldable_gather_results @@ -102,7 +101,7 @@ def __init__(self, hs: "HomeServer"): ) self.config = hs.config - async def _async_render_GET(self, request: Request) -> None: + async def _async_render_GET(self, request: SynapseRequest) -> None: assert request.postpath is not None if len(request.postpath) == 1: (server,) = request.postpath @@ -119,7 +118,7 @@ async def _async_render_GET(self, request: Request) -> None: await self.query_keys(request, query, query_remote_on_cache_miss=True) - async def _async_render_POST(self, request: Request) -> None: + async def _async_render_POST(self, request: SynapseRequest) -> None: content = parse_json_object_from_request(request) query = content["server_keys"] @@ -128,7 +127,7 @@ async def _async_render_POST(self, request: Request) -> None: async def query_keys( self, - request: Request, + request: SynapseRequest, query: JsonDict, query_remote_on_cache_miss: bool = False, ) -> None: diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 7c881f2bd..014fa893d 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -27,6 +27,7 @@ from synapse.api.errors import Codes, SynapseError, cs_error from synapse.http.server import finish_request, respond_with_json +from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable from synapse.util.stringutils import is_ascii @@ -74,7 +75,7 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: ) -def respond_404(request: Request) -> None: +def respond_404(request: SynapseRequest) -> None: respond_with_json( request, 404, @@ -84,7 +85,7 @@ def respond_404(request: Request) -> None: async def 
respond_with_file( - request: Request, + request: SynapseRequest, media_type: str, file_path: str, file_size: Optional[int] = None, @@ -221,7 +222,7 @@ def _can_encode_filename_as_token(x: str) -> bool: async def respond_with_responder( - request: Request, + request: SynapseRequest, responder: "Optional[Responder]", media_type: str, file_size: Optional[int], diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py index a1d36e5cf..712d4e836 100644 --- a/synapse/rest/media/v1/config_resource.py +++ b/synapse/rest/media/v1/config_resource.py @@ -16,8 +16,6 @@ from typing import TYPE_CHECKING -from twisted.web.server import Request - from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.site import SynapseRequest @@ -39,5 +37,5 @@ async def _async_render_GET(self, request: SynapseRequest) -> None: await self.auth.get_user_by_req(request) respond_with_json(request, 200, self.limits_dict, send_cors=True) - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: respond_with_json(request, 200, {}, send_cors=True) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index d6d938953..6180fa575 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -15,10 +15,9 @@ import logging from typing import TYPE_CHECKING -from twisted.web.server import Request - from synapse.http.server import DirectServeJsonResource, set_cors_headers from synapse.http.servlet import parse_boolean +from synapse.http.site import SynapseRequest from ._base import parse_media_id, respond_404 @@ -37,7 +36,7 @@ def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"): self.media_repo = media_repo self.server_name = hs.hostname - async def _async_render_GET(self, request: Request) -> None: + async def _async_render_GET(self, request: 
SynapseRequest) -> None: set_cors_headers(request) request.setHeader( b"Content-Security-Policy", diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a30007a1e..c1bd81100 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -23,7 +23,6 @@ import twisted.web.http from twisted.internet.defer import Deferred from twisted.web.resource import Resource -from twisted.web.server import Request from synapse.api.errors import ( FederationDeniedError, @@ -34,6 +33,7 @@ ) from synapse.config._base import ConfigError from synapse.config.repository import ThumbnailRequirement +from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import UserID @@ -189,7 +189,7 @@ async def create_content( return "mxc://%s/%s" % (self.server_name, media_id) async def get_local_media( - self, request: Request, media_id: str, name: Optional[str] + self, request: SynapseRequest, media_id: str, name: Optional[str] ) -> None: """Responds to requests for local media, if exists, or returns 404. @@ -223,7 +223,11 @@ async def get_local_media( ) async def get_remote_media( - self, request: Request, server_name: str, media_id: str, name: Optional[str] + self, + request: SynapseRequest, + server_name: str, + media_id: str, + name: Optional[str], ) -> None: """Respond to requests for remote media. 
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 9ffa983fb..128706d29 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -29,7 +29,6 @@ from twisted.internet.defer import Deferred from twisted.internet.error import DNSLookupError -from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError from synapse.http.client import SimpleHttpClient @@ -168,7 +167,7 @@ def __init__( self._start_expire_url_cache_data, 10 * 1000 ) - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: request.setHeader(b"Allow", b"OPTIONS, GET") respond_with_json(request, 200, {}, send_cors=True) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 22f43d853..cb2f88676 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -17,11 +17,10 @@ import logging from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -from twisted.web.server import Request - from synapse.api.errors import SynapseError from synapse.http.server import DirectServeJsonResource, set_cors_headers from synapse.http.servlet import parse_integer, parse_string +from synapse.http.site import SynapseRequest from synapse.rest.media.v1.media_storage import MediaStorage from ._base import ( @@ -57,7 +56,7 @@ def __init__( self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.server_name = hs.hostname - async def _async_render_GET(self, request: Request) -> None: + async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) server_name, media_id, _ = parse_media_id(request) width = parse_integer(request, "width", required=True) @@ -88,7 +87,7 @@ async def _async_render_GET(self, request: Request) -> None: async def 
_respond_local_thumbnail( self, - request: Request, + request: SynapseRequest, media_id: str, width: int, height: int, @@ -121,7 +120,7 @@ async def _respond_local_thumbnail( async def _select_or_generate_local_thumbnail( self, - request: Request, + request: SynapseRequest, media_id: str, desired_width: int, desired_height: int, @@ -186,7 +185,7 @@ async def _select_or_generate_local_thumbnail( async def _select_or_generate_remote_thumbnail( self, - request: Request, + request: SynapseRequest, server_name: str, media_id: str, desired_width: int, @@ -249,7 +248,7 @@ async def _select_or_generate_remote_thumbnail( async def _respond_remote_thumbnail( self, - request: Request, + request: SynapseRequest, server_name: str, media_id: str, width: int, @@ -280,7 +279,7 @@ async def _respond_remote_thumbnail( async def _select_and_respond_with_thumbnail( self, - request: Request, + request: SynapseRequest, desired_width: int, desired_height: int, desired_method: str, diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 146adca8f..39b29318b 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -16,8 +16,6 @@ import logging from typing import IO, TYPE_CHECKING, Dict, List, Optional -from twisted.web.server import Request - from synapse.api.errors import Codes, SynapseError from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.servlet import parse_bytes_from_args @@ -46,7 +44,7 @@ def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"): self.max_upload_size = hs.config.max_upload_size self.clock = hs.get_clock() - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: respond_with_json(request, 200, {}, send_cors=True) async def _async_render_POST(self, request: SynapseRequest) -> None: diff --git a/tests/http/test_additional_resource.py 
b/tests/http/test_additional_resource.py index 768c2ba4e..391196425 100644 --- a/tests/http/test_additional_resource.py +++ b/tests/http/test_additional_resource.py @@ -45,7 +45,9 @@ def test_async(self): handler = _AsyncTestCustomEndpoint({}, None).handle_request resource = AdditionalResource(self.hs, handler) - channel = make_request(self.reactor, FakeSite(resource), "GET", "/") + channel = make_request( + self.reactor, FakeSite(resource, self.reactor), "GET", "/" + ) self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body, {"some_key": "some_value_async"}) @@ -54,7 +56,9 @@ def test_sync(self): handler = _SyncTestCustomEndpoint({}, None).handle_request resource = AdditionalResource(self.hs, handler) - channel = make_request(self.reactor, FakeSite(resource), "GET", "/") + channel = make_request( + self.reactor, FakeSite(resource, self.reactor), "GET", "/" + ) self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body, {"some_key": "some_value_sync"}) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 116071692..f73fcd684 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -152,7 +152,8 @@ def test_with_request_context(self): site = Mock(spec=["site_tag", "server_version_string", "getResourceFor"]) site.site_tag = "test-site" site.server_version_string = "Server v1" - request = SynapseRequest(FakeChannel(site, None)) + site.reactor = Mock() + request = SynapseRequest(FakeChannel(site, None), site) # Call requestReceived to finish instantiating the object. request.content = BytesIO() # Partially skip some of the internal processing of SynapseRequest. 
diff --git a/tests/replication/test_multi_media_repo.py b/tests/replication/test_multi_media_repo.py index 01b1b0d4a..13aa5eb51 100644 --- a/tests/replication/test_multi_media_repo.py +++ b/tests/replication/test_multi_media_repo.py @@ -68,7 +68,7 @@ def _get_media_req( resource = hs.get_media_repository_resource().children[b"download"] channel = make_request( self.reactor, - FakeSite(resource), + FakeSite(resource, self.reactor), "GET", f"/{target}/{media_id}", shorthand=False, diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index febd40b65..192073c52 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -201,7 +201,7 @@ def _ensure_quarantined(self, admin_user_tok, server_and_media_id): """Ensure a piece of media is quarantined when trying to access it.""" channel = make_request( self.reactor, - FakeSite(self.download_resource), + FakeSite(self.download_resource, self.reactor), "GET", server_and_media_id, shorthand=False, @@ -271,7 +271,7 @@ def test_quarantine_media_by_id(self): # Attempt to access the media channel = make_request( self.reactor, - FakeSite(self.download_resource), + FakeSite(self.download_resource, self.reactor), "GET", server_name_and_media_id, shorthand=False, @@ -458,7 +458,7 @@ def test_cannot_quarantine_safe_media(self): # Attempt to access each piece of media channel = make_request( self.reactor, - FakeSite(self.download_resource), + FakeSite(self.download_resource, self.reactor), "GET", server_and_media_id_2, shorthand=False, diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 2f02934e7..f81386607 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -125,7 +125,7 @@ def test_delete_media(self): # Attempt to access media channel = make_request( self.reactor, - FakeSite(download_resource), + FakeSite(download_resource, self.reactor), "GET", server_and_media_id, shorthand=False, @@ -164,7 +164,7 @@ def 
test_delete_media(self): # Attempt to access media channel = make_request( self.reactor, - FakeSite(download_resource), + FakeSite(download_resource, self.reactor), "GET", server_and_media_id, shorthand=False, @@ -525,7 +525,7 @@ def _access_media(self, server_and_media_id, expect_success=True): channel = make_request( self.reactor, - FakeSite(download_resource), + FakeSite(download_resource, self.reactor), "GET", server_and_media_id, shorthand=False, diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index cc3f16c62..e79e0e185 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2973,7 +2973,7 @@ def _create_media_and_access( # Try to access a media and to create `last_access_ts` channel = make_request( self.reactor, - FakeSite(download_resource), + FakeSite(download_resource, self.reactor), "GET", server_and_media_id, shorthand=False, diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index b946fca8b..9e9e953cf 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -312,7 +312,7 @@ def _validate_token(self, link): # Load the password reset confirmation page channel = make_request( self.reactor, - FakeSite(self.submit_token_resource), + FakeSite(self.submit_token_resource, self.reactor), "GET", path, shorthand=False, @@ -326,7 +326,7 @@ def _validate_token(self, link): # Confirm the password reset channel = make_request( self.reactor, - FakeSite(self.submit_token_resource), + FakeSite(self.submit_token_resource, self.reactor), "POST", path, content=b"", diff --git a/tests/rest/client/test_consent.py b/tests/rest/client/test_consent.py index 65c58ce70..84d092ca8 100644 --- a/tests/rest/client/test_consent.py +++ b/tests/rest/client/test_consent.py @@ -61,7 +61,11 @@ def test_render_public_consent(self): """You can observe the terms form without specifying a user""" resource = consent_resource.ConsentResource(self.hs) channel = make_request( - 
self.reactor, FakeSite(resource), "GET", "/consent?v=1", shorthand=False + self.reactor, + FakeSite(resource, self.reactor), + "GET", + "/consent?v=1", + shorthand=False, ) self.assertEqual(channel.code, 200) @@ -83,7 +87,7 @@ def test_accept_consent(self): ) channel = make_request( self.reactor, - FakeSite(resource), + FakeSite(resource, self.reactor), "GET", consent_uri, access_token=access_token, @@ -98,7 +102,7 @@ def test_accept_consent(self): # POST to the consent page, saying we've agreed channel = make_request( self.reactor, - FakeSite(resource), + FakeSite(resource, self.reactor), "POST", consent_uri + "&v=" + version, access_token=access_token, @@ -110,7 +114,7 @@ def test_accept_consent(self): # changed channel = make_request( self.reactor, - FakeSite(resource), + FakeSite(resource, self.reactor), "GET", consent_uri, access_token=access_token, diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index c56e45fc1..3075d3f28 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -383,7 +383,7 @@ def upload_media( path = "/_matrix/media/r0/upload?filename=%s" % (filename,) channel = make_request( self.hs.get_reactor(), - FakeSite(resource), + FakeSite(resource, self.hs.get_reactor()), "POST", path, content=image_data, diff --git a/tests/rest/key/v2/test_remote_key_resource.py b/tests/rest/key/v2/test_remote_key_resource.py index a75c0ea3f..4672a6859 100644 --- a/tests/rest/key/v2/test_remote_key_resource.py +++ b/tests/rest/key/v2/test_remote_key_resource.py @@ -84,7 +84,7 @@ def make_notary_request(self, server_name: str, key_id: str) -> dict: Checks that the response is a 200 and returns the decoded json body. 
""" channel = FakeChannel(self.site, self.reactor) - req = SynapseRequest(channel) + req = SynapseRequest(channel, self.site) req.content = BytesIO(b"") req.requestReceived( b"GET", @@ -183,7 +183,7 @@ async def post_json(destination, path, data): ) channel = FakeChannel(self.site, self.reactor) - req = SynapseRequest(channel) + req = SynapseRequest(channel, self.site) req.content = BytesIO(encode_canonical_json(data)) req.requestReceived( diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 9ea1c2bf2..44a643d50 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -252,7 +252,7 @@ def _req(self, content_disposition): channel = make_request( self.reactor, - FakeSite(self.download_resource), + FakeSite(self.download_resource, self.reactor), "GET", self.media_id, shorthand=False, @@ -384,7 +384,7 @@ def test_thumbnail_repeated_thumbnail(self): params = "?width=32&height=32&method=scale" channel = make_request( self.reactor, - FakeSite(self.thumbnail_resource), + FakeSite(self.thumbnail_resource, self.reactor), "GET", self.media_id + params, shorthand=False, @@ -413,7 +413,7 @@ def test_thumbnail_repeated_thumbnail(self): channel = make_request( self.reactor, - FakeSite(self.thumbnail_resource), + FakeSite(self.thumbnail_resource, self.reactor), "GET", self.media_id + params, shorthand=False, @@ -433,7 +433,7 @@ def _test_thumbnail(self, method, expected_body, expected_found): params = "?width=32&height=32&method=" + method channel = make_request( self.reactor, - FakeSite(self.thumbnail_resource), + FakeSite(self.thumbnail_resource, self.reactor), "GET", self.media_id + params, shorthand=False, diff --git a/tests/server.py b/tests/server.py index b861c7b86..88dfa8058 100644 --- a/tests/server.py +++ b/tests/server.py @@ -19,6 +19,7 @@ IPullProducer, IPushProducer, IReactorPluggableNameResolver, + IReactorTime, IResolverSimple, ITransport, ) @@ -181,13 +182,14 @@ 
class FakeSite: site_tag = "test" access_logger = logging.getLogger("synapse.access.http.fake") - def __init__(self, resource: IResource): + def __init__(self, resource: IResource, reactor: IReactorTime): """ Args: resource: the resource to be used for rendering all requests """ self._resource = resource + self.reactor = reactor def getResourceFor(self, request): return self._resource @@ -268,7 +270,7 @@ def make_request( channel = FakeChannel(site, reactor, ip=client_ip) - req = request(channel) + req = request(channel, site) req.content = BytesIO(content) # Twisted expects to be at the end of the content when parsing the request. req.content.seek(SEEK_END) diff --git a/tests/test_server.py b/tests/test_server.py index 407e172e4..f2ffbc895 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -65,7 +65,10 @@ def _callback(request, **kwargs): ) make_request( - self.reactor, FakeSite(res), b"GET", b"/_matrix/foo/%E2%98%83?a=%E2%98%83" + self.reactor, + FakeSite(res, self.reactor), + b"GET", + b"/_matrix/foo/%E2%98%83?a=%E2%98%83", ) self.assertEqual(got_kwargs, {"room_id": "\N{SNOWMAN}"}) @@ -84,7 +87,9 @@ def _callback(request, **kwargs): "GET", [re.compile("^/_matrix/foo$")], _callback, "test_servlet" ) - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/_matrix/foo") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"GET", b"/_matrix/foo" + ) self.assertEqual(channel.result["code"], b"500") @@ -100,7 +105,7 @@ def _throw(*args): def _callback(request, **kwargs): d = Deferred() d.addCallback(_throw) - self.reactor.callLater(1, d.callback, True) + self.reactor.callLater(0.5, d.callback, True) return make_deferred_yieldable(d) res = JsonResource(self.homeserver) @@ -108,7 +113,9 @@ def _callback(request, **kwargs): "GET", [re.compile("^/_matrix/foo$")], _callback, "test_servlet" ) - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/_matrix/foo") + channel = make_request( + self.reactor, FakeSite(res, 
self.reactor), b"GET", b"/_matrix/foo" + ) self.assertEqual(channel.result["code"], b"500") @@ -126,7 +133,9 @@ def _callback(request, **kwargs): "GET", [re.compile("^/_matrix/foo$")], _callback, "test_servlet" ) - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/_matrix/foo") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"GET", b"/_matrix/foo" + ) self.assertEqual(channel.result["code"], b"403") self.assertEqual(channel.json_body["error"], "Forbidden!!one!") @@ -148,7 +157,9 @@ def _callback(request, **kwargs): "GET", [re.compile("^/_matrix/foo$")], _callback, "test_servlet" ) - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/_matrix/foobar") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"GET", b"/_matrix/foobar" + ) self.assertEqual(channel.result["code"], b"400") self.assertEqual(channel.json_body["error"], "Unrecognized request") @@ -173,7 +184,9 @@ def _callback(request, **kwargs): ) # The path was registered as GET, but this is a HEAD request. 
- channel = make_request(self.reactor, FakeSite(res), b"HEAD", b"/_matrix/foo") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"HEAD", b"/_matrix/foo" + ) self.assertEqual(channel.result["code"], b"200") self.assertNotIn("body", channel.result) @@ -280,7 +293,9 @@ async def callback(request): res = WrapHtmlRequestHandlerTests.TestResource() res.callback = callback - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/path") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"GET", b"/path" + ) self.assertEqual(channel.result["code"], b"200") body = channel.result["body"] @@ -298,7 +313,9 @@ async def callback(request, **kwargs): res = WrapHtmlRequestHandlerTests.TestResource() res.callback = callback - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/path") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"GET", b"/path" + ) self.assertEqual(channel.result["code"], b"301") headers = channel.result["headers"] @@ -319,7 +336,9 @@ async def callback(request, **kwargs): res = WrapHtmlRequestHandlerTests.TestResource() res.callback = callback - channel = make_request(self.reactor, FakeSite(res), b"GET", b"/path") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"GET", b"/path" + ) self.assertEqual(channel.result["code"], b"304") headers = channel.result["headers"] @@ -338,7 +357,9 @@ async def callback(request): res = WrapHtmlRequestHandlerTests.TestResource() res.callback = callback - channel = make_request(self.reactor, FakeSite(res), b"HEAD", b"/path") + channel = make_request( + self.reactor, FakeSite(res, self.reactor), b"HEAD", b"/path" + ) self.assertEqual(channel.result["code"], b"200") self.assertNotIn("body", channel.result) From 261c9763c472f0ea1ceac9729dfc3a5da2799300 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 24 Sep 2021 11:56:13 +0100 Subject: [PATCH 47/74] Simplify 
`_auth_and_persist_fetched_events` (#10901) Combine the two loops over the list of events, and hence get rid of `_NewEventInfo`. Also pass the event back alongside the context, so that it's easier to process the result. --- changelog.d/10901.misc | 1 + synapse/handlers/federation_event.py | 91 +++++++--------------------- 2 files changed, 23 insertions(+), 69 deletions(-) create mode 100644 changelog.d/10901.misc diff --git a/changelog.d/10901.misc b/changelog.d/10901.misc new file mode 100644 index 000000000..9a765435d --- /dev/null +++ b/changelog.d/10901.misc @@ -0,0 +1 @@ +Clean up some of the federation event authentication code for clarity. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 7d468bd2d..4eefcc36d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -27,11 +27,8 @@ Tuple, ) -import attr from prometheus_client import Counter -from twisted.internet import defer - from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -54,11 +51,7 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError -from synapse.logging.context import ( - make_deferred_yieldable, - nested_logging_context, - run_in_background, -) +from synapse.logging.context import nested_logging_context, run_in_background from synapse.logging.utils import log_function from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet @@ -75,7 +68,11 @@ UserID, get_domain_from_id, ) -from synapse.util.async_helpers import Linearizer, concurrently_execute +from synapse.util.async_helpers import ( + Linearizer, + concurrently_execute, + yieldable_gather_results, +) from synapse.util.iterutils import batch_iter from synapse.util.retryutils import NotRetryingDestination from 
synapse.util.stringutils import shortstr @@ -92,30 +89,6 @@ ) -@attr.s(slots=True, frozen=True, auto_attribs=True) -class _NewEventInfo: - """Holds information about a received event, ready for passing to _auth_and_persist_events - - Attributes: - event: the received event - - claimed_auth_event_map: a map of (type, state_key) => event for the event's - claimed auth_events. - - This can include events which have not yet been persisted, in the case that - we are backfilling a batch of events. - - Note: May be incomplete: if we were unable to find all of the claimed auth - events. Also, treat the contents with caution: the events might also have - been rejected, might not yet have been authorized themselves, or they might - be in the wrong room. - - """ - - event: EventBase - claimed_auth_event_map: StateMap[EventBase] - - class FederationEventHandler: """Handles events that originated from federation. @@ -1203,47 +1176,27 @@ async def _auth_and_persist_fetched_events( allow_rejected=True, ) - event_infos = [] - for event in fetched_events: - auth = {} - for auth_event_id in event.auth_event_ids(): - ae = persisted_events.get(auth_event_id) - if ae: - auth[(ae.type, ae.state_key)] = ae - else: - logger.info("Missing auth event %s", auth_event_id) - - event_infos.append(_NewEventInfo(event, auth)) - - if not event_infos: - return - - async def prep(ev_info: _NewEventInfo) -> EventContext: - event = ev_info.event + async def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: with nested_logging_context(suffix=event.event_id): - res = EventContext.for_outlier() - res = await self._check_event_auth( + auth = {} + for auth_event_id in event.auth_event_ids(): + ae = persisted_events.get(auth_event_id) + if ae: + auth[(ae.type, ae.state_key)] = ae + else: + logger.info("Missing auth event %s", auth_event_id) + + context = EventContext.for_outlier() + context = await self._check_event_auth( origin, event, - res, - 
claimed_auth_event_map=ev_info.claimed_auth_event_map, + context, + claimed_auth_event_map=auth, ) - return res - - contexts = await make_deferred_yieldable( - defer.gatherResults( - [run_in_background(prep, ev_info) for ev_info in event_infos], - consumeErrors=True, - ) - ) + return event, context - await self.persist_events_and_notify( - room_id, - [ - (ev_info.event, context) - for ev_info, context in zip(event_infos, contexts) - ], - ) + events_to_persist = await yieldable_gather_results(prep, fetched_events) + await self.persist_events_and_notify(room_id, events_to_persist) async def _check_event_auth( self, From 85551b7a8555eb4e4456d5cf2db0fecd4a44621c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 24 Sep 2021 11:56:33 +0100 Subject: [PATCH 48/74] Factor out common code for persisting fetched auth events (#10896) * Factor more stuff out of `_get_events_and_persist` It turns out that the event-sorting algorithm in `_get_events_and_persist` is also useful in other circumstances. Here we move the current `_auth_and_persist_fetched_events` to `_auth_and_persist_fetched_events_inner`, and then factor the sorting part out to `_auth_and_persist_fetched_events`. * `_get_remote_auth_chain_for_event`: remove redundant `outlier` assignment `get_event_auth` returns events with the outlier flag already set, so this is redundant (though we need to update a test where `get_event_auth` is mocked). * `_get_remote_auth_chain_for_event`: move existing-event tests earlier Move a couple of tests outside the loop. This is a bit inefficient for now, but a future commit will make it better. It should be functionally identical. * `_get_remote_auth_chain_for_event`: use `_auth_and_persist_fetched_events` We can use the same codepath for persisting the events fetched as part of an auth chain as for those fetched individually by `_get_events_and_persist` for building the state at a backwards extremity. 
* `_get_remote_auth_chain_for_event`: use a dict for efficiency `_auth_and_persist_fetched_events` sorts the events itself, so we no longer need to care about maintaining the ordering from `get_event_auth` (and no longer need to sort by depth in `get_event_auth`). That means that we can use a map, making it easier to filter out events we already have, etc. * changelog * `_auth_and_persist_fetched_events`: improve docstring --- changelog.d/10896.misc | 1 + synapse/federation/federation_client.py | 2 - synapse/handlers/federation_event.py | 103 +++++++++++------------- tests/handlers/test_federation.py | 7 +- 4 files changed, 55 insertions(+), 58 deletions(-) create mode 100644 changelog.d/10896.misc diff --git a/changelog.d/10896.misc b/changelog.d/10896.misc new file mode 100644 index 000000000..41de99584 --- /dev/null +++ b/changelog.d/10896.misc @@ -0,0 +1 @@ + Clean up some of the federation event authentication code for clarity. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 1416abd0f..584836c04 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -501,8 +501,6 @@ async def get_event_auth( destination, auth_chain, outlier=True, room_version=room_version ) - signed_auth.sort(key=lambda e: e.depth) - return signed_auth def _is_unknown_endpoint( diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 4eefcc36d..8fd9e5104 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1080,7 +1080,7 @@ async def _get_events_and_persist( room_version = await self._store.get_room_version(room_id) - event_map: Dict[str, EventBase] = {} + events: List[EventBase] = [] async def get_event(event_id: str) -> None: with nested_logging_context(event_id): @@ -1098,8 +1098,7 @@ async def get_event(event_id: str) -> None: event_id, ) return - - event_map[event.event_id] = event + events.append(event) 
except Exception as e: logger.warning( @@ -1110,11 +1109,29 @@ async def get_event(event_id: str) -> None: ) await concurrently_execute(get_event, event_ids, 5) - logger.info("Fetched %i events of %i requested", len(event_map), len(event_ids)) + logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) + await self._auth_and_persist_fetched_events(destination, room_id, events) + + async def _auth_and_persist_fetched_events( + self, origin: str, room_id: str, events: Iterable[EventBase] + ) -> None: + """Persist the events fetched by _get_events_and_persist or _get_remote_auth_chain_for_event + + The events to be persisted must be outliers. + + We first sort the events to make sure that we process each event's auth_events + before the event itself, and then auth and persist them. + + Notifies about the events where appropriate. + + Params: + origin: where the events came from + room_id: the room that the events are meant to be in (though this has + not yet been checked) + events: the events that have been fetched + """ + event_map = {event.event_id: event for event in events} - # we now need to auth the events in an order which ensures that each event's - # auth_events are authed before the event itself. - # # XXX: it might be possible to kick this process off in parallel with fetching # the events. while event_map: @@ -1141,22 +1158,18 @@ async def get_event(event_id: str) -> None: "Persisting %i of %i remaining events", len(roots), len(event_map) ) - await self._auth_and_persist_fetched_events(destination, room_id, roots) + await self._auth_and_persist_fetched_events_inner(origin, room_id, roots) for ev in roots: del event_map[ev.event_id] - async def _auth_and_persist_fetched_events( + async def _auth_and_persist_fetched_events_inner( self, origin: str, room_id: str, fetched_events: Collection[EventBase] ) -> None: - """Persist the events fetched by _get_events_and_persist. 
+ """Helper for _auth_and_persist_fetched_events - The events should not depend on one another, e.g. this should be used to persist - a bunch of outliers, but not a chunk of individual events that depend - on each other for state calculations. - - We also assume that all of the auth events for all of the events have already - been persisted. + Persists a batch of events where we have (theoretically) already persisted all + of their auth events. Notifies about the events where appropriate. @@ -1164,7 +1177,7 @@ async def _auth_and_persist_fetched_events( origin: where the events came from room_id: the room that the events are meant to be in (though this has not yet been checked) - event_id: map from event_id -> event for the fetched events + fetched_events: the events to persist """ # get all the auth events for all the events in this batch. By now, they should # have been persisted. @@ -1558,53 +1571,33 @@ async def _get_remote_auth_chain_for_event( event_id: the event for which we are lacking auth events """ try: - remote_auth_chain = await self._federation_client.get_event_auth( - destination, room_id, event_id - ) + remote_event_map = { + e.event_id: e + for e in await self._federation_client.get_event_auth( + destination, room_id, event_id + ) + } except RequestSendFailed as e1: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. logger.info("Failed to get event auth from remote: %s", e1) return - seen_remotes = await self._store.have_seen_events( - room_id, [e.event_id for e in remote_auth_chain] - ) + logger.info("/event_auth returned %i events", len(remote_event_map)) - for auth_event in remote_auth_chain: - if auth_event.event_id in seen_remotes: - continue + # `event` may be returned, but we should not yet process it. + remote_event_map.pop(event_id, None) - if auth_event.event_id == event_id: - continue + # nor should we reprocess any events we have already seen. 
+ seen_remotes = await self._store.have_seen_events( + room_id, remote_event_map.keys() + ) + for s in seen_remotes: + remote_event_map.pop(s, None) - try: - auth_ids = auth_event.auth_event_ids() - auth = { - (e.type, e.state_key): e - for e in remote_auth_chain - if e.event_id in auth_ids or e.type == EventTypes.Create - } - auth_event.internal_metadata.outlier = True - - logger.debug( - "_check_event_auth %s missing_auth: %s", - event_id, - auth_event.event_id, - ) - missing_auth_event_context = EventContext.for_outlier() - missing_auth_event_context = await self._check_event_auth( - destination, - auth_event, - missing_auth_event_context, - claimed_auth_event_map=auth, - ) - await self.persist_events_and_notify( - room_id, - [(auth_event, missing_auth_event_context)], - ) - except AuthError: - pass + await self._auth_and_persist_fetched_events( + destination, room_id, remote_event_map.values() + ) async def _update_context_for_auth_events( self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 6c67a16de..936ebf3dd 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -308,7 +308,12 @@ def test_backfill_floating_outlier_membership_auth(self): async def get_event_auth( destination: str, room_id: str, event_id: str ) -> List[EventBase]: - return auth_events + return [ + event_from_pdu_json( + ae.get_pdu_json(), room_version=room_version, outlier=True + ) + for ae in auth_events + ] self.handler.federation_client.get_event_auth = get_event_auth From bb7fdd821b07016a43bdbb245eda5b35356863c0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Fri, 24 Sep 2021 07:25:21 -0400 Subject: [PATCH 49/74] Use direct references for configuration variables (part 5). 
(#10897) --- changelog.d/10897.misc | 1 + synapse/app/_base.py | 4 ++-- synapse/app/admin_cmd.py | 6 ++--- synapse/app/generic_worker.py | 6 ++--- synapse/app/homeserver.py | 2 +- synapse/config/logger.py | 4 +++- synapse/crypto/context_factory.py | 4 ++-- synapse/events/spamcheck.py | 2 +- synapse/events/third_party_rules.py | 4 ++-- synapse/handlers/auth.py | 10 ++++---- synapse/handlers/directory.py | 6 ++--- synapse/handlers/federation.py | 2 +- synapse/handlers/message.py | 8 +++---- synapse/handlers/register.py | 2 +- synapse/handlers/room.py | 8 ++++--- synapse/handlers/room_list.py | 2 +- synapse/handlers/room_member.py | 2 +- synapse/handlers/saml.py | 15 ++++++------ synapse/handlers/sso.py | 10 ++++---- synapse/handlers/stats.py | 2 +- synapse/handlers/user_directory.py | 2 +- synapse/logging/opentracing.py | 6 ++--- synapse/replication/http/_base.py | 4 ++-- synapse/replication/tcp/handler.py | 4 ++-- synapse/rest/admin/__init__.py | 2 +- synapse/rest/client/login.py | 2 +- synapse/rest/client/user_directory.py | 2 +- synapse/rest/client/versions.py | 6 ++--- synapse/rest/client/voip.py | 12 +++++----- synapse/rest/media/v1/config_resource.py | 2 +- synapse/rest/media/v1/media_repository.py | 20 +++++++++------- synapse/rest/media/v1/preview_url_resource.py | 10 ++++---- synapse/rest/media/v1/storage_provider.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 2 +- synapse/rest/media/v1/upload_resource.py | 2 +- synapse/rest/synapse/client/__init__.py | 2 +- .../synapse/client/saml2/metadata_resource.py | 2 +- .../server_notices/server_notices_manager.py | 23 ++++++++++--------- .../storage/databases/main/registration.py | 2 +- synapse/storage/databases/main/stats.py | 2 +- .../storage/databases/main/user_directory.py | 4 ++-- tests/handlers/test_directory.py | 4 +++- tests/handlers/test_stats.py | 8 +++---- tests/handlers/test_user_directory.py | 6 ++--- tests/rest/admin/test_media.py | 4 ++-- tests/rest/admin/test_user.py | 2 +- 
tests/rest/media/v1/test_media_storage.py | 2 +- .../test_resource_limits_server_notices.py | 2 +- 48 files changed, 128 insertions(+), 113 deletions(-) create mode 100644 changelog.d/10897.misc diff --git a/changelog.d/10897.misc b/changelog.d/10897.misc new file mode 100644 index 000000000..586a0b3a9 --- /dev/null +++ b/changelog.d/10897.misc @@ -0,0 +1 @@ +Use direct references to config flags. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index f657f11f7..548f6dcde 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -88,8 +88,8 @@ def start_worker_reactor(appname, config, run_command=reactor.run): appname, soft_file_limit=config.soft_file_limit, gc_thresholds=config.gc_thresholds, - pid_file=config.worker_pid_file, - daemonize=config.worker_daemonize, + pid_file=config.worker.worker_pid_file, + daemonize=config.worker.worker_daemonize, print_pidfile=config.print_pidfile, logger=logger, run_command=run_command, diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 259d5ec7c..f2c5b7524 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -186,9 +186,9 @@ def start(config_options): config.worker.worker_app = "synapse.app.admin_cmd" if ( - not config.worker_daemonize - and not config.worker_log_file - and not config.worker_log_config + not config.worker.worker_daemonize + and not config.worker.worker_log_file + and not config.worker.worker_log_config ): # Since we're meant to be run as a "command" let's not redirect stdio # unless we've actually set log config. 
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index e0776689c..3036e1b4a 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -140,7 +140,7 @@ def __init__(self, hs): self.auth = hs.get_auth() self.store = hs.get_datastore() self.http_client = hs.get_simple_http_client() - self.main_uri = hs.config.worker_main_http_uri + self.main_uri = hs.config.worker.worker_main_http_uri async def on_POST(self, request: Request, device_id: Optional[str]): requester = await self.auth.get_user_by_req(request, allow_guest=True) @@ -321,7 +321,7 @@ def _listen_http(self, listener_config: ListenerConfig): elif name == "federation": resources.update({FEDERATION_PREFIX: TransportLayerServer(self)}) elif name == "media": - if self.config.can_load_media_repo: + if self.config.media.can_load_media_repo: media_repo = self.get_media_repository_resource() # We need to serve the admin servlets for media on the @@ -384,7 +384,7 @@ def _listen_http(self, listener_config: ListenerConfig): logger.info("Synapse worker now listening on port %d", port) def start_listening(self): - for listener in self.config.worker_listeners: + for listener in self.config.worker.worker_listeners: if listener.type == "http": self._listen_http(listener) elif listener.type == "manhole": diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index f1769f146..205831dcd 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -234,7 +234,7 @@ def _configure_named_resource(self, name, compress=False): ) if name in ["media", "federation", "client"]: - if self.config.enable_media_repo: + if self.config.media.enable_media_repo: media_repo = self.get_media_repository_resource() resources.update( {MEDIA_PREFIX: media_repo, LEGACY_MEDIA_PREFIX: media_repo} diff --git a/synapse/config/logger.py b/synapse/config/logger.py index bf8ca7d5f..0a08231e5 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -322,7 +322,9 @@ def 
setup_logging( """ log_config_path = ( - config.worker_log_config if use_worker_options else config.logging.log_config + config.worker.worker_log_config + if use_worker_options + else config.logging.log_config ) # Perform one-time logging configuration. diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index d310976fe..2a6110eb1 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -74,8 +74,8 @@ def configure_context(context, config): context.set_options( SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3 | SSL.OP_NO_TLSv1 | SSL.OP_NO_TLSv1_1 ) - context.use_certificate_chain_file(config.tls_certificate_file) - context.use_privatekey(config.tls_private_key) + context.use_certificate_chain_file(config.tls.tls_certificate_file) + context.use_privatekey(config.tls.tls_private_key) # https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ context.set_cipher_list( diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 57f1d53fa..19ee246f9 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -78,7 +78,7 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): """ spam_checkers: List[Any] = [] api = hs.get_module_api() - for module, config in hs.config.spam_checkers: + for module, config in hs.config.spamchecker.spam_checkers: # Older spam checkers don't accept the `api` argument, so we # try and detect support. spam_args = inspect.getfullargspec(module) diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 7a6eb3e51..d94b1bb4d 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -42,10 +42,10 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): """Wrapper that loads a third party event rules module configured using the old configuration, and registers the hooks they implement. 
""" - if hs.config.third_party_event_rules is None: + if hs.config.thirdpartyrules.third_party_event_rules is None: return - module, config = hs.config.third_party_event_rules + module, config = hs.config.thirdpartyrules.third_party_event_rules api = hs.get_module_api() third_party_rules = module(config=config, module_api=api) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 0f80dfdc4..a8c717efd 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -277,23 +277,25 @@ def __init__(self, hs: "HomeServer"): # after the SSO completes and before redirecting them back to their client. # It notifies the user they are about to give access to their matrix account # to the client. - self._sso_redirect_confirm_template = hs.config.sso_redirect_confirm_template + self._sso_redirect_confirm_template = ( + hs.config.sso.sso_redirect_confirm_template + ) # The following template is shown during user interactive authentication # in the fallback auth scenario. It notifies the user that they are # authenticating for an operation to occur on their account. - self._sso_auth_confirm_template = hs.config.sso_auth_confirm_template + self._sso_auth_confirm_template = hs.config.sso.sso_auth_confirm_template # The following template is shown during the SSO authentication process if # the account is deactivated. self._sso_account_deactivated_template = ( - hs.config.sso_account_deactivated_template + hs.config.sso.sso_account_deactivated_template ) self._server_name = hs.config.server.server_name # cast to tuple for use with str.startswith - self._whitelisted_sso_clients = tuple(hs.config.sso_client_whitelist) + self._whitelisted_sso_clients = tuple(hs.config.sso.sso_client_whitelist) # A mapping of user ID to extra attributes to include in the login # response. 
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index d487fee62..5cfba3c81 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -48,7 +48,7 @@ def __init__(self, hs: "HomeServer"): self.event_creation_handler = hs.get_event_creation_handler() self.store = hs.get_datastore() self.config = hs.config - self.enable_room_list_search = hs.config.enable_room_list_search + self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search self.require_membership = hs.config.require_membership_for_aliases self.third_party_event_rules = hs.get_third_party_event_rules() @@ -143,7 +143,7 @@ async def create_association( ): raise AuthError(403, "This user is not permitted to create this alias") - if not self.config.is_alias_creation_allowed( + if not self.config.roomdirectory.is_alias_creation_allowed( user_id, room_id, room_alias_str ): # Lets just return a generic message, as there may be all sorts of @@ -459,7 +459,7 @@ async def edit_published_room_list( if canonical_alias: room_aliases.append(canonical_alias) - if not self.config.is_publishing_room_allowed( + if not self.config.roomdirectory.is_publishing_room_allowed( user_id, room_id, room_aliases ): # Lets just return a generic message, as there may be all sorts of diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 4523b2563..b17ef2a9a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -91,7 +91,7 @@ def __init__(self, hs: "HomeServer"): self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() self._event_auth_handler = hs.get_event_auth_handler() - self._server_notices_mxid = hs.config.server_notices_mxid + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self.config = hs.config self.http_client = hs.get_proxied_blacklisted_http_client() self._replication = hs.get_replication_data_handler() diff --git 
a/synapse/handlers/message.py b/synapse/handlers/message.py index ad4e4a3d6..c66aefe2c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -692,10 +692,10 @@ async def _is_exempt_from_privacy_policy( return False async def _is_server_notices_room(self, room_id: str) -> bool: - if self.config.server_notices_mxid is None: + if self.config.servernotices.server_notices_mxid is None: return False user_ids = await self.store.get_users_in_room(room_id) - return self.config.server_notices_mxid in user_ids + return self.config.servernotices.server_notices_mxid in user_ids async def assert_accepted_privacy_policy(self, requester: Requester) -> None: """Check if a user has accepted the privacy policy @@ -731,8 +731,8 @@ async def assert_accepted_privacy_policy(self, requester: Requester) -> None: # exempt the system notices user if ( - self.config.server_notices_mxid is not None - and user_id == self.config.server_notices_mxid + self.config.servernotices.server_notices_mxid is not None + and user_id == self.config.servernotices.server_notices_mxid ): return diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 01c5e1385..4f99f137a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -98,7 +98,7 @@ def __init__(self, hs: "HomeServer"): self.macaroon_gen = hs.get_macaroon_generator() self._account_validity_handler = hs.get_account_validity_handler() self._user_consent_version = self.hs.config.consent.user_consent_version - self._server_notices_mxid = hs.config.server_notices_mxid + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self._server_name = hs.hostname self.spam_checker = hs.get_spam_checker() diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index b5768220d..408b7d7b7 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -126,7 +126,7 @@ def __init__(self, hs: "HomeServer"): for preset_name, preset_config in self._presets_dict.items(): 
encrypted = ( preset_name - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) preset_config["encrypted"] = encrypted @@ -141,7 +141,7 @@ def __init__(self, hs: "HomeServer"): self._upgrade_response_cache: ResponseCache[Tuple[str, str]] = ResponseCache( hs.get_clock(), "room_upgrade", timeout_ms=FIVE_MINUTES_IN_MS ) - self._server_notices_mxid = hs.config.server_notices_mxid + self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self.third_party_event_rules = hs.get_third_party_event_rules() @@ -757,7 +757,9 @@ async def create_room( ) if is_public: - if not self.config.is_publishing_room_allowed(user_id, room_id, room_alias): + if not self.config.roomdirectory.is_publishing_room_allowed( + user_id, room_id, room_alias + ): # Lets just return a generic message, as there may be all sorts of # reasons why we said no. TODO: Allow configurable error messages # per alias creation rule? diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index c83ff585e..c3d4199ed 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -52,7 +52,7 @@ class RoomListHandler(BaseHandler): def __init__(self, hs: "HomeServer"): super().__init__(hs) - self.enable_room_list_search = hs.config.enable_room_list_search + self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search self.response_cache: ResponseCache[ Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]] ] = ResponseCache(hs.get_clock(), "room_list") diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7bb3f0bc4..1a56c82fb 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -88,7 +88,7 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self.spam_checker = hs.get_spam_checker() self.third_party_event_rules = hs.get_third_party_event_rules() - self._server_notices_mxid = 
self.config.server_notices_mxid + self._server_notices_mxid = self.config.servernotices.server_notices_mxid self._enable_lookup = hs.config.enable_3pid_lookup self.allow_per_room_profiles = self.config.allow_per_room_profiles diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py index 185befbe9..2fed9f377 100644 --- a/synapse/handlers/saml.py +++ b/synapse/handlers/saml.py @@ -54,19 +54,18 @@ class Saml2SessionData: class SamlHandler(BaseHandler): def __init__(self, hs: "HomeServer"): super().__init__(hs) - self._saml_client = Saml2Client(hs.config.saml2_sp_config) - self._saml_idp_entityid = hs.config.saml2_idp_entityid + self._saml_client = Saml2Client(hs.config.saml2.saml2_sp_config) + self._saml_idp_entityid = hs.config.saml2.saml2_idp_entityid - self._saml2_session_lifetime = hs.config.saml2_session_lifetime + self._saml2_session_lifetime = hs.config.saml2.saml2_session_lifetime self._grandfathered_mxid_source_attribute = ( - hs.config.saml2_grandfathered_mxid_source_attribute + hs.config.saml2.saml2_grandfathered_mxid_source_attribute ) self._saml2_attribute_requirements = hs.config.saml2.attribute_requirements - self._error_template = hs.config.sso_error_template # plugin to do custom mapping from saml response to mxid - self._user_mapping_provider = hs.config.saml2_user_mapping_provider_class( - hs.config.saml2_user_mapping_provider_config, + self._user_mapping_provider = hs.config.saml2.saml2_user_mapping_provider_class( + hs.config.saml2.saml2_user_mapping_provider_config, ModuleApi(hs, hs.get_auth_handler()), ) @@ -411,7 +410,7 @@ def __init__(self, parsed_config: SamlConfig, module_api: ModuleApi): self._mxid_mapper = parsed_config.mxid_mapper self._grandfathered_mxid_source_attribute = ( - module_api._hs.config.saml2_grandfathered_mxid_source_attribute + module_api._hs.config.saml2.saml2_grandfathered_mxid_source_attribute ) def get_remote_user_id( diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index e044251a1..49fde01cf 
100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -184,15 +184,17 @@ def __init__(self, hs: "HomeServer"): self._server_name = hs.hostname self._registration_handler = hs.get_registration_handler() self._auth_handler = hs.get_auth_handler() - self._error_template = hs.config.sso_error_template - self._bad_user_template = hs.config.sso_auth_bad_user_template + self._error_template = hs.config.sso.sso_error_template + self._bad_user_template = hs.config.sso.sso_auth_bad_user_template self._profile_handler = hs.get_profile_handler() # The following template is shown after a successful user interactive # authentication session. It tells the user they can close the window. - self._sso_auth_success_template = hs.config.sso_auth_success_template + self._sso_auth_success_template = hs.config.sso.sso_auth_success_template - self._sso_update_profile_information = hs.config.sso_update_profile_information + self._sso_update_profile_information = ( + hs.config.sso.sso_update_profile_information + ) # a lock on the mappings self._mapping_lock = Linearizer(name="sso_user_mapping", clock=hs.get_clock()) diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 9fc53333f..bd3e6f2ec 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -46,7 +46,7 @@ def __init__(self, hs: "HomeServer"): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id - self.stats_enabled = hs.config.stats_enabled + self.stats_enabled = hs.config.stats.stats_enabled # The current position in the current_state_delta stream self.pos: Optional[int] = None diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 8dc46d767..b91e7cb50 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -61,7 +61,7 @@ def __init__(self, hs: "HomeServer"): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory - 
self.search_all_users = hs.config.user_directory_search_all_users + self.search_all_users = hs.config.userdirectory.user_directory_search_all_users self.spam_checker = hs.get_spam_checker() # The current position in the current_state_delta stream self.pos: Optional[int] = None diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index c6c4d3bd2..03d2dd94f 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -363,7 +363,7 @@ def noop_context_manager(*args, **kwargs): def init_tracer(hs: "HomeServer"): """Set the whitelists and initialise the JaegerClient tracer""" global opentracing - if not hs.config.opentracer_enabled: + if not hs.config.tracing.opentracer_enabled: # We don't have a tracer opentracing = None return @@ -377,12 +377,12 @@ def init_tracer(hs: "HomeServer"): # Pull out the jaeger config if it was given. Otherwise set it to something sensible. # See https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/config.py - set_homeserver_whitelist(hs.config.opentracer_whitelist) + set_homeserver_whitelist(hs.config.tracing.opentracer_whitelist) from jaeger_client.metrics.prometheus import PrometheusMetricsFactory config = JaegerConfig( - config=hs.config.jaeger_config, + config=hs.config.tracing.jaeger_config, service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", scope_manager=LogContextScopeManager(hs.config), metrics_factory=PrometheusMetricsFactory(), diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 25589b004..f1b78d09f 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -168,8 +168,8 @@ def make_client(cls, hs): client = hs.get_simple_http_client() local_instance_name = hs.get_instance_name() - master_host = hs.config.worker_replication_host - master_port = hs.config.worker_replication_http_port + master_host = hs.config.worker.worker_replication_host + master_port = 
hs.config.worker.worker_replication_http_port instance_map = hs.config.worker.instance_map diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 509ed7fb1..1438a82b6 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -322,8 +322,8 @@ def start_replication(self, hs): else: client_name = hs.get_instance_name() self._factory = DirectTcpReplicationClientFactory(hs, client_name, self) - host = hs.config.worker_replication_host - port = hs.config.worker_replication_port + host = hs.config.worker.worker_replication_host + port = hs.config.worker.worker_replication_port hs.get_reactor().connectTCP(host.encode(), port, self._factory) def get_streams(self) -> Dict[str, Stream]: diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index a03774c98..e1506deb2 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -267,7 +267,7 @@ def register_servlets_for_client_rest_resource( # Load the media repo ones if we're using them. Otherwise load the servlets which # don't need a media repo (typically readonly admin APIs). - if hs.config.can_load_media_repo: + if hs.config.media.can_load_media_repo: register_servlets_for_media_repo(hs, http_server) else: ListMediaInRoom(hs).register(http_server) diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index 64446fc48..fa5c173f4 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -76,7 +76,7 @@ def __init__(self, hs: "HomeServer"): self.jwt_audiences = hs.config.jwt.jwt_audiences # SSO configuration. 
- self.saml2_enabled = hs.config.saml2_enabled + self.saml2_enabled = hs.config.saml2.saml2_enabled self.cas_enabled = hs.config.cas.cas_enabled self.oidc_enabled = hs.config.oidc.oidc_enabled self._msc2918_enabled = hs.config.access_token_lifetime is not None diff --git a/synapse/rest/client/user_directory.py b/synapse/rest/client/user_directory.py index 885281111..a47d9bd01 100644 --- a/synapse/rest/client/user_directory.py +++ b/synapse/rest/client/user_directory.py @@ -58,7 +58,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() - if not self.hs.config.user_directory_search_enabled: + if not self.hs.config.userdirectory.user_directory_search_enabled: return 200, {"limited": False, "results": []} body = parse_json_object_from_request(request) diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index a1a815cf8..b52a296d8 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -42,15 +42,15 @@ def __init__(self, hs: "HomeServer"): # Calculate these once since they shouldn't change after start-up. 
self.e2ee_forced_public = ( RoomCreationPreset.PUBLIC_CHAT - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) self.e2ee_forced_private = ( RoomCreationPreset.PRIVATE_CHAT - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) self.e2ee_forced_trusted_private = ( RoomCreationPreset.TRUSTED_PRIVATE_CHAT - in self.config.encryption_enabled_by_default_for_room_presets + in self.config.room.encryption_enabled_by_default_for_room_presets ) def on_GET(self, request: Request) -> Tuple[int, JsonDict]: diff --git a/synapse/rest/client/voip.py b/synapse/rest/client/voip.py index 9d46ed3af..ea2b8aa45 100644 --- a/synapse/rest/client/voip.py +++ b/synapse/rest/client/voip.py @@ -37,14 +37,14 @@ def __init__(self, hs: "HomeServer"): async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req( - request, self.hs.config.turn_allow_guests + request, self.hs.config.voip.turn_allow_guests ) - turnUris = self.hs.config.turn_uris - turnSecret = self.hs.config.turn_shared_secret - turnUsername = self.hs.config.turn_username - turnPassword = self.hs.config.turn_password - userLifetime = self.hs.config.turn_user_lifetime + turnUris = self.hs.config.voip.turn_uris + turnSecret = self.hs.config.voip.turn_shared_secret + turnUsername = self.hs.config.voip.turn_username + turnPassword = self.hs.config.voip.turn_password + userLifetime = self.hs.config.voip.turn_user_lifetime if turnUris and turnSecret and userLifetime: expiry = (self.hs.get_clock().time_msec() + userLifetime) / 1000 diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py index 712d4e836..a95804d32 100644 --- a/synapse/rest/media/v1/config_resource.py +++ b/synapse/rest/media/v1/config_resource.py @@ -31,7 +31,7 @@ def __init__(self, hs: "HomeServer"): config = 
hs.config self.clock = hs.get_clock() self.auth = hs.get_auth() - self.limits_dict = {"m.upload.size": config.max_upload_size} + self.limits_dict = {"m.upload.size": config.media.max_upload_size} async def _async_render_GET(self, request: SynapseRequest) -> None: await self.auth.get_user_by_req(request) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index c1bd81100..abd88a2d4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -76,16 +76,16 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size - self.max_image_pixels = hs.config.max_image_pixels + self.max_upload_size = hs.config.media.max_upload_size + self.max_image_pixels = hs.config.media.max_image_pixels Thumbnailer.set_limits(self.max_image_pixels) - self.primary_base_path: str = hs.config.media_store_path + self.primary_base_path: str = hs.config.media.media_store_path self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path) - self.dynamic_thumbnails = hs.config.dynamic_thumbnails - self.thumbnail_requirements = hs.config.thumbnail_requirements + self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails + self.thumbnail_requirements = hs.config.media.thumbnail_requirements self.remote_media_linearizer = Linearizer(name="media_remote") @@ -100,7 +100,11 @@ def __init__(self, hs: "HomeServer"): # potentially upload to. storage_providers = [] - for clz, provider_config, wrapper_config in hs.config.media_storage_providers: + for ( + clz, + provider_config, + wrapper_config, + ) in hs.config.media.media_storage_providers: backend = clz(hs, provider_config) provider = StorageProviderWrapper( backend, @@ -975,7 +979,7 @@ class MediaRepositoryResource(Resource): def __init__(self, hs: "HomeServer"): # If we're not configured to use it, raise if we somehow got here. 
- if not hs.config.can_load_media_repo: + if not hs.config.media.can_load_media_repo: raise ConfigError("Synapse is not configured to use a media repo.") super().__init__() @@ -986,7 +990,7 @@ def __init__(self, hs: "HomeServer"): self.putChild( b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage) ) - if hs.config.url_preview_enabled: + if hs.config.media.url_preview_enabled: self.putChild( b"preview_url", PreviewUrlResource(hs, media_repo, media_repo.media_storage), diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 128706d29..0b0c4d646 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -125,14 +125,14 @@ def __init__( self.auth = hs.get_auth() self.clock = hs.get_clock() self.filepaths = media_repo.filepaths - self.max_spider_size = hs.config.max_spider_size + self.max_spider_size = hs.config.media.max_spider_size self.server_name = hs.hostname self.store = hs.get_datastore() self.client = SimpleHttpClient( hs, treq_args={"browser_like_redirects": True}, - ip_whitelist=hs.config.url_preview_ip_range_whitelist, - ip_blacklist=hs.config.url_preview_ip_range_blacklist, + ip_whitelist=hs.config.media.url_preview_ip_range_whitelist, + ip_blacklist=hs.config.media.url_preview_ip_range_blacklist, use_proxy=True, ) self.media_repo = media_repo @@ -150,8 +150,8 @@ def __init__( or instance_running_jobs == hs.get_instance_name() ) - self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist - self.url_preview_accept_language = hs.config.url_preview_accept_language + self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist + self.url_preview_accept_language = hs.config.media.url_preview_accept_language # memory cache mapping urls to an ObservableDeferred returning # JSON-encoded OG metadata diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 
6c9969e55..289e4297f 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -125,7 +125,7 @@ class FileStorageProviderBackend(StorageProvider): def __init__(self, hs: "HomeServer", config: str): self.hs = hs - self.cache_directory = hs.config.media_store_path + self.cache_directory = hs.config.media.media_store_path self.base_directory = config def __str__(self) -> str: diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index cb2f88676..ed91ef5a4 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -53,7 +53,7 @@ def __init__( self.store = hs.get_datastore() self.media_repo = media_repo self.media_storage = media_storage - self.dynamic_thumbnails = hs.config.dynamic_thumbnails + self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails self.server_name = hs.hostname async def _async_render_GET(self, request: SynapseRequest) -> None: diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 39b29318b..7dcb1428e 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -41,7 +41,7 @@ def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"): self.clock = hs.get_clock() self.server_name = hs.hostname self.auth = hs.get_auth() - self.max_upload_size = hs.config.max_upload_size + self.max_upload_size = hs.config.media.max_upload_size self.clock = hs.get_clock() async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: diff --git a/synapse/rest/synapse/client/__init__.py b/synapse/rest/synapse/client/__init__.py index 086c80b72..6ad558f5d 100644 --- a/synapse/rest/synapse/client/__init__.py +++ b/synapse/rest/synapse/client/__init__.py @@ -50,7 +50,7 @@ def build_synapse_client_resource_tree(hs: "HomeServer") -> Mapping[str, Resourc resources["/_synapse/client/oidc"] = OIDCResource(hs) - if 
hs.config.saml2_enabled: + if hs.config.saml2.saml2_enabled: from synapse.rest.synapse.client.saml2 import SAML2Resource res = SAML2Resource(hs) diff --git a/synapse/rest/synapse/client/saml2/metadata_resource.py b/synapse/rest/synapse/client/saml2/metadata_resource.py index 64378ed57..d8eae3970 100644 --- a/synapse/rest/synapse/client/saml2/metadata_resource.py +++ b/synapse/rest/synapse/client/saml2/metadata_resource.py @@ -30,7 +30,7 @@ class SAML2MetadataResource(Resource): def __init__(self, hs: "HomeServer"): Resource.__init__(self) - self.sp_config = hs.config.saml2_sp_config + self.sp_config = hs.config.saml2.saml2_sp_config def render_GET(self, request: Request) -> bytes: metadata_xml = saml2.metadata.create_metadata_string( diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index d87a53891..cd1c5ff6f 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -39,7 +39,7 @@ def __init__(self, hs: "HomeServer"): self._server_name = hs.hostname self._notifier = hs.get_notifier() - self.server_notices_mxid = self._config.server_notices_mxid + self.server_notices_mxid = self._config.servernotices.server_notices_mxid def is_enabled(self): """Checks if server notices are enabled on this server. 
@@ -47,7 +47,7 @@ def is_enabled(self): Returns: bool """ - return self._config.server_notices_mxid is not None + return self.server_notices_mxid is not None async def send_notice( self, @@ -71,9 +71,9 @@ async def send_notice( room_id = await self.get_or_create_notice_room_for_user(user_id) await self.maybe_invite_user_to_room(user_id, room_id) - system_mxid = self._config.server_notices_mxid + assert self.server_notices_mxid is not None requester = create_requester( - system_mxid, authenticated_entity=self._server_name + self.server_notices_mxid, authenticated_entity=self._server_name ) logger.info("Sending server notice to %s", user_id) @@ -81,7 +81,7 @@ async def send_notice( event_dict = { "type": type, "room_id": room_id, - "sender": system_mxid, + "sender": self.server_notices_mxid, "content": event_content, } @@ -106,7 +106,7 @@ async def get_or_create_notice_room_for_user(self, user_id: str) -> str: Returns: room id of notice room. """ - if not self.is_enabled(): + if self.server_notices_mxid is None: raise Exception("Server notices not enabled") assert self._is_mine_id(user_id), "Cannot send server notices to remote users" @@ -139,12 +139,12 @@ async def get_or_create_notice_room_for_user(self, user_id: str) -> str: # avatar, we have to use both. 
join_profile = None if ( - self._config.server_notices_mxid_display_name is not None - or self._config.server_notices_mxid_avatar_url is not None + self._config.servernotices.server_notices_mxid_display_name is not None + or self._config.servernotices.server_notices_mxid_avatar_url is not None ): join_profile = { - "displayname": self._config.server_notices_mxid_display_name, - "avatar_url": self._config.server_notices_mxid_avatar_url, + "displayname": self._config.servernotices.server_notices_mxid_display_name, + "avatar_url": self._config.servernotices.server_notices_mxid_avatar_url, } requester = create_requester( @@ -154,7 +154,7 @@ async def get_or_create_notice_room_for_user(self, user_id: str) -> str: requester, config={ "preset": RoomCreationPreset.PRIVATE_CHAT, - "name": self._config.server_notices_room_name, + "name": self._config.servernotices.server_notices_room_name, "power_level_content_override": {"users_default": -10}, }, ratelimit=False, @@ -178,6 +178,7 @@ async def maybe_invite_user_to_room(self, user_id: str, room_id: str) -> None: user_id: The ID of the user to invite. room_id: The ID of the room to invite the user to. 
""" + assert self.server_notices_mxid is not None requester = create_requester( self.server_notices_mxid, authenticated_entity=self._server_name ) diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 52ef9deed..c83089ee6 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -2015,7 +2015,7 @@ def _register_user( (user_id_obj.localpart, create_profile_with_displayname), ) - if self.hs.config.stats_enabled: + if self.hs.config.stats.stats_enabled: # we create a new completed user statistics row # we don't strictly need current_token since this user really can't diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 343d6efc9..e20033bb2 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -98,7 +98,7 @@ def __init__(self, database: DatabasePool, db_conn, hs): self.server_name = hs.hostname self.clock = self.hs.get_clock() - self.stats_enabled = hs.config.stats_enabled + self.stats_enabled = hs.config.stats.stats_enabled self.stats_delta_processing_lock = DeferredLock() diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 7ca04237a..90d65edc4 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -551,7 +551,7 @@ def __init__( super().__init__(database, db_conn, hs) self._prefer_local_users_in_search = ( - hs.config.user_directory_search_prefer_local_users + hs.config.userdirectory.user_directory_search_prefer_local_users ) self._server_name = hs.config.server.server_name @@ -741,7 +741,7 @@ async def search_user_dir( } """ - if self.hs.config.user_directory_search_all_users: + if self.hs.config.userdirectory.user_directory_search_all_users: join_args = (user_id,) where_clause = "user_id != ?" 
else: diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index a0a48b564..6a2e76ca4 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -405,7 +405,9 @@ def prepare(self, reactor, clock, hs): rd_config = RoomDirectoryConfig() rd_config.read_config(config) - self.hs.config.is_alias_creation_allowed = rd_config.is_alias_creation_allowed + self.hs.config.roomdirectory.is_alias_creation_allowed = ( + rd_config.is_alias_creation_allowed + ) return hs diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index 1ba4c05b9..24b7ef6ef 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -118,7 +118,7 @@ def test_initial_room(self): self.assertEqual(len(r), 0) # Disable stats - self.hs.config.stats_enabled = False + self.hs.config.stats.stats_enabled = False self.handler.stats_enabled = False u1 = self.register_user("u1", "pass") @@ -134,7 +134,7 @@ def test_initial_room(self): self.assertEqual(len(r), 0) # Enable stats - self.hs.config.stats_enabled = True + self.hs.config.stats.stats_enabled = True self.handler.stats_enabled = True # Do the initial population of the user directory via the background update @@ -469,7 +469,7 @@ def test_initial_background_update(self): behaviour eventually to still keep current rows. 
""" - self.hs.config.stats_enabled = False + self.hs.config.stats.stats_enabled = False self.handler.stats_enabled = False u1 = self.register_user("u1", "pass") @@ -481,7 +481,7 @@ def test_initial_background_update(self): self.assertIsNone(self._get_current_stats("room", r1)) self.assertIsNone(self._get_current_stats("user", u1)) - self.hs.config.stats_enabled = True + self.hs.config.stats.stats_enabled = True self.handler.stats_enabled = True self._perform_background_initial_update() diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index ba32585a1..266333c55 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -451,7 +451,7 @@ def test_initial_share_all_users(self): visible. """ self.handler.search_all_users = True - self.hs.config.user_directory_search_all_users = True + self.hs.config.userdirectory.user_directory_search_all_users = True u1 = self.register_user("user1", "pass") self.register_user("user2", "pass") @@ -607,7 +607,7 @@ def make_homeserver(self, reactor, clock): return hs def test_disabling_room_list(self): - self.config.user_directory_search_enabled = True + self.config.userdirectory.user_directory_search_enabled = True # First we create a room with another user so that user dir is non-empty # for our user @@ -624,7 +624,7 @@ def test_disabling_room_list(self): self.assertTrue(len(channel.json_body["results"]) > 0) # Disable user directory and check search returns nothing - self.config.user_directory_search_enabled = False + self.config.userdirectory.user_directory_search_enabled = False channel = self.make_request( "POST", b"user_directory/search", b'{"search_term":"user2"}' ) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index f81386607..ce30a1921 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -43,7 +43,7 @@ def prepare(self, reactor, clock, hs): self.admin_user = self.register_user("admin", 
"pass", admin=True) self.admin_user_tok = self.login("admin", "pass") - self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.filepaths = MediaFilePaths(hs.config.media.media_store_path) def test_no_auth(self): """ @@ -200,7 +200,7 @@ def prepare(self, reactor, clock, hs): self.admin_user = self.register_user("admin", "pass", admin=True) self.admin_user_tok = self.login("admin", "pass") - self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.filepaths = MediaFilePaths(hs.config.media.media_store_path) self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name def test_no_auth(self): diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index e79e0e185..ee3ae9cce 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2473,7 +2473,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() self.media_repo = hs.get_media_repository_resource() - self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.filepaths = MediaFilePaths(hs.config.media.media_store_path) self.admin_user = self.register_user("admin", "pass", admin=True) self.admin_user_tok = self.login("admin", "pass") diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 44a643d50..4ae00755c 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -53,7 +53,7 @@ def prepare(self, reactor, clock, hs): self.primary_base_path = os.path.join(self.test_dir, "primary") self.secondary_base_path = os.path.join(self.test_dir, "secondary") - hs.config.media_store_path = self.primary_base_path + hs.config.media.media_store_path = self.primary_base_path storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)] diff --git a/tests/server_notices/test_resource_limits_server_notices.py 
b/tests/server_notices/test_resource_limits_server_notices.py index 8701b5f7e..7f25200a5 100644 --- a/tests/server_notices/test_resource_limits_server_notices.py +++ b/tests/server_notices/test_resource_limits_server_notices.py @@ -326,7 +326,7 @@ def test_invite_with_notice(self): for event in events: if ( event["type"] == EventTypes.Message - and event["sender"] == self.hs.config.server_notices_mxid + and event["sender"] == self.hs.config.servernotices.server_notices_mxid ): notice_in_room = True From 0420d4e6a5ceb58a453ce0761a15cd8e144da650 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 24 Sep 2021 14:01:45 +0100 Subject: [PATCH 50/74] Stop trying to auth/persist events whose auth events we do not have. (#10907) --- changelog.d/10907.bugfix | 1 + synapse/handlers/federation_event.py | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 changelog.d/10907.bugfix diff --git a/changelog.d/10907.bugfix b/changelog.d/10907.bugfix new file mode 100644 index 000000000..601b341f9 --- /dev/null +++ b/changelog.d/10907.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which could cause events pulled over federation to be incorrectly rejected. 
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 8fd9e5104..01fd84112 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1194,10 +1194,17 @@ async def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: auth = {} for auth_event_id in event.auth_event_ids(): ae = persisted_events.get(auth_event_id) - if ae: - auth[(ae.type, ae.state_key)] = ae - else: - logger.info("Missing auth event %s", auth_event_id) + if not ae: + logger.warning( + "Event %s relies on auth_event %s, which could not be found.", + event, + auth_event_id, + ) + # the fact we can't find the auth event doesn't mean it doesn't + # exist, which means it is premature to reject `event`. Instead we + # just ignore it for now. + return None + auth[(ae.type, ae.state_key)] = ae context = EventContext.for_outlier() context = await self._check_event_auth( @@ -1208,8 +1215,10 @@ async def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: ) return event, context - events_to_persist = await yieldable_gather_results(prep, fetched_events) - await self.persist_events_and_notify(room_id, events_to_persist) + events_to_persist = ( + x for x in await yieldable_gather_results(prep, fetched_events) if x + ) + await self.persist_events_and_notify(room_id, tuple(events_to_persist)) async def _check_event_auth( self, @@ -1235,8 +1244,7 @@ async def _check_event_auth( claimed_auth_event_map: A map of (type, state_key) => event for the event's claimed auth_events. - Possibly incomplete, and possibly including events that are not yet - persisted, or authed, or in the right room. + Possibly including events that were rejected, or are in the wrong room. Only populated when populating outliers. 
From ea01d4c2de65f29cf23e2d28786bfc10bd5fd881 Mon Sep 17 00:00:00 2001 From: David Robertson <davidr@element.io> Date: Fri, 24 Sep 2021 15:27:09 +0100 Subject: [PATCH 51/74] Update postgresql testing script (#10906) - Use sytest:bionic. Sytest:latest is two years old (do we want CI to push out latest at all?) and comes with Python 3.5, which we explicitly no longer support. The script now runs under PostgreSQL 10 as a result. - Advertise script in the docs - Move pg testing script to scripts-dev directory - Write to host as the script's executor, not root A few changes to make it speedier to re-run the tests: - Create blank DB in the container, not the script, so we don't have to `initdb` each time - Use a named volume to persist the tox environment, so we don't have to fetch and install a bunch of packages from PyPI each time Co-authored-by: reivilibre <olivier@librepush.net> --- .gitignore | 1 + changelog.d/10906.misc | 1 + docker/Dockerfile-pgtests | 24 +++++++++++-- docker/run_pg_tests.sh | 7 ++-- docs/development/contributing_guide.md | 47 ++++++++++++++++++++++++++ scripts-dev/test_postgresql.sh | 19 +++++++++++ test_postgresql.sh | 12 ------- 7 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 changelog.d/10906.misc create mode 100755 scripts-dev/test_postgresql.sh delete mode 100755 test_postgresql.sh diff --git a/.gitignore b/.gitignore index 6b9257b5c..fe137f337 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ __pycache__/ /.coverage* /.mypy_cache/ /.tox +/.tox-pg-container /build/ /coverage.* /dist/ diff --git a/changelog.d/10906.misc b/changelog.d/10906.misc new file mode 100644 index 000000000..20a1cbfbd --- /dev/null +++ b/changelog.d/10906.misc @@ -0,0 +1 @@ +Update development testing script `test_postgresql.sh` to use a supported Python version and make re-runs quicker.
\ No newline at end of file diff --git a/docker/Dockerfile-pgtests b/docker/Dockerfile-pgtests index 3bfee845c..92b804d19 100644 --- a/docker/Dockerfile-pgtests +++ b/docker/Dockerfile-pgtests @@ -1,6 +1,6 @@ # Use the Sytest image that comes with a lot of the build dependencies # pre-installed -FROM matrixdotorg/sytest:latest +FROM matrixdotorg/sytest:bionic # The Sytest image doesn't come with python, so install that RUN apt-get update && apt-get -qq install -y python3 python3-dev python3-pip @@ -8,5 +8,23 @@ RUN apt-get update && apt-get -qq install -y python3 python3-dev python3-pip # We need tox to run the tests in run_pg_tests.sh RUN python3 -m pip install tox -ADD run_pg_tests.sh /pg_tests.sh -ENTRYPOINT /pg_tests.sh +# Initialise the db +RUN su -c '/usr/lib/postgresql/10/bin/initdb -D /var/lib/postgresql/data -E "UTF-8" --lc-collate="C.UTF-8" --lc-ctype="C.UTF-8" --username=postgres' postgres + +# Add a user with our UID and GID so that files get created on the host owned +# by us, not root. +ARG UID +ARG GID +RUN groupadd --gid $GID user +RUN useradd --uid $UID --gid $GID --groups sudo --no-create-home user + +# Ensure we can start postgres by sudo-ing as the postgres user. 
+RUN apt-get update && apt-get -qq install -y sudo +RUN echo "user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +ADD run_pg_tests.sh /run_pg_tests.sh +# Use the "exec form" of ENTRYPOINT (https://docs.docker.com/engine/reference/builder/#entrypoint) +# so that we can `docker run` this container and pass arguments to pg_tests.sh +ENTRYPOINT ["/run_pg_tests.sh"] + +USER user diff --git a/docker/run_pg_tests.sh b/docker/run_pg_tests.sh index 1fd08cb62..58e2177d3 100755 --- a/docker/run_pg_tests.sh +++ b/docker/run_pg_tests.sh @@ -10,11 +10,10 @@ set -e # Set PGUSER so Synapse's tests know what user to connect to the database with export PGUSER=postgres -# Initialise & start the database -su -c '/usr/lib/postgresql/9.6/bin/initdb -D /var/lib/postgresql/data -E "UTF-8" --lc-collate="en_US.UTF-8" --lc-ctype="en_US.UTF-8" --username=postgres' postgres -su -c '/usr/lib/postgresql/9.6/bin/pg_ctl -w -D /var/lib/postgresql/data start' postgres +# Start the database +sudo -u postgres /usr/lib/postgresql/10/bin/pg_ctl -w -D /var/lib/postgresql/data start # Run the tests cd /src export TRIAL_FLAGS="-j 4" -tox --workdir=/tmp -e py35-postgres +tox --workdir=./.tox-pg-container -e py36-postgres "$@" diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index 97352b0f2..713366368 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -170,6 +170,53 @@ To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`: SYNAPSE_TEST_LOG_LEVEL=DEBUG trial tests ``` +### Running tests under PostgreSQL + +Invoking `trial` as above will use an in-memory SQLite database. This is great for +quick development and testing. However, we recommend using a PostgreSQL database +in production (and indeed, we have some code paths specific to each database). +This means that we need to run our unit tests against PostgreSQL too. 
Our CI does +this automatically for pull requests and release candidates, but it's sometimes +useful to reproduce this locally. + +To do so, [configure Postgres](../postgres.md) and run `trial` with the +following environment variables matching your configuration: + +- `SYNAPSE_POSTGRES` to anything nonempty +- `SYNAPSE_POSTGRES_HOST` +- `SYNAPSE_POSTGRES_USER` +- `SYNAPSE_POSTGRES_PASSWORD` + +For example: + +```shell +export SYNAPSE_POSTGRES=1 +export SYNAPSE_POSTGRES_HOST=localhost +export SYNAPSE_POSTGRES_USER=postgres +export SYNAPSE_POSTGRES_PASSWORD=mydevenvpassword +trial +``` + +#### Prebuilt container + +Since configuring PostgreSQL can be fiddly, we can make use of a pre-made +Docker container to set up PostgreSQL and run our tests for us. To do so, run + +```shell +scripts-dev/test_postgresql.sh +``` + +Any extra arguments to the script will be passed to `tox` and then to `trial`, +so we can run a specific test in this container with e.g. + +```shell +scripts-dev/test_postgresql.sh tests.replication.test_sharded_event_persister.EventPersisterShardTestCase +``` + +The container creates a folder in your Synapse checkout called +`.tox-pg-container` and uses this as a tox environment. The output of any +`trial` runs goes into `_trial_temp` in your synapse source directory — the same +as running `trial` directly on your host machine. ## Run the integration tests ([Sytest](https://github.com/matrix-org/sytest)). diff --git a/scripts-dev/test_postgresql.sh b/scripts-dev/test_postgresql.sh new file mode 100755 index 000000000..43cfa256e --- /dev/null +++ b/scripts-dev/test_postgresql.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# This script builds the Docker image to run the PostgreSQL tests, and then runs +# the tests. It uses a dedicated tox environment so that we don't have to +# rebuild it each time. + +# Command line arguments to this script are forwarded to "tox" and then to "trial". 
+ +set -e + +# Build, and tag +docker build docker/ \ + --build-arg "UID=$(id -u)" \ + --build-arg "GID=$(id -g)" \ + -f docker/Dockerfile-pgtests \ + -t synapsepgtests + +# Run, mounting the current directory into /src +docker run --rm -it -v "$(pwd):/src" -v synapse-pg-test-tox:/tox synapsepgtests "$@" diff --git a/test_postgresql.sh b/test_postgresql.sh deleted file mode 100755 index c10828fbb..000000000 --- a/test_postgresql.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash - -# This script builds the Docker image to run the PostgreSQL tests, and then runs -# the tests. - -set -e - -# Build, and tag -docker build docker/ -f docker/Dockerfile-pgtests -t synapsepgtests - -# Run, mounting the current directory into /src -docker run --rm -it -v $(pwd)\:/src synapsepgtests From b10257e87972d158f4b6a0c7d1fe7239014ea10a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier <babolivier@matrix.org> Date: Fri, 24 Sep 2021 16:38:23 +0200 Subject: [PATCH 52/74] Add a spamchecker callback to allow or deny room creation based on invites (#10898) This is in the context of creating new module callbacks that modules in https://github.com/matrix-org/synapse-dinsic can use, in an effort to reconcile the spam checker API in synapse-dinsic with the one in mainline. This adds a callback that's fairly similar to user_may_create_room except it also allows processing based on the invites sent at room creation. 
--- changelog.d/10898.feature | 1 + docs/modules/spam_checker_callbacks.md | 29 ++++++ synapse/events/spamcheck.py | 42 +++++++++ synapse/handlers/room.py | 14 ++- tests/rest/client/test_rooms.py | 119 ++++++++++++++++++++++++- 5 files changed, 199 insertions(+), 6 deletions(-) create mode 100644 changelog.d/10898.feature diff --git a/changelog.d/10898.feature b/changelog.d/10898.feature new file mode 100644 index 000000000..97fa39fd0 --- /dev/null +++ b/changelog.d/10898.feature @@ -0,0 +1 @@ +Add a `user_may_create_room_with_invites` spam checker callback to allow modules to allow or deny a room creation request based on the invites and/or 3PID invites it includes. diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 81574a015..7920ac5f8 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -38,6 +38,35 @@ async def user_may_create_room(user: str) -> bool Called when processing a room creation request. The module must return a `bool` indicating whether the given user (represented by their Matrix user ID) is allowed to create a room. +### `user_may_create_room_with_invites` + +```python +async def user_may_create_room_with_invites( + user: str, + invites: List[str], + threepid_invites: List[Dict[str, str]], +) -> bool +``` + +Called when processing a room creation request (right after `user_may_create_room`). +The module is given the Matrix user ID of the user trying to create a room, as well as a +list of Matrix users to invite and a list of third-party identifiers (3PID, e.g. email +addresses) to invite. + +An invited Matrix user is represented by their Matrix user ID, and an invited +3PID is represented by a dict that includes the 3PID medium (e.g. "email") through its +`medium` key and its address (e.g. "alice@example.com") through its `address` key.
+ +See [the Matrix specification](https://matrix.org/docs/spec/appendices#pid-types) for more +information regarding third-party identifiers. + +If no invite and/or 3PID invite were specified in the room creation request, the +corresponding list(s) will be empty. + +**Note**: This callback is not called when a room is cloned (e.g. during a room upgrade) +since no invites are sent when cloning a room. To cover this case, modules also need to +implement `user_may_create_room`. + ### `user_may_create_room_alias` ```python diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 19ee246f9..c389f70b8 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -46,6 +46,9 @@ ] USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] USER_MAY_CREATE_ROOM_CALLBACK = Callable[[str], Awaitable[bool]] +USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK = Callable[ + [str, List[str], List[Dict[str, str]]], Awaitable[bool] +] USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[[str, RoomAlias], Awaitable[bool]] USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]] CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[Dict[str, str]], Awaitable[bool]] @@ -164,6 +167,9 @@ def __init__(self): self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] self._user_may_create_room_callbacks: List[USER_MAY_CREATE_ROOM_CALLBACK] = [] + self._user_may_create_room_with_invites_callbacks: List[ + USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK + ] = [] self._user_may_create_room_alias_callbacks: List[ USER_MAY_CREATE_ROOM_ALIAS_CALLBACK ] = [] @@ -183,6 +189,9 @@ def register_callbacks( check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None, + user_may_create_room_with_invites: Optional[ + 
USER_MAY_CREATE_ROOM_WITH_INVITES_CALLBACK + ] = None, user_may_create_room_alias: Optional[ USER_MAY_CREATE_ROOM_ALIAS_CALLBACK ] = None, @@ -203,6 +212,11 @@ def register_callbacks( if user_may_create_room is not None: self._user_may_create_room_callbacks.append(user_may_create_room) + if user_may_create_room_with_invites is not None: + self._user_may_create_room_with_invites_callbacks.append( + user_may_create_room_with_invites, + ) + if user_may_create_room_alias is not None: self._user_may_create_room_alias_callbacks.append( user_may_create_room_alias, @@ -283,6 +297,34 @@ async def user_may_create_room(self, userid: str) -> bool: return True + async def user_may_create_room_with_invites( + self, + userid: str, + invites: List[str], + threepid_invites: List[Dict[str, str]], + ) -> bool: + """Checks if a given user may create a room with invites + + If this method returns false, the creation request will be rejected. + + Args: + userid: The ID of the user attempting to create a room + invites: The IDs of the Matrix users to be invited if the room creation is + allowed. + threepid_invites: The threepids to be invited if the room creation is allowed, + as a dict including a "medium" key indicating the threepid's medium (e.g. + "email") and an "address" key indicating the threepid's address (e.g. 
+ "alice@example.com") + + Returns: + True if the user may create the room, otherwise False + """ + for callback in self._user_may_create_room_with_invites_callbacks: + if await callback(userid, invites, threepid_invites) is False: + return False + + return True + async def user_may_create_room_alias( self, userid: str, room_alias: RoomAlias ) -> bool: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 408b7d7b7..8fede5e93 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -649,8 +649,16 @@ async def create_room( requester, config, is_requester_admin=is_requester_admin ) - if not is_requester_admin and not await self.spam_checker.user_may_create_room( - user_id + invite_3pid_list = config.get("invite_3pid", []) + invite_list = config.get("invite", []) + + if not is_requester_admin and not ( + await self.spam_checker.user_may_create_room(user_id) + and await self.spam_checker.user_may_create_room_with_invites( + user_id, + invite_list, + invite_3pid_list, + ) ): raise SynapseError(403, "You are not permitted to create rooms") @@ -684,8 +692,6 @@ async def create_room( if mapping: raise SynapseError(400, "Room alias already taken", Codes.ROOM_IN_USE) - invite_3pid_list = config.get("invite_3pid", []) - invite_list = config.get("invite", []) for i in invite_list: try: uid = UserID.from_string(i) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index ef847f0f5..30bdaa9c2 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,7 +18,7 @@ """Tests REST events for /rooms paths.""" import json -from typing import Iterable +from typing import Dict, Iterable, List, Optional from unittest.mock import Mock, call from urllib import parse as urlparse @@ -30,7 +30,7 @@ from synapse.handlers.pagination import PurgeStatus from synapse.rest import admin from synapse.rest.client import account, directory, login, profile, room, sync -from synapse.types import JsonDict, RoomAlias, 
UserID, create_requester +from synapse.types import JsonDict, Requester, RoomAlias, UserID, create_requester from synapse.util.stringutils import random_string from tests import unittest @@ -669,6 +669,121 @@ def test_post_room_invitees_ratelimit(self): channel = self.make_request("POST", "/createRoom", content) self.assertEqual(200, channel.code) + def test_spamchecker_invites(self): + """Tests the user_may_create_room_with_invites spam checker callback.""" + + # Mock do_3pid_invite, so we don't fail from failing to send a 3PID invite to an + # IS. + async def do_3pid_invite( + room_id: str, + inviter: UserID, + medium: str, + address: str, + id_server: str, + requester: Requester, + txn_id: Optional[str], + id_access_token: Optional[str] = None, + ) -> int: + return 0 + + do_3pid_invite_mock = Mock(side_effect=do_3pid_invite) + self.hs.get_room_member_handler().do_3pid_invite = do_3pid_invite_mock + + # Add a mock callback for user_may_create_room_with_invites. Make it allow any + # room creation request for now. + return_value = True + + async def user_may_create_room_with_invites( + user: str, + invites: List[str], + threepid_invites: List[Dict[str, str]], + ) -> bool: + return return_value + + callback_mock = Mock(side_effect=user_may_create_room_with_invites) + self.hs.get_spam_checker()._user_may_create_room_with_invites_callbacks.append( + callback_mock, + ) + + # The MXIDs we'll try to invite. + invited_mxids = [ + "@alice1:red", + "@alice2:red", + "@alice3:red", + "@alice4:red", + ] + + # The 3PIDs we'll try to invite. 
+ invited_3pids = [ + { + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": "alice1@example.com", + }, + { + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": "alice2@example.com", + }, + { + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": "alice3@example.com", + }, + ] + + # Create a room and invite the Matrix users, and check that it succeeded. + channel = self.make_request( + "POST", + "/createRoom", + json.dumps({"invite": invited_mxids}).encode("utf8"), + ) + self.assertEqual(200, channel.code) + + # Check that the callback was called with the right arguments. + expected_call_args = ((self.user_id, invited_mxids, []),) + self.assertEquals( + callback_mock.call_args, + expected_call_args, + callback_mock.call_args, + ) + + # Create a room and invite the 3PIDs, and check that it succeeded. + channel = self.make_request( + "POST", + "/createRoom", + json.dumps({"invite_3pid": invited_3pids}).encode("utf8"), + ) + self.assertEqual(200, channel.code) + + # Check that do_3pid_invite was called the right amount of time + self.assertEquals(do_3pid_invite_mock.call_count, len(invited_3pids)) + + # Check that the callback was called with the right arguments. + expected_call_args = ((self.user_id, [], invited_3pids),) + self.assertEquals( + callback_mock.call_args, + expected_call_args, + callback_mock.call_args, + ) + + # Now deny any room creation. + return_value = False + + # Create a room and invite the 3PIDs, and check that it failed. + channel = self.make_request( + "POST", + "/createRoom", + json.dumps({"invite_3pid": invited_3pids}).encode("utf8"), + ) + self.assertEqual(403, channel.code) + + # Check that do_3pid_invite wasn't called this time. 
+ self.assertEquals(do_3pid_invite_mock.call_count, len(invited_3pids)) + class RoomTopicTestCase(RoomBase): """Tests /rooms/$room_id/topic REST events.""" From d138187045dd3c51689c19124d65ee62e37db755 Mon Sep 17 00:00:00 2001 From: Eric Eastwood <erice@element.io> Date: Fri, 24 Sep 2021 17:09:12 -0500 Subject: [PATCH 53/74] Document changes to schema version 61 - 64 (#10917) As pointed out by @richvdh, https://github.com/matrix-org/synapse/pull/10838#discussion_r715424244 Retroactively summarize `61` - `64` --- changelog.d/10917.misc | 1 + synapse/storage/schema/__init__.py | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 changelog.d/10917.misc diff --git a/changelog.d/10917.misc b/changelog.d/10917.misc new file mode 100644 index 000000000..9ce6eef94 --- /dev/null +++ b/changelog.d/10917.misc @@ -0,0 +1 @@ +Document and summarize changes in schema version `61` - `64`. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index aa2ce44c6..573e05a48 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -27,11 +27,22 @@ Changes in SCHEMA_VERSION = 61: - The `user_stats_historical` and `room_stats_historical` tables are not written and are not read (previously, they were written but not read). + - MSC2716: Add `insertion_events` and `insertion_event_edges` tables to keep track + of insertion events in order to navigate historical chunks of messages. + - MSC2716: Add `chunk_events` table to track how the chunk is labeled and + determines which insertion event it points to. + +Changes in SCHEMA_VERSION = 62: + - MSC2716: Add `insertion_event_extremities` table that keeps track of which + insertion events need to be backfilled. Changes in SCHEMA_VERSION = 63: - The `public_room_list_stream` table is not written nor read to (previously, it was written and read to, but not for any significant purpose). 
https://github.com/matrix-org/synapse/pull/10565 + +Changes in SCHEMA_VERSION = 64: + - MSC2716: Rename related tables and columns from "chunks" to "batches". """ From 6c83c2710760a4f551d1a925fc9b1a19ae8797c1 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 27 Sep 2021 11:29:23 +0100 Subject: [PATCH 54/74] Fix race conditions when creating media store and config directories (#10913) --- changelog.d/10913.bugfix | 1 + synapse/config/_base.py | 9 ++------- synapse/rest/media/v1/media_storage.py | 6 ++---- synapse/rest/media/v1/storage_provider.py | 3 +-- 4 files changed, 6 insertions(+), 13 deletions(-) create mode 100644 changelog.d/10913.bugfix diff --git a/changelog.d/10913.bugfix b/changelog.d/10913.bugfix new file mode 100644 index 000000000..a0015c824 --- /dev/null +++ b/changelog.d/10913.bugfix @@ -0,0 +1 @@ +Fix race conditions when creating media store and config directories. diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 2cc242782..d974a1a2a 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -200,11 +200,7 @@ def check_file(cls, file_path, config_name): @classmethod def ensure_directory(cls, dir_path): dir_path = cls.abspath(dir_path) - try: - os.makedirs(dir_path) - except OSError as e: - if e.errno != errno.EEXIST: - raise + os.makedirs(dir_path, exist_ok=True) if not os.path.isdir(dir_path): raise ConfigError("%s is not a directory" % (dir_path,)) return dir_path @@ -693,8 +689,7 @@ def load_or_generate_config(cls, description, argv): open_private_ports=config_args.open_private_ports, ) - if not path_exists(config_dir_path): - os.makedirs(config_dir_path) + os.makedirs(config_dir_path, exist_ok=True) with open(config_path, "w") as config_file: config_file.write(config_str) config_file.write("\n\n# vim:ft=yaml") diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 01fada8fb..fca239d8c 100644 --- 
a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -132,8 +132,7 @@ def store_into_file( fname = os.path.join(self.local_media_directory, path) dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) finished_called = [False] @@ -244,8 +243,7 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str: return legacy_local_path dirname = os.path.dirname(local_path) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) for provider in self.storage_providers: res: Any = await provider.fetch(path, file_info) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 289e4297f..da78fcee5 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -138,8 +138,7 @@ async def store_file(self, path: str, file_info: FileInfo) -> None: backup_fname = os.path.join(self.base_directory, path) dirname = os.path.dirname(backup_fname) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) await defer_to_thread( self.hs.get_reactor(), shutil.copyfile, primary_fname, backup_fname From f7768f62cbf7579a1a91e694f83d47d275373369 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 27 Sep 2021 12:55:27 +0100 Subject: [PATCH 55/74] Avoid storing URL cache files in storage providers (#10911) URL cache files are short-lived and it does not make sense to offload them (eg. to the cloud) or back them up. 
--- changelog.d/10911.bugfix | 1 + docs/upgrade.md | 7 + synapse/rest/media/v1/filepath.py | 11 +- synapse/rest/media/v1/preview_url_resource.py | 1 - synapse/rest/media/v1/storage_provider.py | 10 ++ tests/rest/media/v1/test_url_preview.py | 130 ++++++++++++++++++ 6 files changed, 154 insertions(+), 6 deletions(-) create mode 100644 changelog.d/10911.bugfix diff --git a/changelog.d/10911.bugfix b/changelog.d/10911.bugfix new file mode 100644 index 000000000..96e36bb15 --- /dev/null +++ b/changelog.d/10911.bugfix @@ -0,0 +1 @@ +Avoid storing URL cache files in storage providers. Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space. diff --git a/docs/upgrade.md b/docs/upgrade.md index f9b832cb3..a8221372d 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -85,6 +85,13 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.44.0 + +## The URL preview cache is no longer mirrored to storage providers +The `url_cache/` and `url_cache_thumbnails/` directories in the media store are +no longer mirrored to storage providers. These two directories can be safely +deleted from any configured storage providers to reclaim space. 
+ # Upgrading to v1.43.0 ## The spaces summary APIs can now be handled by workers diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 39bbe4e87..08bd85f66 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -195,23 +195,24 @@ def url_cache_thumbnail_rel( url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) - def url_cache_thumbnail_directory(self, media_id: str) -> str: + def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: # Media id is of the form <DATE><RANDOM_STRING> # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): - return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:] - ) + return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:]) else: return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) + url_cache_thumbnail_directory = _wrap_in_base_path( + url_cache_thumbnail_directory_rel + ) + def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form <DATE><RANDOM_STRING> diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 0b0c4d646..79a42b245 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -485,7 +485,6 @@ def _start_expire_url_cache_data(self) -> Deferred: async def _expire_url_cache_data(self) -> None: """Clean up expired url cache content, media and thumbnails.""" - # TODO: Delete from backup media store assert self._worker_run_media_background_jobs diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index da78fcee5..18bf977d3 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -93,6 +93,11 @@ async def 
store_file(self, path: str, file_info: FileInfo) -> None: if file_info.server_name and not self.store_remote: return None + if file_info.url_cache: + # The URL preview cache is short lived and not worth offloading or + # backing up. + return None + if self.store_synchronous: # store_file is supposed to return an Awaitable, but guard # against improper implementations. @@ -110,6 +115,11 @@ async def store() -> None: run_in_background(store) async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: + if file_info.url_cache: + # Files in the URL preview cache definitely aren't stored here, + # so avoid any potentially slow I/O or network access. + return None + # store_file is supposed to return an Awaitable, but guard # against improper implementations. return await maybe_awaitable(self.backend.fetch(path, file_info)) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index d83dfacfe..4d09b5d07 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -21,6 +21,7 @@ from twisted.test.proto_helpers import AccumulatingProtocol from synapse.config.oembed import OEmbedEndpointConfig +from synapse.util.stringutils import parse_and_validate_mxc_uri from tests import unittest from tests.server import FakeTransport @@ -721,3 +722,132 @@ def test_oembed_format(self): "og:description": "Content Preview", }, ) + + def _download_image(self): + """Downloads an image into the URL cache. + + Returns: + A (host, media_id) tuple representing the MXC URI of the image. 
+ """ + self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")] + + channel = self.make_request( + "GET", + "preview_url?url=http://cdn.twitter.com/matrixdotorg", + shorthand=False, + await_result=False, + ) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: image/png\r\n\r\n" + % (len(SMALL_PNG),) + + SMALL_PNG + ) + + self.pump() + self.assertEqual(channel.code, 200) + body = channel.json_body + mxc_uri = body["og:image"] + host, _port, media_id = parse_and_validate_mxc_uri(mxc_uri) + self.assertIsNone(_port) + return host, media_id + + def test_storage_providers_exclude_files(self): + """Test that files are not stored in or fetched from storage providers.""" + host, media_id = self._download_image() + + rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id) + media_store_path = os.path.join(self.media_store_path, rel_file_path) + storage_provider_path = os.path.join(self.storage_path, rel_file_path) + + # Check storage + self.assertTrue(os.path.isfile(media_store_path)) + self.assertFalse( + os.path.isfile(storage_provider_path), + "URL cache file was unexpectedly stored in a storage provider", + ) + + # Check fetching + channel = self.make_request( + "GET", + f"download/{host}/{media_id}", + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual(channel.code, 200) + + # Move cached file into the storage provider + os.makedirs(os.path.dirname(storage_provider_path), exist_ok=True) + os.rename(media_store_path, storage_provider_path) + + channel = self.make_request( + "GET", + f"download/{host}/{media_id}", + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual( + channel.code, + 404, + "URL cache file was unexpectedly retrieved from 
a storage provider", + ) + + def test_storage_providers_exclude_thumbnails(self): + """Test that thumbnails are not stored in or fetched from storage providers.""" + host, media_id = self._download_image() + + rel_thumbnail_path = ( + self.preview_url.filepaths.url_cache_thumbnail_directory_rel(media_id) + ) + media_store_thumbnail_path = os.path.join( + self.media_store_path, rel_thumbnail_path + ) + storage_provider_thumbnail_path = os.path.join( + self.storage_path, rel_thumbnail_path + ) + + # Check storage + self.assertTrue(os.path.isdir(media_store_thumbnail_path)) + self.assertFalse( + os.path.isdir(storage_provider_thumbnail_path), + "URL cache thumbnails were unexpectedly stored in a storage provider", + ) + + # Check fetching + channel = self.make_request( + "GET", + f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale", + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual(channel.code, 200) + + # Remove the original, otherwise thumbnails will regenerate + rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id) + media_store_path = os.path.join(self.media_store_path, rel_file_path) + os.remove(media_store_path) + + # Move cached thumbnails into the storage provider + os.makedirs(os.path.dirname(storage_provider_thumbnail_path), exist_ok=True) + os.rename(media_store_thumbnail_path, storage_provider_thumbnail_path) + + channel = self.make_request( + "GET", + f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale", + shorthand=False, + await_result=False, + ) + self.pump() + self.assertEqual( + channel.code, + 404, + "URL cache thumbnail was unexpectedly retrieved from a storage provider", + ) From d37841787a9e152938ddb39af5bc1d93d04bc640 Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Mon, 27 Sep 2021 15:39:49 +0100 Subject: [PATCH 56/74] Sign the git tag in release script (#10925) --- changelog.d/10925.misc | 1 + scripts-dev/release.py | 2 +- 2 files changed, 2 
insertions(+), 1 deletion(-) create mode 100644 changelog.d/10925.misc diff --git a/changelog.d/10925.misc b/changelog.d/10925.misc new file mode 100644 index 000000000..0c8027ecc --- /dev/null +++ b/changelog.d/10925.misc @@ -0,0 +1 @@ +Update release script to sign the newly created git tags. diff --git a/scripts-dev/release.py b/scripts-dev/release.py index a339260c4..ab2d860ab 100755 --- a/scripts-dev/release.py +++ b/scripts-dev/release.py @@ -276,7 +276,7 @@ def tag(gh_token: Optional[str]): if click.confirm("Edit text?", default=False): changes = click.edit(changes, require_save=False) - repo.create_tag(tag_name, message=changes) + repo.create_tag(tag_name, message=changes, sign=True) if not click.confirm("Push tag to GitHub?", default=True): print("") From 707d5e4e48e839dabd34e4b67426fe8382a2c978 Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 10:37:58 +0100 Subject: [PATCH 57/74] Encode JSON responses on a thread in C, mk2 (#10905) Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library. Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this: 1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or 2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types. 
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel). --- changelog.d/10905.feature | 1 + synapse/http/server.py | 72 +++++++++++++++++++++++++++++-------- synapse/push/emailpusher.py | 2 +- synapse/util/iterutils.py | 19 ++++++++-- 4 files changed, 76 insertions(+), 18 deletions(-) create mode 100644 changelog.d/10905.feature diff --git a/changelog.d/10905.feature b/changelog.d/10905.feature new file mode 100644 index 000000000..07e7b2c6a --- /dev/null +++ b/changelog.d/10905.feature @@ -0,0 +1 @@ +Speed up responding with large JSON objects to requests. diff --git a/synapse/http/server.py b/synapse/http/server.py index e28b56abb..1a50305dc 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -21,7 +21,6 @@ import urllib from http import HTTPStatus from inspect import isawaitable -from io import BytesIO from typing import ( Any, Awaitable, @@ -37,7 +36,7 @@ ) import jinja2 -from canonicaljson import iterencode_canonical_json +from canonicaljson import encode_canonical_json from typing_extensions import Protocol from zope.interface import implementer @@ -45,7 +44,7 @@ from twisted.python import failure from twisted.web import resource from twisted.web.server import NOT_DONE_YET, Request -from twisted.web.static import File, NoRangeStaticProducer +from twisted.web.static import File from twisted.web.util import redirectTo from synapse.api.errors import ( @@ -56,10 +55,11 @@ UnrecognizedRequestError, ) from synapse.http.site import SynapseRequest -from synapse.logging.context import preserve_fn +from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background from synapse.logging.opentracing import trace_servlet from synapse.util import json_encoder from synapse.util.caches import intern_dict +from synapse.util.iterutils import chunk_seq logger = logging.getLogger(__name__) @@ -620,12 +620,11 @@ def stopProducing(self) -> None: 
self._request = None -def _encode_json_bytes(json_object: Any) -> Iterator[bytes]: +def _encode_json_bytes(json_object: Any) -> bytes: """ Encode an object into JSON. Returns an iterator of bytes. """ - for chunk in json_encoder.iterencode(json_object): - yield chunk.encode("utf-8") + return json_encoder.encode(json_object).encode("utf-8") def respond_with_json( @@ -659,7 +658,7 @@ def respond_with_json( return None if canonical_json: - encoder = iterencode_canonical_json + encoder = encode_canonical_json else: encoder = _encode_json_bytes @@ -670,7 +669,9 @@ def respond_with_json( if send_cors: set_cors_headers(request) - _ByteProducer(request, encoder(json_object)) + run_in_background( + _async_write_json_to_request_in_thread, request, encoder, json_object + ) return NOT_DONE_YET @@ -706,15 +707,56 @@ def respond_with_json_bytes( if send_cors: set_cors_headers(request) - # note that this is zero-copy (the bytesio shares a copy-on-write buffer with - # the original `bytes`). - bytes_io = BytesIO(json_bytes) - - producer = NoRangeStaticProducer(request, bytes_io) - producer.start() + _write_bytes_to_request(request, json_bytes) return NOT_DONE_YET +async def _async_write_json_to_request_in_thread( + request: SynapseRequest, + json_encoder: Callable[[Any], bytes], + json_object: Any, +): + """Encodes the given JSON object on a thread and then writes it to the + request. + + This is done so that encoding large JSON objects doesn't block the reactor + thread. + + Note: We don't use JsonEncoder.iterencode here as that falls back to the + Python implementation (rather than the C backend), which is *much* more + expensive. + """ + + json_str = await defer_to_thread(request.reactor, json_encoder, json_object) + + _write_bytes_to_request(request, json_str) + + +def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None: + """Writes the bytes to the request using an appropriate producer. 
+ + Note: This should be used instead of `Request.write` to correctly handle + large response bodies. + """ + + # The problem with dumping all of the response into the `Request` object at + # once (via `Request.write`) is that doing so starts the timeout for the + # next request to be received: so if it takes longer than 60s to stream back + # the response to the client, the client never gets it. + # + # The correct solution is to use a Producer; then the timeout is only + # started once all of the content is sent over the TCP connection. + + # To make sure we don't write all of the bytes at once we split it up into + # chunks. + chunk_size = 4096 + bytes_generator = chunk_seq(bytes_to_write, chunk_size) + + # We use a `_ByteProducer` here rather than `NoRangeStaticProducer` as the + # unit tests can't cope with being given a pull producer. + _ByteProducer(request, bytes_generator) + + def set_cors_headers(request: Request): """Set the CORS headers so that javascript running in a web browsers can use this API diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index e08e125cb..cf5abdfbd 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -184,7 +184,7 @@ async def _unsafe_process(self) -> None: should_notify_at = max(notif_ready_at, room_ready_at) - if should_notify_at < self.clock.time_msec(): + if should_notify_at <= self.clock.time_msec(): # one of our notifications is ready for sending, so we send # *one* email updating the user on their notifications, # we then consider all previously outstanding notifications diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index 8ac3eab2f..4938ddf70 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -21,13 +21,28 @@ Iterable, Iterator, Mapping, - Sequence, Set, + Sized, Tuple, TypeVar, ) +from typing_extensions import Protocol + T = TypeVar("T") +S = TypeVar("S", bound="_SelfSlice") + + +class _SelfSlice(Sized, Protocol): + """A helper 
protocol that matches types where taking a slice results in the + same type being returned. + + This is more specific than `Sequence`, which allows another `Sequence` to be + returned. + """ + + def __getitem__(self: S, i: slice) -> S: + ... def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]: @@ -46,7 +61,7 @@ def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]: return iter(lambda: tuple(islice(sourceiter, size)), ()) -def chunk_seq(iseq: Sequence[T], maxlen: int) -> Iterable[Sequence[T]]: +def chunk_seq(iseq: S, maxlen: int) -> Iterator[S]: """Split the given sequence into chunks of the given size The last chunk may be shorter than the given size. From a8bbf085761095c49b04af1a08fc67b1a781617d Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 12:13:51 +0100 Subject: [PATCH 58/74] Fix debian package builds. (#10931) This was due to dh-virtualenv builds being broken due to Sphinx removing deprecated APIs. --- changelog.d/10931.bugfix | 1 + docker/Dockerfile-dhvirtualenv | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10931.bugfix diff --git a/changelog.d/10931.bugfix b/changelog.d/10931.bugfix new file mode 100644 index 000000000..3f30c9ccf --- /dev/null +++ b/changelog.d/10931.bugfix @@ -0,0 +1 @@ +Fix debian builds due to dh-virtualenv no longer being able to build their docs. diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv index 017be8555..1dd88140c 100644 --- a/docker/Dockerfile-dhvirtualenv +++ b/docker/Dockerfile-dhvirtualenv @@ -47,8 +47,9 @@ RUN apt-get update -qq -o Acquire::Languages=none \ && cd /dh-virtualenv \ && env DEBIAN_FRONTEND=noninteractive mk-build-deps -ri -t "apt-get -y --no-install-recommends" -# build it -RUN cd /dh-virtualenv && dpkg-buildpackage -us -uc -b +# Build it. Note that building the docs doesn't work due to differences in +# Sphinx APIs across versions/distros. 
+RUN cd /dh-virtualenv && DEB_BUILD_OPTIONS=nodoc dpkg-buildpackage -us -uc -b ### ### Stage 1 From 3c50192d3f564ecc2e70441157f309610bbee1cd Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 13:42:21 +0100 Subject: [PATCH 59/74] 1.44.0rc1 --- CHANGES.md | 72 +++++++++++++++++++++++++++++++++++++++ changelog.d/10659.misc | 1 - changelog.d/10690.bugfix | 1 - changelog.d/10776.feature | 1 - changelog.d/10777.misc | 1 - changelog.d/10782.bugfix | 1 - changelog.d/10785.misc | 1 - changelog.d/10796.misc | 1 - changelog.d/10807.bugfix | 1 - changelog.d/10810.bugfix | 1 - changelog.d/10812.misc | 1 - changelog.d/10814.feature | 1 - changelog.d/10815.misc | 1 - changelog.d/10816.misc | 1 - changelog.d/10817.misc | 1 - changelog.d/10819.feature | 1 - changelog.d/10820.misc | 1 - changelog.d/10823.misc | 1 - changelog.d/10826.misc | 2 -- changelog.d/10827.bugfix | 1 - changelog.d/10829.misc | 1 - changelog.d/10831.misc | 1 - changelog.d/10833.misc | 1 - changelog.d/10834.misc | 1 - changelog.d/10835.misc | 1 - changelog.d/10838.misc | 1 - changelog.d/10839.misc | 1 - changelog.d/10843.bugfix | 1 - changelog.d/10845.doc | 1 - changelog.d/10856.misc | 1 - changelog.d/10859.bugfix | 1 - changelog.d/10865.doc | 1 - changelog.d/10867.misc | 1 - changelog.d/10868.feature | 1 - changelog.d/10869.doc | 1 - changelog.d/10873.bugfix | 1 - changelog.d/10875.bugfix | 1 - changelog.d/10879.misc | 1 - changelog.d/10880.misc | 1 - changelog.d/10881.bugfix | 1 - changelog.d/10883.misc | 1 - changelog.d/10884.misc | 1 - changelog.d/10885.misc | 1 - changelog.d/10887.bugfix | 1 - changelog.d/10889.misc | 1 - changelog.d/10891.misc | 1 - changelog.d/10893.misc | 1 - changelog.d/10896.misc | 1 - changelog.d/10897.misc | 1 - changelog.d/10898.feature | 1 - changelog.d/10901.misc | 1 - changelog.d/10905.feature | 1 - changelog.d/10906.misc | 1 - changelog.d/10907.bugfix | 1 - changelog.d/10911.bugfix | 1 - changelog.d/10913.bugfix | 1 - changelog.d/10917.misc | 
1 - changelog.d/10925.misc | 1 - changelog.d/10931.bugfix | 1 - debian/changelog | 6 ++++ synapse/__init__.py | 2 +- 61 files changed, 79 insertions(+), 60 deletions(-) delete mode 100644 changelog.d/10659.misc delete mode 100644 changelog.d/10690.bugfix delete mode 100644 changelog.d/10776.feature delete mode 100644 changelog.d/10777.misc delete mode 100644 changelog.d/10782.bugfix delete mode 100644 changelog.d/10785.misc delete mode 100644 changelog.d/10796.misc delete mode 100644 changelog.d/10807.bugfix delete mode 100644 changelog.d/10810.bugfix delete mode 100644 changelog.d/10812.misc delete mode 100644 changelog.d/10814.feature delete mode 100644 changelog.d/10815.misc delete mode 100644 changelog.d/10816.misc delete mode 100644 changelog.d/10817.misc delete mode 100644 changelog.d/10819.feature delete mode 100644 changelog.d/10820.misc delete mode 100644 changelog.d/10823.misc delete mode 100644 changelog.d/10826.misc delete mode 100644 changelog.d/10827.bugfix delete mode 100644 changelog.d/10829.misc delete mode 100644 changelog.d/10831.misc delete mode 100644 changelog.d/10833.misc delete mode 100644 changelog.d/10834.misc delete mode 100644 changelog.d/10835.misc delete mode 100644 changelog.d/10838.misc delete mode 100644 changelog.d/10839.misc delete mode 100644 changelog.d/10843.bugfix delete mode 100644 changelog.d/10845.doc delete mode 100644 changelog.d/10856.misc delete mode 100644 changelog.d/10859.bugfix delete mode 100644 changelog.d/10865.doc delete mode 100644 changelog.d/10867.misc delete mode 100644 changelog.d/10868.feature delete mode 100644 changelog.d/10869.doc delete mode 100644 changelog.d/10873.bugfix delete mode 100644 changelog.d/10875.bugfix delete mode 100644 changelog.d/10879.misc delete mode 100644 changelog.d/10880.misc delete mode 100644 changelog.d/10881.bugfix delete mode 100644 changelog.d/10883.misc delete mode 100644 changelog.d/10884.misc delete mode 100644 changelog.d/10885.misc delete mode 100644 
changelog.d/10887.bugfix delete mode 100644 changelog.d/10889.misc delete mode 100644 changelog.d/10891.misc delete mode 100644 changelog.d/10893.misc delete mode 100644 changelog.d/10896.misc delete mode 100644 changelog.d/10897.misc delete mode 100644 changelog.d/10898.feature delete mode 100644 changelog.d/10901.misc delete mode 100644 changelog.d/10905.feature delete mode 100644 changelog.d/10906.misc delete mode 100644 changelog.d/10907.bugfix delete mode 100644 changelog.d/10911.bugfix delete mode 100644 changelog.d/10913.bugfix delete mode 100644 changelog.d/10917.misc delete mode 100644 changelog.d/10925.misc delete mode 100644 changelog.d/10931.bugfix diff --git a/CHANGES.md b/CHANGES.md index 652f4b795..da4d98ac2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,75 @@ +Synapse 1.44.0rc1 (2021-09-28) +============================== + +Features +-------- + +- Only allow the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send?chunk_id=xxx` endpoint to connect to an already existing insertion event. ([\#10776](https://github.com/matrix-org/synapse/issues/10776)) +- Improve oEmbed previews by processing the author name, photo, and video information. ([\#10814](https://github.com/matrix-org/synapse/issues/10814), [\#10819](https://github.com/matrix-org/synapse/issues/10819)) +- Speed up responding with large JSON objects to requests. ([\#10868](https://github.com/matrix-org/synapse/issues/10868), [\#10905](https://github.com/matrix-org/synapse/issues/10905)) +- Add a `user_may_create_room_with_invites` spam checker callback to allow modules to allow or deny a room creation request based on the invites and/or 3PID invites it includes. ([\#10898](https://github.com/matrix-org/synapse/issues/10898)) + + +Bugfixes +-------- + +- Fix a long-standing bug that caused an `AssertionError` when purging history in certain rooms. Contributed by @Kokokokoka. 
([\#10690](https://github.com/matrix-org/synapse/issues/10690)) +- Fix a long-standing bug which caused deactivated users that were later reactivated to be missing from the user directory. ([\#10782](https://github.com/matrix-org/synapse/issues/10782)) +- Allow sending a membership event to unban a user. Contributed by @aaronraimist. ([\#10807](https://github.com/matrix-org/synapse/issues/10807)) +- Fix a case where logging contexts would go missing when federation requests time out. ([\#10810](https://github.com/matrix-org/synapse/issues/10810)) +- Fix error in deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. ([\#10827](https://github.com/matrix-org/synapse/issues/10827)) +- Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. ([\#10843](https://github.com/matrix-org/synapse/issues/10843)) +- Fix a bug in Unicode support of the room search admin API. It is now possible to search for rooms with non-ASCII characters. ([\#10859](https://github.com/matrix-org/synapse/issues/10859)) +- Fix a bug introduced in Synapse 1.37.0 which caused `knock` events which we sent to remote servers to be incorrectly stored in the local database. ([\#10873](https://github.com/matrix-org/synapse/issues/10873)) +- Fix invalidating one-time key count cache after claiming keys. Contributed by Tulir at Beeper. ([\#10875](https://github.com/matrix-org/synapse/issues/10875)) +- Fix application service users being subject to MAU blocking if MAU had been reached, even if configured not to be blocked. ([\#10881](https://github.com/matrix-org/synapse/issues/10881)) +- Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). 
([\#10887](https://github.com/matrix-org/synapse/issues/10887)) +- Fix a long-standing bug which could cause events pulled over federation to be incorrectly rejected. ([\#10907](https://github.com/matrix-org/synapse/issues/10907)) +- Avoid storing URL cache files in storage providers. Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space. ([\#10911](https://github.com/matrix-org/synapse/issues/10911)) +- Fix race conditions when creating media store and config directories. ([\#10913](https://github.com/matrix-org/synapse/issues/10913)) +- Fix debian builds due to dh-virtualenv no longer being able to build their docs. ([\#10931](https://github.com/matrix-org/synapse/issues/10931)) + + +Improved Documentation +---------------------- + +- Fix some crashes in the Module API example code, by adding JSON encoding/decoding. ([\#10845](https://github.com/matrix-org/synapse/issues/10845)) +- Add developer documentation about experimental configuration flags. ([\#10865](https://github.com/matrix-org/synapse/issues/10865)) +- Properly remove deleted files from GitHub pages when generating the documentation. ([\#10869](https://github.com/matrix-org/synapse/issues/10869)) + + +Internal Changes +---------------- + +- Fix GitHub Actions config so we can run sytest on synapse from parallel branches. ([\#10659](https://github.com/matrix-org/synapse/issues/10659)) +- Split out [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) meta events to their own fields in the `/batch_send` response. ([\#10777](https://github.com/matrix-org/synapse/issues/10777)) +- Add missing type hints to REST servlets. ([\#10785](https://github.com/matrix-org/synapse/issues/10785), [\#10817](https://github.com/matrix-org/synapse/issues/10817)) +- Simplify the internal logic which maintains the user directory database tables. 
([\#10796](https://github.com/matrix-org/synapse/issues/10796)) +- Use direct references to config flags. ([\#10812](https://github.com/matrix-org/synapse/issues/10812), [\#10885](https://github.com/matrix-org/synapse/issues/10885), [\#10893](https://github.com/matrix-org/synapse/issues/10893), [\#10897](https://github.com/matrix-org/synapse/issues/10897)) +- Specify the type of token in generic "Invalid token" error messages. ([\#10815](https://github.com/matrix-org/synapse/issues/10815)) +- Make `StateFilter` frozen so it is hashable. ([\#10816](https://github.com/matrix-org/synapse/issues/10816)) +- Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error. ([\#10820](https://github.com/matrix-org/synapse/issues/10820)) +- Add type hints to the state database. ([\#10823](https://github.com/matrix-org/synapse/issues/10823)) +- Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you + haven't synced recently. ([\#10826](https://github.com/matrix-org/synapse/issues/10826)) +- Track cache eviction rates more finely in Prometheus' monitoring. ([\#10829](https://github.com/matrix-org/synapse/issues/10829)) +- Add missing type hints to handlers. ([\#10831](https://github.com/matrix-org/synapse/issues/10831), [\#10856](https://github.com/matrix-org/synapse/issues/10856)) +- Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data. ([\#10833](https://github.com/matrix-org/synapse/issues/10833)) +- Factor out PNG image data to a constant to be used in several tests. ([\#10834](https://github.com/matrix-org/synapse/issues/10834)) +- Add a test to ensure state events sent by modules get persisted correctly. 
([\#10835](https://github.com/matrix-org/synapse/issues/10835)) +- Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) fields and event types from `chunk` to `batch` to match the `/batch_send` endpoint. ([\#10838](https://github.com/matrix-org/synapse/issues/10838)) +- Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id`. ([\#10839](https://github.com/matrix-org/synapse/issues/10839)) +- Add type hints to `synapse.http.site`. ([\#10867](https://github.com/matrix-org/synapse/issues/10867)) +- Include outlier status when we log V2 or V3 events. ([\#10879](https://github.com/matrix-org/synapse/issues/10879)) +- Break down Grafana's cache expiry time series based on reason for eviction---see #10829. ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) +- Clean up some of the federation event authentication code for clarity. ([\#10883](https://github.com/matrix-org/synapse/issues/10883), [\#10884](https://github.com/matrix-org/synapse/issues/10884), [\#10896](https://github.com/matrix-org/synapse/issues/10896), [\#10901](https://github.com/matrix-org/synapse/issues/10901)) +- Clean up some unnecessary parentheses in places around the codebase. ([\#10889](https://github.com/matrix-org/synapse/issues/10889)) +- Improve type hinting in the user directory code. ([\#10891](https://github.com/matrix-org/synapse/issues/10891)) +- Update development testing script `test_postgresql.sh` to use a supported Python version and make re-runs quicker. ([\#10906](https://github.com/matrix-org/synapse/issues/10906)) +- Document and summarize changes in schema version `61` - `64`. ([\#10917](https://github.com/matrix-org/synapse/issues/10917)) +- Update release script to sign the newly created git tags. 
([\#10925](https://github.com/matrix-org/synapse/issues/10925)) + + Synapse 1.43.0 (2021-09-21) =========================== diff --git a/changelog.d/10659.misc b/changelog.d/10659.misc deleted file mode 100644 index d677a521c..000000000 --- a/changelog.d/10659.misc +++ /dev/null @@ -1 +0,0 @@ -Fix GitHub Actions config so we can run sytest on synapse from parallel branches. \ No newline at end of file diff --git a/changelog.d/10690.bugfix b/changelog.d/10690.bugfix deleted file mode 100644 index 059eea746..000000000 --- a/changelog.d/10690.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug that caused an `AssertionError` when purging history in certain rooms. Contributed by @Kokokokoka. diff --git a/changelog.d/10776.feature b/changelog.d/10776.feature deleted file mode 100644 index aec0685a3..000000000 --- a/changelog.d/10776.feature +++ /dev/null @@ -1 +0,0 @@ -Only allow the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send?chunk_id=xxx` endpoint to connect to an already existing insertion event. diff --git a/changelog.d/10777.misc b/changelog.d/10777.misc deleted file mode 100644 index aed78a16f..000000000 --- a/changelog.d/10777.misc +++ /dev/null @@ -1 +0,0 @@ -Split out [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) meta events to their own fields in the `/batch_send` response. diff --git a/changelog.d/10782.bugfix b/changelog.d/10782.bugfix deleted file mode 100644 index 3e410447c..000000000 --- a/changelog.d/10782.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug which caused deactivated users that were later reactivated to be missing from the user directory. \ No newline at end of file diff --git a/changelog.d/10785.misc b/changelog.d/10785.misc deleted file mode 100644 index 39a37b90b..000000000 --- a/changelog.d/10785.misc +++ /dev/null @@ -1 +0,0 @@ -Add missing type hints to REST servlets. 
diff --git a/changelog.d/10796.misc b/changelog.d/10796.misc deleted file mode 100644 index 1873b2386..000000000 --- a/changelog.d/10796.misc +++ /dev/null @@ -1 +0,0 @@ -Simplify the internal logic which maintains the user directory database tables. \ No newline at end of file diff --git a/changelog.d/10807.bugfix b/changelog.d/10807.bugfix deleted file mode 100644 index be03f5c73..000000000 --- a/changelog.d/10807.bugfix +++ /dev/null @@ -1 +0,0 @@ -Allow sending a membership event to unban a user. Contributed by @aaronraimist. \ No newline at end of file diff --git a/changelog.d/10810.bugfix b/changelog.d/10810.bugfix deleted file mode 100644 index 43e91f1f5..000000000 --- a/changelog.d/10810.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a case where logging contexts would go missing when federation requests time out. diff --git a/changelog.d/10812.misc b/changelog.d/10812.misc deleted file mode 100644 index 586a0b3a9..000000000 --- a/changelog.d/10812.misc +++ /dev/null @@ -1 +0,0 @@ -Use direct references to config flags. diff --git a/changelog.d/10814.feature b/changelog.d/10814.feature deleted file mode 100644 index 4fa95a6cc..000000000 --- a/changelog.d/10814.feature +++ /dev/null @@ -1 +0,0 @@ -Improve oEmbed previews by processing the author name, photo, and video information. diff --git a/changelog.d/10815.misc b/changelog.d/10815.misc deleted file mode 100644 index fc2534dc1..000000000 --- a/changelog.d/10815.misc +++ /dev/null @@ -1 +0,0 @@ -Specify the type of token in generic "Invalid token" error messages. \ No newline at end of file diff --git a/changelog.d/10816.misc b/changelog.d/10816.misc deleted file mode 100644 index 2ca55b334..000000000 --- a/changelog.d/10816.misc +++ /dev/null @@ -1 +0,0 @@ -Make `StateFilter` frozen so it is hashable. diff --git a/changelog.d/10817.misc b/changelog.d/10817.misc deleted file mode 100644 index 39a37b90b..000000000 --- a/changelog.d/10817.misc +++ /dev/null @@ -1 +0,0 @@ -Add missing type hints to REST servlets. 
diff --git a/changelog.d/10819.feature b/changelog.d/10819.feature deleted file mode 100644 index 4fa95a6cc..000000000 --- a/changelog.d/10819.feature +++ /dev/null @@ -1 +0,0 @@ -Improve oEmbed previews by processing the author name, photo, and video information. diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc deleted file mode 100644 index 4373bf6f6..000000000 --- a/changelog.d/10820.misc +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error. \ No newline at end of file diff --git a/changelog.d/10823.misc b/changelog.d/10823.misc deleted file mode 100644 index 053296990..000000000 --- a/changelog.d/10823.misc +++ /dev/null @@ -1 +0,0 @@ -Add type hints to the state database. diff --git a/changelog.d/10826.misc b/changelog.d/10826.misc deleted file mode 100644 index 53e56fc36..000000000 --- a/changelog.d/10826.misc +++ /dev/null @@ -1,2 +0,0 @@ -Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you -haven't synced recently. diff --git a/changelog.d/10827.bugfix b/changelog.d/10827.bugfix deleted file mode 100644 index 11a618bf8..000000000 --- a/changelog.d/10827.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix error in deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. diff --git a/changelog.d/10829.misc b/changelog.d/10829.misc deleted file mode 100644 index ac5fd6b04..000000000 --- a/changelog.d/10829.misc +++ /dev/null @@ -1 +0,0 @@ -Track cache eviction rates more finely in Prometheus' monitoring. \ No newline at end of file diff --git a/changelog.d/10831.misc b/changelog.d/10831.misc deleted file mode 100644 index f09af2e00..000000000 --- a/changelog.d/10831.misc +++ /dev/null @@ -1 +0,0 @@ -Add missing type hints to handlers. 
diff --git a/changelog.d/10833.misc b/changelog.d/10833.misc deleted file mode 100644 index f23c0a1a0..000000000 --- a/changelog.d/10833.misc +++ /dev/null @@ -1 +0,0 @@ -Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data. diff --git a/changelog.d/10834.misc b/changelog.d/10834.misc deleted file mode 100644 index 037695e6e..000000000 --- a/changelog.d/10834.misc +++ /dev/null @@ -1 +0,0 @@ -Factor out PNG image data to a constant to be used in several tests. diff --git a/changelog.d/10835.misc b/changelog.d/10835.misc deleted file mode 100644 index 0c3d13477..000000000 --- a/changelog.d/10835.misc +++ /dev/null @@ -1 +0,0 @@ -Add a test to ensure state events sent by modules get persisted correctly. diff --git a/changelog.d/10838.misc b/changelog.d/10838.misc deleted file mode 100644 index b1977d0a2..000000000 --- a/changelog.d/10838.misc +++ /dev/null @@ -1 +0,0 @@ -Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) fields and event types from `chunk` to `batch` to match the `/batch_send` endpoint. diff --git a/changelog.d/10839.misc b/changelog.d/10839.misc deleted file mode 100644 index d0e10f31d..000000000 --- a/changelog.d/10839.misc +++ /dev/null @@ -1 +0,0 @@ -Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id`. diff --git a/changelog.d/10843.bugfix b/changelog.d/10843.bugfix deleted file mode 100644 index 5027a1dbe..000000000 --- a/changelog.d/10843.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. 
diff --git a/changelog.d/10845.doc b/changelog.d/10845.doc deleted file mode 100644 index a13c845ae..000000000 --- a/changelog.d/10845.doc +++ /dev/null @@ -1 +0,0 @@ -Fix some crashes in the Module API example code, by adding JSON encoding/decoding. diff --git a/changelog.d/10856.misc b/changelog.d/10856.misc deleted file mode 100644 index f09af2e00..000000000 --- a/changelog.d/10856.misc +++ /dev/null @@ -1 +0,0 @@ -Add missing type hints to handlers. diff --git a/changelog.d/10859.bugfix b/changelog.d/10859.bugfix deleted file mode 100644 index c1bfe22d5..000000000 --- a/changelog.d/10859.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug in Unicode support of the room search admin API. It is now possible to search for rooms with non-ASCII characters. \ No newline at end of file diff --git a/changelog.d/10865.doc b/changelog.d/10865.doc deleted file mode 100644 index deeb0eedf..000000000 --- a/changelog.d/10865.doc +++ /dev/null @@ -1 +0,0 @@ -Add developer documentation about experimental configuration flags. diff --git a/changelog.d/10867.misc b/changelog.d/10867.misc deleted file mode 100644 index 01e51fbc6..000000000 --- a/changelog.d/10867.misc +++ /dev/null @@ -1 +0,0 @@ -Add type hints to `synapse.http.site`. diff --git a/changelog.d/10868.feature b/changelog.d/10868.feature deleted file mode 100644 index 07e7b2c6a..000000000 --- a/changelog.d/10868.feature +++ /dev/null @@ -1 +0,0 @@ -Speed up responding with large JSON objects to requests. diff --git a/changelog.d/10869.doc b/changelog.d/10869.doc deleted file mode 100644 index c11738607..000000000 --- a/changelog.d/10869.doc +++ /dev/null @@ -1 +0,0 @@ -Properly remove deleted files from GitHub pages when generating the documentation. 
diff --git a/changelog.d/10873.bugfix b/changelog.d/10873.bugfix deleted file mode 100644 index 32b2e50fd..000000000 --- a/changelog.d/10873.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.37.0 which caused `knock` events which we sent to remote servers to be incorrectly stored in the local database. diff --git a/changelog.d/10875.bugfix b/changelog.d/10875.bugfix deleted file mode 100644 index 6f370da5c..000000000 --- a/changelog.d/10875.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix invalidating one-time key count cache after claiming keys. Contributed by Tulir at Beeper. diff --git a/changelog.d/10879.misc b/changelog.d/10879.misc deleted file mode 100644 index acc04930f..000000000 --- a/changelog.d/10879.misc +++ /dev/null @@ -1 +0,0 @@ -Include outlier status when we log V2 or V3 events. diff --git a/changelog.d/10880.misc b/changelog.d/10880.misc deleted file mode 100644 index 5f58d6198..000000000 --- a/changelog.d/10880.misc +++ /dev/null @@ -1 +0,0 @@ -Break down Grafana's cache expiry time series based on reason for eviction---see #10829. \ No newline at end of file diff --git a/changelog.d/10881.bugfix b/changelog.d/10881.bugfix deleted file mode 100644 index 0a8905cc4..000000000 --- a/changelog.d/10881.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix application service users being subject to MAU blocking if MAU had been reached, even if configured not to be blocked. diff --git a/changelog.d/10883.misc b/changelog.d/10883.misc deleted file mode 100644 index 9a765435d..000000000 --- a/changelog.d/10883.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up some of the federation event authentication code for clarity. diff --git a/changelog.d/10884.misc b/changelog.d/10884.misc deleted file mode 100644 index 9a765435d..000000000 --- a/changelog.d/10884.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up some of the federation event authentication code for clarity. 
diff --git a/changelog.d/10885.misc b/changelog.d/10885.misc deleted file mode 100644 index 586a0b3a9..000000000 --- a/changelog.d/10885.misc +++ /dev/null @@ -1 +0,0 @@ -Use direct references to config flags. diff --git a/changelog.d/10887.bugfix b/changelog.d/10887.bugfix deleted file mode 100644 index 2d1f67489..000000000 --- a/changelog.d/10887.bugfix +++ /dev/null @@ -1 +0,0 @@ -Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). diff --git a/changelog.d/10889.misc b/changelog.d/10889.misc deleted file mode 100644 index 6d60188f5..000000000 --- a/changelog.d/10889.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up some unnecessary parentheses in places around the codebase. \ No newline at end of file diff --git a/changelog.d/10891.misc b/changelog.d/10891.misc deleted file mode 100644 index 6eecea406..000000000 --- a/changelog.d/10891.misc +++ /dev/null @@ -1 +0,0 @@ -Improve type hinting in the user directory code. \ No newline at end of file diff --git a/changelog.d/10893.misc b/changelog.d/10893.misc deleted file mode 100644 index 586a0b3a9..000000000 --- a/changelog.d/10893.misc +++ /dev/null @@ -1 +0,0 @@ -Use direct references to config flags. diff --git a/changelog.d/10896.misc b/changelog.d/10896.misc deleted file mode 100644 index 41de99584..000000000 --- a/changelog.d/10896.misc +++ /dev/null @@ -1 +0,0 @@ - Clean up some of the federation event authentication code for clarity. diff --git a/changelog.d/10897.misc b/changelog.d/10897.misc deleted file mode 100644 index 586a0b3a9..000000000 --- a/changelog.d/10897.misc +++ /dev/null @@ -1 +0,0 @@ -Use direct references to config flags. 
diff --git a/changelog.d/10898.feature b/changelog.d/10898.feature deleted file mode 100644 index 97fa39fd0..000000000 --- a/changelog.d/10898.feature +++ /dev/null @@ -1 +0,0 @@ -Add a `user_may_create_room_with_invites` spam checker callback to allow modules to allow or deny a room creation request based on the invites and/or 3PID invites it includes. diff --git a/changelog.d/10901.misc b/changelog.d/10901.misc deleted file mode 100644 index 9a765435d..000000000 --- a/changelog.d/10901.misc +++ /dev/null @@ -1 +0,0 @@ -Clean up some of the federation event authentication code for clarity. diff --git a/changelog.d/10905.feature b/changelog.d/10905.feature deleted file mode 100644 index 07e7b2c6a..000000000 --- a/changelog.d/10905.feature +++ /dev/null @@ -1 +0,0 @@ -Speed up responding with large JSON objects to requests. diff --git a/changelog.d/10906.misc b/changelog.d/10906.misc deleted file mode 100644 index 20a1cbfbd..000000000 --- a/changelog.d/10906.misc +++ /dev/null @@ -1 +0,0 @@ -Update development testing script `test_postgresql.sh` to use a supported Python version and make re-runs quicker. \ No newline at end of file diff --git a/changelog.d/10907.bugfix b/changelog.d/10907.bugfix deleted file mode 100644 index 601b341f9..000000000 --- a/changelog.d/10907.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug which could cause events pulled over federation to be incorrectly rejected. diff --git a/changelog.d/10911.bugfix b/changelog.d/10911.bugfix deleted file mode 100644 index 96e36bb15..000000000 --- a/changelog.d/10911.bugfix +++ /dev/null @@ -1 +0,0 @@ -Avoid storing URL cache files in storage providers. Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space. 
diff --git a/changelog.d/10913.bugfix b/changelog.d/10913.bugfix deleted file mode 100644 index a0015c824..000000000 --- a/changelog.d/10913.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix race conditions when creating media store and config directories. diff --git a/changelog.d/10917.misc b/changelog.d/10917.misc deleted file mode 100644 index 9ce6eef94..000000000 --- a/changelog.d/10917.misc +++ /dev/null @@ -1 +0,0 @@ -Document and summarize changes in schema version `61` - `64`. diff --git a/changelog.d/10925.misc b/changelog.d/10925.misc deleted file mode 100644 index 0c8027ecc..000000000 --- a/changelog.d/10925.misc +++ /dev/null @@ -1 +0,0 @@ -Update release script to sign the newly created git tags. diff --git a/changelog.d/10931.bugfix b/changelog.d/10931.bugfix deleted file mode 100644 index 3f30c9ccf..000000000 --- a/changelog.d/10931.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix debian builds due to dh-virtualenv no longer being able to build their docs. diff --git a/debian/changelog b/debian/changelog index 4b07d0412..191bb97c5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.44.0~rc1) stable; urgency=medium + + * New synapse release 1.44.0~rc1. + + -- Synapse Packaging team <packages@matrix.org> Tue, 28 Sep 2021 13:41:28 +0100 + matrix-synapse-py3 (1.43.0) stable; urgency=medium * New synapse release 1.43.0. 
diff --git a/synapse/__init__.py b/synapse/__init__.py index 5f5cff1df..a1fec8ad2 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ except ImportError: pass -__version__ = "1.43.0" +__version__ = "1.44.0rc1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From bc69d49362dfa0ee2e917427c61a7b67c0d78b34 Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 13:48:42 +0100 Subject: [PATCH 60/74] Fixup changelog --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index da4d98ac2..a7a9abf79 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -19,8 +19,8 @@ Bugfixes - Fix a case where logging contexts would go missing when federation requests time out. ([\#10810](https://github.com/matrix-org/synapse/issues/10810)) - Fix error in deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. ([\#10827](https://github.com/matrix-org/synapse/issues/10827)) - Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. ([\#10843](https://github.com/matrix-org/synapse/issues/10843)) -- Fix a bug in Unicode support of the room search admin API. It is now possible to search for rooms with non-ASCII characters. ([\#10859](https://github.com/matrix-org/synapse/issues/10859)) -- Fix a bug introduced in Synapse 1.37.0 which caused `knock` events which we sent to remote servers to be incorrectly stored in the local database. ([\#10873](https://github.com/matrix-org/synapse/issues/10873)) +- Fix a bug in Unicode support of the room search admin API breaking search for rooms with non-ASCII characters. 
([\#10859](https://github.com/matrix-org/synapse/issues/10859)) +- Fix a bug introduced in Synapse 1.37.0 which caused `knock` membership events which we sent to remote servers to be incorrectly stored in the local database. ([\#10873](https://github.com/matrix-org/synapse/issues/10873)) - Fix invalidating one-time key count cache after claiming keys. Contributed by Tulir at Beeper. ([\#10875](https://github.com/matrix-org/synapse/issues/10875)) - Fix application service users being subject to MAU blocking if MAU had been reached, even if configured not to be blocked. ([\#10881](https://github.com/matrix-org/synapse/issues/10881)) - Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). ([\#10887](https://github.com/matrix-org/synapse/issues/10887)) From 2b9d174791833d8eb8ee40d98cc59d187c2eb205 Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 13:50:05 +0100 Subject: [PATCH 61/74] Fixup changelog --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a7a9abf79..0b209edd4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -52,7 +52,7 @@ Internal Changes - Add type hints to the state database. ([\#10823](https://github.com/matrix-org/synapse/issues/10823)) - Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you haven't synced recently. ([\#10826](https://github.com/matrix-org/synapse/issues/10826)) -- Track cache eviction rates more finely in Prometheus' monitoring. ([\#10829](https://github.com/matrix-org/synapse/issues/10829)) +- Track cache eviction rates more finely in Prometheus's monitoring. ([\#10829](https://github.com/matrix-org/synapse/issues/10829)) - Add missing type hints to handlers. 
([\#10831](https://github.com/matrix-org/synapse/issues/10831), [\#10856](https://github.com/matrix-org/synapse/issues/10856)) - Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data. ([\#10833](https://github.com/matrix-org/synapse/issues/10833)) - Factor out PNG image data to a constant to be used in several tests. ([\#10834](https://github.com/matrix-org/synapse/issues/10834)) @@ -61,7 +61,7 @@ Internal Changes - Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id`. ([\#10839](https://github.com/matrix-org/synapse/issues/10839)) - Add type hints to `synapse.http.site`. ([\#10867](https://github.com/matrix-org/synapse/issues/10867)) - Include outlier status when we log V2 or V3 events. ([\#10879](https://github.com/matrix-org/synapse/issues/10879)) -- Break down Grafana's cache expiry time series based on reason for eviction---see #10829. ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) +- Break down Grafana's cache expiry time series based on reason for eviction, c.f. #10829. ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) - Clean up some of the federation event authentication code for clarity. ([\#10883](https://github.com/matrix-org/synapse/issues/10883), [\#10884](https://github.com/matrix-org/synapse/issues/10884), [\#10896](https://github.com/matrix-org/synapse/issues/10896), [\#10901](https://github.com/matrix-org/synapse/issues/10901)) - Clean up some unnecessary parentheses in places around the codebase. ([\#10889](https://github.com/matrix-org/synapse/issues/10889)) - Improve type hinting in the user directory code. 
([\#10891](https://github.com/matrix-org/synapse/issues/10891)) From 37bb93d1818eeda0d64c02cb772c8dee5596194f Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 14:36:19 +0100 Subject: [PATCH 62/74] Fix exception responding to request that has been closed (#10932) Introduced in #10905 --- changelog.d/10932.feature | 1 + synapse/http/server.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 changelog.d/10932.feature diff --git a/changelog.d/10932.feature b/changelog.d/10932.feature new file mode 100644 index 000000000..07e7b2c6a --- /dev/null +++ b/changelog.d/10932.feature @@ -0,0 +1 @@ +Speed up responding with large JSON objects to requests. diff --git a/synapse/http/server.py b/synapse/http/server.py index 1a50305dc..0df1bfbee 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -561,9 +561,17 @@ def __init__( self._iterator = iterator self._paused = False - # Register the producer and start producing data. - self._request.registerProducer(self, True) - self.resumeProducing() + try: + self._request.registerProducer(self, True) + except RuntimeError as e: + logger.info("Connection disconnected before response was written: %r", e) + + # We drop our references to data we'll not use. 
+ self._request = None + self._iterator = iter(()) + else: + # Start producing if `registerProducer` was successful + self.resumeProducing() def _send_data(self, data: List[bytes]) -> None: """ From 62800a8fe3b531369c09bb859e90f4b97cd98584 Mon Sep 17 00:00:00 2001 From: Erik Johnston <erik@matrix.org> Date: Tue, 28 Sep 2021 17:32:31 +0100 Subject: [PATCH 63/74] Add #10932 to release --- changelog.d/10932.feature | 1 - 1 file changed, 1 deletion(-) delete mode 100644 changelog.d/10932.feature diff --git a/changelog.d/10932.feature b/changelog.d/10932.feature deleted file mode 100644 index 07e7b2c6a..000000000 --- a/changelog.d/10932.feature +++ /dev/null @@ -1 +0,0 @@ -Speed up responding with large JSON objects to requests. From 67815cc3db971f3fd191e6e161e88037dee387d3 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org> Date: Wed, 29 Sep 2021 11:00:56 +0100 Subject: [PATCH 64/74] Tweak changelog --- CHANGES.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0b209edd4..a8163802c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,7 @@ Features -------- - Only allow the [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send?chunk_id=xxx` endpoint to connect to an already existing insertion event. ([\#10776](https://github.com/matrix-org/synapse/issues/10776)) -- Improve oEmbed previews by processing the author name, photo, and video information. ([\#10814](https://github.com/matrix-org/synapse/issues/10814), [\#10819](https://github.com/matrix-org/synapse/issues/10819)) +- Improve oEmbed URL previews by processing the author name, photo, and video information. ([\#10814](https://github.com/matrix-org/synapse/issues/10814), [\#10819](https://github.com/matrix-org/synapse/issues/10819)) - Speed up responding with large JSON objects to requests. 
([\#10868](https://github.com/matrix-org/synapse/issues/10868), [\#10905](https://github.com/matrix-org/synapse/issues/10905)) - Add a `user_may_create_room_with_invites` spam checker callback to allow modules to allow or deny a room creation request based on the invites and/or 3PID invites it includes. ([\#10898](https://github.com/matrix-org/synapse/issues/10898)) @@ -54,14 +54,14 @@ Internal Changes haven't synced recently. ([\#10826](https://github.com/matrix-org/synapse/issues/10826)) - Track cache eviction rates more finely in Prometheus's monitoring. ([\#10829](https://github.com/matrix-org/synapse/issues/10829)) - Add missing type hints to handlers. ([\#10831](https://github.com/matrix-org/synapse/issues/10831), [\#10856](https://github.com/matrix-org/synapse/issues/10856)) -- Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data. ([\#10833](https://github.com/matrix-org/synapse/issues/10833)) +- Extend the Module API to let plug-ins check whether an ID is local and to access IP + User Agent data. ([\#10833](https://github.com/matrix-org/synapse/issues/10833)) - Factor out PNG image data to a constant to be used in several tests. ([\#10834](https://github.com/matrix-org/synapse/issues/10834)) - Add a test to ensure state events sent by modules get persisted correctly. ([\#10835](https://github.com/matrix-org/synapse/issues/10835)) - Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) fields and event types from `chunk` to `batch` to match the `/batch_send` endpoint. ([\#10838](https://github.com/matrix-org/synapse/issues/10838)) - Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` query parameter from `?prev_event` to more obvious usage with `?prev_event_id`. ([\#10839](https://github.com/matrix-org/synapse/issues/10839)) - Add type hints to `synapse.http.site`. 
([\#10867](https://github.com/matrix-org/synapse/issues/10867)) - Include outlier status when we log V2 or V3 events. ([\#10879](https://github.com/matrix-org/synapse/issues/10879)) -- Break down Grafana's cache expiry time series based on reason for eviction, c.f. #10829. ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) +- Break down Grafana's cache expiry time series based on reason for eviction, c.f. [\#10829](https://github.com/matrix-org/synapse/issues/10829). ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) - Clean up some of the federation event authentication code for clarity. ([\#10883](https://github.com/matrix-org/synapse/issues/10883), [\#10884](https://github.com/matrix-org/synapse/issues/10884), [\#10896](https://github.com/matrix-org/synapse/issues/10896), [\#10901](https://github.com/matrix-org/synapse/issues/10901)) - Clean up some unnecessary parentheses in places around the codebase. ([\#10889](https://github.com/matrix-org/synapse/issues/10889)) - Improve type hinting in the user directory code. ([\#10891](https://github.com/matrix-org/synapse/issues/10891)) From 1b9ce5e8a6ed37484665b595e3ed01a8e26f9dd7 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org> Date: Wed, 29 Sep 2021 11:09:00 +0100 Subject: [PATCH 65/74] Indicate when bugs were introduced and tidy up --- CHANGES.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a8163802c..e27b4aa94 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,19 +15,17 @@ Bugfixes - Fix a long-standing bug that caused an `AssertionError` when purging history in certain rooms. Contributed by @Kokokokoka. ([\#10690](https://github.com/matrix-org/synapse/issues/10690)) - Fix a long-standing bug which caused deactivated users that were later reactivated to be missing from the user directory. 
([\#10782](https://github.com/matrix-org/synapse/issues/10782)) -- Allow sending a membership event to unban a user. Contributed by @aaronraimist. ([\#10807](https://github.com/matrix-org/synapse/issues/10807)) -- Fix a case where logging contexts would go missing when federation requests time out. ([\#10810](https://github.com/matrix-org/synapse/issues/10810)) -- Fix error in deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. ([\#10827](https://github.com/matrix-org/synapse/issues/10827)) +- Fix a long-standing bug that caused unbanning a user by sending a membership event to fail. Contributed by @aaronraimist. ([\#10807](https://github.com/matrix-org/synapse/issues/10807)) +- Fix a long-standing bug where logging contexts would go missing when federation requests time out. ([\#10810](https://github.com/matrix-org/synapse/issues/10810)) +- Fix a long-standing bug causing an error in the deprecated `/initialSync` endpoint when using the undocumented `from` and `to` parameters. ([\#10827](https://github.com/matrix-org/synapse/issues/10827)) - Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. ([\#10843](https://github.com/matrix-org/synapse/issues/10843)) -- Fix a bug in Unicode support of the room search admin API breaking search for rooms with non-ASCII characters. ([\#10859](https://github.com/matrix-org/synapse/issues/10859)) +- Fix a long-standing bug in Unicode support of the room search admin API breaking search for rooms with non-ASCII characters. ([\#10859](https://github.com/matrix-org/synapse/issues/10859)) - Fix a bug introduced in Synapse 1.37.0 which caused `knock` membership events which we sent to remote servers to be incorrectly stored in the local database. 
([\#10873](https://github.com/matrix-org/synapse/issues/10873)) -- Fix invalidating one-time key count cache after claiming keys. Contributed by Tulir at Beeper. ([\#10875](https://github.com/matrix-org/synapse/issues/10875)) -- Fix application service users being subject to MAU blocking if MAU had been reached, even if configured not to be blocked. ([\#10881](https://github.com/matrix-org/synapse/issues/10881)) -- Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). ([\#10887](https://github.com/matrix-org/synapse/issues/10887)) +- Fix invalidating one-time key count cache after claiming keys. The bug was introduced in Synapse v1.41.0. Contributed by Tulir at Beeper. ([\#10875](https://github.com/matrix-org/synapse/issues/10875)) +- Fix a long-standing bug causing application service users to be subject to MAU blocking if the MAU limit had been reached, even if configured not to be blocked. ([\#10881](https://github.com/matrix-org/synapse/issues/10881)) - Fix a long-standing bug which could cause events pulled over federation to be incorrectly rejected. ([\#10907](https://github.com/matrix-org/synapse/issues/10907)) -- Avoid storing URL cache files in storage providers. Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space. ([\#10911](https://github.com/matrix-org/synapse/issues/10911)) -- Fix race conditions when creating media store and config directories. ([\#10913](https://github.com/matrix-org/synapse/issues/10913)) -- Fix debian builds due to dh-virtualenv no longer being able to build their docs. ([\#10931](https://github.com/matrix-org/synapse/issues/10931)) +- Fix a long-standing bug causing URL cache files to be stored in storage providers. 
Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space. ([\#10911](https://github.com/matrix-org/synapse/issues/10911)) +- Fix a long-standing bug leading to race conditions when creating media store and config directories. ([\#10913](https://github.com/matrix-org/synapse/issues/10913)) Improved Documentation @@ -53,7 +51,7 @@ Internal Changes - Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you haven't synced recently. ([\#10826](https://github.com/matrix-org/synapse/issues/10826)) - Track cache eviction rates more finely in Prometheus's monitoring. ([\#10829](https://github.com/matrix-org/synapse/issues/10829)) -- Add missing type hints to handlers. ([\#10831](https://github.com/matrix-org/synapse/issues/10831), [\#10856](https://github.com/matrix-org/synapse/issues/10856)) +- Add missing type hints to `synapse.handlers`. ([\#10831](https://github.com/matrix-org/synapse/issues/10831), [\#10856](https://github.com/matrix-org/synapse/issues/10856)) - Extend the Module API to let plug-ins check whether an ID is local and to access IP + User Agent data. ([\#10833](https://github.com/matrix-org/synapse/issues/10833)) - Factor out PNG image data to a constant to be used in several tests. ([\#10834](https://github.com/matrix-org/synapse/issues/10834)) - Add a test to ensure state events sent by modules get persisted correctly. ([\#10835](https://github.com/matrix-org/synapse/issues/10835)) @@ -63,11 +61,13 @@ Internal Changes - Include outlier status when we log V2 or V3 events. ([\#10879](https://github.com/matrix-org/synapse/issues/10879)) - Break down Grafana's cache expiry time series based on reason for eviction, c.f. [\#10829](https://github.com/matrix-org/synapse/issues/10829). ([\#10880](https://github.com/matrix-org/synapse/issues/10880)) - Clean up some of the federation event authentication code for clarity. 
([\#10883](https://github.com/matrix-org/synapse/issues/10883), [\#10884](https://github.com/matrix-org/synapse/issues/10884), [\#10896](https://github.com/matrix-org/synapse/issues/10896), [\#10901](https://github.com/matrix-org/synapse/issues/10901)) +- Allow the `.` and `~` characters when creating registration tokens as per the change to [MSC3231](https://github.com/matrix-org/matrix-doc/pull/3231). ([\#10887](https://github.com/matrix-org/synapse/issues/10887)) - Clean up some unnecessary parentheses in places around the codebase. ([\#10889](https://github.com/matrix-org/synapse/issues/10889)) - Improve type hinting in the user directory code. ([\#10891](https://github.com/matrix-org/synapse/issues/10891)) - Update development testing script `test_postgresql.sh` to use a supported Python version and make re-runs quicker. ([\#10906](https://github.com/matrix-org/synapse/issues/10906)) -- Document and summarize changes in schema version `61` - `64`. ([\#10917](https://github.com/matrix-org/synapse/issues/10917)) +- Document and summarize changes in schema version `61` – `64`. ([\#10917](https://github.com/matrix-org/synapse/issues/10917)) - Update release script to sign the newly created git tags. ([\#10925](https://github.com/matrix-org/synapse/issues/10925)) +- Fix Debian builds due to `dh-virtualenv` no longer being able to build their docs. 
([\#10931](https://github.com/matrix-org/synapse/issues/10931)) Synapse 1.43.0 (2021-09-21) From 13032b6603d91d9960592fe2506bb5dcb4ae1ad8 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org> Date: Wed, 29 Sep 2021 11:13:03 +0100 Subject: [PATCH 66/74] Bump the date because the release ran over --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e27b4aa94..271e2271f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -Synapse 1.44.0rc1 (2021-09-28) +Synapse 1.44.0rc1 (2021-09-29) ============================== Features From e32b9f44ee466ad8dad47fdbea7e2711c11b9dc7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 29 Sep 2021 11:57:53 +0100 Subject: [PATCH 67/74] Update installation instructions (#10919) Various updates to the install docs. --- README.rst | 2 +- changelog.d/10919.doc | 1 + docs/setup/installation.md | 328 +++++++++++++++++++------------------ 3 files changed, 167 insertions(+), 164 deletions(-) create mode 100644 changelog.d/10919.doc diff --git a/README.rst b/README.rst index db977c025..524a3a514 100644 --- a/README.rst +++ b/README.rst @@ -288,7 +288,7 @@ Quick start Before setting up a development environment for synapse, make sure you have the system dependencies (such as the python header files) installed - see -`Installing from source <https://matrix-org.github.io/synapse/latest/setup/installation.html#installing-from-source>`_. +`Platform-specific prerequisites <https://matrix-org.github.io/synapse/latest/setup/installation.html#platform-specific-prerequisites>`_. To check out a synapse for development, clone the git repo into a working directory of your choice:: diff --git a/changelog.d/10919.doc b/changelog.d/10919.doc new file mode 100644 index 000000000..d0bddc3f1 --- /dev/null +++ b/changelog.d/10919.doc @@ -0,0 +1 @@ +Minor updates to the installation instructions. 
diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 06f869cd7..874925e92 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -18,19 +18,179 @@ that your email address is probably `user@example.com` rather than ## Installing Synapse -### Installing from source +### Prebuilt packages + +Prebuilt packages are available for a number of platforms. These are recommended +for most users. + +#### Docker images and Ansible playbooks + +There is an official synapse image available at +<https://hub.docker.com/r/matrixdotorg/synapse> which can be used with +the docker-compose file available at +[contrib/docker](https://github.com/matrix-org/synapse/tree/develop/contrib/docker). +Further information on this including configuration options is available in the README +on hub.docker.com. + +Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a +Dockerfile to automate a synapse server in a single Docker image, at +<https://hub.docker.com/r/avhost/docker-matrix/tags/> + +Slavi Pantaleev has created an Ansible playbook, +which installs the offical Docker image of Matrix Synapse +along with many other Matrix-related services (Postgres database, Element, coturn, +ma1sd, SSL support, etc.). +For more details, see +<https://github.com/spantaleev/matrix-docker-ansible-deploy> + +#### Debian/Ubuntu + +##### Matrix.org packages + +Matrix.org provides Debian/Ubuntu packages of Synapse, for the amd64 +architecture via <https://packages.matrix.org/debian/>. 
+ +To install the latest release: + +```sh +sudo apt install -y lsb-release wget apt-transport-https +sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg +echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main" | + sudo tee /etc/apt/sources.list.d/matrix-org.list +sudo apt update +sudo apt install matrix-synapse-py3 +``` + +Packages are also published for release candidates. To enable the prerelease +channel, add `prerelease` to the `sources.list` line. For example: + +```sh +sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg +echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main prerelease" | + sudo tee /etc/apt/sources.list.d/matrix-org.list +sudo apt update +sudo apt install matrix-synapse-py3 +``` + +The fingerprint of the repository signing key (as shown by `gpg +/usr/share/keyrings/matrix-org-archive-keyring.gpg`) is +`AAF9AE843A7584B5A3E4CD2BCF45A512DE2DA058`. + +##### Downstream Debian packages + +We do not recommend using the packages from the default Debian `buster` +repository at this time, as they are old and suffer from known security +vulnerabilities. You can install the latest version of Synapse from +[our repository](#matrixorg-packages) or from `buster-backports`. Please +see the [Debian documentation](https://backports.debian.org/Instructions/) +for information on how to use backports. 
+ +If you are using Debian `sid` or testing, Synapse is available in the default +repositories and it should be possible to install it simply with: + +```sh +sudo apt install matrix-synapse +``` + +##### Downstream Ubuntu packages + +We do not recommend using the packages in the default Ubuntu repository +at this time, as they are old and suffer from known security vulnerabilities. +The latest version of Synapse can be installed from [our repository](#matrixorg-packages). + +#### Fedora + +Synapse is in the Fedora repositories as `matrix-synapse`: + +```sh +sudo dnf install matrix-synapse +``` + +Oleg Girko provides Fedora RPMs at +<https://obs.infoserver.lv/project/monitor/matrix-synapse> + +#### OpenSUSE + +Synapse is in the OpenSUSE repositories as `matrix-synapse`: + +```sh +sudo zypper install matrix-synapse +``` + +#### SUSE Linux Enterprise Server + +Unofficial package are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at +<https://download.opensuse.org/repositories/openSUSE:/Backports:/SLE-15/standard/> + +#### ArchLinux + +The quickest way to get up and running with ArchLinux is probably with the community package +<https://www.archlinux.org/packages/community/any/matrix-synapse/>, which should pull in most of +the necessary dependencies. + +pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ): + +```sh +sudo pip install --upgrade pip +``` + +If you encounter an error with lib bcrypt causing an Wrong ELF Class: +ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly +compile it under the right architecture. 
(This should not be needed if +installing under virtualenv): + +```sh +sudo pip uninstall py-bcrypt +sudo pip install py-bcrypt +``` + +#### Void Linux + +Synapse can be found in the void repositories as 'synapse': + +```sh +xbps-install -Su +xbps-install -S synapse +``` + +#### FreeBSD + +Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from: + +- Ports: `cd /usr/ports/net-im/py-matrix-synapse && make install clean` +- Packages: `pkg install py37-matrix-synapse` + +#### OpenBSD + +As of OpenBSD 6.7 Synapse is available as a pre-compiled binary. The filesystem +underlying the homeserver directory (defaults to `/var/synapse`) has to be +mounted with `wxallowed` (cf. `mount(8)`), so creating a separate filesystem +and mounting it to `/var/synapse` should be taken into consideration. + +Installing Synapse: + +```sh +doas pkg_add synapse +``` + +#### NixOS + +Robin Lambertz has packaged Synapse for NixOS at: +<https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/matrix-synapse.nix> + + +### Installing as a Python module from PyPI -(Prebuilt packages are available for some platforms - see [Prebuilt packages](#prebuilt-packages).) +It's also possible to install Synapse as a Python module from PyPI. -When installing from source please make sure that the [Platform-specific prerequisites](#platform-specific-prerequisites) are already installed. +When following this route please make sure that the [Platform-specific prerequisites](#platform-specific-prerequisites) are already installed. System requirements: - POSIX-compliant system (tested on Linux & OS X) -- Python 3.5.2 or later, up to Python 3.9. +- Python 3.6 or later, up to Python 3.9. 
- At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org - To install the Synapse homeserver run: ```sh @@ -203,164 +363,6 @@ be found at <https://docs.microsoft.com/en-us/windows/wsl/install-win10> for Windows 10 and <https://docs.microsoft.com/en-us/windows/wsl/install-on-server> for Windows Server. -### Prebuilt packages - -As an alternative to installing from source, prebuilt packages are available -for a number of platforms. - -#### Docker images and Ansible playbooks - -There is an official synapse image available at -<https://hub.docker.com/r/matrixdotorg/synapse> which can be used with -the docker-compose file available at -[contrib/docker](https://github.com/matrix-org/synapse/tree/develop/contrib/docker). -Further information on this including configuration options is available in the README -on hub.docker.com. - -Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a -Dockerfile to automate a synapse server in a single Docker image, at -<https://hub.docker.com/r/avhost/docker-matrix/tags/> - -Slavi Pantaleev has created an Ansible playbook, -which installs the offical Docker image of Matrix Synapse -along with many other Matrix-related services (Postgres database, Element, coturn, -ma1sd, SSL support, etc.). -For more details, see -<https://github.com/spantaleev/matrix-docker-ansible-deploy> - -#### Debian/Ubuntu - -##### Matrix.org packages - -Matrix.org provides Debian/Ubuntu packages of Synapse via -<https://packages.matrix.org/debian/>. 
To install the latest release: - -```sh -sudo apt install -y lsb-release wget apt-transport-https -sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg -echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main" | - sudo tee /etc/apt/sources.list.d/matrix-org.list -sudo apt update -sudo apt install matrix-synapse-py3 -``` - -Packages are also published for release candidates. To enable the prerelease -channel, add `prerelease` to the `sources.list` line. For example: - -```sh -sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg -echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main prerelease" | - sudo tee /etc/apt/sources.list.d/matrix-org.list -sudo apt update -sudo apt install matrix-synapse-py3 -``` - -The fingerprint of the repository signing key (as shown by `gpg -/usr/share/keyrings/matrix-org-archive-keyring.gpg`) is -`AAF9AE843A7584B5A3E4CD2BCF45A512DE2DA058`. - -##### Downstream Debian packages - -We do not recommend using the packages from the default Debian `buster` -repository at this time, as they are old and suffer from known security -vulnerabilities. You can install the latest version of Synapse from -[our repository](#matrixorg-packages) or from `buster-backports`. Please -see the [Debian documentation](https://backports.debian.org/Instructions/) -for information on how to use backports. 
- -If you are using Debian `sid` or testing, Synapse is available in the default -repositories and it should be possible to install it simply with: - -```sh -sudo apt install matrix-synapse -``` - -##### Downstream Ubuntu packages - -We do not recommend using the packages in the default Ubuntu repository -at this time, as they are old and suffer from known security vulnerabilities. -The latest version of Synapse can be installed from [our repository](#matrixorg-packages). - -#### Fedora - -Synapse is in the Fedora repositories as `matrix-synapse`: - -```sh -sudo dnf install matrix-synapse -``` - -Oleg Girko provides Fedora RPMs at -<https://obs.infoserver.lv/project/monitor/matrix-synapse> - -#### OpenSUSE - -Synapse is in the OpenSUSE repositories as `matrix-synapse`: - -```sh -sudo zypper install matrix-synapse -``` - -#### SUSE Linux Enterprise Server - -Unofficial package are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at -<https://download.opensuse.org/repositories/openSUSE:/Backports:/SLE-15/standard/> - -#### ArchLinux - -The quickest way to get up and running with ArchLinux is probably with the community package -<https://www.archlinux.org/packages/community/any/matrix-synapse/>, which should pull in most of -the necessary dependencies. - -pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ): - -```sh -sudo pip install --upgrade pip -``` - -If you encounter an error with lib bcrypt causing an Wrong ELF Class: -ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly -compile it under the right architecture. 
(This should not be needed if -installing under virtualenv): - -```sh -sudo pip uninstall py-bcrypt -sudo pip install py-bcrypt -``` - -#### Void Linux - -Synapse can be found in the void repositories as 'synapse': - -```sh -xbps-install -Su -xbps-install -S synapse -``` - -#### FreeBSD - -Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from: - -- Ports: `cd /usr/ports/net-im/py-matrix-synapse && make install clean` -- Packages: `pkg install py37-matrix-synapse` - -#### OpenBSD - -As of OpenBSD 6.7 Synapse is available as a pre-compiled binary. The filesystem -underlying the homeserver directory (defaults to `/var/synapse`) has to be -mounted with `wxallowed` (cf. `mount(8)`), so creating a separate filesystem -and mounting it to `/var/synapse` should be taken into consideration. - -Installing Synapse: - -```sh -doas pkg_add synapse -``` - -#### NixOS - -Robin Lambertz has packaged Synapse for NixOS at: -<https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/matrix-synapse.nix> - ## Setting up Synapse Once you have installed synapse as above, you will need to configure it. From a03ed5e6ae23e52941e91ecb892a7b5c88964d90 Mon Sep 17 00:00:00 2001 From: reivilibre <oliverw@matrix.org> Date: Thu, 30 Sep 2021 11:06:47 +0100 Subject: [PATCH 68/74] Fix issue causing sending presence to ASes to fail (due to incomplete type annotations) (#10944) --- changelog.d/10944.bugfix | 1 + synapse/handlers/presence.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10944.bugfix diff --git a/changelog.d/10944.bugfix b/changelog.d/10944.bugfix new file mode 100644 index 000000000..49baff7df --- /dev/null +++ b/changelog.d/10944.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.44.0rc1 which prevented sending presence events to application services. 
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 983c837c6..404afb940 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -52,6 +52,7 @@ from synapse.api.constants import EventTypes, Membership, PresenceState from synapse.api.errors import SynapseError from synapse.api.presence import UserPresenceState +from synapse.appservice import ApplicationService from synapse.events.presence_router import PresenceRouter from synapse.logging.context import run_in_background from synapse.logging.utils import log_function @@ -1521,10 +1522,11 @@ async def get_new_events( user: UserID, from_key: Optional[int], limit: Optional[int] = None, - room_ids: Optional[List[str]] = None, + room_ids: Optional[Collection[str]] = None, is_guest: bool = False, explicit_room_id: Optional[str] = None, include_offline: bool = True, + service: Optional[ApplicationService] = None, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: # 1. Get the rooms the user is in. From c4bf48ee6fa4662d88a5bf682e79787851fe9cd8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood <erice@element.io> Date: Tue, 28 Sep 2021 22:00:04 -0500 Subject: [PATCH 69/74] Fix event context for outliers in important MSC2716 spot (#10938) Fix event context for outlier causing failures in all of the MSC2716 Complement tests. The `EventContext.for_outlier` refactor happened in https://github.com/matrix-org/synapse/pull/10883 and this spot was left out. 
--- changelog.d/10938.bugfix | 1 + synapse/handlers/message.py | 13 ++++--------- 2 files changed, 5 insertions(+), 9 deletions(-) create mode 100644 changelog.d/10938.bugfix diff --git a/changelog.d/10938.bugfix b/changelog.d/10938.bugfix new file mode 100644 index 000000000..9cf0ea878 --- /dev/null +++ b/changelog.d/10938.bugfix @@ -0,0 +1 @@ +Fix bug introduced in Synapse 1.44 which caused the experimental [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint to return a 500 error. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c66aefe2c..fd861e94f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -952,18 +952,13 @@ async def create_new_client_event( depth=depth, ) - old_state = None - # Pass on the outlier property from the builder to the event # after it is created if builder.internal_metadata.outlier: - event.internal_metadata.outlier = builder.internal_metadata.outlier - - # Calculate the state for outliers that pass in their own `auth_event_ids` - if auth_event_ids: - old_state = await self.store.get_events_as_list(auth_event_ids) - - context = await self.state.compute_event_context(event, old_state=old_state) + event.internal_metadata.outlier = True + context = EventContext.for_outlier() + else: + context = await self.state.compute_event_context(event) if requester: context.app_service = requester.app_service From 3412f5c8d8c8aff5bcf9b0e5012dfa2f4e895464 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org> Date: Thu, 30 Sep 2021 12:40:24 +0100 Subject: [PATCH 70/74] 1.44.0rc2 --- CHANGES.md | 16 ++++++++++++++++ changelog.d/10919.doc | 1 - changelog.d/10938.bugfix | 1 - changelog.d/10944.bugfix | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 6 files changed, 23 insertions(+), 4 deletions(-) delete mode 100644 changelog.d/10919.doc delete mode 100644 changelog.d/10938.bugfix delete mode 100644 changelog.d/10944.bugfix diff 
--git a/CHANGES.md b/CHANGES.md index 271e2271f..59ff96763 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,19 @@ +Synapse 1.44.0rc2 (2021-09-30) +============================== + +Bugfixes +-------- + +- Fix a bug introduced in v1.44.0rc1 which caused the experimental [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint to return a 500 error. ([\#10938](https://github.com/matrix-org/synapse/issues/10938)) +- Fix a bug introduced in v1.44.0rc1 which prevented sending presence events to application services. ([\#10944](https://github.com/matrix-org/synapse/issues/10944)) + + +Improved Documentation +---------------------- + +- Minor updates to the installation instructions. ([\#10919](https://github.com/matrix-org/synapse/issues/10919)) + + Synapse 1.44.0rc1 (2021-09-29) ============================== diff --git a/changelog.d/10919.doc b/changelog.d/10919.doc deleted file mode 100644 index d0bddc3f1..000000000 --- a/changelog.d/10919.doc +++ /dev/null @@ -1 +0,0 @@ -Minor updates to the installation instructions. diff --git a/changelog.d/10938.bugfix b/changelog.d/10938.bugfix deleted file mode 100644 index 9cf0ea878..000000000 --- a/changelog.d/10938.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug introduced in Synapse 1.44 which caused the experimental [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) `/batch_send` endpoint to return a 500 error. diff --git a/changelog.d/10944.bugfix b/changelog.d/10944.bugfix deleted file mode 100644 index 49baff7df..000000000 --- a/changelog.d/10944.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in v1.44.0rc1 which prevented sending presence events to application services. diff --git a/debian/changelog b/debian/changelog index 191bb97c5..b08a59278 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.44.0~rc2) stable; urgency=medium + + * New synapse release 1.44.0~rc2. 
+ + -- Synapse Packaging team <packages@matrix.org> Thu, 30 Sep 2021 12:39:10 +0100 + matrix-synapse-py3 (1.44.0~rc1) stable; urgency=medium * New synapse release 1.44.0~rc1. diff --git a/synapse/__init__.py b/synapse/__init__.py index a1fec8ad2..8791c20e2 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ except ImportError: pass -__version__ = "1.44.0rc1" +__version__ = "1.44.0rc2" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From 32072dcdac0072049832cda6204cd75be2d4e38f Mon Sep 17 00:00:00 2001 From: Patrick Cloke <clokep@users.noreply.github.com> Date: Thu, 30 Sep 2021 11:13:59 -0400 Subject: [PATCH 71/74] Strip "join_authorised_via_users_server" from join events which do not need it. (#10933) This fixes a "Event not signed by authorising server" error when transition room member from join -> join, e.g. when updating a display name or avatar URL for restricted rooms. --- changelog.d/10933.bugfix | 1 + synapse/api/constants.py | 3 +++ synapse/event_auth.py | 12 +++++++----- synapse/events/utils.py | 2 +- synapse/federation/federation_base.py | 6 +++--- synapse/federation/federation_client.py | 6 +++--- synapse/federation/federation_server.py | 6 +++--- synapse/handlers/federation.py | 9 +++++++-- synapse/handlers/room_member.py | 10 +++++++++- tests/events/test_utils.py | 7 ++++--- tests/test_event_auth.py | 9 +++++---- 11 files changed, 46 insertions(+), 25 deletions(-) create mode 100644 changelog.d/10933.bugfix diff --git a/changelog.d/10933.bugfix b/changelog.d/10933.bugfix new file mode 100644 index 000000000..e0694fea2 --- /dev/null +++ b/changelog.d/10933.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse v1.40.0 where changing a user's display name or avatar in a restricted room would cause an authentication error. 
diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 39fd9954d..a31f03774 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -217,6 +217,9 @@ class EventContentFields: # For "marker" events MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion" + # The authorising user for joining a restricted room. + AUTHORISING_USER = "join_authorised_via_users_server" + class RoomTypes: """Understood values of the room_type field of m.room.create events.""" diff --git a/synapse/event_auth.py b/synapse/event_auth.py index fc50a0e71..650402836 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -115,11 +115,11 @@ def check( is_invite_via_allow_rule = ( event.type == EventTypes.Member and event.membership == Membership.JOIN - and "join_authorised_via_users_server" in event.content + and EventContentFields.AUTHORISING_USER in event.content ) if is_invite_via_allow_rule: authoriser_domain = get_domain_from_id( - event.content["join_authorised_via_users_server"] + event.content[EventContentFields.AUTHORISING_USER] ) if not event.signatures.get(authoriser_domain): raise AuthError(403, "Event not signed by authorising server") @@ -381,7 +381,9 @@ def _is_membership_change_allowed( # Note that if the caller is in the room or invited, then they do # not need to meet the allow rules. 
if not caller_in_room and not caller_invited: - authorising_user = event.content.get("join_authorised_via_users_server") + authorising_user = event.content.get( + EventContentFields.AUTHORISING_USER + ) if authorising_user is None: raise AuthError(403, "Join event is missing authorising user.") @@ -836,10 +838,10 @@ def auth_types_for_event( auth_types.add(key) if room_version.msc3083_join_rules and membership == Membership.JOIN: - if "join_authorised_via_users_server" in event.content: + if EventContentFields.AUTHORISING_USER in event.content: key = ( EventTypes.Member, - event.content["join_authorised_via_users_server"], + event.content[EventContentFields.AUTHORISING_USER], ) auth_types.add(key) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index f86113a44..38fccd1ef 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -105,7 +105,7 @@ def add_fields(*fields): if event_type == EventTypes.Member: add_fields("membership") if room_version.msc3375_redaction_rules: - add_fields("join_authorised_via_users_server") + add_fields(EventContentFields.AUTHORISING_USER) elif event_type == EventTypes.Create: # MSC2176 rules state that create events cannot be redacted. 
if room_version.msc2176_redaction_rules: diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 024e440ff..0cd424e12 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -15,7 +15,7 @@ import logging from collections import namedtuple -from synapse.api.constants import MAX_DEPTH, EventTypes, Membership +from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.crypto.event_signing import check_event_content_hash @@ -184,10 +184,10 @@ async def _check_sigs_on_pdu( room_version.msc3083_join_rules and pdu.type == EventTypes.Member and pdu.membership == Membership.JOIN - and "join_authorised_via_users_server" in pdu.content + and EventContentFields.AUTHORISING_USER in pdu.content ): authorising_server = get_domain_from_id( - pdu.content["join_authorised_via_users_server"] + pdu.content[EventContentFields.AUTHORISING_USER] ) try: await keyring.verify_event_for_server( diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 584836c04..2ab4dec88 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -37,7 +37,7 @@ import attr from prometheus_client import Counter -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.errors import ( CodeMessageException, Codes, @@ -875,9 +875,9 @@ async def _execute(pdu: EventBase) -> None: # If the join is being authorised via allow rules, we need to send # the /send_join back to the same server that was originally used # with /make_join. 
- if "join_authorised_via_users_server" in pdu.content: + if EventContentFields.AUTHORISING_USER in pdu.content: destinations = [ - get_domain_from_id(pdu.content["join_authorised_via_users_server"]) + get_domain_from_id(pdu.content[EventContentFields.AUTHORISING_USER]) ] return await self._try_destination_list( diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 638959cbe..5f4383eeb 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -34,7 +34,7 @@ from twisted.internet.abstract import isIPAddress from twisted.python import failure -from synapse.api.constants import EduTypes, EventTypes, Membership +from synapse.api.constants import EduTypes, EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, Codes, @@ -765,11 +765,11 @@ async def _on_send_membership_event( if ( room_version.msc3083_join_rules and event.membership == Membership.JOIN - and "join_authorised_via_users_server" in event.content + and EventContentFields.AUTHORISING_USER in event.content ): # We can only authorise our own users. 
authorising_server = get_domain_from_id( - event.content["join_authorised_via_users_server"] + event.content[EventContentFields.AUTHORISING_USER] ) if authorising_server != self.server_name: raise SynapseError( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b17ef2a9a..adbd150e4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -27,7 +27,12 @@ from twisted.internet import defer from synapse import event_auth -from synapse.api.constants import EventTypes, Membership, RejectedReason +from synapse.api.constants import ( + EventContentFields, + EventTypes, + Membership, + RejectedReason, +) from synapse.api.errors import ( AuthError, CodeMessageException, @@ -712,7 +717,7 @@ async def on_make_join_request( if include_auth_user_id: event_content[ - "join_authorised_via_users_server" + EventContentFields.AUTHORISING_USER ] = await self._event_auth_handler.get_user_which_could_invite( room_id, state_ids, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1a56c82fb..afa7e4727 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -573,6 +573,14 @@ async def update_membership_locked( errcode=Codes.BAD_JSON, ) + # The event content should *not* include the authorising user as + # it won't be properly signed. Strip it out since it might come + # back from a client updating a display name / avatar. + # + # This only applies to restricted rooms, but there should be no reason + # for a client to include it. Unconditionally remove it. + content.pop(EventContentFields.AUTHORISING_USER, None) + effective_membership_state = action if action in ["kick", "unban"]: effective_membership_state = "leave" @@ -939,7 +947,7 @@ async def _should_perform_remote_join( # be included in the event content in order to efficiently validate # the event. 
content[ - "join_authorised_via_users_server" + EventContentFields.AUTHORISING_USER ] = await self.event_auth_handler.get_user_which_could_invite( room_id, current_state_ids, diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index 5446fda5e..1dea09e48 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from synapse.api.constants import EventContentFields from synapse.api.room_versions import RoomVersions from synapse.events import make_event_from_dict from synapse.events.utils import ( @@ -352,7 +353,7 @@ def test_member(self): "event_id": "$test:domain", "content": { "membership": "join", - "join_authorised_via_users_server": "@user:domain", + EventContentFields.AUTHORISING_USER: "@user:domain", "other_key": "stripped", }, }, @@ -372,7 +373,7 @@ def test_member(self): "type": "m.room.member", "content": { "membership": "join", - "join_authorised_via_users_server": "@user:domain", + EventContentFields.AUTHORISING_USER: "@user:domain", "other_key": "stripped", }, }, @@ -380,7 +381,7 @@ def test_member(self): "type": "m.room.member", "content": { "membership": "join", - "join_authorised_via_users_server": "@user:domain", + EventContentFields.AUTHORISING_USER: "@user:domain", }, "signatures": {}, "unsigned": {}, diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index 6ebd01bcb..1a4d07878 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -16,6 +16,7 @@ from typing import Optional from synapse import event_auth +from synapse.api.constants import EventContentFields from synapse.api.errors import AuthError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, make_event_from_dict @@ -380,7 +381,7 @@ def test_join_rules_msc3083_restricted(self): authorised_join_event = _join_event( pleb, additional_content={ - 
"join_authorised_via_users_server": "@creator:example.com" + EventContentFields.AUTHORISING_USER: "@creator:example.com" }, ) event_auth.check( @@ -404,7 +405,7 @@ def test_join_rules_msc3083_restricted(self): _join_event( pleb, additional_content={ - "join_authorised_via_users_server": "@inviter:foo.test" + EventContentFields.AUTHORISING_USER: "@inviter:foo.test" }, ), pl_auth_events, @@ -431,7 +432,7 @@ def test_join_rules_msc3083_restricted(self): _join_event( pleb, additional_content={ - "join_authorised_via_users_server": "@other:example.com" + EventContentFields.AUTHORISING_USER: "@other:example.com" }, ), auth_events, @@ -448,7 +449,7 @@ def test_join_rules_msc3083_restricted(self): "join", sender=creator, additional_content={ - "join_authorised_via_users_server": "@inviter:foo.test" + EventContentFields.AUTHORISING_USER: "@inviter:foo.test" }, ), auth_events, From d1cbad388fc42d483e0e3b107620852f359d2cc8 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 1 Oct 2021 17:22:13 +0100 Subject: [PATCH 72/74] Fix error in `get_user_ip_and_agents` when fetching from the database (#10968) --- changelog.d/10968.bugfix | 1 + synapse/storage/databases/main/client_ips.py | 4 +-- tests/storage/test_client_ips.py | 34 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10968.bugfix diff --git a/changelog.d/10968.bugfix b/changelog.d/10968.bugfix new file mode 100644 index 000000000..76624ed73 --- /dev/null +++ b/changelog.d/10968.bugfix @@ -0,0 +1 @@ +Fix `/admin/whois/{user_id}` endpoint, which was broken in v1.44.0rc1. 
diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 7e33ae578..cc192f5c8 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -591,8 +591,8 @@ def get_recent(txn): ) results.update( - ((row["access_token"], row["ip"]), (row["user_agent"], row["last_seen"])) - for row in rows + ((access_token, ip), (user_agent, last_seen)) + for access_token, ip, user_agent, last_seen in rows ) return [ { diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 1c2df54ec..3cc8038f1 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -15,9 +15,12 @@ from unittest.mock import Mock +from parameterized import parameterized + import synapse.rest.admin from synapse.http.site import XForwardedForRequest from synapse.rest.client import login +from synapse.types import UserID from tests import unittest from tests.server import make_request @@ -143,6 +146,37 @@ def test_insert_new_client_ip_none_device_id(self): ], ) + @parameterized.expand([(False,), (True,)]) + def test_get_user_ip_and_agents(self, after_persisting: bool): + """Test `get_user_ip_and_agents` for persisted and unpersisted data""" + self.reactor.advance(12345678) + + user_id = "@user:id" + user = UserID.from_string(user_id) + + # Insert a user IP + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "MY_DEVICE" + ) + ) + + if after_persisting: + # Trigger the storage loop + self.reactor.advance(10) + + self.assertEqual( + self.get_success(self.store.get_user_ip_and_agents(user)), + [ + { + "access_token": "access_token", + "ip": "ip", + "user_agent": "user_agent", + "last_seen": 12345678000, + }, + ], + ) + @override_config({"limit_usage_by_mau": False, "max_mau_value": 50}) def test_disabled_monthly_active_user(self): user_id = "@user:server" From 2d2c6a41fe69d4dab82a773bbffc52df95b6b542 Mon Sep 17 
00:00:00 2001 From: Brendan Abolivier <babolivier@matrix.org> Date: Mon, 4 Oct 2021 14:57:40 +0100 Subject: [PATCH 73/74] 1.44.0rc3 --- CHANGES.md | 10 ++++++++++ changelog.d/10933.bugfix | 1 - changelog.d/10968.bugfix | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 5 files changed, 17 insertions(+), 3 deletions(-) delete mode 100644 changelog.d/10933.bugfix delete mode 100644 changelog.d/10968.bugfix diff --git a/CHANGES.md b/CHANGES.md index 59ff96763..6c2728d40 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +Synapse 1.44.0rc3 (2021-10-04) +============================== + +Bugfixes +-------- + +- Fix a bug introduced in Synapse v1.40.0 where changing a user's display name or avatar in a restricted room would cause an authentication error. ([\#10933](https://github.com/matrix-org/synapse/issues/10933)) +- Fix `/admin/whois/{user_id}` endpoint, which was broken in v1.44.0rc1. ([\#10968](https://github.com/matrix-org/synapse/issues/10968)) + + Synapse 1.44.0rc2 (2021-09-30) ============================== diff --git a/changelog.d/10933.bugfix b/changelog.d/10933.bugfix deleted file mode 100644 index e0694fea2..000000000 --- a/changelog.d/10933.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse v1.40.0 where changing a user's display name or avatar in a restricted room would cause an authentication error. diff --git a/changelog.d/10968.bugfix b/changelog.d/10968.bugfix deleted file mode 100644 index 76624ed73..000000000 --- a/changelog.d/10968.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix `/admin/whois/{user_id}` endpoint, which was broken in v1.44.0rc1. diff --git a/debian/changelog b/debian/changelog index b08a59278..a0f1bcbdf 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.44.0~rc3) stable; urgency=medium + + * New synapse release 1.44.0~rc3. 
+ + -- Synapse Packaging team <packages@matrix.org> Mon, 04 Oct 2021 14:57:22 +0100 + matrix-synapse-py3 (1.44.0~rc2) stable; urgency=medium * New synapse release 1.44.0~rc2. diff --git a/synapse/__init__.py b/synapse/__init__.py index 8791c20e2..a9a7b658b 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ except ImportError: pass -__version__ = "1.44.0rc2" +__version__ = "1.44.0rc3" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From b2c5e79291b9f93cdb39c9a6f7de50e62f45e64e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier <babolivier@matrix.org> Date: Tue, 5 Oct 2021 13:45:24 +0100 Subject: [PATCH 74/74] 1.44.0 --- CHANGES.md | 6 ++++++ debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 6c2728d40..3f048ba88 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 1.44.0 (2021-10-05) +=========================== + +No significant changes since 1.44.0rc3. + + Synapse 1.44.0rc3 (2021-10-04) ============================== diff --git a/debian/changelog b/debian/changelog index a0f1bcbdf..9e878fbc2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.44.0) stable; urgency=medium + + * New synapse release 1.44.0. + + -- Synapse Packaging team <packages@matrix.org> Tue, 05 Oct 2021 13:43:57 +0100 + matrix-synapse-py3 (1.44.0~rc3) stable; urgency=medium * New synapse release 1.44.0~rc3. diff --git a/synapse/__init__.py b/synapse/__init__.py index a9a7b658b..b8979c365 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ except ImportError: pass -__version__ = "1.44.0rc3" +__version__ = "1.44.0" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when