From 4cb7d09746bea64edb8bfa6cd9a43c5f3a272d63 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 01:54:31 -0500 Subject: [PATCH 1/8] Fix 500 error on /messages when we accumulate more than 5 backwards extremeties Found while working on the Gitter backfill script and noticed it only happened after we sent 7 batches, https://gitlab.com/gitterHQ/webapp/-/merge_requests/2229#note_665906390 When there are more than 5 backwards extremities for a given depth, backfill will throw an error because we sliced the extremity list to 5 but then try to iterate over the full list. This causes us to look for state that we never fetched and we get a KeyError. When calling `/messages`: ``` Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/synapse/http/server.py", line 258, in _async_render_wrapper callback_return = await self._async_render(request) File "/usr/local/lib/python3.8/site-packages/synapse/http/server.py", line 446, in _async_render callback_return = await raw_callback_return File "/usr/local/lib/python3.8/site-packages/synapse/rest/client/room.py", line 580, in on_GET msgs = await self.pagination_handler.get_messages( File "/usr/local/lib/python3.8/site-packages/synapse/handlers/pagination.py", line 396, in get_messages await self.hs.get_federation_handler().maybe_backfill( File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 133, in maybe_backfill return await self._maybe_backfill_inner(room_id, current_depth, limit) File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 386, in _maybe_backfill_inner likely_extremeties_domains = get_domains_from_state(states[e_id]) KeyError: '$zpFflMEBtZdgcMQWTakaVItTLMjLFdKcRWUPHbbSZJl' ``` --- synapse/handlers/federation.py | 34 ++++++++++--------- synapse/handlers/pagination.py | 1 + .../storage/databases/main/user_directory.py | 4 +++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 043ca4a224b8..9309c85de034 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -150,14 +150,14 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) ) - logger.debug( + logger.info( "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", oldest_events_with_depth, insertion_events_to_be_backfilled, ) if not oldest_events_with_depth and not insertion_events_to_be_backfilled: - logger.debug("Not backfilling as no extremeties found.") + logger.info("Not backfilling as no extremeties found.") return False # We only want to paginate if we can actually see the events we'll get, @@ -205,7 +205,7 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) - logger.debug( + logger.info( "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities ) @@ -232,7 +232,7 @@ async def _maybe_backfill_inner( # much larger factor will result in triggering a backfill request much # earlier than necessary. if current_depth - 2 * limit > max_depth: - logger.debug( + logger.info( "Not backfilling as we don't need to. %d < %d - 2 * %d", max_depth, current_depth, @@ -240,18 +240,10 @@ async def _maybe_backfill_inner( ) return False - logger.debug( - "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s", - room_id, - current_depth, - max_depth, - sorted_extremeties_tuple, - ) - # We ignore extremities that have a greater depth than our current depth # as: # 1. we don't really care about getting events that have happened - # before our current position; and + # after our current position; and # 2. we have likely previously tried and failed to backfill from that # extremity, so to avoid getting "stuck" requesting the same # backfill repeatedly we drop those extremities. @@ -259,9 +251,19 @@ async def _maybe_backfill_inner( t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth ] + logger.info( + "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s", + room_id, + current_depth, + limit, + max_depth, + sorted_extremeties_tuple, + filtered_sorted_extremeties_tuple, + ) + # However, we need to check that the filtered extremities are non-empty. # If they are empty then either we can a) bail or b) still attempt to - # backill. We opt to try backfilling anyway just in case we do get + # backfill. We opt to try backfilling anyway just in case we do get # relevant events. if filtered_sorted_extremeties_tuple: sorted_extremeties_tuple = filtered_sorted_extremeties_tuple @@ -318,7 +320,7 @@ async def try_backfill(domains: List[str]) -> bool: for dom in domains: try: await self._federation_event_handler.backfill( - dom, room_id, limit=100, extremities=extremities + dom, room_id, limit=100, extremities=extremities.keys() ) # If this succeeded then we probably already have the # appropriate stuff. @@ -391,7 +393,7 @@ async def try_backfill(domains: List[str]) -> bool: for key, state_dict in states.items() } - for e_id, _ in sorted_extremeties_tuple: + for e_id in event_ids: likely_extremeties_domains = get_domains_from_state(states[e_id]) success = await try_backfill( diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 176e4dfdd432..b84865d574ab 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -378,6 +378,7 @@ async def get_messages( curr_topo = await self.store.get_current_topological_token( room_id, room_token.stream ) + logger.info("message pagination curr_topo=%s", curr_topo) if membership == Membership.LEAVE: # If they have left the room then clamp the token to be before diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 5c713a732ee9..7957af19bfe4 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -383,6 +383,10 @@ async def should_include_local_user_in_dir(self, user: str) -> bool: """Certain classes of local user are omitted from the user directory. Is this user one of them? """ + # The main app service sender isn't usually contactable, so exclude them + if self.get_app_service_by_user_id(user) is not None: + return False + # App service users aren't usually contactable, so exclude them. if self.get_if_app_services_interested_in_user(user): # TODO we might want to make this configurable for each app service From 94b9efbd7a465390fc8872998f4648dc36ed969e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 02:27:27 -0500 Subject: [PATCH 2/8] Remove debug logging --- synapse/handlers/federation.py | 8 ++++---- synapse/handlers/pagination.py | 1 - synapse/storage/databases/main/user_directory.py | 3 --- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 9309c85de034..d9eebc77b64b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -150,7 +150,7 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) ) - logger.info( + logger.debug( "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", oldest_events_with_depth, insertion_events_to_be_backfilled, @@ -205,7 +205,7 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) - logger.info( + logger.debug( "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities ) @@ -232,7 +232,7 @@ async def _maybe_backfill_inner( # much larger factor will result in triggering a backfill request much # earlier than necessary. if current_depth - 2 * limit > max_depth: - logger.info( + logger.debug( "Not backfilling as we don't need to. %d < %d - 2 * %d", max_depth, current_depth, @@ -251,7 +251,7 @@ async def _maybe_backfill_inner( t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth ] - logger.info( + logger.debug( "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s", room_id, current_depth, diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index b84865d574ab..176e4dfdd432 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -378,7 +378,6 @@ async def get_messages( curr_topo = await self.store.get_current_topological_token( room_id, room_token.stream ) - logger.info("message pagination curr_topo=%s", curr_topo) if membership == Membership.LEAVE: # If they have left the room then clamp the token to be before diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 7957af19bfe4..c46b91d2f722 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -383,9 +383,6 @@ async def should_include_local_user_in_dir(self, user: str) -> bool: """Certain classes of local user are omitted from the user directory. Is this user one of them? """ - # The main app service sender isn't usually contactable, so exclude them - if self.get_app_service_by_user_id(user) is not None: - return False # App service users aren't usually contactable, so exclude them. if self.get_if_app_services_interested_in_user(user): From 7364b81b1785d77fbc64b3731be3ea0953a9adb4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 02:30:43 -0500 Subject: [PATCH 3/8] Add changelog --- changelog.d/11027.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/11027.bugfix diff --git a/changelog.d/11027.bugfix b/changelog.d/11027.bugfix new file mode 100644 index 000000000000..ae6cc444709b --- /dev/null +++ b/changelog.d/11027.bugfix @@ -0,0 +1 @@ +Fix 500 error on `/messages` when the server accumulates more than 5 backwards extremities at a given depth for a room. From 0ceaf254f7b4392b397720b1c5fc3a56c8c6a6b8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 02:33:34 -0500 Subject: [PATCH 4/8] Fix up leftover debug logging and lint --- synapse/handlers/federation.py | 2 +- synapse/storage/databases/main/user_directory.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d9eebc77b64b..92ae412d568d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -157,7 +157,7 @@ async def _maybe_backfill_inner( ) if not oldest_events_with_depth and not insertion_events_to_be_backfilled: - logger.info("Not backfilling as no extremeties found.") + logger.debug("Not backfilling as no extremeties found.") return False # We only want to paginate if we can actually see the events we'll get, diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index c46b91d2f722..5c713a732ee9 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -383,7 +383,6 @@ async def should_include_local_user_in_dir(self, user: str) -> bool: """Certain classes of local user are omitted from the user directory. Is this user one of them? """ - # App service users aren't usually contactable, so exclude them. if self.get_if_app_services_interested_in_user(user): # TODO we might want to make this configurable for each app service From 694ca81ee255680179bb469ec3f83ce72c3457f0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 03:49:02 -0500 Subject: [PATCH 5/8] Add test --- tests/handlers/test_federation.py | 64 +++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 936ebf3dde36..a5b9f56024c2 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -24,12 +24,17 @@ from synapse.rest import admin from synapse.rest.client import login, room from synapse.util.stringutils import random_string +from synapse.types import create_requester from tests import unittest logger = logging.getLogger(__name__) +def generate_fake_event_id() -> str: + return "$fake_" + random_string(43) + + class FederationTestCase(unittest.HomeserverTestCase): servlets = [ admin.register_servlets, @@ -198,6 +203,65 @@ def test_rejected_state_event_state(self): self.assertEqual(sg, sg2) + def test_backfill_with_many_backward_extremities(self): + """ + Check that we can backfill with many backward extremities. + The goal is to make sure that when we only use a portion + of backwards extremities(the magic number is more than 5), + no errors are thrown. + + Regression test, see #11027 + """ + # create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + requester = create_requester(user_id) + + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + ev1 = self.helper.send(room_id, "first message", tok=tok) + + # Create "many" backward extremities. The magic number we're trying to + # create more than is 5 which corresponds to the number of backward + # extremities we slice off in `_maybe_backfill_inner` + for i in range(0, 8): + event_handler = self.hs.get_event_creation_handler() + event, context = self.get_success( + event_handler.create_event( + requester, + { + "type": "m.room.message", + "content": { + "msgtype": "m.text", + "body": "message connected to fake event", + }, + "room_id": room_id, + "sender": user_id, + }, + prev_event_ids=[ + ev1["event_id"], + # We're creating an backward extremity each time thanks + # to this fake event + generate_fake_event_id(), + ], + ) + ) + self.get_success( + event_handler.handle_new_client_event(requester, event, context) + ) + + current_depth = 1 + limit = 100 + with LoggingContext("receive_pdu"): + # Make sure backfill still works + d = run_in_background( + self.hs.get_federation_handler().maybe_backfill, + room_id, + current_depth, + limit, + ) + self.get_success(d) + def test_backfill_floating_outlier_membership_auth(self): """ As the local homeserver, check that we can properly process a federated From 4e75ea3ab0f37a714b29766efbbb0868be15b386 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 03:50:57 -0500 Subject: [PATCH 6/8] Fix lint --- tests/handlers/test_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index a5b9f56024c2..1bd8d514b46a 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -23,8 +23,8 @@ from synapse.logging.context import LoggingContext, run_in_background from synapse.rest import admin from synapse.rest.client import login, room -from synapse.util.stringutils import random_string from synapse.types import create_requester +from synapse.util.stringutils import random_string from tests import unittest From 85748bdb24b9b6ea2b85323a0dd80a8a5f4fee5a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 8 Oct 2021 03:55:14 -0500 Subject: [PATCH 7/8] Fix lint from CI --- tests/handlers/test_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 1bd8d514b46a..e1557566e4bc 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -224,7 +224,7 @@ def test_backfill_with_many_backward_extremities(self): # Create "many" backward extremities. The magic number we're trying to # create more than is 5 which corresponds to the number of backward # extremities we slice off in `_maybe_backfill_inner` - for i in range(0, 8): + for _ in range(0, 8): event_handler = self.hs.get_event_creation_handler() event, context = self.get_success( event_handler.create_event( From 391bc7354e365b25185e00563aa0055ded013b9a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 14 Oct 2021 12:04:33 -0500 Subject: [PATCH 8/8] Revert code change and opt to change type hint See https://github.com/matrix-org/synapse/pull/11027#discussion_r724753918 Dict[str, Any] will behave as an Iterable[str] when iterating on it --- synapse/handlers/federation.py | 2 +- synapse/handlers/federation_event.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e76237572da9..e072efad161f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -318,7 +318,7 @@ async def try_backfill(domains: List[str]) -> bool: for dom in domains: try: await self._federation_event_handler.backfill( - dom, room_id, limit=100, extremities=extremities.keys() + dom, room_id, limit=100, extremities=extremities ) # If this succeeded then we probably already have the # appropriate stuff. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f640b417b39a..0e455678aaf4 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -392,7 +392,7 @@ async def check_join_restrictions( @log_function async def backfill( - self, dest: str, room_id: str, limit: int, extremities: List[str] + self, dest: str, room_id: str, limit: int, extremities: Iterable[str] ) -> None: """Trigger a backfill request to `dest` for the given `room_id`