From e8a96bcc399dc84b44251d64334e68749c93844c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 Jan 2023 15:33:01 -0500 Subject: [PATCH 01/14] Batch look-ups to see if rooms are partial stated. --- changelog.d/14917.misc | 1 + synapse/handlers/sync.py | 12 ++++++------ synapse/storage/databases/main/room.py | 23 +++++++++++++++++++++-- 3 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 changelog.d/14917.misc diff --git a/changelog.d/14917.misc b/changelog.d/14917.misc new file mode 100644 index 000000000000..8a248495b216 --- /dev/null +++ b/changelog.d/14917.misc @@ -0,0 +1 @@ +Improve performance of looking up partial-state status. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ee117645673f..396deb39ce78 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1383,9 +1383,9 @@ async def generate_sync_result( if not sync_config.filter_collection.lazy_load_members(): # Non-lazy syncs should never include partially stated rooms. # Exclude all partially stated rooms from this sync. - for room_id in mutable_joined_room_ids: - if await self.store.is_partial_state_room(room_id): - mutable_rooms_to_exclude.add(room_id) + results = await self.store.is_partial_state_rooms(mutable_joined_room_ids) + for room_id, result in results.items(): + mutable_rooms_to_exclude.add(room_id) # Incremental eager syncs should additionally include rooms that # - we are joined to @@ -1401,9 +1401,9 @@ async def generate_sync_result( mutable_joined_room_ids, ) ) - for room_id in un_partial_stated_rooms: - if not await self.store.is_partial_state_room(room_id): - forced_newly_joined_room_ids.add(room_id) + results = await self.store.is_partial_state_rooms(un_partial_stated_rooms) + for room_id, result in results.items(): + forced_newly_joined_room_ids.add(room_id) # Now we have our list of joined room IDs, exclude as configured and freeze joined_room_ids = frozenset( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 3aa7b945606e..aa7e2d6e5506 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -60,9 +60,9 @@ MultiWriterIdGenerator, StreamIdGenerator, ) -from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID +from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID from synapse.util import json_encoder -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedList from synapse.util.stringutils import MXC_REGEX if TYPE_CHECKING: @@ -1274,6 +1274,25 @@ async def is_partial_state_room(self, room_id: str) -> bool: return entry is not None + @cachedList(cached_method_name="is_partial_State_room", list_name="room_ids") + async def is_partial_state_rooms(self, room_ids: StrCollection) -> Mapping[str, bool]: + """Checks if this room has partial state. + + Returns true if this is a "partial-state" room, which means that the state + at events in the room, and `current_state_events`, may not yet be + complete. + """ + + entries = set(await self.db_pool.simple_select_many_batch( + table="partial_state_rooms", + column="room_id", + iterable=room_ids, + retcols=("room_id",), + desc="is_partial_state_room", + )) + + return {room_id: room_id in entries for room_id in room_ids} + async def get_join_event_id_and_device_lists_stream_id_for_partial_state( self, room_id: str ) -> Tuple[str, int]: From 16487c9b9623a6a67b7edc3a832e4e39095aff47 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 Jan 2023 15:39:39 -0500 Subject: [PATCH 02/14] Fix issues found in linting. --- synapse/handlers/sync.py | 16 +++++++++++----- synapse/storage/databases/main/room.py | 20 ++++++++++++-------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 396deb39ce78..32f4ed228a05 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1384,15 +1384,18 @@ async def generate_sync_result( # Non-lazy syncs should never include partially stated rooms. # Exclude all partially stated rooms from this sync. results = await self.store.is_partial_state_rooms(mutable_joined_room_ids) - for room_id, result in results.items(): - mutable_rooms_to_exclude.add(room_id) + mutable_rooms_to_exclude.update( + room_id + for room_id, is_partial_state in results.items() + if is_partial_state + ) # Incremental eager syncs should additionally include rooms that # - we are joined to # - are full-stated # - became fully-stated at some point during the sync period # (These rooms will have been omitted during a previous eager sync.) - forced_newly_joined_room_ids = set() + forced_newly_joined_room_ids: Set[str] = set() if since_token and not sync_config.filter_collection.lazy_load_members(): un_partial_stated_rooms = ( await self.store.get_un_partial_stated_rooms_between( @@ -1402,8 +1405,11 @@ async def generate_sync_result( ) ) results = await self.store.is_partial_state_rooms(un_partial_stated_rooms) - for room_id, result in results.items(): - forced_newly_joined_room_ids.add(room_id) + forced_newly_joined_room_ids.update( + room_id + for room_id, is_partial_state in results.items() + if is_partial_state + ) # Now we have our list of joined room IDs, exclude as configured and freeze joined_room_ids = frozenset( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index aa7e2d6e5506..f9c7be1b3e48 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1275,7 +1275,9 @@ async def is_partial_state_room(self, room_id: str) -> bool: return entry is not None @cachedList(cached_method_name="is_partial_State_room", list_name="room_ids") - async def is_partial_state_rooms(self, room_ids: StrCollection) -> Mapping[str, bool]: + async def is_partial_state_rooms( + self, room_ids: StrCollection + ) -> Mapping[str, bool]: """Checks if this room has partial state. Returns true if this is a "partial-state" room, which means that the state @@ -1283,13 +1285,15 @@ async def is_partial_state_rooms(self, room_ids: StrCollection) -> Mapping[str, complete. """ - entries = set(await self.db_pool.simple_select_many_batch( - table="partial_state_rooms", - column="room_id", - iterable=room_ids, - retcols=("room_id",), - desc="is_partial_state_room", - )) + entries = set( + await self.db_pool.simple_select_many_batch( + table="partial_state_rooms", + column="room_id", + iterable=room_ids, + retcols=("room_id",), + desc="is_partial_state_room", + ) + ) return {room_id: room_id in entries for room_id in room_ids} From 9f4a5c1048f225dbd726b9d76ca152c9a3168de3 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 Jan 2023 15:48:12 -0500 Subject: [PATCH 03/14] Fix typo. --- synapse/storage/databases/main/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index f9c7be1b3e48..534baab7c810 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1274,7 +1274,7 @@ async def is_partial_state_room(self, room_id: str) -> bool: return entry is not None - @cachedList(cached_method_name="is_partial_State_room", list_name="room_ids") + @cachedList(cached_method_name="is_partial_state_room", list_name="room_ids") async def is_partial_state_rooms( self, room_ids: StrCollection ) -> Mapping[str, bool]: From 528183bb55ff1f6a7c816d930196f3f1a82d3a60 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 Jan 2023 16:44:39 -0500 Subject: [PATCH 04/14] Apply suggestions from code review Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/14917.misc | 2 +- synapse/handlers/sync.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog.d/14917.misc b/changelog.d/14917.misc index 8a248495b216..4d1dd2639a10 100644 --- a/changelog.d/14917.misc +++ b/changelog.d/14917.misc @@ -1 +1 @@ -Improve performance of looking up partial-state status. +Faster joins: Improve performance of looking up partial-state status of rooms. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 32f4ed228a05..6095513277f0 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1408,7 +1408,7 @@ async def generate_sync_result( forced_newly_joined_room_ids.update( room_id for room_id, is_partial_state in results.items() - if is_partial_state + if not is_partial_state ) # Now we have our list of joined room IDs, exclude as configured and freeze From 1ce0bd2d7785f06b923a150755e9a1863d20eeb9 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 Jan 2023 16:45:01 -0500 Subject: [PATCH 05/14] Clarify comments. Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- synapse/storage/databases/main/room.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 534baab7c810..c28eced8d908 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1278,9 +1278,9 @@ async def is_partial_state_room(self, room_id: str) -> bool: async def is_partial_state_rooms( self, room_ids: StrCollection ) -> Mapping[str, bool]: - """Checks if this room has partial state. + """Checks if the given rooms have partial state. - Returns true if this is a "partial-state" room, which means that the state + Returns true for "partial-state" rooms, which means that the state at events in the room, and `current_state_events`, may not yet be complete. """ From 5c1fe0b20e0cf6f1c77fb215ad7d32e438d46cf4 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 10:56:33 +0000 Subject: [PATCH 06/14] Also improve the cache size while we're at it --- synapse/storage/databases/main/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index c28eced8d908..72e29bbb1ba3 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1255,7 +1255,7 @@ async def get_partial_state_room_resync_info( return room_servers - @cached() + @cached(max_entries=10000) async def is_partial_state_room(self, room_id: str) -> bool: """Checks if this room has partial state. From e01b43ad785127a67f7caea125491ea6dd4c29e4 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 11:00:07 +0000 Subject: [PATCH 07/14] is_partial_state_rooms -> is_partial_state_room_batched --- synapse/handlers/sync.py | 4 ++-- synapse/storage/databases/main/room.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6095513277f0..0d4240d014c7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1383,7 +1383,7 @@ async def generate_sync_result( if not sync_config.filter_collection.lazy_load_members(): # Non-lazy syncs should never include partially stated rooms. # Exclude all partially stated rooms from this sync. - results = await self.store.is_partial_state_rooms(mutable_joined_room_ids) + results = await self.store.is_partial_state_room_batched(mutable_joined_room_ids) mutable_rooms_to_exclude.update( room_id for room_id, is_partial_state in results.items() @@ -1404,7 +1404,7 @@ async def generate_sync_result( mutable_joined_room_ids, ) ) - results = await self.store.is_partial_state_rooms(un_partial_stated_rooms) + results = await self.store.is_partial_state_room_batched(un_partial_stated_rooms) forced_newly_joined_room_ids.update( room_id for room_id, is_partial_state in results.items() diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 72e29bbb1ba3..1babf71cb0b4 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1275,7 +1275,7 @@ async def is_partial_state_room(self, room_id: str) -> bool: return entry is not None @cachedList(cached_method_name="is_partial_state_room", list_name="room_ids") - async def is_partial_state_rooms( + async def is_partial_state_room_batched( self, room_ids: StrCollection ) -> Mapping[str, bool]: """Checks if the given rooms have partial state. From 71c4fd8e0825ff489d24a2f014641d8f4ee5ea7b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 11:05:58 +0000 Subject: [PATCH 08/14] Run `black` --- synapse/handlers/sync.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0d4240d014c7..5ebd3ea855c2 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1383,7 +1383,9 @@ async def generate_sync_result( if not sync_config.filter_collection.lazy_load_members(): # Non-lazy syncs should never include partially stated rooms. # Exclude all partially stated rooms from this sync. - results = await self.store.is_partial_state_room_batched(mutable_joined_room_ids) + results = await self.store.is_partial_state_room_batched( + mutable_joined_room_ids + ) mutable_rooms_to_exclude.update( room_id for room_id, is_partial_state in results.items() @@ -1404,7 +1406,9 @@ async def generate_sync_result( mutable_joined_room_ids, ) ) - results = await self.store.is_partial_state_room_batched(un_partial_stated_rooms) + results = await self.store.is_partial_state_room_batched( + un_partial_stated_rooms + ) forced_newly_joined_room_ids.update( room_id for room_id, is_partial_state in results.items() From e0f863ddab6293cfc237c0ae696e2ab8b875226e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 15:13:49 +0000 Subject: [PATCH 09/14] Improve annotation for `simple_select_many_batch` --- synapse/storage/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 88479a16db0c..e20c5c5302fa 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1819,7 +1819,7 @@ async def simple_select_many_batch( keyvalues: Optional[Dict[str, Any]] = None, desc: str = "simple_select_many_batch", batch_size: int = 100, - ) -> List[Any]: + ) -> List[Dict[str, Any]]: """Executes a SELECT query on the named table, which may return zero or more rows, returning the result as a list of dicts. From 874f5208559a20363d2f65f8462a9d2a6df60065 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 15:14:06 +0000 Subject: [PATCH 10/14] Fix is_partial_state_room_batched impl --- synapse/storage/databases/main/room.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 1babf71cb0b4..aad76ea1904c 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -23,6 +23,7 @@ Collection, Dict, List, + Literal, Mapping, Optional, Sequence, @@ -1285,17 +1286,15 @@ async def is_partial_state_room_batched( complete. """ - entries = set( - await self.db_pool.simple_select_many_batch( - table="partial_state_rooms", - column="room_id", - iterable=room_ids, - retcols=("room_id",), - desc="is_partial_state_room", - ) + rows: List[Dict[str, Literal[1]]] = await self.db_pool.simple_select_many_batch( + table="partial_state_rooms", + column="1", + iterable=room_ids, + retcols=("room_id",), + desc="is_partial_state_room", ) - - return {room_id: room_id in entries for room_id in room_ids} + partial_state_rooms = {room_id for row_dict in rows for room_id in row_dict} + return {room_id: room_id in partial_state_rooms for room_id in room_ids} async def get_join_event_id_and_device_lists_stream_id_for_partial_state( self, room_id: str From 86078049ac74ab44939e4bc3e1e8faaa876c25f2 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 15:23:41 +0000 Subject: [PATCH 11/14] Okay, _actually_ fix impl --- synapse/storage/databases/main/room.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index aad76ea1904c..7b3c95b8f057 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -23,7 +23,6 @@ Collection, Dict, List, - Literal, Mapping, Optional, Sequence, @@ -1286,14 +1285,16 @@ async def is_partial_state_room_batched( complete. """ - rows: List[Dict[str, Literal[1]]] = await self.db_pool.simple_select_many_batch( + rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch( table="partial_state_rooms", - column="1", + column="room_id", iterable=room_ids, retcols=("room_id",), desc="is_partial_state_room", ) - partial_state_rooms = {room_id for row_dict in rows for room_id in row_dict} + partial_state_rooms = { + room_id for row_dict in rows for room_id in row_dict.values() + } return {room_id: room_id in partial_state_rooms for room_id in room_ids} async def get_join_event_id_and_device_lists_stream_id_for_partial_state( From bafea9eee187c30141d8f41b26cd3f5fee4bfa94 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 Jan 2023 10:24:53 -0500 Subject: [PATCH 12/14] Update description. --- synapse/storage/databases/main/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 7b3c95b8f057..9ca5424136b2 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1290,7 +1290,7 @@ async def is_partial_state_room_batched( column="room_id", iterable=room_ids, retcols=("room_id",), - desc="is_partial_state_room", + desc="is_partial_state_room_batched", ) partial_state_rooms = { room_id for row_dict in rows for room_id in row_dict.values() From c0f61d37759574177984b7754ada83cfff272738 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 26 Jan 2023 15:31:46 +0000 Subject: [PATCH 13/14] Update synapse/storage/databases/main/room.py Co-authored-by: Patrick Cloke --- synapse/storage/databases/main/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 9ca5424136b2..72dee9062558 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1293,7 +1293,7 @@ async def is_partial_state_room_batched( desc="is_partial_state_room_batched", ) partial_state_rooms = { - room_id for row_dict in rows for room_id in row_dict.values() + row_dict["room_id"] for row_dict in rows } return {room_id: room_id in partial_state_rooms for room_id in room_ids} From 1eb1071b956b1354bf8ff541f75b19c5f1655171 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 Jan 2023 10:35:58 -0500 Subject: [PATCH 14/14] Run black. --- synapse/storage/databases/main/room.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 72dee9062558..fbbc01888701 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1292,9 +1292,7 @@ async def is_partial_state_room_batched( retcols=("room_id",), desc="is_partial_state_room_batched", ) - partial_state_rooms = { - row_dict["room_id"] for row_dict in rows - } + partial_state_rooms = {row_dict["room_id"] for row_dict in rows} return {room_id: room_id in partial_state_rooms for room_id in room_ids} async def get_join_event_id_and_device_lists_stream_id_for_partial_state(