Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Avoid unnecessary work in the spaces summary. #10085

Closed
wants to merge 9 commits into from
59 changes: 38 additions & 21 deletions synapse/handlers/space_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import logging
import re
from collections import deque
from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Set, Tuple

import attr

Expand Down Expand Up @@ -89,7 +89,10 @@ async def get_space_summary(
room_queue = deque((_RoomQueueEntry(room_id, ()),))

# rooms we have already processed
processed_rooms = set() # type: Set[str]
processed_rooms: Dict[str, bool] = {}
clokep marked this conversation as resolved.
Show resolved Hide resolved

# events which are pending knowing if a room is accessible.
clokep marked this conversation as resolved.
Show resolved Hide resolved
pending_events: Dict[str, List[JsonDict]] = {}

rooms_result = [] # type: List[JsonDict]
events_result = [] # type: List[JsonDict]
Expand Down Expand Up @@ -123,21 +126,27 @@ async def get_space_summary(

if room:
rooms_result.append(room)

# Mark whether this room was accessible or not.
processed_rooms[room_id] = bool(room)
clokep marked this conversation as resolved.
Show resolved Hide resolved
else:
fed_rooms, fed_events = await self._summarize_remote_room(
fed_rooms, events = await self._summarize_remote_room(
queue_entry,
suggested_only,
max_children,
exclude_rooms=processed_rooms,
)

# The queried room may or may not have been returned, but don't process
# it again, anyway. (This may be overridden below if it is accessible.)
processed_rooms[room_id] = False

# The results over federation might include rooms that the we,
# as the requesting server, are allowed to see, but the requesting
# user is not permitted see.
#
# Filter the returned results to only what is accessible to the user.
room_ids = set()
events = []
for room in fed_rooms:
fed_room_id = room.get("room_id")
if not fed_room_id or not isinstance(fed_room_id, str):
Expand Down Expand Up @@ -181,34 +190,42 @@ async def get_space_summary(

# All rooms returned don't need visiting again (even if the user
# didn't have access to them).
processed_rooms.add(fed_room_id)

for event in fed_events:
if event.get("room_id") in room_ids:
events.append(event)
processed_rooms[fed_room_id] = include_room

logger.debug(
"Query of %s returned rooms %s, events %s",
room_id,
[room.get("room_id") for room in fed_rooms],
["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in fed_events],
["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events],
)

# the room we queried may or may not have been returned, but don't process
# it again, anyway.
processed_rooms.add(room_id)
# There might be events which point to this room which are waiting
# to see if it is accessible.
room_pending_events = pending_events.pop(room_id, ())
if not processed_rooms[room_id]:
room_pending_events = ()
clokep marked this conversation as resolved.
Show resolved Hide resolved

# XXX: is it ok that we blindly iterate through any events returned by
# a remote server, whether or not they actually link to any rooms in our
# tree?
for ev in events:
events_result.append(ev)
# Return the events which reference rooms that were found to be
# accessible. Otherwise, queue them until we process those rooms.
clokep marked this conversation as resolved.
Show resolved Hide resolved
for ev in itertools.chain(events, room_pending_events):
parent_room_id = ev["room_id"]
child_room_id = ev["state_key"]

try:
if (
processed_rooms[parent_room_id]
and processed_rooms[child_room_id]
):
events_result.append(ev)
except KeyError:
# Note that events are pending of the child event since if
clokep marked this conversation as resolved.
Show resolved Hide resolved
# the parent event was not accessible it shouldn't be included
clokep marked this conversation as resolved.
Show resolved Hide resolved
# at all.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really follow this logic. Why do we only add the event to the pending list for the child room?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thought process was that we only want to send the event in the case of both the parent and child room being accessible to the requester.

I think the assumption here is that if the parent room isn't in processed_rooms something terrible has happened? There are two sources of data we're iterating over:

  • events - m.space.child events from the current room.
    • For local rooms, the parent will be the current room, the child will be a room that may or may not have been processed.
    • For remote rooms, this includes the above, as well as additional events that might be children of children, etc. (Or even events that aren't part of the graph.)
  • room_pending_events - m.space.child events which were previously found to be pointing to the current room, i.e. the child room is the current room and the parent room is a previously processed room.

I'm now having trouble convincing myself that parents must be processed before children and I think that assumption might be bogus actually.

pending_events.setdefault(child_room_id, []).append(ev)

# add the child to the queue. we have already validated
# that the vias are a list of server names.
room_queue.append(
_RoomQueueEntry(ev["state_key"], ev["content"]["via"])
)
room_queue.append(_RoomQueueEntry(child_room_id, ev["content"]["via"]))

# Before returning to the client, remove the allowed_spaces key for any
# rooms.
Expand Down