From 84cf3e47a0318aba51d9f830d5e724182c5d93c4 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 28 May 2021 16:28:01 +0100
Subject: [PATCH 01/11] Allow response of `/send_join` to be larger. (#10093)

Fixes #10087.
---
 changelog.d/10093.bugfix               |  1 +
 synapse/federation/transport/client.py |  7 +++++++
 synapse/http/matrixfederationclient.py | 14 +++++++++++++-
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/10093.bugfix

diff --git a/changelog.d/10093.bugfix b/changelog.d/10093.bugfix
new file mode 100644
index 000000000000..e50de4b2ea2c
--- /dev/null
+++ b/changelog.d/10093.bugfix
@@ -0,0 +1 @@
+Fix HTTP response size limit to allow joining very large rooms over federation.
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index e93ab83f7ff4..5b4f5d17f774 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -35,6 +35,11 @@

 logger = logging.getLogger(__name__)

+# Send join responses can be huge, so we set a separate limit here. The response
+# is parsed in a streaming manner, which helps alleviate the issue of memory
+# usage a bit.
+MAX_RESPONSE_SIZE_SEND_JOIN = 500 * 1024 * 1024
+

 class TransportLayerClient:
     """Sends federation HTTP requests to other servers"""
@@ -261,6 +266,7 @@ async def send_join_v1(
             path=path,
             data=content,
             parser=SendJoinParser(room_version, v1_api=True),
+            max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN,
         )

         return response
@@ -276,6 +282,7 @@ async def send_join_v2(
             path=path,
             data=content,
             parser=SendJoinParser(room_version, v1_api=False),
+            max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN,
         )

         return response
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index f5503b394b37..1998990a144e 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -205,6 +205,7 @@ async def _handle_response(
     response: IResponse,
     start_ms: int,
     parser: ByteParser[T],
+    max_response_size: Optional[int] = None,
 ) -> T:
     """
     Reads the body of a response with a timeout and sends it to a parser
@@ -216,15 +217,20 @@ async def _handle_response(
         response: response to the request
         start_ms: Timestamp when request was made
         parser: The parser for the response
+        max_response_size: The maximum size to read from the response, if None
+            uses the default.

     Returns:
         The parsed response
     """
+    if max_response_size is None:
+        max_response_size = MAX_RESPONSE_SIZE
+
     try:
         check_content_type_is(response.headers, parser.CONTENT_TYPE)

-        d = read_body_with_max_size(response, parser, MAX_RESPONSE_SIZE)
+        d = read_body_with_max_size(response, parser, max_response_size)

         d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor)

         length = await make_deferred_yieldable(d)
@@ -735,6 +741,7 @@ async def put_json(
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Literal[None] = None,
+        max_response_size: Optional[int] = None,
     ) -> Union[JsonDict, list]:
         ...

@@ -752,6 +759,7 @@ async def put_json(
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Optional[ByteParser[T]] = None,
+        max_response_size: Optional[int] = None,
     ) -> T:
         ...

@@ -768,6 +776,7 @@ async def put_json(
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Optional[ByteParser] = None,
+        max_response_size: Optional[int] = None,
     ):
         """Sends the specified json data using PUT

@@ -803,6 +812,8 @@ async def put_json(
             enabled.
             parser: The parser to use to decode the response. Defaults to
                 parsing as JSON.
+            max_response_size: The maximum size to read from the response, if None
+                uses the default.

         Returns:
             Succeeds when we get a 2xx HTTP response. The
@@ -853,6 +864,7 @@ async def put_json(
             response,
             start_ms,
             parser=parser,
+            max_response_size=max_response_size,
         )

         return body
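
The shape of this change is worth noting: a module-level default response cap that individual callers can override per request. The standalone sketch below illustrates the pattern outside Synapse's Twisted stack; the names read_limited_body, DEFAULT_MAX_SIZE and BodyTooLargeError are assumptions made for the example, not Synapse's actual API.

    from typing import Iterable, Optional

    DEFAULT_MAX_SIZE = 100 * 1024 * 1024  # illustrative global cap, not Synapse's value

    class BodyTooLargeError(Exception):
        """Raised when a response body exceeds the permitted size."""

    def read_limited_body(chunks: Iterable[bytes], max_size: Optional[int] = None) -> bytes:
        """Accumulate a chunked body, enforcing a size cap.

        Passing max_size=None falls back to the module default, mirroring how
        the patch lets /send_join supply a larger per-endpoint limit while all
        other callers keep the existing behaviour.
        """
        if max_size is None:
            max_size = DEFAULT_MAX_SIZE
        received = 0
        buf = []
        for chunk in chunks:
            received += len(chunk)
            if received > max_size:
                raise BodyTooLargeError("body exceeded %d bytes" % max_size)
            buf.append(chunk)
        return b"".join(buf)
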
From 1641c5c707fe9cac5f68589863082409c8979da6 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 28 May 2021 15:57:53 +0100
Subject: [PATCH 02/11] Log method and path when dropping request due to size limit (#10091)

---
 changelog.d/10091.misc | 1 +
 synapse/http/site.py   | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/10091.misc

diff --git a/changelog.d/10091.misc b/changelog.d/10091.misc
new file mode 100644
index 000000000000..dbe310fd17a1
--- /dev/null
+++ b/changelog.d/10091.misc
@@ -0,0 +1 @@
+Log method and path when dropping request due to size limit.
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 671fd3fbcc29..40754b7bea7e 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -105,8 +105,10 @@ def handleContentChunk(self, data):
         assert self.content, "handleContentChunk() called before gotLength()"
         if self.content.tell() + len(data) > self._max_request_body_size:
             logger.warning(
-                "Aborting connection from %s because the request exceeds maximum size",
+                "Aborting connection from %s because the request exceeds maximum size: %s %s",
                 self.client,
+                self.get_method(),
+                self.get_redacted_uri(),
             )
             self.transport.abortConnection()
             return

From 9408b86f5c3616e8cfaa2c183e787780a3a64f95 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Thu, 27 May 2021 18:10:58 +0200
Subject: [PATCH 03/11] Limit the number of events sent over replication when persisting events. (#10082)

---
 changelog.d/10082.bugfix       |  1 +
 synapse/handlers/federation.py | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/10082.bugfix

diff --git a/changelog.d/10082.bugfix b/changelog.d/10082.bugfix
new file mode 100644
index 000000000000..b4f8bcc4fa3c
--- /dev/null
+++ b/changelog.d/10082.bugfix
@@ -0,0 +1 @@
+Fixed a bug causing replication requests to fail when receiving a lot of events via federation.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 678f6b770797..bf113152517f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -91,6 +91,7 @@
     get_domain_from_id,
 )
 from synapse.util.async_helpers import Linearizer, concurrently_execute
+from synapse.util.iterutils import batch_iter
 from synapse.util.retryutils import NotRetryingDestination
 from synapse.util.stringutils import shortstr
 from synapse.visibility import filter_events_for_server
@@ -3053,13 +3054,15 @@ async def persist_events_and_notify(
         """
         instance = self.config.worker.events_shard_config.get_instance(room_id)
         if instance != self._instance_name:
-            result = await self._send_events(
-                instance_name=instance,
-                store=self.store,
-                room_id=room_id,
-                event_and_contexts=event_and_contexts,
-                backfilled=backfilled,
-            )
+            # Limit the number of events sent over replication.
+            for batch in batch_iter(event_and_contexts, 1000):
+                result = await self._send_events(
+                    instance_name=instance,
+                    store=self.store,
+                    room_id=room_id,
+                    event_and_contexts=batch,
+                    backfilled=backfilled,
+                )

             return result["max_stream_id"]
         else:
             assert self.storage.persistence
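
The fix leans on batch_iter from synapse.util.iterutils, which splits an iterable into fixed-size tuples so that each replication request stays under the request body limit. A minimal stand-in with the same observable behaviour, written for exposition rather than copied from the Synapse source:

    from itertools import islice
    from typing import Iterable, Iterator, Tuple, TypeVar

    T = TypeVar("T")

    def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]:
        """Yield tuples of up to `size` items until the iterable is exhausted."""
        sourceiter = iter(iterable)
        while True:
            batch = tuple(islice(sourceiter, size))
            if not batch:
                return
            yield batch

    # e.g. 2500 events become three replication requests of at most 1000 events each:
    assert [len(b) for b in batch_iter(range(2500), 1000)] == [1000, 1000, 500]
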
From 258a9a9e8bea851493fc2275ac6b81639c997afb Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 28 May 2021 17:06:05 +0100
Subject: [PATCH 04/11] 1.35.0rc3

---
 CHANGES.md               | 16 ++++++++++++++++
 changelog.d/10082.bugfix |  1 -
 changelog.d/10091.misc   |  1 -
 changelog.d/10093.bugfix |  1 -
 synapse/__init__.py      |  2 +-
 5 files changed, 17 insertions(+), 4 deletions(-)
 delete mode 100644 changelog.d/10082.bugfix
 delete mode 100644 changelog.d/10091.misc
 delete mode 100644 changelog.d/10093.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index 1fac16580d13..8bd05c318dcf 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,19 @@
+Synapse 1.35.0rc3 (2021-05-28)
+==============================
+
+Bugfixes
+--------
+
+- Fixed a bug causing replication requests to fail when receiving a lot of events via federation. ([\#10082](https://github.com/matrix-org/synapse/issues/10082))
+- Fix HTTP response size limit to allow joining very large rooms over federation. ([\#10093](https://github.com/matrix-org/synapse/issues/10093))
+
+
+Internal Changes
+----------------
+
+- Log method and path when dropping request due to size limit. ([\#10091](https://github.com/matrix-org/synapse/issues/10091))
+
+
 Synapse 1.35.0rc2 (2021-05-27)
 ==============================

diff --git a/changelog.d/10082.bugfix b/changelog.d/10082.bugfix
deleted file mode 100644
index b4f8bcc4fa3c..000000000000
--- a/changelog.d/10082.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fixed a bug causing replication requests to fail when receiving a lot of events via federation.
diff --git a/changelog.d/10091.misc b/changelog.d/10091.misc
deleted file mode 100644
index dbe310fd17a1..000000000000
--- a/changelog.d/10091.misc
+++ /dev/null
@@ -1 +0,0 @@
-Log method and path when dropping request due to size limit.
diff --git a/changelog.d/10093.bugfix b/changelog.d/10093.bugfix
deleted file mode 100644
index e50de4b2ea2c..000000000000
--- a/changelog.d/10093.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix HTTP response size limit to allow joining very large rooms over federation.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 6faf31dbbce5..4591246bd187 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@
 except ImportError:
     pass

-__version__ = "1.35.0rc2"
+__version__ = "1.35.0rc3"

 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when

From 4f41b711d8b37da3403ce67c88d62133f732a459 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 28 May 2021 17:13:57 +0100
Subject: [PATCH 05/11] CHANGELOG

---
 CHANGES.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 8bd05c318dcf..7e6f478d425e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,8 +4,8 @@ Synapse 1.35.0rc3 (2021-05-28)
 Bugfixes
 --------

-- Fixed a bug causing replication requests to fail when receiving a lot of events via federation. ([\#10082](https://github.com/matrix-org/synapse/issues/10082))
-- Fix HTTP response size limit to allow joining very large rooms over federation. ([\#10093](https://github.com/matrix-org/synapse/issues/10093))
+- Fixed a bug causing replication requests to fail when receiving a lot of events via federation. Introduced in v1.33.0. ([\#10082](https://github.com/matrix-org/synapse/issues/10082))
+- Fix HTTP response size limit to allow joining very large rooms over federation. Introduced in v1.33.0. ([\#10093](https://github.com/matrix-org/synapse/issues/10093))
From 10e6d2abce644d5b6d6b59516061562f54382b94 Mon Sep 17 00:00:00 2001
From: Brad Murray
Date: Tue, 1 Jun 2021 03:40:26 -0400
Subject: [PATCH 06/11] Fix opentracing inject to use the SpanContext, not the Span (#10074)

Signed-off-by: Brad Murray <brad@beeper.com>
---
 changelog.d/10074.misc         |  1 +
 synapse/logging/opentracing.py | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/10074.misc

diff --git a/changelog.d/10074.misc b/changelog.d/10074.misc
new file mode 100644
index 000000000000..8dbe2cd2bcad
--- /dev/null
+++ b/changelog.d/10074.misc
@@ -0,0 +1 @@
+Update opentracing to inject the right context into the carrier.
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 428831dad6ad..f64845b80cc0 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -594,7 +594,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T
     span = opentracing.tracer.active_span
     carrier = {}  # type: Dict[str, str]
-    opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
+    opentracing.tracer.inject(span.context, opentracing.Format.HTTP_HEADERS, carrier)

     for key, value in carrier.items():
         headers.addRawHeaders(key, value)
@@ -631,7 +631,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True):
     span = opentracing.tracer.active_span

     carrier = {}  # type: Dict[str, str]
-    opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
+    opentracing.tracer.inject(span.context, opentracing.Format.HTTP_HEADERS, carrier)

     for key, value in carrier.items():
         headers[key.encode()] = [value.encode()]
@@ -665,7 +665,7 @@ def inject_active_span_text_map(carrier, destination, check_destination=True):
         return

     opentracing.tracer.inject(
-        opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
+        opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier
     )
@@ -687,7 +687,7 @@ def get_active_span_text_map(destination=None):
     carrier = {}  # type: Dict[str, str]
     opentracing.tracer.inject(
-        opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
+        opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier
     )

     return carrier
@@ -702,7 +702,7 @@ def active_span_context_as_string():
     carrier = {}  # type: Dict[str, str]
     if opentracing:
         opentracing.tracer.inject(
-            opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier
+            opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier
         )
     return json_encoder.encode(carrier)
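
For background: the OpenTracing specification defines Tracer.inject() as taking a SpanContext, and passing the Span itself merely happens to be tolerated by some tracer implementations, which is the confusion this patch corrects. A minimal sketch of the corrected call, assuming only that the opentracing package is installed:

    from typing import Dict

    import opentracing

    def carrier_for_active_span() -> Dict[str, str]:
        """Serialise the active span's context into a header-style carrier.

        Note that we inject span.context, not the span object itself.
        """
        carrier: Dict[str, str] = {}
        span = opentracing.tracer.active_span
        if span is not None:  # there may be no active span, e.g. tracing disabled
            opentracing.tracer.inject(
                span.context, opentracing.Format.HTTP_HEADERS, carrier
            )
        return carrier
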
From b4b2fd2ecee26214fa6b322bcb62bec1ea324c1a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 1 Jun 2021 12:04:47 +0100
Subject: [PATCH 07/11] add a cache to have_seen_event (#9953)

Empirically, this helped my server considerably when handling gaps in
Matrix HQ. The problem was that we would repeatedly call have_seen_events
for the same set of (50K or so) auth_events, each of which would take many
minutes to complete, even though it's only an index scan.
---
 changelog.d/9953.feature                      |  1 +
 changelog.d/9973.feature                      |  1 +
 changelog.d/9973.misc                         |  1 -
 synapse/handlers/federation.py                | 12 ++-
 synapse/storage/databases/main/cache.py       |  1 +
 .../storage/databases/main/events_worker.py   | 61 ++++++++++--
 .../storage/databases/main/purge_events.py    | 26 ++++-
 tests/storage/databases/__init__.py           | 13 +++
 tests/storage/databases/main/__init__.py      | 13 +++
 .../databases/main/test_events_worker.py      | 96 +++++++++++++++++++
 10 files changed, 205 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/9953.feature
 create mode 100644 changelog.d/9973.feature
 delete mode 100644 changelog.d/9973.misc
 create mode 100644 tests/storage/databases/__init__.py
 create mode 100644 tests/storage/databases/main/__init__.py
 create mode 100644 tests/storage/databases/main/test_events_worker.py

diff --git a/changelog.d/9953.feature b/changelog.d/9953.feature
new file mode 100644
index 000000000000..6b3d1adc707f
--- /dev/null
+++ b/changelog.d/9953.feature
@@ -0,0 +1 @@
+Improve performance of incoming federation transactions in large rooms.
diff --git a/changelog.d/9973.feature b/changelog.d/9973.feature
new file mode 100644
index 000000000000..6b3d1adc707f
--- /dev/null
+++ b/changelog.d/9973.feature
@@ -0,0 +1 @@
+Improve performance of incoming federation transactions in large rooms.
diff --git a/changelog.d/9973.misc b/changelog.d/9973.misc
deleted file mode 100644
index 7f22d42291b2..000000000000
--- a/changelog.d/9973.misc
+++ /dev/null
@@ -1 +0,0 @@
-Make `LruCache.invalidate` support tree invalidation, and remove `invalidate_many`.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index bf113152517f..49ed7cabcc8d 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -577,7 +577,9 @@ async def _get_state_for_room(
         # Fetch the state events from the DB, and check we have the auth events.
         event_map = await self.store.get_events(state_event_ids, allow_rejected=True)
-        auth_events_in_store = await self.store.have_seen_events(auth_event_ids)
+        auth_events_in_store = await self.store.have_seen_events(
+            room_id, auth_event_ids
+        )

         # Check for missing events. We handle state and auth event seperately,
         # as we want to pull the state from the DB, but we don't for the auth
@@ -610,7 +612,7 @@ async def _get_state_for_room(
         if missing_auth_events:
             auth_events_in_store = await self.store.have_seen_events(
-                missing_auth_events
+                room_id, missing_auth_events
             )
             missing_auth_events.difference_update(auth_events_in_store)
@@ -710,7 +712,7 @@ async def _get_state_after_missing_prev_event(
         missing_auth_events = set(auth_event_ids) - fetched_events.keys()
         missing_auth_events.difference_update(
-            await self.store.have_seen_events(missing_auth_events)
+            await self.store.have_seen_events(room_id, missing_auth_events)
         )
         logger.debug("We are also missing %i auth events", len(missing_auth_events))
@@ -2475,7 +2477,7 @@ async def _update_auth_events_and_context_for_auth(
         #
         # we start by checking if they are in the store, and then try calling /event_auth/.
         if missing_auth:
-            have_events = await self.store.have_seen_events(missing_auth)
+            have_events = await self.store.have_seen_events(event.room_id, missing_auth)
             logger.debug("Events %s are in the store", have_events)
             missing_auth.difference_update(have_events)
@@ -2494,7 +2496,7 @@ async def _update_auth_events_and_context_for_auth(
                 return context

             seen_remotes = await self.store.have_seen_events(
-                [e.event_id for e in remote_auth_chain]
+                event.room_id, [e.event_id for e in remote_auth_chain]
             )

             for e in remote_auth_chain:
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index f7872501a06c..c57ae5ef15c6 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -168,6 +168,7 @@ def _invalidate_caches_for_event(
         backfilled,
     ):
         self._invalidate_get_event_cache(event_id)
+        self.have_seen_event.invalidate((room_id, event_id))

         self.get_latest_event_ids_in_room.invalidate((room_id,))
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 6963bbf7f484..403a5ddaba3e 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -22,6 +22,7 @@
     Iterable,
     List,
     Optional,
+    Set,
     Tuple,
     overload,
 )
@@ -55,7 +56,7 @@
 from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator
 from synapse.storage.util.sequence import build_sequence_generator
 from synapse.types import JsonDict, get_domain_from_id
-from synapse.util.caches.descriptors import cached
+from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import Measure
@@ -1045,32 +1046,74 @@ async def have_events_in_timeline(self, event_ids):

         return {r["event_id"] for r in rows}

-    async def have_seen_events(self, event_ids):
+    async def have_seen_events(
+        self, room_id: str, event_ids: Iterable[str]
+    ) -> Set[str]:
         """Given a list of event ids, check if we have already processed them.

+        The room_id is only used to structure the cache (so that it can later be
+        invalidated by room_id) - there is no guarantee that the events are actually
+        in the room in question.
+
         Args:
-            event_ids (iterable[str]):
+            room_id: Room we are polling
+            event_ids: events we are looking for

         Returns:
             set[str]: The events we have already seen.
         """
+        res = await self._have_seen_events_dict(
+            (room_id, event_id) for event_id in event_ids
+        )
+        return {eid for ((_rid, eid), have_event) in res.items() if have_event}
+
+    @cachedList("have_seen_event", "keys")
+    async def _have_seen_events_dict(
+        self, keys: Iterable[Tuple[str, str]]
+    ) -> Dict[Tuple[str, str], bool]:
+        """Helper for have_seen_events
+
+        Returns:
+             a dict {(room_id, event_id)-> bool}
+        """
         # if the event cache contains the event, obviously we've seen it.
-        results = {x for x in event_ids if self._get_event_cache.contains(x)}

-        def have_seen_events_txn(txn, chunk):
-            sql = "SELECT event_id FROM events as e WHERE "
+
+        cache_results = {
+            (rid, eid) for (rid, eid) in keys if self._get_event_cache.contains((eid,))
+        }
+        results = {x: True for x in cache_results}
+
+        def have_seen_events_txn(txn, chunk: Tuple[Tuple[str, str], ...]):
+            # we deliberately do *not* query the database for room_id, to make the
+            # query an index-only lookup on `events_event_id_key`.
+            #
+            # We therefore pull the events from the database into a set...
+
+            sql = "SELECT event_id FROM events AS e WHERE "
             clause, args = make_in_list_sql_clause(
-                txn.database_engine, "e.event_id", chunk
+                txn.database_engine, "e.event_id", [eid for (_rid, eid) in chunk]
             )
             txn.execute(sql + clause, args)
-            results.update(row[0] for row in txn)
+            found_events = {eid for eid, in txn}

-        for chunk in batch_iter((x for x in event_ids if x not in results), 100):
+            # ... and then we can update the results for each row in the batch
+            results.update({(rid, eid): (eid in found_events) for (rid, eid) in chunk})
+
+        # each batch requires its own index scan, so we make the batches as big as
+        # possible.
+        for chunk in batch_iter((k for k in keys if k not in cache_results), 500):
             await self.db_pool.runInteraction(
                 "have_seen_events", have_seen_events_txn, chunk
             )
+
         return results

+    @cached(max_entries=100000, tree=True)
+    async def have_seen_event(self, room_id: str, event_id: str):
+        # this only exists for the benefit of the @cachedList descriptor on
+        # _have_seen_events_dict
+        raise NotImplementedError()
+
     def _get_current_state_event_counts_txn(self, txn, room_id):
         """
         See get_current_state_event_counts.
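
The @cached/@cachedList pairing above is Synapse's idiom for batch-cacheable lookups: the @cached stub declares the cache, and the @cachedList helper answers many keys at once, querying the database only for the misses. Stripped of Synapse's descriptor machinery, the pattern reduces to roughly the following sketch, in which _lookup_in_database is a hypothetical stand-in for the batched SELECT:

    from typing import Dict, Iterable, List, Set, Tuple

    _cache: Dict[Tuple[str, str], bool] = {}  # stand-in for the have_seen_event cache

    def _lookup_in_database(keys: List[Tuple[str, str]]) -> Set[Tuple[str, str]]:
        """Hypothetical placeholder for the batched, index-only SELECT above."""
        return set()

    def have_seen_events(room_id: str, event_ids: Iterable[str]) -> Set[str]:
        """Return the subset of event_ids that have already been processed."""
        keys = [(room_id, eid) for eid in event_ids]
        misses = [k for k in keys if k not in _cache]
        if misses:
            found = _lookup_in_database(misses)  # one query covers all cache misses
            for k in misses:
                _cache[k] = k in found
        return {eid for (rid, eid) in keys if _cache[(rid, eid)]}
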
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 8f83748b5edb..7fb7780d0ffd 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -16,14 +16,14 @@
 from typing import Any, List, Set, Tuple

 from synapse.api.errors import SynapseError
-from synapse.storage._base import SQLBaseStore
+from synapse.storage.databases.main import CacheInvalidationWorkerStore
 from synapse.storage.databases.main.state import StateGroupWorkerStore
 from synapse.types import RoomStreamToken

 logger = logging.getLogger(__name__)


-class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
+class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
     async def purge_history(
         self, room_id: str, token: str, delete_local_events: bool
     ) -> Set[int]:
@@ -203,8 +203,6 @@ def _purge_history_txn(
             "DELETE FROM event_to_state_groups "
             "WHERE event_id IN (SELECT event_id from events_to_purge)"
         )
-        for event_id, _ in event_rows:
-            txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))

         # Delete all remote non-state events
         for table in (
@@ -283,6 +281,20 @@ def _purge_history_txn(
         # so make sure to keep this actually last.
         txn.execute("DROP TABLE events_to_purge")

+        for event_id, should_delete in event_rows:
+            self._invalidate_cache_and_stream(
+                txn, self._get_state_group_for_event, (event_id,)
+            )
+
+            # XXX: This is racy, since have_seen_events could be called between the
+            #    transaction completing and the invalidation running. On the other hand,
+            #    that's no different to calling `have_seen_events` just before the
+            #    event is deleted from the database.
+            if should_delete:
+                self._invalidate_cache_and_stream(
+                    txn, self.have_seen_event, (room_id, event_id)
+                )
+
         logger.info("[purge] done")

         return referenced_state_groups
@@ -422,7 +434,11 @@ def _purge_room_txn(self, txn, room_id: str) -> List[int]:
         # index on them. In any case we should be clearing out 'stream' tables
         # periodically anyway (#5888)

-        # TODO: we could probably usefully do a bunch of cache invalidation here
+        # TODO: we could probably usefully do a bunch more cache invalidation here
+
+        # XXX: as with purge_history, this is racy, but no worse than other races
+        #    that already exist.
+        self._invalidate_cache_and_stream(txn, self.have_seen_event, (room_id,))

         logger.info("[purge] done")
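
Note the interplay with tree=True on the have_seen_event cache declared earlier: a tree cache keyed on (room_id, event_id) can be invalidated either per entry or by the (room_id,) prefix, which is what lets _purge_room_txn drop every cached entry for a room in a single call. A toy sketch of that prefix-invalidation behaviour, not Synapse's actual TreeCache:

    from typing import Dict, Tuple

    class PrefixCache:
        """Two-level cache supporting invalidation by full key or by prefix."""

        def __init__(self) -> None:
            self._data: Dict[str, Dict[str, bool]] = {}

        def set(self, key: Tuple[str, str], value: bool) -> None:
            room_id, event_id = key
            self._data.setdefault(room_id, {})[event_id] = value

        def invalidate(self, key: Tuple[str, ...]) -> None:
            if len(key) == 1:
                # prefix invalidation: drop everything for the room, as
                # _purge_room_txn does above
                self._data.pop(key[0], None)
            else:
                room_id, event_id = key
                self._data.get(room_id, {}).pop(event_id, None)

    cache = PrefixCache()
    cache.set(("room1", "event10"), True)
    cache.invalidate(("room1",))  # purging the room clears all of its entries
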
diff --git a/tests/storage/databases/__init__.py b/tests/storage/databases/__init__.py
new file mode 100644
index 000000000000..c24c7ecd92c2
--- /dev/null
+++ b/tests/storage/databases/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/storage/databases/main/__init__.py b/tests/storage/databases/main/__init__.py
new file mode 100644
index 000000000000..c24c7ecd92c2
--- /dev/null
+++ b/tests/storage/databases/main/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py
new file mode 100644
index 000000000000..932970fd9ad1
--- /dev/null
+++ b/tests/storage/databases/main/test_events_worker.py
@@ -0,0 +1,96 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+
+from synapse.logging.context import LoggingContext
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+
+from tests import unittest
+
+
+class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
+    def prepare(self, reactor, clock, hs):
+        self.store: EventsWorkerStore = hs.get_datastore()
+
+        # insert some test data
+        for rid in ("room1", "room2"):
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "rooms",
+                    {"room_id": rid, "room_version": 4},
+                )
+            )
+
+        for idx, (rid, eid) in enumerate(
+            (
+                ("room1", "event10"),
+                ("room1", "event11"),
+                ("room1", "event12"),
+                ("room2", "event20"),
+            )
+        ):
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "events",
+                    {
+                        "event_id": eid,
+                        "room_id": rid,
+                        "topological_ordering": idx,
+                        "stream_ordering": idx,
+                        "type": "test",
+                        "processed": True,
+                        "outlier": False,
+                    },
+                )
+            )
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "event_json",
+                    {
+                        "event_id": eid,
+                        "room_id": rid,
+                        "json": json.dumps({"type": "test", "room_id": rid}),
+                        "internal_metadata": "{}",
+                        "format_version": 3,
+                    },
+                )
+            )
+
+    def test_simple(self):
+        with LoggingContext(name="test") as ctx:
+            res = self.get_success(
+                self.store.have_seen_events("room1", ["event10", "event19"])
+            )
+            self.assertEquals(res, {"event10"})
+
+            # that should result in a single db query
+            self.assertEquals(ctx.get_resource_usage().db_txn_count, 1)
+
+        # a second lookup of the same events should cause no queries
+        with LoggingContext(name="test") as ctx:
+            res = self.get_success(
+                self.store.have_seen_events("room1", ["event10", "event19"])
+            )
+            self.assertEquals(res, {"event10"})
+            self.assertEquals(ctx.get_resource_usage().db_txn_count, 0)
+
+    def test_query_via_event_cache(self):
+        # fetch an event into the event cache
+        self.get_success(self.store.get_event("event10"))
+
+        # looking it up should now cause no db hits
+        with LoggingContext(name="test") as ctx:
+            res = self.get_success(self.store.have_seen_events("room1", ["event10"]))
+            self.assertEquals(res, {"event10"})
+            self.assertEquals(ctx.get_resource_usage().db_txn_count, 0)

From 408ecf8ece397bcf08564031379b461d5c9b0de5 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 1 Jun 2021 13:19:50 +0100
Subject: [PATCH 08/11] Announce deprecation of experimental `msc2858_enabled`
 option. (#10101)

c.f. https://github.com/matrix-org/synapse/pull/9617 and
https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md

Fixes #9627.
---
 changelog.d/10101.removal | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/10101.removal

diff --git a/changelog.d/10101.removal b/changelog.d/10101.removal
new file mode 100644
index 000000000000..f2020e9ddf38
--- /dev/null
+++ b/changelog.d/10101.removal
@@ -0,0 +1 @@
+The core Synapse development team plan to drop support for the [unstable API of MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), including the undocumented `experimental.msc2858_enabled` config option, in August 2021. Client authors should ensure that their clients are updated to use the stable API (which has been supported since Synapse 1.30) well before that time, to give their users time to upgrade.
From a8372ad591e07fa76e194a22732a5301d9e55b6f Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Tue, 1 Jun 2021 13:23:55 +0100
Subject: [PATCH 09/11] 1.35.0

---
 CHANGES.md                |  9 +++++++++
 changelog.d/10101.removal |  1 -
 debian/changelog          |  6 ++++++
 synapse/__init__.py       |  2 +-
 4 files changed, 16 insertions(+), 2 deletions(-)
 delete mode 100644 changelog.d/10101.removal

diff --git a/CHANGES.md b/CHANGES.md
index 7e6f478d425e..09f0be8e176e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,12 @@
+Synapse 1.35.0 (2021-06-01)
+===========================
+
+Deprecations and Removals
+-------------------------
+
+- The core Synapse development team plan to drop support for the [unstable API of MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), including the undocumented `experimental.msc2858_enabled` config option, in August 2021. Client authors should ensure that their clients are updated to use the stable API (which has been supported since Synapse 1.30) well before that time, to give their users time to upgrade. ([\#10101](https://github.com/matrix-org/synapse/issues/10101))
+
+
 Synapse 1.35.0rc3 (2021-05-28)
 ==============================

diff --git a/changelog.d/10101.removal b/changelog.d/10101.removal
deleted file mode 100644
index f2020e9ddf38..000000000000
--- a/changelog.d/10101.removal
+++ /dev/null
@@ -1 +0,0 @@
-The core Synapse development team plan to drop support for the [unstable API of MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), including the undocumented `experimental.msc2858_enabled` config option, in August 2021. Client authors should ensure that their clients are updated to use the stable API (which has been supported since Synapse 1.30) well before that time, to give their users time to upgrade.
diff --git a/debian/changelog b/debian/changelog
index bf99ae772c9e..d5efb8ccba63 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.35.0) stable; urgency=medium
+
+  * New synapse release 1.35.0.
+
+ -- Synapse Packaging team  Tue, 01 Jun 2021 13:23:35 +0100
+
 matrix-synapse-py3 (1.34.0) stable; urgency=medium

   * New synapse release 1.34.0.

diff --git a/synapse/__init__.py b/synapse/__init__.py
index 4591246bd187..d9843a170860 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@
 except ImportError:
     pass

-__version__ = "1.35.0rc3"
+__version__ = "1.35.0"

 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when

From 08e54345b1332889cd9e88a778bc13caca7b556f Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Tue, 1 Jun 2021 13:25:18 +0100
Subject: [PATCH 10/11] Indicate that there were no functional changes since
 v1.35.0rc3

---
 CHANGES.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 09f0be8e176e..c969fe1ebddc 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,6 +1,8 @@
 Synapse 1.35.0 (2021-06-01)
 ===========================

+No changes since v1.35.0rc3.
+
 Deprecations and Removals
 -------------------------
From 3fdaf4df55f52ccf283cf6b0ca73a3f98cd5e8f0 Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Tue, 1 Jun 2021 13:40:46 +0100
Subject: [PATCH 11/11] Merge v1.35.0rc3 into v1.35.0 due to incorrect tagging

---
 CHANGES.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index c969fe1ebddc..f03a53affc1a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,17 +1,13 @@
 Synapse 1.35.0 (2021-06-01)
 ===========================

-No changes since v1.35.0rc3.
+Note that [the tag](https://github.com/matrix-org/synapse/releases/tag/v1.35.0rc3) and [docker images](https://hub.docker.com/layers/matrixdotorg/synapse/v1.35.0rc3/images/sha256-34ccc87bd99a17e2cbc0902e678b5937d16bdc1991ead097eee6096481ecf2c4?context=explore) for `v1.35.0rc3` were incorrectly built. If you are experiencing issues with either, it is recommended to upgrade to the equivalent tag or docker image for the `v1.35.0` release.

 Deprecations and Removals
 -------------------------

 - The core Synapse development team plan to drop support for the [unstable API of MSC2858](https://github.com/matrix-org/matrix-doc/blob/master/proposals/2858-Multiple-SSO-Identity-Providers.md#unstable-prefix), including the undocumented `experimental.msc2858_enabled` config option, in August 2021. Client authors should ensure that their clients are updated to use the stable API (which has been supported since Synapse 1.30) well before that time, to give their users time to upgrade. ([\#10101](https://github.com/matrix-org/synapse/issues/10101))

-
-Synapse 1.35.0rc3 (2021-05-28)
-==============================
-
 Bugfixes
 --------