From 64d9900b8386af4e878038a5ff25970d693485cc Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 15 Aug 2022 20:31:00 -0500
Subject: [PATCH 1/3] Add specific metric to time long-running /messages
 requests

Split out from https://github.com/matrix-org/synapse/pull/13478#discussion_r946109821
---
 synapse/rest/client/room.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 2f513164cb84..2027b950f07e 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -19,6 +19,8 @@
 from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
 from urllib import parse as urlparse
 
+from prometheus_client.core import Histogram
+
 from twisted.web.server import Request
 
 from synapse import event_auth
@@ -60,6 +62,30 @@
 
 logger = logging.getLogger(__name__)
 
+messsages_response_timer = Histogram(
+    "synapse_room_message_list_rest_servlet_response_time_seconds",
+    "sec",
+    [],
+    buckets=(
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+        10.0,
+        30.0,
+        60.0,
+        120.0,
+        180.0,
+        "+Inf",
+    ),
+)
+
 
 class TransactionRestServlet(RestServlet):
     def __init__(self, hs: "HomeServer"):
@@ -560,6 +586,7 @@ def __init__(self, hs: "HomeServer"):
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
 
+    @messsages_response_timer.time()
     async def on_GET(
         self, request: SynapseRequest, room_id: str
     ) -> Tuple[int, JsonDict]:

From e8e4af25ec2b0c0d7cc77b0126b2c1549e7cc0e9 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 15 Aug 2022 20:49:37 -0500
Subject: [PATCH 2/3] Add changelog

---
 changelog.d/13533.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/13533.misc

diff --git a/changelog.d/13533.misc b/changelog.d/13533.misc
new file mode 100644
index 000000000000..ab4b18887ae2
--- /dev/null
+++ b/changelog.d/13533.misc
@@ -0,0 +1 @@
+Add a dedicated histogram metric (`synapse_room_message_list_rest_servlet_response_time_seconds`) for `/messages` response times, with buckets that extend beyond 10 seconds.

From f106a645496cdaff35e0619ebb3374354c28afcc Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 16 Aug 2022 15:02:21 -0500
Subject: [PATCH 3/3] Explain that this is on top of the existing response time
 metric

See https://github.com/matrix-org/synapse/pull/13533#discussion_r946484395
---
 synapse/rest/client/room.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 2027b950f07e..d29417fafc63 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -62,6 +62,11 @@
 
 logger = logging.getLogger(__name__)
 
+# This is an extra metric on top of `synapse_http_server_response_time_seconds`,
+# which times the same sort of thing, but this one lets us see values greater
+# than 10s. We use a separate, dedicated histogram with its own buckets so that
+# we don't increase the cardinality of the general one, which is multiplied
+# across hundreds of servlets.
 messsages_response_timer = Histogram(
     "synapse_room_message_list_rest_servlet_response_time_seconds",
     "sec",