This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add admin endpoint to query room sizes (#15482)
- Loading branch information
1 parent
710502c
commit 9900f7c
Showing
6 changed files
with
195 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add admin endpoint to query the largest rooms by disk space used in the database. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# Copyright 2023 The Matrix.org Foundation C.I.C. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import logging | ||
from collections import Counter | ||
from typing import TYPE_CHECKING, Collection, List, Tuple | ||
|
||
from synapse.api.errors import SynapseError | ||
from synapse.storage.database import LoggingTransaction | ||
from synapse.storage.databases import Databases | ||
from synapse.storage.engines import PostgresEngine | ||
|
||
if TYPE_CHECKING: | ||
from synapse.server import HomeServer | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class StatsController:
    """High level interface for getting statistics."""

    def __init__(self, hs: "HomeServer", stores: Databases):
        """Store the database collection that the statistics are read from.

        Args:
            hs: The homeserver. Currently unused by this controller.
            stores: The databases; both `stores.main` and `stores.state`
                are queried.
        """
        self.stores = stores

    async def get_room_db_size_estimate(self) -> List[Tuple[str, int]]:
        """Get an estimate of the largest rooms and how much database space they
        use, in bytes.

        Only works against PostgreSQL.

        Note: this uses the postgres statistics so is a very rough estimate.

        Returns:
            Up to ten `(room_id, estimated_bytes)` pairs, largest first.

        Raises:
            SynapseError: with code 400 if either the main or the state
                database is not running on PostgreSQL.
        """

        # Note: We look at both tables on the main and state databases.
        if not isinstance(self.stores.main.database_engine, PostgresEngine):
            raise SynapseError(400, "Endpoint requires using PostgreSQL")

        if not isinstance(self.stores.state.database_engine, PostgresEngine):
            raise SynapseError(400, "Endpoint requires using PostgreSQL")

        # For each "large" table, we go through and get the largest rooms
        # and an estimate of how much space they take. We can then sum the
        # results and return the top 10.
        #
        # This isn't the most accurate, but given all of these are estimates
        # anyway it's good enough.
        room_estimates: Counter[str] = Counter()

        # Return size of the table on disk, including indexes and TOAST.
        table_sql = """
            SELECT pg_total_relation_size(?)
        """

        # Get an estimate for the largest rooms and their frequency.
        #
        # Note: the cast here is a hack to cast from `anyarray` to an actual
        # type. This ensures that psycopg2 passes us back a Python list.
        column_sql = """
            SELECT
                most_common_vals::TEXT::TEXT[], most_common_freqs::TEXT::NUMERIC[]
            FROM pg_stats
            WHERE tablename = ? and attname = 'room_id'
        """

        def get_room_db_size_estimate_txn(
            txn: LoggingTransaction,
            tables: Collection[str],
        ) -> None:
            # Accumulates into `room_estimates` from the enclosing scope so
            # that results from the main and state databases are summed.
            for table in tables:
                txn.execute(table_sql, (table,))
                row = txn.fetchone()
                assert row is not None
                (table_size,) = row

                txn.execute(column_sql, (table,))
                row = txn.fetchone()
                if row is None:
                    # No statistics have been gathered for this table yet
                    # (e.g. it has never been analysed), so there is
                    # nothing to attribute to individual rooms.
                    continue

                vals, freqs = row
                if vals is None or freqs is None:
                    # Postgres reports NULL when no room is noticeably more
                    # common than the others in this table.
                    continue

                # Each frequency is the fraction of rows belonging to that
                # room; scale the whole table size by it.
                for room_id, freq in zip(vals, freqs):
                    room_estimates[room_id] += int(freq * table_size)

        await self.stores.main.db_pool.runInteraction(
            "get_room_db_size_estimate_main",
            get_room_db_size_estimate_txn,
            (
                "event_json",
                "events",
                "event_search",
                "event_edges",
                "event_push_actions",
                "stream_ordering_to_exterm",
            ),
        )

        await self.stores.state.db_pool.runInteraction(
            "get_room_db_size_estimate_state",
            get_room_db_size_estimate_txn,
            ("state_groups_state",),
        )

        return room_estimates.most_common(10)