This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix MAU reaping where reserved users are specified. #6168
Merged
Merged
Changes from 6 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
985953f
fix misshandling mau reaping where reserved users are present
neilisfragile 0d146cb
fix logic and remove unused code
neilisfragile 760688e
towncrier
neilisfragile 4b0cf83
black
neilisfragile 2ffa761
respond to review comments
neilisfragile df2833f
respond to review comments
neilisfragile f9e74eb
respond to review comments
neilisfragile File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Fix monthly active user reaping where reserved users are specified. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -32,7 +32,6 @@ def __init__(self, dbconn, hs): | |||||||
super(MonthlyActiveUsersStore, self).__init__(None, hs) | ||||||||
self._clock = hs.get_clock() | ||||||||
self.hs = hs | ||||||||
self.reserved_users = () | ||||||||
# Do not add more reserved users than the total allowable number | ||||||||
self._new_transaction( | ||||||||
dbconn, | ||||||||
|
@@ -51,7 +50,6 @@ def _initialise_reserved_users(self, txn, threepids): | |||||||
txn (cursor): | ||||||||
threepids (list[dict]): List of threepid dicts to reserve | ||||||||
""" | ||||||||
reserved_user_list = [] | ||||||||
|
||||||||
for tp in threepids: | ||||||||
user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"]) | ||||||||
|
@@ -60,10 +58,8 @@ def _initialise_reserved_users(self, txn, threepids): | |||||||
is_support = self.is_support_user_txn(txn, user_id) | ||||||||
if not is_support: | ||||||||
self.upsert_monthly_active_user_txn(txn, user_id) | ||||||||
reserved_user_list.append(user_id) | ||||||||
else: | ||||||||
logger.warning("mau limit reserved threepid %s not found in db" % tp) | ||||||||
self.reserved_users = tuple(reserved_user_list) | ||||||||
|
||||||||
@defer.inlineCallbacks | ||||||||
def reap_monthly_active_users(self): | ||||||||
|
@@ -74,29 +70,31 @@ def reap_monthly_active_users(self): | |||||||
Deferred[] | ||||||||
""" | ||||||||
|
||||||||
def _reap_users(txn): | ||||||||
# Purge stale users | ||||||||
def _reap_users(txn, reserved_users): | ||||||||
""" | ||||||||
Args: | ||||||||
reserved_users (tuple): reserved users to preserve | ||||||||
""" | ||||||||
|
||||||||
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) | ||||||||
query_args = [thirty_days_ago] | ||||||||
base_sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" | ||||||||
|
||||||||
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres | ||||||||
# when len(reserved_users) == 0. Works fine on sqlite. | ||||||||
if len(self.reserved_users) > 0: | ||||||||
if len(reserved_users) > 0: | ||||||||
# questionmarks is a hack to overcome sqlite not supporting | ||||||||
# tuples in 'WHERE IN %s' | ||||||||
questionmarks = "?" * len(self.reserved_users) | ||||||||
question_marks = ",".join("?" * len(reserved_users)) | ||||||||
|
||||||||
query_args.extend(self.reserved_users) | ||||||||
sql = base_sql + """ AND user_id NOT IN ({})""".format( | ||||||||
",".join(questionmarks) | ||||||||
) | ||||||||
query_args.extend(reserved_users) | ||||||||
sql = base_sql + """ AND user_id NOT IN ({})""".format(question_marks) | ||||||||
else: | ||||||||
sql = base_sql | ||||||||
|
||||||||
txn.execute(sql, query_args) | ||||||||
|
||||||||
max_mau_value = self.hs.config.max_mau_value | ||||||||
if self.hs.config.limit_usage_by_mau: | ||||||||
# If MAU user count still exceeds the MAU threshold, then delete on | ||||||||
# a least recently active basis. | ||||||||
|
@@ -106,31 +104,51 @@ def _reap_users(txn): | |||||||
# While Postgres does not require 'LIMIT', but also does not support | ||||||||
# negative LIMIT values. So there is no way to write it that both can | ||||||||
# support | ||||||||
safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) | ||||||||
# Must be greater than zero for postgres | ||||||||
safe_guard = safe_guard if safe_guard > 0 else 0 | ||||||||
query_args = [safe_guard] | ||||||||
|
||||||||
base_sql = """ | ||||||||
DELETE FROM monthly_active_users | ||||||||
WHERE user_id NOT IN ( | ||||||||
SELECT user_id FROM monthly_active_users | ||||||||
ORDER BY timestamp DESC | ||||||||
LIMIT ? | ||||||||
if len(reserved_users) == 0: | ||||||||
sql = """ | ||||||||
DELETE FROM monthly_active_users | ||||||||
WHERE user_id NOT IN ( | ||||||||
SELECT user_id FROM monthly_active_users | ||||||||
ORDER BY timestamp DESC | ||||||||
LIMIT ? | ||||||||
) | ||||||||
""" | ||||||||
""" | ||||||||
txn.execute(sql, (max_mau_value,)) | ||||||||
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres | ||||||||
# when len(reserved_users) == 0. Works fine on sqlite. | ||||||||
if len(self.reserved_users) > 0: | ||||||||
query_args.extend(self.reserved_users) | ||||||||
sql = base_sql + """ AND user_id NOT IN ({})""".format( | ||||||||
",".join(questionmarks) | ||||||||
) | ||||||||
else: | ||||||||
sql = base_sql | ||||||||
txn.execute(sql, query_args) | ||||||||
# Must be >= 0 for postgres | ||||||||
num_of_non_reserved_users_to_remove = max( | ||||||||
max_mau_value - len(reserved_users), 0 | ||||||||
) | ||||||||
|
||||||||
# It is important to filter reserved users twice to guard | ||||||||
# against the case where the reserved user is present in the | ||||||||
# SELECT, meaning that a legitmate mau is deleted. | ||||||||
sql = """ | ||||||||
DELETE FROM monthly_active_users | ||||||||
WHERE user_id NOT IN ( | ||||||||
SELECT user_id FROM monthly_active_users | ||||||||
WHERE user_id NOT IN ({}) | ||||||||
ORDER BY timestamp DESC | ||||||||
LIMIT ? | ||||||||
) | ||||||||
AND user_id NOT IN ({})""".format( | ||||||||
erikjohnston marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
question_marks, question_marks | ||||||||
) | ||||||||
|
||||||||
query_args = [ | ||||||||
*reserved_users, | ||||||||
num_of_non_reserved_users_to_remove, | ||||||||
*reserved_users, | ||||||||
] | ||||||||
|
||||||||
yield self.runInteraction("reap_monthly_active_users", _reap_users) | ||||||||
txn.execute(sql, query_args) | ||||||||
|
||||||||
reserved_users = yield self.get_registered_reserved_users() | ||||||||
yield self.runInteraction( | ||||||||
"reap_monthly_active_users", _reap_users, reserved_users | ||||||||
) | ||||||||
# It seems poor to invalidate the whole cache, Postgres supports | ||||||||
# 'Returning' which would allow me to invalidate only the | ||||||||
# specific users, but sqlite has no way to do this and instead | ||||||||
|
@@ -159,21 +177,25 @@ def _count_users(txn): | |||||||
return self.runInteraction("count_users", _count_users) | ||||||||
|
||||||||
@defer.inlineCallbacks | ||||||||
def get_registered_reserved_users_count(self): | ||||||||
"""Of the reserved threepids defined in config, how many are associated | ||||||||
def get_registered_reserved_users(self): | ||||||||
"""Of the reserved threepids defined in config, which are associated | ||||||||
with registered users? | ||||||||
|
||||||||
Returns: | ||||||||
Defered[int]: Number of real reserved users | ||||||||
Defered[tuple]: Real reserved users | ||||||||
""" | ||||||||
count = 0 | ||||||||
for tp in self.hs.config.mau_limits_reserved_threepids: | ||||||||
users = () | ||||||||
|
||||||||
for tp in self.hs.config.mau_limits_reserved_threepids[ | ||||||||
: self.hs.config.max_mau_value | ||||||||
]: | ||||||||
user_id = yield self.hs.get_datastore().get_user_id_by_threepid( | ||||||||
tp["medium"], tp["address"] | ||||||||
) | ||||||||
if user_id: | ||||||||
count = count + 1 | ||||||||
return count | ||||||||
users = users + (user_id,) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It'd probably be easier to return a list rather than using tuples, really. |
||||||||
|
||||||||
return users | ||||||||
|
||||||||
@defer.inlineCallbacks | ||||||||
def upsert_monthly_active_user(self, user_id): | ||||||||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,7 @@ def test_initialise_reserved_users(self): | |
{"medium": "email", "address": user2_email}, | ||
{"medium": "email", "address": user3_email}, | ||
] | ||
self.hs.config.mau_limits_reserved_threepids = threepids | ||
# -1 because user3 is a support user and does not count | ||
user_num = len(threepids) - 1 | ||
|
||
|
@@ -84,6 +85,7 @@ def test_initialise_reserved_users(self): | |
self.hs.config.max_mau_value = 0 | ||
|
||
self.reactor.advance(FORTY_DAYS) | ||
self.hs.config.max_mau_value = 5 | ||
|
||
self.store.reap_monthly_active_users() | ||
self.pump() | ||
|
@@ -147,9 +149,7 @@ def test_reap_monthly_active_users(self): | |
self.store.reap_monthly_active_users() | ||
self.pump() | ||
count = self.store.get_monthly_active_count() | ||
self.assertEquals( | ||
self.get_success(count), initial_users - self.hs.config.max_mau_value | ||
) | ||
self.assertEquals(self.get_success(count), self.hs.config.max_mau_value) | ||
|
||
self.reactor.advance(FORTY_DAYS) | ||
self.store.reap_monthly_active_users() | ||
|
@@ -158,6 +158,44 @@ def test_reap_monthly_active_users(self): | |
count = self.store.get_monthly_active_count() | ||
self.assertEquals(self.get_success(count), 0) | ||
|
||
def test_reap_monthly_active_users_reserved_users(self): | ||
""" Tests that reaping correctly handles reaping where reserved users are | ||
present""" | ||
|
||
self.hs.config.max_mau_value = 5 | ||
initial_users = 5 | ||
reserved_user_number = initial_users - 1 | ||
threepids = [] | ||
for i in range(initial_users): | ||
user = "@user%d:server" % i | ||
email = "user%[email protected]" % i | ||
self.get_success(self.store.upsert_monthly_active_user(user)) | ||
threepids.append({"medium": "email", "address": email}) | ||
# Need to ensure that the most recent entries in the | ||
# monthly_active_users table are reserved | ||
now = int(self.hs.get_clock().time_msec()) | ||
if i != 0: | ||
self.get_success( | ||
self.store.register_user(user_id=user, password_hash=None) | ||
) | ||
self.get_success( | ||
self.store.user_add_threepid(user, "email", email, now, now) | ||
) | ||
|
||
self.hs.config.mau_limits_reserved_threepids = threepids | ||
self.store.runInteraction( | ||
"initialise", self.store._initialise_reserved_users, threepids | ||
) | ||
count = self.store.get_monthly_active_count() | ||
self.assertTrue(self.get_success(count), initial_users) | ||
|
||
users = self.store.get_registered_reserved_users() | ||
self.assertEquals(len(self.get_success(users)), reserved_user_number) | ||
|
||
self.get_success(self.store.reap_monthly_active_users()) | ||
count = self.store.get_monthly_active_count() | ||
self.assertEquals(self.get_success(count), self.hs.config.max_mau_value) | ||
|
||
def test_populate_monthly_users_is_guest(self): | ||
# Test that guest users are not added to mau list | ||
user_id = "@user_id:host" | ||
|
@@ -192,12 +230,13 @@ def test_populate_monthly_users_should_not_update(self): | |
|
||
def test_get_reserved_real_user_account(self): | ||
# Test no reserved users, or reserved threepids | ||
count = self.store.get_registered_reserved_users_count() | ||
self.assertEquals(self.get_success(count), 0) | ||
users = self.get_success(self.store.get_registered_reserved_users()) | ||
self.assertEquals(len(users), 0) | ||
# Test reserved users but no registered users | ||
|
||
user1 = "@user1:example.com" | ||
user2 = "@user2:example.com" | ||
|
||
user1_email = "[email protected]" | ||
user2_email = "[email protected]" | ||
threepids = [ | ||
|
@@ -210,8 +249,8 @@ def test_get_reserved_real_user_account(self): | |
) | ||
|
||
self.pump() | ||
count = self.store.get_registered_reserved_users_count() | ||
self.assertEquals(self.get_success(count), 0) | ||
users = self.get_success(self.store.get_registered_reserved_users()) | ||
self.assertEquals(len(users), 0) | ||
|
||
# Test reserved registed users | ||
self.store.register_user(user_id=user1, password_hash=None) | ||
|
@@ -221,8 +260,9 @@ def test_get_reserved_real_user_account(self): | |
now = int(self.hs.get_clock().time_msec()) | ||
self.store.user_add_threepid(user1, "email", user1_email, now, now) | ||
self.store.user_add_threepid(user2, "email", user2_email, now, now) | ||
count = self.store.get_registered_reserved_users_count() | ||
self.assertEquals(self.get_success(count), len(threepids)) | ||
|
||
users = self.get_success(self.store.get_registered_reserved_users()) | ||
self.assertEquals(len(users), len(threepids)) | ||
|
||
def test_support_user_not_add_to_mau_limits(self): | ||
support_user_id = "@support:test" | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.