Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix MAU reaping where reserved users are specified. #6168

Merged
merged 7 commits into from
Oct 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/6168.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix monthly active user reaping where reserved users are specified.
6 changes: 3 additions & 3 deletions synapse/app/homeserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,13 +605,13 @@ def reap_monthly_active_users():
@defer.inlineCallbacks
def generate_monthly_active_users():
current_mau_count = 0
reserved_count = 0
reserved_users = ()
store = hs.get_datastore()
if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
current_mau_count = yield store.get_monthly_active_count()
reserved_count = yield store.get_registered_reserved_users_count()
reserved_users = yield store.get_registered_reserved_users()
current_mau_gauge.set(float(current_mau_count))
registered_reserved_users_mau_gauge.set(float(reserved_count))
registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
max_mau_gauge.set(float(hs.config.max_mau_value))

def start_generate_monthly_active_users():
Expand Down
101 changes: 62 additions & 39 deletions synapse/storage/monthly_active_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def __init__(self, dbconn, hs):
super(MonthlyActiveUsersStore, self).__init__(None, hs)
self._clock = hs.get_clock()
self.hs = hs
self.reserved_users = ()
# Do not add more reserved users than the total allowable number
self._new_transaction(
dbconn,
Expand All @@ -51,7 +50,6 @@ def _initialise_reserved_users(self, txn, threepids):
txn (cursor):
threepids (list[dict]): List of threepid dicts to reserve
"""
reserved_user_list = []

for tp in threepids:
user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"])
Expand All @@ -60,10 +58,8 @@ def _initialise_reserved_users(self, txn, threepids):
is_support = self.is_support_user_txn(txn, user_id)
if not is_support:
self.upsert_monthly_active_user_txn(txn, user_id)
reserved_user_list.append(user_id)
else:
logger.warning("mau limit reserved threepid %s not found in db" % tp)
self.reserved_users = tuple(reserved_user_list)

@defer.inlineCallbacks
def reap_monthly_active_users(self):
Expand All @@ -74,29 +70,31 @@ def reap_monthly_active_users(self):
Deferred[]
"""

def _reap_users(txn):
# Purge stale users
def _reap_users(txn, reserved_users):
"""
Args:
reserved_users (tuple): reserved users to preserve
"""

thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
query_args = [thirty_days_ago]
base_sql = "DELETE FROM monthly_active_users WHERE timestamp < ?"

# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
# when len(reserved_users) == 0. Works fine on sqlite.
if len(self.reserved_users) > 0:
if len(reserved_users) > 0:
# questionmarks is a hack to overcome sqlite not supporting
# tuples in 'WHERE IN %s'
questionmarks = "?" * len(self.reserved_users)
question_marks = ",".join("?" * len(reserved_users))

query_args.extend(self.reserved_users)
sql = base_sql + """ AND user_id NOT IN ({})""".format(
",".join(questionmarks)
)
query_args.extend(reserved_users)
sql = base_sql + " AND user_id NOT IN ({})".format(question_marks)
else:
sql = base_sql

txn.execute(sql, query_args)

max_mau_value = self.hs.config.max_mau_value
if self.hs.config.limit_usage_by_mau:
# If MAU user count still exceeds the MAU threshold, then delete on
# a least recently active basis.
Expand All @@ -106,31 +104,52 @@ def _reap_users(txn):
# While Postgres does not require 'LIMIT', but also does not support
# negative LIMIT values. So there is no way to write it that both can
# support
safe_guard = self.hs.config.max_mau_value - len(self.reserved_users)
# Must be greater than zero for postgres
safe_guard = safe_guard if safe_guard > 0 else 0
query_args = [safe_guard]

base_sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
ORDER BY timestamp DESC
LIMIT ?
if len(reserved_users) == 0:
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
ORDER BY timestamp DESC
LIMIT ?
)
"""
"""
txn.execute(sql, (max_mau_value,))
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
# when len(reserved_users) == 0. Works fine on sqlite.
if len(self.reserved_users) > 0:
query_args.extend(self.reserved_users)
sql = base_sql + """ AND user_id NOT IN ({})""".format(
",".join(questionmarks)
)
else:
sql = base_sql
txn.execute(sql, query_args)
# Must be >= 0 for postgres
num_of_non_reserved_users_to_remove = max(
max_mau_value - len(reserved_users), 0
)

# It is important to filter reserved users twice to guard
# against the case where the reserved user is present in the
# SELECT, meaning that a legitmate mau is deleted.
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
WHERE user_id NOT IN ({})
ORDER BY timestamp DESC
LIMIT ?
)
AND user_id NOT IN ({})
""".format(
question_marks, question_marks
)

query_args = [
*reserved_users,
num_of_non_reserved_users_to_remove,
*reserved_users,
]

yield self.runInteraction("reap_monthly_active_users", _reap_users)
txn.execute(sql, query_args)

reserved_users = yield self.get_registered_reserved_users()
yield self.runInteraction(
"reap_monthly_active_users", _reap_users, reserved_users
)
# It seems poor to invalidate the whole cache, Postgres supports
# 'Returning' which would allow me to invalidate only the
# specific users, but sqlite has no way to do this and instead
Expand Down Expand Up @@ -159,21 +178,25 @@ def _count_users(txn):
return self.runInteraction("count_users", _count_users)

@defer.inlineCallbacks
def get_registered_reserved_users_count(self):
"""Of the reserved threepids defined in config, how many are associated
def get_registered_reserved_users(self):
"""Of the reserved threepids defined in config, which are associated
with registered users?

Returns:
Defered[int]: Number of real reserved users
Defered[list]: Real reserved users
"""
count = 0
for tp in self.hs.config.mau_limits_reserved_threepids:
users = []

for tp in self.hs.config.mau_limits_reserved_threepids[
: self.hs.config.max_mau_value
]:
user_id = yield self.hs.get_datastore().get_user_id_by_threepid(
tp["medium"], tp["address"]
)
if user_id:
count = count + 1
return count
users.append(user_id)

return users

@defer.inlineCallbacks
def upsert_monthly_active_user(self, user_id):
Expand Down
58 changes: 49 additions & 9 deletions tests/storage/test_monthly_active_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_initialise_reserved_users(self):
{"medium": "email", "address": user2_email},
{"medium": "email", "address": user3_email},
]
self.hs.config.mau_limits_reserved_threepids = threepids
# -1 because user3 is a support user and does not count
user_num = len(threepids) - 1

Expand Down Expand Up @@ -84,6 +85,7 @@ def test_initialise_reserved_users(self):
self.hs.config.max_mau_value = 0

self.reactor.advance(FORTY_DAYS)
self.hs.config.max_mau_value = 5

self.store.reap_monthly_active_users()
self.pump()
Expand Down Expand Up @@ -147,9 +149,7 @@ def test_reap_monthly_active_users(self):
self.store.reap_monthly_active_users()
self.pump()
count = self.store.get_monthly_active_count()
self.assertEquals(
self.get_success(count), initial_users - self.hs.config.max_mau_value
)
self.assertEquals(self.get_success(count), self.hs.config.max_mau_value)

self.reactor.advance(FORTY_DAYS)
self.store.reap_monthly_active_users()
Expand All @@ -158,6 +158,44 @@ def test_reap_monthly_active_users(self):
count = self.store.get_monthly_active_count()
self.assertEquals(self.get_success(count), 0)

def test_reap_monthly_active_users_reserved_users(self):
""" Tests that reaping correctly handles reaping where reserved users are
present"""

self.hs.config.max_mau_value = 5
initial_users = 5
reserved_user_number = initial_users - 1
threepids = []
for i in range(initial_users):
user = "@user%d:server" % i
email = "user%[email protected]" % i
self.get_success(self.store.upsert_monthly_active_user(user))
threepids.append({"medium": "email", "address": email})
# Need to ensure that the most recent entries in the
# monthly_active_users table are reserved
now = int(self.hs.get_clock().time_msec())
if i != 0:
self.get_success(
self.store.register_user(user_id=user, password_hash=None)
)
self.get_success(
self.store.user_add_threepid(user, "email", email, now, now)
)

self.hs.config.mau_limits_reserved_threepids = threepids
self.store.runInteraction(
"initialise", self.store._initialise_reserved_users, threepids
)
count = self.store.get_monthly_active_count()
self.assertTrue(self.get_success(count), initial_users)

users = self.store.get_registered_reserved_users()
self.assertEquals(len(self.get_success(users)), reserved_user_number)

self.get_success(self.store.reap_monthly_active_users())
count = self.store.get_monthly_active_count()
self.assertEquals(self.get_success(count), self.hs.config.max_mau_value)

def test_populate_monthly_users_is_guest(self):
# Test that guest users are not added to mau list
user_id = "@user_id:host"
Expand Down Expand Up @@ -192,12 +230,13 @@ def test_populate_monthly_users_should_not_update(self):

def test_get_reserved_real_user_account(self):
# Test no reserved users, or reserved threepids
count = self.store.get_registered_reserved_users_count()
self.assertEquals(self.get_success(count), 0)
users = self.get_success(self.store.get_registered_reserved_users())
self.assertEquals(len(users), 0)
# Test reserved users but no registered users

user1 = "@user1:example.com"
user2 = "@user2:example.com"

user1_email = "[email protected]"
user2_email = "[email protected]"
threepids = [
Expand All @@ -210,8 +249,8 @@ def test_get_reserved_real_user_account(self):
)

self.pump()
count = self.store.get_registered_reserved_users_count()
self.assertEquals(self.get_success(count), 0)
users = self.get_success(self.store.get_registered_reserved_users())
self.assertEquals(len(users), 0)

# Test reserved registed users
self.store.register_user(user_id=user1, password_hash=None)
Expand All @@ -221,8 +260,9 @@ def test_get_reserved_real_user_account(self):
now = int(self.hs.get_clock().time_msec())
self.store.user_add_threepid(user1, "email", user1_email, now, now)
self.store.user_add_threepid(user2, "email", user2_email, now, now)
count = self.store.get_registered_reserved_users_count()
self.assertEquals(self.get_success(count), len(threepids))

users = self.get_success(self.store.get_registered_reserved_users())
self.assertEquals(len(users), len(threepids))

def test_support_user_not_add_to_mau_limits(self):
support_user_id = "@support:test"
Expand Down