From 7daa8a78c5183a46ae462c0718939ca52fa0130f Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 6 Aug 2018 22:55:05 +0100 Subject: [PATCH 1/7] load mau limit threepids --- synapse/config/server.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/config/server.py b/synapse/config/server.py index 8fd23197590c..cacac253ab0f 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -74,6 +74,9 @@ def read_config(self, config): self.max_mau_value = config.get( "max_mau_value", 0, ) + self.mau_limits_reserved_threepids = config.get( + "mau_limit_reserved_threepid", [] + ) # FIXME: federation_domain_whitelist needs sytests self.federation_domain_whitelist = None From fbe255f9a4c46fe82a394b9efab6ad8811ae71e0 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 6 Aug 2018 23:24:54 +0100 Subject: [PATCH 2/7] add default mau_limits_reserved_threepids --- tests/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils.py b/tests/utils.py index ec40428e7438..baeed5d60047 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -74,6 +74,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None config.media_storage_providers = [] config.auto_join_rooms = [] config.limit_usage_by_mau = False + config.mau_limits_reserved_threepids = [] # disable user directory updates, because they get done in the # background, which upsets the test runner. From a74b25faaaf47b4696cb30207ef2eae6a3a5d8c7 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 6 Aug 2018 23:25:25 +0100 Subject: [PATCH 3/7] WIP building out mau reserved users --- synapse/storage/monthly_active_users.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index abe1e6bb99ad..6a37d6fc22af 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -19,16 +19,30 @@ from ._base import SQLBaseStore + # Number of msec of granularity to store the monthly_active_user timestamp # This means it is not necessary to update the table on every request LAST_SEEN_GRANULARITY = 60 * 60 * 1000 class MonthlyActiveUsersStore(SQLBaseStore): + @defer.inlineCallbacks def __init__(self, dbconn, hs): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() self.hs = hs + threepids = self.hs.config.mau_limits_reserved_threepids + self.reserved_user_ids = set() + for tp in threepids: + user_id = yield hs.get_datastore().get_user_id_by_threepid( + tp["medium"], tp["address"] + ) + if user_id: + self.reserved_user_ids.add(user_id) + else: + logger.warning( + "mau limit reserved threepid %s not found in db" % tp + ) def reap_monthly_active_users(self): """ @@ -78,7 +92,7 @@ def _reap_users(txn): @cached(num_args=0) def get_monthly_active_count(self): - """Generates current count of monthly active users.abs + """Generates current count of monthly active users Returns: Defered[int]: Number of current monthly active users From e8eba2b4e3a99d35f08c96205ebb18211adddcb9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 7 Aug 2018 17:49:43 +0100 Subject: [PATCH 4/7] implement reserved users for mau limits --- synapse/app/homeserver.py | 6 +++ synapse/config/server.py | 2 +- synapse/storage/monthly_active_users.py | 45 +++++++++++++---- tests/storage/test_monthly_active_users.py | 59 +++++++++++++++++++++- 4 files changed, 99 insertions(+), 13 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 3a67db8b30db..a4a65e728660 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -518,6 +518,8 @@ def generate_user_daily_visit_stats(): # If you increase the loop period, the accuracy of user_daily_visits # table will decrease clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000) + + # monthly active user limiting functionality clock.looping_call( hs.get_datastore().reap_monthly_active_users, 1000 * 60 * 60 ) @@ -530,9 +532,13 @@ def generate_monthly_active_users(): current_mau_gauge.set(float(count)) max_mau_value_gauge.set(float(hs.config.max_mau_value)) + hs.get_datastore().initialise_reserved_users( + hs.config.mau_limits_reserved_threepids + ) generate_monthly_active_users() if hs.config.limit_usage_by_mau: clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000) + # End of monthly active user settings if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") diff --git a/synapse/config/server.py b/synapse/config/server.py index cacac253ab0f..114d7a981595 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -75,7 +75,7 @@ def read_config(self, config): "max_mau_value", 0, ) self.mau_limits_reserved_threepids = config.get( - "mau_limit_reserved_threepid", [] + "mau_limit_reserved_threepids", [] ) # FIXME: federation_domain_whitelist needs sytests diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 6a37d6fc22af..168f564ed5d1 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from twisted.internet import defer @@ -19,6 +20,7 @@ from ._base import SQLBaseStore +logger = logging.getLogger(__name__) # Number of msec of granularity to store the monthly_active_user timestamp # This means it is not necessary to update the table on every request @@ -26,24 +28,31 @@ class MonthlyActiveUsersStore(SQLBaseStore): - @defer.inlineCallbacks def __init__(self, dbconn, hs): super(MonthlyActiveUsersStore, self).__init__(None, hs) self._clock = hs.get_clock() self.hs = hs - threepids = self.hs.config.mau_limits_reserved_threepids - self.reserved_user_ids = set() + self.reserved_users = () + + @defer.inlineCallbacks + def initialise_reserved_users(self, threepids): + # TODO Why can't I do this in init? + store = self.hs.get_datastore() + reserved_user_list = [] for tp in threepids: - user_id = yield hs.get_datastore().get_user_id_by_threepid( + user_id = yield store.get_user_id_by_threepid( tp["medium"], tp["address"] ) if user_id: - self.reserved_user_ids.add(user_id) + self.upsert_monthly_active_user(user_id) + reserved_user_list.append(user_id) else: logger.warning( "mau limit reserved threepid %s not found in db" % tp ) + self.reserved_users = tuple(reserved_user_list) + @defer.inlineCallbacks def reap_monthly_active_users(self): """ Cleans out monthly active user table to ensure that no stale @@ -58,8 +67,20 @@ def _reap_users(txn): int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) ) # Purge stale users - sql = "DELETE FROM monthly_active_users WHERE timestamp < ?" - txn.execute(sql, (thirty_days_ago,)) + + # questionmarks is a hack to overcome sqlite not supporting + # tuples in 'WHERE IN %s' + questionmarks = '?' * len(self.reserved_users) + query_args = [thirty_days_ago] + query_args.extend(self.reserved_users) + + sql = """ + DELETE FROM monthly_active_users + WHERE timestamp < ? + AND user_id NOT IN ({}) + """.format(','.join(questionmarks)) + + txn.execute(sql, query_args) # If MAU user count still exceeds the MAU threshold, then delete on # a least recently active basis. @@ -69,6 +90,8 @@ def _reap_users(txn): # While Postgres does not require 'LIMIT', but also does not support # negative LIMIT values. So there is no way to write it that both can # support + query_args = [self.hs.config.max_mau_value] + query_args.extend(self.reserved_users) sql = """ DELETE FROM monthly_active_users WHERE user_id NOT IN ( @@ -76,8 +99,9 @@ def _reap_users(txn): ORDER BY timestamp DESC LIMIT ? ) - """ - txn.execute(sql, (self.hs.config.max_mau_value,)) + AND user_id NOT IN ({}) + """.format(','.join(questionmarks)) + txn.execute(sql, query_args) yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports @@ -136,7 +160,7 @@ def _user_last_seen_monthly_active(self, user_id): Arguments: user_id (str): user to add/update Return: - int : timestamp since last seen, None if never seen + Deferred[int] : timestamp since last seen, None if never seen """ @@ -158,7 +182,6 @@ def populate_monthly_active_users(self, user_id): Args: user_id(str): the user_id to query """ - if self.hs.config.limit_usage_by_mau: last_seen_timestamp = yield self._user_last_seen_monthly_active(user_id) now = self.hs.get_clock().time_msec() diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 0bfd24a7fb3d..cbd480cd4215 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -19,6 +19,8 @@ import tests.utils from tests.utils import setup_test_homeserver +FORTY_DAYS = 40 * 24 * 60 * 60 + class MonthlyActiveUsersTestCase(tests.unittest.TestCase): def __init__(self, *args, **kwargs): @@ -29,6 +31,56 @@ def setUp(self): self.hs = yield setup_test_homeserver() self.store = self.hs.get_datastore() + @defer.inlineCallbacks + def test_initialise_reserved_users(self): + + user1 = "@user1:server" + user1_email = "user1@matrix.org" + user2 = "@user2:server" + user2_email = "user2@matrix.org" + threepids = [ + {'medium': 'email', 'address': user1_email}, + {'medium': 'email', 'address': user2_email} + ] + user_num = len(threepids) + + yield self.store.register( + user_id=user1, + token="123", + password_hash=None) + + yield self.store.register( + user_id=user2, + token="456", + password_hash=None) + + now = int(self.hs.get_clock().time_msec()) + yield self.store.user_add_threepid(user1, "email", user1_email, now, now) + yield self.store.user_add_threepid(user2, "email", user2_email, now, now) + yield self.store.initialise_reserved_users(threepids) + + active_count = yield self.store.get_monthly_active_count() + + # Test total counts + self.assertEquals(active_count, user_num) + + # Test user is marked as active + + timestamp = yield self.store._user_last_seen_monthly_active(user1) + self.assertTrue(timestamp) + timestamp = yield self.store._user_last_seen_monthly_active(user2) + self.assertTrue(timestamp) + + # Test that users are never removed from the db. + self.hs.config.max_mau_value = 0 + + self.hs.get_clock().advance_time(FORTY_DAYS) + + yield self.store.reap_monthly_active_users() + + active_count = yield self.store.get_monthly_active_count() + self.assertEquals(active_count, user_num) + @defer.inlineCallbacks def test_can_insert_and_count_mau(self): count = yield self.store.get_monthly_active_count() @@ -63,4 +115,9 @@ def test_reap_monthly_active_users(self): self.assertTrue(count, initial_users) yield self.store.reap_monthly_active_users() count = yield self.store.get_monthly_active_count() - self.assertTrue(count, initial_users - self.hs.config.max_mau_value) + self.assertEquals(count, initial_users - self.hs.config.max_mau_value) + + self.hs.get_clock().advance_time(FORTY_DAYS) + yield self.store.reap_monthly_active_users() + count = yield self.store.get_monthly_active_count() + self.assertEquals(count, 0) From ef3589063adddd1eee89f136e6a54250cce414a1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 7 Aug 2018 17:59:27 +0100 Subject: [PATCH 5/7] prevent total number of reserved users being too large --- synapse/storage/monthly_active_users.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 168f564ed5d1..54de5a8686fa 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -39,7 +39,9 @@ def initialise_reserved_users(self, threepids): # TODO Why can't I do this in init? store = self.hs.get_datastore() reserved_user_list = [] - for tp in threepids: + + # Do not add more reserved users than the total allowable number + for tp in threepids[:self.hs.config.max_mau_value]: user_id = yield store.get_user_id_by_threepid( tp["medium"], tp["address"] ) From 7f3d897e7af7c5227fc27355ead082055a8c0ecf Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 8 Aug 2018 11:46:23 +0100 Subject: [PATCH 6/7] mock config.max_mau_value --- tests/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils.py b/tests/utils.py index baeed5d60047..e7894819c025 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -74,6 +74,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None config.media_storage_providers = [] config.auto_join_rooms = [] config.limit_usage_by_mau = False + config.max_mau_value = 50 config.mau_limits_reserved_threepids = [] # disable user directory updates, because they get done in the From d92675a4861eab5f5eec0e8a39a7505d5bda850d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 8 Aug 2018 12:06:42 +0100 Subject: [PATCH 7/7] Ability to whitelist specific threepids against monthly active user limiting --- changelog.d/3662.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3662.feature diff --git a/changelog.d/3662.feature b/changelog.d/3662.feature new file mode 100644 index 000000000000..daacef086d03 --- /dev/null +++ b/changelog.d/3662.feature @@ -0,0 +1 @@ +Ability to whitelist specific threepids against monthly active user limiting