From 9187e0762f0b4f028d15fac4502e458f513d6642 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 10:02:32 +0100 Subject: [PATCH 01/12] count_daily_users failed if db was sqlite due to type failure - presumably this prevented all sqlite homeservers reporting home --- synapse/storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index de00cae44750..b97e5e5ff4a6 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -260,7 +260,7 @@ def _count_users(txn): ) u """ - txn.execute(sql, (yesterday,)) + txn.execute(sql, (str(yesterday),)) count, = txn.fetchone() return count From a32d2548d986f7075e8310184ce0b70c69513a02 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 10:39:13 +0100 Subject: [PATCH 02/12] query and call for r30 stats --- synapse/app/homeserver.py | 2 ++ synapse/storage/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index c00afbba28c5..8bce9f1ace40 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,6 +425,8 @@ def phone_stats_home(): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() + stats["r30_users"] = yield hs.get_datastore().count_r30_users() + daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b97e5e5ff4a6..10f99c3cd5dc 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -267,6 +267,42 @@ def _count_users(txn): ret = yield self.runInteraction("count_users", _count_users) defer.returnValue(ret) + @defer.inlineCallbacks + def count_r30_users(self): + """ + Counts the number of 30 day retained 
users, defined as:- + * Users who have created their accounts more than 30 days + * Where last seen at most 30 days ago + * Where account creation and last_seen are > 30 days + """ + def _count_r30_users(txn): + thirty_days_in_secs = 86400 * 30 + now = int(self._clock.time_msec()) + thirty_days_ago_in_secs = now - thirty_days_in_secs + + sql = """ + SELECT COALESCE(count(*), 0) FROM ( + SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) + FROM users, user_ips + WHERE users.name = user_ips.user_id + AND appservice_id is NULL + AND users.creation_ts < ? + AND user_ips.last_seen/1000 > ? + AND (user_ips.last_seen/1000) - users.creation_ts > ? + GROUP BY users.name, users.creation_ts + ) u + """ + + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs)) + + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_r30_users", _count_r30_users) + defer.returnValue(ret) + def get_users(self): """Function to reterive a list of users in users table. From 0f890f477eb2ed03b8fd48710d1960210f44a334 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 11:49:57 +0100 Subject: [PATCH 03/12] No need to cast in count_daily_users --- synapse/storage/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 10f99c3cd5dc..ba43b2d8ecfd 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -250,7 +250,7 @@ def count_daily_users(self): Counts the number of users who used this homeserver in the last 24 hours. 
""" def _count_users(txn): - yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24), + yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24) sql = """ SELECT COALESCE(count(*), 0) FROM ( @@ -260,7 +260,7 @@ def _count_users(txn): ) u """ - txn.execute(sql, (str(yesterday),)) + txn.execute(sql, (yesterday,)) count, = txn.fetchone() return count From 788e69098c93f2433ef907015666c624bb39318f Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 12:03:13 +0100 Subject: [PATCH 04/12] Add user_ips last seen index --- synapse/storage/client_ips.py | 7 +++++++ .../delta/48/add_user_ips_last_seen_index.sql | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a03d1d6104b3..7b44dae0fcaa 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -48,6 +48,13 @@ def __init__(self, db_conn, hs): columns=["user_id", "device_id", "last_seen"], ) + self.register_background_index_update( + "user_ips_last_seen_index", + index_name="user_ips_last_seen", + table="user_ips", + columns=["user_id", "last_seen"], + ) + # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) self._batch_row_update = {} diff --git a/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql b/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql new file mode 100644 index 000000000000..9248b0b24ad4 --- /dev/null +++ b/synapse/storage/schema/delta/48/add_user_ips_last_seen_index.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('user_ips_last_seen_index', '{}'); From 792d340572026becf48fe73421f0b73cf575fe46 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 12:25:02 +0100 Subject: [PATCH 05/12] rename stat to future proof --- synapse/app/homeserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 8bce9f1ace40..286f4dcf7b15 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,7 +425,7 @@ def phone_stats_home(): stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - stats["r30_users"] = yield hs.get_datastore().count_r30_users() + stats["r30_users_all"] = yield hs.get_datastore().count_r30_users() daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages From 86932be2cb1837688d154ff78fb6418f78483133 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 14:36:53 +0100 Subject: [PATCH 06/12] Support multi client R30 for psql --- synapse/app/homeserver.py | 4 +++- synapse/storage/__init__.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 286f4dcf7b15..35e2b00f1b87 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -425,7 +425,9 @@ def phone_stats_home(): 
stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms() stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - stats["r30_users_all"] = yield hs.get_datastore().count_r30_users() + r30_results = yield hs.get_datastore().count_r30_users() + for name, count in r30_results.items(): + stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index ba43b2d8ecfd..b651973c7964 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,6 +280,15 @@ def _count_r30_users(txn): now = int(self._clock.time_msec()) thirty_days_ago_in_secs = now - thirty_days_in_secs + # Are these filters sufficiently robust? + filters = { + "ALL": "", + "IOS": "^(Vector|Riot|Riot\.im)\/.* iOS", + "ANDROID": "^(Dalvik|Riot|Riot\.im)\/.* Android", + "ELECTRON": "Electron", + "WEB": "(Gecko|Mozilla)", + } + sql = """ SELECT COALESCE(count(*), 0) FROM ( SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) @@ -289,16 +298,27 @@ def _count_r30_users(txn): AND users.creation_ts < ? AND user_ips.last_seen/1000 > ? AND (user_ips.last_seen/1000) - users.creation_ts > ? - GROUP BY users.name, users.creation_ts - ) u """ - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs)) - - count, = txn.fetchone() - return count + if isinstance(self.database_engine, PostgresEngine): + sql = sql + "AND user_ips.user_agent ~ ? 
" + sql = sql + "GROUP BY users.name, users.creation_ts ) u" + + results = {} + if isinstance(self.database_engine, PostgresEngine): + for filter_name, user_agent_filter in filters.items(): + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs, + user_agent_filter)) + results[filter_name], = txn.fetchone() + + else: + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs, + thirty_days_in_secs)) + results["ALL"], = txn.fetchone() + return results ret = yield self.runInteraction("count_r30_users", _count_r30_users) defer.returnValue(ret) From 4262aba17b643bc82c5cce92298dac0a27b2727c Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 14:40:03 +0100 Subject: [PATCH 07/12] bump schema version --- synapse/storage/prepare_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index c845a0cec510..68675e15d2ac 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. 
-SCHEMA_VERSION = 47 +SCHEMA_VERSION = 48 dir_path = os.path.abspath(os.path.dirname(__file__)) From 241e4e86873d5880f564791e3768247fa55c3fa8 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 16:25:53 +0100 Subject: [PATCH 08/12] remove twisted deferral cruft --- synapse/storage/__init__.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b651973c7964..b2b85e266da2 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -244,7 +244,6 @@ def _get_active_presence(self, db_conn): return [UserPresenceState(**row) for row in rows] - @defer.inlineCallbacks def count_daily_users(self): """ Counts the number of users who used this homeserver in the last 24 hours. @@ -264,10 +263,9 @@ def _count_users(txn): count, = txn.fetchone() return count - ret = yield self.runInteraction("count_users", _count_users) - defer.returnValue(ret) + return self.runInteraction("count_users", _count_users) + - @defer.inlineCallbacks def count_r30_users(self): """ Counts the number of 30 day retained users, defined as:- @@ -320,8 +318,7 @@ def _count_r30_users(txn): results["ALL"], = txn.fetchone() return results - ret = yield self.runInteraction("count_r30_users", _count_r30_users) - defer.returnValue(ret) + return self.runInteraction("count_r30_users", _count_r30_users) def get_users(self): """Function to reterive a list of users in users table. 
From dc7c020b33dc9606089fa66fdec2dacb7f807f6d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 17:25:15 +0100 Subject: [PATCH 09/12] fix pep8 errors --- synapse/storage/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b2b85e266da2..70c61714049a 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer - from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore @@ -265,7 +263,6 @@ def _count_users(txn): return self.runInteraction("count_users", _count_users) - def count_r30_users(self): """ Counts the number of 30 day retained users, defined as:- From 9ee44a372d4fcf6a461b610230a285610613e8ac Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 29 Mar 2018 16:45:34 +0100 Subject: [PATCH 10/12] Remove need for sqlite specific query --- synapse/storage/__init__.py | 87 ++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 70c61714049a..0b4693041f26 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -269,50 +269,77 @@ def count_r30_users(self): * Users who have created their accounts more than 30 days * Where last seen at most 30 days ago * Where account creation and last_seen are > 30 days + + Returns counts globally for a given user as well as breaking + by platform """ def _count_r30_users(txn): thirty_days_in_secs = 86400 * 30 now = int(self._clock.time_msec()) thirty_days_ago_in_secs = now - thirty_days_in_secs - # Are these filters sufficiently robust? 
- filters = { - "ALL": "", - "IOS": "^(Vector|Riot|Riot\.im)\/.* iOS", - "ANDROID": "^(Dalvik|Riot|Riot\.im)\/.* Android", - "ELECTRON": "Electron", - "WEB": "(Gecko|Mozilla)", - } + sql = """ + SELECT platform, COALESCE(count(*), 0) FROM ( + SELECT users.name, platform, users.creation_ts * 1000, MAX(uip.last_seen) + FROM users + INNER JOIN ( + SELECT + user_id, + last_seen, + CASE + WHEN user_agent LIKE '%Android%' THEN 'android' + WHEN user_agent LIKE '%iOS%' THEN 'ios' + WHEN user_agent LIKE '%Electron%' THEN 'electron' + WHEN user_agent LIKE '%Mozilla%' THEN 'web' + WHEN user_agent LIKE '%Gecko%' THEN 'web' + ELSE 'unknown' + END + AS platform + FROM user_ips + ) uip + ON users.name = uip.user_id + AND users.appservice_id is NULL + AND users.creation_ts < ? + AND uip.last_seen/1000 > ? + AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 + GROUP BY users.name, platform, users.creation_ts + ) u GROUP BY platform + """ + + results = {} + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs)) + rows = txn.fetchall() + for row in rows: + if row[0] is 'unknown': + pass + results[row[0]] = row[1] sql = """ SELECT COALESCE(count(*), 0) FROM ( - SELECT users.name, users.creation_ts * 1000, MAX(user_ips.last_seen) - FROM users, user_ips - WHERE users.name = user_ips.user_id + SELECT users.name, users.creation_ts * 1000, MAX(uip.last_seen) + FROM users + INNER JOIN ( + SELECT + user_id, + last_seen + FROM user_ips + ) uip + ON users.name = uip.user_id AND appservice_id is NULL AND users.creation_ts < ? - AND user_ips.last_seen/1000 > ? - AND (user_ips.last_seen/1000) - users.creation_ts > ? + AND uip.last_seen/1000 > ? + AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30 + GROUP BY users.name, users.creation_ts + ) u """ - if isinstance(self.database_engine, PostgresEngine): - sql = sql + "AND user_ips.user_agent ~ ? 
" - sql = sql + "GROUP BY users.name, users.creation_ts ) u" + txn.execute(sql, (thirty_days_ago_in_secs, + thirty_days_ago_in_secs)) + + count, = txn.fetchone() + results['all'] = count - results = {} - if isinstance(self.database_engine, PostgresEngine): - for filter_name, user_agent_filter in filters.items(): - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs, - user_agent_filter)) - results[filter_name], = txn.fetchone() - - else: - txn.execute(sql, (thirty_days_ago_in_secs, - thirty_days_ago_in_secs, - thirty_days_in_secs)) - results["ALL"], = txn.fetchone() return results return self.runInteraction("count_r30_users", _count_r30_users) From b4e37c6f50b91dd0ea90c773185884659e3a738a Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 29 Mar 2018 17:27:39 +0100 Subject: [PATCH 11/12] pep8 --- synapse/storage/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0b4693041f26..f68e436df068 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,7 +280,8 @@ def _count_r30_users(txn): sql = """ SELECT platform, COALESCE(count(*), 0) FROM ( - SELECT users.name, platform, users.creation_ts * 1000, MAX(uip.last_seen) + SELECT users.name, platform, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT @@ -317,7 +318,8 @@ def _count_r30_users(txn): sql = """ SELECT COALESCE(count(*), 0) FROM ( - SELECT users.name, users.creation_ts * 1000, MAX(uip.last_seen) + SELECT users.name, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT From 0e5f479fc05ef9257c1bfce033c8fb91e6244ffe Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 5 Apr 2018 12:16:46 +0100 Subject: [PATCH 12/12] Review comments Use iteritems over item to loop over dict formatting --- synapse/app/homeserver.py | 2 +- synapse/storage/__init__.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 
deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 35e2b00f1b87..777e9c529a9d 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -426,7 +426,7 @@ def phone_stats_home(): stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() r30_results = yield hs.get_datastore().count_r30_users() - for name, count in r30_results.items(): + for name, count in r30_results.iteritems(): stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f68e436df068..4800584b590e 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -280,8 +280,9 @@ def _count_r30_users(txn): sql = """ SELECT platform, COALESCE(count(*), 0) FROM ( - SELECT users.name, platform, users.creation_ts * 1000, - MAX(uip.last_seen) + SELECT + users.name, platform, users.creation_ts * 1000, + MAX(uip.last_seen) FROM users INNER JOIN ( SELECT @@ -310,8 +311,8 @@ def _count_r30_users(txn): results = {} txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - rows = txn.fetchall() - for row in rows: + + for row in txn: if row[0] is 'unknown': pass results[row[0]] = row[1]