-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Cohort analytics #3163
Cohort analytics #3163
Changes from 9 commits
48c01ae
617bf40
fb6015d
5917562
d085770
dd1a832
977765b
f077e97
05ac15a
c92a8aa
31c2502
a2204cc
be11a02
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,3 +49,4 @@ env/ | |
*.config | ||
|
||
.vscode/ | ||
.ropeproject/ | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,11 @@ | |
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import datetime | ||
from dateutil import tz | ||
import time | ||
import logging | ||
|
||
from synapse.storage.devices import DeviceStore | ||
from .appservice import ( | ||
ApplicationServiceStore, ApplicationServiceTransactionStore | ||
|
@@ -55,10 +60,6 @@ | |
from synapse.api.constants import PresenceState | ||
from synapse.util.caches.stream_change_cache import StreamChangeCache | ||
|
||
|
||
import logging | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
|
@@ -213,6 +214,9 @@ def __init__(self, db_conn, hs): | |
self._stream_order_on_start = self.get_room_max_stream_ordering() | ||
self._min_stream_order_on_start = self.get_room_min_stream_ordering() | ||
|
||
# Used in _generate_user_daily_visits to keep track of progress | ||
self._last_user_visit_update = self._get_start_of_day() | ||
|
||
super(DataStore, self).__init__(db_conn, hs) | ||
|
||
def take_presence_startup_info(self): | ||
|
@@ -347,6 +351,62 @@ def _count_r30_users(txn): | |
|
||
return self.runInteraction("count_r30_users", _count_r30_users) | ||
|
||
def _get_start_of_day(self): | ||
""" | ||
Returns millisecond unixtime for start of UTC day. | ||
""" | ||
now = datetime.datetime.utcnow() | ||
today_start = datetime.datetime(now.year, now.month, | ||
now.day, tzinfo=tz.tzutc()) | ||
return int(time.mktime(today_start.timetuple())) * 1000 | ||
|
||
def generate_user_daily_visits(self): | ||
""" | ||
Generates daily visit data for use in cohort/ retention analysis | ||
""" | ||
def _generate_user_daily_visits(txn): | ||
logger.info("Calling _generate_user_daily_visits") | ||
today_start = self._get_start_of_day() | ||
a_day_in_milliseconds = 24 * 60 * 60 * 1000 | ||
|
||
sql = """ | ||
INSERT INTO user_daily_visits (user_id, device_id, timestamp) | ||
SELECT u.user_id, u.device_id, ? | ||
FROM user_ips AS u | ||
LEFT JOIN ( | ||
SELECT user_id, device_id, timestamp FROM user_daily_visits | ||
WHERE timestamp IS ? | ||
) udv | ||
ON u.user_id = udv.user_id AND u.device_id=udv.device_id | ||
WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL | ||
""" | ||
|
||
# This means that the day has rolled over but there could still | ||
# be entries from the previous day. There is an edge case | ||
# where if the user logs in at 23:59 and overwrites their | ||
# last_seen at 00:01 then they will not be counted in the | ||
# previous day's stats - it is important that the query is run | ||
# to minimise this case. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean:
|
||
if today_start > self._last_user_visit_update: | ||
yesterday_start = today_start - a_day_in_milliseconds | ||
txn.execute(sql, (yesterday_start, yesterday_start, | ||
self._last_user_visit_update, today_start)) | ||
self._last_user_visit_update = today_start | ||
|
||
txn.execute(sql, (today_start, today_start, | ||
self._last_user_visit_update, | ||
today_start + a_day_in_milliseconds)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (The identing style we use tends to not use this continuation indent. See other comment for how it normally looks) |
||
# Update _last_user_visit_update to now. The reason to do this | ||
# rather just clamping to the beginning of the day is to limit | ||
# the size of the join - meaning that the query can be run more | ||
# frequently | ||
|
||
now = datetime.datetime.utcnow() | ||
self._last_user_visit_update = int(time.mktime(now.timetuple())) * 1000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can just use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact, You probably want to get this before the query, so you do: now = self.clock.time_msec()
txn.execute(sql, (
today_start, today_start,
self._last_user_visit_update,
now,
))
self._last_user_visit_update = now This ensures there isn't going to be any overlaps or missed updates. |
||
|
||
return self.runInteraction("generate_user_daily_visits", | ||
_generate_user_daily_visits) | ||
|
||
def get_users(self): | ||
"""Function to reterive a list of users in users table. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/* Copyright 2018 New Vector Ltd | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, | ||
device_id TEXT, | ||
timestamp BIGINT NOT NULL ); | ||
CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp); | ||
CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* Copyright 2018 New Vector Ltd | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
INSERT into background_updates (update_name, progress_json) | ||
VALUES ('user_ips_last_seen_only_index', '{}'); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unrelated to this PR just want to ignore Atom python ide