From e674eba0f60fb66afc63631fe152fe23c114ae37 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Jan 2021 16:31:42 -0500 Subject: [PATCH 1/5] add scaling function --- ee/clickhouse/queries/sessions/average.py | 5 ++++- posthog/api/insight.py | 1 - posthog/queries/sessions.py | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/ee/clickhouse/queries/sessions/average.py b/ee/clickhouse/queries/sessions/average.py index 06c042f50c9c6..81a37ac759c10 100644 --- a/ee/clickhouse/queries/sessions/average.py +++ b/ee/clickhouse/queries/sessions/average.py @@ -11,6 +11,7 @@ from ee.clickhouse.sql.sessions.average_per_period import AVERAGE_PER_PERIOD_SQL from ee.clickhouse.sql.sessions.no_events import SESSIONS_NO_EVENTS_SQL from posthog.models import Filter, Team +from posthog.queries.sessions import scale_time_series from posthog.utils import append_data, friendly_time @@ -44,6 +45,8 @@ def calculate_avg(self, filter: Filter, team: Team): response = sync_execute(final_query, params) values = self.clean_values(filter, response) time_series_data = append_data(values, interval=filter.interval, math=None) + scaled_data, _ = scale_time_series(time_series_data["data"]) + time_series_data.update({"data": scaled_data}) # calculate average total = sum(val[1] for val in values) @@ -71,5 +74,5 @@ def _format_avg(self, avg: float): time_series_data.update( {"label": "Average Duration of Session ({})".format(avg_split[1]), "count": int(avg_split[0]),} ) - time_series_data.update({"chartLabel": "Average Duration of Session (seconds)"}) + time_series_data.update({"chartLabel": "Average Duration of Session ({})".format(avg_split[1])}) return time_series_data diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 10f00181cc98f..ef566e73cfab0 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -176,7 +176,6 @@ def session(self, request: request.Request, *args: Any, **kwargs: Any) -> Respon return Response(result) - @cached_function() def calculate_session(self, request: request.Request) -> List[Dict[str, Any]]: return sessions.Sessions().run(filter=SessionsFilter(request=request), team=self.team) diff --git a/posthog/queries/sessions.py b/posthog/queries/sessions.py index dcbce7cf9a966..e76e42bc2ffc9 100644 --- a/posthog/queries/sessions.py +++ b/posthog/queries/sessions.py @@ -154,6 +154,8 @@ def _determineInterval(interval): values = [(key, round(value[0])) if len(value) > 0 else (key, 0) for key, value in df_dates.iterrows()] time_series_data = append_data(values, interval=filter.interval, math=None) + scaled_data, label = scale_time_series(time_series_data["data"]) + time_series_data.update({"data": scaled_data}) # calculate average totals = [sum(x) for x in list(zip(*time_series_avg))[2:4]] overall_average = (totals[0] / totals[1]) if totals else 0 @@ -167,7 +169,7 @@ def _determineInterval(interval): "aggregated_value": int(avg_split[0]), } ) - time_series_data.update({"chartLabel": "Average Duration of Session (seconds)"}) + time_series_data.update({"chartLabel": "Average Duration of Session ({})".format(label)}) result = [time_series_data] return result @@ -195,3 +197,16 @@ def _session_dist(self, base_query: Query, params: QueryParams) -> List[Dict[str for index in range(len(DIST_LABELS)) ] return result + + +def scale_time_series(data: List[float]) -> Tuple[List, str]: + avg = sum(data) / len(data) + minutes, _ = divmod(avg, 60.0) + hours, _ = divmod(minutes, 60.0) + + if hours > 0: + return [round(value / 3600, 2) for value in data], "hours" + elif minutes > 0: + return [round(value / 60, 2) for value in data], "minutes" + else: + return data, "seconds" From 1280956414e59e7181b2fdbca7111241b7ebc863 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Jan 2021 16:43:22 -0500 Subject: [PATCH 2/5] adjust test --- posthog/queries/sessions.py | 2 +- posthog/queries/test/test_sessions.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/posthog/queries/sessions.py b/posthog/queries/sessions.py index e76e42bc2ffc9..12c5432a4501c 100644 --- a/posthog/queries/sessions.py +++ b/posthog/queries/sessions.py @@ -200,7 +200,7 @@ def _session_dist(self, base_query: Query, params: QueryParams) -> List[Dict[str def scale_time_series(data: List[float]) -> Tuple[List, str]: - avg = sum(data) / len(data) + avg = sum(data) / len([value for value in data if value > 0]) minutes, _ = divmod(avg, 60.0) hours, _ = divmod(minutes, 60.0) diff --git a/posthog/queries/test/test_sessions.py b/posthog/queries/test/test_sessions.py index 2c4cd079855f6..edb527541401a 100644 --- a/posthog/queries/test/test_sessions.py +++ b/posthog/queries/test/test_sessions.py @@ -34,12 +34,13 @@ def test_sessions_avg_length(self): self.assertEqual(response[0]["count"], 3) # average length of all sessions # time series - self.assertEqual(response[0]["data"][0], 240) - self.assertEqual(response[0]["data"][1], 120) + self.assertEqual(response[0]["data"][0], 4.0) + self.assertEqual(response[0]["data"][1], 2.0) self.assertEqual(response[0]["labels"][0], "Sat. 14 January") self.assertEqual(response[0]["labels"][1], "Sun. 15 January") self.assertEqual(response[0]["days"][0], "2012-01-14") self.assertEqual(response[0]["days"][1], "2012-01-15") + self.assertEqual(response[0]["chartLabel"], "Average Duration of Session (minutes)") def test_sessions_avg_length_interval(self): with freeze_time("2012-01-14T03:21:34.000Z"): From 98e6a63ad38164f68c2fe8aa373a22df74d26712 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Jan 2021 17:25:26 -0500 Subject: [PATCH 3/5] update tests --- posthog/queries/test/test_sessions.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/posthog/queries/test/test_sessions.py b/posthog/queries/test/test_sessions.py index edb527541401a..3c392b8cf3b1d 100644 --- a/posthog/queries/test/test_sessions.py +++ b/posthog/queries/test/test_sessions.py @@ -73,8 +73,8 @@ def test_sessions_avg_length_interval(self): self.team, ) - self.assertEqual(month_response[0]["data"][0], 180) - self.assertEqual(month_response[0]["data"][2], 180) + self.assertEqual(month_response[0]["data"][0], 3.0) + self.assertEqual(month_response[0]["data"][2], 3.0) self.assertEqual(month_response[0]["labels"][0], "Tue. 31 January") self.assertEqual(month_response[0]["labels"][1], "Wed. 29 February") self.assertEqual(month_response[0]["days"][0], "2012-01-31") @@ -87,8 +87,8 @@ def test_sessions_avg_length_interval(self): ), self.team, ) - self.assertEqual(week_response[0]["data"][1], 240.0) - self.assertEqual(week_response[0]["data"][3], 120.0) + self.assertEqual(week_response[0]["data"][1], 4.0) + self.assertEqual(week_response[0]["data"][3], 2.0) self.assertEqual(week_response[0]["labels"][0], "Sun. 1 January") self.assertEqual(week_response[0]["labels"][1], "Sun. 8 January") self.assertEqual(week_response[0]["days"][0], "2012-01-01") @@ -101,8 +101,8 @@ def test_sessions_avg_length_interval(self): ), self.team, ) - self.assertEqual(hour_response[0]["data"][3], 240.0) - self.assertEqual(hour_response[0]["data"][27], 120.0) + self.assertEqual(hour_response[0]["data"][3], 4.0) + self.assertEqual(hour_response[0]["data"][27], 2.0) self.assertEqual(hour_response[0]["labels"][0], "Wed. 14 March, 00:00") self.assertEqual(hour_response[0]["labels"][1], "Wed. 14 March, 01:00") self.assertEqual(hour_response[0]["days"][0], "2012-03-14 00:00:00") @@ -142,8 +142,8 @@ def test_compare(self): # Run without anything to compare to compare_response = sessions().run(filter=filter, team=self.team) - self.assertEqual(compare_response[0]["data"][5], 120.0) - self.assertEqual(compare_response[1]["data"][4], 240.0) + self.assertEqual(compare_response[0]["data"][5], 2.0) + self.assertEqual(compare_response[1]["data"][4], 4.0) def test_sessions_count_buckets_default(self): with freeze_time("2012-01-11T01:25:30.000Z"): From bfb27e646fdc1cad755f91fdc45ec66ff88e509d Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 12 Jan 2021 18:28:12 -0500 Subject: [PATCH 4/5] patch errors --- ee/clickhouse/queries/sessions/average.py | 4 ++-- posthog/api/insight.py | 1 + posthog/queries/sessions.py | 14 +++++++++----- posthog/queries/test/test_sessions.py | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ee/clickhouse/queries/sessions/average.py b/ee/clickhouse/queries/sessions/average.py index 81a37ac759c10..1f3c4cc665f09 100644 --- a/ee/clickhouse/queries/sessions/average.py +++ b/ee/clickhouse/queries/sessions/average.py @@ -72,7 +72,7 @@ def _format_avg(self, avg: float): avg_split = avg_formatted.split(" ") time_series_data = {} time_series_data.update( - {"label": "Average Duration of Session ({})".format(avg_split[1]), "count": int(avg_split[0]),} + {"label": "Average Session Length ({})".format(avg_split[1]), "count": int(avg_split[0]),} ) - time_series_data.update({"chartLabel": "Average Duration of Session ({})".format(avg_split[1])}) + time_series_data.update({"chartLabel": "Average Session Length ({})".format(avg_split[1])}) return time_series_data diff --git a/posthog/api/insight.py b/posthog/api/insight.py index ef566e73cfab0..10f00181cc98f 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -176,6 +176,7 @@ def session(self, request: request.Request, *args: Any, **kwargs: Any) -> Respon return Response(result) + @cached_function() def calculate_session(self, request: request.Request) -> List[Dict[str, Any]]: return sessions.Sessions().run(filter=SessionsFilter(request=request), team=self.team) diff --git a/posthog/queries/sessions.py b/posthog/queries/sessions.py index 12c5432a4501c..d490df47e2842 100644 --- a/posthog/queries/sessions.py +++ b/posthog/queries/sessions.py @@ -164,12 +164,12 @@ def _determineInterval(interval): time_series_data.update( { - "label": "Average Duration of Session ({})".format(avg_split[1]), + "label": "Average Session Length ({})".format(avg_split[1]), "count": int(avg_split[0]), "aggregated_value": int(avg_split[0]), } ) - time_series_data.update({"chartLabel": "Average Duration of Session ({})".format(label)}) + time_series_data.update({"chartLabel": "Average Session Length ({})".format(label)}) result = [time_series_data] return result @@ -200,9 +200,13 @@ def _session_dist(self, base_query: Query, params: QueryParams) -> List[Dict[str def scale_time_series(data: List[float]) -> Tuple[List, str]: - avg = sum(data) / len([value for value in data if value > 0]) - minutes, _ = divmod(avg, 60.0) - hours, _ = divmod(minutes, 60.0) + _len = len([value for value in data if value > 0]) + if _len == 0: + return data, "seconds" + + avg = sum(data) / _len + minutes = avg // 60.0 + hours = minutes // 60.0 if hours > 0: return [round(value / 3600, 2) for value in data], "hours" diff --git a/posthog/queries/test/test_sessions.py b/posthog/queries/test/test_sessions.py index 3c392b8cf3b1d..8bfc3f9d0fb60 100644 --- a/posthog/queries/test/test_sessions.py +++ b/posthog/queries/test/test_sessions.py @@ -40,7 +40,7 @@ def test_sessions_avg_length(self): self.assertEqual(response[0]["labels"][1], "Sun. 15 January") self.assertEqual(response[0]["days"][0], "2012-01-14") self.assertEqual(response[0]["days"][1], "2012-01-15") - self.assertEqual(response[0]["chartLabel"], "Average Duration of Session (minutes)") + self.assertEqual(response[0]["chartLabel"], "Average Session Length (minutes)") def test_sessions_avg_length_interval(self): with freeze_time("2012-01-14T03:21:34.000Z"): From b8d3c8e32a42bef2469b01ac80b5a8f6a23055a2 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 21 Jan 2021 09:43:24 -0500 Subject: [PATCH 5/5] fix import --- ee/clickhouse/queries/sessions/average.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/clickhouse/queries/sessions/average.py b/ee/clickhouse/queries/sessions/average.py index 59895b6d2c3b1..60dd1791289e3 100644 --- a/ee/clickhouse/queries/sessions/average.py +++ b/ee/clickhouse/queries/sessions/average.py @@ -11,7 +11,7 @@ from ee.clickhouse.sql.sessions.average_per_period import AVERAGE_PER_PERIOD_SQL from ee.clickhouse.sql.sessions.no_events import SESSIONS_NO_EVENTS_SQL from posthog.models import Filter, Team -from posthog.queries.sessions import scale_time_series +from posthog.queries.sessions.sessions import scale_time_series from posthog.utils import append_data, friendly_time