diff --git a/ee/clickhouse/queries/paths/paths.py b/ee/clickhouse/queries/paths/paths.py index b58b317b44a02..4879b5ea2641f 100644 --- a/ee/clickhouse/queries/paths/paths.py +++ b/ee/clickhouse/queries/paths/paths.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Literal, Optional, Tuple from rest_framework.exceptions import ValidationError @@ -65,9 +65,19 @@ def get_path_query(self) -> str: path_event_query, params = PathEventQuery(filter=self._filter, team_id=self._team.pk).get_query() self.params.update(params) - boundary_event_filter, start_params = self.get_start_point_filter() + boundary_event_filter, start_params = ( + self.get_end_point_filter() if self._filter.end_point else self.get_start_point_filter() + ) + path_limiting_clause, time_limiting_clause = self.get_filtered_path_ordering() + compacting_function = self.get_array_compacting_function() self.params.update(start_params) - return PATH_ARRAY_QUERY.format(path_event_query=path_event_query, boundary_event_filter=boundary_event_filter) + return PATH_ARRAY_QUERY.format( + path_event_query=path_event_query, + boundary_event_filter=boundary_event_filter, + path_limiting_clause=path_limiting_clause, + time_limiting_clause=time_limiting_clause, + compacting_function=compacting_function, + ) def get_path_query_by_funnel(self, funnel_filter: Filter): path_query = self.get_path_query() @@ -87,6 +97,31 @@ def get_path_query_by_funnel(self, funnel_filter: Filter): def get_start_point_filter(self) -> Tuple[str, Dict]: if not self._filter.start_point: - return "", {"start_point": None} + return "", {"target_point": None} + + return "WHERE arrayElement(limited_path, 1) = %(target_point)s", {"target_point": self._filter.start_point} + + def get_end_point_filter(self) -> Tuple[str, Dict]: + if not self._filter.end_point: + return "", {"target_point": None} + + return "WHERE arrayElement(limited_path, -1) = %(target_point)s", {"target_point": self._filter.end_point} - return "WHERE arrayElement(limited_path, 1) = %(start_point)s", {"start_point": self._filter.start_point} + def get_array_compacting_function(self) -> Literal["arrayResize", "arraySlice"]: + if self._filter.end_point: + return "arrayResize" + else: + return "arraySlice" + + def get_filtered_path_ordering(self) -> Tuple[str, str]: + + if self._filter.end_point: + return ( + "arraySlice(filtered_path, (-1) * %(event_in_session_limit)s)", + "arraySlice(filtered_timings, (-1) * %(event_in_session_limit)s)", + ) + else: + return ( + "arraySlice(filtered_path, 1, %(event_in_session_limit)s)", + "arraySlice(filtered_timings, 1, %(event_in_session_limit)s)", + ) diff --git a/ee/clickhouse/queries/test/test_paths.py b/ee/clickhouse/queries/test/test_paths.py index 09bf5b8e58f89..c87281ecb9e87 100644 --- a/ee/clickhouse/queries/test/test_paths.py +++ b/ee/clickhouse/queries/test/test_paths.py @@ -11,6 +11,7 @@ from posthog.models.filters import Filter, PathFilter from posthog.models.person import Person from posthog.queries.test.test_paths import paths_test_factory +from posthog.test.base import test_with_materialized_columns def _create_event(**kwargs): @@ -260,6 +261,127 @@ def test_path_by_funnel(self): ], ) + @test_with_materialized_columns(["$current_url"]) + def test_paths_end(self): + Person.objects.create(team_id=self.team.pk, distinct_ids=["person_1"]) + _create_event( + properties={"$current_url": "/1"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:01:00", + ) + _create_event( + properties={"$current_url": "/2"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:02:00", + ) + _create_event( + properties={"$current_url": "/3"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:03:00", + ) + _create_event( + properties={"$current_url": "/4"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:04:00", + ) + _create_event( + properties={"$current_url": "/5"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:05:00", + ) + _create_event( + properties={"$current_url": "/about"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:06:00", + ) + _create_event( + properties={"$current_url": "/after"}, + distinct_id="person_1", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:07:00", + ) + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person_2"]) + _create_event( + properties={"$current_url": "/5"}, + distinct_id="person_2", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:01:00", + ) + _create_event( + properties={"$current_url": "/about"}, + distinct_id="person_2", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:02:00", + ) + + Person.objects.create(team_id=self.team.pk, distinct_ids=["person_3"]) + _create_event( + properties={"$current_url": "/3"}, + distinct_id="person_3", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:01:00", + ) + _create_event( + properties={"$current_url": "/4"}, + distinct_id="person_3", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:02:00", + ) + _create_event( + properties={"$current_url": "/about"}, + distinct_id="person_3", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:03:00", + ) + _create_event( + properties={"$current_url": "/after"}, + distinct_id="person_3", + event="$pageview", + team=self.team, + timestamp="2021-05-01 00:04:00", + ) + + filter = PathFilter( + data={ + "path_type": "$pageview", + "end_point": "/about", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + } + ) + response = ClickhousePathsNew(team=self.team, filter=filter).run(team=self.team, filter=filter,) + self.assertEqual( + response, + [ + {"source": "1_/2", "target": "2_/3", "value": 1, "average_conversion_time": 60000.0}, + {"source": "1_/3", "target": "2_/4", "value": 1, "average_conversion_time": 60000.0}, + {"source": "1_/5", "target": "2_/about", "value": 1, "average_conversion_time": 60000.0}, + {"source": "2_/3", "target": "3_/4", "value": 1, "average_conversion_time": 60000.0}, + {"source": "2_/4", "target": "3_/about", "value": 1, "average_conversion_time": 60000.0}, + {"source": "3_/4", "target": "4_/5", "value": 1, "average_conversion_time": 60000.0}, + {"source": "4_/5", "target": "5_/about", "value": 1, "average_conversion_time": 60000.0}, + ], + ) + def test_event_inclusion_exclusion_filters(self): # P1 for pageview event diff --git a/ee/clickhouse/sql/paths/path.py b/ee/clickhouse/sql/paths/path.py index 98f64010cf6d4..51df27605a348 100644 --- a/ee/clickhouse/sql/paths/path.py +++ b/ee/clickhouse/sql/paths/path.py @@ -168,11 +168,11 @@ , arrayMap((x,y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , arrayFilter((x,y) -> y, time, mapping) as timings , arrayFilter((x,y)->y, path_basic, mapping) as compact_path - , indexOf(compact_path, %(start_point)s) as start_index - , if(start_index > 0, arraySlice(compact_path, start_index), compact_path) as filtered_path - , if(start_index > 0, arraySlice(timings, start_index), timings) as filtered_timings - , arraySlice(filtered_path, 1, %(event_in_session_limit)s) as limited_path - , arraySlice(filtered_timings, 1, %(event_in_session_limit)s) as limited_timings + , indexOf(compact_path, %(target_point)s) as target_index + , if(target_index > 0, {compacting_function}(compact_path, target_index), compact_path) as filtered_path + , if(target_index > 0, {compacting_function}(timings, target_index), timings) as filtered_timings + , {path_limiting_clause} as limited_path + , {time_limiting_clause} as limited_timings , arrayZip(limited_path, limited_timings) as limited_path_timings FROM ( SELECT person_id diff --git a/posthog/constants.py b/posthog/constants.py index 203629263728b..182525344c29b 100644 --- a/posthog/constants.py +++ b/posthog/constants.py @@ -78,6 +78,7 @@ TOTAL_INTERVALS = "total_intervals" SELECTED_INTERVAL = "selected_interval" START_POINT = "start_point" +END_POINT = "end_point" STEP_LIMIT = "step_limit" TARGET_ENTITY = "target_entity" RETURNING_ENTITY = "returning_entity" diff --git a/posthog/models/filters/mixins/paths.py b/posthog/models/filters/mixins/paths.py index 4a0eb1fe2e164..44b6baf37c797 100644 --- a/posthog/models/filters/mixins/paths.py +++ b/posthog/models/filters/mixins/paths.py @@ -3,6 +3,7 @@ from posthog.constants import ( AUTOCAPTURE_EVENT, CUSTOM_EVENT, + END_POINT, FUNNEL_PATHS, PAGEVIEW_EVENT, PATH_TYPE, @@ -39,6 +40,16 @@ def start_point_to_dict(self): return {"start_point": self.start_point} if self.start_point else {} +class EndPointMixin(BaseParamMixin): + @cached_property + def end_point(self) -> Optional[str]: + return self._data.get(END_POINT, None) + + @include_dict + def end_point_to_dict(self): + return {"end_point": self.end_point} if self.end_point else {} + + class PropTypeDerivedMixin(PathTypeMixin): @cached_property def prop_type(self) -> str: diff --git a/posthog/models/filters/path_filter.py b/posthog/models/filters/path_filter.py index 781fdbd39aeae..e18c33a8600c3 100644 --- a/posthog/models/filters/path_filter.py +++ b/posthog/models/filters/path_filter.py @@ -15,6 +15,7 @@ ) from posthog.models.filters.mixins.paths import ( ComparatorDerivedMixin, + EndPointMixin, FunnelPathsMixin, PathStepLimitMixin, PropTypeDerivedMixin, @@ -27,6 +28,7 @@ class PathFilter( StartPointMixin, + EndPointMixin, TargetEventDerivedMixin, ComparatorDerivedMixin, PropTypeDerivedMixin,