Experimentation backend #7492
ee/clickhouse/queries/experiments/funnel_experiment_result.py
@@ -0,0 +1,126 @@
import dataclasses
from datetime import datetime
from typing import List, Optional, Tuple, Type

from numpy.random import default_rng
from rest_framework.exceptions import ValidationError

from ee.clickhouse.queries.funnels import ClickhouseFunnel, funnel
from posthog.models.filters.filter import Filter
from posthog.models.team import Team


@dataclasses.dataclass
class Variant:
    name: str
    success_count: int
    failure_count: int


SIMULATION_COUNT = 100_000


class ClickhouseFunnelExperimentResult:
    """
    This class calculates Experiment Results.
    It returns two things:
    1. A Funnel Breakdown based on Feature Flag values
    2. Probability that Feature Flag value 1 has a better conversion rate than Feature Flag value 2

    Currently, it only supports two feature flag values: control and test

    The passed-in Filter determines which funnel to create, along with the experiment start & end date values

    Calculating (2) uses sampling from a Beta distribution. If the `control` value for the feature flag has 10 successes and 12 conversion failures,
    we assume the conversion rate follows a Beta(10, 12) distribution. The same goes for the `test` variant.

    Then, we calculate how many times a sample from the `test` variant is higher than a sample from the `control` variant. This becomes the
    probability.
    """

    def __init__(
        self,
        filter: Filter,
        team: Team,
        feature_flag: str,
        experiment_start_date: datetime,
        experiment_end_date: Optional[datetime] = None,
        funnel_class: Type[ClickhouseFunnel] = ClickhouseFunnel,
    ):

        breakdown_key = f"$feature/{feature_flag}"

        query_filter = filter.with_data(
            {
                "date_from": experiment_start_date,
                "date_to": experiment_end_date,
                "breakdown": breakdown_key,
                "breakdown_type": "event",
                "properties": [
                    {"key": breakdown_key, "value": ["control", "test"], "operator": "exact", "type": "event"}
                ],
                # :TRICKY: We don't use properties set on filters, instead using experiment variant options
            }
        )
        self.funnel = funnel_class(query_filter, team)

    def get_results(self):
        funnel_results = self.funnel.run()
        variants = self.get_variants(funnel_results)

        probability = self.calculate_results(variants)

        return {"funnel": funnel_results, "probability": probability}

    def get_variants(self, funnel_results):
        variants = []
        for result in funnel_results:
            total = sum([step["count"] for step in result])
            success = result[-1]["count"]
            failure = total - success
            breakdown_value = result[0]["breakdown_value"][0]
Review comment:
Will this work for multi-breakdowns? Seems really really suspect.

Reply:
Nope, won't work for multi-breakdowns, but since we're creating the funnel ourselves, we know that there isn't a multi-breakdown. Hmm, this is valuable feedback, I've been wrangling with this for a few days now, so much that I've forgotten the fresh-eyes look. Will add more documentation at the top of the class to explain what's going on. I didn't want to move this yet to use
            variants.append(Variant(breakdown_value, success, failure))

        # Default variant names: control and test
        return sorted(variants, key=lambda variant: variant.name, reverse=True)

    @staticmethod
Review comment:
This doesn't need to be in a class. Extract this and cover it with extensive tests instead which don't need any database setup.

Reply:
I do test this without any DB setup, but ok with extracting it out of the class as well!
    def calculate_results(
        variants: List[Variant], priors: Tuple[int, int] = (1, 1), simulations_count: int = SIMULATION_COUNT
    ):
""" | ||
# Calculates probability that A is better than B | ||
# Only supports 2 variants today | ||
|
||
For each variant, we create a Beta distribution of conversion rates, | ||
where alpha (successes) = success count of variant + prior success | ||
beta (failures) = failure count + variant + prior failures | ||
|
||
The prior is information about the world we already know. For example, a stronger prior for failures implies | ||
you'd need extra evidence of successes to confirm that the variant is indeed better. | ||
|
||
By default, we choose a non-informative prior. That is, both success & failure are equally likely. | ||
|
||
""" | ||
        if len(variants) > 2:
            raise ValidationError("Can't calculate A/B test results for more than 2 variants")

        if len(variants) < 2:
            raise ValidationError("Can't calculate A/B test results for less than 2 variants")

        prior_success, prior_failure = priors

        random_sampler = default_rng()
        variant_samples = []
        for variant in variants:
            # Get `N=simulations` samples from a Beta distribution with alpha = prior_success + variant_success,
            # and beta = prior_failure + variant_failure
            samples = random_sampler.beta(
                variant.success_count + prior_success, variant.failure_count + prior_failure, simulations_count
            )
            variant_samples.append(samples)

        probability = sum(sample_a > sample_b for (sample_a, sample_b) in zip(*variant_samples)) / simulations_count

        return probability
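For intuition, here is a minimal standalone sketch of the same Monte Carlo estimate, assuming only numpy; the function name and counts are illustrative and not part of this PR.

# Illustrative sketch of the Beta-sampling estimate used in calculate_results above.
from numpy.random import default_rng

def p_test_beats_control(test=(100, 10), control=(100, 18), prior=(1, 1), n=100_000):
    # Each variant's conversion rate is modelled as Beta(successes + prior_success, failures + prior_failure).
    rng = default_rng()
    test_samples = rng.beta(test[0] + prior[0], test[1] + prior[1], n)
    control_samples = rng.beta(control[0] + prior[0], control[1] + prior[1], n)
    # The probability is the fraction of simulations where the test sample exceeds the control sample.
    return (test_samples > control_samples).mean()

# For these counts the estimate is typically above 0.9, in line with the unit test below.
print(p_test_beats_control())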
@@ -0,0 +1,13 @@
import unittest

from ee.clickhouse.queries.experiments.funnel_experiment_result import ClickhouseFunnelExperimentResult, Variant


class TestFunnelExperimentCalculator(unittest.TestCase):
    def test_calculate_results(self):

        variant_a = Variant("A", 100, 10)
        variant_b = Variant("B", 100, 18)

        probability = ClickhouseFunnelExperimentResult.calculate_results([variant_a, variant_b])
        self.assertTrue(probability > 0.9)
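To make the docstring's point about priors concrete, here is a hedged sketch (not part of the PR) comparing the default non-informative prior with a stronger failure prior; the counts and variable names are illustrative.

# Illustrative only: the same observed counts evaluated under two priors.
from ee.clickhouse.queries.experiments.funnel_experiment_result import ClickhouseFunnelExperimentResult, Variant

variants = [Variant("A", 100, 10), Variant("B", 100, 18)]

p_default = ClickhouseFunnelExperimentResult.calculate_results(variants)
p_skeptical = ClickhouseFunnelExperimentResult.calculate_results(variants, priors=(1, 10))

# With a stronger prior on failures, more observed successes are needed before one variant
# looks clearly better, so for counts like these p_skeptical tends to sit closer to 0.5 than p_default.
print(p_default, p_skeptical)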
@@ -0,0 +1,139 @@
from typing import Any

from rest_framework import request, serializers, viewsets
from rest_framework.decorators import action
from rest_framework.exceptions import ValidationError
from rest_framework.permissions import IsAuthenticated
from rest_framework.request import Request
from rest_framework.response import Response

from ee.clickhouse.queries.experiments.funnel_experiment_result import ClickhouseFunnelExperimentResult
from posthog.api.routing import StructuredViewSetMixin
from posthog.models.experiment import Experiment
from posthog.models.feature_flag import FeatureFlag
from posthog.models.filters.filter import Filter
from posthog.models.team import Team
from posthog.permissions import ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission


class ExperimentSerializer(serializers.ModelSerializer):

    feature_flag_key = serializers.CharField(source="get_feature_flag_key")

    class Meta:
        model = Experiment
        fields = [
            "id",
            "name",
            "description",
            "start_date",
            "end_date",
            "feature_flag_key",
            "parameters",
            "filters",
            "created_by",
            "created_at",
            "updated_at",
        ]
        read_only_fields = [
            "id",
            "created_by",
            "created_at",
            "updated_at",
        ]

    def validate_feature_flag_key(self, value):
        if FeatureFlag.objects.filter(key=value, team_id=self.context["team_id"], deleted=False).exists():
            raise ValidationError("Feature Flag key already exists. Please select a unique key")

        return value
    def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:
        request = self.context["request"]
        validated_data["created_by"] = request.user
        team = Team.objects.get(id=self.context["team_id"])

        feature_flag_key = validated_data.pop("get_feature_flag_key")

        is_draft = "start_date" in validated_data

        properties = validated_data["filters"].get("properties", [])
        filters = {
            "groups": [{"properties": properties, "rollout_percentage": None}],
            "multivariate": {
                "variants": [
                    {"key": "control", "name": "Control Group", "rollout_percentage": 50},
                    {"key": "test", "name": "Test Variant", "rollout_percentage": 50},
                ]
            },
        }

        feature_flag = FeatureFlag.objects.create(
            key=feature_flag_key,
            name=f'Feature Flag for Experiment {validated_data["name"]}',
            team=team,
            created_by=request.user,
            filters=filters,
            active=False if is_draft else True,
        )

        experiment = Experiment.objects.create(team=team, feature_flag=feature_flag, **validated_data)
        return experiment

    def update(self, instance: Experiment, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:

        expected_keys = set(["name", "description", "start_date", "end_date", "parameters"])
        given_keys = set(validated_data.keys())

        extra_keys = given_keys - expected_keys

        if extra_keys:
            raise ValidationError(f"Can't update keys: {', '.join(sorted(extra_keys))} on Experiment")

        has_start_date = "start_date" in validated_data

        feature_flag = instance.feature_flag

        if instance.is_draft and has_start_date:
            feature_flag.active = True
            feature_flag.save()
            return super().update(instance, validated_data)

        elif has_start_date:
            raise ValidationError("Can't change experiment start date after experiment has begun")
        else:
            # Not a draft, doesn't have start date
            # Or draft without start date
            return super().update(instance, validated_data)
class ClickhouseExperimentsViewSet(StructuredViewSetMixin, viewsets.ModelViewSet):
    serializer_class = ExperimentSerializer
    queryset = Experiment.objects.all()
    permission_classes = [IsAuthenticated, ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission]

    def get_queryset(self):
        return super().get_queryset()

    # ******************************************
    # /projects/:id/experiments/:experiment_id/results
    #
    # Returns current results of an experiment, and graphs
    # 1. Probability of success
    # 2. Funnel breakdown graph to display
    # ******************************************
    @action(methods=["GET"], detail=True)
    def results(self, request: Request, *args: Any, **kwargs: Any) -> Response:
        experiment: Experiment = self.get_object()

        if not experiment.filters:
            raise ValidationError("Experiment has no target metric")

        result = ClickhouseFunnelExperimentResult(
            Filter(experiment.filters),
            self.team,
            experiment.feature_flag.key,
            experiment.start_date,
            experiment.end_date,
        ).get_results()
        return Response(result)
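To illustrate how the results action documented in the comment block above might be exercised, here is a hedged sketch; the host, project id, experiment id, and API key are placeholders, not values from this PR.

# Illustrative request against the results action; all identifiers below are placeholders.
import requests

response = requests.get(
    "https://app.posthog.com/api/projects/1/experiments/42/results",
    headers={"Authorization": "Bearer <personal_api_key>"},
)
data = response.json()

# get_results above returns a funnel breakdown plus a single probability.
print(data["probability"])  # estimated probability that the test variant beats control
print(len(data["funnel"]))  # one funnel entry per feature-flag variant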
@@ -0,0 +1,106 @@
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results
'
/* request:api_projects_(?P<parent_lookup_team_id>[^_.]+)_experiments_(?P<pk>[^_.]+)_results_?$ (ClickhouseExperimentsViewSet) */
SELECT groupArray(value)
FROM
(SELECT array(trim(BOTH '"'
FROM JSONExtractRaw(properties, '$feature/a-b-test'))) AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event = '$pageview'
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-06 23:59:59'
AND has(['control', 'test'], trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$feature/a-b-test')))
GROUP BY value
ORDER BY count DESC
LIMIT 10
OFFSET 0)
'
---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results.1
'
/* request:api_projects_(?P<parent_lookup_team_id>[^_.]+)_experiments_(?P<pk>[^_.]+)_results_?$ (ClickhouseExperimentsViewSet) */
SELECT countIf(steps = 1) step_1,
countIf(steps = 2) step_2,
avg(step_1_average_conversion_time_inner) step_1_average_conversion_time,
median(step_1_median_conversion_time_inner) step_1_median_conversion_time,
prop
FROM
(SELECT aggregation_target,
steps,
avg(step_1_conversion_time) step_1_average_conversion_time_inner,
median(step_1_conversion_time) step_1_median_conversion_time_inner,
prop
FROM
(SELECT aggregation_target,
steps,
max(steps) over (PARTITION BY aggregation_target,
prop) as max_steps,
step_1_conversion_time,
prop
FROM
(SELECT *,
if(latest_0 < latest_1
AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
prop
FROM
(SELECT aggregation_target,
timestamp,
step_0,
latest_0,
step_1,
min(latest_1) over (PARTITION by aggregation_target,
prop
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 ,
if(has([['test'], ['control']], prop), prop, ['Other']) as prop
FROM
(SELECT aggregation_target,
timestamp,
if(event = '$pageview', 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = '$pageleave', 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1,
array(trim(BOTH '"'
FROM JSONExtractRaw(properties, '$feature/a-b-test'))) AS prop
FROM
(SELECT e.event as event,
e.team_id as team_id,
e.distinct_id as distinct_id,
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.properties as properties
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, _timestamp) as person_id
FROM
(SELECT distinct_id,
person_id,
max(_timestamp) as _timestamp
FROM person_distinct_id
WHERE team_id = 2
GROUP BY person_id,
distinct_id,
team_id
HAVING max(is_deleted) = 0)
GROUP BY distinct_id) AS pdi ON events.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-06 23:59:59'
AND has(['control', 'test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, '$feature/a-b-test'))) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 ))
GROUP BY aggregation_target,
steps,
prop
HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1)
GROUP BY prop SETTINGS allow_experimental_window_functions = 1
'
---
Review comment:
Should this use new multi-breakdowns?