diff --git a/docs/custom-metrics.md b/docs/custom-metrics.md index f252e2d..2328ed1 100644 --- a/docs/custom-metrics.md +++ b/docs/custom-metrics.md @@ -45,7 +45,7 @@ user: [[0,1,2,3,4,...,3995,3996,3997,3998,3999]] variant: [[1,0,1,1,0,...,0,0,0,0,0]] sessions: [[2,2,2,2,1,...,2,2,3,1,5]] orders: [[1,1,1,1,1,...,0,0,0,0,2]] -revenue: [[9.166147128806545,6.4340787057460656,7.943873223822707,15.928674729738708,7.136917019113867,...,0,0,0,0,17.162458516177704]] +revenue: [[9.17,6.43,7.94,15.93,7.14,...,0,0,0,0,17.16]] has_order: [[1,1,1,1,1,...,0,0,0,0,1]] ``` @@ -225,7 +225,7 @@ Now we can perform the Mann-Whitney U test: >>> print(result.to_string(("metric", "pvalue", "statistic"))) metric pvalue statistic mwu_orders 0.0263 2069092 -mwu_revenue 0.0300 2068063 +mwu_revenue 0.0300 2068060 ``` diff --git a/docs/data-backends.md b/docs/data-backends.md index b1cd740..efcbf31 100644 --- a/docs/data-backends.md +++ b/docs/data-backends.md @@ -83,17 +83,17 @@ Ibis Table is a lazy object. It doesn't fetch the data when created. You can use ```pycon >>> ibis.options.interactive = True >>> print(data.head(5)) -┏━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┓ -┃ user ┃ variant ┃ sessions ┃ orders ┃ revenue ┃ -┡━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━┩ -│ int64 │ int64 │ int64 │ int64 │ float64 │ -├───────┼─────────┼──────────┼────────┼───────────┤ -│ 0 │ 1 │ 2 │ 1 │ 9.166147 │ -│ 1 │ 0 │ 2 │ 1 │ 6.434079 │ -│ 2 │ 1 │ 2 │ 1 │ 7.943873 │ -│ 3 │ 1 │ 2 │ 1 │ 15.928675 │ -│ 4 │ 0 │ 1 │ 1 │ 7.136917 │ -└───────┴─────────┴──────────┴────────┴───────────┘ +┏━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ +┃ user ┃ variant ┃ sessions ┃ orders ┃ revenue ┃ +┡━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ +│ int64 │ int64 │ int64 │ int64 │ float64 │ +├───────┼─────────┼──────────┼────────┼─────────┤ +│ 0 │ 1 │ 2 │ 1 │ 9.17 │ +│ 1 │ 0 │ 2 │ 1 │ 6.43 │ +│ 2 │ 1 │ 2 │ 1 │ 7.94 │ +│ 3 │ 1 │ 2 │ 1 │ 15.93 │ +│ 4 │ 0 │ 1 │ 1 │ 7.14 │ +└───────┴─────────┴──────────┴────────┴─────────┘ >>> ibis.options.interactive = False @@ -142,8 +142,8 @@ Aggregate[r0] ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ int64 │ float64 │ float64 │ float64 │ float64 │ ├─────────┼───────────────────┼────────────────────┼─────────────────┼──────────────────┤ -│ 0 │ 1.996045 │ 0.265726 │ 0.530400 │ 5.241079 │ -│ 1 │ 1.982802 │ 0.289031 │ 0.573091 │ 5.730132 │ +│ 0 │ 1.996045 │ 0.265726 │ 0.530400 │ 5.241028 │ +│ 1 │ 1.982802 │ 0.289031 │ 0.573091 │ 5.730111 │ └─────────┴───────────────────┴────────────────────┴─────────────────┴──────────────────┘ >>> ibis.options.interactive = False @@ -234,7 +234,7 @@ DatabaseTable: memory.main.users_data_with_cov sessions_per_user 2.00 1.98 -0.68% [-3.2%, 1.9%] 0.603 orders_per_session 0.262 0.293 12% [4.2%, 21%] 0.00229 orders_per_user 0.523 0.581 11% [2.9%, 20%] 0.00733 - revenue_per_user 5.12 5.85 14% [3.8%, 26%] 0.00675 + revenue_per_user 5.12 5.85 14% [3.8%, 26%] 0.00674 ``` diff --git a/docs/multiple-testing.md b/docs/multiple-testing.md index de99637..91bc6b6 100644 --- a/docs/multiple-testing.md +++ b/docs/multiple-testing.md @@ -38,23 +38,23 @@ As an example, consider an experiment with three variants, a control and two tre ... )) >>> print(data) shape: (6_046, 5) -┌──────┬─────────┬──────────┬────────┬───────────┐ -│ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ -╞══════╪═════════╪══════════╪════════╪═══════════╡ -│ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.58279 │ -│ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.434079 │ -│ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 8.304958 │ -│ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 16.652705 │ -│ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.136917 │ -│ … ┆ … ┆ … ┆ … ┆ … │ -│ 3989 ┆ 2 ┆ 4 ┆ 4 ┆ 34.931448 │ -│ 3991 ┆ 2 ┆ 1 ┆ 0 ┆ 0.0 │ -│ 3992 ┆ 2 ┆ 3 ┆ 3 ┆ 27.964647 │ -│ 3994 ┆ 2 ┆ 2 ┆ 1 ┆ 17.217892 │ -│ 3998 ┆ 2 ┆ 3 ┆ 0 ┆ 0.0 │ -└──────┴─────────┴──────────┴────────┴───────────┘ +┌──────┬─────────┬──────────┬────────┬─────────┐ +│ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ +╞══════╪═════════╪══════════╪════════╪═════════╡ +│ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.58 │ +│ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.43 │ +│ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 8.3 │ +│ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 16.65 │ +│ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.14 │ +│ … ┆ … ┆ … ┆ … ┆ … │ +│ 3989 ┆ 2 ┆ 4 ┆ 4 ┆ 34.93 │ +│ 3991 ┆ 2 ┆ 1 ┆ 0 ┆ 0.0 │ +│ 3992 ┆ 2 ┆ 3 ┆ 3 ┆ 27.96 │ +│ 3994 ┆ 2 ┆ 2 ┆ 1 ┆ 17.22 │ +│ 3998 ┆ 2 ┆ 3 ┆ 0 ┆ 0.0 │ +└──────┴─────────┴──────────┴────────┴─────────┘ ``` @@ -74,7 +74,7 @@ variants metric control treatment rel_effect_size rel_effect_size_ci (0, 1) sessions_per_user 2.00 1.98 -0.66% [-3.7%, 2.5%] 0.674 (0, 1) orders_per_session 0.266 0.289 8.8% [-0.89%, 19%] 0.0762 (0, 1) orders_per_user 0.530 0.573 8.0% [-2.0%, 19%] 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% [2.1%, 28%] 0.0212 + (0, 1) revenue_per_user 5.24 5.99 14% [2.1%, 28%] 0.0211 (0, 2) sessions_per_user 2.00 2.02 0.98% [-2.1%, 4.1%] 0.532 (0, 2) orders_per_session 0.266 0.295 11% [1.2%, 22%] 0.0273 (0, 2) orders_per_user 0.530 0.594 12% [1.7%, 23%] 0.0213 @@ -100,7 +100,7 @@ False discovery rate (FDR) is the expected value of the proportion of false disc >>> print(adjusted_results_fdr) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.245 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0592 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0592 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0592 (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.0182 @@ -122,7 +122,7 @@ The method also adjusts the significance level alpha and saves it as `alpha_adj` ... ))) comparison metric control treatment rel_effect_size pvalue alpha_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.0240 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0120 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0120 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0180 (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00600 @@ -134,9 +134,9 @@ By default, **tea-tasting** assumes arbitrary dependence between hypotheses and >>> print(tt.adjust_fdr(results, metrics, arbitrary_dependence=False)) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0284 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0284 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0284 - (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00873 + (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00872 ``` @@ -148,9 +148,9 @@ Family-wise error rate (FWER) is the probability of making at least one type I e >>> print(tt.adjust_fwer(results, metrics)) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0635 - (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0635 - (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00873 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0634 + (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0634 + (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00872 ``` @@ -165,9 +165,9 @@ By default, **tea-tasting** assumes arbitrary dependence between hypotheses and ... )) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0422 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0422 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0422 - (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00870 + (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00869 ``` @@ -188,9 +188,9 @@ In the examples above, the methods `adjust_fdr` and `adjust_fwer` received resul ... )) comparison metric control treatment rel_effect_size pvalue pvalue_adj Experiment 1 orders_per_user 0.530 0.573 8.0% 0.118 0.245 -Experiment 1 revenue_per_user 5.24 5.99 14% 0.0212 0.0588 +Experiment 1 revenue_per_user 5.24 5.99 14% 0.0211 0.0587 Experiment 2 orders_per_user 0.514 0.594 16% 0.00427 0.0178 -Experiment 2 revenue_per_user 5.10 6.25 22% 6.27e-04 0.00523 +Experiment 2 revenue_per_user 5.10 6.25 22% 6.27e-04 0.00522 ``` diff --git a/docs/user-guide.md b/docs/user-guide.md index f66adb4..3505d87 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -62,7 +62,7 @@ user: [[0,1,2,3,4,...,3995,3996,3997,3998,3999]] variant: [[1,0,1,1,0,...,0,0,0,0,0]] sessions: [[2,2,2,2,1,...,2,2,3,1,5]] orders: [[1,1,1,1,1,...,0,0,0,0,2]] -revenue: [[9.166147128806545,6.4340787057460656,7.943873223822707,15.928674729738708,7.136917019113867,...,0,0,0,0,17.162458516177704]] +revenue: [[9.17,6.43,7.94,15.93,7.14,...,0,0,0,0,17.16]] ``` @@ -251,7 +251,7 @@ Example usage: sessions_per_user 2.00 1.98 -0.68% [-3.2%, 1.9%] 0.603 orders_per_session 0.262 0.293 12% [4.2%, 21%] 0.00229 orders_per_user 0.523 0.581 11% [2.9%, 20%] 0.00733 - revenue_per_user 5.12 5.85 14% [3.8%, 26%] 0.00675 + revenue_per_user 5.12 5.85 14% [3.8%, 26%] 0.00674 ``` diff --git a/src/tea_tasting/datasets.py b/src/tea_tasting/datasets.py index 5605c98..f6d8874 100644 --- a/src/tea_tasting/datasets.py +++ b/src/tea_tasting/datasets.py @@ -153,7 +153,7 @@ def make_users_data( variant: [[1,0,1,1,0,...,0,0,0,0,0]] sessions: [[2,2,2,2,1,...,2,2,3,1,5]] orders: [[1,1,1,1,1,...,0,0,0,0,2]] - revenue: [[9.166147128806545,6.4340787057460656,7.943873223822707,15.928674729738708,7.136917019113867,...,0,0,0,0,17.162458516177704]] + revenue: [[9.17,6.43,7.94,15.93,7.14,...,0,0,0,0,17.16]] ``` @@ -176,10 +176,10 @@ def make_users_data( variant: [[1,0,1,1,0,...,0,0,0,0,0]] sessions: [[2,2,2,2,1,...,2,2,3,1,5]] orders: [[1,1,1,1,1,...,0,0,0,0,2]] - revenue: [[9.166147128806545,6.4340787057460656,7.943873223822707,15.928674729738708,7.136917019113867,...,0,0,0,0,17.162458516177704]] + revenue: [[9.17,6.43,7.94,15.93,7.14,...,0,0,0,0,17.16]] sessions_covariate: [[3,4,4,1,1,...,1,3,2,1,5]] orders_covariate: [[2,1,2,0,1,...,0,1,0,0,0]] - revenue_covariate: [[19.191712010123307,2.7707490091913525,22.56842219448677,0,13.683796263730468,...,0,13.517967243105218,0,0,0]] + revenue_covariate: [[19.19,2.77,22.57,0,13.68,...,0,13.52,0,0,0]] ``` @@ -188,18 +188,18 @@ def make_users_data( ```pycon >>> data = tt.make_users_data(seed=42, return_type="pandas") >>> print(data) - user variant sessions orders revenue - 0 0 1 2 1 9.166147 - 1 1 0 2 1 6.434079 - 2 2 1 2 1 7.943873 - 3 3 1 2 1 15.928675 - 4 4 0 1 1 7.136917 - ... ... ... ... ... ... - 3995 3995 0 2 0 0.000000 - 3996 3996 0 2 0 0.000000 - 3997 3997 0 3 0 0.000000 - 3998 3998 0 1 0 0.000000 - 3999 3999 0 5 2 17.162459 + user variant sessions orders revenue + 0 0 1 2 1 9.17 + 1 1 0 2 1 6.43 + 2 2 1 2 1 7.94 + 3 3 1 2 1 15.93 + 4 4 0 1 1 7.14 + ... ... ... ... ... ... + 3995 3995 0 2 0 0.00 + 3996 3996 0 2 0 0.00 + 3997 3997 0 3 0 0.00 + 3998 3998 0 1 0 0.00 + 3999 3999 0 5 2 17.16 [4000 rows x 5 columns] @@ -211,26 +211,26 @@ def make_users_data( >>> data = tt.make_users_data(seed=42, return_type="polars") >>> print(data) shape: (4_000, 5) - ┌──────┬─────────┬──────────┬────────┬───────────┐ - │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ - ╞══════╪═════════╪══════════╪════════╪═══════════╡ - │ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.166147 │ - │ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.434079 │ - │ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 7.943873 │ - │ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 15.928675 │ - │ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.136917 │ - │ … ┆ … ┆ … ┆ … ┆ … │ - │ 3995 ┆ 0 ┆ 2 ┆ 0 ┆ 0.0 │ - │ 3996 ┆ 0 ┆ 2 ┆ 0 ┆ 0.0 │ - │ 3997 ┆ 0 ┆ 3 ┆ 0 ┆ 0.0 │ - │ 3998 ┆ 0 ┆ 1 ┆ 0 ┆ 0.0 │ - │ 3999 ┆ 0 ┆ 5 ┆ 2 ┆ 17.162459 │ - └──────┴─────────┴──────────┴────────┴───────────┘ + ┌──────┬─────────┬──────────┬────────┬─────────┐ + │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ + ╞══════╪═════════╪══════════╪════════╪═════════╡ + │ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.17 │ + │ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.43 │ + │ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 7.94 │ + │ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 15.93 │ + │ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.14 │ + │ … ┆ … ┆ … ┆ … ┆ … │ + │ 3995 ┆ 0 ┆ 2 ┆ 0 ┆ 0.0 │ + │ 3996 ┆ 0 ┆ 2 ┆ 0 ┆ 0.0 │ + │ 3997 ┆ 0 ┆ 3 ┆ 0 ┆ 0.0 │ + │ 3998 ┆ 0 ┆ 1 ┆ 0 ┆ 0.0 │ + │ 3999 ┆ 0 ┆ 5 ┆ 2 ┆ 17.16 │ + └──────┴─────────┴──────────┴────────┴─────────┘ ``` - """ # noqa: E501 + """ return _make_data( covariates=covariates, seed=seed, @@ -373,7 +373,7 @@ def make_sessions_data( variant: [[1,1,0,0,1,...,0,0,0,0,0]] sessions: [[1,1,1,1,1,...,1,1,1,1,1]] orders: [[1,1,1,1,1,...,1,0,1,1,0]] - revenue: [[5.88717816119309,6.131079903793326,2.614675492093661,12.296074812201192,11.573409274639534,...,23.63494099585371,0,2.396078290493153,24.538111422839766,0]] + revenue: [[5.89,6.13,2.61,12.3,11.57,...,23.63,0,2.4,24.54,0]] ``` @@ -396,10 +396,10 @@ def make_sessions_data( variant: [[1,1,0,0,1,...,0,0,0,0,0]] sessions: [[1,1,1,1,1,...,1,1,1,1,1]] orders: [[1,1,1,1,1,...,1,0,1,1,0]] - revenue: [[5.88717816119309,6.131079903793326,2.614675492093661,12.296074812201192,11.573409274639534,...,23.63494099585371,0,2.396078290493153,24.538111422839766,0]] + revenue: [[5.89,6.13,2.61,12.3,11.57,...,23.63,0,2.4,24.54,0]] sessions_covariate: [[1.5,1.5,0,0,1.5,...,0.2,0.2,0.2,0.2,0.2]] orders_covariate: [[0.5,0.5,0,0,1.5,...,0,0,0,0,0]] - revenue_covariate: [[1.2367323749905585,1.2367323749905585,0,0,12.324434081065741,...,0,0,0,0,0]] + revenue_covariate: [[1.24,1.24,0,0,12.32,...,0,0,0,0,0]] ``` @@ -408,18 +408,18 @@ def make_sessions_data( ```pycon >>> data = tt.make_sessions_data(seed=42, return_type="pandas") >>> print(data) - user variant sessions orders revenue - 0 0 1 1 1 5.887178 - 1 0 1 1 1 6.131080 - 2 1 0 1 1 2.614675 - 3 1 0 1 1 12.296075 - 4 2 1 1 1 11.573409 - ... ... ... ... ... ... - 7953 3999 0 1 1 23.634941 - 7954 3999 0 1 0 0.000000 - 7955 3999 0 1 1 2.396078 - 7956 3999 0 1 1 24.538111 - 7957 3999 0 1 0 0.000000 + user variant sessions orders revenue + 0 0 1 1 1 5.89 + 1 0 1 1 1 6.13 + 2 1 0 1 1 2.61 + 3 1 0 1 1 12.30 + 4 2 1 1 1 11.57 + ... ... ... ... ... ... + 7953 3999 0 1 1 23.63 + 7954 3999 0 1 0 0.00 + 7955 3999 0 1 1 2.40 + 7956 3999 0 1 1 24.54 + 7957 3999 0 1 0 0.00 [7958 rows x 5 columns] @@ -431,26 +431,26 @@ def make_sessions_data( >>> data = tt.make_sessions_data(seed=42, return_type="polars") >>> print(data) shape: (7_958, 5) - ┌──────┬─────────┬──────────┬────────┬───────────┐ - │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ - ╞══════╪═════════╪══════════╪════════╪═══════════╡ - │ 0 ┆ 1 ┆ 1 ┆ 1 ┆ 5.887178 │ - │ 0 ┆ 1 ┆ 1 ┆ 1 ┆ 6.13108 │ - │ 1 ┆ 0 ┆ 1 ┆ 1 ┆ 2.614675 │ - │ 1 ┆ 0 ┆ 1 ┆ 1 ┆ 12.296075 │ - │ 2 ┆ 1 ┆ 1 ┆ 1 ┆ 11.573409 │ - │ … ┆ … ┆ … ┆ … ┆ … │ - │ 3999 ┆ 0 ┆ 1 ┆ 1 ┆ 23.634941 │ - │ 3999 ┆ 0 ┆ 1 ┆ 0 ┆ 0.0 │ - │ 3999 ┆ 0 ┆ 1 ┆ 1 ┆ 2.396078 │ - │ 3999 ┆ 0 ┆ 1 ┆ 1 ┆ 24.538111 │ - │ 3999 ┆ 0 ┆ 1 ┆ 0 ┆ 0.0 │ - └──────┴─────────┴──────────┴────────┴───────────┘ + ┌──────┬─────────┬──────────┬────────┬─────────┐ + │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ + ╞══════╪═════════╪══════════╪════════╪═════════╡ + │ 0 ┆ 1 ┆ 1 ┆ 1 ┆ 5.89 │ + │ 0 ┆ 1 ┆ 1 ┆ 1 ┆ 6.13 │ + │ 1 ┆ 0 ┆ 1 ┆ 1 ┆ 2.61 │ + │ 1 ┆ 0 ┆ 1 ┆ 1 ┆ 12.3 │ + │ 2 ┆ 1 ┆ 1 ┆ 1 ┆ 11.57 │ + │ … ┆ … ┆ … ┆ … ┆ … │ + │ 3999 ┆ 0 ┆ 1 ┆ 1 ┆ 23.63 │ + │ 3999 ┆ 0 ┆ 1 ┆ 0 ┆ 0.0 │ + │ 3999 ┆ 0 ┆ 1 ┆ 1 ┆ 2.4 │ + │ 3999 ┆ 0 ┆ 1 ┆ 1 ┆ 24.54 │ + │ 3999 ┆ 0 ┆ 1 ┆ 0 ┆ 0.0 │ + └──────┴─────────┴──────────┴────────┴─────────┘ ``` - """ # noqa: E501 + """ return _make_data( covariates=covariates, seed=seed, @@ -537,7 +537,7 @@ def _make_data( "variant": variant[user], "sessions": sessions, "orders": orders, - "revenue": revenue, + "revenue": revenue.round(2), } if covariates: @@ -569,7 +569,7 @@ def _make_data( data |= { "sessions_covariate": sessions_covariate, "orders_covariate": orders_covariate, - "revenue_covariate": revenue_covariate, + "revenue_covariate": revenue_covariate.round(2), } if return_type == "pandas": diff --git a/src/tea_tasting/experiment.py b/src/tea_tasting/experiment.py index 649fac6..3640d8b 100644 --- a/src/tea_tasting/experiment.py +++ b/src/tea_tasting/experiment.py @@ -63,17 +63,17 @@ def to_dicts(self) -> tuple[dict[str, Any], ...]: 'rel_effect_size_ci_upper': 0.1906880061278886, 'statistic': 1.5647028839586707, 'treatment': 0.5730905412240769}, - {'control': 5.241078645860599, - 'effect_size': 0.48905301100469245, - 'effect_size_ci_lower': -0.13265634499246626, - 'effect_size_ci_upper': 1.1107623670018512, + {'control': 5.241028175976273, + 'effect_size': 0.4890831037404775, + 'effect_size_ci_lower': -0.13261881482742033, + 'effect_size_ci_upper': 1.1107850223083753, 'metric': 'revenue_per_user', - 'pvalue': np.float64(0.1230974173674023), - 'rel_effect_size': 0.09331151925967496, - 'rel_effect_size_ci_lower': -0.023744208691728885, - 'rel_effect_size_ci_upper': 0.22440254776265967, - 'statistic': 1.5422307220453753, - 'treatment': 5.730131656865291}) + 'pvalue': np.float64(0.1230698855425058), + 'rel_effect_size': 0.09331815958981626, + 'rel_effect_size_ci_lower': -0.02373770894855798, + 'rel_effect_size_ci_upper': 0.22440926894909308, + 'statistic': 1.5423440700784083, + 'treatment': 5.73011127971675}) ``` """ diff --git a/src/tea_tasting/metrics/mean.py b/src/tea_tasting/metrics/mean.py index e0b3c1c..4368e3b 100644 --- a/src/tea_tasting/metrics/mean.py +++ b/src/tea_tasting/metrics/mean.py @@ -823,7 +823,7 @@ def __init__( # noqa: PLR0913 >>> print(result) metric control treatment rel_effect_size rel_effect_size_ci pvalue orders_per_user 0.523 0.581 11% [2.9%, 20%] 0.00733 - revenue_per_user 5.12 5.85 14% [3.8%, 26%] 0.00675 + revenue_per_user 5.12 5.85 14% [3.8%, 26%] 0.00674 ``` diff --git a/src/tea_tasting/metrics/resampling.py b/src/tea_tasting/metrics/resampling.py index 39d1125..833d9f9 100644 --- a/src/tea_tasting/metrics/resampling.py +++ b/src/tea_tasting/metrics/resampling.py @@ -333,7 +333,7 @@ def __init__( >>> result = experiment.analyze(data) >>> print(result) metric control treatment rel_effect_size rel_effect_size_ci pvalue - revenue_per_user_p80 10.6 11.6 9.1% [-1.3%, 21%] - + revenue_per_user_p80 10.6 11.6 9.1% [-1.2%, 21%] - ``` """ # noqa: E501 diff --git a/src/tea_tasting/multiplicity.py b/src/tea_tasting/multiplicity.py index 434e63b..06322cd 100644 --- a/src/tea_tasting/multiplicity.py +++ b/src/tea_tasting/multiplicity.py @@ -119,23 +119,23 @@ def adjust_fdr( ... )) >>> print(data) shape: (6_046, 5) - ┌──────┬─────────┬──────────┬────────┬───────────┐ - │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ - ╞══════╪═════════╪══════════╪════════╪═══════════╡ - │ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.58279 │ - │ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.434079 │ - │ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 8.304958 │ - │ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 16.652705 │ - │ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.136917 │ - │ … ┆ … ┆ … ┆ … ┆ … │ - │ 3989 ┆ 2 ┆ 4 ┆ 4 ┆ 34.931448 │ - │ 3991 ┆ 2 ┆ 1 ┆ 0 ┆ 0.0 │ - │ 3992 ┆ 2 ┆ 3 ┆ 3 ┆ 27.964647 │ - │ 3994 ┆ 2 ┆ 2 ┆ 1 ┆ 17.217892 │ - │ 3998 ┆ 2 ┆ 3 ┆ 0 ┆ 0.0 │ - └──────┴─────────┴──────────┴────────┴───────────┘ + ┌──────┬─────────┬──────────┬────────┬─────────┐ + │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ + ╞══════╪═════════╪══════════╪════════╪═════════╡ + │ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.58 │ + │ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.43 │ + │ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 8.3 │ + │ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 16.65 │ + │ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.14 │ + │ … ┆ … ┆ … ┆ … ┆ … │ + │ 3989 ┆ 2 ┆ 4 ┆ 4 ┆ 34.93 │ + │ 3991 ┆ 2 ┆ 1 ┆ 0 ┆ 0.0 │ + │ 3992 ┆ 2 ┆ 3 ┆ 3 ┆ 27.96 │ + │ 3994 ┆ 2 ┆ 2 ┆ 1 ┆ 17.22 │ + │ 3998 ┆ 2 ┆ 3 ┆ 0 ┆ 0.0 │ + └──────┴─────────┴──────────┴────────┴─────────┘ >>> experiment = tt.Experiment( ... sessions_per_user=tt.Mean("sessions"), @@ -151,7 +151,7 @@ def adjust_fdr( (0, 1) sessions_per_user 2.00 1.98 -0.66% [-3.7%, 2.5%] 0.674 (0, 1) orders_per_session 0.266 0.289 8.8% [-0.89%, 19%] 0.0762 (0, 1) orders_per_user 0.530 0.573 8.0% [-2.0%, 19%] 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% [2.1%, 28%] 0.0212 + (0, 1) revenue_per_user 5.24 5.99 14% [2.1%, 28%] 0.0211 (0, 2) sessions_per_user 2.00 2.02 0.98% [-2.1%, 4.1%] 0.532 (0, 2) orders_per_session 0.266 0.295 11% [1.2%, 22%] 0.0273 (0, 2) orders_per_user 0.530 0.594 12% [1.7%, 23%] 0.0213 @@ -166,7 +166,7 @@ def adjust_fdr( >>> print(adjusted_results_fdr) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.245 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0592 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0592 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0592 (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.0182 @@ -182,7 +182,7 @@ def adjust_fdr( ... ))) comparison metric control treatment rel_effect_size pvalue alpha_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.0240 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0120 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0120 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0180 (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00600 @@ -191,9 +191,9 @@ def adjust_fdr( >>> print(tt.adjust_fdr(results, metrics, arbitrary_dependence=False)) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0284 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0284 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0284 - (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00873 + (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00872 ``` """ # noqa: E501 @@ -297,23 +297,23 @@ def adjust_fwer( ... )) >>> print(data) shape: (6_046, 5) - ┌──────┬─────────┬──────────┬────────┬───────────┐ - │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ - ╞══════╪═════════╪══════════╪════════╪═══════════╡ - │ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.58279 │ - │ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.434079 │ - │ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 8.304958 │ - │ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 16.652705 │ - │ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.136917 │ - │ … ┆ … ┆ … ┆ … ┆ … │ - │ 3989 ┆ 2 ┆ 4 ┆ 4 ┆ 34.931448 │ - │ 3991 ┆ 2 ┆ 1 ┆ 0 ┆ 0.0 │ - │ 3992 ┆ 2 ┆ 3 ┆ 3 ┆ 27.964647 │ - │ 3994 ┆ 2 ┆ 2 ┆ 1 ┆ 17.217892 │ - │ 3998 ┆ 2 ┆ 3 ┆ 0 ┆ 0.0 │ - └──────┴─────────┴──────────┴────────┴───────────┘ + ┌──────┬─────────┬──────────┬────────┬─────────┐ + │ user ┆ variant ┆ sessions ┆ orders ┆ revenue │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 │ + ╞══════╪═════════╪══════════╪════════╪═════════╡ + │ 0 ┆ 1 ┆ 2 ┆ 1 ┆ 9.58 │ + │ 1 ┆ 0 ┆ 2 ┆ 1 ┆ 6.43 │ + │ 2 ┆ 1 ┆ 2 ┆ 1 ┆ 8.3 │ + │ 3 ┆ 1 ┆ 2 ┆ 1 ┆ 16.65 │ + │ 4 ┆ 0 ┆ 1 ┆ 1 ┆ 7.14 │ + │ … ┆ … ┆ … ┆ … ┆ … │ + │ 3989 ┆ 2 ┆ 4 ┆ 4 ┆ 34.93 │ + │ 3991 ┆ 2 ┆ 1 ┆ 0 ┆ 0.0 │ + │ 3992 ┆ 2 ┆ 3 ┆ 3 ┆ 27.96 │ + │ 3994 ┆ 2 ┆ 2 ┆ 1 ┆ 17.22 │ + │ 3998 ┆ 2 ┆ 3 ┆ 0 ┆ 0.0 │ + └──────┴─────────┴──────────┴────────┴─────────┘ >>> experiment = tt.Experiment( ... sessions_per_user=tt.Mean("sessions"), @@ -329,7 +329,7 @@ def adjust_fwer( (0, 1) sessions_per_user 2.00 1.98 -0.66% [-3.7%, 2.5%] 0.674 (0, 1) orders_per_session 0.266 0.289 8.8% [-0.89%, 19%] 0.0762 (0, 1) orders_per_user 0.530 0.573 8.0% [-2.0%, 19%] 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% [2.1%, 28%] 0.0212 + (0, 1) revenue_per_user 5.24 5.99 14% [2.1%, 28%] 0.0211 (0, 2) sessions_per_user 2.00 2.02 0.98% [-2.1%, 4.1%] 0.532 (0, 2) orders_per_session 0.266 0.295 11% [1.2%, 22%] 0.0273 (0, 2) orders_per_user 0.530 0.594 12% [1.7%, 23%] 0.0213 @@ -344,9 +344,9 @@ def adjust_fwer( >>> print(adjusted_results_fwer) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0635 - (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0635 - (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00873 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0634 + (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0634 + (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00872 >>> # The adjusted confidence level alpha. >>> print(adjusted_results_fwer.to_string(keys=( @@ -360,7 +360,7 @@ def adjust_fwer( ... ))) comparison metric control treatment rel_effect_size pvalue alpha_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.0167 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0167 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0167 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0167 (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.0125 @@ -374,9 +374,9 @@ def adjust_fwer( ... )) comparison metric control treatment rel_effect_size pvalue pvalue_adj (0, 1) orders_per_user 0.530 0.573 8.0% 0.118 0.118 - (0, 1) revenue_per_user 5.24 5.99 14% 0.0212 0.0422 + (0, 1) revenue_per_user 5.24 5.99 14% 0.0211 0.0422 (0, 2) orders_per_user 0.530 0.594 12% 0.0213 0.0422 - (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00870 + (0, 2) revenue_per_user 5.24 6.25 19% 0.00218 0.00869 ``` """ # noqa: E501, RUF002 diff --git a/tests/metrics/test_resampling.py b/tests/metrics/test_resampling.py index 01ed997..b175c92 100644 --- a/tests/metrics/test_resampling.py +++ b/tests/metrics/test_resampling.py @@ -87,14 +87,14 @@ def test_bootstrap_analyze_default(data_gran: dict[Any, pa.Table]): ) result = metric.analyze(data_gran, 0, 1) assert isinstance(result, tea_tasting.metrics.resampling.BootstrapResult) - assert result.control == pytest.approx(5.029606016096378) - assert result.treatment == pytest.approx(5.430045947447926) - assert result.effect_size == pytest.approx(0.4004399313515483) - assert result.effect_size_ci_lower == pytest.approx(-3.269115518352006) - assert result.effect_size_ci_upper == pytest.approx(7.220410053935425) - assert result.rel_effect_size == pytest.approx(0.07961656043634635) - assert result.rel_effect_size_ci_lower == pytest.approx(-0.5658060166766641) - assert result.rel_effect_size_ci_upper == pytest.approx(1.8185107973505807) + assert result.control == pytest.approx(5.029811320754717) + assert result.treatment == pytest.approx(5.43) + assert result.effect_size == pytest.approx(0.4001886792452831) + assert result.effect_size_ci_lower == pytest.approx(-3.269396309565539) + assert result.effect_size_ci_upper == pytest.approx(7.219843380442667) + assert result.rel_effect_size == pytest.approx(0.07956335809137971) + assert result.rel_effect_size_ci_lower == pytest.approx(-0.5658493834599828) + assert result.rel_effect_size_ci_upper == pytest.approx(1.8185473860534842) def test_bootstrap_analyze_multiple_columns(data_gran: dict[Any, pa.Table]): def ratio_of_means( @@ -153,11 +153,11 @@ def test_quantile(data_gran: dict[Any, pa.Table]): assert metric.q == 0.8 result = metric.analyze(data_gran, 0, 1) assert isinstance(result, tea_tasting.metrics.resampling.BootstrapResult) - assert result.control == pytest.approx(11.97241622964322) - assert result.treatment == pytest.approx(6.283899054876212) - assert result.effect_size == pytest.approx(-5.688517174767009) - assert result.effect_size_ci_lower == pytest.approx(-10.875502551863555) + assert result.control == pytest.approx(11.972000000000001) + assert result.treatment == pytest.approx(6.2820000000000045) + assert result.effect_size == pytest.approx(-5.689999999999997) + assert result.effect_size_ci_lower == pytest.approx(-10.875800000000003) assert result.effect_size_ci_upper == float("inf") - assert result.rel_effect_size == pytest.approx(-0.4751352664036579 ) - assert result.rel_effect_size_ci_lower == pytest.approx(-0.8744367099313992) + assert result.rel_effect_size == pytest.approx(-0.47527564316739024) + assert result.rel_effect_size_ci_lower == pytest.approx(-0.8743329817472134) assert result.rel_effect_size_ci_upper == float("inf")