From e2d16dc36bf37d1cc3fc2bcf27995fbfd15b5fe7 Mon Sep 17 00:00:00 2001 From: Carlos Timoteo Date: Thu, 21 Sep 2023 18:19:49 -0400 Subject: [PATCH] Improve backfill procedures (#35) * Update invoke_backfill_customer_lifetime_value_label.sqlx * rewriting the backfill queries avoiding the for loop * templating backfill cltv label * templating the other queries * fixing typo * fixing typo on backfill user dimensions * adjust post-installation instructions * substitute create temp table for CREATE OR REPLACE temp table * bug fixing user_session_event_aggregated_metrics * bug fixing dates_interval on backfill user_segmentation_dimensions * bug fixing dimensions and labels features calculations * bug fixing dimensions and labels features calculations * fixing typos * removing nth_* features and fixing typos * removing nth_* features * removing fixing first_traffic features * removing unnecessary features and adjusting calculations to avoid duplicates * fixing purchases labels and user_ltv_revenue values dependencies so that labels dont count todays purchases * fixing purchases labels andremoved features from tables schemas * removing duplicate rows * adding more sp invokes to readme * fixing typo * fixing typo * fixing missing feature on dimensions procedures * fixing removing unnecessary features from audience segmentation inference prep --------- Co-authored-by: Carlos Timoteo --- config/config.yaml.tftpl | 64 +- infrastructure/terraform/README.md | 27 +- ...ce_segmentation_inference_preparation.sqlx | 33 +- ...nce_segmentation_training_preparation.sqlx | 12 +- ..._lifetime_value_inference_preparation.sqlx | 73 +- ...r_lifetime_value_training_preparation.sqlx | 59 +- ...hase_propensity_inference_preparation.sqlx | 68 +- sql/procedure/purchase_propensity_label.sqlx | 3 - ...chase_propensity_training_preparation.sqlx | 3503 ++++++++--------- sql/procedure/user_dimensions.sqlx | 214 +- sql/procedure/user_lifetime_dimensions.sqlx | 217 +- 
.../user_segmentation_dimensions.sqlx | 240 +- ...user_session_event_aggregated_metrics.sqlx | 547 +-- ...ackfill_customer_lifetime_value_label.sqlx | 89 +- ...ke_backfill_purchase_propensity_label.sqlx | 109 +- .../invoke_backfill_user_dimensions.sqlx | 126 +- ...oke_backfill_user_lifetime_dimensions.sqlx | 127 +- ...invoke_backfill_user_lookback_metrics.sqlx | 197 +- ..._user_rolling_window_lifetime_metrics.sqlx | 252 +- ..._backfill_user_rolling_window_metrics.sqlx | 250 +- ...backfill_user_scoped_lifetime_metrics.sqlx | 148 +- .../invoke_backfill_user_scoped_metrics.sqlx | 148 +- ...fill_user_scoped_segmentation_metrics.sqlx | 147 +- ...backfill_user_segmentation_dimensions.sqlx | 126 +- ...user_session_event_aggregated_metrics.sqlx | 328 +- ...ce_segmentation_inference_preparation.json | 25 - ..._lifetime_value_inference_preparation.json | 25 - ...hase_propensity_inference_preparation.json | 25 - .../table/purchase_propensity_label.json | 5 - sql/schema/table/user_dimensions.json | 25 - .../table/user_lifetime_dimensions.json | 25 - .../table/user_segmentation_dimensions.json | 25 - 32 files changed, 4165 insertions(+), 3097 deletions(-) diff --git a/config/config.yaml.tftpl b/config/config.yaml.tftpl index 479d9f25..37d2f25c 100644 --- a/config/config.yaml.tftpl +++ b/config/config.yaml.tftpl @@ -563,80 +563,80 @@ bigquery: mds_project_id: "${project_id}" mds_dataset: "${mds_dataset}" invoke_backfill_user_lifetime_dimensions: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_lifetime_dimensions" + insert_table: "user_lifetime_dimensions" interval_max_date: 180 interval_min_date: 180 interval_end_date: 180 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" invoke_backfill_user_scoped_lifetime_metrics: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: 
"user_scoped_lifetime_metrics" + insert_table: "user_scoped_lifetime_metrics" interval_max_date: 180 interval_min_date: 180 interval_end_date: 180 + invoke_backfill_user_session_event_aggregated_metrics: mds_project_id: "${project_id}" mds_dataset: "${mds_dataset}" - invoke_backfill_user_session_event_aggregated_metrics: project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_session_event_aggregated_metrics" + insert_table: "user_session_event_aggregated_metrics" interval_max_date: 15 interval_min_date: 30 interval_end_date: 30 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" invoke_backfill_customer_lifetime_value_label: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "customer_lifetime_value_label" + insert_table: "customer_lifetime_value_label" interval_max_date: 180 interval_min_date: 180 interval_end_date: 180 + invoke_backfill_user_lookback_metrics: mds_project_id: "${project_id}" mds_dataset: "${mds_dataset}" - invoke_backfill_user_lookback_metrics: project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_lookback_metrics" + insert_table: "user_lookback_metrics" interval_min_date: 15 interval_end_date: 15 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" invoke_backfill_user_rolling_window_lifetime_metrics: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_rolling_window_lifetime_metrics" + insert_table: "user_rolling_window_lifetime_metrics" interval_max_date: 180 interval_min_date: 180 interval_end_date: 180 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" invoke_backfill_user_scoped_segmentation_metrics: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_scoped_segmentation_metrics" + 
insert_table: "user_scoped_segmentation_metrics" interval_min_date: 15 - interval_end_date: 15 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" + interval_end_date: 15 invoke_backfill_user_segmentation_dimensions: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_segmentation_dimensions" + insert_table: "user_segmentation_dimensions" interval_min_date: 15 interval_end_date: 15 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" invoke_backfill_purchase_propensity_label: mds_project_id: "${project_id}" mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "purchase_propensity_label" + insert_table: "purchase_propensity_label" interval_max_date: 15 interval_min_date: 30 interval_end_date: 30 @@ -645,28 +645,28 @@ bigquery: mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_dimensions" + insert_table: "user_dimensions" interval_max_date: 15 interval_min_date: 30 interval_end_date: 30 invoke_backfill_user_rolling_window_metrics: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_rolling_window_metrics" + insert_table: "user_rolling_window_metrics" interval_max_date: 15 interval_min_date: 30 interval_end_date: 30 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" invoke_backfill_user_scoped_metrics: + mds_project_id: "${project_id}" + mds_dataset: "${mds_dataset}" project_id: "${project_id}" dataset: "feature_store" - stored_procedure: "user_scoped_metrics" + insert_table: "user_scoped_metrics" interval_max_date: 15 interval_min_date: 30 - interval_end_date: 30 - mds_project_id: "${project_id}" - mds_dataset: "${mds_dataset}" + interval_end_date: 30 invoke_customer_lifetime_value_label: project_id: "${project_id}" dataset: 
"feature_store" diff --git a/infrastructure/terraform/README.md b/infrastructure/terraform/README.md index bc934602..f00fbc02 100644 --- a/infrastructure/terraform/README.md +++ b/infrastructure/terraform/README.md @@ -115,27 +115,38 @@ To manually start the data flow you must perform the following tasks: On the Google Cloud console, navigate to Workflows page. You will see a Workflow named `dataform-prod-incremental`, then under Actions, click on the three dots and `Execute` the Workflow. - **Note:** If you have a considerable amount of data (>XX GBs of data) in your exported GA4 and Ads BigQuery datasets, it can take several minutes or hours to process all the data. Make sure that the processing has completed successfully before you continue to the next step. + **Note:** If you have a considerable amount of data (>XXX GBs of data) in your exported GA4 and Ads BigQuery datasets, it can take several minutes or hours to process all the data. Make sure that the processing has completed successfully before you continue to the next step. 1. Invoke the BigQuery stored procedures having the prefix `invoke_backfill_*` to backfill the feature store in case the GA4 Export has been enabled a long time ago before installing MDE. On the Google Cloud console, navigate to BigQuery page. On the query composer, run the following queries to invoke the stored procedures. 
```sql + ## Backfill customer ltv tables CALL `feature_store.invoke_backfill_customer_lifetime_value_label`(); - CALL `feature_store.invoke_backfill_purchase_propensity_label`(); - CALL `feature_store.invoke_backfill_user_dimensions`(); CALL `feature_store.invoke_backfill_user_lifetime_dimensions`(); - CALL `feature_store.invoke_backfill_user_lookback_metrics`(); CALL `feature_store.invoke_backfill_user_rolling_window_lifetime_metrics`(); - CALL `feature_store.invoke_backfill_user_rolling_window_metrics`(); CALL `feature_store.invoke_backfill_user_scoped_lifetime_metrics`(); + CALL `customer_lifetime_value.invoke_customer_lifetime_value_training_preparation`(); + CALL `customer_lifetime_value.invoke_customer_lifetime_value_inference_preparation`(); + + ## Backfill purchase propensity tables + CALL `feature_store.invoke_backfill_user_dimensions`(); + CALL `feature_store.invoke_backfill_user_rolling_window_metrics`(); CALL `feature_store.invoke_backfill_user_scoped_metrics`(); - CALL `feature_store.invoke_backfill_user_scoped_segmentation_metrics`(); - CALL `feature_store.invoke_backfill_user_segmentation_dimensions`(); CALL `feature_store.invoke_backfill_user_session_event_aggregated_metrics`(); + CALL `feature_store.invoke_backfill_purchase_propensity_label`(); + CALL `purchase_propensity.invoke_purchase_propensity_training_preparation`(); + CALL `purchase_propensity.invoke_purchase_propensity_inference_preparation`(); + + ## Backfill audience segmentation tables + CALL `feature_store.invoke_backfill_user_segmentation_dimensions`(); + CALL `feature_store.invoke_backfill_user_lookback_metrics`(); + CALL `feature_store.invoke_backfill_user_scoped_segmentation_metrics`(); + CALL `audience_segmentation.invoke_audience_segmentation_training_preparation`(); + CALL `audience_segmentation.invoke_audience_segmentation_inference_preparation`(); ``` - **Note:** If you have a considerable amount of data (>XX GBs of data) in your exported GA4 BigQuery datasets over the last six 
months, it can take several hours to backfill the feature data so that you can train your ML model. Make sure that backfilling doesn't fail in the first several minutes before you continue to the next step. + **Note:** If you have a considerable amount of data (>XXX GBs of data) in your exported GA4 BigQuery datasets over the last six months, it can take several hours to backfill the feature data so that you can train your ML model. Make sure that the backfill procedures start without errors before you continue to the next step. 1. Redeploy the ML pipelines using Terraform. diff --git a/sql/procedure/audience_segmentation_inference_preparation.sqlx b/sql/procedure/audience_segmentation_inference_preparation.sqlx index 05b6ac1c..d6ab0eb5 100644 --- a/sql/procedure/audience_segmentation_inference_preparation.sqlx +++ b/sql/procedure/audience_segmentation_inference_preparation.sqlx @@ -16,7 +16,7 @@ SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); CREATE TEMP TABLE inference_preparation AS ( - SELECT + SELECT DISTINCT UD.user_pseudo_id, UD.user_id, UD.feature_date, @@ -24,11 +24,6 @@ CREATE TEMP TABLE inference_preparation AS ( UD.week_of_the_year, UD.day_of_the_month, UD.day_of_week, - UD.hour_of_day, - UD.nth_day, - UD.nth_hour, - UD.nth_week, - UD.nth_month, UD.device_category, UD.device_mobile_brand_name, UD.device_mobile_model_name, @@ -102,6 +97,8 @@ CREATE TEMP TABLE inference_preparation AS ( WHERE -- Define the training+validation subset interval UD.feature_date = inference_date ); + + INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (feature_date, user_pseudo_id, @@ -110,11 +107,6 @@ INSERT INTO week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -182,11 +174,11 @@ SELECT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, + NULL as hour_of_day, 
+ NULL as nth_day, + NULL as nth_hour, + NULL as nth_week, + NULL as nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -251,7 +243,7 @@ FROM CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.audience_segmentation_inference_15` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, user_pseudo_id, @@ -260,11 +252,6 @@ CREATE OR REPLACE TABLE week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -334,7 +321,6 @@ CREATE OR REPLACE VIEW user_id, day_of_the_month, day_of_week, - hour_of_day, device_category, device_mobile_model_name, device_os_version, @@ -372,7 +358,6 @@ SELECT user_id, day_of_the_month, day_of_week, - hour_of_day, device_category, device_mobile_model_name, device_os_version, diff --git a/sql/procedure/audience_segmentation_training_preparation.sqlx b/sql/procedure/audience_segmentation_training_preparation.sqlx index 0da55c4b..1716c4ec 100644 --- a/sql/procedure/audience_segmentation_training_preparation.sqlx +++ b/sql/procedure/audience_segmentation_training_preparation.sqlx @@ -20,7 +20,7 @@ SELECT max_date; SELECT min_date; CREATE TEMP TABLE training_preparation as ( - SELECT + SELECT DISTINCT UD.user_pseudo_id, UD.user_id, UD.feature_date, @@ -28,11 +28,6 @@ CREATE TEMP TABLE training_preparation as ( UD.week_of_the_year, UD.day_of_the_month, UD.day_of_week, - UD.hour_of_day, - UD.nth_day, - UD.nth_hour, - UD.nth_week, - UD.nth_month, UD.device_category, UD.device_mobile_brand_name, UD.device_mobile_model_name, @@ -121,7 +116,6 @@ CREATE TEMP TABLE DataForTargetTable AS( user_id, day_of_the_month, day_of_week, - hour_of_day, device_category, device_mobile_model_name, device_os_version, @@ -163,7 +157,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.audience_segmentation_traini user_id, day_of_the_month, day_of_week, - hour_of_day, 
device_category, device_mobile_model_name, device_os_version, @@ -201,7 +194,6 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_audience_segmentation_train user_id, day_of_the_month, day_of_week, - hour_of_day, device_category, device_mobile_model_name, device_os_version, @@ -243,7 +235,6 @@ SELECT user_id, day_of_the_month, day_of_week, - hour_of_day, device_category, device_mobile_model_name, device_os_version, @@ -279,7 +270,6 @@ FROM ( user_id, day_of_the_month, day_of_week, - hour_of_day, device_category, device_mobile_model_name, device_os_version, diff --git a/sql/procedure/customer_lifetime_value_inference_preparation.sqlx b/sql/procedure/customer_lifetime_value_inference_preparation.sqlx index c8b6c33e..dbb1c349 100644 --- a/sql/procedure/customer_lifetime_value_inference_preparation.sqlx +++ b/sql/procedure/customer_lifetime_value_inference_preparation.sqlx @@ -16,7 +16,7 @@ SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); CREATE TEMP TABLE inference_preparation as ( - SELECT + SELECT DISTINCT UD.user_pseudo_id, UD.user_id, UD.feature_date, @@ -24,11 +24,11 @@ CREATE TEMP TABLE inference_preparation as ( UD.week_of_the_year, UD.day_of_the_month, UD.day_of_week, - UD.hour_of_day, - UD.nth_day, - UD.nth_hour, - UD.nth_week, - UD.nth_month, + --UD.hour_of_day, + --UD.nth_day, + --UD.nth_hour, + --UD.nth_week, + --UD.nth_month, UD.device_category, UD.device_mobile_brand_name, UD.device_mobile_model_name, @@ -139,11 +139,6 @@ INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -237,11 +232,6 @@ feature_date, week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -330,7 +320,7 @@ feature_date, CREATE OR REPLACE TABLE 
`{{project_id}}.{{dataset}}.customer_lifetime_value_inference_180_30` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, user_pseudo_id, @@ -339,11 +329,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_infe week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -432,7 +417,7 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_infe ); CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_inference_180_90` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, user_pseudo_id, @@ -441,11 +426,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_infe week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -534,7 +514,7 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_infe ); CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_inference_180_180` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, user_pseudo_id, @@ -543,11 +523,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_infe week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -645,11 +620,6 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_customer_lifetime_value_inf week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -749,11 +719,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, 
- hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -852,11 +817,6 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_customer_lifetime_value_inf week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -956,11 +916,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -1057,11 +1012,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -1161,11 +1111,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, diff --git a/sql/procedure/customer_lifetime_value_training_preparation.sqlx b/sql/procedure/customer_lifetime_value_training_preparation.sqlx index eddb1751..debdf17f 100644 --- a/sql/procedure/customer_lifetime_value_training_preparation.sqlx +++ b/sql/procedure/customer_lifetime_value_training_preparation.sqlx @@ -20,7 +20,7 @@ SELECT max_date; SELECT min_date; CREATE TEMP TABLE training_preparation as ( - SELECT + SELECT DISTINCT UD.user_pseudo_id, UD.user_id, UD.feature_date, @@ -28,11 +28,6 @@ CREATE TEMP TABLE training_preparation as ( UD.week_of_the_year, UD.day_of_the_month, UD.day_of_week, - UD.hour_of_day, - UD.nth_day, - UD.nth_hour, - UD.nth_week, - UD.nth_month, UD.device_category, UD.device_mobile_brand_name, UD.device_mobile_model_name, @@ -156,11 +151,6 @@ CREATE TEMP TABLE DataForTargetTable AS( week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - 
nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -256,7 +246,7 @@ WHERE data_split IS NOT NULL; CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_training_180_30` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, data_split, feature_date, @@ -266,11 +256,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_trai week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -370,11 +355,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_trai week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -474,11 +454,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.customer_lifetime_value_trai week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -577,11 +552,6 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_customer_lifetime_value_tra week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -683,11 +653,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -788,11 +753,6 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_customer_lifetime_value_tra week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -894,11 +854,6 @@ SELECT DISTINCT 
week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -997,11 +952,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -1103,11 +1053,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, device_category, device_mobile_brand_name, device_mobile_model_name, diff --git a/sql/procedure/purchase_propensity_inference_preparation.sqlx b/sql/procedure/purchase_propensity_inference_preparation.sqlx index 2e21e974..9b671078 100644 --- a/sql/procedure/purchase_propensity_inference_preparation.sqlx +++ b/sql/procedure/purchase_propensity_inference_preparation.sqlx @@ -24,11 +24,6 @@ CREATE TEMP TABLE inference_preparation as ( UD.week_of_the_year, UD.day_of_the_month, UD.day_of_week, - UD.hour_of_day, - UD.nth_day, - UD.nth_hour, - UD.nth_week, - UD.nth_month, UD.user_ltv_revenue, UD.device_category, UD.device_mobile_brand_name, @@ -191,11 +186,6 @@ INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -336,12 +326,7 @@ feature_date, week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, device_category, device_mobile_brand_name, device_mobile_model_name, @@ -476,7 +461,7 @@ FROM inference_preparation; CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inference_30_15` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, 
user_pseudo_id, @@ -485,11 +470,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inferenc week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -625,7 +605,7 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inferenc ); CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inference_15_15` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, user_pseudo_id, @@ -634,11 +614,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inferenc week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -774,7 +749,7 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inferenc ); CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inference_15_7` AS( - SELECT + SELECT DISTINCT CURRENT_TIMESTAMP() AS processed_timestamp, feature_date, user_pseudo_id, @@ -783,11 +758,6 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_inferenc week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -931,11 +901,6 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_inferen week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -1082,11 +1047,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -1232,11 +1192,6 @@ CREATE OR REPLACE VIEW 
`{{project_id}}.{{dataset}}.v_purchase_propensity_inferen week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -1384,11 +1339,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -1532,11 +1482,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, @@ -1683,11 +1628,6 @@ SELECT DISTINCT week_of_the_year, day_of_the_month, day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, user_ltv_revenue, device_category, device_mobile_brand_name, diff --git a/sql/procedure/purchase_propensity_label.sqlx b/sql/procedure/purchase_propensity_label.sqlx index b8634937..029df0d3 100644 --- a/sql/procedure/purchase_propensity_label.sqlx +++ b/sql/procedure/purchase_propensity_label.sqlx @@ -22,7 +22,6 @@ CREATE TEMP TABLE future_purchases_per_user AS ( SELECT user_pseudo_id, input_date as event_date, - MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 0 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id) AS purchase_day_0, MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 1 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id) AS purchase_day_1, MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 2 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id) AS purchase_day_2, MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 3 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id) AS purchase_day_3, @@ -54,7 +53,6 @@ CREATE TEMP TABLE future_purchases_per_user AS ( SELECT DISTINCT Users.user_pseudo_id, 
Days.event_date as event_date, - NULL as purchase_day_0, NULL as purchase_day_1, NULL as purchase_day_2, NULL as purchase_day_3, @@ -85,7 +83,6 @@ CREATE TEMP TABLE DataForTargetTable AS CURRENT_TIMESTAMP() AS processed_timestamp, A.event_date as feature_date, A.user_pseudo_id, - LEAST(COALESCE(B.purchase_day_0, 0), 1) AS purchase_day_0, LEAST(COALESCE(B.purchase_day_1, 0), 1) AS purchase_day_1, LEAST(COALESCE(B.purchase_day_2, 0), 1) AS purchase_day_2, LEAST(COALESCE(B.purchase_day_3, 0), 1) AS purchase_day_3, diff --git a/sql/procedure/purchase_propensity_training_preparation.sqlx b/sql/procedure/purchase_propensity_training_preparation.sqlx index bc32f96a..d2bd13ac 100644 --- a/sql/procedure/purchase_propensity_training_preparation.sqlx +++ b/sql/procedure/purchase_propensity_training_preparation.sqlx @@ -1,1781 +1,1726 @@ --- Copyright 2023 Google LLC --- --- Licensed under the Apache License, Version 2.0 (the "License"); --- you may not use this file except in compliance with the License. --- You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. 
- -DECLARE max_date DATE; -DECLARE min_date DATE; -SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL 15 DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL 30 DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; - -CREATE TEMP TABLE training_preparation as ( +CREATE OR REPLACE PROCEDURE `{{project_id}}.{{dataset}}.purchase_propensity_training_preparation`(INOUT start_date DATE, INOUT end_date DATE, INOUT train_split_end_number INT64, INOUT validation_split_end_number INT64) +OPTIONS (description="Procedure that prepares features for Purchase Propensity model training. User-per-day granularity level features. Run this procedure every time before Purchase Propensity model train.") +BEGIN + -- Copyright 2023 Google LLC + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. 
+ + DECLARE max_date DATE; + DECLARE min_date DATE; + SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL 15 DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); + SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL 30 DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); + + CREATE TEMP TABLE training_preparation as ( + SELECT DISTINCT + UD.user_pseudo_id, + UD.user_id, + UD.feature_date, + UD.month_of_the_year, + UD.week_of_the_year, + UD.day_of_the_month, + UD.day_of_week, + UD.user_ltv_revenue, + UD.device_category, + UD.device_mobile_brand_name, + UD.device_mobile_model_name, + UD.device_os, + UD.device_os_version, + UD.device_language, + UD.device_web_browser, + UD.device_web_browser_version, + UD.geo_sub_continent, + UD.geo_country, + UD.geo_region, + UD.geo_city, + UD.geo_metro, + UD.last_traffic_source_medium, + UD.last_traffic_source_name, + UD.last_traffic_source_source, + UD.first_traffic_source_medium, + UD.first_traffic_source_name, + UD.first_traffic_source_source, + UD.has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + active_users_past_15_30_day, + 
purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + purchase_day_1, + purchase_day_2, + purchase_day_3, + purchase_day_4, + purchase_day_5, + purchase_day_6, + purchase_day_7, + purchase_day_8, + purchase_day_9, + purchase_day_10, + purchase_day_11, + purchase_day_12, + purchase_day_13, + purchase_day_14 + FROM + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_dimensions` UD + 
INNER JOIN + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_session_event_aggregated_metrics` USEAM + ON + USEAM.user_pseudo_id = UD.user_pseudo_id + AND USEAM.feature_date = UD.feature_date + INNER JOIN + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_metrics` UWM + ON + UWM.user_pseudo_id = UD.user_pseudo_id + AND UWM.feature_date = UD.feature_date + INNER JOIN + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_metrics` UM + ON + UM.feature_date = UD.feature_date + INNER JOIN + `{{feature_store_project_id}}.{{feature_store_dataset}}.purchase_propensity_label` LABEL + ON + LABEL.user_pseudo_id = UD.user_pseudo_id + AND UD.feature_date = LABEL.feature_date + WHERE + -- Define the training subset interval + UD.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) + ); + + + CREATE TEMP TABLE DataForTargetTable AS( + SELECT DISTINCT + CASE + WHEN (ABS(MOD(FARM_FINGERPRINT(user_pseudo_id), 10)) BETWEEN 0 AND train_split_end_number) THEN "TRAIN" + WHEN (ABS(MOD(FARM_FINGERPRINT(user_pseudo_id), 10)) BETWEEN train_split_end_number AND validation_split_end_number) THEN "VALIDATE" + WHEN (ABS(MOD(FARM_FINGERPRINT(user_pseudo_id), 10)) BETWEEN validation_split_end_number AND 9) THEN "TEST" + END as data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + 
bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + checkouts_past_15_30_day, + 
purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + purchase_day_1, + purchase_day_2, + purchase_day_3, + purchase_day_4, + purchase_day_5, + purchase_day_6, + purchase_day_7, + purchase_day_8, + purchase_day_9, + purchase_day_10, + purchase_day_11, + purchase_day_12, + purchase_day_13, + purchase_day_14 + FROM training_preparation); + + CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_full_dataset` AS + SELECT DISTINCT * FROM DataForTargetTable + WHERE data_split IS NOT NULL; + + + CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_30_15` AS( + SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + 
sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + 
dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + MAX(CASE WHEN ( + purchase_day_1+ + purchase_day_2+ + purchase_day_3+ + purchase_day_4+ + purchase_day_5+ + purchase_day_6+ + purchase_day_7+ + purchase_day_8+ + purchase_day_9+ + purchase_day_10+ + purchase_day_11+ + purchase_day_12+ + purchase_day_13+ + purchase_day_14) = 0 THEN 0 ELSE 1 END) OVER(PARTITION BY user_pseudo_id, feature_date) as will_purchase + FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_full_dataset` + ); + + CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_15_15` AS( + SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + 
add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + --active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + --purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + --visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + --view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + --add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + --checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + 
average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + (CASE WHEN ( + purchase_day_1+ + purchase_day_2+ + purchase_day_3+ + purchase_day_4+ + purchase_day_5+ + purchase_day_6+ + purchase_day_7+ + purchase_day_8+ + purchase_day_9+ + purchase_day_10+ + purchase_day_11+ + purchase_day_12+ + purchase_day_13+ + purchase_day_14) = 0 THEN 0 ELSE 1 END) as will_purchase + FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_full_dataset` + ); + + CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_15_7` AS( + SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + 
item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + --active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + --purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + --visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + --view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + --add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + --checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, 
+ max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + (CASE WHEN ( + purchase_day_1+ + purchase_day_2+ + purchase_day_3+ + purchase_day_4+ + purchase_day_5+ + purchase_day_6+ + purchase_day_7) = 0 THEN 0 ELSE 1 END) as will_purchase + FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_full_dataset` + ); + + CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_training_15_15` + (processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + 
active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + --active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + --purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + --visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + --view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + --add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + --checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + will_purchase) + OPTIONS( + expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 48 HOUR), + 
friendly_name="v_purchase_propensity_training_15_15", + description="View Purchase Propensity Training dataset using 15 days back to predict 15 days ahead. View expires after 48h and should run daily.", + labels=[("org_unit", "development")] + ) AS SELECT DISTINCT - UD.user_pseudo_id, - UD.user_id, - UD.feature_date, - UD.month_of_the_year, - UD.week_of_the_year, - UD.day_of_the_month, - UD.day_of_week, - UD.hour_of_day, - UD.nth_day, - UD.nth_hour, - UD.nth_week, - UD.nth_month, - UD.user_ltv_revenue, - UD.device_category, - UD.device_mobile_brand_name, - UD.device_mobile_model_name, - UD.device_os, - UD.device_os_version, - UD.device_language, - UD.device_web_browser, - UD.device_web_browser_version, - UD.geo_sub_continent, - UD.geo_country, - UD.geo_region, - UD.geo_city, - UD.geo_metro, - UD.last_traffic_source_medium, - UD.last_traffic_source_name, - UD.last_traffic_source_source, - UD.first_traffic_source_medium, - UD.first_traffic_source_name, - UD.first_traffic_source_source, - UD.has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - active_users_past_15_30_day, - 
purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - purchase_day_0, - purchase_day_1, - purchase_day_2, - purchase_day_3, - purchase_day_4, - purchase_day_5, - purchase_day_6, - purchase_day_7, - purchase_day_8, - purchase_day_9, - purchase_day_10, - purchase_day_11, - purchase_day_12, - purchase_day_13, - purchase_day_14 -FROM - 
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_dimensions` UD -INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_session_event_aggregated_metrics` USEAM -ON - USEAM.user_pseudo_id = UD.user_pseudo_id - AND USEAM.feature_date = UD.feature_date -INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_metrics` UWM -ON - UWM.user_pseudo_id = UD.user_pseudo_id - AND UWM.feature_date = UD.feature_date -INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_metrics` UM -ON - UM.feature_date = UD.feature_date -INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.purchase_propensity_label` LABEL -ON - LABEL.user_pseudo_id = UD.user_pseudo_id - AND UD.feature_date = LABEL.feature_date -WHERE - -- Define the training subset interval - UD.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) -); - - -CREATE TEMP TABLE DataForTargetTable AS( + processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, 
+ ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + ---active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + --purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + --visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + --view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + --add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + --checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + 
average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + MAX(will_purchase) OVER(PARTITION BY user_pseudo_id, feature_date) as will_purchase + FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_15_15`; + + + + CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_training_15_7` + ( + processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + 
active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + --active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + --purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + --visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + --view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + --add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + --checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + will_purchase + ) + OPTIONS( + expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 48 HOUR), + 
friendly_name="v_purchase_propensity_training_15_7", + description="View Purchase Propensity Training dataset using 15 days back to predict 7 days ahead. View expires after 48h and should run daily.", + labels=[("org_unit", "development")] + ) AS SELECT DISTINCT - CASE - WHEN (ABS(MOD(FARM_FINGERPRINT(user_pseudo_id), 10)) BETWEEN 0 AND train_split_end_number) THEN "TRAIN" - WHEN (ABS(MOD(FARM_FINGERPRINT(user_pseudo_id), 10)) BETWEEN train_split_end_number AND validation_split_end_number) THEN "VALIDATE" - WHEN (ABS(MOD(FARM_FINGERPRINT(user_pseudo_id), 10)) BETWEEN validation_split_end_number AND 9) THEN "TEST" - END as data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - 
has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - purchase_day_0, - purchase_day_1, - purchase_day_2, - 
purchase_day_3, - purchase_day_4, - purchase_day_5, - purchase_day_6, - purchase_day_7, - purchase_day_8, - purchase_day_9, - purchase_day_10, - purchase_day_11, - purchase_day_12, - purchase_day_13, - purchase_day_14 - FROM training_preparation); - -CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.{{insert_table}}` AS -SELECT DISTINCT * FROM DataForTargetTable -WHERE data_split IS NOT NULL; - - -CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_30_15` AS( - SELECT - CURRENT_TIMESTAMP() AS processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - 
active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - (CASE WHEN (purchase_day_0+ - purchase_day_1+ - purchase_day_2+ - purchase_day_3+ - purchase_day_4+ - purchase_day_5+ - purchase_day_6+ - 
purchase_day_7+ - purchase_day_8+ - purchase_day_9+ - purchase_day_10+ - purchase_day_11+ - purchase_day_12+ - purchase_day_13+ - purchase_day_14) = 0 THEN 0 ELSE 1 END) as will_purchase - FROM `{{project_id}}.{{dataset}}.{{insert_table}}` -); - -CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_15_15` AS( - SELECT - CURRENT_TIMESTAMP() AS processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - 
--active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - --purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - --visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - --view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - --add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - --checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - (CASE WHEN (purchase_day_0+ - purchase_day_1+ - purchase_day_2+ - purchase_day_3+ - purchase_day_4+ - purchase_day_5+ - purchase_day_6+ - purchase_day_7+ - purchase_day_8+ - purchase_day_9+ - purchase_day_10+ - purchase_day_11+ - purchase_day_12+ - purchase_day_13+ - purchase_day_14) = 0 THEN 0 
ELSE 1 END) as will_purchase - FROM `{{project_id}}.{{dataset}}.{{insert_table}}` -); - -CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.purchase_propensity_training_15_7` AS( - SELECT - CURRENT_TIMESTAMP() AS processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - --active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - 
purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - --purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - --visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - --view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - --add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - --checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - (CASE WHEN (purchase_day_0+ - purchase_day_1+ - purchase_day_2+ - purchase_day_3+ - purchase_day_4+ - purchase_day_5+ - purchase_day_6+ - purchase_day_7) = 0 THEN 0 ELSE 1 END) as will_purchase - FROM `{{project_id}}.{{dataset}}.{{insert_table}}` -); - -CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_training_15_15` -(processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - 
week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - --active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - --purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - --visits_past_15_30_day, - view_items_past_1_day, - 
view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - --view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - --add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - --checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - will_purchase) -OPTIONS( - expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL {{expiration_duration_hours}} HOUR), - friendly_name="v_purchase_propensity_training_15_15", - description="View Purchase Propensity Training dataset using 15 days back to predict 15 days ahead. 
View expires after 48h and should run daily.", - labels=[("org_unit", "development")] -) AS -SELECT DISTINCT - processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - ---active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - --purchases_past_15_30_day, - 
visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - --visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - --view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - --add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - --checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - will_purchase - FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_15_15`; - - - -CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_training_15_7` -( - processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - 
device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - --active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - --purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - --visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - --view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - 
add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - --add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - --checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - will_purchase - ) -OPTIONS( - expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL {{expiration_duration_hours}} HOUR), - friendly_name="v_purchase_propensity_training_15_7", - description="View Purchase Propensity Training dataset using 15 days back to predict 7 days ahead. 
View expires after 48h and should run daily.", - labels=[("org_unit", "development")] -) AS -SELECT DISTINCT - processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - --active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - --purchases_past_15_30_day, - 
visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - --visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - --view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - --add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - --checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - will_purchase - FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_15_7`; - - - CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_training_30_15` -(processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - 
device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - purchases_past_15_30_day, - visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - 
add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - will_purchase) -OPTIONS( - expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL {{expiration_duration_hours}} HOUR), - friendly_name="v_purchase_propensity_training_30_15", - description="View Purchase Propensity Training dataset using 30 days back to predict 15 days ahead. 
View expires after 48h and should run daily.", - labels=[("org_unit", "development")] -) AS -SELECT DISTINCT - processed_timestamp, - data_split, - feature_date, - user_pseudo_id, - user_id, - month_of_the_year, - week_of_the_year, - day_of_the_month, - day_of_week, - hour_of_day, - nth_day, - nth_hour, - nth_week, - nth_month, - user_ltv_revenue, - device_category, - device_mobile_brand_name, - device_mobile_model_name, - device_os, - device_os_version, - device_language, - device_web_browser, - device_web_browser_version, - geo_sub_continent, - geo_country, - geo_region, - geo_city, - geo_metro, - last_traffic_source_medium, - last_traffic_source_name, - last_traffic_source_source, - first_traffic_source_medium, - first_traffic_source_name, - first_traffic_source_source, - has_signed_in_with_user_id, - engagement_rate, - engaged_sessions_per_user, - session_conversion_rate, - bounces, - bounce_rate_per_user, - sessions_per_user, - avg_views_per_session, - sum_engagement_time_seconds, - avg_engagement_time_seconds, - new_visits, - returning_visits, - add_to_carts, - cart_to_view_rate, - checkouts, - ecommerce_purchases, - ecommerce_quantity, - ecommerce_revenue, - item_revenue, - item_quantity, - item_view_events, - items_clicked_in_promotion, - items_clicked_in_list, - items_checked_out, - items_added_to_cart, - item_list_view_events, - purchase_revenue, - purchase_to_view_rate, - transactions_per_purchaser, - user_conversion_rate, - how_many_purchased_before, - has_abandoned_cart, - active_users_past_1_day, - active_users_past_2_day, - active_users_past_3_day, - active_users_past_4_day, - active_users_past_5_day, - active_users_past_6_day, - active_users_past_7_day, - active_users_past_8_14_day, - active_users_past_15_30_day, - purchases_past_1_day, - purchases_past_2_day, - purchases_past_3_day, - purchases_past_4_day, - purchases_past_5_day, - purchases_past_6_day, - purchases_past_7_day, - purchases_past_8_14_day, - purchases_past_15_30_day, - 
visits_past_1_day, - visits_past_2_day, - visits_past_3_day, - visits_past_4_day, - visits_past_5_day, - visits_past_6_day, - visits_past_7_day, - visits_past_8_14_day, - visits_past_15_30_day, - view_items_past_1_day, - view_items_past_2_day, - view_items_past_3_day, - view_items_past_4_day, - view_items_past_5_day, - view_items_past_6_day, - view_items_past_7_day, - view_items_past_8_14_day, - view_items_past_15_30_day, - add_to_carts_past_1_day, - add_to_carts_past_2_day, - add_to_carts_past_3_day, - add_to_carts_past_4_day, - add_to_carts_past_5_day, - add_to_carts_past_6_day, - add_to_carts_past_7_day, - add_to_carts_past_8_14_day, - add_to_carts_past_15_30_day, - checkouts_past_1_day, - checkouts_past_2_day, - checkouts_past_3_day, - checkouts_past_4_day, - checkouts_past_5_day, - checkouts_past_6_day, - checkouts_past_7_day, - checkouts_past_8_14_day, - checkouts_past_15_30_day, - purchasers_users, - average_daily_purchasers, - active_users, - DAU, - MAU, - WAU, - dau_per_mau, - dau_per_wau, - wau_per_mau, - users_engagement_duration_seconds, - average_engagement_time, - average_engagement_time_per_session, - average_sessions_per_user, - ARPPU, - ARPU, - average_daily_revenue, - max_daily_revenue, - min_daily_revenue, - new_users, - returning_users, - first_time_purchasers, - first_time_purchaser_conversion, - first_time_purchasers_per_new_user, - avg_user_conversion_rate, - avg_session_conversion_rate, - will_purchase - FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_30_15`; - -DROP TABLE training_preparation; -DROP TABLE DataForTargetTable; \ No newline at end of file + processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + 
device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + --active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + --purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + --visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + --view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + 
add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + --add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + --checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + MAX(will_purchase) OVER(PARTITION BY user_pseudo_id, feature_date) as will_purchase + FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_15_7`; + + + CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_purchase_propensity_training_30_15` + (processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + 
avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, 
+ DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + will_purchase) + OPTIONS( + expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 48 HOUR), + friendly_name="v_purchase_propensity_training_30_15", + description="View Purchase Propensity Training dataset using 30 days back to predict 15 days ahead. View expires after 48h and should run daily.", + labels=[("org_unit", "development")] + ) AS + SELECT DISTINCT + processed_timestamp, + data_split, + feature_date, + user_pseudo_id, + user_id, + month_of_the_year, + week_of_the_year, + day_of_the_month, + day_of_week, + MIN(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, feature_date) as user_ltv_revenue, + device_category, + device_mobile_brand_name, + device_mobile_model_name, + device_os, + device_os_version, + device_language, + device_web_browser, + device_web_browser_version, + geo_sub_continent, + geo_country, + geo_region, + geo_city, + geo_metro, + last_traffic_source_medium, + last_traffic_source_name, + last_traffic_source_source, + first_traffic_source_medium, + first_traffic_source_name, + first_traffic_source_source, + has_signed_in_with_user_id, + engagement_rate, + engaged_sessions_per_user, + session_conversion_rate, + bounces, + bounce_rate_per_user, + sessions_per_user, + avg_views_per_session, + sum_engagement_time_seconds, + avg_engagement_time_seconds, + new_visits, + returning_visits, + add_to_carts, + cart_to_view_rate, + checkouts, + ecommerce_purchases, + ecommerce_quantity, + ecommerce_revenue, + item_revenue, + item_quantity, + item_view_events, + 
items_clicked_in_promotion, + items_clicked_in_list, + items_checked_out, + items_added_to_cart, + item_list_view_events, + purchase_revenue, + purchase_to_view_rate, + transactions_per_purchaser, + user_conversion_rate, + how_many_purchased_before, + has_abandoned_cart, + active_users_past_1_day, + active_users_past_2_day, + active_users_past_3_day, + active_users_past_4_day, + active_users_past_5_day, + active_users_past_6_day, + active_users_past_7_day, + active_users_past_8_14_day, + active_users_past_15_30_day, + purchases_past_1_day, + purchases_past_2_day, + purchases_past_3_day, + purchases_past_4_day, + purchases_past_5_day, + purchases_past_6_day, + purchases_past_7_day, + purchases_past_8_14_day, + purchases_past_15_30_day, + visits_past_1_day, + visits_past_2_day, + visits_past_3_day, + visits_past_4_day, + visits_past_5_day, + visits_past_6_day, + visits_past_7_day, + visits_past_8_14_day, + visits_past_15_30_day, + view_items_past_1_day, + view_items_past_2_day, + view_items_past_3_day, + view_items_past_4_day, + view_items_past_5_day, + view_items_past_6_day, + view_items_past_7_day, + view_items_past_8_14_day, + view_items_past_15_30_day, + add_to_carts_past_1_day, + add_to_carts_past_2_day, + add_to_carts_past_3_day, + add_to_carts_past_4_day, + add_to_carts_past_5_day, + add_to_carts_past_6_day, + add_to_carts_past_7_day, + add_to_carts_past_8_14_day, + add_to_carts_past_15_30_day, + checkouts_past_1_day, + checkouts_past_2_day, + checkouts_past_3_day, + checkouts_past_4_day, + checkouts_past_5_day, + checkouts_past_6_day, + checkouts_past_7_day, + checkouts_past_8_14_day, + checkouts_past_15_30_day, + purchasers_users, + average_daily_purchasers, + active_users, + DAU, + MAU, + WAU, + dau_per_mau, + dau_per_wau, + wau_per_mau, + users_engagement_duration_seconds, + average_engagement_time, + average_engagement_time_per_session, + average_sessions_per_user, + ARPPU, + ARPU, + average_daily_revenue, + max_daily_revenue, + min_daily_revenue, + 
new_users, + returning_users, + first_time_purchasers, + first_time_purchaser_conversion, + first_time_purchasers_per_new_user, + avg_user_conversion_rate, + avg_session_conversion_rate, + MAX(will_purchase) OVER(PARTITION BY user_pseudo_id, feature_date) as will_purchase + FROM `{{project_id}}.{{dataset}}.purchase_propensity_training_30_15`; + + DROP TABLE training_preparation; + DROP TABLE DataForTargetTable; +END; \ No newline at end of file diff --git a/sql/procedure/user_dimensions.sqlx b/sql/procedure/user_dimensions.sqlx index 6d9f6e15..5716f071 100644 --- a/sql/procedure/user_dimensions.sqlx +++ b/sql/procedure/user_dimensions.sqlx @@ -16,97 +16,53 @@ SET input_date = DATE_SUB(input_date, INTERVAL 1 DAY); SET end_date = DATE_SUB(end_date, INTERVAL 1 DAY); --- What is the user-per-day dimensions? -CREATE TEMP TABLE user_dimensions_event_scoped as ( -SELECT DISTINCT - user_pseudo_id, - user_id, - FIRST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS month_of_the_year, - FIRST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS week_of_the_year, - FIRST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS day_of_the_month, - FIRST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS day_of_week, - FIRST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as hour_of_day, - FIRST_VALUE(DATE_DIFF(event_date, end_date, DAY)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_day, - FIRST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(end_date), HOUR)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_hour, - FIRST_VALUE(DATE_DIFF(event_date, end_date, WEEK)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_week, - 
FIRST_VALUE(DATE_DIFF(event_date, end_date, MONTH)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_month, - FIRST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS user_ltv_revenue, - FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_medium, - FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_name, - FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_source, - FIRST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS new_or_established_user -FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T -ON E.traffic_source_id = T.traffic_source_id -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D -ON E.device_type_id = D.device_type_id -WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date -); - -CREATE TEMP TABLE user_dimensions_user_scoped as ( -SELECT DISTINCT - user_pseudo_id, - user_id, - FIRST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_sub_continent, - FIRST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_country, - FIRST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_region, - FIRST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_city, - FIRST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_metro, - FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id ORDER BY 
event_timestamp ASC) AS first_traffic_source_medium, - FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_name, - FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_source -FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L -ON E.location_id = L.location_id -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T -ON E.traffic_source_id = T.traffic_source_id -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D -ON E.device_type_id = D.device_type_id -WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date -); - -CREATE TEMP TABLE user_dimensions_session_scoped as ( -SELECT DISTINCT +CREATE OR REPLACE TEMP TABLE user_dimensions_event_session_scoped as ( + SELECT DISTINCT user_pseudo_id, - user_id, - MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date ASC) AS has_signed_in_with_user_id, - FIRST_VALUE(category) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_category, - FIRST_VALUE(mobile_brand_name) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_mobile_brand_name, - FIRST_VALUE(mobile_model_name) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_mobile_model_name, - FIRST_VALUE(operating_system) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os, - --FIRST_VALUE(operating_system_version) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os_version, - FIRST_VALUE(SPLIT(operating_system_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os_version, - FIRST_VALUE(language) OVER(PARTITION BY user_pseudo_id, 
session_id ORDER BY event_date DESC) AS device_language, - FIRST_VALUE(browser) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser, - --FIRST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser_version, - FIRST_VALUE(SPLIT(browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser_version, - FIRST_VALUE(advertising_id) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_advertising_id -FROM - (SELECT - user_pseudo_id, - user_id, - event_date, - MAX(device_advertising_id) AS advertising_id, - MAX(device_category) AS category, - MAX(device_mobile_brand_name) AS mobile_brand_name, - MAX(device_mobile_model_name) AS mobile_model_name, - MAX(device_os) AS operating_system, - MAX(device_os_version) AS operating_system_version, - MAX(language) AS language, - MAX(device_web_browser) AS browser, - MAX(device_web_browser_version) AS browser_version, - ga_session_id as session_id, - FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON E.device_type_id = D.device_type_id - WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date - GROUP BY user_pseudo_id, user_id, ga_session_id, event_date) + LAST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS month_of_the_year, + LAST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS week_of_the_year, + LAST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS day_of_the_month, + LAST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS day_of_week, + 
--LAST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as hour_of_day, + --LAST_VALUE(DATE_DIFF(event_date, input_date, DAY)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_day, + --LAST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(input_date), HOUR)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_hour, + --LAST_VALUE(DATE_DIFF(event_date, input_date, WEEK)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_week, + --LAST_VALUE(DATE_DIFF(event_date, input_date, MONTH)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_month, + LAST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS user_ltv_revenue, + LAST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_medium, + LAST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_name, + LAST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_source, + LAST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS new_or_established_user, + LAST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_sub_continent, + LAST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_country, + LAST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_region, + LAST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id, 
input_date ORDER BY event_timestamp DESC) as geo_city, + LAST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_metro, + FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_medium, + FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_name, + FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_source, + MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS has_signed_in_with_user_id, + LAST_VALUE(device_category) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_category, + LAST_VALUE(device_mobile_brand_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_mobile_brand_name, + LAST_VALUE(device_mobile_model_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_mobile_model_name, + LAST_VALUE(device_os) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os, + --LAST_VALUE(device_os_version) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(SPLIT(device_os_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(language) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS device_language, + LAST_VALUE(device_web_browser) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser, + --LAST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + 
LAST_VALUE(SPLIT(device_web_browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(device_advertising_id) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_advertising_id + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T + ON E.traffic_source_id = T.traffic_source_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L + ON E.location_id = L.location_id + WHERE event_date BETWEEN end_date AND input_date + AND ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL ); -- All users in the platform @@ -125,56 +81,42 @@ CREATE TEMP TABLE events_users as ( CREATE TEMP TABLE DataForTargetTable AS SELECT DISTINCT - CURRENT_TIMESTAMP() AS processed_timestamp, - input_date as feature_date, - EU.user_pseudo_id, - EU.user_id, - UDES.month_of_the_year, - UDES.week_of_the_year, - UDES.day_of_the_month, - UDES.day_of_week, - UDES.hour_of_day, - UDES.nth_day, - UDES.nth_hour, - UDES.nth_week, - UDES.nth_month, - UDES.user_ltv_revenue, - UDSS.device_category, - UDSS.device_mobile_brand_name, - UDSS.device_mobile_model_name, - UDSS.device_os, - UDSS.device_os_version, - UDSS.device_language, - UDSS.device_web_browser, - UDSS.device_web_browser_version, - UDUS.geo_sub_continent, - UDUS.geo_country, - UDUS.geo_region, - UDUS.geo_city, - UDUS.geo_metro, - UDES.last_traffic_source_medium, - UDES.last_traffic_source_name, - UDES.last_traffic_source_source, - UDUS.first_traffic_source_medium, - UDUS.first_traffic_source_name, - UDUS.first_traffic_source_source, - UDSS.has_signed_in_with_user_id + CURRENT_TIMESTAMP() AS processed_timestamp, + input_date as feature_date, + EU.user_pseudo_id, + MAX(EU.user_id) as user_id, + MAX(UDESS.month_of_the_year) as 
month_of_the_year, + MAX(UDESS.week_of_the_year) as week_of_the_year, + MAX(UDESS.day_of_the_month) as day_of_the_month, + MAX(UDESS.day_of_week) as day_of_week, + MAX(UDESS.user_ltv_revenue) as user_ltv_revenue, + MAX(UDESS.device_category) as device_category, + MAX(UDESS.device_mobile_brand_name) as device_mobile_brand_name, + MAX(UDESS.device_mobile_model_name) as device_mobile_model_name, + MAX(UDESS.device_os) as device_os, + MAX(UDESS.device_os_version) as device_os_version, + MAX(UDESS.device_language) as device_language, + MAX(UDESS.device_web_browser) as device_web_browser, + MAX(UDESS.device_web_browser_version) as device_web_browser_version, + APPROX_TOP_COUNT(UDESS.geo_sub_continent, 1)[OFFSET(0)].value as geo_sub_continent, + APPROX_TOP_COUNT(UDESS.geo_country, 1)[OFFSET(0)].value as geo_country, + APPROX_TOP_COUNT(UDESS.geo_region, 1)[OFFSET(0)].value as geo_region, + APPROX_TOP_COUNT(UDESS.geo_city, 1)[OFFSET(0)].value as geo_city, + APPROX_TOP_COUNT(UDESS.geo_metro, 1)[OFFSET(0)].value as geo_metro, + MAX(UDESS.last_traffic_source_medium) as last_traffic_source_medium, + MAX(UDESS.last_traffic_source_name) as last_traffic_source_name, + MAX(UDESS.last_traffic_source_source) as last_traffic_source_source, + MAX(UDESS.first_traffic_source_medium) as first_traffic_source_medium, + MAX(UDESS.first_traffic_source_name) as first_traffic_source_name, + MAX(UDESS.first_traffic_source_source) as first_traffic_source_source, + MAX(UDESS.has_signed_in_with_user_id) as has_signed_in_with_user_id FROM events_users EU - INNER JOIN user_dimensions_event_scoped UDES - ON EU.user_pseudo_id = UDES.user_pseudo_id --AND EU.user_id = UDES.user_id - INNER JOIN user_dimensions_user_scoped UDUS - ON EU.user_pseudo_id = UDUS.user_pseudo_id --AND EU.user_id = UDUS.user_id - INNER JOIN user_dimensions_session_scoped UDSS - ON EU.user_pseudo_id = UDSS.user_pseudo_id --AND EU.user_id = UDSS.user_id + INNER JOIN user_dimensions_event_session_scoped UDESS + ON EU.user_pseudo_id = 
UDESS.user_pseudo_id --AND EU.user_id = UDESS.user_id + GROUP BY input_date, EU.user_pseudo_id ; INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` SELECT * FROM DataForTargetTable; SET rows_added = (SELECT COUNT(DISTINCT user_pseudo_id) FROM `{{project_id}}.{{dataset}}.{{insert_table}}`); - -DROP TABLE user_dimensions_event_scoped; -DROP TABLE user_dimensions_user_scoped; -DROP TABLE user_dimensions_session_scoped; -DROP TABLE events_users; -DROP TABLE DataForTargetTable; \ No newline at end of file diff --git a/sql/procedure/user_lifetime_dimensions.sqlx b/sql/procedure/user_lifetime_dimensions.sqlx index 5d4b5392..5716f071 100644 --- a/sql/procedure/user_lifetime_dimensions.sqlx +++ b/sql/procedure/user_lifetime_dimensions.sqlx @@ -16,103 +16,60 @@ SET input_date = DATE_SUB(input_date, INTERVAL 1 DAY); SET end_date = DATE_SUB(end_date, INTERVAL 1 DAY); --- What is the user-per-day dimensions? -CREATE TEMP TABLE user_dimensions_event_scoped as ( -SELECT DISTINCT - user_pseudo_id, - user_id, - FIRST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS month_of_the_year, - FIRST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS week_of_the_year, - FIRST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS day_of_the_month, - FIRST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS day_of_week, - FIRST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as hour_of_day, - FIRST_VALUE(DATE_DIFF(event_date, end_date, DAY)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_day, - FIRST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(end_date), HOUR)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_hour, - FIRST_VALUE(DATE_DIFF(event_date, end_date, 
WEEK)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_week, - FIRST_VALUE(DATE_DIFF(event_date, end_date, MONTH)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_month, - FIRST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS user_ltv_revenue, - FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_medium, - FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_name, - FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_source, - FIRST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS new_or_established_user -FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T -ON E.traffic_source_id = T.traffic_source_id -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D -ON E.device_type_id = D.device_type_id -WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date -); - -CREATE TEMP TABLE user_dimensions_user_scoped as ( -SELECT DISTINCT - user_pseudo_id, - user_id, - FIRST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_sub_continent, - FIRST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_country, - FIRST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_region, - FIRST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_city, - FIRST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_metro, - 
FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_medium, - FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_name, - FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_source -FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L -ON E.location_id = L.location_id -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T -ON E.traffic_source_id = T.traffic_source_id -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D -ON E.device_type_id = D.device_type_id -WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date -); - -CREATE TEMP TABLE user_dimensions_session_scoped as ( -SELECT DISTINCT +CREATE OR REPLACE TEMP TABLE user_dimensions_event_session_scoped as ( + SELECT DISTINCT user_pseudo_id, - user_id, - MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date ASC) AS has_signed_in_with_user_id, - FIRST_VALUE(category) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_category, - FIRST_VALUE(mobile_brand_name) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_mobile_brand_name, - FIRST_VALUE(mobile_model_name) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_mobile_model_name, - FIRST_VALUE(operating_system) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os, - --FIRST_VALUE(operating_system_version) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os_version, - FIRST_VALUE(SPLIT(operating_system_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as 
device_os_version, - FIRST_VALUE(language) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) AS device_language, - FIRST_VALUE(browser) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser, - --FIRST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser_version, - FIRST_VALUE(SPLIT(browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser_version, - FIRST_VALUE(advertising_id) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_advertising_id -FROM - (SELECT - user_pseudo_id, - user_id, - event_date, - MAX(device_advertising_id) AS advertising_id, - MAX(device_category) AS category, - MAX(device_mobile_brand_name) AS mobile_brand_name, - MAX(device_mobile_model_name) AS mobile_model_name, - MAX(device_os) AS operating_system, - MAX(device_os_version) AS operating_system_version, - MAX(language) AS language, - MAX(device_web_browser) AS browser, - MAX(device_web_browser_version) AS browser_version, - ga_session_id as session_id, - FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON E.device_type_id = D.device_type_id - WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date - GROUP BY user_pseudo_id, user_id, ga_session_id, event_date) + LAST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS month_of_the_year, + LAST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS week_of_the_year, + LAST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS day_of_the_month, + LAST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id, 
input_date ORDER BY event_timestamp DESC) AS day_of_week, + --LAST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as hour_of_day, + --LAST_VALUE(DATE_DIFF(event_date, input_date, DAY)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_day, + --LAST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(input_date), HOUR)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_hour, + --LAST_VALUE(DATE_DIFF(event_date, input_date, WEEK)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_week, + --LAST_VALUE(DATE_DIFF(event_date, input_date, MONTH)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_month, + LAST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS user_ltv_revenue, + LAST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_medium, + LAST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_name, + LAST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_source, + LAST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS new_or_established_user, + LAST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_sub_continent, + LAST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_country, + LAST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_region, + 
LAST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_city, + LAST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_metro, + FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_medium, + FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_name, + FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_source, + MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS has_signed_in_with_user_id, + LAST_VALUE(device_category) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_category, + LAST_VALUE(device_mobile_brand_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_mobile_brand_name, + LAST_VALUE(device_mobile_model_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_mobile_model_name, + LAST_VALUE(device_os) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os, + --LAST_VALUE(device_os_version) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(SPLIT(device_os_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(language) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS device_language, + LAST_VALUE(device_web_browser) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser, + --LAST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as 
device_web_browser_version, + LAST_VALUE(SPLIT(device_web_browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(device_advertising_id) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_advertising_id + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T + ON E.traffic_source_id = T.traffic_source_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L + ON E.location_id = L.location_id + WHERE event_date BETWEEN end_date AND input_date + AND ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL ); -- All users in the platform CREATE TEMP TABLE events_users as ( SELECT DISTINCT Users.user_pseudo_id, + Users.user_id, FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D ON Users.device_type_id = D.device_type_id @@ -123,57 +80,43 @@ CREATE TEMP TABLE events_users as ( CREATE TEMP TABLE DataForTargetTable AS -SELECT DISTINCT - CURRENT_TIMESTAMP() AS processed_timestamp, - input_date as feature_date, - EU.user_pseudo_id, - UDES.user_id, - UDES.month_of_the_year, - UDES.week_of_the_year, - UDES.day_of_the_month, - UDES.day_of_week, - UDES.hour_of_day, - UDES.nth_day, - UDES.nth_hour, - UDES.nth_week, - UDES.nth_month, - UDES.user_ltv_revenue, - UDSS.device_category, - UDSS.device_mobile_brand_name, - UDSS.device_mobile_model_name, - UDSS.device_os, - UDSS.device_os_version, - UDSS.device_language, - UDSS.device_web_browser, - UDSS.device_web_browser_version, - UDUS.geo_sub_continent, - UDUS.geo_country, - UDUS.geo_region, - UDUS.geo_city, - UDUS.geo_metro, - UDES.last_traffic_source_medium, - UDES.last_traffic_source_name, - UDES.last_traffic_source_source, - UDUS.first_traffic_source_medium, - 
UDUS.first_traffic_source_name, - UDUS.first_traffic_source_source, - UDSS.has_signed_in_with_user_id + SELECT DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + input_date as feature_date, + EU.user_pseudo_id, + MAX(EU.user_id) as user_id, + MAX(UDESS.month_of_the_year) as month_of_the_year, + MAX(UDESS.week_of_the_year) as week_of_the_year, + MAX(UDESS.day_of_the_month) as day_of_the_month, + MAX(UDESS.day_of_week) as day_of_week, + MAX(UDESS.user_ltv_revenue) as user_ltv_revenue, + MAX(UDESS.device_category) as device_category, + MAX(UDESS.device_mobile_brand_name) as device_mobile_brand_name, + MAX(UDESS.device_mobile_model_name) as device_mobile_model_name, + MAX(UDESS.device_os) as device_os, + MAX(UDESS.device_os_version) as device_os_version, + MAX(UDESS.device_language) as device_language, + MAX(UDESS.device_web_browser) as device_web_browser, + MAX(UDESS.device_web_browser_version) as device_web_browser_version, + APPROX_TOP_COUNT(UDESS.geo_sub_continent, 1)[OFFSET(0)].value as geo_sub_continent, + APPROX_TOP_COUNT(UDESS.geo_country, 1)[OFFSET(0)].value as geo_country, + APPROX_TOP_COUNT(UDESS.geo_region, 1)[OFFSET(0)].value as geo_region, + APPROX_TOP_COUNT(UDESS.geo_city, 1)[OFFSET(0)].value as geo_city, + APPROX_TOP_COUNT(UDESS.geo_metro, 1)[OFFSET(0)].value as geo_metro, + MAX(UDESS.last_traffic_source_medium) as last_traffic_source_medium, + MAX(UDESS.last_traffic_source_name) as last_traffic_source_name, + MAX(UDESS.last_traffic_source_source) as last_traffic_source_source, + MAX(UDESS.first_traffic_source_medium) as first_traffic_source_medium, + MAX(UDESS.first_traffic_source_name) as first_traffic_source_name, + MAX(UDESS.first_traffic_source_source) as first_traffic_source_source, + MAX(UDESS.has_signed_in_with_user_id) as has_signed_in_with_user_id FROM events_users EU - INNER JOIN user_dimensions_event_scoped UDES - ON EU.user_pseudo_id = UDES.user_pseudo_id --AND EU.user_id = UDES.user_id - INNER JOIN user_dimensions_user_scoped UDUS - 
ON EU.user_pseudo_id = UDUS.user_pseudo_id --AND EU.user_id = UDUS.user_id - INNER JOIN user_dimensions_session_scoped UDSS - ON EU.user_pseudo_id = UDSS.user_pseudo_id --AND EU.user_id = UDSS.user_id + INNER JOIN user_dimensions_event_session_scoped UDESS + ON EU.user_pseudo_id = UDESS.user_pseudo_id --AND EU.user_id = UDESS.user_id + GROUP BY input_date, EU.user_pseudo_id ; INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` SELECT * FROM DataForTargetTable; SET rows_added = (SELECT COUNT(DISTINCT user_pseudo_id) FROM `{{project_id}}.{{dataset}}.{{insert_table}}`); - -DROP TABLE user_dimensions_event_scoped; -DROP TABLE user_dimensions_user_scoped; -DROP TABLE user_dimensions_session_scoped; -DROP TABLE events_users; -DROP TABLE DataForTargetTable; \ No newline at end of file diff --git a/sql/procedure/user_segmentation_dimensions.sqlx b/sql/procedure/user_segmentation_dimensions.sqlx index bf745135..5716f071 100644 --- a/sql/procedure/user_segmentation_dimensions.sqlx +++ b/sql/procedure/user_segmentation_dimensions.sqlx @@ -16,165 +16,107 @@ SET input_date = DATE_SUB(input_date, INTERVAL 1 DAY); SET end_date = DATE_SUB(end_date, INTERVAL 1 DAY); --- What is the user-per-day dimensions? 
-CREATE TEMP TABLE user_dimensions_event_scoped as ( - SELECT DISTINCT - user_pseudo_id, - user_id, - FIRST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS month_of_the_year, - FIRST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS week_of_the_year, - FIRST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS day_of_the_month, - FIRST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS day_of_week, - FIRST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as hour_of_day, - FIRST_VALUE(DATE_DIFF(event_date, end_date, DAY)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_day, - FIRST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(end_date), HOUR)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_hour, - FIRST_VALUE(DATE_DIFF(event_date, end_date, WEEK)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_week, - FIRST_VALUE(DATE_DIFF(event_date, end_date, MONTH)) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS nth_month, - FIRST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS user_ltv_revenue, - FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_medium, - FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_name, - FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS last_traffic_source_source, - FIRST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY 
user_pseudo_id ORDER BY event_timestamp DESC) AS new_or_established_user - FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T - ON E.traffic_source_id = T.traffic_source_id - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON E.device_type_id = D.device_type_id - WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date -); - -CREATE TEMP TABLE user_dimensions_user_scoped as ( - SELECT DISTINCT - user_pseudo_id, - user_id, - FIRST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_sub_continent, - FIRST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_country, - FIRST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_region, - FIRST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_city, - FIRST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) as geo_metro, - FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_medium, - FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_name, - FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS first_traffic_source_source - FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L - ON E.location_id = L.location_id - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T - ON E.traffic_source_id = T.traffic_source_id - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON E.device_type_id = D.device_type_id - WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date -); - -CREATE TEMP TABLE 
user_dimensions_session_scoped as ( - SELECT DISTINCT - user_pseudo_id, - user_id, - MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date ASC) AS has_signed_in_with_user_id, - FIRST_VALUE(category) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_category, - FIRST_VALUE(mobile_brand_name) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_mobile_brand_name, - FIRST_VALUE(mobile_model_name) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_mobile_model_name, - FIRST_VALUE(operating_system) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os, - --FIRST_VALUE(operating_system_version) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os_version, - FIRST_VALUE(SPLIT(operating_system_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_os_version, - FIRST_VALUE(language) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) AS device_language, - FIRST_VALUE(browser) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser, - --FIRST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser_version, - FIRST_VALUE(SPLIT(browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_web_browser_version, - FIRST_VALUE(advertising_id) OVER(PARTITION BY user_pseudo_id, session_id ORDER BY event_date DESC) as device_advertising_id - FROM - (SELECT - user_pseudo_id, - user_id, - event_date, - MAX(device_advertising_id) AS advertising_id, - MAX(device_category) AS category, - MAX(device_mobile_brand_name) AS mobile_brand_name, - MAX(device_mobile_model_name) AS mobile_model_name, - MAX(device_os) AS operating_system, - MAX(device_os_version) 
AS operating_system_version, - MAX(language) AS language, - MAX(device_web_browser) AS browser, - MAX(device_web_browser_version) AS browser_version, - ga_session_id as session_id, - FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON E.device_type_id = D.device_type_id - WHERE ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date - GROUP BY user_pseudo_id, user_id, ga_session_id, event_date) +CREATE OR REPLACE TEMP TABLE user_dimensions_event_session_scoped as ( + SELECT DISTINCT + user_pseudo_id, + LAST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS month_of_the_year, + LAST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS week_of_the_year, + LAST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS day_of_the_month, + LAST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS day_of_week, + --LAST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as hour_of_day, + --LAST_VALUE(DATE_DIFF(event_date, input_date, DAY)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_day, + --LAST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(input_date), HOUR)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_hour, + --LAST_VALUE(DATE_DIFF(event_date, input_date, WEEK)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_week, + --LAST_VALUE(DATE_DIFF(event_date, input_date, MONTH)) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS nth_month, + LAST_VALUE(user_ltv_revenue) OVER(PARTITION BY 
user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS user_ltv_revenue, + LAST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_medium, + LAST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_name, + LAST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS last_traffic_source_source, + LAST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS new_or_established_user, + LAST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_sub_continent, + LAST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_country, + LAST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_region, + LAST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_city, + LAST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as geo_metro, + FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_medium, + FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_name, + FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS first_traffic_source_source, + MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp ASC) AS has_signed_in_with_user_id, + LAST_VALUE(device_category) OVER(PARTITION BY 
user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_category, + LAST_VALUE(device_mobile_brand_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_mobile_brand_name, + LAST_VALUE(device_mobile_model_name) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_mobile_model_name, + LAST_VALUE(device_os) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os, + --LAST_VALUE(device_os_version) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(SPLIT(device_os_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(language) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) AS device_language, + LAST_VALUE(device_web_browser) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser, + --LAST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(SPLIT(device_web_browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(device_advertising_id) OVER(PARTITION BY user_pseudo_id, input_date ORDER BY event_timestamp DESC) as device_advertising_id + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T + ON E.traffic_source_id = T.traffic_source_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L + ON E.location_id = L.location_id + WHERE event_date BETWEEN end_date AND input_date + AND ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL ); -- All users in the platform CREATE TEMP TABLE 
events_users as ( - SELECT DISTINCT - Users.user_pseudo_id, - Users.user_id, - FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users - INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON Users.device_type_id = D.device_type_id - WHERE ga_session_id IS NOT NULL - AND device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date + SELECT DISTINCT + Users.user_pseudo_id, + Users.user_id, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON Users.device_type_id = D.device_type_id + WHERE ga_session_id IS NOT NULL + AND device_os IS NOT NULL + AND event_date BETWEEN end_date AND input_date ); CREATE TEMP TABLE DataForTargetTable AS - SELECT DISTINCT - CURRENT_TIMESTAMP() AS processed_timestamp, - input_date as feature_date, - EU.user_pseudo_id, - EU.user_id, - UDES.month_of_the_year, - UDES.week_of_the_year, - UDES.day_of_the_month, - UDES.day_of_week, - UDES.hour_of_day, - UDES.nth_day, - UDES.nth_hour, - UDES.nth_week, - UDES.nth_month, - UDES.user_ltv_revenue, - UDSS.device_category, - UDSS.device_mobile_brand_name, - UDSS.device_mobile_model_name, - UDSS.device_os, - UDSS.device_os_version, - UDSS.device_language, - UDSS.device_web_browser, - UDSS.device_web_browser_version, - UDUS.geo_sub_continent, - UDUS.geo_country, - UDUS.geo_region, - UDUS.geo_city, - UDUS.geo_metro, - UDES.last_traffic_source_medium, - UDES.last_traffic_source_name, - UDES.last_traffic_source_source, - UDUS.first_traffic_source_medium, - UDUS.first_traffic_source_name, - UDUS.first_traffic_source_source, - UDSS.has_signed_in_with_user_id - FROM events_users EU - INNER JOIN user_dimensions_event_scoped UDES - ON EU.user_pseudo_id = UDES.user_pseudo_id --AND EU.user_id = UDES.user_id - INNER JOIN user_dimensions_user_scoped UDUS - ON EU.user_pseudo_id = UDUS.user_pseudo_id --AND EU.user_id = UDUS.user_id - INNER JOIN user_dimensions_session_scoped UDSS - ON EU.user_pseudo_id = UDSS.user_pseudo_id --AND 
EU.user_id = UDSS.user_id + SELECT DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + input_date as feature_date, + EU.user_pseudo_id, + MAX(EU.user_id) as user_id, + MAX(UDESS.month_of_the_year) as month_of_the_year, + MAX(UDESS.week_of_the_year) as week_of_the_year, + MAX(UDESS.day_of_the_month) as day_of_the_month, + MAX(UDESS.day_of_week) as day_of_week, + MAX(UDESS.user_ltv_revenue) as user_ltv_revenue, + MAX(UDESS.device_category) as device_category, + MAX(UDESS.device_mobile_brand_name) as device_mobile_brand_name, + MAX(UDESS.device_mobile_model_name) as device_mobile_model_name, + MAX(UDESS.device_os) as device_os, + MAX(UDESS.device_os_version) as device_os_version, + MAX(UDESS.device_language) as device_language, + MAX(UDESS.device_web_browser) as device_web_browser, + MAX(UDESS.device_web_browser_version) as device_web_browser_version, + APPROX_TOP_COUNT(UDESS.geo_sub_continent, 1)[OFFSET(0)].value as geo_sub_continent, + APPROX_TOP_COUNT(UDESS.geo_country, 1)[OFFSET(0)].value as geo_country, + APPROX_TOP_COUNT(UDESS.geo_region, 1)[OFFSET(0)].value as geo_region, + APPROX_TOP_COUNT(UDESS.geo_city, 1)[OFFSET(0)].value as geo_city, + APPROX_TOP_COUNT(UDESS.geo_metro, 1)[OFFSET(0)].value as geo_metro, + MAX(UDESS.last_traffic_source_medium) as last_traffic_source_medium, + MAX(UDESS.last_traffic_source_name) as last_traffic_source_name, + MAX(UDESS.last_traffic_source_source) as last_traffic_source_source, + MAX(UDESS.first_traffic_source_medium) as first_traffic_source_medium, + MAX(UDESS.first_traffic_source_name) as first_traffic_source_name, + MAX(UDESS.first_traffic_source_source) as first_traffic_source_source, + MAX(UDESS.has_signed_in_with_user_id) as has_signed_in_with_user_id + FROM events_users EU + INNER JOIN user_dimensions_event_session_scoped UDESS + ON EU.user_pseudo_id = UDESS.user_pseudo_id --AND EU.user_id = UDESS.user_id + GROUP BY input_date, EU.user_pseudo_id ; INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` - SELECT * 
FROM DataForTargetTable; + SELECT * FROM DataForTargetTable; SET rows_added = (SELECT COUNT(DISTINCT user_pseudo_id) FROM `{{project_id}}.{{dataset}}.{{insert_table}}`); - -DROP TABLE user_dimensions_event_scoped; -DROP TABLE user_dimensions_user_scoped; -DROP TABLE user_dimensions_session_scoped; -DROP TABLE events_users; -DROP TABLE DataForTargetTable; \ No newline at end of file diff --git a/sql/procedure/user_session_event_aggregated_metrics.sqlx b/sql/procedure/user_session_event_aggregated_metrics.sqlx index bf23c47b..42790c9b 100644 --- a/sql/procedure/user_session_event_aggregated_metrics.sqlx +++ b/sql/procedure/user_session_event_aggregated_metrics.sqlx @@ -16,112 +16,147 @@ SET input_date = DATE_SUB(input_date, INTERVAL 1 DAY); SET end_date = DATE_SUB(end_date, INTERVAL 1 DAY); --- What is the user-per-day session-scoped engagement? -CREATE TEMP TABLE engagement_per_day_session_scoped as ( -SELECT - user_pseudo_id, - event_date, - SAFE_DIVIDE( - COUNT(distinct case when session_engaged = true then concat(user_pseudo_id,session_id) end), - COUNT(DISTINCT session_id) - ) AS engagement_rate, - COUNT(distinct case when session_engaged = true then concat(user_pseudo_id,session_id) end) as engaged_sessions_per_user, - SAFE_DIVIDE( - COUNTIF(session_conversion = true), - COUNT(DISTINCT session_id) - ) AS session_conversion_rate, - COUNT(distinct case when session_engaged = false then concat(user_pseudo_id,session_id) end) as bounces, - SAFE_DIVIDE( - COUNT(distinct case when session_engaged = false then concat(user_pseudo_id,session_id) end), - COUNT(DISTINCT session_id) - ) as bounce_rate_per_user, - COUNT(DISTINCT session_id) AS sessions_per_user, - IFNULL(AVG(page_views_per_session),0) as avg_views_per_session, - IFNULL(ROUND(SUM(engagement_time_msec)/1000),0) as sum_engagement_time_seconds, - IFNULL(ROUND(AVG(engagement_time_msec)/1000),0) as avg_engagement_time_seconds, - COUNTIF(new_visitor = true) as new_visits, - COUNTIF(returning_visitor = true) as 
returning_visits -FROM ( +-- Copyright 2023 Google LLC + -- + -- Licensed under the Apache License, Version 2.0 (the "License"); + -- you may not use this file except in compliance with the License. + -- You may obtain a copy of the License at + -- + -- http://www.apache.org/licenses/LICENSE-2.0 + -- + -- Unless required by applicable law or agreed to in writing, software + -- distributed under the License is distributed on an "AS IS" BASIS, + -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + -- See the License for the specific language governing permissions and + -- limitations under the License. + + -- Setting procedure to lookback from the day before `input_date` until the day before `end_date` + SET input_date = DATE_SUB(input_date, INTERVAL 1 DAY); + SET end_date = DATE_SUB(end_date, INTERVAL 1 DAY); + + CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT + input_date, + end_date + ); + + -- What is the user-per-day session-scoped engagement? + CREATE OR REPLACE TEMP TABLE engagement_per_day_session_scoped as ( SELECT - E.user_pseudo_id, - E.event_date, - E.ga_session_id as session_id, - IFNULL(MAX(S.session_engaged), false) as session_engaged, - SUM(S.total_engagement_time_secs) as engagement_time_msec, - MAX(S.new_visitor) as new_visitor, - MAX(S.returning_visitor) as returning_visitor, - MAX(CASE WHEN E.event_name='purchase' AND E.ecommerce.transaction_id IS NOT NULL THEN true ELSE false END) as session_conversion, - COUNTIF(E.event_name='page_view') as page_views_per_session - FROM - `{{mds_project_id}}.{{mds_dataset}}.event` as E + user_pseudo_id, + feature_date, + SAFE_DIVIDE( + COUNT(distinct case when session_engaged = true then concat(user_pseudo_id,session_id) end), + COUNT(DISTINCT session_id) + ) AS engagement_rate, + COUNT(distinct case when session_engaged = true then concat(user_pseudo_id,session_id) end) as engaged_sessions_per_user, + SAFE_DIVIDE( + COUNTIF(session_conversion = true), + COUNT(DISTINCT 
session_id) + ) AS session_conversion_rate, + COUNT(distinct case when session_engaged = false then concat(user_pseudo_id,session_id) end) as bounces, + SAFE_DIVIDE( + COUNT(distinct case when session_engaged = false then concat(user_pseudo_id,session_id) end), + COUNT(DISTINCT session_id) + ) as bounce_rate_per_user, + COUNT(DISTINCT session_id) AS sessions_per_user, + IFNULL(AVG(page_views_per_session),0) as avg_views_per_session, + IFNULL(ROUND(SUM(engagement_time_msec)/1000),0) as sum_engagement_time_seconds, + IFNULL(ROUND(AVG(engagement_time_msec)/1000),0) as avg_engagement_time_seconds, + COUNTIF(new_visitor = true) as new_visits, + COUNTIF(returning_visitor = true) as returning_visits + FROM ( + SELECT + E.user_pseudo_id, + input_date as feature_date, + E.ga_session_id as session_id, + IFNULL(MAX(S.session_engaged), false) as session_engaged, + SUM(S.total_engagement_time_secs) as engagement_time_msec, + MAX(S.new_visitor) as new_visitor, + MAX(S.returning_visitor) as returning_visitor, + MAX(CASE WHEN E.event_name='purchase' AND E.ecommerce.transaction_id IS NOT NULL THEN true ELSE false END) as session_conversion, + COUNTIF(E.event_name='page_view') as page_views_per_session + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.session` as S ON E.ga_session_id = S.ga_session_id INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D ON E.device_type_id = D.device_type_id - WHERE E.ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND event_date BETWEEN end_date AND input_date - GROUP BY - user_pseudo_id, - session_id, - event_date) - GROUP BY user_pseudo_id, event_date -); + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY + user_pseudo_id, + feature_date, + session_id) + GROUP BY user_pseudo_id, feature_date + ); --- Has the user purchased before, another day? 
-CREATE TEMP TABLE returning_customers as ( -SELECT user_pseudo_id, event_date, MAX(unique_purchase) as unique_purchase + -- Has the user purchased before, another day? + CREATE OR REPLACE TEMP TABLE returning_customers as ( + SELECT + user_pseudo_id, + feature_date, + MAX(unique_purchase) as unique_purchase FROM ( SELECT user_pseudo_id, - event_date, + input_date as feature_date, RANK() OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS unique_purchase - FROM - `{{mds_project_id}}.{{mds_dataset}}.event` AS GA - WHERE event_name = 'purchase' - AND event_date BETWEEN end_date AND input_date - GROUP BY user_pseudo_id, event_date, event_timestamp + FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND event_name = 'purchase' + GROUP BY user_pseudo_id, feature_date, event_timestamp ) WHERE unique_purchase >= 2 - GROUP BY user_pseudo_id, event_date -); -CREATE TEMP TABLE non_returning_customers as ( -SELECT - user_pseudo_id, - event_date, + GROUP BY user_pseudo_id, feature_date + ); + CREATE OR REPLACE TEMP TABLE non_returning_customers as ( + SELECT + GA.user_pseudo_id, + input_date as feature_date FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA - WHERE user_pseudo_id NOT IN (SELECT user_pseudo_id FROM returning_customers) - AND event_date BETWEEN end_date AND input_date - GROUP BY user_pseudo_id, event_date -); -CREATE TEMP TABLE combined as ( -SELECT user_pseudo_id, event_date, unique_purchase + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + GROUP BY user_pseudo_id, feature_date + EXCEPT DISTINCT SELECT user_pseudo_id, feature_date FROM returning_customers as RC + ); + CREATE OR REPLACE TEMP TABLE combined as ( + SELECT + user_pseudo_id, + feature_date, + unique_purchase FROM returning_customers UNION ALL - SELECT user_pseudo_id, event_date, -1 - FROM non_returning_customers - GROUP BY user_pseudo_id, 
event_date -); -CREATE TEMP TABLE repeated_purchase as ( -SELECT - user_pseudo_id, - event_date, - CASE WHEN unique_purchase >= 0 THEN unique_purchase ELSE 0 END AS how_many_purchased_before -FROM - combined -); + SELECT + user_pseudo_id, + feature_date, + -1 + FROM non_returning_customers + GROUP BY user_pseudo_id, feature_date + ); + CREATE OR REPLACE TEMP TABLE repeated_purchase as ( + SELECT + user_pseudo_id, + feature_date, + CASE WHEN unique_purchase >= 0 THEN unique_purchase ELSE 0 END AS how_many_purchased_before + FROM + combined + ); --- Has the user abandoned any cart by day? -CREATE TEMP TABLE returned_cart_to_purchase as ( -SELECT -user_pseudo_id, event_date, -MAX(session_order) as max_session_order_in_day, -CASE WHEN MAX(begun_checkout) >= 1 THEN True ELSE False END as begun_checkout, -CASE WHEN MAX(event_order) >= 2 THEN True ELSE False END as has_purchased + -- Has the user abandoned any cart by day? + CREATE OR REPLACE TEMP TABLE returned_cart_to_purchase as ( + SELECT + user_pseudo_id, + feature_date, + MAX(session_order) as max_session_order_in_day, + CASE WHEN MAX(begun_checkout) >= 1 THEN True ELSE False END as begun_checkout, + CASE WHEN MAX(event_order) >= 2 THEN True ELSE False END as has_purchased FROM ( SELECT GA.user_pseudo_id, - GA.event_date, + input_date as feature_date, MAX(S.session_number) as session_order, CASE WHEN MAX(event_name) = 'begin_checkout' THEN 1 ELSE 0 END as begun_checkout, RANK() OVER (PARTITION BY GA.user_pseudo_id ORDER BY event_timestamp ASC ) AS event_order @@ -130,189 +165,179 @@ CASE WHEN MAX(event_order) >= 2 THEN True ELSE False END as has_purchased ON GA.ga_session_id = S.ga_session_id INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D ON GA.device_type_id = D.device_type_id - WHERE event_name IN ('begin_checkout','purchase') + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND event_name IN ('begin_checkout','purchase') AND GA.ga_session_id IS NOT NULL AND 
D.device_os IS NOT NULL - AND GA.event_date BETWEEN end_date AND input_date - GROUP BY GA.user_pseudo_id, GA.event_date, GA.event_timestamp + GROUP BY GA.user_pseudo_id, feature_date, GA.event_timestamp ) - GROUP BY user_pseudo_id, event_date -); - -CREATE TEMP TABLE cart_to_purchase AS ( -SELECT - GA.user_pseudo_id, - GA.event_date, - CASE WHEN (MAX(r.begun_checkout) AND NOT MAX(r.has_purchased)) THEN True ELSE False END as has_abandoned_cart, -FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON GA.device_type_id = D.device_type_id -LEFT JOIN returned_cart_to_purchase AS r -ON r.user_pseudo_id = GA.user_pseudo_id AND r.event_date = GA.event_date -WHERE GA.ga_session_id IS NOT NULL - AND D.device_os IS NOT NULL - AND GA.event_date BETWEEN end_date AND input_date -GROUP BY user_pseudo_id, event_date -); + GROUP BY user_pseudo_id, feature_date + ); --- What is the user-events-per-day event-scoped metrics performance? -CREATE TEMP TABLE user_events_per_day_event_scoped as ( -SELECT - user_pseudo_id, - event_date, - IFNULL(SUM(add_to_cart_per_session),0) as add_to_carts, - IFNULL(SAFE_DIVIDE( - SUM(add_to_cart_per_session), - SUM(view_item_per_session) - ), 0.0) AS cart_to_view_rate, - IFNULL(SUM(checkouts_per_session),0) AS checkouts, - IFNULL(SUM(ecommerce_purchases_per_session),0) AS ecommerce_purchases, - IFNULL(SUM(ecommerce_quantity_per_session),0) AS ecommerce_quantity, - IFNULL(SUM(ecommerce_revenue_per_session),0) AS ecommerce_revenue, - --IFNULL(SUM(item_discount_per_session),0) AS item_discount_amount, - IFNULL(SUM(item_revenue_per_session),0.0) AS item_revenue, - IFNULL(SUM(item_quantity_per_session),0) AS item_quantity, - IFNULL(SUM(item_refund_amount_per_session),0.0) AS item_refund_amount, - IFNULL(SUM(item_view_events_per_session),0) AS item_view_events, - IFNULL(SUM(items_clicked_in_promotion_per_session),0) AS items_clicked_in_promotion, - 
IFNULL(SUM(items_clicked_in_list_per_session),0) AS items_clicked_in_list, - IFNULL(SUM(items_checked_out_per_session),0) AS items_checked_out, - IFNULL(SUM(items_added_to_cart_per_session),0) AS items_added_to_cart, - IFNULL(SUM(item_list_click_events_per_session),0) AS item_list_click_events, - IFNULL(SUM(item_list_view_events_per_session),0) AS item_list_view_events, - IFNULL(SUM(purchase_revenue_per_session),0.0) AS purchase_revenue, - IFNULL(SAFE_DIVIDE( - SUM(user_who_purchased_per_session), - SUM(user_who_viewed_items_per_session) - ), 0.0) AS purchase_to_view_rate, - IFNULL(SUM(refunds_per_session),0.0) AS refunds, - IFNULL(SAFE_DIVIDE( - SUM(transactions_per_session), - SUM(purchasers_per_session) - ), 0.0) AS transactions_per_purchaser, - IFNULL(SAFE_DIVIDE( - SUM(ecommerce_purchases_per_session), - COUNT(DISTINCT event_timestamp)) - ,0.0) AS user_conversion_rate -FROM ( + CREATE OR REPLACE TEMP TABLE cart_to_purchase AS ( SELECT - user_pseudo_id, - event_date, - event_timestamp, - ga_session_id as session_id, - COUNTIF(event_name='add_to_cart') as add_to_cart_per_session, - COUNTIF(event_name='view_item') as view_item_per_session, - COUNTIF(event_name='begin_checkout') as checkouts_per_session, - COUNTIF(event_name='purchase' AND ecommerce.transaction_id IS NOT NULL) as ecommerce_purchases_per_session, - SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.total_item_quantity ELSE 0 END) as ecommerce_quantity_per_session, - SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.purchase_revenue_in_usd ELSE 0 END) as ecommerce_revenue_per_session, - --SUM((select SUM(discount*quantity) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND discount IS NOT NULL)) as item_discount_per_session, - SUM((select SUM(item_revenue) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND item_revenue IS NOT NULL)) 
as item_revenue_per_session, - SUM((select SUM(quantity) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND quantity IS NOT NULL)) as item_quantity_per_session, - SUM((select item_refund from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND item_refund IS NOT NULL)) as item_refund_amount_per_session, - COUNTIF(event_name='view_item') as item_view_events_per_session, - SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='view_item' AND (promotion_id IS NOT NULL OR promotion_name IS NOT NULL))) as items_clicked_in_promotion_per_session, - SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='click' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as items_clicked_in_list_per_session, - SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='begin_checkout' AND item_id IS NOT NULL)) as items_checked_out_per_session, - SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='add_to_cart' AND item_id IS NOT NULL)) as items_added_to_cart_per_session, - SUM((select COUNT(DISTINCT item_list_id) from unnest(items) where event_name='click' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as item_list_click_events_per_session, - SUM((select COUNT(DISTINCT item_list_id) from unnest(items) where event_name='view_item_list' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as item_list_view_events_per_session, - (SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.purchase_revenue_in_usd ELSE 0.0 END) - SUM(CASE WHEN event_name='refund' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.refund_value_in_usd ELSE 0.0 END)) as purchase_revenue_per_session, - COUNT(DISTINCT CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN user_pseudo_id END) as user_who_purchased_per_session, - COUNT(DISTINCT CASE WHEN 
event_name='view_item' THEN user_pseudo_id END) as user_who_viewed_items_per_session, - SUM(CASE WHEN event_name='refund' AND ecommerce.transaction_id IS NOT NULL THEN 1 ELSE 0 END) as refunds_per_session, - COUNT(DISTINCT CASE WHEN ecommerce.transaction_id IS NOT NULL THEN ecommerce.transaction_id END) as transactions_per_session, - COUNT(DISTINCT CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN user_pseudo_id END) as purchasers_per_session, - FROM - `{{mds_project_id}}.{{mds_dataset}}.event` as E + GA.user_pseudo_id, + input_date as feature_date, + CASE WHEN (MAX(r.begun_checkout) AND NOT MAX(r.has_purchased)) THEN True ELSE False END as has_abandoned_cart, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D - ON E.device_type_id = D.device_type_id - WHERE E.ga_session_id IS NOT NULL + ON GA.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + LEFT JOIN returned_cart_to_purchase AS r + ON r.user_pseudo_id = GA.user_pseudo_id AND r.feature_date = DI.input_date + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND GA.ga_session_id IS NOT NULL AND D.device_os IS NOT NULL - AND E.event_date BETWEEN end_date AND input_date - GROUP BY - E.user_pseudo_id, - E.ga_session_id, - E.event_date, - E.event_timestamp) - GROUP BY user_pseudo_id, event_date); + GROUP BY user_pseudo_id, feature_date + ); --- All users in the platform -CREATE TEMP TABLE events_users_days as ( -SELECT DISTINCT -Users.user_pseudo_id, -Days.event_date -FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users -CROSS JOIN -(SELECT DISTINCT event_date FROM `{{mds_project_id}}.{{mds_dataset}}.event`) Days -INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D -ON Users.device_type_id = D.device_type_id -WHERE Users.ga_session_id IS NOT NULL -AND D.device_os IS NOT NULL -AND Days.event_date BETWEEN end_date AND input_date -); + -- What is the user-events-per-day event-scoped metrics 
performance? + CREATE OR REPLACE TEMP TABLE user_events_per_day_event_scoped as ( + SELECT + user_pseudo_id, + feature_date, + IFNULL(SUM(add_to_cart_per_session),0) as add_to_carts, + IFNULL(SAFE_DIVIDE( + SUM(add_to_cart_per_session), + SUM(view_item_per_session) + ), 0.0) AS cart_to_view_rate, + IFNULL(SUM(checkouts_per_session),0) AS checkouts, + IFNULL(SUM(ecommerce_purchases_per_session),0) AS ecommerce_purchases, + IFNULL(SUM(ecommerce_quantity_per_session),0) AS ecommerce_quantity, + IFNULL(SUM(ecommerce_revenue_per_session),0) AS ecommerce_revenue, + --IFNULL(SUM(item_discount_per_session),0) AS item_discount_amount, + IFNULL(SUM(item_revenue_per_session),0.0) AS item_revenue, + IFNULL(SUM(item_quantity_per_session),0) AS item_quantity, + IFNULL(SUM(item_refund_amount_per_session),0.0) AS item_refund_amount, + IFNULL(SUM(item_view_events_per_session),0) AS item_view_events, + IFNULL(SUM(items_clicked_in_promotion_per_session),0) AS items_clicked_in_promotion, + IFNULL(SUM(items_clicked_in_list_per_session),0) AS items_clicked_in_list, + IFNULL(SUM(items_checked_out_per_session),0) AS items_checked_out, + IFNULL(SUM(items_added_to_cart_per_session),0) AS items_added_to_cart, + IFNULL(SUM(item_list_click_events_per_session),0) AS item_list_click_events, + IFNULL(SUM(item_list_view_events_per_session),0) AS item_list_view_events, + IFNULL(SUM(purchase_revenue_per_session),0.0) AS purchase_revenue, + IFNULL(SAFE_DIVIDE( + SUM(user_who_purchased_per_session), + SUM(user_who_viewed_items_per_session) + ), 0.0) AS purchase_to_view_rate, + IFNULL(SUM(refunds_per_session),0.0) AS refunds, + IFNULL(SAFE_DIVIDE( + SUM(transactions_per_session), + SUM(purchasers_per_session) + ), 0.0) AS transactions_per_purchaser, + IFNULL(SAFE_DIVIDE( + SUM(ecommerce_purchases_per_session), + COUNT(DISTINCT event_timestamp)) + ,0.0) AS user_conversion_rate + FROM ( + SELECT + user_pseudo_id, + input_date as feature_date, + event_timestamp, + ga_session_id as session_id, + 
COUNTIF(event_name='add_to_cart') as add_to_cart_per_session, + COUNTIF(event_name='view_item') as view_item_per_session, + COUNTIF(event_name='begin_checkout') as checkouts_per_session, + COUNTIF(event_name='purchase' AND ecommerce.transaction_id IS NOT NULL) as ecommerce_purchases_per_session, + SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.total_item_quantity ELSE 0 END) as ecommerce_quantity_per_session, + SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.purchase_revenue_in_usd ELSE 0 END) as ecommerce_revenue_per_session, + --SUM((select SUM(discount*quantity) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND discount IS NOT NULL)) as item_discount_per_session, + SUM((select SUM(item_revenue) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND item_revenue IS NOT NULL)) as item_revenue_per_session, + SUM((select SUM(quantity) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND quantity IS NOT NULL)) as item_quantity_per_session, + SUM((select item_refund from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND item_refund IS NOT NULL)) as item_refund_amount_per_session, + COUNTIF(event_name='view_item') as item_view_events_per_session, + SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='view_item' AND (promotion_id IS NOT NULL OR promotion_name IS NOT NULL))) as items_clicked_in_promotion_per_session, + SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='click' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as items_clicked_in_list_per_session, + SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='begin_checkout' AND item_id IS NOT NULL)) as items_checked_out_per_session, + SUM((select COUNT(DISTINCT item_id) from unnest(items) 
where event_name='add_to_cart' AND item_id IS NOT NULL)) as items_added_to_cart_per_session, + SUM((select COUNT(DISTINCT item_list_id) from unnest(items) where event_name='click' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as item_list_click_events_per_session, + SUM((select COUNT(DISTINCT item_list_id) from unnest(items) where event_name='view_item_list' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as item_list_view_events_per_session, + (SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.purchase_revenue_in_usd ELSE 0.0 END) - SUM(CASE WHEN event_name='refund' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.refund_value_in_usd ELSE 0.0 END)) as purchase_revenue_per_session, + COUNT(DISTINCT CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN user_pseudo_id END) as user_who_purchased_per_session, + COUNT(DISTINCT CASE WHEN event_name='view_item' THEN user_pseudo_id END) as user_who_viewed_items_per_session, + SUM(CASE WHEN event_name='refund' AND ecommerce.transaction_id IS NOT NULL THEN 1 ELSE 0 END) as refunds_per_session, + COUNT(DISTINCT CASE WHEN ecommerce.transaction_id IS NOT NULL THEN ecommerce.transaction_id END) as transactions_per_session, + COUNT(DISTINCT CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN user_pseudo_id END) as purchasers_per_session, + FROM + `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY + E.user_pseudo_id, + E.ga_session_id, + feature_date, + E.event_timestamp) + GROUP BY user_pseudo_id, feature_date + ); -CREATE TEMP TABLE DataForTargetTable AS -SELECT -CURRENT_TIMESTAMP() AS processed_timestamp, -input_date as feature_date, 
-EUD.user_pseudo_id, -EPDSS.engagement_rate, -EPDSS.engaged_sessions_per_user, -EPDSS.session_conversion_rate, -EPDSS.bounces, -EPDSS.bounce_rate_per_user, -EPDSS.sessions_per_user, -EPDSS.avg_views_per_session, -EPDSS.sum_engagement_time_seconds, -EPDSS.avg_engagement_time_seconds, -EPDSS.new_visits, -EPDSS.returning_visits, -UEPDES.add_to_carts, -UEPDES.cart_to_view_rate, -UEPDES.checkouts, -UEPDES.ecommerce_purchases, -UEPDES.ecommerce_quantity, -UEPDES.ecommerce_revenue, -UEPDES.item_revenue, -UEPDES.item_quantity, -UEPDES.item_refund_amount, -UEPDES.item_view_events, -UEPDES.items_clicked_in_promotion, -UEPDES.items_clicked_in_list, -UEPDES.items_checked_out, -UEPDES.items_added_to_cart, -UEPDES.item_list_click_events, -UEPDES.item_list_view_events, -UEPDES.purchase_revenue, -UEPDES.purchase_to_view_rate, -UEPDES.refunds, -UEPDES.transactions_per_purchaser, -UEPDES.user_conversion_rate, -R.how_many_purchased_before, -CP.has_abandoned_cart -FROM events_users_days EUD -INNER JOIN engagement_per_day_session_scoped EPDSS -ON EUD.user_pseudo_id = EPDSS.user_pseudo_id AND EUD.event_date = EPDSS.event_date -INNER JOIN user_events_per_day_event_scoped UEPDES -ON UEPDES.user_pseudo_id = EPDSS.user_pseudo_id AND UEPDES.event_date = EPDSS.event_date -INNER JOIN repeated_purchase R -ON R.user_pseudo_id = EPDSS.user_pseudo_id AND R.event_date = EPDSS.event_date -INNER JOIN cart_to_purchase CP -ON CP.user_pseudo_id = EPDSS.user_pseudo_id AND CP.event_date = EPDSS.event_date; + -- All users in the platform + CREATE OR REPLACE TEMP TABLE events_users_days as ( + SELECT DISTINCT + Users.user_pseudo_id, + DI.input_date as feature_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON Users.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND Users.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + ); -INSERT INTO 
`{{project_id}}.{{dataset}}.{{insert_table}}` -SELECT * FROM DataForTargetTable; + INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + EUD.feature_date, + EUD.user_pseudo_id, + EPDSS.engagement_rate, + EPDSS.engaged_sessions_per_user, + EPDSS.session_conversion_rate, + EPDSS.bounces, + EPDSS.bounce_rate_per_user, + EPDSS.sessions_per_user, + EPDSS.avg_views_per_session, + EPDSS.sum_engagement_time_seconds, + EPDSS.avg_engagement_time_seconds, + EPDSS.new_visits, + EPDSS.returning_visits, + UEPDES.add_to_carts, + UEPDES.cart_to_view_rate, + UEPDES.checkouts, + UEPDES.ecommerce_purchases, + UEPDES.ecommerce_quantity, + UEPDES.ecommerce_revenue, + UEPDES.item_revenue, + UEPDES.item_quantity, + UEPDES.item_refund_amount, + UEPDES.item_view_events, + UEPDES.items_clicked_in_promotion, + UEPDES.items_clicked_in_list, + UEPDES.items_checked_out, + UEPDES.items_added_to_cart, + UEPDES.item_list_click_events, + UEPDES.item_list_view_events, + UEPDES.purchase_revenue, + UEPDES.purchase_to_view_rate, + UEPDES.refunds, + UEPDES.transactions_per_purchaser, + UEPDES.user_conversion_rate, + R.how_many_purchased_before, + CP.has_abandoned_cart + FROM events_users_days EUD + INNER JOIN engagement_per_day_session_scoped EPDSS + ON EUD.user_pseudo_id = EPDSS.user_pseudo_id AND EUD.feature_date = EPDSS.feature_date + INNER JOIN user_events_per_day_event_scoped UEPDES + ON UEPDES.user_pseudo_id = EPDSS.user_pseudo_id AND UEPDES.feature_date = EPDSS.feature_date + INNER JOIN repeated_purchase R + ON R.user_pseudo_id = EPDSS.user_pseudo_id AND R.feature_date = EPDSS.feature_date + INNER JOIN cart_to_purchase CP + ON CP.user_pseudo_id = EPDSS.user_pseudo_id AND CP.feature_date = EPDSS.feature_date + ; SET rows_added = (SELECT COUNT(DISTINCT user_pseudo_id) FROM `{{project_id}}.{{dataset}}.{{insert_table}}`); - -DROP TABLE engagement_per_day_session_scoped; -DROP TABLE returning_customers; -DROP TABLE non_returning_customers; 
-DROP TABLE combined;
-DROP TABLE repeated_purchase;
-DROP TABLE returned_cart_to_purchase;
-DROP TABLE cart_to_purchase;
-DROP TABLE user_events_per_day_event_scoped;
-DROP TABLE events_users_days;
-DROP TABLE DataForTargetTable;
\ No newline at end of file
diff --git a/sql/query/invoke_backfill_customer_lifetime_value_label.sqlx b/sql/query/invoke_backfill_customer_lifetime_value_label.sqlx
index d710b9fe..2a11c8de 100644
--- a/sql/query/invoke_backfill_customer_lifetime_value_label.sqlx
+++ b/sql/query/invoke_backfill_customer_lifetime_value_label.sqlx
@@ -12,26 +12,79 @@
 -- See the License for the specific language governing permissions and
 -- limitations under the License.
 
-DECLARE input_date DATE;
-DECLARE end_date DATE;
-DECLARE rows_added INT64 DEFAULT NULL;
-
 DECLARE max_date DATE;
 DECLARE min_date DATE;
 SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`);
 SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`);
-SELECT max_date;
-SELECT min_date;
 
-FOR record IN
-  (SELECT DISTINCT
-    event_date
-  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
-  WHERE event_date BETWEEN min_date AND max_date
-  ORDER BY event_date ASC )
-DO
-  SET input_date= (SELECT record.event_date);
-  SET end_date= (SELECT DATE_ADD(record.event_date, INTERVAL {{interval_end_date}} DAY));
-  CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added);
-  SELECT rows_added;
-END FOR;
\ No newline at end of file
+CREATE OR REPLACE TEMP TABLE dates_interval as (
+  SELECT DISTINCT
+    event_date as input_date,
+    DATE_ADD(event_date, INTERVAL {{interval_end_date}} DAY) as end_date
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
+  WHERE event_date BETWEEN min_date AND max_date
+  ORDER BY input_date DESC
+);
+## ^ dates_interval: one (input_date, end_date) row per distinct event_date between min_date and max_date.
+
+## Distinct users that have a session id and a known device OS on events between min_date and max_date.
+CREATE OR REPLACE TEMP TABLE events_users_days as (
+  SELECT DISTINCT
+    Users.user_pseudo_id
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+  ON Users.device_type_id = D.device_type_id
+  WHERE Users.ga_session_id IS NOT NULL
+  AND D.device_os IS NOT NULL
+  AND Users.event_date BETWEEN min_date AND max_date
+);
+
+## Future revenue per (user, input_date): revenue on input_date itself and over the 1-30, 1-90 and 1-180 days after it.
+CREATE OR REPLACE TEMP TABLE future_revenue_per_user AS (
+  SELECT
+    user_pseudo_id,
+    input_date as feature_date,
+    MAX(GREATEST(
+      SUM(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 0 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+      MAX(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 0 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+    ))
+    OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_today,  -- window per backfilled day, not per user across all days
+    MAX(GREATEST(
+      SUM(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) BETWEEN 1 AND 30 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+      MAX(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) BETWEEN 1 AND 30 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+    ))
+    OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_1_30,
+    MAX(GREATEST(
+      SUM(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) BETWEEN 1 AND 90 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+      MAX(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) BETWEEN 1 AND 90 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+    ))
+    OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_1_90,
+    MAX(GREATEST(
+      SUM(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) BETWEEN 1 AND 180 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+      MAX(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) BETWEEN 1 AND 180 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+    ))
+    OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_1_180
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+  ON E.device_type_id = D.device_type_id
+  CROSS JOIN dates_interval as DI
+  WHERE E.event_date BETWEEN input_date AND end_date
+  AND E.ga_session_id IS NOT NULL
+  AND ecommerce.transaction_id IS NOT NULL
+  AND ecommerce.transaction_id <> '(not set)'
+  AND D.device_os IS NOT NULL
+  GROUP BY user_pseudo_id, feature_date
+);
+## Labels: future-window revenue minus same-day revenue, floored at 0. NOTE(review): users with no matching row in future_revenue_per_user get a NULL feature_date from the LEFT JOIN - confirm intended.
+INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
+  SELECT DISTINCT
+    CURRENT_TIMESTAMP() AS processed_timestamp,
+    B.feature_date,
+    A.user_pseudo_id,
+    GREATEST(0.0, COALESCE(COALESCE(B.user_ltv_revenue_1_30,0.0) - COALESCE(B.user_ltv_revenue_today,0.0), 0.0)) AS pltv_revenue_30_days,
+    GREATEST(0.0, COALESCE(COALESCE(B.user_ltv_revenue_1_90,0.0) - COALESCE(B.user_ltv_revenue_today,0.0), 0.0)) AS pltv_revenue_90_days,
+    GREATEST(0.0, COALESCE(COALESCE(B.user_ltv_revenue_1_180,0.0) - COALESCE(B.user_ltv_revenue_today,0.0), 0.0)) AS pltv_revenue_180_days,
+  FROM events_users_days AS A
+  LEFT JOIN future_revenue_per_user AS B
+  ON A.user_pseudo_id = B.user_pseudo_id
+;
\ No newline at end of file
diff --git a/sql/query/invoke_backfill_purchase_propensity_label.sqlx b/sql/query/invoke_backfill_purchase_propensity_label.sqlx
index d710b9fe..1da23501 100644
--- a/sql/query/invoke_backfill_purchase_propensity_label.sqlx
+++ b/sql/query/invoke_backfill_purchase_propensity_label.sqlx
@@ -12,26 +12,99 @@
 -- See the License for the specific language governing permissions and
 -- limitations under the License.
 
-DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE rows_added INT64 DEFAULT NULL; - DECLARE max_date DATE; DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date BETWEEN min_date AND max_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_ADD(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added); - SELECT rows_added; -END FOR; \ No newline at end of file +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, + DATE_ADD(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date BETWEEN min_date AND max_date + ORDER BY input_date DESC +); + +## All users in the platform +CREATE OR REPLACE TEMP TABLE all_users_possible_purchases as ( + SELECT DISTINCT + Users.user_pseudo_id, + DI.event_date as feature_date, + NULL as purchase_day_1, + NULL as purchase_day_2, + NULL as purchase_day_3, + NULL as purchase_day_4, + NULL as purchase_day_5, + NULL as purchase_day_6, + NULL as purchase_day_7, + NULL as purchase_day_8, + NULL as purchase_day_9, + NULL as purchase_day_10, + NULL as purchase_day_11, + NULL as purchase_day_12, + NULL as purchase_day_13, + NULL as purchase_day_14, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users + CROSS JOIN (SELECT event_date FROM UNNEST(GENERATE_DATE_ARRAY(min_date, max_date, INTERVAL 1 DAY)) as event_date) as DI + WHERE Users.event_name='purchase' + AND Users.ga_session_id IS NOT NULL + 
AND Users.ecommerce.transaction_id IS NOT NULL + AND Users.ecommerce.transaction_id <> '(not set)' + ); + + ## Future User metrics: 1-7-day future purchases per user, 1-15-day future purchases per user, 1-30-day future purchases per user, 1–90-day future purchases per user + CREATE OR REPLACE TEMP TABLE future_purchases_per_user AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 1 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_1, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 2 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_2, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 3 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_3, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 4 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_4, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 5 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_5, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 6 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_6, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 7 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_7, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 8 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_8, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 9 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS 
purchase_day_9, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 10 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_10, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 11 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_11, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 12 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_12, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 13 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_13, + MAX(COUNT(DISTINCT CASE DATE_DIFF(event_date, input_date, DAY) = 14 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchase_day_14 + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.input_date AND DI.end_date + AND E.event_name='purchase' + AND E.ga_session_id IS NOT NULL + AND E.ecommerce.transaction_id IS NOT NULL + AND E.ecommerce.transaction_id <> '(not set)' + AND D.device_os IS NOT NULL + GROUP BY user_pseudo_id, feature_date +); + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` +SELECT DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + A.feature_date, + A.user_pseudo_id, + LEAST(COALESCE(B.purchase_day_1, 0), 1) AS purchase_day_1, + LEAST(COALESCE(B.purchase_day_2, 0), 1) AS purchase_day_2, + LEAST(COALESCE(B.purchase_day_3, 0), 1) AS purchase_day_3, + LEAST(COALESCE(B.purchase_day_4, 0), 1) AS purchase_day_4, + LEAST(COALESCE(B.purchase_day_5, 0), 1) AS purchase_day_5, + LEAST(COALESCE(B.purchase_day_6, 0), 1) AS purchase_day_6, + LEAST(COALESCE(B.purchase_day_7, 0), 1) AS purchase_day_7, + 
LEAST(COALESCE(B.purchase_day_8, 0), 1) AS purchase_day_8, + LEAST(COALESCE(B.purchase_day_9, 0), 1) AS purchase_day_9, + LEAST(COALESCE(B.purchase_day_10, 0), 1) AS purchase_day_10, + LEAST(COALESCE(B.purchase_day_11, 0), 1) AS purchase_day_11, + LEAST(COALESCE(B.purchase_day_12, 0), 1) AS purchase_day_12, + LEAST(COALESCE(B.purchase_day_13, 0), 1) AS purchase_day_13, + LEAST(COALESCE(B.purchase_day_14, 0), 1) AS purchase_day_14, +FROM all_users_possible_purchases AS A +LEFT JOIN future_purchases_per_user AS B +ON B.user_pseudo_id = A.user_pseudo_id AND B.feature_date = A.feature_date -- match labels to the row's own feature date; joining on the user alone pairs every date with every other date's labels +; \ No newline at end of file diff --git a/sql/query/invoke_backfill_user_dimensions.sqlx b/sql/query/invoke_backfill_user_dimensions.sqlx index 60967f9e..8f0bcd67 100644 --- a/sql/query/invoke_backfill_user_dimensions.sqlx +++ b/sql/query/invoke_backfill_user_dimensions.sqlx @@ -12,26 +12,116 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE users_added INT64 DEFAULT NULL; - DECLARE max_date DATE; DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date as event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date BETWEEN min_date AND max_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, users_added); - SELECT users_added; -END FOR; \ No newline at end of file +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, +
DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date BETWEEN min_date AND max_date + ORDER BY input_date DESC) +; + +CREATE OR REPLACE TEMP TABLE user_dimensions_event_session_scoped as ( + SELECT DISTINCT + DI.input_date as feature_date, + user_pseudo_id, + LAST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS month_of_the_year, + LAST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS week_of_the_year, + LAST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS day_of_the_month, + LAST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS day_of_week, + --LAST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as hour_of_day, + --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, DAY)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_day, + --LAST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(DI.input_date), HOUR)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_hour, + --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, WEEK)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_week, + --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, MONTH)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_month, + LAST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS user_ltv_revenue, + LAST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_medium, + 
LAST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_name, + LAST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_source, + LAST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS new_or_established_user, + LAST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_sub_continent, + LAST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_country, + LAST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_region, + LAST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_city, + LAST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_metro, + FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_medium, + FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_name, + FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_source, + MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS has_signed_in_with_user_id, + LAST_VALUE(device_category) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_category, + LAST_VALUE(device_mobile_brand_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp 
DESC) as device_mobile_brand_name, + LAST_VALUE(device_mobile_model_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_mobile_model_name, + LAST_VALUE(device_os) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os, + --LAST_VALUE(device_os_version) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(SPLIT(device_os_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(language) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS device_language, + LAST_VALUE(device_web_browser) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser, + --LAST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(SPLIT(device_web_browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(device_advertising_id) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_advertising_id + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T + ON E.traffic_source_id = T.traffic_source_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L + ON E.location_id = L.location_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL +); + +-- All users in the platform +CREATE OR REPLACE TEMP TABLE events_users as ( + SELECT DISTINCT + Users.user_pseudo_id, + Users.user_id, + DI.event_date as 
feature_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users + CROSS JOIN (SELECT event_date FROM UNNEST(GENERATE_DATE_ARRAY(min_date, max_date, INTERVAL 1 DAY)) as event_date) as DI + WHERE ga_session_id IS NOT NULL) +; + + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + EU.feature_date, + EU.user_pseudo_id, + MAX(EU.user_id) as user_id, + MAX(UDESS.month_of_the_year) as month_of_the_year, + MAX(UDESS.week_of_the_year) as week_of_the_year, + MAX(UDESS.day_of_the_month) as day_of_the_month, + MAX(UDESS.day_of_week) as day_of_week, + MAX(UDESS.user_ltv_revenue) as user_ltv_revenue, + MAX(UDESS.device_category) as device_category, + MAX(UDESS.device_mobile_brand_name) as device_mobile_brand_name, + MAX(UDESS.device_mobile_model_name) as device_mobile_model_name, + MAX(UDESS.device_os) as device_os, + MAX(UDESS.device_os_version) as device_os_version, + MAX(UDESS.device_language) as device_language, + MAX(UDESS.device_web_browser) as device_web_browser, + MAX(UDESS.device_web_browser_version) as device_web_browser_version, + APPROX_TOP_COUNT(UDESS.geo_sub_continent, 1)[OFFSET(0)].value as geo_sub_continent, + APPROX_TOP_COUNT(UDESS.geo_country, 1)[OFFSET(0)].value as geo_country, + APPROX_TOP_COUNT(UDESS.geo_region, 1)[OFFSET(0)].value as geo_region, + APPROX_TOP_COUNT(UDESS.geo_city, 1)[OFFSET(0)].value as geo_city, + APPROX_TOP_COUNT(UDESS.geo_metro, 1)[OFFSET(0)].value as geo_metro, + MAX(UDESS.last_traffic_source_medium) as last_traffic_source_medium, + MAX(UDESS.last_traffic_source_name) as last_traffic_source_name, + MAX(UDESS.last_traffic_source_source) as last_traffic_source_source, + MAX(UDESS.first_traffic_source_medium) as first_traffic_source_medium, + MAX(UDESS.first_traffic_source_name) as first_traffic_source_name, + MAX(UDESS.first_traffic_source_source) as first_traffic_source_source, + MAX(UDESS.has_signed_in_with_user_id) as has_signed_in_with_user_id + FROM 
events_users EU + INNER JOIN user_dimensions_event_session_scoped UDESS + ON EU.user_pseudo_id = UDESS.user_pseudo_id AND EU.feature_date = UDESS.feature_date --AND EU.user_id = UDESS.user_id + GROUP BY EU.feature_date, EU.user_pseudo_id +; \ No newline at end of file diff --git a/sql/query/invoke_backfill_user_lifetime_dimensions.sqlx b/sql/query/invoke_backfill_user_lifetime_dimensions.sqlx index b4127d93..987e2b85 100644 --- a/sql/query/invoke_backfill_user_lifetime_dimensions.sqlx +++ b/sql/query/invoke_backfill_user_lifetime_dimensions.sqlx @@ -12,26 +12,117 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE users_added INT64 DEFAULT NULL; - DECLARE max_date DATE; DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date BETWEEN min_date AND max_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, users_added); - SELECT users_added; -END FOR; \ No newline at end of file +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, + DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date BETWEEN min_date AND max_date + ORDER BY input_date DESC +); + +CREATE OR REPLACE TEMP TABLE user_dimensions_event_session_scoped as ( + SELECT DISTINCT + DI.input_date as 
feature_date, + user_pseudo_id, + LAST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS month_of_the_year, + LAST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS week_of_the_year, + LAST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS day_of_the_month, + LAST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS day_of_week, + --LAST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as hour_of_day, + --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, DAY)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_day, + --LAST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(DI.input_date), HOUR)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_hour, + --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, WEEK)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_week, + --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, MONTH)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_month, + LAST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS user_ltv_revenue, + LAST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_medium, + LAST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_name, + LAST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_source, + LAST_VALUE(CASE WHEN 
(TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS new_or_established_user, + LAST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_sub_continent, + LAST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_country, + LAST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_region, + LAST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_city, + LAST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_metro, + FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_medium, + FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_name, + FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_source, + MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS has_signed_in_with_user_id, + LAST_VALUE(device_category) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_category, + LAST_VALUE(device_mobile_brand_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_mobile_brand_name, + LAST_VALUE(device_mobile_model_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_mobile_model_name, + LAST_VALUE(device_os) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os, + 
--LAST_VALUE(device_os_version) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(SPLIT(device_os_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os_version, + LAST_VALUE(language) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS device_language, + LAST_VALUE(device_web_browser) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser, + --LAST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(SPLIT(device_web_browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser_version, + LAST_VALUE(device_advertising_id) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_advertising_id + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T + ON E.traffic_source_id = T.traffic_source_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L + ON E.location_id = L.location_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + +); + +-- All users in the platform +CREATE OR REPLACE TEMP TABLE events_users as ( + SELECT DISTINCT + Users.user_pseudo_id, + Users.user_id, + DI.event_date as feature_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users + CROSS JOIN (SELECT event_date FROM UNNEST(GENERATE_DATE_ARRAY(min_date, max_date, INTERVAL 1 DAY)) as event_date) as DI + WHERE ga_session_id IS NOT NULL +); + + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT 
DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + EU.feature_date, + EU.user_pseudo_id, + MAX(EU.user_id) as user_id, + MAX(UDESS.month_of_the_year) as month_of_the_year, + MAX(UDESS.week_of_the_year) as week_of_the_year, + MAX(UDESS.day_of_the_month) as day_of_the_month, + MAX(UDESS.day_of_week) as day_of_week, + MAX(UDESS.user_ltv_revenue) as user_ltv_revenue, + MAX(UDESS.device_category) as device_category, + MAX(UDESS.device_mobile_brand_name) as device_mobile_brand_name, + MAX(UDESS.device_mobile_model_name) as device_mobile_model_name, + MAX(UDESS.device_os) as device_os, + MAX(UDESS.device_os_version) as device_os_version, + MAX(UDESS.device_language) as device_language, + MAX(UDESS.device_web_browser) as device_web_browser, + MAX(UDESS.device_web_browser_version) as device_web_browser_version, + APPROX_TOP_COUNT(UDESS.geo_sub_continent, 1)[OFFSET(0)].value as geo_sub_continent, + APPROX_TOP_COUNT(UDESS.geo_country, 1)[OFFSET(0)].value as geo_country, + APPROX_TOP_COUNT(UDESS.geo_region, 1)[OFFSET(0)].value as geo_region, + APPROX_TOP_COUNT(UDESS.geo_city, 1)[OFFSET(0)].value as geo_city, + APPROX_TOP_COUNT(UDESS.geo_metro, 1)[OFFSET(0)].value as geo_metro, + MAX(UDESS.last_traffic_source_medium) as last_traffic_source_medium, + MAX(UDESS.last_traffic_source_name) as last_traffic_source_name, + MAX(UDESS.last_traffic_source_source) as last_traffic_source_source, + MAX(UDESS.first_traffic_source_medium) as first_traffic_source_medium, + MAX(UDESS.first_traffic_source_name) as first_traffic_source_name, + MAX(UDESS.first_traffic_source_source) as first_traffic_source_source, + MAX(UDESS.has_signed_in_with_user_id) as has_signed_in_with_user_id + FROM events_users EU + INNER JOIN user_dimensions_event_session_scoped UDESS + ON EU.user_pseudo_id = UDESS.user_pseudo_id AND EU.feature_date = UDESS.feature_date --AND EU.user_id = UDESS.user_id + GROUP BY EU.feature_date, EU.user_pseudo_id +; \ No newline at end of file diff --git 
a/sql/query/invoke_backfill_user_lookback_metrics.sqlx b/sql/query/invoke_backfill_user_lookback_metrics.sqlx index d710e52c..76c64fc1 100644 --- a/sql/query/invoke_backfill_user_lookback_metrics.sqlx +++ b/sql/query/invoke_backfill_user_lookback_metrics.sqlx @@ -12,23 +12,184 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE rows_added INT64 DEFAULT NULL; - DECLARE min_date DATE; SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT min_date; - -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date >= min_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added); - SELECT rows_added; -END FOR; \ No newline at end of file + +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, + DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date >= min_date + ORDER BY input_date DESC +); + +##All users metrics: 7-day active users, 8–14-day active users +CREATE OR REPLACE TEMP TABLE rolling_active_users AS ( +SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_1_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_8_14_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E 
+CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND engagement_time_msec > 0 +GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day purchase per user, 7-day purchases per user, 8-14-day purchases per user +CREATE OR REPLACE TEMP TABLE rolling_purchases_past_days AS ( +SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_1_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_8_14_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND event_name='purchase' +AND ga_session_id IS NOT NULL +AND ecommerce.transaction_id IS NOT NULL +AND ecommerce.transaction_id <> '(not set)' +GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day visits per user, 7-day visits per user, 8-14-day visits per user +CREATE OR REPLACE TEMP TABLE rolling_visits_past_days AS ( +SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_1_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_8_14_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND ga_session_id IS NOT NULL +GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day view_item per user, 7-day view_item per 
user, 8-14-day view_item per user +CREATE OR REPLACE TEMP TABLE rolling_view_item_past_days AS ( +SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_1_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_8_14_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND event_name='view_item' +AND ga_session_id IS NOT NULL +GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day add_to_cart per user, 7-day add_to_cart per user, 8-14-day add_to_cart per user +CREATE OR REPLACE TEMP TABLE rolling_add_to_cart_past_days AS ( +SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_1_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_8_14_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND event_name='add_to_cart' +AND ga_session_id IS NOT NULL +GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day checkout per user, 7-day checkout per user, 8-14-day checkout per user +CREATE OR REPLACE TEMP TABLE rolling_checkout_past_days AS ( +SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN event_timestamp END)) 
OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_1_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_8_14_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND event_name='begin_checkout' +AND ga_session_id IS NOT NULL +GROUP BY user_pseudo_id, feature_date +); + +## Past revenue metrics, windowed per (user_pseudo_id, input_date) like the other rolling tables so each feature date keeps its own revenue values +CREATE OR REPLACE TEMP TABLE rolling_revenue_per_user AS ( +SELECT DISTINCT + user_pseudo_id, + input_date as feature_date, + MAX(GREATEST( + SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 0 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END), + MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 0 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END) + )) + OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_past_0_day, + MAX(GREATEST( + SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END), + MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 7 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END) + )) + OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_past_1_7_day, + MAX(GREATEST( + SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 15 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END), + MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 15 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END) + )) + OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_past_1_15_day +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND E.ga_session_id IS NOT NULL +AND ecommerce.transaction_id IS NOT NULL +AND ecommerce.transaction_id <> '(not set)' +GROUP BY
user_pseudo_id, feature_date +); + +-- All users in the platform +CREATE OR REPLACE TEMP TABLE events_users as ( +SELECT DISTINCT + Users.user_pseudo_id, + DI.input_date as feature_date +FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users +INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D +ON Users.device_type_id = D.device_type_id +CROSS JOIN dates_interval as DI +WHERE Users.ga_session_id IS NOT NULL +AND Users.event_date BETWEEN DI.end_date AND DI.input_date +AND D.device_os IS NOT NULL +); + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + EUD.feature_date, + EUD.user_pseudo_id, + COALESCE(active_users_past_1_7_day,0) AS active_users_past_1_7_day, + COALESCE(active_users_past_8_14_day,0) AS active_users_past_8_14_day, + COALESCE(purchases_past_1_7_day,0) AS purchases_past_1_7_day, + COALESCE(purchases_past_8_14_day,0) AS purchases_past_8_14_day, + COALESCE(visits_past_1_7_day,0) AS visits_past_1_7_day, + COALESCE(visits_past_8_14_day,0) AS visits_past_8_14_day, + COALESCE(view_items_past_1_7_day,0) AS view_items_past_1_7_day, + COALESCE(view_items_past_8_14_day,0) AS view_items_past_8_14_day, + COALESCE(add_to_carts_past_1_7_day,0) AS add_to_carts_past_1_7_day, + COALESCE(add_to_carts_past_8_14_day,0) AS add_to_carts_past_8_14_day, + COALESCE(checkouts_past_1_7_day,0) AS checkouts_past_1_7_day, + COALESCE(checkouts_past_8_14_day,0) AS checkouts_past_8_14_day, + GREATEST(0.0, COALESCE(COALESCE(G.user_ltv_revenue_past_1_7_day,0.0) - COALESCE(G.user_ltv_revenue_past_0_day,0.0), 0.0)) AS ltv_revenue_past_1_7_day, + GREATEST( 0.0, COALESCE(COALESCE(G.user_ltv_revenue_past_1_15_day,0.0) - COALESCE(G.user_ltv_revenue_past_1_7_day,0.0), 0.0)) AS ltv_revenue_past_7_15_day, + FROM events_users AS EUD + FULL OUTER JOIN rolling_active_users AS A + ON EUD.user_pseudo_id = A.user_pseudo_id AND EUD.feature_date = A.feature_date + FULL OUTER JOIN rolling_purchases_past_days AS B + ON 
EUD.user_pseudo_id = B.user_pseudo_id AND EUD.feature_date = B.feature_date + FULL OUTER JOIN rolling_visits_past_days AS C + ON EUD.user_pseudo_id = C.user_pseudo_id AND EUD.feature_date = C.feature_date + FULL OUTER JOIN rolling_view_item_past_days AS D + ON EUD.user_pseudo_id = D.user_pseudo_id AND EUD.feature_date = D.feature_date + FULL OUTER JOIN rolling_add_to_cart_past_days AS E + ON EUD.user_pseudo_id = E.user_pseudo_id AND EUD.feature_date = E.feature_date + FULL OUTER JOIN rolling_checkout_past_days AS F + ON EUD.user_pseudo_id = F.user_pseudo_id AND EUD.feature_date = F.feature_date + FULL OUTER JOIN rolling_revenue_per_user AS G + ON EUD.user_pseudo_id = G.user_pseudo_id AND EUD.feature_date = G.feature_date + WHERE EUD.user_pseudo_id IS NOT NULL +; \ No newline at end of file diff --git a/sql/query/invoke_backfill_user_rolling_window_lifetime_metrics.sqlx b/sql/query/invoke_backfill_user_rolling_window_lifetime_metrics.sqlx index 6b110dba..c7c2384e 100644 --- a/sql/query/invoke_backfill_user_rolling_window_lifetime_metrics.sqlx +++ b/sql/query/invoke_backfill_user_rolling_window_lifetime_metrics.sqlx @@ -12,26 +12,240 @@ -- See the License for the specific language governing permissions and -- limitations under the License. 
-DECLARE input_date DATE;
-DECLARE end_date DATE;
-DECLARE rows_added INT64 DEFAULT NULL;
-
 DECLARE max_date DATE;
 DECLARE min_date DATE;
 SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`);
 SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`);
-SELECT max_date;
-SELECT min_date;
-
-FOR record IN
-  (SELECT DISTINCT
-    event_date
-  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
-  WHERE event_date BETWEEN min_date AND max_date
-  ORDER BY event_date ASC )
-DO
-  SET input_date= (SELECT record.event_date);
-  SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY));
-  CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added);
-  SELECT rows_added;
-END FOR;
\ No newline at end of file
+
+CREATE OR REPLACE TEMP TABLE dates_interval as ( -- one row per distinct event_date between min_date and max_date; replaces the removed per-date FOR loop
+SELECT DISTINCT
+  event_date as input_date, -- date the features are computed for (aliased to feature_date downstream)
+  DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date -- earliest event_date included in that date's lookback window
+FROM `{{mds_project_id}}.{{mds_dataset}}.event`
+WHERE event_date BETWEEN min_date AND max_date
+  ORDER BY input_date DESC -- NOTE(review): this table is only cross-joined below, so the ORDER BY looks unnecessary - confirm before dropping
+);
+
+# The window aggregations below are computed once for every input_date in dates_interval (the training and inference date ranges).
+## Active days per user: distinct days with engagement (engagement_time_msec > 0) in each 30-day bucket (1-30, 31-60, ..., 151-180 days) before the feature date
+CREATE OR REPLACE TEMP TABLE rolling_active_users AS (
+SELECT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 30 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_1_30_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 31 AND 60 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_30_60_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 61 AND 90 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_60_90_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 91 AND 120 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_90_120_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 121 AND 150 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_120_150_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 151 AND 180 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_150_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND E.engagement_time_msec > 0
+GROUP BY user_pseudo_id, feature_date
+);
+
+## Purchases per user: distinct transaction ids of purchase events in each 30-day bucket (1-30, 31-60, ..., 151-180 days) before the feature date
+CREATE OR REPLACE TEMP TABLE rolling_purchases_per_user AS (
+SELECT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 30 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_1_30_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 31 AND 60 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_30_60_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 61 AND 90 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_60_90_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 91 AND 120 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_90_120_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 121 AND 150 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_120_150_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 151 AND 180 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_150_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND event_name='purchase'
+AND ga_session_id IS NOT NULL
+AND ecommerce.transaction_id IS NOT NULL
+AND ecommerce.transaction_id <> '(not set)'
+GROUP BY user_pseudo_id, feature_date
+);
+
+## Visits per user: distinct ga_session_id values per bucket. NOTE(review): buckets here are 1-31, 31-61, ..., 151-181 and overlap on days 31/61/91/121/151, unlike the 1-30/31-60 buckets used for active users and purchases - confirm against the daily stored procedure before aligning
+CREATE OR REPLACE TEMP TABLE rolling_visits_per_user AS (
+SELECT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 31 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_1_30_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 31 AND 61 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_30_60_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 61 AND 91 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_60_90_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 91 AND 121 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_90_120_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 121 AND 151 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_120_150_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 151 AND 181 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_150_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND ga_session_id IS NOT NULL
+GROUP BY user_pseudo_id, feature_date
+);
+
+## view_item events per user: distinct event timestamps per bucket. NOTE(review): buckets are 1-31, 31-61, ..., 151-181 (overlapping boundary days) - confirm against the daily stored procedure before aligning with 1-30/31-60
+CREATE OR REPLACE TEMP TABLE rolling_view_item_per_user AS (
+SELECT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 31 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_1_30_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 31 AND 61 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_30_60_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 61 AND 91 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_60_90_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 91 AND 121 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_90_120_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 121 AND 151 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_120_150_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 151 AND 181 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_150_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND event_name='view_item'
+AND ga_session_id IS NOT NULL
+GROUP BY user_pseudo_id, feature_date
+);
+
+## add_to_cart events per user: distinct event timestamps per bucket. NOTE(review): buckets are 1-31, 31-61, ..., 151-181 (overlapping boundary days) - confirm against the daily stored procedure before aligning with 1-30/31-60
+CREATE OR REPLACE TEMP TABLE rolling_add_to_cart_per_user AS (
+SELECT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 31 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_1_30_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 31 AND 61 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_30_60_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 61 AND 91 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_60_90_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 91 AND 121 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_90_120_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 121 AND 151 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_120_150_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 151 AND 181 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_150_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND event_name='add_to_cart'
+AND ga_session_id IS NOT NULL
+GROUP BY user_pseudo_id, feature_date
+);
+
+## begin_checkout events per user: distinct event timestamps per bucket. NOTE(review): buckets are 1-31, 31-61, ..., 151-181 (overlapping boundary days) - confirm against the daily stored procedure before aligning with 1-30/31-60
+CREATE OR REPLACE TEMP TABLE rolling_checkout_per_user AS (
+SELECT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 31 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_1_30_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 31 AND 61 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_30_60_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 61 AND 91 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_60_90_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 91 AND 121 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_90_120_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 121 AND 151 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_120_150_day,
+  MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 151 AND 181 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_150_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND event_name='begin_checkout'
+AND ga_session_id IS NOT NULL
+GROUP BY user_pseudo_id, feature_date
+);
+
+CREATE OR REPLACE TEMP TABLE rolling_revenue_per_user AS ( -- per user and feature date: larger of summed purchase revenue and max user_ltv_revenue for day 0 and days 1-30, 1-90, 1-180
+SELECT DISTINCT
+  user_pseudo_id,
+  input_date as feature_date,
+  MAX(GREATEST(
+    SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 0 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+    MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 0 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+  ))
+  OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_past_0_day, -- partition per (user, feature date) like the other rolling tables; by user alone one date's maximum would be copied to every backfilled date
+  MAX(GREATEST(
+    SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 30 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+    MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 30 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+  ))
+  OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_past_1_30_day,
+  MAX(GREATEST(
+    SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 90 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+    MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 90 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+  ))
+  OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_past_1_90_day,
+  MAX(GREATEST(
+    SUM(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 180 WHEN TRUE THEN IFNULL(ecommerce.purchase_revenue_in_usd,0.0) END),
+    MAX(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 1 AND 180 WHEN TRUE THEN IFNULL(user_ltv_revenue,0.0) END)
+  ))
+  OVER(PARTITION BY user_pseudo_id, input_date) AS user_ltv_revenue_1_180_day
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+CROSS JOIN dates_interval as DI
+WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+AND E.ga_session_id IS NOT NULL
+AND ecommerce.transaction_id IS NOT NULL
+AND ecommerce.transaction_id <> '(not set)'
+GROUP BY user_pseudo_id, feature_date
+);
+
+## Driving set of (user, feature date) pairs: users with a session event on a device with a known OS inside each date's lookback window
+CREATE OR REPLACE TEMP TABLE events_users as (
+SELECT DISTINCT
+  Users.user_pseudo_id,
+  DI.input_date as feature_date
+FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users
+INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+ON Users.device_type_id = D.device_type_id
+CROSS JOIN dates_interval as DI
+WHERE Users.ga_session_id IS NOT NULL
+AND Users.event_date BETWEEN DI.end_date AND DI.input_date
+AND D.device_os IS NOT NULL
+);
+
+INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
+SELECT DISTINCT
+  CURRENT_TIMESTAMP() AS processed_timestamp,
+  EUD.feature_date,
+  EUD.user_pseudo_id,
+  COALESCE(active_users_past_1_30_day,0) AS active_users_past_1_30_day,
+  COALESCE(active_users_past_30_60_day,0) AS active_users_past_30_60_day,
+  COALESCE(active_users_past_60_90_day,0) AS active_users_past_60_90_day,
+  COALESCE(active_users_past_90_120_day,0) AS active_users_past_90_120_day,
+  COALESCE(active_users_past_120_150_day,0) AS active_users_past_120_150_day,
+  COALESCE(active_users_past_150_180_day,0) AS active_users_past_150_180_day,
+  COALESCE(purchases_past_1_30_day,0) AS purchases_past_1_30_day,
+  COALESCE(purchases_past_30_60_day,0) AS purchases_past_30_60_day,
+  COALESCE(purchases_past_60_90_day,0) AS purchases_past_60_90_day,
+  COALESCE(purchases_past_90_120_day,0) AS purchases_past_90_120_day,
+  COALESCE(purchases_past_120_150_day,0) AS purchases_past_120_150_day,
+  COALESCE(purchases_past_150_180_day,0) AS purchases_past_150_180_day,
+  COALESCE(visits_past_1_30_day,0) AS visits_past_1_30_day,
+  COALESCE(visits_past_30_60_day,0) AS visits_past_30_60_day,
+  COALESCE(visits_past_60_90_day,0) AS visits_past_60_90_day,
+  COALESCE(visits_past_90_120_day,0) AS visits_past_90_120_day,
+  COALESCE(visits_past_120_150_day,0) AS visits_past_120_150_day,
+  COALESCE(visits_past_150_180_day,0) AS visits_past_150_180_day,
+  COALESCE(view_items_past_1_30_day,0) AS view_items_past_1_30_day,
+  COALESCE(view_items_past_30_60_day,0) AS view_items_past_30_60_day,
+  COALESCE(view_items_past_60_90_day,0) AS view_items_past_60_90_day,
+  COALESCE(view_items_past_90_120_day,0) AS view_items_past_90_120_day,
+  COALESCE(view_items_past_120_150_day,0) AS view_items_past_120_150_day,
+  COALESCE(view_items_past_150_180_day,0) AS view_items_past_150_180_day,
+  COALESCE(add_to_carts_past_1_30_day,0) AS add_to_carts_past_1_30_day,
+  COALESCE(add_to_carts_past_30_60_day,0) AS add_to_carts_past_30_60_day,
+  COALESCE(add_to_carts_past_60_90_day,0) AS add_to_carts_past_60_90_day,
+  COALESCE(add_to_carts_past_90_120_day,0) AS add_to_carts_past_90_120_day,
+  COALESCE(add_to_carts_past_120_150_day,0) AS add_to_carts_past_120_150_day,
+  COALESCE(add_to_carts_past_150_180_day,0) AS add_to_carts_past_150_180_day,
+  COALESCE(checkouts_past_1_30_day,0) AS checkouts_past_1_30_day,
+  COALESCE(checkouts_past_30_60_day,0) AS checkouts_past_30_60_day,
+  COALESCE(checkouts_past_60_90_day,0) AS checkouts_past_60_90_day,
+  COALESCE(checkouts_past_90_120_day,0) AS checkouts_past_90_120_day,
+  COALESCE(checkouts_past_120_150_day,0) AS checkouts_past_120_150_day,
+  COALESCE(checkouts_past_150_180_day,0) AS checkouts_past_150_180_day,
+  GREATEST(0.0, COALESCE(COALESCE(G.user_ltv_revenue_past_1_30_day,0.0) - COALESCE(G.user_ltv_revenue_past_0_day,0.0), 0.0)) AS ltv_revenue_past_1_30_day,
+  GREATEST( 0.0, COALESCE(COALESCE(G.user_ltv_revenue_past_1_90_day,0.0) - COALESCE(G.user_ltv_revenue_past_1_30_day,0.0), 0.0)) AS ltv_revenue_past_30_90_day,
+  GREATEST(0.0, COALESCE(COALESCE(G.user_ltv_revenue_1_180_day,0.0) - COALESCE(G.user_ltv_revenue_past_1_90_day,0.0), 0.0)) AS ltv_revenue_past_90_180_day,
+  FROM events_users AS EUD -- events_users drives the output: the WHERE below discards rows with no EUD side, so LEFT JOIN returns the same rows as the former FULL OUTER JOIN
+  LEFT JOIN rolling_active_users AS A
+  ON EUD.user_pseudo_id = A.user_pseudo_id AND EUD.feature_date = A.feature_date
+  LEFT JOIN rolling_purchases_per_user AS B
+  ON EUD.user_pseudo_id = B.user_pseudo_id AND EUD.feature_date = B.feature_date
+  LEFT JOIN rolling_visits_per_user AS C
+  ON EUD.user_pseudo_id = C.user_pseudo_id AND EUD.feature_date = C.feature_date
+  LEFT JOIN rolling_view_item_per_user AS D
+  ON EUD.user_pseudo_id = D.user_pseudo_id AND EUD.feature_date = D.feature_date
+  LEFT JOIN rolling_add_to_cart_per_user AS E
+  ON EUD.user_pseudo_id = E.user_pseudo_id AND EUD.feature_date = E.feature_date
+  LEFT JOIN rolling_checkout_per_user AS F
+  ON EUD.user_pseudo_id = F.user_pseudo_id AND EUD.feature_date = F.feature_date
+  LEFT JOIN rolling_revenue_per_user AS G
+  ON EUD.user_pseudo_id = G.user_pseudo_id AND EUD.feature_date = G.feature_date
+  WHERE EUD.user_pseudo_id IS NOT NULL
+  ;
diff --git a/sql/query/invoke_backfill_user_rolling_window_metrics.sqlx b/sql/query/invoke_backfill_user_rolling_window_metrics.sqlx
index 6b110dba..639f6592 100644
--- a/sql/query/invoke_backfill_user_rolling_window_metrics.sqlx
+++ b/sql/query/invoke_backfill_user_rolling_window_metrics.sqlx
@@ -12,26 +12,238 @@
 -- See the License for the specific language governing permissions and
 -- limitations under the License.
-DECLARE input_date DATE;
-DECLARE end_date DATE;
-DECLARE rows_added INT64 DEFAULT NULL;
-
 DECLARE max_date DATE;
 DECLARE min_date DATE;
 SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`);
 SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`);
-SELECT max_date;
-SELECT min_date;
-
-FOR record IN
-  (SELECT DISTINCT
-    event_date
-  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
-  WHERE event_date BETWEEN min_date AND max_date
-  ORDER BY event_date ASC )
-DO
-  SET input_date= (SELECT record.event_date);
-  SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY));
-  CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added);
-  SELECT rows_added;
-END FOR;
\ No newline at end of file
+
+CREATE OR REPLACE TEMP TABLE dates_interval as ( -- one row per distinct event_date between min_date and max_date; replaces the removed per-date FOR loop
+  SELECT DISTINCT
+    event_date as input_date, -- date the features are computed for (aliased to feature_date downstream)
+    DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date -- earliest event_date included in that date's lookback window
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
+  WHERE event_date BETWEEN min_date AND max_date
+  ORDER BY input_date DESC -- NOTE(review): this table is only cross-joined below, so the ORDER BY looks unnecessary - confirm before dropping
+);
+
+# The window aggregations below are computed once for every input_date in dates_interval (the training and inference date ranges).
+##All users metrics: 7-day active users, 8–30-day active users, 31-90-day active users +CREATE OR REPLACE TEMP TABLE rolling_active_users AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 1 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_1_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 2 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_2_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 3 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_3_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 4 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_4_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 5 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_5_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 6 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_6_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 7 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_8_14_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 15 AND 30 WHEN TRUE THEN event_date END)) OVER(PARTITION BY user_pseudo_id, input_date) AS active_users_past_15_30_day + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND E.engagement_time_msec > 0 + GROUP 
BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day purchase per user, 2-7-day purchases per user, 8-14-day purchases per user, 15-30-day purchases per user, 31–90-day purchases per user +CREATE OR REPLACE TEMP TABLE rolling_purchases_per_user AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 1 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_1_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 2 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_2_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 3 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_3_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 4 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_4_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 5 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_5_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 6 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_6_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 7 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS purchases_past_8_14_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 15 AND 30 WHEN TRUE THEN ecommerce.transaction_id END)) OVER(PARTITION BY user_pseudo_id, input_date) 
AS purchases_past_15_30_day, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND event_name='purchase' + AND ga_session_id IS NOT NULL + AND ecommerce.transaction_id IS NOT NULL + AND ecommerce.transaction_id <> '(not set)' + GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day visits per user, 2-7-day visits per user, 7-14-day visits per user, 15-30-day visits per user, 31–90-day visits per user +CREATE OR REPLACE TEMP TABLE rolling_visits_per_user AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 1 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_1_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 2 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_2_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 3 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_3_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 4 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_4_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 5 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_5_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 6 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_6_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 7 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, 
input_date) AS visits_past_8_14_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 15 AND 30 WHEN TRUE THEN ga_session_id END)) OVER(PARTITION BY user_pseudo_id, input_date) AS visits_past_15_30_day + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND ga_session_id IS NOT NULL + GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day view_item per user, 2-7-day view_item per user, 7-14-day view_item per user, 15-30-day view_item per user, 31–90-day view_item per user +CREATE OR REPLACE TEMP TABLE rolling_view_item_per_user AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 1 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_1_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 2 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_2_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 3 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_3_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 4 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_4_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 5 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_5_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 6 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_6_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 7 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_7_day, + 
MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_8_14_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 15 AND 30 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS view_items_past_15_30_day + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND event_name='view_item' + AND ga_session_id IS NOT NULL + GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: 1-day add_to_cart per user, 2-7-day add_to_cart per user, 7-14-day add_to_cart per user, 15-30-day add_to_cart per user, 31–90-day add_to_cart per user, 1-day checkout per user, 2-7-day checkout per user, 7-14-day checkout per user, 15-30-day checkout per user, 31–90-day checkout per user +CREATE OR REPLACE TEMP TABLE rolling_add_to_cart_per_user AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 1 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_1_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 2 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_2_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 3 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_3_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 4 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_4_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 5 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_5_day, + MAX(COUNT(DISTINCT 
CASE DATE_DIFF(input_date, event_date, DAY) = 6 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_6_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 7 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_8_14_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 15 AND 30 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS add_to_carts_past_15_30_day, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND event_name='add_to_cart' + AND ga_session_id IS NOT NULL + GROUP BY user_pseudo_id, feature_date +); + +## Past User metrics: count of distinct begin_checkout event timestamps per user for each of the past 1 to 7 days, for the past 8-14 days and for the past 15-30 days +CREATE OR REPLACE TEMP TABLE rolling_checkout_per_user AS ( + SELECT + user_pseudo_id, + input_date as feature_date, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 1 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_1_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 2 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY
user_pseudo_id, input_date) AS checkouts_past_2_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 3 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_3_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 4 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_4_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 5 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_5_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 6 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_6_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) = 7 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_7_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 8 AND 14 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_8_14_day, + MAX(COUNT(DISTINCT CASE DATE_DIFF(input_date, event_date, DAY) BETWEEN 15 AND 30 WHEN TRUE THEN event_timestamp END)) OVER(PARTITION BY user_pseudo_id, input_date) AS checkouts_past_15_30_day + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND event_name='begin_checkout' + AND ga_session_id IS NOT NULL + GROUP BY user_pseudo_id, feature_date +); + +## All users in the platform +CREATE OR REPLACE TEMP TABLE events_users as ( + SELECT DISTINCT + Users.user_pseudo_id, + DI.input_date as feature_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON Users.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE Users.ga_session_id IS NOT NULL + AND Users.event_date 
BETWEEN DI.end_date AND DI.input_date + AND D.device_os IS NOT NULL +); + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT DISTINCT + CURRENT_TIMESTAMP() AS processed_timestamp, + EUD.feature_date, + EUD.user_pseudo_id, + COALESCE(active_users_past_1_day,0) AS active_users_past_1_day, + COALESCE(active_users_past_2_day,0) AS active_users_past_2_day, + COALESCE(active_users_past_3_day,0) AS active_users_past_3_day, + COALESCE(active_users_past_4_day,0) AS active_users_past_4_day, + COALESCE(active_users_past_5_day,0) AS active_users_past_5_day, + COALESCE(active_users_past_6_day,0) AS active_users_past_6_day, + COALESCE(active_users_past_7_day,0) AS active_users_past_7_day, + COALESCE(active_users_past_8_14_day,0) AS active_users_past_8_14_day, + COALESCE(active_users_past_15_30_day,0) AS active_users_past_15_30_day, + COALESCE(purchases_past_1_day,0) AS purchases_past_1_day, + COALESCE(purchases_past_2_day,0) AS purchases_past_2_day, + COALESCE(purchases_past_3_day,0) AS purchases_past_3_day, + COALESCE(purchases_past_4_day,0) AS purchases_past_4_day, + COALESCE(purchases_past_5_day,0) AS purchases_past_5_day, + COALESCE(purchases_past_6_day,0) AS purchases_past_6_day, + COALESCE(purchases_past_7_day,0) AS purchases_past_7_day, + COALESCE(purchases_past_8_14_day,0) AS purchases_past_8_14_day, + COALESCE(purchases_past_15_30_day,0) AS purchases_past_15_30_day, + COALESCE(visits_past_1_day,0) AS visits_past_1_day, + COALESCE(visits_past_2_day,0) AS visits_past_2_day, + COALESCE(visits_past_3_day,0) AS visits_past_3_day, + COALESCE(visits_past_4_day,0) AS visits_past_4_day, + COALESCE(visits_past_5_day,0) AS visits_past_5_day, + COALESCE(visits_past_6_day,0) AS visits_past_6_day, + COALESCE(visits_past_7_day,0) AS visits_past_7_day, + COALESCE(visits_past_8_14_day,0) AS visits_past_8_14_day, + COALESCE(visits_past_15_30_day,0) AS visits_past_15_30_day, + COALESCE(view_items_past_1_day,0) AS view_items_past_1_day, + COALESCE(view_items_past_2_day,0) 
AS view_items_past_2_day, + COALESCE(view_items_past_3_day,0) AS view_items_past_3_day, + COALESCE(view_items_past_4_day,0) AS view_items_past_4_day, + COALESCE(view_items_past_5_day,0) AS view_items_past_5_day, + COALESCE(view_items_past_6_day,0) AS view_items_past_6_day, + COALESCE(view_items_past_7_day,0) AS view_items_past_7_day, + COALESCE(view_items_past_8_14_day,0) AS view_items_past_8_14_day, + COALESCE(view_items_past_15_30_day,0) AS view_items_past_15_30_day, + COALESCE(add_to_carts_past_1_day,0) AS add_to_carts_past_1_day, + COALESCE(add_to_carts_past_2_day,0) AS add_to_carts_past_2_day, + COALESCE(add_to_carts_past_3_day,0) AS add_to_carts_past_3_day, + COALESCE(add_to_carts_past_4_day,0) AS add_to_carts_past_4_day, + COALESCE(add_to_carts_past_5_day,0) AS add_to_carts_past_5_day, + COALESCE(add_to_carts_past_6_day,0) AS add_to_carts_past_6_day, + COALESCE(add_to_carts_past_7_day,0) AS add_to_carts_past_7_day, + COALESCE(add_to_carts_past_8_14_day,0) AS add_to_carts_past_8_14_day, + COALESCE(add_to_carts_past_15_30_day,0) AS add_to_carts_past_15_30_day, + COALESCE(checkouts_past_1_day,0) AS checkouts_past_1_day, + COALESCE(checkouts_past_2_day,0) AS checkouts_past_2_day, + COALESCE(checkouts_past_3_day,0) AS checkouts_past_3_day, + COALESCE(checkouts_past_4_day,0) AS checkouts_past_4_day, + COALESCE(checkouts_past_5_day,0) AS checkouts_past_5_day, + COALESCE(checkouts_past_6_day,0) AS checkouts_past_6_day, + COALESCE(checkouts_past_7_day,0) AS checkouts_past_7_day, + COALESCE(checkouts_past_8_14_day,0) AS checkouts_past_8_14_day, + COALESCE(checkouts_past_15_30_day,0) AS checkouts_past_15_30_day + FROM events_users AS EUD + FULL OUTER JOIN rolling_active_users AS A + ON EUD.user_pseudo_id = A.user_pseudo_id AND EUD.feature_date = A.feature_date + FULL OUTER JOIN rolling_purchases_per_user AS B + ON EUD.user_pseudo_id = B.user_pseudo_id AND EUD.feature_date = B.feature_date + FULL OUTER JOIN rolling_visits_per_user AS C + ON EUD.user_pseudo_id = 
C.user_pseudo_id AND EUD.feature_date = C.feature_date + FULL OUTER JOIN rolling_view_item_per_user AS D + ON EUD.user_pseudo_id = D.user_pseudo_id AND EUD.feature_date = D.feature_date + FULL OUTER JOIN rolling_add_to_cart_per_user AS E + ON EUD.user_pseudo_id = E.user_pseudo_id AND EUD.feature_date = E.feature_date + FULL OUTER JOIN rolling_checkout_per_user AS F + ON EUD.user_pseudo_id = F.user_pseudo_id AND EUD.feature_date = F.feature_date + WHERE EUD.user_pseudo_id IS NOT NULL + ; \ No newline at end of file diff --git a/sql/query/invoke_backfill_user_scoped_lifetime_metrics.sqlx b/sql/query/invoke_backfill_user_scoped_lifetime_metrics.sqlx index a17ba034..f2938a71 100644 --- a/sql/query/invoke_backfill_user_scoped_lifetime_metrics.sqlx +++ b/sql/query/invoke_backfill_user_scoped_lifetime_metrics.sqlx @@ -12,26 +12,138 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE rows_added INT64 DEFAULT NULL; - DECLARE max_date DATE; DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date BETWEEN min_date AND max_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added); - SELECT rows_added; -END FOR; +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, + DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM 
`{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date BETWEEN min_date AND max_date + ORDER BY input_date DESC +); + +# Run these once each day. +## Active users, Average daily purchasers, Average engagement time, Average engagement time per session, DAU / MAU, DAU / WAU, First time purchasers, First-time purchaser conversion, First-time purchasers per new user, Max daily purchasers, Min daily purchasers, New users, Returning users, Total purchasers, Total users, User conversion rate, User engagement, WAU / MAU +CREATE OR REPLACE TEMP TABLE engagement as ( + SELECT + user_pseudo_id, + input_date as feature_date, + ga_session_id as session_id, + MAX(engagement_time_msec) as engagement_time_msec, + IFNULL(SUM(ecommerce.purchase_revenue_in_usd),0.0) as purchase_revenue_in_usd, + LOGICAL_OR(CASE WHEN (event_name='purchase' AND ecommerce.transaction_id IS NULL) THEN TRUE ELSE FALSE END) as has_invalid_transactions, + LOGICAL_OR(CASE WHEN (event_name='purchase' AND ga_session_id IS NOT NULL AND ecommerce.transaction_id IS NOT NULL AND ecommerce.transaction_id <> '(not set)') THEN TRUE ELSE FALSE END) as converted_in_session, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY user_pseudo_id, feature_date, session_id +); + +CREATE OR REPLACE TEMP TABLE revenue_users as ( + SELECT + input_date as feature_date, + SUM(ecommerce.purchase_revenue_in_usd) as sum_revenue_per_day, + COUNT(DISTINCT CASE WHEN engagement_time_msec > 0 THEN user_pseudo_id END) as active_users + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date 
AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY feature_date +); + +CREATE OR REPLACE TEMP TABLE first_purchasers as ( + SELECT + input_date as feature_date, + SUM(first_time_purchasers) as first_time_purchasers /* add up the per-day counts of users whose first purchase fell inside the window; COUNT here would return the number of days instead */ + FROM( + SELECT + event_date, + COUNT(user_pseudo_id) as first_time_purchasers + FROM ( + SELECT + user_pseudo_id, + event_date, + RANK() OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS unique_purchase + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + WHERE event_name IN ('purchase','in_app_purchase') + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY user_pseudo_id, event_date, event_timestamp) + WHERE unique_purchase=1 + GROUP BY event_date) + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + GROUP BY feature_date + ); + + CREATE OR REPLACE TEMP TABLE new_users_ as ( + SELECT + input_date as feature_date, + COUNT(distinct case when event_name="first_visit" then user_pseudo_id end) as new_users + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY feature_date +); + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` +SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + DI.input_date AS feature_date, + COALESCE(COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END),0) AS lifetime_purchasers_users, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END),COUNT(DISTINCT event_date)),0.0) AS
lifetime_average_daily_purchasers, + COALESCE(MAX(active_users),0) AS lifetime_active_users, + COALESCE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),0.0) AS lifetime_DAU, + COALESCE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH)),0.0) AS lifetime_MAU, + COALESCE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK)),0.0) AS lifetime_WAU, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH))),0.0) AS lifetime_dau_per_mau, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK))),0.0) AS lifetime_dau_per_wau, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH))),0.0) AS lifetime_wau_per_mau, + COALESCE(ROUND(SUM(e.engagement_time_msec)/1000),0) AS lifetime_users_engagement_duration_seconds, + COALESCE(SAFE_DIVIDE(ROUND(SUM(e.engagement_time_msec)/1000),MAX(active_users)),0.0) AS lifetime_average_engagement_time, + COALESCE(SAFE_DIVIDE((ROUND(SUM(e.engagement_time_msec)/1000)),COUNT(e.session_id)),0.0) AS lifetime_average_engagement_time_per_session, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT e.session_id),COUNT(DISTINCT e.user_pseudo_id)),0.0) AS lifetime_average_sessions_per_user, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END)),0.0) AS lifetime_ARPPU, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),MAX(active_users)),0.0) AS lifetime_ARPU, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),COUNT(DISTINCT event_date)),0.0) AS lifetime_average_daily_revenue, + COALESCE(MAX(sum_revenue_per_day),0.0) AS lifetime_max_daily_revenue, + 
COALESCE(MIN(sum_revenue_per_day),0.0) AS lifetime_min_daily_revenue, + COALESCE(MAX(new_users),0) AS lifetime_new_users, + COALESCE(COUNT(DISTINCT e.user_pseudo_id) - MAX(new_users),0) AS lifetime_returning_users, + COALESCE(MAX(first_time_purchasers),0) AS lifetime_first_time_purchasers, + COALESCE(SAFE_DIVIDE(MAX(first_time_purchasers),MAX(active_users)),0.0) AS lifetime_first_time_purchaser_conversion, + COALESCE(SAFE_DIVIDE(MAX(first_time_purchasers),MAX(new_users)),0.0) AS lifetime_first_time_purchasers_per_new_user, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN e.converted_in_session = TRUE THEN e.user_pseudo_id END),COUNT(DISTINCT e.user_pseudo_id)),0.0) AS lifetime_avg_user_conversion_rate, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN e.converted_in_session = TRUE THEN e.session_id END),COUNT(DISTINCT e.session_id)),0.0) AS lifetime_avg_session_conversion_rate, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as EV + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON EV.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + INNER JOIN engagement as e + ON EV.user_pseudo_id = e.user_pseudo_id AND DI.input_date = e.feature_date AND EV.ga_session_id = e.session_id + INNER JOIN revenue_users as r + ON DI.input_date = r.feature_date + INNER JOIN first_purchasers as fp + ON DI.input_date = fp.feature_date + INNER JOIN new_users_ as nu + ON DI.input_date = nu.feature_date + WHERE EV.event_date BETWEEN DI.end_date AND DI.input_date + GROUP BY DI.input_date +; \ No newline at end of file diff --git a/sql/query/invoke_backfill_user_scoped_metrics.sqlx b/sql/query/invoke_backfill_user_scoped_metrics.sqlx index a17ba034..ddb4b8ae 100644 --- a/sql/query/invoke_backfill_user_scoped_metrics.sqlx +++ b/sql/query/invoke_backfill_user_scoped_metrics.sqlx @@ -12,26 +12,138 @@ -- See the License for the specific language governing permissions and -- limitations under the License. 
-DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE rows_added INT64 DEFAULT NULL; - DECLARE max_date DATE; DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date BETWEEN min_date AND max_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added); - SELECT rows_added; -END FOR; +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, + DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date BETWEEN min_date AND max_date + ORDER BY input_date DESC +); + +# Run these once each day. 
+## Active users, Average daily purchasers, Average engagement time, Average engagement time per session, DAU / MAU, DAU / WAU, First time purchasers, First-time purchaser conversion, First-time purchasers per new user, Max daily purchasers, Min daily purchasers, New users, Returning users, Total purchasers, Total users, User conversion rate, User engagement, WAU / MAU +CREATE OR REPLACE TEMP TABLE engagement as ( + SELECT + user_pseudo_id, + input_date as feature_date, + ga_session_id as session_id, + MAX(engagement_time_msec) as engagement_time_msec, + IFNULL(SUM(ecommerce.purchase_revenue_in_usd),0.0) as purchase_revenue_in_usd, + LOGICAL_OR(CASE WHEN (event_name='purchase' AND ecommerce.transaction_id IS NULL) THEN TRUE ELSE FALSE END) as has_invalid_transactions, + LOGICAL_OR(CASE WHEN (event_name='purchase' AND ga_session_id IS NOT NULL AND ecommerce.transaction_id IS NOT NULL AND ecommerce.transaction_id <> '(not set)') THEN TRUE ELSE FALSE END) as converted_in_session, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY user_pseudo_id, feature_date, session_id +); + +CREATE OR REPLACE TEMP TABLE revenue_users as ( + SELECT + input_date as feature_date, + SUM(ecommerce.purchase_revenue_in_usd) as sum_revenue_per_day, + COUNT(DISTINCT CASE WHEN engagement_time_msec > 0 THEN user_pseudo_id END) as active_users + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY feature_date +); + +CREATE OR REPLACE TEMP TABLE 
first_purchasers as ( + SELECT + input_date as feature_date, + SUM(first_time_purchasers) as first_time_purchasers /* add up the per-day counts of users whose first purchase fell inside the window; COUNT here would return the number of days instead */ + FROM( + SELECT + event_date, + COUNT(user_pseudo_id) as first_time_purchasers + FROM ( + SELECT + user_pseudo_id, + event_date, + RANK() OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS unique_purchase + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + WHERE event_name IN ('purchase','in_app_purchase') + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY user_pseudo_id, event_date, event_timestamp) + WHERE unique_purchase=1 + GROUP BY event_date) + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + GROUP BY feature_date +); + +CREATE OR REPLACE TEMP TABLE new_users_ as ( + SELECT + input_date as feature_date, + COUNT(distinct case when event_name="first_visit" then user_pseudo_id end) as new_users + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + WHERE event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY feature_date +); + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + DI.input_date AS feature_date, + COALESCE(COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END),0) AS purchasers_users, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END),COUNT(DISTINCT event_date)),0.0) AS average_daily_purchasers, + COALESCE(MAX(active_users),0) AS active_users, + COALESCE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),0.0) AS DAU, +
COALESCE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH)),0.0) AS MAU, + COALESCE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK)),0.0) AS WAU, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH))),0.0) AS dau_per_mau, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK))),0.0) AS dau_per_wau, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH))),0.0) AS wau_per_mau, + COALESCE(ROUND(SUM(e.engagement_time_msec)/1000),0) AS users_engagement_duration_seconds, + COALESCE(SAFE_DIVIDE(ROUND(SUM(e.engagement_time_msec)/1000),MAX(active_users)),0.0) AS average_engagement_time, + COALESCE(SAFE_DIVIDE((ROUND(SUM(e.engagement_time_msec)/1000)),COUNT(e.session_id)),0.0) AS average_engagement_time_per_session, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT e.session_id),COUNT(DISTINCT e.user_pseudo_id)),0.0) AS average_sessions_per_user, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END)),0.0) AS ARPPU, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),MAX(active_users)),0.0) AS ARPU, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),COUNT(DISTINCT event_date)),0.0) AS average_daily_revenue, + COALESCE(MAX(sum_revenue_per_day),0.0) AS max_daily_revenue, + COALESCE(MIN(sum_revenue_per_day),0.0) AS min_daily_revenue, + COALESCE(MAX(new_users),0) AS new_users, + COALESCE(COUNT(DISTINCT e.user_pseudo_id) - MAX(new_users),0) AS returning_users, + COALESCE(MAX(first_time_purchasers),0) AS first_time_purchasers, + 
COALESCE(SAFE_DIVIDE(MAX(first_time_purchasers),MAX(active_users)),0.0) AS first_time_purchaser_conversion, + COALESCE(SAFE_DIVIDE(MAX(first_time_purchasers),MAX(new_users)),0.0) AS first_time_purchasers_per_new_user, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN e.converted_in_session = TRUE THEN e.user_pseudo_id END),COUNT(DISTINCT e.user_pseudo_id)),0.0) AS avg_user_conversion_rate, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN e.converted_in_session = TRUE THEN e.session_id END),COUNT(DISTINCT e.session_id)),0.0) AS avg_session_conversion_rate, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as EV + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON EV.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + INNER JOIN engagement as e + ON EV.user_pseudo_id = e.user_pseudo_id AND DI.input_date = e.feature_date AND EV.ga_session_id = e.session_id + INNER JOIN revenue_users as r + ON DI.input_date = r.feature_date + INNER JOIN first_purchasers as fp + ON DI.input_date = fp.feature_date + INNER JOIN new_users_ as nu + ON DI.input_date = nu.feature_date + WHERE EV.event_date BETWEEN DI.end_date AND DI.input_date + GROUP BY DI.input_date +; \ No newline at end of file diff --git a/sql/query/invoke_backfill_user_scoped_segmentation_metrics.sqlx b/sql/query/invoke_backfill_user_scoped_segmentation_metrics.sqlx index 05073e70..e1bb24bf 100644 --- a/sql/query/invoke_backfill_user_scoped_segmentation_metrics.sqlx +++ b/sql/query/invoke_backfill_user_scoped_segmentation_metrics.sqlx @@ -12,23 +12,136 @@ -- See the License for the specific language governing permissions and -- limitations under the License. 
-DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE rows_added INT64 DEFAULT NULL; - DECLARE min_date DATE; SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date >= min_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL 15 DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, rows_added); - SELECT rows_added; -END FOR; +CREATE OR REPLACE TEMP TABLE dates_interval as ( + SELECT DISTINCT + event_date as input_date, + DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date + FROM `{{mds_project_id}}.{{mds_dataset}}.event` + WHERE event_date >= min_date + ORDER BY input_date DESC +); + +# Run these once each day. +## Active users, Average daily purchasers, Average engagement time, Average engagement time per session, DAU / MAU, DAU / WAU, First time purchasers, First-time purchaser conversion, First-time purchasers per new user, Max daily purchasers, Min daily purchasers, New users, Returning users, Total purchasers, Total users, User conversion rate, User engagement, WAU / MAU +CREATE OR REPLACE TEMP TABLE engagement as ( +SELECT + user_pseudo_id, + input_date as feature_date, + ga_session_id as session_id, + MAX(engagement_time_msec) as engagement_time_msec, + IFNULL(SUM(ecommerce.purchase_revenue_in_usd),0.0) as purchase_revenue_in_usd, + LOGICAL_OR(CASE WHEN (event_name='purchase' AND ecommerce.transaction_id IS NULL) THEN TRUE ELSE FALSE END) as has_invalid_transactions, + LOGICAL_OR(CASE WHEN (event_name='purchase' AND ga_session_id IS NOT NULL AND ecommerce.transaction_id IS NOT NULL AND ecommerce.transaction_id <> '(not set)') THEN TRUE ELSE FALSE END) as converted_in_session, +FROM 
`{{mds_project_id}}.{{mds_dataset}}.event` as E +INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D +ON E.device_type_id = D.device_type_id +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL +GROUP BY user_pseudo_id, feature_date, session_id +); + +CREATE OR REPLACE TEMP TABLE revenue_users as ( +SELECT + input_date as feature_date, + SUM(ecommerce.purchase_revenue_in_usd) as sum_revenue_per_day, + COUNT(DISTINCT CASE WHEN engagement_time_msec > 0 THEN user_pseudo_id END) as active_users +FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E +INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D +ON E.device_type_id = D.device_type_id +CROSS JOIN dates_interval as DI +WHERE E.event_date BETWEEN DI.end_date AND DI.input_date +AND E.ga_session_id IS NOT NULL +AND D.device_os IS NOT NULL +GROUP BY feature_date +); + +CREATE OR REPLACE TEMP TABLE first_purchasers as ( +SELECT + input_date as feature_date, + SUM(first_time_purchasers) as first_time_purchasers /* add up the per-day counts of users whose first purchase fell inside the window; COUNT here would return the number of days instead */ +FROM( + SELECT + event_date, + COUNT(user_pseudo_id) as first_time_purchasers + FROM ( + SELECT + user_pseudo_id, + event_date, + RANK() OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp ASC) AS unique_purchase + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON E.device_type_id = D.device_type_id + WHERE event_name IN ('purchase','in_app_purchase') + AND E.ga_session_id IS NOT NULL + AND D.device_os IS NOT NULL + GROUP BY user_pseudo_id, event_date, event_timestamp) + WHERE unique_purchase=1 + GROUP BY event_date) +CROSS JOIN dates_interval as DI +WHERE event_date BETWEEN DI.end_date AND DI.input_date +GROUP BY feature_date +); + +CREATE OR REPLACE TEMP TABLE new_users_ as ( +SELECT + input_date as feature_date, + COUNT(distinct case when event_name="first_visit" then user_pseudo_id end) as new_users +FROM
`{{mds_project_id}}.{{mds_dataset}}.event` as E +INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D +ON E.device_type_id = D.device_type_id +CROSS JOIN dates_interval as DI +WHERE event_date BETWEEN DI.end_date AND DI.input_date +AND E.ga_session_id IS NOT NULL +AND D.device_os IS NOT NULL +GROUP BY feature_date +); + +INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` + SELECT + CURRENT_TIMESTAMP() AS processed_timestamp, + DI.input_date AS feature_date, + COALESCE(COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END),0) AS purchasers_users, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END),COUNT(DISTINCT event_date)),0.0) AS average_daily_purchasers, + COALESCE(MAX(active_users),0) AS active_users, + COALESCE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),0.0) AS DAU, + COALESCE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH)),0.0) AS MAU, + COALESCE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK)),0.0) AS WAU, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH))),0.0) AS dau_per_mau, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),COUNT(DISTINCT event_date)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK))),0.0) AS dau_per_wau, + COALESCE(SAFE_DIVIDE(SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), WEEK)),SAFE_DIVIDE(MAX(active_users),DATE_DIFF(MAX(event_date), MIN(event_date), MONTH))),0.0) AS wau_per_mau, + COALESCE(ROUND(SUM(e.engagement_time_msec)/1000),0) AS users_engagement_duration_seconds, + COALESCE(SAFE_DIVIDE(ROUND(SUM(e.engagement_time_msec)/1000),MAX(active_users)),0.0) AS average_engagement_time, + 
COALESCE(SAFE_DIVIDE((ROUND(SUM(e.engagement_time_msec)/1000)),COUNT(e.session_id)),0.0) AS average_engagement_time_per_session, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT e.session_id),COUNT(DISTINCT e.user_pseudo_id)),0.0) AS average_sessions_per_user, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),COUNT(DISTINCT CASE WHEN (event_name='purchase' AND NOT e.has_invalid_transactions) THEN e.user_pseudo_id END)),0.0) AS ARPPU, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),MAX(active_users)),0.0) AS ARPU, + COALESCE(SAFE_DIVIDE(SUM(e.purchase_revenue_in_usd),COUNT(DISTINCT event_date)),0.0) AS average_daily_revenue, + COALESCE(MAX(sum_revenue_per_day),0.0) AS max_daily_revenue, + COALESCE(MIN(sum_revenue_per_day),0.0) AS min_daily_revenue, + COALESCE(MAX(new_users),0) AS new_users, + COALESCE(COUNT(DISTINCT e.user_pseudo_id) - MAX(new_users),0) AS returning_users, + COALESCE(MAX(first_time_purchasers),0) AS first_time_purchasers, + COALESCE(SAFE_DIVIDE(MAX(first_time_purchasers),MAX(active_users)),0.0) AS first_time_purchaser_conversion, + COALESCE(SAFE_DIVIDE(MAX(first_time_purchasers),MAX(new_users)),0.0) AS first_time_purchasers_per_new_user, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN e.converted_in_session = TRUE THEN e.user_pseudo_id END),COUNT(DISTINCT e.user_pseudo_id)),0.0) AS avg_user_conversion_rate, + COALESCE(SAFE_DIVIDE(COUNT(DISTINCT CASE WHEN e.converted_in_session = TRUE THEN e.session_id END),COUNT(DISTINCT e.session_id)),0.0) AS avg_session_conversion_rate, + FROM `{{mds_project_id}}.{{mds_dataset}}.event` as EV + INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D + ON EV.device_type_id = D.device_type_id + CROSS JOIN dates_interval as DI + INNER JOIN engagement as e + ON EV.user_pseudo_id = e.user_pseudo_id AND DI.input_date = e.feature_date AND EV.ga_session_id = e.session_id + INNER JOIN revenue_users as r + ON DI.input_date = r.feature_date + INNER JOIN first_purchasers as fp + ON DI.input_date = fp.feature_date + INNER 
JOIN new_users_ as nu
+    ON DI.input_date = nu.feature_date
+  WHERE EV.event_date BETWEEN DI.end_date AND DI.input_date
+  GROUP BY DI.input_date
+;
\ No newline at end of file diff --git a/sql/query/invoke_backfill_user_segmentation_dimensions.sqlx b/sql/query/invoke_backfill_user_segmentation_dimensions.sqlx index e4e3d568..7c525acd 100644 --- a/sql/query/invoke_backfill_user_segmentation_dimensions.sqlx +++ b/sql/query/invoke_backfill_user_segmentation_dimensions.sqlx @@ -12,23 +12,115 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE users_added INT64 DEFAULT NULL; - DECLARE min_date DATE; SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT min_date; -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date >= min_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, users_added); - SELECT users_added; -END FOR; \ No newline at end of file
+CREATE OR REPLACE TEMP TABLE dates_interval as ( -- one row per distinct event_date >= min_date (input_date) with the start of its lookback window (end_date)
+  SELECT DISTINCT
+    event_date as input_date,
+    DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
+  WHERE event_date >= min_date
+  ORDER BY input_date DESC -- NOTE(review): row order of a stored table is not relied on by any later statement here; this ORDER BY looks unnecessary - confirm
+);
+
+CREATE OR REPLACE TEMP TABLE user_dimensions_event_session_scoped as ( -- window values per (user_pseudo_id, input_date) over the events inside [end_date, input_date]
+  SELECT DISTINCT -- NOTE(review): the windows below have ORDER BY but no explicit frame, so the default frame ends at the current row and LAST_VALUE yields the current row's (peer) value; DISTINCT can therefore keep several rows per user/date and the MAX() in the final INSERT decides the stored value - confirm this is intended
+    DI.input_date as feature_date,
+    user_pseudo_id,
+    LAST_VALUE(format_date('%m',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS month_of_the_year,
+    LAST_VALUE(format_date('%U',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS week_of_the_year,
+    LAST_VALUE(format_date('%d',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS day_of_the_month,
+    LAST_VALUE(format_date('%w',event_date)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS day_of_week,
+    --LAST_VALUE(format("%02d",extract(hour from event_timestamp))) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as hour_of_day,
+    --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, DAY)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_day,
+    --LAST_VALUE(TIMESTAMP_DIFF(event_timestamp, TIMESTAMP(DI.input_date), HOUR)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_hour,
+    --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, WEEK)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_week,
+    --LAST_VALUE(DATE_DIFF(event_date, DI.input_date, MONTH)) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS nth_month,
+    LAST_VALUE(user_ltv_revenue) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS user_ltv_revenue,
+    LAST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_medium,
+    LAST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_name,
+    LAST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS last_traffic_source_source,
+    LAST_VALUE(CASE WHEN (TIMESTAMP_DIFF(event_timestamp, user_first_touch_timestamp, DAY) < 7) OR (user_first_touch_timestamp IS NULL) THEN 'new' ELSE 'existing' END) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS new_or_established_user,
+    LAST_VALUE(L.subcontinent) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_sub_continent,
+    LAST_VALUE(L.country) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_country,
+    LAST_VALUE(L.region) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_region,
+    LAST_VALUE(L.city) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_city,
+    LAST_VALUE(L.metro) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as geo_metro,
+    FIRST_VALUE(T.traffic_source_medium) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_medium,
+    FIRST_VALUE(T.traffic_source_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_name,
+    FIRST_VALUE(T.traffic_source) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS first_traffic_source_source,
+    MAX(CASE WHEN user_id IS NOT NULL THEN TRUE ELSE FALSE END) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS has_signed_in_with_user_id,
+    LAST_VALUE(device_category) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_category,
+    LAST_VALUE(device_mobile_brand_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_mobile_brand_name,
+    LAST_VALUE(device_mobile_model_name) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_mobile_model_name,
+    LAST_VALUE(device_os) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os,
+    --LAST_VALUE(device_os_version) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os_version,
+    LAST_VALUE(SPLIT(device_os_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_os_version,
+    LAST_VALUE(language) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) AS device_language,
+    LAST_VALUE(device_web_browser) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser,
+    --LAST_VALUE(browser_version) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser_version,
+    LAST_VALUE(SPLIT(device_web_browser_version, '.')[OFFSET(0)]) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_web_browser_version,
+    LAST_VALUE(device_advertising_id) OVER(PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp DESC) as device_advertising_id
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.traffic_source` as T
+    ON E.traffic_source_id = T.traffic_source_id
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+    ON E.device_type_id = D.device_type_id
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.location` as L
+    ON E.location_id = L.location_id
+  CROSS JOIN dates_interval as DI -- every event is repeated once for each input_date whose window contains it
+  WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+    AND ga_session_id IS NOT NULL
+    AND D.device_os IS NOT NULL
+);
+
+-- Distinct (user_pseudo_id, user_id, feature_date) combinations with session events inside each lookback window
+CREATE OR REPLACE TEMP TABLE events_users as (
+  SELECT DISTINCT
+    Users.user_pseudo_id,
+    Users.user_id,
+    DI.input_date as feature_date
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users
+  CROSS JOIN dates_interval as DI
+  WHERE event_date BETWEEN DI.end_date AND DI.input_date
+    AND ga_session_id IS NOT NULL
+);
+
+
+INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` -- writes one row per (feature_date, user_pseudo_id)
+  SELECT DISTINCT -- NOTE(review): DISTINCT looks redundant given the GROUP BY on (feature_date, user_pseudo_id) below - confirm
+    CURRENT_TIMESTAMP() AS processed_timestamp,
+    EU.feature_date,
+    EU.user_pseudo_id,
+    MAX(EU.user_id) as user_id,
+    MAX(UDESS.month_of_the_year) as month_of_the_year,
+    MAX(UDESS.week_of_the_year) as week_of_the_year,
+    MAX(UDESS.day_of_the_month) as day_of_the_month,
+    MAX(UDESS.day_of_week) as day_of_week,
+    MAX(UDESS.user_ltv_revenue) as user_ltv_revenue,
+    MAX(UDESS.device_category) as device_category,
+    MAX(UDESS.device_mobile_brand_name) as device_mobile_brand_name,
+    MAX(UDESS.device_mobile_model_name) as device_mobile_model_name,
+    MAX(UDESS.device_os) as device_os,
+    MAX(UDESS.device_os_version) as device_os_version,
+    MAX(UDESS.device_language) as device_language,
+    MAX(UDESS.device_web_browser) as device_web_browser,
+    MAX(UDESS.device_web_browser_version) as device_web_browser_version,
+    APPROX_TOP_COUNT(UDESS.geo_sub_continent, 1)[OFFSET(0)].value as geo_sub_continent,
+    APPROX_TOP_COUNT(UDESS.geo_country, 1)[OFFSET(0)].value as geo_country,
+    APPROX_TOP_COUNT(UDESS.geo_region, 1)[OFFSET(0)].value as geo_region,
+    APPROX_TOP_COUNT(UDESS.geo_city, 1)[OFFSET(0)].value as geo_city,
+    APPROX_TOP_COUNT(UDESS.geo_metro, 1)[OFFSET(0)].value as geo_metro,
+    MAX(UDESS.last_traffic_source_medium) as last_traffic_source_medium,
+    MAX(UDESS.last_traffic_source_name) as last_traffic_source_name,
+    MAX(UDESS.last_traffic_source_source) as last_traffic_source_source,
+    MAX(UDESS.first_traffic_source_medium) as first_traffic_source_medium,
+    MAX(UDESS.first_traffic_source_name) as first_traffic_source_name,
+    MAX(UDESS.first_traffic_source_source) as first_traffic_source_source,
+    MAX(UDESS.has_signed_in_with_user_id) as has_signed_in_with_user_id
+  FROM events_users EU
+  INNER JOIN user_dimensions_event_session_scoped UDESS
+    ON EU.user_pseudo_id = UDESS.user_pseudo_id AND EU.feature_date = UDESS.feature_date --AND EU.user_id = UDESS.user_id
+  GROUP BY EU.feature_date, EU.user_pseudo_id
+;
\ No newline at end of file diff --git a/sql/query/invoke_backfill_user_session_event_aggregated_metrics.sqlx b/sql/query/invoke_backfill_user_session_event_aggregated_metrics.sqlx index b4127d93..7ba98214 100644 --- a/sql/query/invoke_backfill_user_session_event_aggregated_metrics.sqlx +++ b/sql/query/invoke_backfill_user_session_event_aggregated_metrics.sqlx @@ -12,26 +12,316 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -DECLARE input_date DATE; -DECLARE end_date DATE; -DECLARE users_added INT64 DEFAULT NULL; - DECLARE max_date DATE; DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL {{interval_max_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL {{interval_min_date}} DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -SELECT max_date; -SELECT min_date; - -FOR record IN - (SELECT DISTINCT - event_date - FROM `{{mds_project_id}}.{{mds_dataset}}.event` - WHERE event_date BETWEEN min_date AND max_date - ORDER BY event_date ASC ) -DO - SET input_date= (SELECT record.event_date); - SET end_date= (SELECT DATE_SUB(record.event_date, INTERVAL {{interval_end_date}} DAY)); - CALL `{{project_id}}.{{dataset}}.{{stored_procedure}}`(input_date, end_date, users_added); - SELECT users_added; -END FOR; \ No newline at end of file
+
+CREATE OR REPLACE TEMP TABLE dates_interval as ( -- one row per distinct event_date between min_date and max_date (input_date) with the start of its lookback window (end_date)
+  SELECT DISTINCT
+    event_date as input_date,
+    DATE_SUB(event_date, INTERVAL {{interval_end_date}} DAY) as end_date
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event`
+  WHERE event_date BETWEEN min_date AND max_date
+  ORDER BY input_date DESC
+);
+
+-- What is the user-per-day session-scoped engagement?
+CREATE OR REPLACE TEMP TABLE engagement_per_day_session_scoped as ( -- per (user_pseudo_id, feature_date): engagement metrics aggregated from one row per session
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    SAFE_DIVIDE(
+      COUNT(distinct case when session_engaged = true then concat(user_pseudo_id,session_id) end),
+      COUNT(DISTINCT session_id)
+    ) AS engagement_rate,
+    COUNT(distinct case when session_engaged = true then concat(user_pseudo_id,session_id) end) as engaged_sessions_per_user,
+    SAFE_DIVIDE(
+      COUNTIF(session_conversion = true),
+      COUNT(DISTINCT session_id)
+    ) AS session_conversion_rate,
+    COUNT(distinct case when session_engaged = false then concat(user_pseudo_id,session_id) end) as bounces,
+    SAFE_DIVIDE(
+      COUNT(distinct case when session_engaged = false then concat(user_pseudo_id,session_id) end),
+      COUNT(DISTINCT session_id)
+    ) as bounce_rate_per_user,
+    COUNT(DISTINCT session_id) AS sessions_per_user,
+    IFNULL(AVG(page_views_per_session),0) as avg_views_per_session,
+    IFNULL(ROUND(SUM(engagement_time_msec)/1000),0) as sum_engagement_time_seconds,
+    IFNULL(ROUND(AVG(engagement_time_msec)/1000),0) as avg_engagement_time_seconds,
+    COUNTIF(new_visitor = true) as new_visits,
+    COUNTIF(returning_visitor = true) as returning_visits
+  FROM (
+    SELECT
+      E.user_pseudo_id,
+      input_date as feature_date,
+      E.ga_session_id as session_id,
+      IFNULL(MAX(S.session_engaged), false) as session_engaged,
+      SUM(S.total_engagement_time_secs) as engagement_time_msec, -- NOTE(review): source column is named *_secs but the alias says msec and the outer query divides by 1000 - confirm the unit
+      MAX(S.new_visitor) as new_visitor,
+      MAX(S.returning_visitor) as returning_visitor,
+      MAX(CASE WHEN E.event_name='purchase' AND E.ecommerce.transaction_id IS NOT NULL THEN true ELSE false END) as session_conversion,
+      COUNTIF(E.event_name='page_view') as page_views_per_session
+    FROM `{{mds_project_id}}.{{mds_dataset}}.event` as E
+    INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.session` as S
+      ON E.ga_session_id = S.ga_session_id -- NOTE(review): joined on ga_session_id only; presumably unique per user in the session table - verify
+    INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+      ON E.device_type_id = D.device_type_id
+    CROSS JOIN dates_interval as DI
+    WHERE E.event_date BETWEEN DI.end_date AND DI.input_date
+      AND E.ga_session_id IS NOT NULL
+      AND D.device_os IS NOT NULL
+    GROUP BY
+      user_pseudo_id,
+      feature_date,
+      session_id)
+  GROUP BY user_pseudo_id, feature_date
+);
+
+-- Users with two or more 'purchase' events inside a feature_date's lookback window, with the rank of their latest purchase
+CREATE OR REPLACE TEMP TABLE returning_customers as (
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    MAX(unique_purchase) as unique_purchase
+  FROM (
+    SELECT
+      user_pseudo_id,
+      input_date as feature_date,
+      RANK() OVER (PARTITION BY user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC) AS unique_purchase -- rank inside each feature_date window; partitioning by user alone ties the copies of one purchase across dates and inflates later ranks
+    FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA
+    CROSS JOIN dates_interval as DI
+    WHERE event_date BETWEEN DI.end_date AND DI.input_date
+      AND event_name = 'purchase'
+    GROUP BY user_pseudo_id, feature_date, event_timestamp
+  )
+  WHERE unique_purchase >= 2
+  GROUP BY user_pseudo_id, feature_date
+);
+CREATE OR REPLACE TEMP TABLE non_returning_customers as ( -- every (user, feature_date) with events in the window that is not in returning_customers
+  SELECT
+    GA.user_pseudo_id,
+    input_date as feature_date
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA
+  CROSS JOIN dates_interval as DI
+  WHERE event_date BETWEEN DI.end_date AND DI.input_date
+  GROUP BY user_pseudo_id, feature_date
+  EXCEPT DISTINCT SELECT user_pseudo_id, feature_date FROM returning_customers as RC
+);
+CREATE OR REPLACE TEMP TABLE combined as ( -- union of both groups; non-returning users carry the sentinel -1
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    unique_purchase
+  FROM returning_customers
+  UNION ALL
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    -1
+  FROM non_returning_customers
+  GROUP BY user_pseudo_id, feature_date
+);
+CREATE OR REPLACE TEMP TABLE repeated_purchase as ( -- maps the -1 sentinel to 0 and exposes the value as how_many_purchased_before
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    CASE WHEN unique_purchase >= 0 THEN unique_purchase ELSE 0 END AS how_many_purchased_before
+  FROM
+    combined
+);
+
+-- Has the user abandoned any cart by day?
+CREATE OR REPLACE TEMP TABLE returned_cart_to_purchase as ( -- per (user_pseudo_id, feature_date): checkout/purchase flags from 'begin_checkout' and 'purchase' events in the window
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    MAX(session_order) as max_session_order_in_day,
+    CASE WHEN MAX(begun_checkout) >= 1 THEN True ELSE False END as begun_checkout,
+    CASE WHEN MAX(event_order) >= 2 THEN True ELSE False END as has_purchased
+  FROM (
+    SELECT
+      GA.user_pseudo_id,
+      input_date as feature_date,
+      MAX(S.session_number) as session_order,
+      CASE WHEN MAX(event_name) = 'begin_checkout' THEN 1 ELSE 0 END as begun_checkout,
+      RANK() OVER (PARTITION BY GA.user_pseudo_id, DI.input_date ORDER BY event_timestamp ASC ) AS event_order -- rank inside each feature_date window; partitioning by user alone lets events from other windows push the rank to 2 or more
+    FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA
+    INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.session` as S
+      ON GA.ga_session_id = S.ga_session_id
+    INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+      ON GA.device_type_id = D.device_type_id
+    CROSS JOIN dates_interval as DI
+    WHERE event_date BETWEEN DI.end_date AND DI.input_date
+      AND event_name IN ('begin_checkout','purchase')
+      AND GA.ga_session_id IS NOT NULL
+      AND D.device_os IS NOT NULL
+    GROUP BY GA.user_pseudo_id, feature_date, GA.event_timestamp
+  )
+  GROUP BY user_pseudo_id, feature_date
+);
+
+CREATE OR REPLACE TEMP TABLE cart_to_purchase AS ( -- per (user_pseudo_id, feature_date): has_abandoned_cart = began checkout and not has_purchased; users without checkout/purchase events get False via the LEFT JOIN
+  SELECT
+    GA.user_pseudo_id,
+    input_date as feature_date,
+    CASE WHEN (MAX(r.begun_checkout) AND NOT MAX(r.has_purchased)) THEN True ELSE False END as has_abandoned_cart,
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` AS GA
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+    ON GA.device_type_id = D.device_type_id
+  CROSS JOIN dates_interval as DI
+  LEFT JOIN returned_cart_to_purchase AS r
+    ON r.user_pseudo_id = GA.user_pseudo_id AND r.feature_date = DI.input_date
+  WHERE event_date BETWEEN DI.end_date AND DI.input_date
+    AND GA.ga_session_id IS NOT NULL
+    AND D.device_os IS NOT NULL
+  GROUP BY user_pseudo_id, feature_date
+);
+
+-- What is the user-events-per-day event-scoped metrics performance?
+CREATE OR REPLACE TEMP TABLE user_events_per_day_event_scoped as ( -- per (user_pseudo_id, feature_date): e-commerce event metrics summed from rows grouped by user, session, feature_date and event_timestamp
+  SELECT
+    user_pseudo_id,
+    feature_date,
+    IFNULL(SUM(add_to_cart_per_session),0) as add_to_carts,
+    IFNULL(SAFE_DIVIDE(
+      SUM(add_to_cart_per_session),
+      SUM(view_item_per_session)
+    ), 0.0) AS cart_to_view_rate,
+    IFNULL(SUM(checkouts_per_session),0) AS checkouts,
+    IFNULL(SUM(ecommerce_purchases_per_session),0) AS ecommerce_purchases,
+    IFNULL(SUM(ecommerce_quantity_per_session),0) AS ecommerce_quantity,
+    IFNULL(SUM(ecommerce_revenue_per_session),0) AS ecommerce_revenue,
+    --IFNULL(SUM(item_discount_per_session),0) AS item_discount_amount,
+    IFNULL(SUM(item_revenue_per_session),0.0) AS item_revenue,
+    IFNULL(SUM(item_quantity_per_session),0) AS item_quantity,
+    IFNULL(SUM(item_refund_amount_per_session),0.0) AS item_refund_amount,
+    IFNULL(SUM(item_view_events_per_session),0) AS item_view_events,
+    IFNULL(SUM(items_clicked_in_promotion_per_session),0) AS items_clicked_in_promotion,
+    IFNULL(SUM(items_clicked_in_list_per_session),0) AS items_clicked_in_list,
+    IFNULL(SUM(items_checked_out_per_session),0) AS items_checked_out,
+    IFNULL(SUM(items_added_to_cart_per_session),0) AS items_added_to_cart,
+    IFNULL(SUM(item_list_click_events_per_session),0) AS item_list_click_events,
+    IFNULL(SUM(item_list_view_events_per_session),0) AS item_list_view_events,
+    IFNULL(SUM(purchase_revenue_per_session),0.0) AS purchase_revenue,
+    IFNULL(SAFE_DIVIDE(
+      SUM(user_who_purchased_per_session),
+      SUM(user_who_viewed_items_per_session)
+    ), 0.0) AS purchase_to_view_rate,
+    IFNULL(SUM(refunds_per_session),0.0) AS refunds,
+    IFNULL(SAFE_DIVIDE(
+      SUM(transactions_per_session),
+      SUM(purchasers_per_session)
+    ), 0.0) AS transactions_per_purchaser,
+    IFNULL(SAFE_DIVIDE(
+      SUM(ecommerce_purchases_per_session),
+      COUNT(DISTINCT event_timestamp))
+    ,0.0) AS user_conversion_rate
+  FROM (
+    SELECT
+      user_pseudo_id,
+      input_date as feature_date,
+      event_timestamp,
+      ga_session_id as session_id,
+      COUNTIF(event_name='add_to_cart') as add_to_cart_per_session,
+      COUNTIF(event_name='view_item') as view_item_per_session,
+      COUNTIF(event_name='begin_checkout') as checkouts_per_session,
+      COUNTIF(event_name='purchase' AND ecommerce.transaction_id IS NOT NULL) as ecommerce_purchases_per_session,
+      SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.total_item_quantity ELSE 0 END) as ecommerce_quantity_per_session,
+      SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.purchase_revenue_in_usd ELSE 0 END) as ecommerce_revenue_per_session,
+      --SUM((select SUM(discount*quantity) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND discount IS NOT NULL)) as item_discount_per_session,
+      SUM((select SUM(item_revenue) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND item_revenue IS NOT NULL)) as item_revenue_per_session,
+      SUM((select SUM(quantity) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND quantity IS NOT NULL)) as item_quantity_per_session,
+      SUM((select SUM(item_refund) from unnest(items) where event_name = 'purchase' AND ecommerce.transaction_id IS NOT NULL AND item_refund IS NOT NULL)) as item_refund_amount_per_session, -- aggregate inside the subquery: a scalar subquery fails at runtime when more than one item row matches
+      COUNTIF(event_name='view_item') as item_view_events_per_session,
+      SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='view_item' AND (promotion_id IS NOT NULL OR promotion_name IS NOT NULL))) as items_clicked_in_promotion_per_session,
+      SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='click' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as items_clicked_in_list_per_session,
+      SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='begin_checkout' AND item_id IS NOT NULL)) as items_checked_out_per_session,
+      SUM((select COUNT(DISTINCT item_id) from unnest(items) where event_name='add_to_cart' AND item_id IS NOT NULL)) as items_added_to_cart_per_session,
+      SUM((select COUNT(DISTINCT item_list_id) from unnest(items) where event_name='click' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as item_list_click_events_per_session,
+      SUM((select COUNT(DISTINCT item_list_id) from unnest(items) where event_name='view_item_list' AND (item_list_id IS NOT NULL OR item_list_name IS NOT NULL))) as item_list_view_events_per_session,
+      (SUM(CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.purchase_revenue_in_usd ELSE 0.0 END) - SUM(CASE WHEN event_name='refund' AND ecommerce.transaction_id IS NOT NULL THEN ecommerce.refund_value_in_usd ELSE 0.0 END)) as purchase_revenue_per_session,
+      COUNT(DISTINCT CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN user_pseudo_id END) as user_who_purchased_per_session,
+      COUNT(DISTINCT CASE WHEN event_name='view_item' THEN user_pseudo_id END) as user_who_viewed_items_per_session,
+      SUM(CASE WHEN event_name='refund' AND ecommerce.transaction_id IS NOT NULL THEN 1 ELSE 0 END) as refunds_per_session,
+      COUNT(DISTINCT CASE WHEN ecommerce.transaction_id IS NOT NULL THEN ecommerce.transaction_id END) as transactions_per_session,
+      COUNT(DISTINCT CASE WHEN event_name='purchase' AND ecommerce.transaction_id IS NOT NULL THEN user_pseudo_id END) as purchasers_per_session,
+    FROM
+      `{{mds_project_id}}.{{mds_dataset}}.event` as E
+    INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+      ON E.device_type_id = D.device_type_id
+    CROSS JOIN dates_interval as DI
+    WHERE event_date BETWEEN DI.end_date AND DI.input_date
+      AND E.ga_session_id IS NOT NULL
+      AND D.device_os IS NOT NULL
+    GROUP BY
+      E.user_pseudo_id,
+      E.ga_session_id,
+      feature_date,
+      E.event_timestamp)
+  GROUP BY user_pseudo_id, feature_date
+);
+
+-- Distinct (user_pseudo_id, feature_date) pairs with session events on a device with a known OS inside the lookback window
+CREATE OR REPLACE TEMP TABLE events_users_days as (
+  SELECT DISTINCT
+    Users.user_pseudo_id,
+    DI.input_date as feature_date
+  FROM `{{mds_project_id}}.{{mds_dataset}}.event` Users
+  INNER JOIN `{{mds_project_id}}.{{mds_dataset}}.device` as D
+    ON Users.device_type_id = D.device_type_id
+  CROSS JOIN dates_interval as DI
+  WHERE event_date BETWEEN DI.end_date AND DI.input_date
+    AND Users.ga_session_id IS NOT NULL
+    AND D.device_os IS NOT NULL
+);
+
+INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` -- one output row per (feature_date, user_pseudo_id) present in all five temp tables (inner joins)
+  SELECT
+    CURRENT_TIMESTAMP() AS processed_timestamp,
+    EUD.feature_date,
+    EUD.user_pseudo_id,
+    EPDSS.engagement_rate,
+    EPDSS.engaged_sessions_per_user,
+    EPDSS.session_conversion_rate,
+    EPDSS.bounces,
+    EPDSS.bounce_rate_per_user,
+    EPDSS.sessions_per_user,
+    EPDSS.avg_views_per_session,
+    EPDSS.sum_engagement_time_seconds,
+    EPDSS.avg_engagement_time_seconds,
+    EPDSS.new_visits,
+    EPDSS.returning_visits,
+    UEPDES.add_to_carts,
+    UEPDES.cart_to_view_rate,
+    UEPDES.checkouts,
+    UEPDES.ecommerce_purchases,
+    UEPDES.ecommerce_quantity,
+    UEPDES.ecommerce_revenue,
+    UEPDES.item_revenue,
+    UEPDES.item_quantity,
+    UEPDES.item_refund_amount,
+    UEPDES.item_view_events,
+    UEPDES.items_clicked_in_promotion,
+    UEPDES.items_clicked_in_list,
+    UEPDES.items_checked_out,
+    UEPDES.items_added_to_cart,
+    UEPDES.item_list_click_events,
+    UEPDES.item_list_view_events,
+    UEPDES.purchase_revenue,
+    UEPDES.purchase_to_view_rate,
+    UEPDES.refunds,
+    UEPDES.transactions_per_purchaser,
+    UEPDES.user_conversion_rate,
+    R.how_many_purchased_before,
+    CP.has_abandoned_cart
+  FROM events_users_days EUD
+  INNER JOIN engagement_per_day_session_scoped EPDSS
+    ON EUD.user_pseudo_id = EPDSS.user_pseudo_id AND EUD.feature_date = EPDSS.feature_date
+  INNER JOIN user_events_per_day_event_scoped UEPDES
+    ON UEPDES.user_pseudo_id = EPDSS.user_pseudo_id AND UEPDES.feature_date = EPDSS.feature_date
+  INNER JOIN repeated_purchase R
+    ON R.user_pseudo_id = EPDSS.user_pseudo_id AND R.feature_date = EPDSS.feature_date
+  INNER JOIN cart_to_purchase CP
+    ON CP.user_pseudo_id = EPDSS.user_pseudo_id AND CP.feature_date = EPDSS.feature_date
+;
\ No newline at end of file diff --git a/sql/schema/table/audience_segmentation_inference_preparation.json b/sql/schema/table/audience_segmentation_inference_preparation.json index cdeafeed..3e99f61b 100644 --- a/sql/schema/table/audience_segmentation_inference_preparation.json +++ b/sql/schema/table/audience_segmentation_inference_preparation.json @@ -34,31 +34,6 @@ "type": "STRING", "description": "feature description" }, - { - "name": "hour_of_day", - "type": "STRING", - "description": "feature description" - }, - { - "name": "nth_day", - "type": "INTEGER", - "description": "feature description" - }, - { - "name": "nth_hour", - "type": "INTEGER", - "description": "feature description" - }, - { - "name": "nth_week", - "type": "INTEGER", - "description": "feature description" - }, - { - "name": "nth_month", - "type": "INTEGER", - "description": "feature description" - }, { "name": "device_category", "type": "STRING", diff --git a/sql/schema/table/customer_lifetime_value_inference_preparation.json b/sql/schema/table/customer_lifetime_value_inference_preparation.json index 66261c20..7dea2b8d 100644 --- a/sql/schema/table/customer_lifetime_value_inference_preparation.json +++ b/sql/schema/table/customer_lifetime_value_inference_preparation.json @@ -34,31 +34,6 @@ "type": "STRING", "description": "user_pseudo_id" }, - { - "name": "hour_of_day", - "type": "STRING", - "description": "user_pseudo_id" - }, - { - "name": "nth_day", - "type": "INTEGER", - "description": "user_pseudo_id" - }, - { - "name": "nth_hour", - "type": "INTEGER", - "description": "user_pseudo_id" - }, - { - "name": "nth_week", - "type": "INTEGER", - "description": "user_pseudo_id" - }, - { - "name": "nth_month", - "type": "INTEGER", - "description": "user_pseudo_id" - }, { "name": "device_category", "type": "STRING", diff --git a/sql/schema/table/purchase_propensity_inference_preparation.json
b/sql/schema/table/purchase_propensity_inference_preparation.json index 5c99d4ff..5026f5a3 100644 --- a/sql/schema/table/purchase_propensity_inference_preparation.json +++ b/sql/schema/table/purchase_propensity_inference_preparation.json @@ -34,31 +34,6 @@ "type": "STRING", "description": "user_pseudo_id" }, - { - "name": "hour_of_day", - "type": "STRING", - "description": "user_pseudo_id" - }, - { - "name": "nth_day", - "type": "INTEGER", - "description": "user_pseudo_id" - }, - { - "name": "nth_hour", - "type": "INTEGER", - "description": "user_pseudo_id" - }, - { - "name": "nth_week", - "type": "INTEGER", - "description": "user_pseudo_id" - }, - { - "name": "nth_month", - "type": "INTEGER", - "description": "user_pseudo_id" - }, { "name": "user_ltv_revenue", "type": "FLOAT", diff --git a/sql/schema/table/purchase_propensity_label.json b/sql/schema/table/purchase_propensity_label.json index 201910be..cea1fc69 100644 --- a/sql/schema/table/purchase_propensity_label.json +++ b/sql/schema/table/purchase_propensity_label.json @@ -14,11 +14,6 @@ "type": "STRING", "description": "feature description" }, - { - "name": "purchase_day_0", - "type": "INTEGER", - "description": "feature description" - }, { "name": "purchase_day_1", "type": "INTEGER", diff --git a/sql/schema/table/user_dimensions.json b/sql/schema/table/user_dimensions.json index 52b73bda..19a34385 100644 --- a/sql/schema/table/user_dimensions.json +++ b/sql/schema/table/user_dimensions.json @@ -39,31 +39,6 @@ "type": "STRING", "description": "feature column description" }, - { - "name": "hour_of_day", - "type": "STRING", - "description": "feature column description" - }, - { - "name": "nth_day", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_hour", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_week", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_month", - "type": "INTEGER", - 
"description": "feature column description" - }, { "name": "user_ltv_revenue", "type": "FLOAT64", diff --git a/sql/schema/table/user_lifetime_dimensions.json b/sql/schema/table/user_lifetime_dimensions.json index 8b9c889f..deb3d95d 100644 --- a/sql/schema/table/user_lifetime_dimensions.json +++ b/sql/schema/table/user_lifetime_dimensions.json @@ -39,31 +39,6 @@ "type": "STRING", "description": "feature column description" }, - { - "name": "hour_of_day", - "type": "STRING", - "description": "feature column description" - }, - { - "name": "nth_day", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_hour", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_week", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_month", - "type": "INTEGER", - "description": "feature column description" - }, { "name": "user_ltv_revenue", "type": "FLOAT", diff --git a/sql/schema/table/user_segmentation_dimensions.json b/sql/schema/table/user_segmentation_dimensions.json index 8b9c889f..deb3d95d 100644 --- a/sql/schema/table/user_segmentation_dimensions.json +++ b/sql/schema/table/user_segmentation_dimensions.json @@ -39,31 +39,6 @@ "type": "STRING", "description": "feature column description" }, - { - "name": "hour_of_day", - "type": "STRING", - "description": "feature column description" - }, - { - "name": "nth_day", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_hour", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_week", - "type": "INTEGER", - "description": "feature column description" - }, - { - "name": "nth_month", - "type": "INTEGER", - "description": "feature column description" - }, { "name": "user_ltv_revenue", "type": "FLOAT",