Skip to content

Commit

Permalink
Ecommerce 2024 queries (#3786)
Browse files Browse the repository at this point in the history
* Create top_vendors.sql

* Create top_vendors_crux_rank.sql

slightly adjusted ranks from 2021

* Create median_lighthouse_score_ecommsites.sql

* Create core_web_vitals_passingmetrics_byvendor_bydevice.sql

* Create top_analytics_providers_bydevice_wapp.sql

* Create top_abtesting_bydevice.sql

* Create percent_of_ecommsites_using_cmp.sql

* Create top_cmp_bydevice.sql

this is new.

* Update top_vendors_crux_rank.sql

updated for new schema

* Delete sql/2024/ecommerce/top_vendors.sql

included in the rank one

* Update median_lighthouse_score_ecommsites.sql

updated for new schema and also added rank for further analysis

* Update median_lighthouse_score_ecommsites.sql

* Update top_vendors_crux_rank.sql

* Update median_lighthouse_score_ecommsites.sql

* Update top_cmp_bydevice.sql

* Update top_cmp_bydevice.sql

* Delete sql/2024/ecommerce/percent_of_ecommsites_using_cmp.sql

* Update core_web_vitals_passingmetrics_byvendor_bydevice.sql

* Delete sql/2024/ecommerce/top_analytics_providers_bydevice_wapp.sql

* Delete sql/2024/ecommerce/top_abtesting_bydevice.sql

* Create top_shopsystem_by_geo.sql

* Create core_web_vitals_yoy.sql

* Create top_payment_by_geo.sql

* Create top_shipping_by_geo.sql

* Create top_shipping_yoy.sql

* Create top_payment_yoy.sql

* Delete sql/2024/ecommerce/top_cmp_bydevice.sql

* Update top_vendors_crux_rank.sql

Make it more like the CMS one to be 100% sure

* Create top_ecommerce.sql

* Update core_web_vitals_yoy.sql

* Update core_web_vitals_yoy.sql

remove havings

* Update top_payment_yoy.sql

* Update top_shipping_yoy.sql

* Update top_ecommerce.sql

* fix lintin errors
  • Loading branch information
jcmpagel authored Nov 9, 2024
1 parent fd87798 commit 216bf23
Show file tree
Hide file tree
Showing 10 changed files with 986 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#standardSQL
# CrUX Core Web Vitals performance of Ecommerce vendors by device (fid was upated to inp, and is non optinal now)
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good / (good + needs_improvement + poor) >= 0.75
);

CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);


SELECT
client,
ecomm,
COUNT(DISTINCT origin) AS origins,
# Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp,

# Origins with good INP divided by origins with any inp.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))) AS pct_good_inp,

# Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls,

# Origins with good LCP, inp, and CLS divided by origins with any LCP, inp, and CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_inp, avg_inp, slow_inp) AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url,
app AS ecomm
FROM
`httparchive.technologies.2024_06_01_*`
WHERE
category = 'Ecommerce' AND
(
app != 'Cart Functionality' AND
app != 'Google Analytics Enhanced eCommerce'
)
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
date = '2024-06-01'
GROUP BY
client,
ecomm
ORDER BY
origins DESC
193 changes: 193 additions & 0 deletions sql/2024/ecommerce/core_web_vitals_yoy.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# app passing core web vitals
# core_web_vitals_yoy.sql
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good / (good + needs_improvement + poor) >= 0.75
);

CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);

-- Year 2024
SELECT
2024 AS year,
client,
app,
COUNT(DISTINCT origin) AS origins,
-- Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
) AS pct_good_lcp,

-- Origins with good INP divided by origins with any INP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
) AS pct_good_inp,

-- Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
) AS pct_good_cls,

-- Origins with good LCP, INP (optional), and CLS divided by origins with any LCP and CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_inp, avg_inp, slow_inp) IS NOT FALSE AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL
)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL
))
) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
client,
page AS url,
technologies.technology AS app
FROM
`httparchive.all.pages`,
UNNEST(technologies) AS technologies,
UNNEST(technologies.categories) AS cats
WHERE
cats = 'Ecommerce' AND
technologies.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') AND
technologies.technology != '' AND
date = '2024-06-01' AND
is_root_page
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
date = '2024-06-01'
GROUP BY
client,
app


UNION ALL

-- Year 2023
SELECT
2023 AS year,
client,
app,
COUNT(DISTINCT origin) AS origins,
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
) AS pct_good_lcp,
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
) AS pct_good_inp,
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
) AS pct_good_cls,
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_inp, avg_inp, slow_inp) IS NOT FALSE AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL
)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL
))
) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
client,
page AS url,
technologies.technology AS app
FROM
`httparchive.all.pages`,
UNNEST(technologies) AS technologies,
UNNEST(technologies.categories) AS cats
WHERE
cats = 'Ecommerce' AND
technologies.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') AND
technologies.technology != '' AND
date = '2023-06-01' AND
is_root_page
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
date = '2023-06-01'
GROUP BY
client,
app

UNION ALL

-- Year 2022
SELECT
2022 AS year,
client,
app,
COUNT(DISTINCT origin) AS origins,
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
) AS pct_good_lcp,
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
) AS pct_good_inp,
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
) AS pct_good_cls,
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_inp, avg_inp, slow_inp) IS NOT FALSE AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL
)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL
))
) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
client,
page AS url,
technologies.technology AS app
FROM
`httparchive.all.pages`,
UNNEST(technologies) AS technologies,
UNNEST(technologies.categories) AS cats
WHERE
cats = 'Ecommerce' AND
technologies.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce') AND
technologies.technology != '' AND
date = '2022-06-01' AND
is_root_page
)
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
date = '2022-06-01'
GROUP BY
client,
app


ORDER BY
origins DESC
43 changes: 43 additions & 0 deletions sql/2024/ecommerce/median_lighthouse_score_ecommsites.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
WITH technologies AS (
SELECT
client,
page,
category,
technology,
rank,
lighthouse,
COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites
FROM `httparchive.all.pages`,
UNNEST(technologies) AS tech,
UNNEST(categories) AS category
WHERE
date = '2024-06-01' AND
is_root_page = TRUE
)

SELECT
client,
rank,
technology,
ARRAY_AGG(DISTINCT category) AS categories,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.performance.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_performance,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.accessibility.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_accessibility,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.seo.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_seo,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.best-practices.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_best_practices,
ANY_VALUE(total_websites) AS total_websites,
COUNT(DISTINCT page) AS number_of_websites,
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS percent_of_websites
FROM technologies
WHERE
category = 'Ecommerce' AND
(
technology != 'Cart Functionality' AND
technology != 'Google Analytics Enhanced eCommerce'
)
GROUP BY
client,
rank,
technology
ORDER BY
client,
number_of_websites DESC
Loading

0 comments on commit 216bf23

Please sign in to comment.