-
-
Notifications
You must be signed in to change notification settings - Fork 183
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add 2024 markup queries * Update Markup 2024 queries * Update Markup queries * Update markup.md * Update markdown.md * Smart quotes * Generate chapter * Generate images * Optimised images with calibre/image-actions * Add author * Lint SQL * Retake hi-res images * Update contributors file * Final edits --------- Co-authored-by: Barry Pollard <[email protected]> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
51b5f66
commit f084df4
Showing
50 changed files
with
1,693 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#standardSQL | ||
# Element attribute usage (pages and frequency) from the almanac custom metric, grouped by client (device) and attribute name
|
||
-- Converts the `attributes_used_on_elements` map of the almanac custom metric
-- into an array of (name, freq) structs.
--   almanac_string: JSON text of the almanac custom metric.
--   Returns an empty array when the text cannot be parsed, is not a JSON
--   object, or has no (truthy) `attributes_used_on_elements` member.
CREATE TEMPORARY FUNCTION get_almanac_attribute_info(almanac_string STRING)
RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
  try {
    const almanac = JSON.parse(almanac_string);
    // Only plain JSON objects carry the map; arrays and scalars are rejected.
    if (typeof almanac != 'object' || Array.isArray(almanac)) {
      return [];
    }
    const usage = almanac.attributes_used_on_elements;
    // Each map entry {attribute name: count} becomes one {name, freq} row.
    return usage ? Object.entries(usage).map((entry) => ({name: entry[0], freq: entry[1]})) : [];
  } catch (e) {
    // Malformed JSON (or a JSON null) falls through to "no attributes".
    return [];
  }
''';
|
||
-- Usage of each element attribute for rows dated 2024-06-01, per client.
--   pages / total_pages / pct_pages: distinct pages on which the attribute
--     appears, relative to all pages counted for that client.
--   freq / total / pct_ratio: summed occurrence count of the attribute,
--     relative to the summed occurrences of every attribute for that client.
WITH page_counts AS (
  -- Number of page rows per client; denominator for pct_pages.
  SELECT
    client,
    COUNT(0) AS client_pages
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
  GROUP BY client
),

attributes AS (
  SELECT
    client,
    attr.name,
    COUNT(DISTINCT page) AS pages,
    ANY_VALUE(client_pages) AS total_pages,
    COUNT(DISTINCT page) / ANY_VALUE(client_pages) AS pct_pages,
    SUM(attr.freq) AS freq,
    -- Window over the grouped rows: all attribute occurrences for the client.
    SUM(SUM(attr.freq)) OVER (PARTITION BY client) AS total,
    SUM(attr.freq) / SUM(SUM(attr.freq)) OVER (PARTITION BY client) AS pct_ratio
  FROM `httparchive.all.pages`
  INNER JOIN page_counts USING (client)
  -- One row per (page, attribute) as reported by the almanac custom metric.
  CROSS JOIN UNNEST(get_almanac_attribute_info(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS attr
  WHERE date = '2024-06-01'
  GROUP BY
    client,
    attr.name
)

SELECT
  client,
  name,
  pages,
  total_pages,
  pct_pages,
  freq,
  total,
  pct_ratio
FROM attributes
ORDER BY pct_ratio DESC
LIMIT 1000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
-- Lists the button `type` values recorded in the markup custom metric.
--   markup_string: JSON text of the markup custom metric; reads
--     `buttons.types` (a map of type -> count) and `buttons.total`.
--   Returns the keys of `buttons.types`, plus the marker 'NO_TYPE' when
--   `buttons.total` exceeds the sum of the per-type counts.
--   Returns an empty array if the JSON cannot be parsed or lacks that shape.
CREATE TEMPORARY FUNCTION get_markup_buttons_info(markup_string STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
  try {
    const buttons = JSON.parse(markup_string).buttons;
    const type_names = Object.keys(buttons.types);

    // Add up the counts of all buttons that declared a type.
    let typed_count = 0;
    for (const type_name of type_names) {
      typed_count = typed_count + buttons.types[type_name];
    }

    // More buttons in total than typed ones: some had no type attribute.
    if (buttons.total > typed_count) {
      type_names.push('NO_TYPE');
    }
    return type_names;
  } catch (e) {
    return [];
  }
''';
|
||
-- Share of pages using each button type, per client, for rows dated 2024-06-01.
WITH totals AS (
  -- Number of page rows per client; denominator for pct_pages.
  SELECT
    client,
    COUNT(0) AS total
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01'
  GROUP BY
    client
),

page_button_types AS (
  -- One row per (page, button type). The type is normalised here, before
  -- aggregation, so the GROUP BY below cannot resolve to the raw value:
  -- previously `button_type` named both the raw UNNEST column and the
  -- LOWER(TRIM(...)) alias, which is ambiguous and could emit several rows
  -- carrying the same normalised label.
  SELECT
    client,
    page,
    LOWER(TRIM(raw_button_type)) AS button_type
  FROM
    `httparchive.all.pages`,
    UNNEST(get_markup_buttons_info(JSON_EXTRACT(custom_metrics, '$.markup'))) AS raw_button_type
  WHERE
    date = '2024-06-01'
)

SELECT
  client,
  button_type,
  COUNT(DISTINCT page) AS page,
  ANY_VALUE(total) AS total,
  COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages
FROM
  page_button_types
JOIN
  totals
USING
  (client)
GROUP BY
  client,
  button_type
ORDER BY
  pct_pages DESC
LIMIT
  1000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
-- Per client: how many pages dated 2024-06-01 contain at least one HTML
-- comment / conditional comment, as absolute counts and as a share of all
-- pages for that client. Counts are read from `wpt_bodies.raw_html`.
WITH wpt_bodies AS (
  -- Pull the wpt_bodies custom metric out once per page.
  SELECT
    client,
    JSON_EXTRACT(custom_metrics, '$.wpt_bodies') AS wpt_bodies
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
),

comment_counts AS (
  SELECT
    client,
    CAST(JSON_VALUE(wpt_bodies, '$.raw_html.comment_count') AS INT64) AS comment_count,
    CAST(JSON_VALUE(wpt_bodies, '$.raw_html.conditional_comment_count') AS INT64) AS conditional_comment_count
  FROM wpt_bodies
)

SELECT
  client,
  COUNTIF(comment_count > 0) AS num_comments,
  COUNTIF(conditional_comment_count > 0) AS num_conditional_comments,
  COUNT(0) AS total,
  COUNTIF(comment_count > 0) / COUNT(0) AS pct_comments,
  COUNTIF(conditional_comment_count > 0) / COUNT(0) AS pct_conditional_comments
FROM comment_counts
GROUP BY client
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
-- Returns the value of a `content-encoding` response header.
--   response_headers: array of (name, value) header structs.
--   The header name is matched case-insensitively. When several headers
--   match, one of them is returned (no ordering is applied); when none
--   match, the result is NULL.
CREATE TEMPORARY FUNCTION GET_CONTENT_ENCODING(response_headers ARRAY<STRUCT<name STRING, value STRING>>)
RETURNS STRING AS ((
  SELECT header.value
  FROM UNNEST(response_headers) AS header
  WHERE LOWER(header.name) = 'content-encoding'
  LIMIT 1
));
|
||
-- Distribution of the Content-Encoding header on main-document requests
-- dated 2024-06-01.
--   freq:  number of main-document requests with that encoding
--   total: number of main-document requests for the same date and client
--   pct:   freq / total
SELECT
  date,
  client,
  GET_CONTENT_ENCODING(response_headers) AS content_encoding,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER (PARTITION BY date, client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY date, client) AS pct
FROM
  `httparchive.all.requests`
WHERE
  date = '2024-06-01' AND
  is_main_document
GROUP BY
  -- `date` is selected, so it has to be grouped as well; with the single-date
  -- filter above this does not change the number of result rows.
  date,
  client,
  content_encoding
ORDER BY
  pct DESC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
-- Year-over-year share of pages that render at least one custom element,
-- per crawl date and client.
WITH custom_elements AS (
  SELECT
    -- `date` must be projected here: the outer query selects and groups by it.
    date,
    client,
    page,
    -- TRUE when the wpt_bodies metric lists one or more custom element names;
    -- a missing metric or list counts as FALSE.
    COALESCE(ARRAY_LENGTH(JSON_VALUE_ARRAY(JSON_EXTRACT(custom_metrics, '$.wpt_bodies'), '$.web_components.rendered.customElements.names')) > 0, FALSE) AS has_custom_elements
  FROM
    `httparchive.all.pages`
  WHERE
    date IN ('2022-06-01', '2023-06-01', '2024-06-01')
)

SELECT
  date,
  client,
  COUNT(0) AS total,
  COUNTIF(has_custom_elements) AS freq,
  COUNTIF(has_custom_elements) / COUNT(0) AS pct_custom_elements
FROM
  custom_elements
GROUP BY
  date, client
ORDER BY
  date ASC,
  client ASC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
-- JavaScript weight percentiles (p10/p25/p50/p75/p90), per client, split by
-- whether the page renders any custom elements. Rows dated 2024-06-01 only.
WITH js_weight AS (
  -- JavaScript bytes per page from the summary JSON, divided by 1024.
  SELECT
    client,
    page,
    SAFE_CAST(JSON_EXTRACT(summary, '$.bytesJS') AS INT64) / 1024 AS js_kb
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
),

custom_element_usage AS (
  -- A missing custom element name list is treated as length 0, i.e. FALSE.
  SELECT
    client,
    page,
    COALESCE(ARRAY_LENGTH(JSON_VALUE_ARRAY(JSON_EXTRACT(custom_metrics, '$.wpt_bodies'), '$.web_components.rendered.customElements.names')), 0) > 0 AS has_custom_elements
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
)

SELECT
  percentile,
  client,
  has_custom_elements,
  -- 1000 quantile buckets, so percentile p sits at offset p * 10.
  APPROX_QUANTILES(js_kb, 1000)[OFFSET(percentile * 10)] AS kbytes_js,
  COUNT(DISTINCT page) AS pages
FROM custom_element_usage
INNER JOIN js_weight USING (client, page)
CROSS JOIN UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
  percentile,
  client,
  has_custom_elements
ORDER BY
  percentile,
  client,
  has_custom_elements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
-- Extracts the `data-*` attributes from the `attributes_used_on_elements`
-- map of the almanac custom metric as an array of (name, freq) structs.
--   almanac_string: JSON text of the almanac custom metric.
--   Returns an empty array when the text cannot be parsed, is not a JSON
--   object, or has no (truthy) `attributes_used_on_elements` member.
CREATE TEMPORARY FUNCTION get_almanac_attribute_info(almanac_string STRING)
RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
  try {
    const almanac = JSON.parse(almanac_string);
    if (typeof almanac != 'object' || Array.isArray(almanac)) {
      return [];
    }
    const usage = almanac.attributes_used_on_elements;
    if (!usage) {
      return [];
    }
    // Keep only attributes whose name begins with "data-".
    const data_attributes = [];
    for (const [name, freq] of Object.entries(usage)) {
      if (name.startsWith('data-')) {
        data_attributes.push({name, freq});
      }
    }
    return data_attributes;
  } catch (e) {
    return [];
  }
''';
|
||
-- Per client: number and share of pages dated 2024-06-01 that use at least
-- one attribute returned by get_almanac_attribute_info.
WITH page_counts AS (
  -- Number of page rows per client; denominator for pct_pages.
  SELECT
    client,
    COUNT(0) AS client_pages
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
  GROUP BY client
)

SELECT
  client,
  COUNT(DISTINCT page) AS pages,
  ANY_VALUE(client_pages) AS total_pages,
  COUNT(DISTINCT page) / ANY_VALUE(client_pages) AS pct_pages
FROM `httparchive.all.pages`
INNER JOIN page_counts USING (client)
-- Pages for which the function returns nothing produce no rows here, so they
-- are only counted in the denominator.
CROSS JOIN UNNEST(get_almanac_attribute_info(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS data_attribute
WHERE date = '2024-06-01'
GROUP BY client
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
-- Extracts the `data-*` attributes from the `attributes_used_on_elements`
-- map of the almanac custom metric as an array of (name, freq) structs.
--   almanac_string: JSON text of the almanac custom metric.
--   Returns an empty array when the text cannot be parsed, is not a JSON
--   object, or has no (truthy) `attributes_used_on_elements` member.
CREATE TEMPORARY FUNCTION get_almanac_attribute_info(almanac_string STRING)
RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
  try {
    const almanac = JSON.parse(almanac_string);
    const is_object = typeof almanac == 'object' && !Array.isArray(almanac);
    if (!is_object) {
      return [];
    }
    const usage = almanac.attributes_used_on_elements;
    if (!usage) {
      return [];
    }
    // Entries are [attribute name, count]; keep the "data-" prefixed ones.
    const data_entries = Object.entries(usage).filter((entry) => entry[0].startsWith('data-'));
    return data_entries.map((entry) => ({name: entry[0], freq: entry[1]}));
  } catch (e) {
    return [];
  }
''';
|
||
-- Ranking of the attributes returned by get_almanac_attribute_info for rows
-- dated 2024-06-01, per client.
--   pages / total_pages / pct_pages: distinct pages using the attribute,
--     relative to all pages counted for that client.
--   freq / total / pct_ratio: summed occurrence count of the attribute,
--     relative to the summed occurrences of all returned attributes for
--     that client.
WITH page_counts AS (
  -- Number of page rows per client; denominator for pct_pages.
  SELECT
    client,
    COUNT(0) AS client_pages
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
  GROUP BY client
),

data_attrs AS (
  SELECT
    client,
    attr.name,
    COUNT(DISTINCT page) AS pages,
    ANY_VALUE(client_pages) AS total_pages,
    COUNT(DISTINCT page) / ANY_VALUE(client_pages) AS pct_pages,
    -- Occurrences of this attribute summed over all pages.
    SUM(attr.freq) AS freq,
    -- Window over the grouped rows: occurrences of all returned attributes.
    SUM(SUM(attr.freq)) OVER (PARTITION BY client) AS total,
    SUM(attr.freq) / SUM(SUM(attr.freq)) OVER (PARTITION BY client) AS pct_ratio
  FROM `httparchive.all.pages`
  INNER JOIN page_counts USING (client)
  CROSS JOIN UNNEST(get_almanac_attribute_info(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS attr
  WHERE date = '2024-06-01'
  GROUP BY
    client,
    attr.name
)

SELECT
  client,
  name,
  pages,
  total_pages,
  pct_pages,
  freq,
  total,
  pct_ratio
FROM data_attrs
ORDER BY pct_ratio DESC
LIMIT 1000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
-- Number of distinct <html lang> values per client for rows dated 2024-06-01.
-- Pages without a lang value are counted together as '(not set)'.
WITH langs AS (
  SELECT
    client,
    -- JSON_VALUE yields the bare string. JSON_EXTRACT would keep the
    -- surrounding double quotes, so TRIM could never strip whitespace inside
    -- them and values such as "en" and "en " would be counted separately.
    TRIM(LOWER(JSON_VALUE(custom_metrics, '$.almanac.html_node.lang'))) AS lang
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01'
)

SELECT
  client,
  COUNT(DISTINCT IFNULL(lang, '(not set)')) AS distinct_lang_count
FROM
  langs
GROUP BY
  client
ORDER BY
  distinct_lang_count DESC;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
-- Temporary function to extract the doctype from the page summary JSON.
--   summary: JSON text of the page summary.
--   Returns the `doctype` member as a plain string, or NULL when it is
--   absent or not a scalar.
-- JSON_VALUE is used instead of JSON_EXTRACT so the result is not wrapped in
-- JSON double quotes; with the quotes in place a caller's TRIM cannot remove
-- whitespace at the edges of the value.
CREATE TEMPORARY FUNCTION EXTRACT_DOCTYPE(summary STRING) RETURNS STRING AS (
  JSON_VALUE(summary, '$.doctype')
);
|
||
-- Distribution of doctype values per client for rows dated 2024-06-01.
--   pages:     number of pages with that doctype
--   total:     number of pages for the client
--   pct_pages: pages / total
WITH page_doctypes AS (
  SELECT
    client,
    -- Trim, collapse runs of spaces into one, and lower-case the doctype.
    LOWER(REGEXP_REPLACE(TRIM(EXTRACT_DOCTYPE(summary)), r' +', ' ')) AS doctype
  FROM `httparchive.all.pages`
  WHERE date = '2024-06-01'
)

SELECT
  client,
  doctype,
  COUNT(0) AS pages,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_pages
FROM page_doctypes
GROUP BY
  client,
  doctype
ORDER BY pct_pages DESC
LIMIT 100;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
-- Temporary function to extract bytesHtml from the page summary JSON.
--   summary: JSON text of the page summary.
--   Returns the `bytesHtml` member as INT64, or NULL when it is absent or
--   cannot be cast to an integer.
-- (Renamed from EXTRACT_DOCTYPE, which did not describe what it extracts.)
CREATE TEMPORARY FUNCTION EXTRACT_BYTES_HTML(summary STRING) RETURNS INT64 AS (
  SAFE_CAST(JSON_EXTRACT(summary, '$.bytesHtml') AS INT64)
);

-- HTML document size percentiles (p10/p25/p50/p75/p90) in KB, per client,
-- for rows dated 2024-06-01.
SELECT
  percentile,
  client,
  -- Bytes -> KB uses 1024 (the previous divisor, 1014, was a typo).
  -- 1000 quantile buckets, so percentile p sits at offset p * 10.
  APPROX_QUANTILES(EXTRACT_BYTES_HTML(summary) / 1024, 1000)[OFFSET(percentile * 10)] AS kb_html
FROM
  `httparchive.all.pages`,
  UNNEST([10, 25, 50, 75, 90]) AS percentile
WHERE
  date = '2024-06-01'
GROUP BY
  percentile,
  client
ORDER BY
  -- Also order by percentile so the rows within a client are deterministic.
  client,
  percentile
Oops, something went wrong.