diff --git a/sql/2024/markup/attributes.sql b/sql/2024/markup/attributes.sql
new file mode 100644
index 00000000000..cfff4906c90
--- /dev/null
+++ b/sql/2024/markup/attributes.sql
@@ -0,0 +1,62 @@
+#standardSQL
+# pages almanac metrics grouped by device and element attribute use (frequency)
+
+CREATE TEMPORARY FUNCTION get_almanac_attribute_info(almanac_string STRING) # one {name, freq} per key of almanac.attributes_used_on_elements; [] on unusable input
+RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
+try {
+  var almanac = JSON.parse(almanac_string);
+
+  if (Array.isArray(almanac) || typeof almanac != 'object') return [];
+
+  if (almanac.attributes_used_on_elements) {
+    return Object.entries(almanac.attributes_used_on_elements).map(([name, freq]) => ({name, freq}));
+  }
+
+} catch (e) {
+
+}
+return [];
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total # all pages of the client, with or without attribute data
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+), attributes AS (
+  SELECT
+    client,
+    almanac_attribute_info.name,
+    COUNT(DISTINCT page) AS pages,
+    ANY_VALUE(total) AS total_pages,
+    COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages, # share of the client's pages using this attribute
+    SUM(almanac_attribute_info.freq) AS freq,
+    SUM(SUM(almanac_attribute_info.freq)) OVER (PARTITION BY client) AS total, # freq summed over every attribute name of the client
+    SUM(almanac_attribute_info.freq) / SUM(SUM(almanac_attribute_info.freq)) OVER (PARTITION BY client) AS pct_ratio
+  FROM
+    `httparchive.all.pages`,
+    UNNEST(get_almanac_attribute_info(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS almanac_attribute_info
+  JOIN
+    totals
+  USING
+    (client)
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client,
+    almanac_attribute_info.name
+)
+
+SELECT
+  *
+FROM
+  attributes
+ORDER BY
+  pct_ratio DESC
+LIMIT
+  1000
diff --git a/sql/2024/markup/buttons.sql b/sql/2024/markup/buttons.sql
new file mode 100644
index 00000000000..dbabcaf56b8
--- /dev/null
+++ b/sql/2024/markup/buttons.sql
@@ -0,0 +1,49 @@
+CREATE TEMPORARY FUNCTION get_markup_buttons_info(markup_string STRING) # returns button type names as strings
+RETURNS ARRAY<STRING> LANGUAGE js AS '''
+try {
+  var markup = JSON.parse(markup_string);
+  var type_total =
Object.values(markup.buttons.types).reduce((total, i) => total + i, 0); // sum of the per-type button counts
+  var types = [];
+  if (markup.buttons.total > type_total) { // more buttons than typed buttons
+    types = ['NO_TYPE']; // presumably buttons without a type attribute - confirm against the custom metric
+  }
+  return Object.keys(markup.buttons.types).concat(types);
+} catch (e) {
+  return [];
+}
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total # all pages of the client for the crawl date
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+)
+
+SELECT
+  client AS client,
+  LOWER(TRIM(button_type)) AS button_type,
+  COUNT(DISTINCT page) AS page, # number of distinct pages, despite the singular alias
+  ANY_VALUE(total) AS total,
+  COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages
+FROM
+  `httparchive.all.pages`
+JOIN
+  totals
+USING
+  (client),
+  UNNEST(get_markup_buttons_info(JSON_EXTRACT(custom_metrics, '$.markup'))) AS button_type
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  client,
+  button_type
+ORDER BY
+  pct_pages DESC
+LIMIT
+  1000
diff --git a/sql/2024/markup/comments.sql b/sql/2024/markup/comments.sql
new file mode 100644
index 00000000000..92bb1b77ff6
--- /dev/null
+++ b/sql/2024/markup/comments.sql
@@ -0,0 +1,22 @@
+WITH comments AS (
+  SELECT
+    client,
+    CAST(JSON_VALUE(JSON_EXTRACT(custom_metrics, '$.wpt_bodies'), '$.raw_html.comment_count') AS INT64) AS num_comments,
+    CAST(JSON_VALUE(JSON_EXTRACT(custom_metrics, '$.wpt_bodies'), '$.raw_html.conditional_comment_count') AS INT64) AS num_conditional_comments
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+)
+
+SELECT
+  client,
+  COUNTIF(num_comments > 0) AS num_comments, # pages with at least one comment (not a count of comments)
+  COUNTIF(num_conditional_comments > 0) AS num_conditional_comments, # pages with at least one conditional comment
+  COUNT(0) AS total,
+  COUNTIF(num_comments > 0) / COUNT(0) AS pct_comments,
+  COUNTIF(num_conditional_comments > 0) / COUNT(0) AS pct_conditional_comments
+FROM
+  comments
+GROUP BY
+  client
diff --git a/sql/2024/markup/content_encoding.sql b/sql/2024/markup/content_encoding.sql
new file mode 100644
index 00000000000..94b95e50d11
--- /dev/null
+++ b/sql/2024/markup/content_encoding.sql
@@ -0,0 +1,31 @@
+-- Temporary function to extract content-encoding
+CREATE TEMPORARY FUNCTION GET_CONTENT_ENCODING(response_headers ARRAY<STRUCT<name STRING, value STRING>>) -- first header whose name is content-encoding (case-insensitive); NULL when absent
+RETURNS STRING AS (
+  (
+    SELECT
+      value
+    FROM
+      UNNEST(response_headers) AS header
+    WHERE
+      LOWER(header.name) = 'content-encoding'
+    LIMIT 1
+  )
+);
+
+SELECT
+  date,
+  client,
+  GET_CONTENT_ENCODING(response_headers) AS content_encoding,
+  COUNT(0) AS freq,
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
+FROM
+  `httparchive.all.requests`
+WHERE
+  date = '2024-06-01' AND
+  is_main_document
+GROUP BY
+  date, client, -- date is selected, so it has to be a grouping key as well
+  content_encoding
+ORDER BY
+  pct DESC
diff --git a/sql/2024/markup/custom_elements_adoption.sql b/sql/2024/markup/custom_elements_adoption.sql
new file mode 100644
index 00000000000..eaeaeb556b5
--- /dev/null
+++ b/sql/2024/markup/custom_elements_adoption.sql
@@ -0,0 +1,24 @@
+WITH custom_elements AS (
+  SELECT
+    date, -- projected here because the outer query selects and groups by it
+    client,
+    page,
+    COALESCE(ARRAY_LENGTH(JSON_VALUE_ARRAY(JSON_EXTRACT(custom_metrics, '$.wpt_bodies'), '$.web_components.rendered.customElements.names')) > 0, FALSE) AS has_custom_elements -- FALSE when the names array is missing
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date IN ('2022-06-01', '2023-06-01', '2024-06-01')
+)
+
+SELECT
+  date,
+  client,
+  COUNT(0) AS total,
+  COUNTIF(has_custom_elements) AS freq,
+  COUNTIF(has_custom_elements) / COUNT(0) AS pct_custom_elements
+FROM
+  custom_elements
+GROUP BY
+  date, client
+ORDER BY
+  date ASC
diff --git a/sql/2024/markup/custom_elements_js_bytes_distribution.sql b/sql/2024/markup/custom_elements_js_bytes_distribution.sql
new file mode 100644
index 00000000000..a1fc0ce401e
--- /dev/null
+++ b/sql/2024/markup/custom_elements_js_bytes_distribution.sql
@@ -0,0 +1,41 @@
+WITH js_bytes AS (
+  SELECT
+    client,
+    page,
+    SAFE_CAST(JSON_EXTRACT(summary, '$.bytesJS') AS INT64) / 1024 AS kbytes_js
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+), custom_elements AS (
+  SELECT
+    client,
+    page,
+    COALESCE(ARRAY_LENGTH(JSON_VALUE_ARRAY(JSON_EXTRACT(custom_metrics, '$.wpt_bodies'),
'$.web_components.rendered.customElements.names')) > 0, FALSE) AS has_custom_elements
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+)
+
+SELECT
+  percentile,
+  client,
+  has_custom_elements,
+  APPROX_QUANTILES(kbytes_js, 1000)[OFFSET(percentile * 10)] AS kbytes_js, # 1000 buckets, so percentile p sits at offset p * 10
+  COUNT(DISTINCT page) AS pages
+FROM
+  custom_elements
+JOIN
+  js_bytes
+USING
+  (client, page),
+  UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+  percentile,
+  client,
+  has_custom_elements
+ORDER BY
+  percentile,
+  client,
+  has_custom_elements
diff --git a/sql/2024/markup/data_attribute_total.sql b/sql/2024/markup/data_attribute_total.sql
new file mode 100644
index 00000000000..ad3a93a161c
--- /dev/null
+++ b/sql/2024/markup/data_attribute_total.sql
@@ -0,0 +1,43 @@
+CREATE TEMPORARY FUNCTION get_almanac_attribute_info(almanac_string STRING) # one {name, freq} per data-* key of almanac.attributes_used_on_elements; [] on unusable input
+RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
+try {
+  var almanac = JSON.parse(almanac_string);
+
+  if (Array.isArray(almanac) || typeof almanac != 'object') return [];
+
+  if (almanac.attributes_used_on_elements) {
+    return Object.entries(almanac.attributes_used_on_elements).filter(([name, freq]) => name.startsWith('data-')).map(([name, freq]) => ({name, freq}));
+  }
+
+} catch (e) {}
+return [];
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total_pages
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+)
+
+SELECT
+  client,
+  COUNT(DISTINCT page) AS pages, # pages with at least one data-* attribute (pages without any produce no UNNEST rows)
+  ANY_VALUE(total_pages) AS total_pages,
+  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages
+FROM
+  `httparchive.all.pages`
+JOIN
+  totals
+USING
+  (client),
+  UNNEST(get_almanac_attribute_info(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS almanac_attribute_info
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  client
diff --git a/sql/2024/markup/data_attributes.sql b/sql/2024/markup/data_attributes.sql
new file mode 100644
index 00000000000..f90fddd76ec
--- /dev/null
+++ b/sql/2024/markup/data_attributes.sql
@@ -0,0 +1,57 @@
+CREATE TEMPORARY
FUNCTION get_almanac_attribute_info(almanac_string STRING) # one {name, freq} per data-* key of almanac.attributes_used_on_elements; [] on unusable input
+RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
+try {
+  var almanac = JSON.parse(almanac_string);
+
+  if (Array.isArray(almanac) || typeof almanac != 'object') return [];
+
+  if (almanac.attributes_used_on_elements) {
+    return Object.entries(almanac.attributes_used_on_elements).filter(([name, freq]) => name.startsWith('data-')).map(([name, freq]) => ({name, freq}));
+  }
+
+} catch (e) {}
+return [];
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total_pages
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+), data_attrs AS (
+  SELECT
+    client,
+    almanac_attribute_info.name,
+    COUNT(DISTINCT page) AS pages,
+    ANY_VALUE(total_pages) AS total_pages,
+    COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
+    SUM(almanac_attribute_info.freq) AS freq, # total count from all pages
+    SUM(SUM(almanac_attribute_info.freq)) OVER (PARTITION BY client) AS total, # freq summed over every data-* name of the client
+    SUM(almanac_attribute_info.freq) / SUM(SUM(almanac_attribute_info.freq)) OVER (PARTITION BY client) AS pct_ratio
+  FROM
+    `httparchive.all.pages`
+  JOIN
+    totals
+  USING
+    (client),
+    UNNEST(get_almanac_attribute_info(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS almanac_attribute_info
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client,
+    almanac_attribute_info.name
+)
+
+SELECT
+  *
+FROM
+  data_attrs
+ORDER BY
+  pct_ratio DESC
+LIMIT
+  1000
diff --git a/sql/2024/markup/distinct_lang.sql b/sql/2024/markup/distinct_lang.sql
new file mode 100644
index 00000000000..d539b469625
--- /dev/null
+++ b/sql/2024/markup/distinct_lang.sql
@@ -0,0 +1,19 @@
+WITH langs AS (
+  SELECT
+    client,
+    TRIM(LOWER(JSON_VALUE(custom_metrics, '$.almanac.html_node.lang'))) AS lang # JSON_VALUE yields the bare string, so TRIM really strips surrounding whitespace
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+)
+
+SELECT
+  client,
+  COUNT(DISTINCT IFNULL(lang, '(not set)')) AS distinct_lang_count # a missing lang counts as one extra distinct value
+FROM
+  langs
+GROUP BY
+  client
+ORDER BY
+  distinct_lang_count DESC;
diff --git a/sql/2024/markup/doctype.sql
b/sql/2024/markup/doctype.sql
new file mode 100644
index 00000000000..aa781d9296a
--- /dev/null
+++ b/sql/2024/markup/doctype.sql
@@ -0,0 +1,22 @@
+-- Temporary function to extract doctype (as an unquoted string; NULL when absent)
+CREATE TEMPORARY FUNCTION EXTRACT_DOCTYPE(summary STRING) RETURNS STRING AS (
+  SAFE_CAST(JSON_VALUE(summary, '$.doctype') AS STRING)
+);
+
+SELECT
+  client,
+  LOWER(REGEXP_REPLACE(TRIM(EXTRACT_DOCTYPE(summary)), r' +', ' ')) AS doctype, # remove extra spaces and make lower case
+  COUNT(0) AS pages,
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_pages
+FROM
+  `httparchive.all.pages`
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  client,
+  doctype
+ORDER BY
+  pct_pages DESC
+LIMIT
+  100;
diff --git a/sql/2024/markup/document_size_distribution.sql b/sql/2024/markup/document_size_distribution.sql
new file mode 100644
index 00000000000..1f21433f270
--- /dev/null
+++ b/sql/2024/markup/document_size_distribution.sql
@@ -0,0 +1,19 @@
+-- Temporary function to extract bytesHtml (despite its name, EXTRACT_DOCTYPE returns the bytesHtml value as INT64)
+CREATE TEMPORARY FUNCTION EXTRACT_DOCTYPE(summary STRING) RETURNS INT64 AS (
+  SAFE_CAST(JSON_EXTRACT(summary, '$.bytesHtml') AS INT64)
+);
+
+SELECT
+  percentile,
+  client,
+  APPROX_QUANTILES(EXTRACT_DOCTYPE(summary) / 1024, 1000)[OFFSET(percentile * 10)] AS kb_html -- bytes / 1024, same divisor as document_trends.sql
+FROM
+  `httparchive.all.pages`,
+  UNNEST([10, 25, 50, 75, 90]) AS percentile
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  percentile,
+  client
+ORDER BY
+  client
diff --git a/sql/2024/markup/document_trends.sql b/sql/2024/markup/document_trends.sql
new file mode 100644
index 00000000000..130d652e85a
--- /dev/null
+++ b/sql/2024/markup/document_trends.sql
@@ -0,0 +1,20 @@
+-- Temporary function to extract bytesHtml (despite its name, EXTRACT_DOCTYPE returns the bytesHtml value as INT64)
+CREATE TEMPORARY FUNCTION EXTRACT_DOCTYPE(summary STRING) RETURNS INT64 AS (
+  SAFE_CAST(JSON_EXTRACT(summary, '$.bytesHtml') AS INT64)
+);
+
+SELECT
+  date,
+  client,
+  APPROX_QUANTILES(EXTRACT_DOCTYPE(summary) / 1024, 1000)[OFFSET(500)] AS median_kbytes_html,
+  COUNT(0) AS
total
+FROM
+  `httparchive.all.pages`
+WHERE
+  date IN ('2022-06-01', '2023-06-01', '2024-06-01')
+GROUP BY
+  date,
+  client
+ORDER BY
+  date,
+  client
diff --git a/sql/2024/markup/document_trends_is_root_page.sql b/sql/2024/markup/document_trends_is_root_page.sql
new file mode 100644
index 00000000000..c3d77a55a17
--- /dev/null
+++ b/sql/2024/markup/document_trends_is_root_page.sql
@@ -0,0 +1,21 @@
+-- Temporary function to extract bytesHtml (despite its name, EXTRACT_DOCTYPE returns the bytesHtml value as INT64)
+CREATE TEMPORARY FUNCTION EXTRACT_DOCTYPE(summary STRING) RETURNS INT64 AS (
+  SAFE_CAST(JSON_EXTRACT(summary, '$.bytesHtml') AS INT64)
+);
+
+SELECT
+  date,
+  client,
+  APPROX_QUANTILES(EXTRACT_DOCTYPE(summary) / 1024, 1000)[OFFSET(500)] AS median_kbytes_html, -- offset 500 of 1000 buckets is the median
+  COUNT(0) AS total
+FROM
+  `httparchive.all.pages`
+WHERE
+  date IN ('2022-06-01', '2023-06-01', '2024-06-01') AND
+  is_root_page
+GROUP BY
+  date,
+  client
+ORDER BY
+  date,
+  client
diff --git a/sql/2024/markup/element_count_distribution.sql b/sql/2024/markup/element_count_distribution.sql
new file mode 100644
index 00000000000..f098813e6f0
--- /dev/null
+++ b/sql/2024/markup/element_count_distribution.sql
@@ -0,0 +1,47 @@
+CREATE TEMPORARY FUNCTION get_element_count_info(element_count_string STRING) # both fields stay NULL when the input is empty, not an object, or unparsable
+RETURNS STRUCT<elements_count INT64, types_count INT64> LANGUAGE js AS '''
+var result = {};
+try {
+  if (!element_count_string) return result;
+
+  var element_count = JSON.parse(element_count_string);
+
+  if (Array.isArray(element_count) || typeof element_count != 'object') return result;
+
+  result.elements_count = Object.values(element_count).reduce((total, freq) => total + (parseInt(freq, 10) || 0), 0);
+
+  result.types_count = Object.keys(element_count).length;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+  client,
+  percentile,
+  COUNT(DISTINCT page) AS total,
+
+  # total number of elements on a page
+  APPROX_QUANTILES(element_count_info.elements_count, 1000)[OFFSET(percentile * 10)] AS elements_count,
+
+  # number of types of elements on a page
+  APPROX_QUANTILES(element_count_info.types_count,
1000)[OFFSET(percentile * 10)] AS types_count
+
+FROM (
+  SELECT
+    client,
+    percentile,
+    page,
+    get_element_count_info(JSON_EXTRACT(custom_metrics, '$.element_count')) AS element_count_info
+  FROM
+    `httparchive.all.pages`,
+    UNNEST([10, 25, 50, 75, 90]) AS percentile # one copy of every page per requested percentile
+  WHERE
+    date = '2024-06-01'
+)
+GROUP BY
+  percentile,
+  client
+ORDER BY
+  percentile,
+  client
diff --git a/sql/2024/markup/element_frequency.sql b/sql/2024/markup/element_frequency.sql
new file mode 100644
index 00000000000..13235141746
--- /dev/null
+++ b/sql/2024/markup/element_frequency.sql
@@ -0,0 +1,53 @@
+CREATE TEMPORARY FUNCTION get_element_types_info(element_count_string STRING) # one {name, freq} per key of the element_count object; [] on unusable input
+RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
+try {
+  if (!element_count_string) return []; // 2019 had a few cases
+
+  var element_count = JSON.parse(element_count_string); // should be an object with element type properties with values of how often they are present
+
+  if (Array.isArray(element_count) || typeof element_count != 'object') return [];
+
+  return Object.entries(element_count).map(([name, freq]) => ({name, freq}));
+
+} catch (e) {
+  return [];
+}
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+)
+
+SELECT
+  client AS client,
+  element_type_info.name,
+  COUNT(DISTINCT page) AS pages,
+  ANY_VALUE(total) AS total_pages,
+  COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages,
+  SUM(element_type_info.freq) AS freq, # total count from all pages
+  SUM(SUM(element_type_info.freq)) OVER (PARTITION BY client) AS total_freq,
+  SUM(element_type_info.freq) / SUM(SUM(element_type_info.freq)) OVER (PARTITION BY client) AS pct # share of all element occurrences of the client
+FROM
+  `httparchive.all.pages`,
+  UNNEST(get_element_types_info(JSON_EXTRACT(custom_metrics, '$.element_count'))) AS element_type_info
+JOIN
+  totals
+USING
+  (client)
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  client,
+  element_type_info.name
+ORDER BY
+  pct DESC
+LIMIT
+  1000
diff --git
a/sql/2024/markup/element_popularity.sql b/sql/2024/markup/element_popularity.sql
new file mode 100644
index 00000000000..ec4ee5d598c
--- /dev/null
+++ b/sql/2024/markup/element_popularity.sql
@@ -0,0 +1,52 @@
+CREATE TEMPORARY FUNCTION get_element_types(element_count_string STRING) # keys of the element_count object; [] on unusable input
+RETURNS ARRAY<STRING> LANGUAGE js AS '''
+try {
+  if (!element_count_string) return []; // 2019 had a few cases
+
+  var element_count = JSON.parse(element_count_string); // should be an object with element type properties with values of how often they are present
+
+  if (Array.isArray(element_count)) return [];
+  if (typeof element_count != 'object') return [];
+
+  return Object.keys(element_count);
+} catch (e) {
+  return [];
+}
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+)
+
+SELECT
+  client AS client,
+  element_type,
+  COUNT(DISTINCT page) AS pages,
+  total,
+  COUNT(DISTINCT page) / total AS pct # share of the client's pages containing the element at least once
+FROM
+  `httparchive.all.pages`
+JOIN
+  totals
+USING
+  (client),
+  UNNEST(get_element_types(JSON_EXTRACT(custom_metrics, '$.element_count'))) AS element_type
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  client,
+  total,
+  element_type
+ORDER BY
+  pct DESC,
+  client,
+  pages DESC
+LIMIT 1000
diff --git a/sql/2024/markup/favicons.sql b/sql/2024/markup/favicons.sql
new file mode 100644
index 00000000000..cd62f85e890
--- /dev/null
+++ b/sql/2024/markup/favicons.sql
@@ -0,0 +1,69 @@
+CREATE TEMPORARY FUNCTION getFaviconImage(almanac_string STRING)
+RETURNS STRUCT<
+  image_type_extension STRING
+> LANGUAGE js AS '''
+var result = {};
+try {
+  var almanac = JSON.parse(almanac_string);
+
+  if (Array.isArray(almanac) || typeof almanac != 'object') return result;
+
+  if (almanac["link-nodes"] && almanac["link-nodes"].nodes && almanac["link-nodes"].nodes.find) {
+    var faviconNode = almanac["link-nodes"].nodes.find(n => n.rel && n.rel.split(' ').find(r => r.trim().toLowerCase() == 'icon'));
+
+    if (faviconNode)
{
+      if (faviconNode.href) {
+        var temp = faviconNode.href;
+
+        if (temp.includes('?')) { // drop the query string before looking for an extension
+          temp = temp.substring(0, temp.indexOf('?'));
+        }
+
+        if (temp.includes('.')) {
+          temp = temp.substring(temp.lastIndexOf('.')+1); // everything after the last dot of the remaining URL
+
+          result.image_type_extension = temp.toLowerCase().trim();
+        }
+        else {
+          result.image_type_extension = "NO_EXTENSION";
+        }
+
+      } else {
+        result.image_type_extension = "NO_HREF";
+      }
+    } else {
+      result.image_type_extension = "NO_ICON";
+    }
+  }
+  else {
+    result.image_type_extension = "NO_DATA";
+  }
+
+} catch (e) {result.image_type_extension = "NO_DATA";}
+return result;
+''';
+
+SELECT
+  client,
+  favicon.image_type_extension AS image_type_extension,
+  COUNT(0) AS freq,
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
+FROM
+  (
+    SELECT
+      client,
+      getFaviconImage(JSON_EXTRACT(custom_metrics, '$.almanac')) AS favicon
+    FROM
+      `httparchive.all.pages`
+    WHERE
+      date = '2024-06-01'
+  )
+GROUP BY
+  client,
+  image_type_extension
+ORDER BY
+  pct DESC,
+  client,
+  freq DESC
+LIMIT 1000
diff --git a/sql/2024/markup/forms.sql b/sql/2024/markup/forms.sql
new file mode 100644
index 00000000000..0ae1abdafeb
--- /dev/null
+++ b/sql/2024/markup/forms.sql
@@ -0,0 +1,24 @@
+WITH forms AS (
+  SELECT
+    client,
+    page,
+    CAST(IFNULL(JSON_VALUE(JSON_EXTRACT(custom_metrics, '$.element_count'), '$.form'), '0') AS INT64) AS forms_count # a missing form key is treated as 0 forms
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+)
+
+SELECT
+  client,
+  forms_count,
+  COUNT(0) AS pages, # pages having exactly forms_count forms
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_pages
+FROM
+  forms
+GROUP BY
+  client,
+  forms_count
+ORDER BY
+  forms_count ASC
diff --git a/sql/2024/markup/inputs.sql b/sql/2024/markup/inputs.sql
new file mode 100644
index 00000000000..eadd87c520c
--- /dev/null
+++ b/sql/2024/markup/inputs.sql
@@ -0,0 +1,56 @@
+CREATE TEMPORARY FUNCTION get_markup_inputs_info(markup_string STRING)
+RETURNS ARRAY<STRUCT<name STRING, freq INT64>> LANGUAGE js AS '''
+var result = [];
+try {
+  var markup = JSON.parse(markup_string);
+
+  if (Array.isArray(markup) || typeof markup != 'object') return result;
+
+  if (markup.inputs && markup.inputs.types) {
+    var total = markup.inputs.total;
+    var withType = 0;
+    result = Object.entries(markup.inputs.types).map(([name, freq]) => { withType+=freq; return {name: name.toLowerCase().trim(), freq};});
+
+    result.push({name:"NO_TYPE", freq: total - withType}) // inputs counted in total but not under any type
+
+    return result;
+  }
+
+} catch (e) {}
+return result;
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+)
+
+SELECT
+  client,
+  markup_input_info.name AS input_type,
+  COUNTIF(markup_input_info.freq > 0) AS freq_page_with_input, # rows (page x type) with a positive count; NO_TYPE rows with freq 0 are excluded
+  COUNTIF(markup_input_info.freq > 0) / ANY_VALUE(total) AS pct_page_with_input,
+  SUM(markup_input_info.freq) AS freq_input,
+  SUM(markup_input_info.freq) / SUM(SUM(markup_input_info.freq)) OVER (PARTITION BY client) AS pct_input
+FROM
+  `httparchive.all.pages`
+JOIN
+  totals
+USING (client),
+  UNNEST(get_markup_inputs_info(JSON_EXTRACT(custom_metrics, '$.markup'))) AS markup_input_info
+WHERE
+  date = '2024-06-01'
+GROUP BY
+  client,
+  input_type
+ORDER BY
+  freq_page_with_input DESC
+LIMIT
+  1000
diff --git a/sql/2024/markup/inputs_per_form.sql b/sql/2024/markup/inputs_per_form.sql
new file mode 100644
index 00000000000..2ec643a5b90
--- /dev/null
+++ b/sql/2024/markup/inputs_per_form.sql
@@ -0,0 +1,33 @@
+CREATE TEMP FUNCTION getInputsPerForm(markup STRING) RETURNS ARRAY<INT64> LANGUAGE js AS r'''
+try {
+  markup = JSON.parse(markup);
+  return markup.form.elements.map(i => i.total); // one total per entry of markup.form.elements
+} catch {
+  return [];
+}
+''';
+
+WITH inputs AS (
+  SELECT
+    client,
+    inputs_per_form
+  FROM
+    `httparchive.all.pages`,
+    UNNEST(getInputsPerForm(JSON_EXTRACT(custom_metrics, '$.markup'))) AS inputs_per_form
+  WHERE
+    date = '2024-06-01'
+)
+
+SELECT
+  percentile,
+  client,
+
APPROX_QUANTILES(inputs_per_form, 1000)[OFFSET(percentile * 10)] AS inputs_per_form
+FROM
+  inputs,
+  UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+  percentile,
+  client
+ORDER BY
+  percentile,
+  client
diff --git a/sql/2024/markup/lang.sql b/sql/2024/markup/lang.sql
new file mode 100644
index 00000000000..39f4c3d62d6
--- /dev/null
+++ b/sql/2024/markup/lang.sql
@@ -0,0 +1,26 @@
+WITH langs AS (
+  SELECT
+    client,
+    TRIM(LOWER(JSON_VALUE(custom_metrics, '$.almanac.html_node.lang'))) AS lang # unquoted value, so the region split below starts at the first letter
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+)
+
+SELECT
+  client,
+  IFNULL(lang, '(not set)') AS html_lang_region,
+  IFNULL(REGEXP_EXTRACT(lang, r'^([^\-]+)'), '(not set)') AS html_lang, # part before the first hyphen, e.g. en for en-us
+  COUNT(0) AS pages,
+  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_pages
+FROM
+  langs
+GROUP BY
+  client,
+  lang
+HAVING
+  pages > 100
+ORDER BY
+  pct_pages DESC
diff --git a/sql/2024/markup/links.sql b/sql/2024/markup/links.sql
new file mode 100644
index 00000000000..09a79742050
--- /dev/null
+++ b/sql/2024/markup/links.sql
@@ -0,0 +1,39 @@
+WITH bodies AS (
+  SELECT
+    client,
+    JSON_EXTRACT(custom_metrics, '$.wpt_bodies') AS wpt_bodies
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+), links AS (
+  SELECT
+    client,
+    SAFE_CAST(JSON_EXTRACT(wpt_bodies, '$.anchors.rendered.target_blank.total') AS INT64) AS target_blank_total,
+    SAFE_CAST(JSON_EXTRACT(wpt_bodies, '$.anchors.rendered.target_blank.noopener_noreferrer') AS INT64) AS target_blank_noopener_noreferrer_total,
+    SAFE_CAST(JSON_EXTRACT(wpt_bodies, '$.anchors.rendered.target_blank.noopener') AS INT64) AS target_blank_noopener_total,
+    SAFE_CAST(JSON_EXTRACT(wpt_bodies, '$.anchors.rendered.target_blank.noreferrer') AS INT64) AS target_blank_noreferrer_total,
+    SAFE_CAST(JSON_EXTRACT(wpt_bodies, '$.anchors.rendered.target_blank.neither') AS INT64) AS target_blank_neither_total
+  FROM
+    bodies
+)
+
+SELECT
+  client,
+  COUNT(0) AS
total,
+
+  # pages where every target="_blank" link has rel="noopener noreferrer" (pages without target_blank data are counted too)
+  COUNTIF(target_blank_total IS NULL OR target_blank_total = target_blank_noopener_noreferrer_total) / COUNT(0) AS pct_always_target_blank_noopener_noreferrer,
+
+  # pages with some target="_blank" links not using rel="noopener noreferrer"
+  COUNTIF(target_blank_total > target_blank_noopener_noreferrer_total) / COUNT(0) AS pct_some_target_blank_without_noopener_noreferrer,
+
+  COUNTIF(target_blank_total > 0) / COUNT(0) AS pct_has_target_blank,
+  COUNTIF(target_blank_noopener_noreferrer_total > 0) / COUNT(0) AS pct_has_target_blank_noopener_noreferrer,
+  COUNTIF(target_blank_noopener_total > 0) / COUNT(0) AS pct_has_target_blank_noopener,
+  COUNTIF(target_blank_noreferrer_total > 0) / COUNT(0) AS pct_has_target_blank_noreferrer,
+  COUNTIF(target_blank_neither_total > 0) / COUNT(0) AS pct_has_target_blank_neither
+FROM
+  links
+GROUP BY
+  client
diff --git a/sql/2024/markup/meta_node_names.sql b/sql/2024/markup/meta_node_names.sql
new file mode 100644
index 00000000000..b2aa9428f2b
--- /dev/null
+++ b/sql/2024/markup/meta_node_names.sql
@@ -0,0 +1,56 @@
+CREATE TEMPORARY FUNCTION getMetaNodes(custom_metrics STRING) # name (or, failing that, property) of every meta node; [] on unusable input
+RETURNS ARRAY<STRING>
+LANGUAGE js AS '''
+try {
+  var almanac = JSON.parse(custom_metrics);
+  return almanac['meta-nodes'].nodes.map(n => n.name || n.property);
+} catch (e) {
+  return [];
+}
+''';
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total_pages
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client
+),
+
+meta AS (
+  SELECT
+    client,
+    IF(IFNULL(TRIM(name), '') = '', '(not set)', name) AS name,
+    COUNT(0) AS freq,
+    COUNT(0) / SUM(COUNT(0)) OVER () AS pct_nodes, # denominator spans all clients: the window has no PARTITION BY
+    COUNT(DISTINCT page) AS num_urls,
+    COUNT(DISTINCT page) / total_pages AS pct_pages
+  FROM
+    `httparchive.all.pages`,
+    UNNEST(getMetaNodes(JSON_EXTRACT(custom_metrics, '$.almanac'))) AS name
+  JOIN
+    totals
+  USING
+    (client)
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client,
+
total_pages,
+    name
+)
+
+SELECT
+  *
+FROM
+  meta
+WHERE
+  freq > 1
+ORDER BY
+  pct_nodes DESC
+LIMIT
+  200
diff --git a/sql/2024/markup/meta_viewports.sql b/sql/2024/markup/meta_viewports.sql
new file mode 100644
index 00000000000..5df801a2c39
--- /dev/null
+++ b/sql/2024/markup/meta_viewports.sql
@@ -0,0 +1,38 @@
+CREATE TEMPORARY FUNCTION normalise(content STRING) RETURNS STRING LANGUAGE js AS '''
+try {
+  // split by ,
+  // trim
+  // lower case
+  // alphabetize
+  // re join by comma
+
+  return content.split(",").map(c1 => c1.trim().toLowerCase().replace(/ +/g, "").replace(/\\.0*/,"")).sort().join(",");
+} catch (e) {
+  return '';
+}
+''';
+
+WITH viewports AS (
+  SELECT
+    client,
+    normalise(SAFE_CAST(JSON_VALUE(summary, '$.meta_viewport') AS STRING)) AS meta_viewport, # JSON_VALUE passes the bare content, without JSON quotes, to normalise
+    COUNT(0) AS freq,
+    SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
+    COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY
+    client,
+    meta_viewport
+)
+
+SELECT
+  *
+FROM
+  viewports
+ORDER BY
+  pct DESC
+LIMIT
+  100
diff --git a/sql/2024/markup/obsolete_elements.sql b/sql/2024/markup/obsolete_elements.sql
new file mode 100644
index 00000000000..a74fa90a355
--- /dev/null
+++ b/sql/2024/markup/obsolete_elements.sql
@@ -0,0 +1,53 @@
+CREATE TEMPORARY FUNCTION get_element_types(element_count_string STRING) # keys of the element_count object; [] on unusable input
+RETURNS ARRAY<STRING> LANGUAGE js AS '''
+try {
+  if (!element_count_string) return []; // 2019 had a few cases
+
+  var element_count = JSON.parse(element_count_string); // should be an object with element type properties with values of how often they are present
+
+  if (Array.isArray(element_count)) return [];
+  if (typeof element_count != 'object') return [];
+
+  return Object.keys(element_count);
+} catch (e) {
+  return [];
+}
+''';
+
+CREATE TEMPORARY FUNCTION is_obsolete(element STRING) AS (
+  element IN ('applet', 'acronym', 'basefont', 'bgsound', 'big', 'blink', 'center', 'dir', 'font', 'frame', 'frameset',
'isindex', 'keygen', 'listing', 'marquee', 'menuitem', 'multicol', 'nextid', 'nobr', 'noembed', 'noframes', 'param', 'plaintext', 'rb', 'rtc', 'spacer', 'strike', 'tt', 'xmp')
+);
+
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(0) AS total # all pages of the client, with or without obsolete elements
+  FROM
+    `httparchive.all.pages`
+  WHERE
+    date = '2024-06-01'
+  GROUP BY client
+)
+
+SELECT
+  client,
+  element_type AS obsolete_element_type,
+  COUNT(DISTINCT page) AS pages,
+  total AS total_pages,
+  COUNT(DISTINCT page) / total AS pct_pages_with_obsolete_elements # computed per element type, not for obsolete elements overall
+FROM
+  `httparchive.all.pages`
+JOIN
+  totals
+USING
+  (client),
+  UNNEST(get_element_types(JSON_EXTRACT(custom_metrics, '$.element_count'))) AS element_type
+WHERE
+  date = '2024-06-01' AND
+  is_obsolete(element_type)
+GROUP BY
+  client,
+  total,
+  obsolete_element_type
+ORDER BY
+  pct_pages_with_obsolete_elements DESC
diff --git a/src/config/2024.json b/src/config/2024.json
index 60a26c8cbec..a7d8fc69aa8 100644
--- a/src/config/2024.json
+++ b/src/config/2024.json
@@ -29,8 +29,7 @@
           "part": "I",
           "chapter_number": "3",
           "title": "Markup",
-          "slug": "markup",
-          "todo": true
+          "slug": "markup"
         },
         {
           "part": "I",
diff --git a/src/config/contributors.json b/src/config/contributors.json
index 4696d5898cc..a074b60b671 100644
--- a/src/config/contributors.json
+++ b/src/config/contributors.json
@@ -774,6 +774,9 @@
       "2022": [
         "authors",
         "reviewers"
+      ],
+      "2024": [
+        "reviewers"
       ]
     },
     "twitter": "briankardell",
@@ -1412,12 +1415,14 @@
   },
   "guaca": {
     "avatar_url": "7644895",
+    "bluesky": "guaca.bsky.social",
     "github": "guaca",
     "linkedin": "estelafranco",
     "mastodon": "https://toot.cafe/@guaca",
     "name": "Estela Franco",
     "teams": {
       "2024": [
+        "authors",
         "analysts"
       ]
     },
@@ -2019,6 +2024,9 @@
       "2022": [
         "authors",
         "reviewers"
+      ],
+      "2024": [
+        "reviewers"
       ]
     },
     "twitter": "j9t",
@@ -3885,6 +3893,9 @@
       ],
       "2022": [
         "reviewers"
+      ],
+      "2024": [
+        "reviewers"
       ]
     },
     "twitter": "zcorpan"
diff --git a/src/content/en/2024/markup.md b/src/content/en/2024/markup.md
index a606fdf3773..564dc00893d
100644 --- a/src/content/en/2024/markup.md +++ b/src/content/en/2024/markup.md @@ -2,17 +2,713 @@ #See https://github.com/HTTPArchive/almanac.httparchive.org/wiki/Authors'-Guide#metadata-to-add-at-the-top-of-your-chapters title: Markup description: Markup chapter of the 2024 Web Almanac covering document data (doctypes, compression, languages, HTML conformance, document size), the use of HTML elements and attributes, data attributes and social media. -authors: [] -reviewers: [] +authors: [guaca] +reviewers: [bkardell, j9t, zcorpan] +analysts: [guaca] editors: [] -analysts: [] translators: [] +guaca_bio: Estela Franco is a web performance and technical SEO specialist at Schneider Electric. But apart from that, she loves being connected to the community. That is why she is an international conference speaker, a Google Developer Expert in Web Technologies, a Storyblok ambassador, co-organizer of the Barcelona Web Performance Meetup, and co-founder of the Mujeres en SEO community. results: https://docs.google.com/spreadsheets/d/1TtOMr_w58HvqNBv4RIWX021Lxm6m5ajYOcRykrPdAJc/ -featured_quote: -featured_stat_1: -featured_stat_label_1: -featured_stat_2: -featured_stat_label_2: -featured_stat_3: -featured_stat_label_3: +featured_quote: Every website, every web application, and every online interaction starts with HTML at its core, making it one of the most essential web standards. +featured_stat_1: 92.8% +featured_stat_label_1: Documents using the HTML doctype +featured_stat_2: 32 kB +featured_stat_label_2: Median HTML document transfer size +featured_stat_3: 29% +featured_stat_label_3: Elements that are `div`s --- + +## Introduction + +The web as we know it is built on the foundation of HTML. Every website, every web application, and every online interaction starts with HTML at its core, making it one of the most essential web standards. 
It's the language that structures content, defines relationships, and communicates with browsers, ensuring that what we create can be viewed, interacted with, and understood by users worldwide. This chapter is dedicated to understanding how HTML continues to shape the web in 2024, exploring trends in its use, the rise of custom elements, and how developers are leveraging new features to build more accessible, efficient, and future-proof websites. + +This year's edition brings a broader perspective, as our dataset now includes not only homepages but also a wide variety of secondary pages. By analyzing pages beyond just the front doors of websites, we're able to capture a richer, more accurate snapshot of how HTML is used across different types of content and contexts. From blog posts and product pages to login screens and article archives, this expanded scope gives us deeper insights into the real-world application of HTML. + +We encourage readers to dive deeper into the data, explore their own insights, and join the conversation about the future of the web's foundational language. + +## General + +Let's start with some of the more general aspects of a markup document. In this section we're covering the document types, the size of the documents, language and compression. + +### Doctypes + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DoctypeRendering ModeDesktopMobile
`<!DOCTYPE html>`standards mode91.7%92.8%
`html public "-//w3c//dtd xhtml 1.0 transitional//en" "http://www.w3.org/tr/xhtml1/dtd/xhtml1-transitional.dtd"`almost standards mode3.4%2.7%
No doctypequirks mode2.1%2.2%
`html public "-//w3c//dtd xhtml 1.0 strict//en" "http://www.w3.org/tr/xhtml1/dtd/xhtml1-strict.dtd"`standards mode0.8%0.7%
`html public "-//w3c//dtd html 4.01 transitional//en" "http://www.w3.org/tr/html4/loose.dtd"`almost standards mode0.6%0.4%
`html public "-//w3c//dtd html 4.01 transitional//en"`quirks mode0.3%0.3%
+
+ {{ figure_link( + caption="Doctype usage.", + sheets_gid="1243074845", + sql_file="doctype.sql", + ) }} +
+
+ +{{ figure_markup( + content="92.8%", + caption="Mobile pages using the standard HTML doctype.", + classes="big-number", + sheets_gid="1243074845", + sql_file="doctype.sql", +) }} + +93% of all mobile pages use the standard HTML doctype. That is, `<!DOCTYPE html>`. + +This is 3 percentage points higher than [the 2022 data](../2022/markup#doctypes). The next most popular doctype is, surprisingly, `XHTML 1.0 Transitional`, though it is slowly disappearing (2.7%, down from 3.9% in 2022). + +### Document size + +A page's document size is the amount of HTML bytes transferred over the network, including compression. + +{{ figure_markup( + image="document_trends.png", + caption="Median transfer size of HTML document", + description="Bar chart showing the median transfer size of HTML document. In 2022, the median was 31 kB on desktop and 29 kB on mobile. In 2023, 30 kB on desktop and 29 kB on mobile. And in 2024, 33 kB on desktop and 32 kB on mobile.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1823253654&format=interactive", + sheets_gid="1730786160", + sql_file="document_trends.sql" + ) +}} + +After a slight decrease in 2023, the HTML transfer size increased this year compared to 2022 and 2023. + +Although the median looks reasonable, let's take a closer look at the other percentiles. + +{{ figure_markup( + image="document_size_distribution.png", + caption="Distribution of the transfer size of HTML document", + description="Bar chart showing the 10, 25, 50, 75, and 90th percentile of transfer size. The values for mobile are 6, 13, 32, 71, 147 kB respectively. 
The values for desktop are 6, 14, 33, 73, 148 kB respectively.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1780108383&format=interactive", + sheets_gid="619373506", + sql_file="document_size_distribution.sql" + ) +}} + +The percentile distribution reveals that at the 10th percentile, HTML files are as small as 6 kB, while at the 90th percentile, they reach up to 147 kB. These extremes highlight a significant variation in how developers structure their pages. + +### Compression + +In the context of analyzing HTML document files, compression continues to play a crucial role in improving load times and overall performance. + +{{ figure_markup( + image="content_encoding.png", + caption="HTML document content-encoding", + description="Stacked bar chart, showing 36% of desktop and 37% of mobile HTML documents are being compressed with Brotli, 53% of desktop and 52% of mobile documents are being compressed with Gzip, and 11% of desktop and mobile HTML documents are not being compressed at all.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1114599297&format=interactive", + sheets_gid="1573442294", + sql_file="content_encoding.sql" + ) +}} + +One notable trend is the increasing popularity of the Brotli (`br`) compression format. In 2024, Brotli is used on 37% of mobile pages, a steady increase from 28% in 2023. + +While `gzip` remains the most widely used compression method (52% on mobile), its usage has declined from 58% in 2022 as `br` gains traction. + +Despite these improvements, a small percentage of HTML files (10.5% on mobile) are still served without any compression, presenting missed opportunities for optimization. 
+ +### Document language + +{{ figure_markup( + content="5,625", + caption="Unique lang attribute codes on mobile", + classes="big-number", + sheets_gid="134927112", + sql_file="distinct_lang.sql", +) }} + +In our analysis, we've encountered 5,625 unique instances of the `lang` attribute on the `html` element on mobile. + +The HTML `lang` attribute plays an important role in helping screen readers and search engines understand the language of a webpage's content. However, interestingly, Google Search ignores the lang attribute when determining the language of a page because [they've identified that "it is almost always wrong"](https://www.youtube.com/watch?v=isW-Ke-AJJU&t=3354s). This may explain why `en` remains dominant in the dataset, with 44.2% of desktop and 40.5% of mobile pages using it as the primary language attribute, even though the actual language of the content might differ. + +{{ figure_markup( + image="popular_lang.png", + caption="Most popular HTML language codes, not including region", + description="Bar chart showing the language usage, for the top ten languages in our data set. 40% use English, 13% are not set, with Spanish, Japanese, German, French, Portuguese, Russian, Italian and Dutch having various minor percentages of usage, from 6% to 2%.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1428231971&format=interactive", + sheets_gid="546119077", + sql_file="lang.sql", + width=600, + height=520 + ) +}} + +Additionally, 13% of pages have no `lang` attribute set at all, showing that many websites fail to provide this indicator. + +If we aggregate the percentages of non-English and non-"not set" `lang` values, we still capture around 46% of the total pages, reflecting the truly global nature of web content. 
However, as mentioned above, it's important to remember that the high proportion of `en` values doesn't always mean the content is in English, given the frequent misconfiguration of the `lang` attribute. + +{{ figure_markup( + image="popular_regional_lang.png", + caption="Most popular HTML language codes, including region", + description="Bar chart showing the language usage, including region, for the top ten languages in our data set. 22% use English, 15% American English, with Japanese, Spanish, Brazilian Portuguese, British English, German German, Russian and German having various minor percentages of usage, from 5% to 2%.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=672282298&format=interactive", + sheets_gid="546119077", + sql_file="lang.sql", + width=600, + height=520 + ) +}} + +In terms of non-English languages, `ja` (Japanese) and `es` (Spanish) stand out as some of the most popular choices, used on approximately 5-6% of pages. + +The most common regional variant, `en-us`, appears on 16.7% of desktop and 15% of mobile pages. + + +Despite the issues with incorrect lang attribute values, the attribute still plays a vital role in improving accessibility. For users with screen readers, setting the `lang` attribute correctly remains an essential practice in modern web development. + +### Comments + +HTML comments are snippets of text that developers include within their code to leave notes or explanations without affecting the visual display of the webpage. These comments are enclosed in `<!-- -->` tags and are not rendered by browsers, meaning users will never see them. While useful during the development process, HTML comments are not necessary in production code, as they can slightly increase the file size without any benefit to end users. 
+ +{{ figure_markup( + content="86%", + caption="Mobile pages with at least one comment", + classes="big-number", + sheets_gid="1268900609", + sql_file="comments.sql", +) }} + +According to our analysis, 86% of mobile pages still contain at least one comment. + +In addition to regular comments, there's a specific type known as **conditional comments**. These were once used extensively to target specific versions of Internet Explorer (IE), allowing developers to provide custom styles or scripts that only older IE browsers would process. + +`<!--[if IE 8]><p>This is IE 8</p><![endif]-->` + +With modern browsers and the retirement of Internet Explorer, conditional comments have become obsolete. Despite this, **26%** of mobile pages still contain conditional comments, likely due to legacy code that was never cleaned up, or because some sites continue to support older versions of Internet Explorer for compatibility reasons. + +## Elements + +In this section, we'll explore HTML elements—what elements are commonly used, how often they appear, and which ones you're likely to find on a typical page. We'll also look into custom and outdated elements. And just to clarify: is "divitis" still around? Yes, it is. + +### Element diversity + +For both desktop and mobile pages, the data shows that the 10th percentile has 22 distinct elements, while the 90th percentile reaches 44 elements on desktop and 43 on mobile. The median number of distinct elements for mobile pages has remained consistent at 32 this year, [the same as in 2022](../2022/markup#element-diversity), and only slightly higher than the [31 observed in 2021](../2021/markup#element-diversity). + +{{ figure_markup( + image="distinct_elements_per_page.png", + caption="Distribution of the number of distinct types of elements per page", + description="Bar chart showing the 10, 25, 50, 75, and 90th percentile of distinct elements per page. The values for mobile are 22, 27, 32, 38 and 43 respectively. 
The values for desktop are 22, 27, 33, 38 and 44 respectively.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1344861022&format=interactive", + sheets_gid="1098213395", + sql_file="element_count_distribution.sql" + ) +}} + +However, there are some differences when checking the distribution of elements per page. The data shows a slight decrease [compared to 2022](../2022/markup#element-diversity). For mobile, the median number of elements has dropped from 653 in 2022 to 594 in 2024. At the lower end, the 10th percentile for mobile shows a small drop from 192 to 180. The 90th percentile also shows a modest decrease, with mobile pages dropping from 1,832 to 1,716. This overall reduction suggests that pages are becoming slightly leaner in terms of the number of HTML elements used. + +{{ figure_markup( + image="elements_per_page.png", + caption="Distribution of the number of elements per page", + description="Bar chart showing the 10, 25, 50, 75, and 90th percentile of total elements per page. The values for mobile are 180, 342, 594, 1,010 and 1,716 respectively.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1742977516&format=interactive", + sheets_gid="1098213395", + sql_file="element_count_distribution.sql" + ) +}} + + +### Top elements + +The following elements are used most frequently: + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
2021202220232024
`div``div``div``div`
`a``a``a``a`
`span``span``span``span`
`li``li``li``li`
`img``img``script``script`
`script``script``img``img`
`p``p``p``p`
`link``link``link``link`
`meta``i``meta``path`
`i``meta``path``meta`
+
+ {{ figure_link( + caption="Most used elements.", + sheets_gid="248650818", + sql_file="element_frequency.sql", + ) }} +
+
+ +The list remains largely consistent with previous years, but some shifts have occurred. + +{{ figure_markup( + content="29%", + caption="Percentage of elements which are div elements", + classes="big-number", + sheets_gid="248650818", + sql_file="element_frequency.sql", +) }} + +`<div>` remains by far the most dominant element. So "divitis" is still a thing, and it doesn't look like it's going to change in the next few years. + +{{ figure_markup( + image="top_elements.png", + caption="Frequency of top HTML elements", + description="Bar chart showing the frequency of top 15 HTML elements. `div` is the most used (28.7% on mobile), followed by `a` (12.6%), `span` (11.2%), `li` (7.7%) and `script` (3.9%). The rest of the top 15 elements are `img`, `p`, `link`, `path`, `meta`, `i`, `option`, `ul`, `br`, and `td`, with values ranging from 3.3% to 1.3%.", + chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQTldh1hYi8-zIRgmK_v6IhpKuUOPTAhBxStogg3rt1L6isaX6v8dgODs7WiJ_udh7ZvHnvrTZLlXkW/pubchart?oid=1080941706&format=interactive", + sheets_gid="248650818", + sql_file="element_frequency.sql", + width=600, + height=656 + ) +}} + +Following `<div>`, the `<a>` element remains a key player, consistently in second place. As the backbone of hyperlinking, it plays a critical role in navigation, anchoring user journeys across sites. + +One of the notable shifts in recent years has been the increased usage of `