diff --git a/sql/2020/20_Caching/README.md b/sql/2020/20_Caching/README.md
index e69de29bb2d..e5166454f31 100644
--- a/sql/2020/20_Caching/README.md
+++ b/sql/2020/20_Caching/README.md
@@ -0,0 +1 @@
+# Caching Queries
diff --git a/sql/2020/20_Caching/appcache_and_serviceworkers.sql b/sql/2020/20_Caching/appcache_and_serviceworkers.sql
new file mode 100644
index 00000000000..14436881ccc
--- /dev/null
+++ b/sql/2020/20_Caching/appcache_and_serviceworkers.sql
@@ -0,0 +1,17 @@
+#standardSQL
+# Use of AppCache and ServiceWorkers
+SELECT
+  IF(STARTS_WITH(url, 'https'), 'https', 'http') AS http_type,
+  JSON_EXTRACT_SCALAR(report, "$.audits.appcache-manifest.score") AS using_appcache,
+  JSON_EXTRACT_SCALAR(report, "$.audits.service-worker.score") AS using_serviceworkers,
+  COUNT(0) AS occurrences,
+  SUM(COUNT(0)) OVER () AS total,
+  COUNT(0) / SUM(COUNT(0)) OVER () AS pct
+FROM
+  `httparchive.lighthouse.2020_08_01_mobile`
+GROUP BY
+  http_type,
+  using_appcache,
+  using_serviceworkers
+ORDER BY
+  pct DESC
diff --git a/sql/2020/20_Caching/cache_control_and_max_age_and_expires.sql b/sql/2020/20_Caching/cache_control_and_max_age_and_expires.sql
new file mode 100644
index 00000000000..89c79407e2c
--- /dev/null
+++ b/sql/2020/20_Caching/cache_control_and_max_age_and_expires.sql
@@ -0,0 +1,32 @@
+#standardSQL
+# Use of Cache-Control, max-age in Cache-Control, and Expires
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(uses_cache_control) AS total_using_cache_control,
+  COUNTIF(uses_max_age) AS total_using_max_age,
+  COUNTIF(uses_expires) AS total_using_expires,
+  COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
+  COUNTIF(uses_cache_control AND uses_expires) AS total_using_both,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither,
+  COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control,
+  COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires,
+  COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
+  COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
+  COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
+  COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires,
+  COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither,
+  COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control,
+  COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    TRIM(resp_expires) != "" AS uses_expires,
+    TRIM(resp_cache_control) != "" AS uses_cache_control,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
diff --git a/sql/2020/20_Caching/cache_control_directives.sql b/sql/2020/20_Caching/cache_control_directives.sql
new file mode 100644
index 00000000000..967f29d6161
--- /dev/null
+++ b/sql/2020/20_Caching/cache_control_directives.sql
@@ -0,0 +1,72 @@
+#standardSQL
+# Use of Cache-Control directives
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(uses_cache_control) AS total_using_cache_control,
+  COUNTIF(uses_max_age) AS total_using_max_age,
+  COUNTIF(uses_no_cache) AS total_using_no_cache,
+  COUNTIF(uses_public) AS total_using_public,
+  COUNTIF(uses_must_revalidate) AS total_using_must_revalidate,
+  COUNTIF(uses_no_store) AS total_using_no_store,
+  COUNTIF(uses_private) AS total_using_private,
+  COUNTIF(uses_proxy_revalidate) AS total_using_proxy_revalidate,
+  COUNTIF(uses_s_maxage) AS total_using_s_maxage,
+  COUNTIF(uses_no_transform) AS total_using_no_transform,
+  COUNTIF(uses_immutable) AS total_using_immutable,
+  COUNTIF(uses_stale_while_revalidate) AS total_using_stale_while_revalidate,
+  COUNTIF(uses_stale_if_error) AS total_using_stale_if_error,
+  COUNTIF(uses_no_store AND uses_no_cache AND uses_max_age_zero) AS total_using_no_store_and_no_cache_and_max_age_zero,
+  COUNTIF(uses_no_store AND uses_no_cache AND NOT uses_max_age_zero) AS total_using_no_store_and_no_cache_only,
+  COUNTIF(uses_no_store AND NOT uses_no_cache AND NOT uses_max_age_zero) AS total_using_no_store_only,
+  COUNTIF(uses_max_age_zero AND NOT uses_no_store) AS total_using_max_age_zero_without_no_store,
+  COUNTIF(uses_pre_check_zero AND uses_post_check_zero) AS total_using_pre_check_zero_and_post_check_zero,
+  COUNTIF(uses_pre_check_zero) AS total_using_pre_check_zero,
+  COUNTIF(uses_post_check_zero) AS total_using_post_check_zero,
+  COUNTIF(uses_cache_control AND NOT uses_max_age AND NOT uses_no_cache AND NOT uses_public AND NOT uses_must_revalidate AND NOT uses_no_store AND NOT uses_private AND NOT uses_proxy_revalidate AND NOT uses_s_maxage AND NOT uses_no_transform AND NOT uses_immutable AND NOT uses_stale_while_revalidate AND NOT uses_stale_if_error AND NOT uses_pre_check_zero AND NOT uses_post_check_zero) AS total_erroneous_directives,
+  COUNTIF(uses_cache_control) / COUNT(0) AS pct_using_cache_control,
+  COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
+  COUNTIF(uses_no_cache) / COUNT(0) AS pct_using_no_cache,
+  COUNTIF(uses_public) / COUNT(0) AS pct_using_public,
+  COUNTIF(uses_must_revalidate) / COUNT(0) AS pct_using_must_revalidate,
+  COUNTIF(uses_no_store) / COUNT(0) AS pct_using_no_store,
+  COUNTIF(uses_private) / COUNT(0) AS pct_using_private,
+  COUNTIF(uses_proxy_revalidate) / COUNT(0) AS pct_using_proxy_revalidate,
+  COUNTIF(uses_s_maxage) / COUNT(0) AS pct_using_s_maxage,
+  COUNTIF(uses_no_transform) / COUNT(0) AS pct_using_no_transform,
+  COUNTIF(uses_immutable) / COUNT(0) AS pct_using_immutable,
+  COUNTIF(uses_stale_while_revalidate) / COUNT(0) AS pct_using_stale_while_revalidate,
+  COUNTIF(uses_stale_if_error) / COUNT(0) AS pct_using_stale_if_error,
+  COUNTIF(uses_no_store AND uses_no_cache AND uses_max_age_zero) / COUNT(0) AS pct_using_no_store_and_no_cache_and_max_age_zero,
+  COUNTIF(uses_no_store AND uses_no_cache AND NOT uses_max_age_zero) / COUNT(0) AS pct_using_no_store_and_no_cache_only,
+  COUNTIF(uses_no_store AND NOT uses_no_cache AND NOT uses_max_age_zero) / COUNT(0) AS pct_using_no_store_only,
+  COUNTIF(uses_max_age_zero AND NOT uses_no_store) / COUNT(0) AS pct_using_max_age_zero_without_no_store,
+  COUNTIF(uses_pre_check_zero AND uses_post_check_zero) / COUNT(0) AS pct_using_pre_check_zero_and_post_check_zero,
+  COUNTIF(uses_pre_check_zero) / COUNT(0) AS pct_using_pre_check_zero,
+  COUNTIF(uses_post_check_zero) / COUNT(0) AS pct_using_post_check_zero,
+  COUNTIF(uses_cache_control AND NOT uses_max_age AND NOT uses_no_cache AND NOT uses_public AND NOT uses_must_revalidate AND NOT uses_no_store AND NOT uses_private AND NOT uses_proxy_revalidate AND NOT uses_s_maxage AND NOT uses_no_transform AND NOT uses_immutable AND NOT uses_stale_while_revalidate AND NOT uses_stale_if_error AND NOT uses_pre_check_zero AND NOT uses_post_check_zero) / COUNT(0) AS pct_erroneous_directives
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    TRIM(resp_cache_control) != "" AS uses_cache_control,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*0') AS uses_max_age_zero,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)public') AS uses_public,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)no-cache') AS uses_no_cache,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)must-revalidate') AS uses_must_revalidate,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AS uses_no_store,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)private') AS uses_private,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)proxy-revalidate') AS uses_proxy_revalidate,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)s-maxage\s*=\s*[0-9]+') AS uses_s_maxage,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)no-transform') AS uses_no_transform,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)immutable') AS uses_immutable,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)stale-while-revalidate\s*=\s*[0-9]+') AS uses_stale_while_revalidate,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)stale-if-error\s*=\s*[0-9]+') AS uses_stale_if_error,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)pre-check\s*=\s*0') AS uses_pre_check_zero,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)post-check\s*=\s*0') AS uses_post_check_zero
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
+
diff --git a/sql/2020/20_Caching/cache_ttl_and_content_age_diff.sql b/sql/2020/20_Caching/cache_ttl_and_content_age_diff.sql
new file mode 100644
index 00000000000..9816326cf7d
--- /dev/null
+++ b/sql/2020/20_Caching/cache_ttl_and_content_age_diff.sql
@@ -0,0 +1,37 @@
+#standardSQL
+# Difference between Cache TTL and the contents age
+# toTimestamp() converts an HTTP date string to Unix seconds and yields NULL when
+# the string cannot be parsed, so invalid Last-Modified values drop out of the quantiles.
+CREATE TEMPORARY FUNCTION toTimestamp(date_string STRING)
+RETURNS INT64 LANGUAGE js AS '''
+  try {
+    var timestamp = Math.round(new Date(date_string).getTime() / 1000);
+    return isNaN(timestamp) ? null : timestamp;
+  } catch (e) {
+    return null;
+  }
+''';
+
+SELECT
+  client,
+  percentile,
+  APPROX_QUANTILES(diff_in_days, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS diff_in_days
+FROM
+  (
+    SELECT
+      _TABLE_SUFFIX AS client,
+      ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / 86400, 2) AS diff_in_days
+    FROM
+      `httparchive.summary_requests.2020_08_01_*`
+    WHERE
+      resp_last_modified <> "" AND
+      expAge > 0
+  ),
+  UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+  client,
+  percentile
+ORDER BY
+  client,
+  percentile
+
diff --git a/sql/2020/20_Caching/content_age_older_than_ttl.sql b/sql/2020/20_Caching/content_age_older_than_ttl.sql
new file mode 100644
index 00000000000..237bf95c117
--- /dev/null
+++ b/sql/2020/20_Caching/content_age_older_than_ttl.sql
@@ -0,0 +1,35 @@
+#standardSQL
+# Requests with a content age older than its TTL
+# toTimestamp() converts an HTTP date string to Unix seconds and yields NULL when
+# the string cannot be parsed.
+CREATE TEMPORARY FUNCTION toTimestamp(date_string STRING)
+RETURNS INT64 LANGUAGE js AS '''
+  try {
+    var timestamp = Math.round(new Date(date_string).getTime() / 1000);
+    return isNaN(timestamp) ? null : timestamp;
+  } catch (e) {
+    return null;
+  }
+''';
+
+SELECT
+  client,
+  COUNT(0) AS total_req,
+  COUNTIF(diff < 0) AS req_too_short_cache,
+  COUNTIF(diff < 0) / COUNT(0) AS perc_req_too_short_cache
+FROM
+  (
+    SELECT
+      _TABLE_SUFFIX AS client,
+      expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff
+    FROM
+      `httparchive.summary_requests.2020_08_01_*`
+    WHERE
+      resp_last_modified <> "" AND
+      expAge > 0
+  )
+# Skip rows whose Last-Modified could not be parsed (diff is NULL for them).
+WHERE
+  diff IS NOT NULL
+GROUP BY
+  client
diff --git a/sql/2020/20_Caching/content_age_older_than_ttl_by_party.sql b/sql/2020/20_Caching/content_age_older_than_ttl_by_party.sql
new file mode 100644
index 00000000000..ece3fa287e5
--- /dev/null
+++ b/sql/2020/20_Caching/content_age_older_than_ttl_by_party.sql
@@ -0,0 +1,61 @@
+#standardSQL
+# Requests with a content age older than their TTL, by first party (1) and third party (3)
+# NOTE(review): a request is treated as first party when its host contains the first
+# label of the page host (e.g. "www") - confirm this heuristic is precise enough.
+# toTimestamp() converts an HTTP date string to Unix seconds and yields NULL when
+# the string cannot be parsed.
+CREATE TEMPORARY FUNCTION toTimestamp(date_string STRING)
+RETURNS INT64 LANGUAGE js AS '''
+  try {
+    var timestamp = Math.round(new Date(date_string).getTime() / 1000);
+    return isNaN(timestamp) ? null : timestamp;
+  } catch (e) {
+    return null;
+  }
+''';
+
+SELECT
+  client,
+  party,
+  COUNT(0) AS total_req,
+  COUNTIF(diff < 0) AS req_too_short_cache,
+  COUNTIF(diff < 0) / COUNT(0) AS perc_req_too_short_cache
+FROM
+  (
+    SELECT
+      "desktop" AS client,
+      IF(STRPOS(NET.HOST(requests.url), REGEXP_EXTRACT(NET.HOST(pages.url), r'([\w-]+)')) > 0, 1, 3) AS party,
+      requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff
+    FROM
+      `httparchive.summary_requests.2020_08_01_desktop` requests
+    JOIN
+      `httparchive.summary_pages.2020_08_01_desktop` pages
+    ON
+      pages.pageid = requests.pageid
+    WHERE
+      TRIM(requests.resp_last_modified) <> "" AND
+      requests.expAge > 0
+    UNION ALL
+    SELECT
+      "mobile" AS client,
+      IF(STRPOS(NET.HOST(requests.url), REGEXP_EXTRACT(NET.HOST(pages.url), r'([\w-]+)')) > 0, 1, 3) AS party,
+      requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff
+    FROM
+      `httparchive.summary_requests.2020_08_01_mobile` requests
+    JOIN
+      `httparchive.summary_pages.2020_08_01_mobile` pages
+    ON
+      pages.pageid = requests.pageid
+    WHERE
+      TRIM(requests.resp_last_modified) <> "" AND
+      requests.expAge > 0
+  )
+# Skip rows whose Last-Modified could not be parsed (diff is NULL for them).
+WHERE
+  diff IS NOT NULL
+GROUP BY
+  client,
+  party
+ORDER BY
+  client,
+  party
diff --git a/sql/2020/20_Caching/invalid_cache_control_directives.sql b/sql/2020/20_Caching/invalid_cache_control_directives.sql
new file mode 100644
index 00000000000..f4e4b9cc809
--- /dev/null
+++ b/sql/2020/20_Caching/invalid_cache_control_directives.sql
@@ -0,0 +1,68 @@
+#standardSQL
+# List of invalid Cache-Control directive names.
+SELECT
+  client,
+  total_requests,
+  total_using_cache_control,
+  directive_name,
+  directive_occurrences,
+  pct_of_cache_control,
+  pct_of_total_requests
+FROM
+(
+  (
+    SELECT
+      "desktop" AS client,
+      total_requests,
+      total_using_cache_control,
+      directive_name,
+      COUNT(0) AS directive_occurrences,
+      COUNT(0) / total_using_cache_control AS pct_of_cache_control,
+      COUNT(0) / total_requests AS pct_of_total_requests
+    FROM
+      `httparchive.summary_requests.2020_08_01_desktop`,
+      UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name
+    CROSS JOIN (
+      SELECT
+        COUNT(0) AS total_requests,
+        COUNTIF(TRIM(resp_cache_control) != "") AS total_using_cache_control
+      FROM
+        `httparchive.summary_requests.2020_08_01_desktop`
+    )
+    GROUP BY
+      client,
+      total_requests,
+      total_using_cache_control,
+      directive_name
+  )
+  UNION ALL
+  (
+    SELECT
+      "mobile" AS client,
+      total_requests,
+      total_using_cache_control,
+      directive_name,
+      COUNT(0) AS directive_occurrences,
+      COUNT(0) / total_using_cache_control AS pct_of_cache_control,
+      COUNT(0) / total_requests AS pct_of_total_requests
+    FROM
+      `httparchive.summary_requests.2020_08_01_mobile`,
+      UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name
+    CROSS JOIN (
+      SELECT
+        COUNT(0) AS total_requests,
+        COUNTIF(TRIM(resp_cache_control) != "") AS total_using_cache_control
+      FROM
+        `httparchive.summary_requests.2020_08_01_mobile`
+    )
+    GROUP BY
+      client,
+      total_requests,
+      total_using_cache_control,
+      directive_name
+  )
+)
+WHERE
+  directive_name NOT IN ('max-age', 'public', 'no-cache', 'must-revalidate', 'no-store', 'private', 'proxy-revalidate', 's-maxage', 'no-transform', 'immutable', 'stale-while-revalidate', 'stale-if-error', 'pre-check', 'post-check')
+ORDER BY
+  client, directive_occurrences DESC
diff --git a/sql/2020/20_Caching/invalid_last_modified_and_expires_and_date.sql b/sql/2020/20_Caching/invalid_last_modified_and_expires_and_date.sql
new file mode 100644
index 00000000000..39fb0132e81
--- /dev/null
+++ b/sql/2020/20_Caching/invalid_last_modified_and_expires_and_date.sql
@@ -0,0 +1,32 @@
+#standardSQL
+# Valid date in Last-Modified, Expires, and Date headers
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(uses_date) AS total_using_date,
+  COUNTIF(uses_last_modified) AS total_using_last_modified,
+  COUNTIF(uses_expires) AS total_using_expires,
+  COUNTIF(uses_date AND NOT has_valid_date) AS total_using_invalid_date,
+  COUNTIF(uses_last_modified AND NOT has_valid_last_modified) AS total_using_invalid_last_modified,
+  COUNTIF(uses_expires AND NOT has_valid_expires) AS total_using_invalid_expires,
+  COUNTIF(uses_date) / COUNT(0) AS pct_using_date,
+  COUNTIF(uses_last_modified) / COUNT(0) AS pct_using_last_modified,
+  COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
+  # The pct_using_invalid_* columns are relative to the requests that send the respective header.
+  COUNTIF(uses_date AND NOT has_valid_date) / COUNTIF(uses_date) AS pct_using_invalid_date,
+  COUNTIF(uses_last_modified AND NOT has_valid_last_modified) / COUNTIF(uses_last_modified) AS pct_using_invalid_last_modified,
+  COUNTIF(uses_expires AND NOT has_valid_expires) / COUNTIF(uses_expires) AS pct_using_invalid_expires
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    TRIM(resp_date) != "" AS uses_date,
+    TRIM(resp_last_modified) != "" AS uses_last_modified,
+    TRIM(resp_expires) != "" AS uses_expires,
+    REGEXP_CONTAINS(TRIM(resp_date), r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), \d{1,2} (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{4} \d{2}:\d{2}:\d{2} GMT$') AS has_valid_date,
+    REGEXP_CONTAINS(TRIM(resp_last_modified), r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), \d{1,2} (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{4} \d{2}:\d{2}:\d{2} GMT$') AS has_valid_last_modified,
+    REGEXP_CONTAINS(TRIM(resp_expires), r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), \d{1,2} (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{4} \d{2}:\d{2}:\d{2} GMT$') AS has_valid_expires
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
diff --git a/sql/2020/20_Caching/last_modified_and_etag.sql b/sql/2020/20_Caching/last_modified_and_etag.sql
new file mode 100644
index 00000000000..9228f0cfbfb
--- /dev/null
+++ b/sql/2020/20_Caching/last_modified_and_etag.sql
@@ -0,0 +1,35 @@
+#standardSQL
+# Presence of Last-Modified and ETag header, statistics on weak, strong, and invalid ETag.
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(uses_no_etag) AS total_using_no_etag,
+  COUNTIF(uses_etag) AS total_using_etag,
+  COUNTIF(uses_weak_etag) AS total_using_weak_etag,
+  COUNTIF(uses_strong_etag) AS total_using_strong_etag,
+  COUNTIF(NOT uses_weak_etag AND NOT uses_strong_etag AND uses_etag) AS total_using_invalid_etag,
+  COUNTIF(uses_last_modified) AS total_using_last_modified,
+  COUNTIF(uses_etag AND uses_last_modified) AS total_using_both,
+  COUNTIF(NOT uses_etag AND NOT uses_last_modified) AS total_using_neither,
+  COUNTIF(uses_no_etag) / COUNT(0) AS pct_using_no_etag,
+  COUNTIF(uses_etag) / COUNT(0) AS pct_using_etag,
+  COUNTIF(uses_weak_etag) / COUNT(0) AS pct_using_weak_etag,
+  COUNTIF(uses_strong_etag) / COUNT(0) AS pct_using_strong_etag,
+  COUNTIF(NOT uses_weak_etag AND NOT uses_strong_etag AND uses_etag) / COUNT(0) AS pct_using_invalid_etag,
+  COUNTIF(uses_last_modified) / COUNT(0) AS pct_using_last_modified,
+  COUNTIF(uses_etag AND uses_last_modified) / COUNT(0) AS pct_using_both,
+  COUNTIF(NOT uses_etag AND NOT uses_last_modified) / COUNT(0) AS pct_using_neither
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    TRIM(resp_etag) = "" AS uses_no_etag,
+    TRIM(resp_etag) != "" AS uses_etag,
+    TRIM(resp_last_modified) != "" AS uses_last_modified,
+    # The whole value must be a quoted string, optionally prefixed with W/ (weak); anything else counts as invalid.
+    REGEXP_CONTAINS(TRIM(resp_etag), '^W/\".*\"$') AS uses_weak_etag,
+    REGEXP_CONTAINS(TRIM(resp_etag), '^\".*\"$') AS uses_strong_etag
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
diff --git a/sql/2020/20_Caching/non_cacheable_by_resource_type.sql b/sql/2020/20_Caching/non_cacheable_by_resource_type.sql
new file mode 100644
index 00000000000..a2843b9c45b
--- /dev/null
+++ b/sql/2020/20_Caching/non_cacheable_by_resource_type.sql
@@ -0,0 +1,34 @@
+#standardSQL
+# Non-cacheable content (no-store present) by resource type
+SELECT
+  client,
+  resource_type,
+  COUNT(0) AS total_requests,
+  COUNTIF(NOT uses_no_store) AS total_cacheable,
+  COUNTIF(uses_no_store) AS total_non_cacheable,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age = 0) AS total_exp_age_zero,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age > 0) AS total_exp_age_gt_zero,
+  COUNTIF(NOT uses_no_store) / COUNT(0) AS pct_cacheable,
+  COUNTIF(uses_no_store) / COUNT(0) AS pct_non_cacheable,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNTIF(NOT uses_no_store) AS pct_using_neither,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age = 0) / COUNTIF(NOT uses_no_store) AS pct_using_exp_age_zero,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age > 0) / COUNTIF(NOT uses_no_store) AS pct_using_exp_age_gt_zero
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    type AS resource_type,
+    TRIM(resp_cache_control) != "" AS uses_cache_control,
+    TRIM(resp_expires) != "" AS uses_expires,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AS uses_no_store,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age,
+    expAge AS exp_age
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client,
+  resource_type
+ORDER BY
+  client,
+  resource_type
diff --git a/sql/2020/20_Caching/set_cookie.sql b/sql/2020/20_Caching/set_cookie.sql
new file mode 100644
index 00000000000..88c7cdc023f
--- /dev/null
+++ b/sql/2020/20_Caching/set_cookie.sql
@@ -0,0 +1,27 @@
+#standardSQL
+# Responses with Set-cookie header, absence of no-store means cacheable (max-age, Expires, or heuristic)
+# NOTE(review): uses_cookies is derived from reqCookieLen, which by its name looks like the
+# request Cookie header rather than the Set-Cookie response header - confirm the intended column.
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(NOT uses_no_store) AS total_cacheable,
+  COUNTIF(NOT uses_no_store AND uses_cookies) AS total_cacheable_set_cookie,
+  COUNTIF(NOT uses_no_store AND NOT uses_cookies) AS total_cacheable_without_set_cookie,
+  COUNTIF(NOT uses_no_store AND uses_cookies AND uses_private) AS total_pvt_cacheable_set_cookie,
+  COUNTIF(NOT uses_no_store AND uses_cookies AND NOT uses_private) AS total_pvt_public_cacheable_set_cookie,
+  COUNTIF(NOT uses_no_store AND uses_cookies) / COUNTIF(NOT uses_no_store) AS pct_cacheable_set_cookie,
+  COUNTIF(NOT uses_no_store AND NOT uses_cookies) / COUNTIF(NOT uses_no_store) AS pct_cacheable_without_set_cookie,
+  COUNTIF(NOT uses_no_store AND uses_cookies AND uses_private) / COUNTIF(NOT uses_no_store AND uses_cookies) AS pct_pvt_cacheable_set_cookie,
+  COUNTIF(NOT uses_no_store AND uses_cookies AND NOT uses_private) / COUNTIF(NOT uses_no_store AND uses_cookies) AS pct_pvt_public_cacheable_set_cookie
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AS uses_no_store,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)private') AS uses_private,
+    (reqCookieLen > 0) AS uses_cookies
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
diff --git a/sql/2020/20_Caching/ttl.sql b/sql/2020/20_Caching/ttl.sql
new file mode 100644
index 00000000000..9c8ac6c784c
--- /dev/null
+++ b/sql/2020/20_Caching/ttl.sql
@@ -0,0 +1,28 @@
+#standardSQL
+# TTL statistics for cacheable content (no-store absent)
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age = 0) AS total_exp_age_zero,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age > 0) AS total_exp_age_gt_zero,
+  COUNTIF(uses_no_store) AS total_not_cacheable,
+  COUNTIF(NOT uses_no_store) AS total_cacheable,
+  COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNTIF(NOT uses_no_store) AS pct_using_neither,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age = 0) / COUNTIF(NOT uses_no_store) AS pct_using_exp_age_zero,
+  COUNTIF(NOT uses_no_store AND uses_max_age AND exp_age > 0) / COUNTIF(NOT uses_no_store) AS pct_using_exp_age_gt_zero,
+  COUNTIF(uses_no_store) / COUNT(0) AS pct_not_cacheable,
+  COUNTIF(NOT uses_no_store) / COUNT(0) AS pct_cacheable
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    TRIM(resp_cache_control) != "" AS uses_cache_control,
+    TRIM(resp_expires) != "" AS uses_expires,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AS uses_no_store,
+    REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age,
+    expAge AS exp_age
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
diff --git a/sql/2020/20_Caching/ttl_by_resource.sql b/sql/2020/20_Caching/ttl_by_resource.sql
new file mode 100644
index 00000000000..e88327e65a2
--- /dev/null
+++ b/sql/2020/20_Caching/ttl_by_resource.sql
@@ -0,0 +1,21 @@
+#standardSQL
+# TTL by resource type for cacheable (no-store absent) content
+SELECT
+  _TABLE_SUFFIX AS client,
+  type AS response_type,
+  percentile,
+  APPROX_QUANTILES(expAge, 1000)[OFFSET(percentile * 10)] AS ttl
+FROM
+  `httparchive.summary_requests.2020_08_01_*`,
+  UNNEST([10, 25, 50, 75, 90]) AS percentile
+WHERE
+  NOT REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AND
+  expAge > 0
+GROUP BY
+  client,
+  response_type,
+  percentile
+ORDER BY
+  client,
+  response_type,
+  percentile
diff --git a/sql/2020/20_Caching/valid_if_modified_since_returns_304.sql b/sql/2020/20_Caching/valid_if_modified_since_returns_304.sql
new file mode 100644
index 00000000000..60e6cbd83b9
--- /dev/null
+++ b/sql/2020/20_Caching/valid_if_modified_since_returns_304.sql
@@ -0,0 +1,26 @@
+#standardSQL
+# Whether making an If-Modified-Since request returns a 304 if the content has not changed (as seen from Last-Modified)
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(status = 304) AS total_304,
+  COUNTIF(NOT uses_etag AND uses_last_modified AND uses_if_modified AND no_change) AS total_expected_304,
+  COUNTIF(NOT uses_etag AND uses_last_modified AND uses_if_modified AND no_change AND status = 304) AS total_actual_304,
+  COUNTIF(status = 304) / COUNT(0) AS pct_304,
+  COUNTIF(NOT uses_etag AND uses_last_modified AND uses_if_modified AND no_change) / COUNTIF(status = 304) AS pct_expected_304,
+  COUNTIF(NOT uses_etag AND uses_last_modified AND uses_if_modified AND no_change AND status = 304) / COUNTIF(NOT uses_etag AND uses_last_modified AND uses_if_modified AND no_change) AS pct_actual_304
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    status,
+    TRIM(resp_last_modified) = TRIM(req_if_modified_since) AS no_change,
+    TRIM(resp_last_modified) != "" AS uses_last_modified,
+    TRIM(req_if_modified_since) != "" AS uses_if_modified,
+    TRIM(resp_etag) != "" AS uses_etag
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
+ORDER BY
+  client
diff --git a/sql/2020/20_Caching/valid_if_non_match_returns_304.sql b/sql/2020/20_Caching/valid_if_non_match_returns_304.sql
new file mode 100644
index 00000000000..390ebbc4e32
--- /dev/null
+++ b/sql/2020/20_Caching/valid_if_non_match_returns_304.sql
@@ -0,0 +1,25 @@
+#standardSQL
+# Whether making an If-None-Match request returns a 304 if the content has not changed (as seen from ETag)
+SELECT
+  client,
+  COUNT(0) AS total_requests,
+  COUNTIF(status = 304) AS total_304,
+  COUNTIF(uses_etag AND uses_if_non_match AND no_change) AS total_expected_304,
+  COUNTIF(uses_etag AND uses_if_non_match AND no_change AND status = 304) AS total_actual_304,
+  COUNTIF(status = 304) / COUNT(0) AS pct_304,
+  COUNTIF(uses_etag AND uses_if_non_match AND no_change) / COUNTIF(status = 304) AS pct_expected_304,
+  COUNTIF(uses_etag AND uses_if_non_match AND no_change AND status = 304) / COUNTIF(uses_etag AND uses_if_non_match AND no_change) AS pct_actual_304
+FROM (
+  SELECT
+    _TABLE_SUFFIX AS client,
+    status,
+    TRIM(resp_etag) = TRIM(req_if_none_match) AS no_change,
+    TRIM(resp_etag) != "" AS uses_etag,
+    TRIM(req_if_none_match) != "" AS uses_if_non_match
+  FROM
+    `httparchive.summary_requests.2020_08_01_*`
+)
+GROUP BY
+  client
+ORDER BY
+  client
diff --git a/sql/2020/20_Caching/vary_headers.sql b/sql/2020/20_Caching/vary_headers.sql
new file mode 100644
index 00000000000..94a635bfff6
--- /dev/null
+++ b/sql/2020/20_Caching/vary_headers.sql
@@ -0,0 +1,74 @@
+#standardSQL
+# List of Vary directive names.
+SELECT
+  client,
+  total_requests,
+  total_using_vary,
+  vary_header,
+  occurrences,
+  pct_of_vary,
+  pct_of_total_requests,
+  total_using_both / total_using_vary AS pct_of_vary_with_cache_control,
+  total_using_vary / total_requests AS pct_using_vary
+FROM
+(
+  (
+    SELECT
+      "desktop" AS client,
+      total_requests,
+      total_using_vary,
+      total_using_both,
+      vary_header,
+      COUNT(0) AS occurrences,
+      COUNT(0) / total_using_vary AS pct_of_vary,
+      COUNT(0) / total_requests AS pct_of_total_requests
+    FROM
+      `httparchive.summary_requests.2020_08_01_desktop`,
+      UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS vary_header
+    CROSS JOIN (
+      SELECT
+        COUNT(0) AS total_requests,
+        COUNTIF(TRIM(resp_vary) != "") AS total_using_vary,
+        COUNTIF(TRIM(resp_vary) != "" AND TRIM(resp_cache_control) != "") AS total_using_both
+      FROM
+        `httparchive.summary_requests.2020_08_01_desktop`
+    )
+    GROUP BY
+      client,
+      total_requests,
+      total_using_vary,
+      total_using_both,
+      vary_header
+  )
+  UNION ALL
+  (
+    SELECT
+      "mobile" AS client,
+      total_requests,
+      total_using_vary,
+      total_using_both,
+      vary_header,
+      COUNT(0) AS occurrences,
+      COUNT(0) / total_using_vary AS pct_of_vary,
+      COUNT(0) / total_requests AS pct_of_total_requests
+    FROM
+      `httparchive.summary_requests.2020_08_01_mobile`,
+      UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS vary_header
+    CROSS JOIN (
+      SELECT
+        COUNT(0) AS total_requests,
+        COUNTIF(TRIM(resp_vary) != "") AS total_using_vary,
+        COUNTIF(TRIM(resp_vary) != "" AND TRIM(resp_cache_control) != "") AS total_using_both
+      FROM
+        `httparchive.summary_requests.2020_08_01_mobile`
+    )
+    GROUP BY
+      client,
+      total_requests,
+      total_using_vary,
+      total_using_both,
+      vary_header
+  )
+)
+ORDER BY
+  client, occurrences DESC