Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chapter 5: Add All Byte and Request Count Queries #107

Merged
merged 10 commits into from
Aug 15, 2019
29 changes: 29 additions & 0 deletions sql/2019/05_Third_Parties/05_01.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#standardSQL
# Per client: how many pages exist, how many of them issue at least one request
# whose host matches the observed third-party domain list, and that share as a percentage.
WITH requests AS (
  # One row per request. thirdPartyDomain stays NULL when the request host has no
  # match in the observed-domains table (LEFT JOIN).
  SELECT
    client,
    page AS pageUrl,
    DomainsOver50Table.requestDomain AS thirdPartyDomain
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01_all_observed_domains` AS DomainsOver50Table
  ON NET.HOST(url) = DomainsOver50Table.requestDomain
),

pages AS (
  # One row per (client, page) with its count of matched requests.
  SELECT
    client,
    pageUrl,
    COUNTIF(thirdPartyDomain IS NOT NULL) AS numberOfThirdPartyRequests
  FROM
    requests
  GROUP BY
    client,
    pageUrl
)

SELECT
  client,
  COUNT(*) AS numberOfPages,
  COUNTIF(numberOfThirdPartyRequests > 0) AS numberOfPagesWithThirdParty,
  ROUND(COUNTIF(numberOfThirdPartyRequests > 0) * 100 / COUNT(*), 2) AS percentOfPagesWithThirdParty
FROM
  pages
GROUP BY
  client
29 changes: 29 additions & 0 deletions sql/2019/05_Third_Parties/05_02.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#standardSQL
# Per client: how many pages exist, how many of them issue at least one request
# categorised as 'ad' by the third-party table, and that share as a percentage.
WITH requests AS (
  # One row per request. thirdPartyCategory stays NULL when the request host has no
  # match in the third-party table (LEFT JOIN).
  SELECT
    client,
    page AS pageUrl,
    ThirdPartyTable.category AS thirdPartyCategory
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01` AS ThirdPartyTable
  ON NET.HOST(url) = ThirdPartyTable.domain
),

pages AS (
  # One row per (client, page) with its count of ad requests.
  SELECT
    client,
    pageUrl,
    COUNTIF(thirdPartyCategory = 'ad') AS numberOfAdRequests
  FROM
    requests
  GROUP BY
    client,
    pageUrl
)

SELECT
  client,
  COUNT(*) AS numberOfPages,
  COUNTIF(numberOfAdRequests > 0) AS numberOfPagesWithAd,
  ROUND(COUNTIF(numberOfAdRequests > 0) * 100 / COUNT(*), 2) AS percentOfPagesWithAd
FROM
  pages
GROUP BY
  client
23 changes: 23 additions & 0 deletions sql/2019/05_Third_Parties/05_03.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#standardSQL
# Percentage of requests that are third party requests broken down by third party category by resource type.
#
# percentRequests is relative to each client's own request total: the window is
# partitioned by client, so one client's rows add up to roughly 100 (rounding aside)
# instead of being diluted by the other client's requests.
SELECT
  client,
  thirdPartyCategory,
  contentType,
  COUNT(0) AS totalRequests,
  ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 4) AS percentRequests
FROM (
  SELECT
    client,
    type AS contentType,
    # NULL when the request host has no matching row in the third-party table.
    ThirdPartyTable.category AS thirdPartyCategory
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01` AS ThirdPartyTable
  ON NET.HOST(url) = ThirdPartyTable.domain
)
GROUP BY
  client,
  thirdPartyCategory,
  contentType
25 changes: 25 additions & 0 deletions sql/2019/05_Third_Parties/05_04.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#standardSQL
# Percentage of total bytes that are from third party requests broken down by third party category by resource type.
#
# percentBytes is relative to each client's own byte total: the window is
# partitioned by client, so one client's rows add up to roughly 100 (rounding aside)
# instead of being diluted by the other client's bytes.
SELECT
  client,
  thirdPartyCategory,
  contentType,
  SUM(requestBytes) AS totalBytes,
  ROUND(SUM(requestBytes) * 100 / SUM(SUM(requestBytes)) OVER (PARTITION BY client), 4) AS percentBytes
FROM (
  SELECT
    client,
    type AS contentType,
    respBodySize AS requestBytes,
    # NULL when the request host has no matching row in the third-party table.
    ThirdPartyTable.category AS thirdPartyCategory
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01` AS ThirdPartyTable
  ON NET.HOST(url) = ThirdPartyTable.domain
)
GROUP BY
  client,
  thirdPartyCategory,
  contentType
ORDER BY
  percentBytes DESC
26 changes: 26 additions & 0 deletions sql/2019/05_Third_Parties/05_06.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#standardSQL
# Top 100 third party domains by request volume
#
# percentRequests is relative to every request in summary_requests (matched or not,
# all clients), because totalRequestCount is an unfiltered COUNT over that table.
SELECT
  thirdPartyDomain,
  COUNT(0) AS totalRequests,
  ROUND(COUNT(0) * 100 / MAX(totalRequestCount), 4) AS percentRequests,
  SUM(requestBytes) AS totalBytes
FROM (
  SELECT
    respSize AS requestBytes,
    DomainsOver50Table.requestDomain AS thirdPartyDomain
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01_all_observed_domains` AS DomainsOver50Table
  ON NET.HOST(url) = DomainsOver50Table.requestDomain
) AS matchedRequests
CROSS JOIN (
  # Single-row subquery: attaches the overall request count to every row.
  SELECT COUNT(0) AS totalRequestCount FROM `httparchive.almanac.summary_requests`
) AS requestTotals
# Requests whose host matched no observed domain have a NULL thirdPartyDomain;
# without this filter they would be ranked as one giant "domain".
WHERE
  thirdPartyDomain IS NOT NULL
GROUP BY
  thirdPartyDomain
ORDER BY
  totalRequests DESC
LIMIT 100

27 changes: 27 additions & 0 deletions sql/2019/05_Third_Parties/05_07.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#standardSQL
# Top 100 third party domains by total byte weight
#
# percentBytes is relative to the summed respSize of every request in
# summary_requests, matched or not.
WITH matchedRequests AS (
  # One row per request. thirdPartyDomain is NULL when the host matched no observed domain.
  SELECT
    respSize AS requestBytes,
    DomainsOver50Table.requestDomain AS thirdPartyDomain
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01_all_observed_domains` AS DomainsOver50Table
  ON NET.HOST(url) = DomainsOver50Table.requestDomain
),

byteTotals AS (
  # Single row holding the overall byte total used as the percentage denominator.
  SELECT
    SUM(respSize) AS totalRequestBytes
  FROM
    `httparchive.almanac.summary_requests`
)

SELECT
  thirdPartyDomain,
  COUNT(*) AS totalRequests,
  SUM(requestBytes) AS totalBytes,
  ROUND(SUM(requestBytes) * 100 / MAX(totalRequestBytes), 2) AS percentBytes
FROM
  matchedRequests
CROSS JOIN
  byteTotals
WHERE
  thirdPartyDomain IS NOT NULL
GROUP BY
  thirdPartyDomain
ORDER BY
  totalBytes DESC
LIMIT 100

22 changes: 22 additions & 0 deletions sql/2019/05_Third_Parties/05_09.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#standardSQL
# Top 100 third party requests by request volume
#
# NOTE(review): no third-party table is joined here, so every request URL is
# ranked regardless of origin. Presumably the most repeated URLs are third party
# in practice; confirm that is the intent.
WITH allRequests AS (
  SELECT
    url AS requestUrl,
    respBodySize AS requestBytes
  FROM
    `httparchive.almanac.summary_requests`
),

requestTotals AS (
  # Single row holding the overall request count used as the percentage denominator.
  SELECT
    COUNT(*) AS totalRequestCount
  FROM
    `httparchive.almanac.summary_requests`
)

SELECT
  requestUrl,
  COUNT(*) AS totalRequests,
  SUM(requestBytes) AS totalBytes,
  ROUND(COUNT(*) * 100 / MAX(requestTotals.totalRequestCount), 2) AS percentRequestCount
FROM
  allRequests
CROSS JOIN
  requestTotals
GROUP BY
  requestUrl
ORDER BY
  totalRequests DESC
LIMIT 100

59 changes: 59 additions & 0 deletions sql/2019/05_Third_Parties/05_11.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#standardSQL
# Percentile breakdown page-relative percentage of requests that are third party requests broken down by third party category.
#
# For every page the share of its requests in each bucket is computed as a
# fraction (count / total count, not multiplied by 100); the outer query then
# takes approximate quantiles of those per-page fractions for each client.
WITH requests AS (
  # One row per request. Both lookup columns are NULL when the host has no match
  # in the corresponding table (LEFT JOINs).
  SELECT
    client,
    page AS pageUrl,
    DomainsOver50Table.requestDomain AS thirdPartyDomain,
    ThirdPartyTable.category AS thirdPartyCategory
  FROM
    `httparchive.almanac.summary_requests`
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01` AS ThirdPartyTable
  ON NET.HOST(url) = ThirdPartyTable.domain
  LEFT JOIN
    `lighthouse-infrastructure.third_party_web.2019_07_01_all_observed_domains` AS DomainsOver50Table
  ON NET.HOST(url) = DomainsOver50Table.requestDomain
),

pages AS (
  # One row per (client, page) with request counts per bucket.
  SELECT
    client,
    pageUrl,
    COUNT(*) AS numberOfRequests,
    COUNTIF(thirdPartyDomain IS NULL) AS numberOfFirstPartyRequests,
    COUNTIF(thirdPartyDomain IS NOT NULL) AS numberOfThirdPartyRequests,
    COUNTIF(thirdPartyCategory = 'ad') AS numberOfAdRequests,
    COUNTIF(thirdPartyCategory = 'analytics') AS numberOfAnalyticsRequests,
    COUNTIF(thirdPartyCategory = 'social') AS numberOfSocialRequests,
    COUNTIF(thirdPartyCategory = 'video') AS numberOfVideoRequests,
    COUNTIF(thirdPartyCategory = 'utility') AS numberOfUtilityRequests,
    COUNTIF(thirdPartyCategory = 'hosting') AS numberOfHostingRequests,
    COUNTIF(thirdPartyCategory = 'marketing') AS numberOfMarketingRequests,
    COUNTIF(thirdPartyCategory = 'customer-success') AS numberOfCustomerSuccessRequests,
    COUNTIF(thirdPartyCategory = 'content') AS numberOfContentRequests,
    COUNTIF(thirdPartyCategory = 'cdn') AS numberOfCdnRequests,
    COUNTIF(thirdPartyCategory = 'tag-manager') AS numberOfTagManagerRequests,
    COUNTIF(thirdPartyCategory = 'other') AS numberOfOtherRequests
  FROM
    requests
  GROUP BY
    client,
    pageUrl
)

SELECT
  client,
  COUNT(*) AS numberOfPages,
  COUNTIF(numberOfThirdPartyRequests > 0) AS numberOfPagesWithThirdParty,
  APPROX_QUANTILES(numberOfThirdPartyRequests / numberOfRequests, 100) AS percentThirdPartyRequestsQuantiles,
  APPROX_QUANTILES(numberOfAdRequests / numberOfRequests, 100) AS percentAdRequestsQuantiles,
  APPROX_QUANTILES(numberOfAnalyticsRequests / numberOfRequests, 100) AS percentAnalyticsRequestsQuantiles,
  APPROX_QUANTILES(numberOfSocialRequests / numberOfRequests, 100) AS percentSocialRequestsQuantiles,
  APPROX_QUANTILES(numberOfVideoRequests / numberOfRequests, 100) AS percentVideoRequestsQuantiles,
  APPROX_QUANTILES(numberOfUtilityRequests / numberOfRequests, 100) AS percentUtilityRequestsQuantiles,
  APPROX_QUANTILES(numberOfHostingRequests / numberOfRequests, 100) AS percentHostingRequestsQuantiles,
  APPROX_QUANTILES(numberOfMarketingRequests / numberOfRequests, 100) AS percentMarketingRequestsQuantiles,
  APPROX_QUANTILES(numberOfCustomerSuccessRequests / numberOfRequests, 100) AS percentCustomerSuccessRequestsQuantiles,
  APPROX_QUANTILES(numberOfContentRequests / numberOfRequests, 100) AS percentContentRequestsQuantiles,
  APPROX_QUANTILES(numberOfCdnRequests / numberOfRequests, 100) AS percentCdnRequestsQuantiles,
  APPROX_QUANTILES(numberOfTagManagerRequests / numberOfRequests, 100) AS percentTagManagerRequestsQuantiles,
  APPROX_QUANTILES(numberOfOtherRequests / numberOfRequests, 100) AS percentOtherRequestsQuantiles
FROM
  pages
GROUP BY
  client
59 changes: 59 additions & 0 deletions sql/2019/05_Third_Parties/05_12.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#standardSQL
# Percentile breakdown page-relative percentage of total bytes that are from third party requests broken down by third party category.
#
# For every (client, page) the share of its response-body bytes in each bucket is
# computed as a fraction (bucket bytes / total bytes, not multiplied by 100); the
# outer query then takes approximate quantiles of those per-page fractions per client.
SELECT
  client,
  COUNT(0) AS numberOfPages,
  # SAFE_DIVIDE yields NULL instead of raising when a page's byte total is 0;
  # APPROX_QUANTILES skips NULL inputs by default, so such pages are left out of
  # the distribution rather than failing the whole query.
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfThirdPartyBytes, numberOfBytes), 100) AS percentThirdPartyBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfAdBytes, numberOfBytes), 100) AS percentAdBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfAnalyticsBytes, numberOfBytes), 100) AS percentAnalyticsBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfSocialBytes, numberOfBytes), 100) AS percentSocialBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfVideoBytes, numberOfBytes), 100) AS percentVideoBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfUtilityBytes, numberOfBytes), 100) AS percentUtilityBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfHostingBytes, numberOfBytes), 100) AS percentHostingBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfMarketingBytes, numberOfBytes), 100) AS percentMarketingBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfCustomerSuccessBytes, numberOfBytes), 100) AS percentCustomerSuccessBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfContentBytes, numberOfBytes), 100) AS percentContentBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfCdnBytes, numberOfBytes), 100) AS percentCdnBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfTagManagerBytes, numberOfBytes), 100) AS percentTagManagerBytesQuantiles,
  APPROX_QUANTILES(SAFE_DIVIDE(numberOfOtherBytes, numberOfBytes), 100) AS percentOtherBytesQuantiles
FROM (
  SELECT
    client,
    pageUrl,
    COUNT(0) AS numberOfRequests,
    SUM(requestBytes) AS numberOfBytes,
    SUM(IF(thirdPartyDomain IS NULL, requestBytes, 0)) AS numberOfFirstPartyBytes,
    SUM(IF(thirdPartyDomain IS NOT NULL, requestBytes, 0)) AS numberOfThirdPartyBytes,
    SUM(IF(thirdPartyCategory = 'ad', requestBytes, 0)) AS numberOfAdBytes,
    SUM(IF(thirdPartyCategory = 'analytics', requestBytes, 0)) AS numberOfAnalyticsBytes,
    SUM(IF(thirdPartyCategory = 'social', requestBytes, 0)) AS numberOfSocialBytes,
    SUM(IF(thirdPartyCategory = 'video', requestBytes, 0)) AS numberOfVideoBytes,
    SUM(IF(thirdPartyCategory = 'utility', requestBytes, 0)) AS numberOfUtilityBytes,
    SUM(IF(thirdPartyCategory = 'hosting', requestBytes, 0)) AS numberOfHostingBytes,
    SUM(IF(thirdPartyCategory = 'marketing', requestBytes, 0)) AS numberOfMarketingBytes,
    SUM(IF(thirdPartyCategory = 'customer-success', requestBytes, 0)) AS numberOfCustomerSuccessBytes,
    SUM(IF(thirdPartyCategory = 'content', requestBytes, 0)) AS numberOfContentBytes,
    SUM(IF(thirdPartyCategory = 'cdn', requestBytes, 0)) AS numberOfCdnBytes,
    SUM(IF(thirdPartyCategory = 'tag-manager', requestBytes, 0)) AS numberOfTagManagerBytes,
    SUM(IF(thirdPartyCategory = 'other', requestBytes, 0)) AS numberOfOtherBytes
  FROM (
    # One row per request. Both lookup columns are NULL when the host has no match
    # in the corresponding table (LEFT JOINs).
    SELECT
      client,
      page AS pageUrl,
      respBodySize AS requestBytes,
      DomainsOver50Table.requestDomain AS thirdPartyDomain,
      ThirdPartyTable.category AS thirdPartyCategory
    FROM
      `httparchive.almanac.summary_requests`
    LEFT JOIN
      `lighthouse-infrastructure.third_party_web.2019_07_01` AS ThirdPartyTable
    ON NET.HOST(url) = ThirdPartyTable.domain
    LEFT JOIN
      `lighthouse-infrastructure.third_party_web.2019_07_01_all_observed_domains` AS DomainsOver50Table
    ON NET.HOST(url) = DomainsOver50Table.requestDomain
  )
  # client must be a grouping key: it is selected un-aggregated above, and the
  # same page URL is measured separately for each client.
  GROUP BY
    client,
    pageUrl
)
GROUP BY
  client