From c056373407df7cd60a30bf8c963d0c22ea7da688 Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Thu, 24 Oct 2019 08:39:26 +0200 Subject: [PATCH 01/15] Code review changes. Updating comment about generated code warning, and adding README section explaining how to generate chapters. --- src/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/README.md b/src/README.md index f9e9ebed8c8..a4379b2a788 100644 --- a/src/README.md +++ b/src/README.md @@ -44,7 +44,11 @@ ptw ## Generating chapters +<<<<<<< HEAD The chapter generation is dependent on nodejs, so you will need to have [nodejs](https://nodejs.org/en/) installed as well. All of the following commands must be run from within the `src` directory by executing `cd src` first. +======= +The chapter generation is dependent on nodejs, so is you will need to have [nodejs](https://nodejs.org/en/) installed as well. All of the following commands must be run from within the `src` directory by executing `cd src` first. +>>>>>>> 33bb7cf... Code review changes. Updating comment about generated code warning, and adding README section explaining how to generate chapters. 1. Install the dependencies: From dc6d3a0e4a9537ad59e41ebe44fa88a2bcc6eb36 Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Thu, 24 Oct 2019 13:47:50 +0200 Subject: [PATCH 02/15] Added another generate script to generate a table of contents. --- src/package-lock.json | 812 ++++++++++++++++++ src/package.json | 1 + src/templates/en/2019/chapter.html | 6 +- src/tools/generate/generate_chapters.js | 12 +- .../generate/generate_table_of_contents.js | 82 ++ 5 files changed, 907 insertions(+), 6 deletions(-) create mode 100644 src/tools/generate/generate_table_of_contents.js diff --git a/src/package-lock.json b/src/package-lock.json index 659875f2b39..0a7e0609bcb 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -4,18 +4,132 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "abab": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/abab/-/abab-2.0.2.tgz", + "integrity": "sha512-2scffjvioEmNz0OyDSLGWDfKCVwaKc6l9Pm9kOIREU13ClXZvHpg/nRL5xyjSSSLhOnXqft2HpsAzNEEA8cFFg==", + "dev": true + }, + "acorn": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.1.0.tgz", + "integrity": "sha512-kL5CuoXA/dgxlBbVrflsflzQ3PAas7RYZB52NOm/6839iVYJgKMJ3cQJD+t2i5+qFa8h3MDpEOJiS64E8JLnSQ==", + "dev": true + }, + "acorn-globals": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/acorn-globals/-/acorn-globals-4.3.4.tgz", + "integrity": "sha512-clfQEh21R+D0leSbUdWf3OcfqyaCSAQ8Ryq00bofSekfr9W8u1jyYZo6ir0xu9Gtcf7BjcHJpnbZH7JOCpP60A==", + "dev": true, + "requires": { + "acorn": "^6.0.1", + "acorn-walk": "^6.0.1" + }, + "dependencies": { + "acorn": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.3.0.tgz", + "integrity": "sha512-/czfa8BwS88b9gWQVhc8eknunSA2DoJpJyTQkhheIf5E48u1N0R4q/YxxsAeqRrmK9TQ/uYfgLDfZo91UlANIA==", + "dev": true + } + } + }, + "acorn-walk": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.2.0.tgz", + "integrity": "sha512-7evsyfH1cLOCdAzZAd43Cic04yKydNx0cF+7tiA19p1XnLLPU4dpCQOqpjqwokFe//vS0QqfqqjCS2JkiIs0cA==", + "dev": true + }, + "ajv": { + "version": "6.10.2", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.2.tgz", + "integrity": "sha512-TXtUUEYHuaTEbLZWIKUr5pmBuhDLy+8KYtPYdcV8qC+pOZL+NKqYwvWSRrVXHn+ZmRRAu8vJTAznH7Oag6RVRw==", + "dev": true, + "requires": { + "fast-deep-equal": "^2.0.1", + 
"fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, "ansi-regex": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", "dev": true }, + "array-equal": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/array-equal/-/array-equal-1.0.0.tgz", + "integrity": "sha1-jCpe8kcv2ep0KwTHenUJO6J1fJM=", + "dev": true + }, + "asn1": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "dev": true, + "requires": { + "safer-buffer": "~2.1.0" + } + }, + "assert-plus": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", + "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=", + "dev": true + }, + "async-limiter": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", + "integrity": "sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==", + "dev": true + }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=", + "dev": true + }, + "aws-sign2": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", + "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=", + "dev": true + }, + "aws4": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", + "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==", + "dev": true + }, + "bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", + "dev": true, + "requires": { + "tweetnacl": "^0.14.3" + } + }, + "browser-process-hrtime": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/browser-process-hrtime/-/browser-process-hrtime-0.1.3.tgz", + "integrity": "sha512-bRFnI4NnjO6cnyLmOV/7PVoDEMJChlcfN0z4s1YMBY989/SvlfMI1lgCnkFUs53e9gQF+w7qu7XdllSTiSl8Aw==", + "dev": true + }, "camelcase": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", "dev": true }, + "caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=", + "dev": true + }, "cliui": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", @@ -33,6 +147,21 @@ "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=", "dev": true }, + "combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dev": true, + "requires": { + "delayed-stream": "~1.0.0" + } + }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "dev": true + }, "cross-spawn": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-5.1.0.tgz", @@ -44,18 +173,123 @@ "which": "^1.2.9" } }, + 
"cssom": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.4.1.tgz", + "integrity": "sha512-6Aajq0XmukE7HdXUU6IoSWuH1H6gH9z6qmagsstTiN7cW2FNTsb+J2Chs+ufPgZCsV/yo8oaEudQLrb9dGxSVQ==", + "dev": true + }, + "cssstyle": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-2.0.0.tgz", + "integrity": "sha512-QXSAu2WBsSRXCPjvI43Y40m6fMevvyRm8JVAuF9ksQz5jha4pWP1wpaK7Yu5oLFc6+XAY+hj8YhefyXcBB53gg==", + "dev": true, + "requires": { + "cssom": "~0.3.6" + }, + "dependencies": { + "cssom": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.3.8.tgz", + "integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==", + "dev": true + } + } + }, + "dashdash": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", + "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0" + } + }, + "data-urls": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-1.1.0.tgz", + "integrity": "sha512-YTWYI9se1P55u58gL5GkQHW4P6VJBJ5iBT+B5a7i2Tjadhv52paJG0qHX4A0OR6/t52odI64KP2YvFpkDOi3eQ==", + "dev": true, + "requires": { + "abab": "^2.0.0", + "whatwg-mimetype": "^2.2.0", + "whatwg-url": "^7.0.0" + } + }, "decamelize": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", "integrity": "sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=", "dev": true }, + "deep-is": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", + "dev": true + }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=", + "dev": true + }, + "domexception": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/domexception/-/domexception-1.0.1.tgz", + "integrity": "sha512-raigMkn7CJNNo6Ihro1fzG7wr3fHuYVytzquZKX5n0yizGsTcYgzdIUwj1X9pK0VvjeihV+XiclP+DjwbsSKug==", + "dev": true, + "requires": { + "webidl-conversions": "^4.0.2" + } + }, + "ecc-jsbn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", + "dev": true, + "requires": { + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" + } + }, "ejs": { "version": "2.7.1", "resolved": "https://registry.npmjs.org/ejs/-/ejs-2.7.1.tgz", "integrity": "sha512-kS/gEPzZs3Y1rRsbGX4UOSjtP/CeJP0CxSNZHYxGfVM/VgLcv0ZqM7C45YyTj2DI2g7+P9Dd24C+IMIg6D0nYQ==", "dev": true }, + "escodegen": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.12.0.tgz", + "integrity": "sha512-TuA+EhsanGcme5T3R0L80u4t8CpbXQjegRmf7+FPTJrtCTErXFeelblRgHQa1FofEzqYYJmJ/OqjTwREp9qgmg==", + "dev": true, + "requires": { + "esprima": "^3.1.3", + "estraverse": "^4.2.0", + "esutils": "^2.0.2", + "optionator": "^0.8.1", + "source-map": "~0.6.1" + } + }, + "esprima": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-3.1.3.tgz", + "integrity": "sha1-/cpRzuYTOJXjyI1TXOSdv/YqRjM=", + "dev": true + }, + "estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "esutils": { + "version": 
"2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, "execa": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/execa/-/execa-0.7.0.tgz", @@ -71,6 +305,36 @@ "strip-eof": "^1.0.0" } }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "dev": true + }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=", + "dev": true + }, + "fast-deep-equal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", + "dev": true + }, + "fast-json-stable-stringify": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", + "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=", + "dev": true + }, + "fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", + "dev": true + }, "find-up": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", @@ -80,6 +344,23 @@ "locate-path": "^2.0.0" } }, + "forever-agent": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", + "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=", + "dev": true + }, + "form-data": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "dev": true, + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.6", + "mime-types": "^2.1.12" + } + }, "fs-extra": { "version": "8.1.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz", @@ -103,18 +384,78 @@ "integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=", "dev": true }, + "getpass": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", + "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0" + } + }, "graceful-fs": { "version": "4.2.2", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.2.tgz", "integrity": "sha512-IItsdsea19BoLC7ELy13q1iJFNmd7ofZH5+X/pJr90/nRoPEX0DJo1dHDbgtYWOhJhcCgMDTOw84RZ72q6lB+Q==", "dev": true }, + "har-schema": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", + "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=", + "dev": true + }, + "har-validator": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", + "dev": true, + "requires": { + "ajv": "^6.5.5", + "har-schema": "^2.0.0" + } + }, + "html-encoding-sniffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-1.0.2.tgz", + "integrity": 
"sha512-71lZziiDnsuabfdYiUeWdCVyKuqwWi23L8YeIgV9jSSZHCtb6wB1BKWooH7L3tn4/FuZJMVWyNaIDr4RGmaSYw==", + "dev": true, + "requires": { + "whatwg-encoding": "^1.0.1" + } + }, + "http-signature": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz", + "integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0", + "jsprim": "^1.2.2", + "sshpk": "^1.7.0" + } + }, + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dev": true, + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + }, "invert-kv": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/invert-kv/-/invert-kv-1.0.0.tgz", "integrity": "sha1-EEqOSqym09jNFXqO+L+rLXo//bY=", "dev": true }, + "ip-regex": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-2.1.0.tgz", + "integrity": "sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=", + "dev": true + }, "is-fullwidth-code-point": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", @@ -127,12 +468,82 @@ "integrity": "sha1-EtSj3U5o4Lec6428hBc66A2RykQ=", "dev": true }, + "is-typedarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", + "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=", + "dev": true + }, "isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", "dev": true }, + "isstream": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", + "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=", + "dev": true + }, + "jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=", + "dev": true + }, + "jsdom": { + "version": "15.2.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-15.2.0.tgz", + "integrity": "sha512-+hRyEfjRPFwTYMmSQ3/f7U9nP8ZNZmbkmUek760ZpxnCPWJIhaaLRuUSvpJ36fZKCGENxLwxClzwpOpnXNfChQ==", + "dev": true, + "requires": { + "abab": "^2.0.0", + "acorn": "^7.1.0", + "acorn-globals": "^4.3.2", + "array-equal": "^1.0.0", + "cssom": "^0.4.1", + "cssstyle": "^2.0.0", + "data-urls": "^1.1.0", + "domexception": "^1.0.1", + "escodegen": "^1.11.1", + "html-encoding-sniffer": "^1.0.2", + "nwsapi": "^2.1.4", + "parse5": "5.1.0", + "pn": "^1.1.0", + "request": "^2.88.0", + "request-promise-native": "^1.0.7", + "saxes": "^3.1.9", + "symbol-tree": "^3.2.2", + "tough-cookie": "^3.0.1", + "w3c-hr-time": "^1.0.1", + "w3c-xmlserializer": "^1.1.2", + "webidl-conversions": "^4.0.2", + "whatwg-encoding": "^1.0.5", + "whatwg-mimetype": "^2.3.0", + "whatwg-url": "^7.0.0", + "ws": "^7.0.0", + "xml-name-validator": "^3.0.0" + } + }, + "json-schema": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", + "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=", + "dev": true + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "json-stringify-safe": { + "version": "5.0.1", + 
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=", + "dev": true + }, "jsonfile": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", @@ -142,6 +553,18 @@ "graceful-fs": "^4.1.6" } }, + "jsprim": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", + "integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", + "dev": true, + "requires": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.2.3", + "verror": "1.10.0" + } + }, "lcid": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz", @@ -151,6 +574,16 @@ "invert-kv": "^1.0.0" } }, + "levn": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", + "integrity": "sha1-OwmSTt+fCDwEkP3UwLxEIeBHZO4=", + "dev": true, + "requires": { + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2" + } + }, "locate-path": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-2.0.0.tgz", @@ -161,6 +594,18 @@ "path-exists": "^3.0.0" } }, + "lodash": { + "version": "4.17.15", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==", + "dev": true + }, + "lodash.sortby": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", + "integrity": "sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=", + "dev": true + }, "lru-cache": { "version": "4.1.5", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", @@ -180,6 +625,21 @@ "mimic-fn": "^1.0.0" } }, + "mime-db": { + "version": "1.40.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", + "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==", + "dev": true + }, + "mime-types": { + "version": "2.1.24", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", + "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", + "dev": true, + "requires": { + "mime-db": "1.40.0" + } + }, "mimic-fn": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-1.2.0.tgz", @@ -201,6 +661,32 @@ "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=", "dev": true }, + "nwsapi": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.1.4.tgz", + "integrity": "sha512-iGfd9Y6SFdTNldEy2L0GUhcarIutFmk+MPWIn9dmj8NMIup03G08uUF2KGbbmv/Ux4RT0VZJoP/sVbWA6d/VIw==", + "dev": true + }, + "oauth-sign": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==", + "dev": true + }, + "optionator": { + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.2.tgz", + "integrity": "sha1-NkxeQJ0/TWMB1sC0wFu6UBgK62Q=", + "dev": true, + "requires": { + "deep-is": "~0.1.3", + "fast-levenshtein": "~2.0.4", + "levn": "~0.3.0", + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2", + "wordwrap": "~1.0.0" + } + }, "os-locale": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", @@ -242,6 +728,12 @@ "integrity": "sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=", "dev": true }, + "parse5": { 
+ "version": "5.1.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.0.tgz", + "integrity": "sha512-fxNG2sQjHvlVAYmzBZS9YlDp6PTSSDwa98vkD4QgVDDCAo84z5X1t5XyJQ62ImdLXx5NdIIfihey6xpum9/gRQ==", + "dev": true + }, "path-exists": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", @@ -254,12 +746,126 @@ "integrity": "sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=", "dev": true }, + "performance-now": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=", + "dev": true + }, + "pn": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/pn/-/pn-1.1.0.tgz", + "integrity": "sha512-2qHaIQr2VLRFoxe2nASzsV6ef4yOOH+Fi9FBOVH6cqeSgUnoyySPZkxzLuzd+RYOQTRpROA0ztTMqxROKSb/nA==", + "dev": true + }, + "prelude-ls": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", + "integrity": "sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=", + "dev": true + }, "pseudomap": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=", "dev": true }, + "psl": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.4.0.tgz", + "integrity": "sha512-HZzqCGPecFLyoRj5HLfuDSKYTJkAfB5thKBIkRHtGjWwY7p1dAyveIbXIq4tO0KYfDF2tHqPUgY9SDnGm00uFw==", + "dev": true + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true + }, + "qs": { + "version": "6.5.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", + "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==", + "dev": true + }, + "request": { + "version": "2.88.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", + "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", + "dev": true, + "requires": { + "aws-sign2": "~0.7.0", + "aws4": "^1.8.0", + "caseless": "~0.12.0", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", + "forever-agent": "~0.6.1", + "form-data": "~2.3.2", + "har-validator": "~5.1.0", + "http-signature": "~1.2.0", + "is-typedarray": "~1.0.0", + "isstream": "~0.1.2", + "json-stringify-safe": "~5.0.1", + "mime-types": "~2.1.19", + "oauth-sign": "~0.9.0", + "performance-now": "^2.1.0", + "qs": "~6.5.2", + "safe-buffer": "^5.1.2", + "tough-cookie": "~2.4.3", + "tunnel-agent": "^0.6.0", + "uuid": "^3.3.2" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + }, + "tough-cookie": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", + "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", + "dev": true, + "requires": { + "psl": "^1.1.24", + "punycode": "^1.4.1" + } + } + } + }, + "request-promise-core": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.2.tgz", + "integrity": "sha512-UHYyq1MO8GsefGEt7EprS8UrXsm1TxEvFUX1IMTuSLU2Rh7fTIdFtl8xD7JiEYiWU2dl+NYAjCTksTehQUxPag==", + "dev": true, + "requires": { + "lodash": 
"^4.17.11" + } + }, + "request-promise-native": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/request-promise-native/-/request-promise-native-1.0.7.tgz", + "integrity": "sha512-rIMnbBdgNViL37nZ1b3L/VfPOpSi0TqVDQPAvO6U14lMzOLrt5nilxCQqtDKhZeDiW0/hkCXGoQjhgJd/tCh6w==", + "dev": true, + "requires": { + "request-promise-core": "1.1.2", + "stealthy-require": "^1.1.1", + "tough-cookie": "^2.3.3" + }, + "dependencies": { + "tough-cookie": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz", + "integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==", + "dev": true, + "requires": { + "psl": "^1.1.28", + "punycode": "^2.1.1" + } + } + } + }, "require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -272,6 +878,27 @@ "integrity": "sha1-l/cXtp1IeE9fUmpsWqj/3aBVpNE=", "dev": true }, + "safe-buffer": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.0.tgz", + "integrity": "sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg==", + "dev": true + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true + }, + "saxes": { + "version": "3.1.11", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-3.1.11.tgz", + "integrity": "sha512-Ydydq3zC+WYDJK1+gRxRapLIED9PWeSuuS41wqyoRmzvhhh9nc+QQrVMKJYzJFULazeGhzSV0QleN2wD3boh2g==", + "dev": true, + "requires": { + "xmlchars": "^2.1.1" + } + }, "set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", @@ -308,6 +935,36 @@ "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=", "dev": true }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "optional": true + }, + "sshpk": { + "version": "1.16.1", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", + "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", + "dev": true, + "requires": { + "asn1": "~0.2.3", + "assert-plus": "^1.0.0", + "bcrypt-pbkdf": "^1.0.0", + "dashdash": "^1.12.0", + "ecc-jsbn": "~0.1.1", + "getpass": "^0.1.1", + "jsbn": "~0.1.0", + "safer-buffer": "^2.0.2", + "tweetnacl": "~0.14.0" + } + }, + "stealthy-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz", + "integrity": "sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=", + "dev": true + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -333,12 +990,140 @@ "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=", "dev": true }, + "symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "dev": true + }, + "tough-cookie": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-3.0.1.tgz", + "integrity": 
"sha512-yQyJ0u4pZsv9D4clxO69OEjLWYw+jbgspjTue4lTQZLfV0c5l1VmK2y1JK8E9ahdpltPOaAThPcp5nKPUgSnsg==", + "dev": true, + "requires": { + "ip-regex": "^2.1.0", + "psl": "^1.1.28", + "punycode": "^2.1.1" + } + }, + "tr46": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-1.0.1.tgz", + "integrity": "sha1-qLE/1r/SSJUZZ0zN5VujaTtwbQk=", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=", + "dev": true, + "requires": { + "safe-buffer": "^5.0.1" + } + }, + "tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=", + "dev": true + }, + "type-check": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", + "integrity": "sha1-WITKtRLPHTVeP7eE8wgEsrUg23I=", + "dev": true, + "requires": { + "prelude-ls": "~1.1.2" + } + }, "universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", "integrity": "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==", "dev": true }, + "uri-js": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz", + "integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "uuid": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.3.tgz", + "integrity": "sha512-pW0No1RGHgzlpHJO1nsVrHKpOEIxkGg1xB+v0ZmdNH5OAeAwzAVrCnI2/6Mtx+Uys6iaylxa+D3g4j63IKKjSQ==", + "dev": true + }, + "verror": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz", + "integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "^1.2.0" + } + }, + "w3c-hr-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", + "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", + "dev": true, + "requires": { + "browser-process-hrtime": "^0.1.2" + } + }, + "w3c-xmlserializer": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-1.1.2.tgz", + "integrity": "sha512-p10l/ayESzrBMYWRID6xbuCKh2Fp77+sA0doRuGn4tTIMrrZVeqfpKjXHY+oDh3K4nLdPgNwMTVP6Vp4pvqbNg==", + "dev": true, + "requires": { + "domexception": "^1.0.1", + "webidl-conversions": "^4.0.2", + "xml-name-validator": "^3.0.0" + } + }, + "webidl-conversions": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-4.0.2.tgz", + "integrity": "sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==", + "dev": true + }, + "whatwg-encoding": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz", + "integrity": "sha512-b5lim54JOPN9HtzvK9HFXvBma/rnfFeqsic0hSpjtDbVxR3dJKLc+KB4V6GgiGOvl7CY/KNh8rxSo9DKQrnUEw==", + "dev": true, + "requires": { + "iconv-lite": "0.4.24" + } + }, + "whatwg-mimetype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz", + "integrity": 
"sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==", + "dev": true + }, + "whatwg-url": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-7.1.0.tgz", + "integrity": "sha512-WUu7Rg1DroM7oQvGWfOiAK21n74Gg+T4elXEQYkOhtyLeWiJFoOGLXPKI/9gzIie9CtwVLm8wtw6YJdKyxSjeg==", + "dev": true, + "requires": { + "lodash.sortby": "^4.7.0", + "tr46": "^1.0.1", + "webidl-conversions": "^4.0.2" + } + }, "which": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", @@ -354,6 +1139,12 @@ "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", "dev": true }, + "wordwrap": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", + "integrity": "sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus=", + "dev": true + }, "wrap-ansi": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", @@ -401,6 +1192,27 @@ } } }, + "ws": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-7.2.0.tgz", + "integrity": "sha512-+SqNqFbwTm/0DC18KYzIsMTnEWpLwJsiasW/O17la4iDRRIO9uaHbvKiAS3AHgTiuuWerK/brj4O6MYZkei9xg==", + "dev": true, + "requires": { + "async-limiter": "^1.0.0" + } + }, + "xml-name-validator": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz", + "integrity": "sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==", + "dev": true + }, + "xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "dev": true + }, "y18n": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/y18n/-/y18n-3.2.1.tgz", diff --git a/src/package.json b/src/package.json index 7b5b4031af5..36ad3e185ee 100644 --- a/src/package.json +++ b/src/package.json @@ -19,6 +19,7 @@ "devDependencies": { "ejs": "^2.7.1", "fs-extra": "^8.1.0", + "jsdom": "^15.2.0", "showdown": "^1.9.0" } } diff --git a/src/templates/en/2019/chapter.html b/src/templates/en/2019/chapter.html index 5b7cd62f475..2a77691eb1d 100644 --- a/src/templates/en/2019/chapter.html +++ b/src/templates/en/2019/chapter.html @@ -18,9 +18,13 @@ {% set metadata = <%- JSON.stringify(metadata) %> %} {% block main %} + +

{{ metadata.get('title') }}

<%- body %>
-{% endblock %}
+{% endblock %}
\ No newline at end of file
diff --git a/src/tools/generate/generate_chapters.js b/src/tools/generate/generate_chapters.js
index 1c695e6aa90..d735ac205bb 100644
--- a/src/tools/generate/generate_chapters.js
+++ b/src/tools/generate/generate_chapters.js
@@ -1,6 +1,7 @@
 const fs = require('fs-extra');
 const showdown = require('showdown');
 const ejs = require('ejs');
+const { generate_table_of_contents } = require('./generate_table_of_contents');
 
 const converter = new showdown.Converter({ tables: true, metadata: true });
 converter.setFlavor('github');
@@ -14,9 +15,9 @@ const generate_chapters = async () => {
 
       console.log(`\n Generating chapter: ${language}, ${year}, ${chapter}`);
 
-      let { metadata, body } = await parse_file(markdown);
+      let { metadata, body, toc } = await parse_file(markdown);
 
-      await write_template(language, year, chapter, metadata, body);
+      await write_template(language, year, chapter, metadata, body, toc);
     }
   }
 }
@@ -24,6 +25,7 @@ const generate_chapters = async () => {
 
 const parse_file = async (markdown) => {
   const body = converter.makeHtml(markdown);
+  const toc = generate_table_of_contents(body);
 
   const m = converter.getMetadata();
   const chapter_number = Number(m.chapter_number);
@@ -37,14 +39,14 @@ const parse_file = async (markdown) => {
     reviewers
   };
 
-  return { metadata, body };
+  return { metadata, body, toc };
 };
 
-const write_template = async (language, year, chapter, metadata, body) => {
+const write_template = async (language, year, chapter, metadata, body, toc) => {
   const template = `templates/${language}/${year}/chapter.html`;
   const path = `templates/${language}/${year}/chapters/${chapter}.html`;
 
-  let html = await ejs.renderFile(template, { metadata, body });
+  let html = await ejs.renderFile(template, { metadata, body, toc });
 
   await fs.outputFile(path, html, 'utf8');
 
diff --git a/src/tools/generate/generate_table_of_contents.js b/src/tools/generate/generate_table_of_contents.js
new file mode 100644
index 00000000000..84a1f3c0dc4
--- /dev/null
+++ b/src/tools/generate/generate_table_of_contents.js
@@ -0,0 +1,82 @@
+const { JSDOM } = require('jsdom');
+
+const generate_table_of_contents = (html) => {
+  const dom = new JSDOM(html);
+  const all_headings = Object.values(
+    dom.window.document.querySelectorAll('h1, h2, h3, h4, h5, h6')
+  );
+  const starting_level = get_level(all_headings[0]);
+  const nested_headings = nest_headings(all_headings, starting_level);
+  console.log(JSON.stringify(nested_headings));
+  const toc = generate_html(nested_headings.children);
+
+  return toc;
+};
+
+const generate_html = (headings) => {
+  const list = [];
+  for (const heading of headings) {
+    const a = `<a href="#${heading.id}">${heading.title}</a>`;
+    const children = heading.children ? generate_html(heading.children) : '';
+    const li = `<li>${a}${children}</li>`;
+    list.push(li);
+  }
+
+  return `<ul>${list.join('')}</ul>`;
+};
+
+// This is a recursive function to nest the headings.
+const nest_headings = (source, current_level = 1) => {
+  // The list of headings to output.
+  let target = [];
+
+  while (source.length) {
+    // Pull the first item off of the source list.
+    const element = source.shift();
+    const id = element.id;
+    const title = element.textContent;
+    const level = get_level(element);
+
+    const heading = {
+      id,
+      level,
+      title
+    };
+
+    if (level === current_level) {
+      // The heading is at this level, add it to the list.
+      target.push(heading);
+    } else if (level > current_level) {
+      /* The heading needs to be added to the next level.
+        - Get the last item on the list, that becomes the parent.
+        - Use the rest of the source list to recurse and generate the
+          rest of the children.
+        - Set the children property of the parent heading, including
+          * Previous children
+          * The 'current' heading)
+          * The recursively generated children
+      */
+
+      const parent = target[target.length - 1];
+      let { children, nextHeading } = nest_headings(source, level);
+      parent.children = [...(parent.children || []), heading, ...children];
+      if (nextHeading) {
+        target.push(nextHeading);
+      }
+    } else {
+      /* The next item on the source is at a higher level, break out of this
+        level of recursion.
+      */
+
+      return { children: target, nextHeading: heading };
+    }
+  }
+
+  return { children: target };
+};
+
+const get_level = (element) => Number(element.localName.match(/\d+/)[0]);
+
+module.exports = {
+  generate_table_of_contents
+};

From 831f1e6f09bee7db4c518c889b9099df7ccbc423 Mon Sep 17 00:00:00 2001
From: Mike Geyser
Date: Thu, 24 Oct 2019 13:48:51 +0200
Subject: [PATCH 03/15] Removed console.log.

---
 src/tools/generate/generate_table_of_contents.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tools/generate/generate_table_of_contents.js b/src/tools/generate/generate_table_of_contents.js
index 84a1f3c0dc4..0b4863e3402 100644
--- a/src/tools/generate/generate_table_of_contents.js
+++ b/src/tools/generate/generate_table_of_contents.js
@@ -7,7 +7,6 @@ const generate_table_of_contents = (html) => {
   );
   const starting_level = get_level(all_headings[0]);
   const nested_headings = nest_headings(all_headings, starting_level);
-  console.log(JSON.stringify(nested_headings));
   const toc = generate_html(nested_headings.children);
 
   return toc;

From c79300f07807357777d8f4b947c79821cbff5df0 Mon Sep 17 00:00:00 2001
From: Mike Geyser
Date: Thu, 24 Oct 2019 13:47:50 +0200
Subject: [PATCH 04/15] Added another generate script to generate a table of contents.
--- src/package-lock.json | 812 ++++++++++++++++++ src/package.json | 1 + src/templates/en/2019/chapter.html | 6 +- src/tools/generate/generate_chapters.js | 12 +- .../generate/generate_table_of_contents.js | 82 ++ 5 files changed, 907 insertions(+), 6 deletions(-) create mode 100644 src/tools/generate/generate_table_of_contents.js diff --git a/src/package-lock.json b/src/package-lock.json index 659875f2b39..0a7e0609bcb 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -4,18 +4,132 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "abab": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/abab/-/abab-2.0.2.tgz", + "integrity": "sha512-2scffjvioEmNz0OyDSLGWDfKCVwaKc6l9Pm9kOIREU13ClXZvHpg/nRL5xyjSSSLhOnXqft2HpsAzNEEA8cFFg==", + "dev": true + }, + "acorn": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.1.0.tgz", + "integrity": "sha512-kL5CuoXA/dgxlBbVrflsflzQ3PAas7RYZB52NOm/6839iVYJgKMJ3cQJD+t2i5+qFa8h3MDpEOJiS64E8JLnSQ==", + "dev": true + }, + "acorn-globals": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/acorn-globals/-/acorn-globals-4.3.4.tgz", + "integrity": "sha512-clfQEh21R+D0leSbUdWf3OcfqyaCSAQ8Ryq00bofSekfr9W8u1jyYZo6ir0xu9Gtcf7BjcHJpnbZH7JOCpP60A==", + "dev": true, + "requires": { + "acorn": "^6.0.1", + "acorn-walk": "^6.0.1" + }, + "dependencies": { + "acorn": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.3.0.tgz", + "integrity": "sha512-/czfa8BwS88b9gWQVhc8eknunSA2DoJpJyTQkhheIf5E48u1N0R4q/YxxsAeqRrmK9TQ/uYfgLDfZo91UlANIA==", + "dev": true + } + } + }, + "acorn-walk": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.2.0.tgz", + "integrity": "sha512-7evsyfH1cLOCdAzZAd43Cic04yKydNx0cF+7tiA19p1XnLLPU4dpCQOqpjqwokFe//vS0QqfqqjCS2JkiIs0cA==", + "dev": true + }, + "ajv": { + "version": "6.10.2", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.2.tgz", + "integrity": "sha512-TXtUUEYHuaTEbLZWIKUr5pmBuhDLy+8KYtPYdcV8qC+pOZL+NKqYwvWSRrVXHn+ZmRRAu8vJTAznH7Oag6RVRw==", + "dev": true, + "requires": { + "fast-deep-equal": "^2.0.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, "ansi-regex": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", "dev": true }, + "array-equal": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/array-equal/-/array-equal-1.0.0.tgz", + "integrity": "sha1-jCpe8kcv2ep0KwTHenUJO6J1fJM=", + "dev": true + }, + "asn1": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "dev": true, + "requires": { + "safer-buffer": "~2.1.0" + } + }, + "assert-plus": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", + "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=", + "dev": true + }, + "async-limiter": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", + "integrity": "sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==", + "dev": true + }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=", + "dev": true + }, + "aws-sign2": 
{ + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", + "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=", + "dev": true + }, + "aws4": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", + "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==", + "dev": true + }, + "bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", + "dev": true, + "requires": { + "tweetnacl": "^0.14.3" + } + }, + "browser-process-hrtime": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/browser-process-hrtime/-/browser-process-hrtime-0.1.3.tgz", + "integrity": "sha512-bRFnI4NnjO6cnyLmOV/7PVoDEMJChlcfN0z4s1YMBY989/SvlfMI1lgCnkFUs53e9gQF+w7qu7XdllSTiSl8Aw==", + "dev": true + }, "camelcase": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", "dev": true }, + "caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=", + "dev": true + }, "cliui": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", @@ -33,6 +147,21 @@ "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=", "dev": true }, + "combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dev": true, + "requires": { + "delayed-stream": "~1.0.0" + } + }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "dev": true + }, "cross-spawn": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-5.1.0.tgz", @@ -44,18 +173,123 @@ "which": "^1.2.9" } }, + "cssom": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.4.1.tgz", + "integrity": "sha512-6Aajq0XmukE7HdXUU6IoSWuH1H6gH9z6qmagsstTiN7cW2FNTsb+J2Chs+ufPgZCsV/yo8oaEudQLrb9dGxSVQ==", + "dev": true + }, + "cssstyle": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-2.0.0.tgz", + "integrity": "sha512-QXSAu2WBsSRXCPjvI43Y40m6fMevvyRm8JVAuF9ksQz5jha4pWP1wpaK7Yu5oLFc6+XAY+hj8YhefyXcBB53gg==", + "dev": true, + "requires": { + "cssom": "~0.3.6" + }, + "dependencies": { + "cssom": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.3.8.tgz", + "integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==", + "dev": true + } + } + }, + "dashdash": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", + "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0" + } + }, + "data-urls": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-1.1.0.tgz", + "integrity": "sha512-YTWYI9se1P55u58gL5GkQHW4P6VJBJ5iBT+B5a7i2Tjadhv52paJG0qHX4A0OR6/t52odI64KP2YvFpkDOi3eQ==", + "dev": true, + "requires": { + "abab": "^2.0.0", + "whatwg-mimetype": "^2.2.0", + "whatwg-url": "^7.0.0" + } + }, "decamelize": { "version": "1.2.0", "resolved": 
"https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", "integrity": "sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=", "dev": true }, + "deep-is": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", + "dev": true + }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=", + "dev": true + }, + "domexception": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/domexception/-/domexception-1.0.1.tgz", + "integrity": "sha512-raigMkn7CJNNo6Ihro1fzG7wr3fHuYVytzquZKX5n0yizGsTcYgzdIUwj1X9pK0VvjeihV+XiclP+DjwbsSKug==", + "dev": true, + "requires": { + "webidl-conversions": "^4.0.2" + } + }, + "ecc-jsbn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", + "dev": true, + "requires": { + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" + } + }, "ejs": { "version": "2.7.1", "resolved": "https://registry.npmjs.org/ejs/-/ejs-2.7.1.tgz", "integrity": "sha512-kS/gEPzZs3Y1rRsbGX4UOSjtP/CeJP0CxSNZHYxGfVM/VgLcv0ZqM7C45YyTj2DI2g7+P9Dd24C+IMIg6D0nYQ==", "dev": true }, + "escodegen": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.12.0.tgz", + "integrity": "sha512-TuA+EhsanGcme5T3R0L80u4t8CpbXQjegRmf7+FPTJrtCTErXFeelblRgHQa1FofEzqYYJmJ/OqjTwREp9qgmg==", + "dev": true, + "requires": { + "esprima": "^3.1.3", + "estraverse": "^4.2.0", + "esutils": "^2.0.2", + "optionator": "^0.8.1", + "source-map": "~0.6.1" + } + }, + "esprima": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-3.1.3.tgz", + "integrity": "sha1-/cpRzuYTOJXjyI1TXOSdv/YqRjM=", + "dev": true + }, + "estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, "execa": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/execa/-/execa-0.7.0.tgz", @@ -71,6 +305,36 @@ "strip-eof": "^1.0.0" } }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "dev": true + }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=", + "dev": true + }, + "fast-deep-equal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", + "dev": true + }, + "fast-json-stable-stringify": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", + "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=", + "dev": true + }, + "fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": 
"sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", + "dev": true + }, "find-up": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", @@ -80,6 +344,23 @@ "locate-path": "^2.0.0" } }, + "forever-agent": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", + "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=", + "dev": true + }, + "form-data": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "dev": true, + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.6", + "mime-types": "^2.1.12" + } + }, "fs-extra": { "version": "8.1.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz", @@ -103,18 +384,78 @@ "integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=", "dev": true }, + "getpass": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", + "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0" + } + }, "graceful-fs": { "version": "4.2.2", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.2.tgz", "integrity": "sha512-IItsdsea19BoLC7ELy13q1iJFNmd7ofZH5+X/pJr90/nRoPEX0DJo1dHDbgtYWOhJhcCgMDTOw84RZ72q6lB+Q==", "dev": true }, + "har-schema": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", + "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=", + "dev": true + }, + "har-validator": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", + "dev": true, + "requires": { + "ajv": "^6.5.5", + "har-schema": "^2.0.0" + } + }, + "html-encoding-sniffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-1.0.2.tgz", + "integrity": "sha512-71lZziiDnsuabfdYiUeWdCVyKuqwWi23L8YeIgV9jSSZHCtb6wB1BKWooH7L3tn4/FuZJMVWyNaIDr4RGmaSYw==", + "dev": true, + "requires": { + "whatwg-encoding": "^1.0.1" + } + }, + "http-signature": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz", + "integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0", + "jsprim": "^1.2.2", + "sshpk": "^1.7.0" + } + }, + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dev": true, + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + }, "invert-kv": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/invert-kv/-/invert-kv-1.0.0.tgz", "integrity": "sha1-EEqOSqym09jNFXqO+L+rLXo//bY=", "dev": true }, + "ip-regex": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-2.1.0.tgz", + "integrity": "sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=", + "dev": true + }, "is-fullwidth-code-point": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", @@ -127,12 +468,82 @@ "integrity": "sha1-EtSj3U5o4Lec6428hBc66A2RykQ=", "dev": true }, + "is-typedarray": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", + "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=", + "dev": true + }, "isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", "dev": true }, + "isstream": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", + "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=", + "dev": true + }, + "jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=", + "dev": true + }, + "jsdom": { + "version": "15.2.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-15.2.0.tgz", + "integrity": "sha512-+hRyEfjRPFwTYMmSQ3/f7U9nP8ZNZmbkmUek760ZpxnCPWJIhaaLRuUSvpJ36fZKCGENxLwxClzwpOpnXNfChQ==", + "dev": true, + "requires": { + "abab": "^2.0.0", + "acorn": "^7.1.0", + "acorn-globals": "^4.3.2", + "array-equal": "^1.0.0", + "cssom": "^0.4.1", + "cssstyle": "^2.0.0", + "data-urls": "^1.1.0", + "domexception": "^1.0.1", + "escodegen": "^1.11.1", + "html-encoding-sniffer": "^1.0.2", + "nwsapi": "^2.1.4", + "parse5": "5.1.0", + "pn": "^1.1.0", + "request": "^2.88.0", + "request-promise-native": "^1.0.7", + "saxes": "^3.1.9", + "symbol-tree": "^3.2.2", + "tough-cookie": "^3.0.1", + "w3c-hr-time": "^1.0.1", + "w3c-xmlserializer": "^1.1.2", + "webidl-conversions": "^4.0.2", + "whatwg-encoding": "^1.0.5", + "whatwg-mimetype": "^2.3.0", + "whatwg-url": "^7.0.0", + "ws": "^7.0.0", + "xml-name-validator": "^3.0.0" + } + }, + "json-schema": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", + "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=", + "dev": true + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=", + "dev": true + }, "jsonfile": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", @@ -142,6 +553,18 @@ "graceful-fs": "^4.1.6" } }, + "jsprim": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", + "integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", + "dev": true, + "requires": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.2.3", + "verror": "1.10.0" + } + }, "lcid": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/lcid/-/lcid-1.0.0.tgz", @@ -151,6 +574,16 @@ "invert-kv": "^1.0.0" } }, + "levn": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", + "integrity": "sha1-OwmSTt+fCDwEkP3UwLxEIeBHZO4=", + "dev": true, + "requires": { + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2" + } + }, "locate-path": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-2.0.0.tgz", @@ -161,6 +594,18 @@ "path-exists": "^3.0.0" } }, + "lodash": { + "version": "4.17.15", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==", + "dev": true + }, + 
"lodash.sortby": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", + "integrity": "sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=", + "dev": true + }, "lru-cache": { "version": "4.1.5", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", @@ -180,6 +625,21 @@ "mimic-fn": "^1.0.0" } }, + "mime-db": { + "version": "1.40.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", + "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==", + "dev": true + }, + "mime-types": { + "version": "2.1.24", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", + "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", + "dev": true, + "requires": { + "mime-db": "1.40.0" + } + }, "mimic-fn": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-1.2.0.tgz", @@ -201,6 +661,32 @@ "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=", "dev": true }, + "nwsapi": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.1.4.tgz", + "integrity": "sha512-iGfd9Y6SFdTNldEy2L0GUhcarIutFmk+MPWIn9dmj8NMIup03G08uUF2KGbbmv/Ux4RT0VZJoP/sVbWA6d/VIw==", + "dev": true + }, + "oauth-sign": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==", + "dev": true + }, + "optionator": { + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.2.tgz", + "integrity": "sha1-NkxeQJ0/TWMB1sC0wFu6UBgK62Q=", + "dev": true, + "requires": { + "deep-is": "~0.1.3", + "fast-levenshtein": "~2.0.4", + "levn": "~0.3.0", + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2", + "wordwrap": "~1.0.0" + } + }, "os-locale": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", @@ -242,6 +728,12 @@ "integrity": "sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=", "dev": true }, + "parse5": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.0.tgz", + "integrity": "sha512-fxNG2sQjHvlVAYmzBZS9YlDp6PTSSDwa98vkD4QgVDDCAo84z5X1t5XyJQ62ImdLXx5NdIIfihey6xpum9/gRQ==", + "dev": true + }, "path-exists": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", @@ -254,12 +746,126 @@ "integrity": "sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=", "dev": true }, + "performance-now": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=", + "dev": true + }, + "pn": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/pn/-/pn-1.1.0.tgz", + "integrity": "sha512-2qHaIQr2VLRFoxe2nASzsV6ef4yOOH+Fi9FBOVH6cqeSgUnoyySPZkxzLuzd+RYOQTRpROA0ztTMqxROKSb/nA==", + "dev": true + }, + "prelude-ls": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", + "integrity": "sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=", + "dev": true + }, "pseudomap": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=", "dev": true }, + "psl": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.4.0.tgz", + "integrity": 
"sha512-HZzqCGPecFLyoRj5HLfuDSKYTJkAfB5thKBIkRHtGjWwY7p1dAyveIbXIq4tO0KYfDF2tHqPUgY9SDnGm00uFw==", + "dev": true + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true + }, + "qs": { + "version": "6.5.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", + "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==", + "dev": true + }, + "request": { + "version": "2.88.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", + "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", + "dev": true, + "requires": { + "aws-sign2": "~0.7.0", + "aws4": "^1.8.0", + "caseless": "~0.12.0", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", + "forever-agent": "~0.6.1", + "form-data": "~2.3.2", + "har-validator": "~5.1.0", + "http-signature": "~1.2.0", + "is-typedarray": "~1.0.0", + "isstream": "~0.1.2", + "json-stringify-safe": "~5.0.1", + "mime-types": "~2.1.19", + "oauth-sign": "~0.9.0", + "performance-now": "^2.1.0", + "qs": "~6.5.2", + "safe-buffer": "^5.1.2", + "tough-cookie": "~2.4.3", + "tunnel-agent": "^0.6.0", + "uuid": "^3.3.2" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + }, + "tough-cookie": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", + "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", + "dev": true, + "requires": { + "psl": "^1.1.24", + "punycode": "^1.4.1" + } + } + } + }, + "request-promise-core": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.2.tgz", + "integrity": "sha512-UHYyq1MO8GsefGEt7EprS8UrXsm1TxEvFUX1IMTuSLU2Rh7fTIdFtl8xD7JiEYiWU2dl+NYAjCTksTehQUxPag==", + "dev": true, + "requires": { + "lodash": "^4.17.11" + } + }, + "request-promise-native": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/request-promise-native/-/request-promise-native-1.0.7.tgz", + "integrity": "sha512-rIMnbBdgNViL37nZ1b3L/VfPOpSi0TqVDQPAvO6U14lMzOLrt5nilxCQqtDKhZeDiW0/hkCXGoQjhgJd/tCh6w==", + "dev": true, + "requires": { + "request-promise-core": "1.1.2", + "stealthy-require": "^1.1.1", + "tough-cookie": "^2.3.3" + }, + "dependencies": { + "tough-cookie": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz", + "integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==", + "dev": true, + "requires": { + "psl": "^1.1.28", + "punycode": "^2.1.1" + } + } + } + }, "require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -272,6 +878,27 @@ "integrity": "sha1-l/cXtp1IeE9fUmpsWqj/3aBVpNE=", "dev": true }, + "safe-buffer": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.0.tgz", + "integrity": "sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg==", + "dev": true + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": 
"https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true + }, + "saxes": { + "version": "3.1.11", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-3.1.11.tgz", + "integrity": "sha512-Ydydq3zC+WYDJK1+gRxRapLIED9PWeSuuS41wqyoRmzvhhh9nc+QQrVMKJYzJFULazeGhzSV0QleN2wD3boh2g==", + "dev": true, + "requires": { + "xmlchars": "^2.1.1" + } + }, "set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", @@ -308,6 +935,36 @@ "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=", "dev": true }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "optional": true + }, + "sshpk": { + "version": "1.16.1", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", + "integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", + "dev": true, + "requires": { + "asn1": "~0.2.3", + "assert-plus": "^1.0.0", + "bcrypt-pbkdf": "^1.0.0", + "dashdash": "^1.12.0", + "ecc-jsbn": "~0.1.1", + "getpass": "^0.1.1", + "jsbn": "~0.1.0", + "safer-buffer": "^2.0.2", + "tweetnacl": "~0.14.0" + } + }, + "stealthy-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz", + "integrity": "sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=", + "dev": true + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -333,12 +990,140 @@ "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=", "dev": true }, + "symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "dev": true + }, + "tough-cookie": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-3.0.1.tgz", + "integrity": "sha512-yQyJ0u4pZsv9D4clxO69OEjLWYw+jbgspjTue4lTQZLfV0c5l1VmK2y1JK8E9ahdpltPOaAThPcp5nKPUgSnsg==", + "dev": true, + "requires": { + "ip-regex": "^2.1.0", + "psl": "^1.1.28", + "punycode": "^2.1.1" + } + }, + "tr46": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-1.0.1.tgz", + "integrity": "sha1-qLE/1r/SSJUZZ0zN5VujaTtwbQk=", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=", + "dev": true, + "requires": { + "safe-buffer": "^5.0.1" + } + }, + "tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=", + "dev": true + }, + "type-check": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", + "integrity": "sha1-WITKtRLPHTVeP7eE8wgEsrUg23I=", + "dev": true, + "requires": { + "prelude-ls": "~1.1.2" + } + }, "universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", "integrity": 
"sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==", "dev": true }, + "uri-js": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz", + "integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "uuid": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.3.tgz", + "integrity": "sha512-pW0No1RGHgzlpHJO1nsVrHKpOEIxkGg1xB+v0ZmdNH5OAeAwzAVrCnI2/6Mtx+Uys6iaylxa+D3g4j63IKKjSQ==", + "dev": true + }, + "verror": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz", + "integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=", + "dev": true, + "requires": { + "assert-plus": "^1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "^1.2.0" + } + }, + "w3c-hr-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", + "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", + "dev": true, + "requires": { + "browser-process-hrtime": "^0.1.2" + } + }, + "w3c-xmlserializer": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-1.1.2.tgz", + "integrity": "sha512-p10l/ayESzrBMYWRID6xbuCKh2Fp77+sA0doRuGn4tTIMrrZVeqfpKjXHY+oDh3K4nLdPgNwMTVP6Vp4pvqbNg==", + "dev": true, + "requires": { + "domexception": "^1.0.1", + "webidl-conversions": "^4.0.2", + "xml-name-validator": "^3.0.0" + } + }, + "webidl-conversions": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-4.0.2.tgz", + "integrity": "sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==", + "dev": true + }, + "whatwg-encoding": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz", + "integrity": "sha512-b5lim54JOPN9HtzvK9HFXvBma/rnfFeqsic0hSpjtDbVxR3dJKLc+KB4V6GgiGOvl7CY/KNh8rxSo9DKQrnUEw==", + "dev": true, + "requires": { + "iconv-lite": "0.4.24" + } + }, + "whatwg-mimetype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz", + "integrity": "sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==", + "dev": true + }, + "whatwg-url": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-7.1.0.tgz", + "integrity": "sha512-WUu7Rg1DroM7oQvGWfOiAK21n74Gg+T4elXEQYkOhtyLeWiJFoOGLXPKI/9gzIie9CtwVLm8wtw6YJdKyxSjeg==", + "dev": true, + "requires": { + "lodash.sortby": "^4.7.0", + "tr46": "^1.0.1", + "webidl-conversions": "^4.0.2" + } + }, "which": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", @@ -354,6 +1139,12 @@ "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", "dev": true }, + "wordwrap": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", + "integrity": "sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus=", + "dev": true + }, "wrap-ansi": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", @@ -401,6 +1192,27 @@ } } }, + "ws": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-7.2.0.tgz", + "integrity": "sha512-+SqNqFbwTm/0DC18KYzIsMTnEWpLwJsiasW/O17la4iDRRIO9uaHbvKiAS3AHgTiuuWerK/brj4O6MYZkei9xg==", + "dev": true, + "requires": { + "async-limiter": "^1.0.0" + } + }, + 
"xml-name-validator": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz", + "integrity": "sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==", + "dev": true + }, + "xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "dev": true + }, "y18n": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/y18n/-/y18n-3.2.1.tgz", diff --git a/src/package.json b/src/package.json index 7b5b4031af5..36ad3e185ee 100644 --- a/src/package.json +++ b/src/package.json @@ -19,6 +19,7 @@ "devDependencies": { "ejs": "^2.7.1", "fs-extra": "^8.1.0", + "jsdom": "^15.2.0", "showdown": "^1.9.0" } } diff --git a/src/templates/en/2019/chapter.html b/src/templates/en/2019/chapter.html index 5b7cd62f475..2a77691eb1d 100644 --- a/src/templates/en/2019/chapter.html +++ b/src/templates/en/2019/chapter.html @@ -18,9 +18,13 @@ {% set metadata = <%- JSON.stringify(metadata) %> %} {% block main %} + +

    {{ metadata.get('title') }}

    <%- body %>
    -{% endblock %} +{% endblock %} \ No newline at end of file diff --git a/src/tools/generate/generate_chapters.js b/src/tools/generate/generate_chapters.js index 1c695e6aa90..d735ac205bb 100644 --- a/src/tools/generate/generate_chapters.js +++ b/src/tools/generate/generate_chapters.js @@ -1,6 +1,7 @@ const fs = require('fs-extra'); const showdown = require('showdown'); const ejs = require('ejs'); +const { generate_table_of_contents } = require('./generate_table_of_contents'); const converter = new showdown.Converter({ tables: true, metadata: true }); converter.setFlavor('github'); @@ -14,9 +15,9 @@ const generate_chapters = async () => { console.log(`\n Generating chapter: ${language}, ${year}, ${chapter}`); - let { metadata, body } = await parse_file(markdown); + let { metadata, body, toc } = await parse_file(markdown); - await write_template(language, year, chapter, metadata, body); + await write_template(language, year, chapter, metadata, body, toc); } } } @@ -24,6 +25,7 @@ const generate_chapters = async () => { const parse_file = async (markdown) => { const body = converter.makeHtml(markdown); + const toc = generate_table_of_contents(body); const m = converter.getMetadata(); const chapter_number = Number(m.chapter_number); @@ -37,14 +39,14 @@ const parse_file = async (markdown) => { reviewers }; - return { metadata, body }; + return { metadata, body, toc }; }; -const write_template = async (language, year, chapter, metadata, body) => { +const write_template = async (language, year, chapter, metadata, body, toc) => { const template = `templates/${language}/${year}/chapter.html`; const path = `templates/${language}/${year}/chapters/${chapter}.html`; - let html = await ejs.renderFile(template, { metadata, body }); + let html = await ejs.renderFile(template, { metadata, body, toc }); await fs.outputFile(path, html, 'utf8'); diff --git a/src/tools/generate/generate_table_of_contents.js b/src/tools/generate/generate_table_of_contents.js new file mode 100644 index 00000000000..84a1f3c0dc4 --- /dev/null +++ b/src/tools/generate/generate_table_of_contents.js @@ -0,0 +1,82 @@ +const { JSDOM } = require('jsdom'); + +const generate_table_of_contents = (html) => { + const dom = new JSDOM(html); + const all_headings = Object.values( + dom.window.document.querySelectorAll('h1, h2, h3, h4, h5, h6') + ); + const starting_level = get_level(all_headings[0]); + const nested_headings = nest_headings(all_headings, starting_level); + console.log(JSON.stringify(nested_headings)); + const toc = generate_html(nested_headings.children); + + return toc; +}; + +const generate_html = (headings) => { + const list = []; + for (const heading of headings) { + const a = `${heading.title}`; + const children = heading.children ? generate_html(heading.children) : ''; + const li = `
  • ${a}${children}
  • `; + list.push(li); + } + + return `
      ${list.join('')}
    `; +}; + +// This is a recursive function to nest the headings. +const nest_headings = (source, current_level = 1) => { + // The list of headings to output. + let target = []; + + while (source.length) { + // Pull the first item off of the source list. + const element = source.shift(); + const id = element.id; + const title = element.textContent; + const level = get_level(element); + + const heading = { + id, + level, + title + }; + + if (level === current_level) { + // The heading is at this level, add it to the list. + target.push(heading); + } else if (level > current_level) { + /* The heading needs to be added to the next level. + - Get the last item on the list, that becomes the parent. + - Use the rest of the source list to recurse and generate the + rest of the children. + - Set the children property of the parent heading, including + * Previous children + * The 'current' heading) + * The recursively generated children + */ + + const parent = target[target.length - 1]; + let { children, nextHeading } = nest_headings(source, level); + parent.children = [...(parent.children || []), heading, ...children]; + if (nextHeading) { + target.push(nextHeading); + } + } else { + /* The next item on the source is at a higher level, break out of this + level of recursion. + */ + + return { children: target, nextHeading: heading }; + } + } + + return { children: target }; +}; + +const get_level = (element) => Number(element.localName.match(/\d+/)[0]); + +module.exports = { + generate_table_of_contents +}; From 5838d293426c7faba19990cac12d01479bed42ce Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Thu, 24 Oct 2019 13:48:51 +0200 Subject: [PATCH 05/15] Removed console.log. --- src/tools/generate/generate_table_of_contents.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tools/generate/generate_table_of_contents.js b/src/tools/generate/generate_table_of_contents.js index 84a1f3c0dc4..0b4863e3402 100644 --- a/src/tools/generate/generate_table_of_contents.js +++ b/src/tools/generate/generate_table_of_contents.js @@ -7,7 +7,6 @@ const generate_table_of_contents = (html) => { ); const starting_level = get_level(all_headings[0]); const nested_headings = nest_headings(all_headings, starting_level); - console.log(JSON.stringify(nested_headings)); const toc = generate_html(nested_headings.children); return toc; From 3843beca6848fe68136f63b926dacfc07847653f Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Thu, 24 Oct 2019 17:56:30 +0200 Subject: [PATCH 06/15] Fixed some of the obvious problems with the script. --- .../generate/generate_table_of_contents.js | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/tools/generate/generate_table_of_contents.js b/src/tools/generate/generate_table_of_contents.js index 0b4863e3402..17d693f6734 100644 --- a/src/tools/generate/generate_table_of_contents.js +++ b/src/tools/generate/generate_table_of_contents.js @@ -7,7 +7,7 @@ const generate_table_of_contents = (html) => { ); const starting_level = get_level(all_headings[0]); const nested_headings = nest_headings(all_headings, starting_level); - const toc = generate_html(nested_headings.children); + const toc = generate_html(nested_headings); return toc; }; @@ -47,31 +47,28 @@ const nest_headings = (source, current_level = 1) => { target.push(heading); } else if (level > current_level) { /* The heading needs to be added to the next level. + - Put the element back on the source array. - Get the last item on the list, that becomes the parent. 
- Use the rest of the source list to recurse and generate the rest of the children. - Set the children property of the parent heading, including * Previous children - * The 'current' heading) * The recursively generated children */ - + source.unshift(element); const parent = target[target.length - 1]; - let { children, nextHeading } = nest_headings(source, level); - parent.children = [...(parent.children || []), heading, ...children]; - if (nextHeading) { - target.push(nextHeading); - } + let children = nest_headings(source, level); + parent.children = [...(parent.children || []), ...children]; } else { /* The next item on the source is at a higher level, break out of this - level of recursion. + level of recursion after putting the element back on the array. */ - - return { children: target, nextHeading: heading }; + source.unshift(element); + return target; } } - return { children: target }; + return target; }; const get_level = (element) => Number(element.localName.match(/\d+/)[0]); From 7203d0d147071aabaa7bc3794f64d66d40d4fefe Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Tue, 29 Oct 2019 07:47:58 +0200 Subject: [PATCH 07/15] Moved the toc generation to an ejs template. --- src/README.md | 4 ---- src/templates/en/2019/chapter.html | 2 +- src/templates/en/2019/toc.html | 10 ++++++++++ src/tools/generate/generate_table_of_contents.js | 15 +-------------- 4 files changed, 12 insertions(+), 19 deletions(-) create mode 100644 src/templates/en/2019/toc.html diff --git a/src/README.md b/src/README.md index a4379b2a788..89fd2621b09 100644 --- a/src/README.md +++ b/src/README.md @@ -44,11 +44,7 @@ ptw ## Generating chapters -<<<<<<< HEAD -The chapter generation is dependent on nodejs, so you will need to have [nodejs](https://nodejs.org/en/) installed as well. All of the following commands must be run from within the `src` directory by executing `cd src` first. -======= The chapter generation is dependent on nodejs, so is you will need to have [nodejs](https://nodejs.org/en/) installed as well. All of the following commands must be run from within the `src` directory by executing `cd src` first. ->>>>>>> 33bb7cf... Code review changes. Updating comment about generated code warning, and adding README section explaining how to generate chapters. 1. Install the dependencies: diff --git a/src/templates/en/2019/chapter.html b/src/templates/en/2019/chapter.html index 2a77691eb1d..7e7d56f8984 100644 --- a/src/templates/en/2019/chapter.html +++ b/src/templates/en/2019/chapter.html @@ -19,7 +19,7 @@ {% block main %}
    diff --git a/src/templates/en/2019/toc.html b/src/templates/en/2019/toc.html new file mode 100644 index 00000000000..4f9a463d9cc --- /dev/null +++ b/src/templates/en/2019/toc.html @@ -0,0 +1,10 @@ +
      + <% for (let heading of headings ) { %> +
    • + <%= heading.title %> + <% if (heading.children && heading.children.length) { %> + <%- include('toc.html', { headings: heading.children }) %> + <% } %> +
    • + <% } %> +
    diff --git a/src/tools/generate/generate_table_of_contents.js b/src/tools/generate/generate_table_of_contents.js index 17d693f6734..c0f17dc2b2a 100644 --- a/src/tools/generate/generate_table_of_contents.js +++ b/src/tools/generate/generate_table_of_contents.js @@ -6,24 +6,11 @@ const generate_table_of_contents = (html) => { dom.window.document.querySelectorAll('h1, h2, h3, h4, h5, h6') ); const starting_level = get_level(all_headings[0]); - const nested_headings = nest_headings(all_headings, starting_level); - const toc = generate_html(nested_headings); + const toc = nest_headings(all_headings, starting_level); return toc; }; -const generate_html = (headings) => { - const list = []; - for (const heading of headings) { - const a = `${heading.title}`; - const children = heading.children ? generate_html(heading.children) : ''; - const li = `
  • ${a}${children}
  • `; - list.push(li); - } - - return `
      ${list.join('')}
    `; -}; - // This is a recursive function to nest the headings. const nest_headings = (source, current_level = 1) => { // The list of headings to output. From f25cdfdb7f78343d760c3d05bcc02f97babe9215 Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Tue, 29 Oct 2019 08:05:37 +0200 Subject: [PATCH 08/15] Added prettier and html linting to the the generated output. --- src/content/en/2019/markup.md | 2 +- src/content/en/2019/pwa.md | 10 +++++----- src/package-lock.json | 6 ++++++ src/package.json | 1 + src/tools/generate/generate_chapters.js | 5 ++++- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/content/en/2019/markup.md b/src/content/en/2019/markup.md index 161f7b546aa..1efff4e2b9b 100644 --- a/src/content/en/2019/markup.md +++ b/src/content/en/2019/markup.md @@ -144,7 +144,7 @@ Additionally, 15% of desktop pages and 16% of mobile pages contain deprecated el desktop - <center> + <center> 7.96% 8.30% diff --git a/src/content/en/2019/pwa.md b/src/content/en/2019/pwa.md index 2783472f1d4..27655a3f8da 100644 --- a/src/content/en/2019/pwa.md +++ b/src/content/en/2019/pwa.md @@ -44,7 +44,7 @@ impressive, but taking traffic data from Chrome Platform Status into account, we [a service worker controlled 15% of all page loads](https://www.chromestatus.com/metrics/feature/timeline/popularity/990), which can be interpreted as popular, high-traffic sites increasingly having started to embrace service workers. - +``` timeseries chart of 11_01b ``` **Figure 1:** Service Worker installation over time for desktop and mobile @@ -73,11 +73,11 @@ cases that service workers enable are the most attractive feature for app develo push notifications. Due to its limited availability, and less common use case, background sync doesn’t play a significant role at the moment. - +``` bar chart of 11_03 mobile ``` **Figure 2a:** Service worker events on mobile, ordered by decreasing frequency. - +``` bar chart of 11_03 desktop ``` **Figure 2b:** Service worker events on desktop, ordered by decreasing frequency. @@ -90,11 +90,11 @@ We note that these stats don’t account for dynamically imported scripts throug [`importScripts()`](https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts) method, which likely skews the results higher. - +``` distribution of 11_03b mobile ``` **Figure 3a:** Percentiles of service worker file sizes on mobile. - +``` distribution of 11_03b desktop ``` **Figure 3b:** Percentiles of service worker file sizes on desktop. 
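For reference while reading the table of contents changes above: after these fixes, `nest_headings` returns an array of heading objects (`{ id, level, title }`, with a `children` array added when a heading has subsections), and that array is what the new `toc.html` include walks. A rough sketch of the intended shape, assuming a chapter whose headings run h2, h3, h3, h2 — the ids and titles below are invented for illustration:

```js
// Shape only — ids, titles, and nesting are made up; this is not output
// from the real generator.
const nested = [
  {
    id: 'introduction',
    level: 2,
    title: 'Introduction',
    children: [
      { id: 'background', level: 3, title: 'Background' },
      { id: 'terminology', level: 3, title: 'Terminology' }
    ]
  },
  { id: 'adoption', level: 2, title: 'Adoption' }
];

console.log(JSON.stringify(nested, null, 2));
```

Each nested `children` array is rendered by `toc.html` including itself, so deeper heading structures map onto nested lists.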
diff --git a/src/package-lock.json b/src/package-lock.json index 0a7e0609bcb..cc8e3685b59 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -764,6 +764,12 @@ "integrity": "sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=", "dev": true }, + "prettier": { + "version": "1.18.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-1.18.2.tgz", + "integrity": "sha512-OeHeMc0JhFE9idD4ZdtNibzY0+TPHSpSSb9h8FqtP+YnoZZ1sl8Vc9b1sasjfymH3SonAF4QcA2+mzHPhMvIiw==", + "dev": true + }, "pseudomap": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", diff --git a/src/package.json b/src/package.json index 36ad3e185ee..8b3eb637576 100644 --- a/src/package.json +++ b/src/package.json @@ -20,6 +20,7 @@ "ejs": "^2.7.1", "fs-extra": "^8.1.0", "jsdom": "^15.2.0", + "prettier": "^1.18.2", "showdown": "^1.9.0" } } diff --git a/src/tools/generate/generate_chapters.js b/src/tools/generate/generate_chapters.js index d735ac205bb..b512a945710 100644 --- a/src/tools/generate/generate_chapters.js +++ b/src/tools/generate/generate_chapters.js @@ -1,6 +1,8 @@ const fs = require('fs-extra'); const showdown = require('showdown'); const ejs = require('ejs'); +const prettier = require('prettier'); + const { generate_table_of_contents } = require('./generate_table_of_contents'); const converter = new showdown.Converter({ tables: true, metadata: true }); @@ -47,8 +49,9 @@ const write_template = async (language, year, chapter, metadata, body, toc) => { const path = `templates/${language}/${year}/chapters/${chapter}.html`; let html = await ejs.renderFile(template, { metadata, body, toc }); + let fomatted_html = prettier.format(html, { parser: "html" }); - await fs.outputFile(path, html, 'utf8'); + await fs.outputFile(path, fomatted_html, 'utf8'); await size_of(path); }; From 9ca34b2dd039f8737385f40a511ef80bbe86f0e0 Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Tue, 29 Oct 2019 08:08:31 +0200 Subject: [PATCH 09/15] Fixed a grammar error in the readme. --- src/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/README.md b/src/README.md index 89fd2621b09..f9e9ebed8c8 100644 --- a/src/README.md +++ b/src/README.md @@ -44,7 +44,7 @@ ptw ## Generating chapters -The chapter generation is dependent on nodejs, so is you will need to have [nodejs](https://nodejs.org/en/) installed as well. All of the following commands must be run from within the `src` directory by executing `cd src` first. +The chapter generation is dependent on nodejs, so you will need to have [nodejs](https://nodejs.org/en/) installed as well. All of the following commands must be run from within the `src` directory by executing `cd src` first. 1. Install the dependencies: From 478555a52a7351321c003ba1b462c97c200fa7d2 Mon Sep 17 00:00:00 2001 From: Mike Geyser Date: Tue, 29 Oct 2019 08:25:16 +0200 Subject: [PATCH 10/15] Put the unclosed pseudo html elements inside backticks, so the linting works. 
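Some context on why the backticks help: the chapter markdown still contains draft placeholders such as `<timeseries chart of 11_01b>`, which look like unclosed HTML elements by the time showdown has converted the markdown and prettier reformats the generated chapter. Wrapped in backticks they become escaped code spans instead. A minimal sketch of the difference, using one of the placeholders as example input (run from `src/` with the existing devDependencies):

```js
// Sketch only — the placeholder text is illustrative, not a real element.
const showdown = require('showdown');
const prettier = require('prettier');

const converter = new showdown.Converter();

// A bare placeholder tends to end up in the output as a raw, unclosed
// pseudo-element that an HTML formatter has to guess how to close.
console.log(converter.makeHtml('<timeseries chart of 11_01b>'));

// Backtick-wrapped, it becomes an escaped <code> span, which formats cleanly.
const fenced = converter.makeHtml('`<timeseries chart of 11_01b>`');
console.log(prettier.format(fenced, { parser: 'html' }));
```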
--- src/content/en/2019/pwa.md | 30 ++++++++++++++-------------- src/content/en/2019/seo.md | 20 +++++++++---------- src/content/en/2019/third-parties.md | 28 +++++++++++++------------- src/templates/en/2019/chapter.html | 4 ++-- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/content/en/2019/pwa.md b/src/content/en/2019/pwa.md index 27655a3f8da..ae35d1bcc37 100644 --- a/src/content/en/2019/pwa.md +++ b/src/content/en/2019/pwa.md @@ -44,7 +44,7 @@ impressive, but taking traffic data from Chrome Platform Status into account, we [a service worker controlled 15% of all page loads](https://www.chromestatus.com/metrics/feature/timeline/popularity/990), which can be interpreted as popular, high-traffic sites increasingly having started to embrace service workers. -``` timeseries chart of 11_01b ``` +``` ``` **Figure 1:** Service Worker installation over time for desktop and mobile @@ -73,11 +73,11 @@ cases that service workers enable are the most attractive feature for app develo push notifications. Due to its limited availability, and less common use case, background sync doesn’t play a significant role at the moment. -``` bar chart of 11_03 mobile ``` +``` ``` **Figure 2a:** Service worker events on mobile, ordered by decreasing frequency. -``` bar chart of 11_03 desktop ``` +``` ``` **Figure 2b:** Service worker events on desktop, ordered by decreasing frequency. @@ -90,11 +90,11 @@ We note that these stats don’t account for dynamically imported scripts throug [`importScripts()`](https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts) method, which likely skews the results higher. -``` distribution of 11_03b mobile ``` +``` ``` **Figure 3a:** Percentiles of service worker file sizes on mobile. -``` distribution of 11_03b desktop ``` +``` ``` **Figure 3b:** Percentiles of service worker file sizes on desktop. @@ -124,11 +124,11 @@ a non-trivial amount of mistyped properties, our favorite being `shot_name`. An is the `serviceworker` property which is standard, but not implemented by any browser vendor — nevertheless, it was found on 0.09% of all web app manifests used by mobile and desktop pages. - +`````` **Figure 4a:** Web App Manifest properties ordered by decreasing popularity on mobile. - +`````` **Figure 4b:** Web App Manifest properties ordered by decreasing popularity on desktop. @@ -139,11 +139,11 @@ By choosing `"standalone"`, they make sure no browser UI is shown to the end-use by the majority of apps that make use of the `prefers_related_applications` property: more that 97% of both mobile and desktop applications do *not* prefer native applications. -<11_04c mobile> +```<11_04c mobile>``` **Figure 5a:** Values for the `display` property on mobile. -<11_04c desktop> +```<11_04c desktop>``` **Figure 5b:** Values for the `display` property on desktop. @@ -155,11 +155,11 @@ web application. There were not too many manifests that made use of the property interesting to see the shift from *shopping* being the most popular category on mobile to *business*, *technology*, and *web* (whatever may be meant with that) on desktop that share the first place evenly. -<11_04d mobile> +```<11_04d mobile>``` **Figure 6a:** Values for the `categories` property on mobile. -<11_04d desktop> +```<11_04d desktop>``` **Figure 6b:** Values for the `categories` property on desktop. 
@@ -170,11 +170,11 @@ Lighthouse’s rule is probably the culprit for 192×192 being the most popular desktop and mobile, despite [Google’s documentation](https://developers.google.com/web/fundamentals/web-app-manifest#icons) additionally explicitly recommending 512×512, which doesn’t show as a particularly prominent option. -<11_04f mobile> +```<11_04f mobile>``` **Figure 7a:** Popular icon sizes on mobile. -<11_04f desktop> +```<11_04f desktop>``` **Figure 7b:** Popular icon sizes on desktop. @@ -184,11 +184,11 @@ in the Screen Orientation API specification. Namely there are `"any"`, `"natural `"portrait"`, `"portrait-primary"`, `"portrait-secondary"`, `"landscape-primary"`, and `"landscape-secondary"`. Portrait orientation is the clear winner on both platforms, followed by any orientation. -<11_04g mobile> +```<11_04g mobile>``` **Figure 8a:** Popular orientation values on mobile. -<11_04g desktop> +```<11_04g desktop>``` **Figure 8b:** Popular orientation values on desktop. diff --git a/src/content/en/2019/seo.md b/src/content/en/2019/seo.md index 25b28df587d..97ad6717628 100644 --- a/src/content/en/2019/seo.md +++ b/src/content/en/2019/seo.md @@ -38,13 +38,13 @@ We assessed the content on the pages by looking for groups of at least 3 words a The median desktop home page has 346 words, and the median mobile home page has a slightly lower word count at 306 words. This shows that mobile sites do serve a bit less content to their users, but at over 300 words, this is still a reasonable amount to read, especially for a home page which will naturally contain less content than an article page, for example. Overall the distribution of words is broad, with between 22 words at the 10th percentile and up to 1,361 at the 90th percentile. - +`````` #### Headings We also looked at whether pages are structured in a way that provides the right context for the content they contain. Headings (`H1`, `H2`, `H3`, etc) are used to format and structure a page and make content easier to read and parse. Despite the importance on headings, 10.67% of pages have no heading tags at all. - +`````` The median number of heading elements per page is 10, with 30 words (on mobile) and 32 words (on desktop) used in headings. This implies that the websites that utilize headings put significant effort in making sure that their pages are readable, descriptive, and clearly outline the page structure and context to search engine bots. @@ -52,7 +52,7 @@ In terms of specific heading length, the median length of the first `H1` element For advice on how to handle `H1`s and headings for SEO and accessibility, take a look at this [video response by John Mueller](https://www.youtube.com/watch?v=zyqJJXWk0gk) in the Ask Google Webmasters series. - +`````` ### Meta tags @@ -64,7 +64,7 @@ Page titles are an important way of communicating the purpose of a page to a use Even though [Google usually displays the first 50-60 characters of a page title](https://moz.com/learn/seo/title-tag) within a SERP, the median length of the `` tag was only 21 characters for mobile pages and 20 characters for desktop pages. Even the 75th percentile is still below the cutoff length. This suggests that some SEOs and content writers aren't making the most of the space allocated to them by search engines for describing their home pages in the SERPs. 
-<graph histogram length <title> Source: 10.07b, column C, desktop & mobile> +```<graph histogram length <title> Source: 10.07b, column C, desktop & mobile>``` #### Meta descriptions @@ -72,7 +72,7 @@ Compared to the `<title>` tag, fewer pages were detected to have a meta descript The median meta description length was also lower than the [recommended length of 155-160 characters](https://moz.com/learn/seo/meta-description), with desktop pages having descriptions of 123 characters. Interestingly, meta descriptions were consistently longer on mobile than on desktop, despite mobile SERPs traditionally having a shorter pixel limit. This limit has only been extended recently, so perhaps more website owners have been testing the impact of having longer, more descriptive meta descriptions for mobile results. -<graph histogram length <meta description> Source: 10.07c, column C, desktop & mobile> +```<graph histogram length <meta description> Source: 10.07c, column C, desktop & mobile>``` #### Image alt tags @@ -125,9 +125,9 @@ The number of internal and external links included on desktop pages were consist It's important to bear in mind that fewer internal links on the mobile version of a page [might cause an issue](https://moz.com/blog/internal-linking-mobile-first-crawl-paths) for your website. With [mobile-first indexing](https://www.deepcrawl.com/knowledge/white-papers/mobile-first-index-guide/), which for new websites is the default for Google, if a page is only linked from the desktop version and not present on the mobile version, search engines will have a much harder time discovering and ranking it. -<graph histogram count of links by type Source: 10.10, column C desktop only> +```<graph histogram count of links by type Source: 10.10, column C desktop only>``` -<graph histogram count of links by type Source: 10.10, column D, E, desktop only> +```<graph histogram count of links by type Source: 10.10, column D, E, desktop only>``` The median desktop page includes 70 internal (same-site) links, whereas the median mobile page has 60 internal links. The median number of external links per page follows a similar trend, with desktop pages including 10 external links, and mobile pages including 8. @@ -149,7 +149,7 @@ A fast-loading website is also crucial for a good user experience. Users that ha The metrics we used for our analysis of load speed across the web is based on the [Chrome UX Report](../methodology#chrome-ux-report) (CrUX), which collects data from real-world Chrome users. This data shows that an astonishing 63.47% of websites are labelled as **slow**. Split by device, this picture is even bleaker for tablet (82.00%) and phone (77.61%). In the context of our results, per the [PageSpeed Insights classification system](https://developers.google.com/speed/docs/insights/v5/about#categories), a slow website is defined as having 10% of First Contentful Paint (FCP) experiences taking over 2,500 ms or 5% of First Input Delay (FID) experiences measuring over 250 ms. -<graph data 10.15b: CruX image similar to [IMG](https://developers.google.com/web/updates/images/2018/08/crux-dash-fcp.png) per device + speed label> +```<graph data 10.15b: CruX image similar to [IMG](https://developers.google.com/web/updates/images/2018/08/crux-dash-fcp.png) per device + speed label>``` Although the numbers are bleak for the speed of the web, the good news is that SEO experts and tools have been focusing more and more on the technical challenges of speeding up websites. 
You can learn more about the state of web performance in the [Performance chapter](../performance). @@ -185,9 +185,9 @@ Internationalization is one of the most complex aspects of SEO, even [according While 38.40% of desktop sites (33.79% on mobile) have the HTML lang attribute set to English, only 7.43% (6.79% on mobile) of the sites also contain an `hreflang` link to another language version. This suggests that the vast majority of websites that we analyzed don't offer separate versions of their home page that would require language targeting -- unless these separate versions do exist, but haven't been configured correctly. -<graph 10.04b - [do we want to chart this data, e.g. what does it really mean for SEO?]> +```<graph 10.04b - [do we want to chart this data, e.g. what does it really mean for SEO?]>``` -<include a chart of the languages and country combinations found, SEOs will want to see this breakdown> +```<include a chart of the languages and country combinations found, SEOs will want to see this breakdown>``` Next to English, the most common languages are French, Spanish, and German. These are followed by languages targeted towards specific geographies like English for Americans (`en-us`) or more obscure combinations like Spanish for the Irish (`es-ie`). diff --git a/src/content/en/2019/third-parties.md b/src/content/en/2019/third-parties.md index a65163d589f..fdbba5cfcf9 100644 --- a/src/content/en/2019/third-parties.md +++ b/src/content/en/2019/third-parties.md @@ -47,9 +47,9 @@ This chapter divides third-party providers into one of these broad categories. A Third-party code is everywhere. 93% of pages include at least one third-party resource, 76% of pages issue a request to an analytics domain, the median page requests content from at least 9 _unique_ third-party domains that represent 35% of their total network activity, and the most active 10% of pages issue a whopping 175 third-party requests or more. It’s not a stretch to say that third parties are an integral part of the web. -<insert stylized value of metric 05_01> +```<insert stylized value of metric 05_01>``` -<insert stylized value of metric 05_02> +```<insert stylized value of metric 05_02>``` ### Categories @@ -57,7 +57,7 @@ If the ubiquity of third-party content is unsurprising, perhaps more interesting While advertising might be the most user-visible example of third-party presence on the web, analytics providers are the most common third-party category with 76% of sites including at least one analytics request. CDNs at 63%, ads at 57%, and developer utilities like Sentry, Stripe, and Google Maps SDK at 56% follow up as a close second, third, and fourth for appearing on the most web properties. The popularity of these categories forms the foundation of our web usage patterns identified later in the chapter. -<insert graphic of metric 05_11> +```<insert graphic of metric 05_11>``` ### Providers @@ -65,15 +65,15 @@ A relatively small set of providers dominate the third-party landscape, the top While much could be said about every individual provider’s popularity and performance impact, this more opinionated analysis is left as an exercise for the reader and other purpose-built tools such as [third-party-web](https://thirdpartyweb.today). -<insert table of metric 05_06> +```<insert table of metric 05_06>``` -<insert table of metric 05_09> +```<insert table of metric 05_09>``` ### Resource Types The resource type breakdown of third-party content also lends insight into how third-party code is used across the web. 
While first-party requests are 56% images, 23% script, 14% CSS, and only 4% HTML, third-party requests skew more heavily toward script and HTML at 32% script, 34% images, 12% HTML, and only 6% CSS. While this suggests that third-party code is less frequently used to aid the design and instead used more frequently to facilitate or observe interactions than first-party code, a breakdown of resource types by party status tells a more nuanced story. While CSS and images are dominantly first-party at 70% and 64% respectively, fonts are largely served by third-party providers with only 28% being served from first-party sources. This concept of usage patterns is explored in more depth later in this chapter. -<insert graphic of metric 05_03> +```<insert graphic of metric 05_03>``` Several other amusing factoids jump out from this data. Tracking pixels (image requests to analytics domains) make up 1.6% of all network requests, six times as many video requests are to social networks like Facebook and Twitter than dedicated video providers like YouTube and Vimeo (presumably because the default YouTube embed consists of HTML and a preview thumbnail but not an autoplaying video), and there are still more requests for first-party images than all scripts combined. @@ -81,7 +81,7 @@ Several other amusing factoids jump out from this data. Tracking pixels (image r 49% of all requests are third-party. At 51%, first-party can still narrowly hold on to the crown in 2019 of comprising the majority of the web resources. Given that just under half of all the requests are third-party yet a small set of pages do not include any at all, the most active third-party users must be doing quite a bit more than their fair share. Indeed, at the 75th, 90th, and 99th percentiles we see nearly all of the page being comprised of third-party content. In fact, for some sites heavily relying on distributed WYSIWYG platforms like Wix and SquareSpace, the root document might be the sole first-party request! -<insert graphic of metric 05_11> +```<insert graphic of metric 05_11>``` The number of requests issued by each third-party provider also varies considerably by category. While analytics are the most widespread third-party category across websites, they account for only 7% of all third-party network requests. Ads, on the other hand, are found on nearly 20% fewer sites yet make up 25% of all third-party network requests. Their outsized resource impact compared to their popularity will be a theme we continue to uncover in the remaining data. @@ -91,27 +91,27 @@ While 49% of requests are third-party, their share of the web in terms of bytes Despite serving 57% of scripts, third parties comprise 64% of script bytes. meaning their scripts are larger on average than first-party scripts. This is an early warning sign for their performance impact to come in the next few sections. -<insert graphic of metric 05_04> +```<insert graphic of metric 05_04>``` -<insert graphic of metric 05_12> +```<insert graphic of metric 05_12>``` As for specific third-party providers, the same juggernauts topping the request count leaderboards make their appearance in byte weight as well. The only few notable movements are the large, media-heavy providers such as YouTube, Shopify, and Twitter which climb to the top of the byte impact charts. 
-<insert table of metric 05_07> +```<insert table of metric 05_07>``` ### Script Execution 57% of script execution time is from third-party scripts, and the top 100 domains already account for 48% of all script execution time on the web. This underscores just how large an impact a select few entities really have on web performance. This topic is explored more in depth in the [Repercussions > Performance](#performance) section. -<insert graphic of metric 05_05> +```<insert graphic of metric 05_05>``` -<insert graphic of metric 05_13> +```<insert graphic of metric 05_13>``` The category breakdowns among script execution largely follow that of resource counts. Here too advertising looms largest. Ad scripts comprise 25% of third-party script execution time with hosting and social providers in a distant tie for second at 12%. -<insert table of metric 05_08> +```<insert table of metric 05_08>``` -<insert table of metric 05_10> +```<insert table of metric 05_10>``` While much could be said about every individual provider’s popularity and performance impact, this more opinionated analysis is left as an exercise for the reader and other purpose-built tools such as the previously mentioned [third-party-web](https://thirdpartyweb.today). diff --git a/src/templates/en/2019/chapter.html b/src/templates/en/2019/chapter.html index 7e7d56f8984..266aa0ccbf8 100644 --- a/src/templates/en/2019/chapter.html +++ b/src/templates/en/2019/chapter.html @@ -1,4 +1,4 @@ -{# IMPORTANT! +<!-- IMPORTANT! - `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: @@ -6,7 +6,7 @@ - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script -#} +--> {% extends "en/2019/base_chapter.html" %} From 4759869662f9ab1697ffc8b5ed2ad0dbbbbe0249 Mon Sep 17 00:00:00 2001 From: Mike Geyser <mikegeyser@gmail.com> Date: Tue, 29 Oct 2019 08:36:43 +0200 Subject: [PATCH 11/15] Added some error handling, so that if chapter generation fails - the processing will continue onto the next chapter. 
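In isolation the pattern is just a try/catch around the per-chapter work, so a single malformed chapter logs an error instead of aborting the whole run — the real change is in the hunk below. A simplified, self-contained sketch of the same idea (the file list and the `generate_one` stand-in are illustrative, not the actual pipeline):

```js
const fs = require('fs-extra');

// Stand-in for the real parse_file/write_template steps.
const generate_one = async (markdown) => {
  if (!markdown.trim()) throw new Error('Chapter is empty');
};

const generate_all = async (files) => {
  for (const file of files) {
    try {
      const markdown = await fs.readFile(file, 'utf-8');
      await generate_one(markdown);
    } catch (error) {
      // Log and carry on so the remaining chapters are still generated.
      console.error(error);
      console.error(` Failed to generate ${file}, moving onto the next one.`);
    }
  }
};

generate_all(['content/en/2019/http2.md']).catch(console.error);
```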
--- src/tools/generate/generate_chapters.js | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/tools/generate/generate_chapters.js b/src/tools/generate/generate_chapters.js index b512a945710..1b7a0e85c45 100644 --- a/src/tools/generate/generate_chapters.js +++ b/src/tools/generate/generate_chapters.js @@ -12,14 +12,19 @@ const generate_chapters = async () => { for (let language of await fs.readdir('content')) { for (let year of await fs.readdir(`content/${language}`)) { for (let file of await fs.readdir(`content/${language}/${year}`)) { - let markdown = await fs.readFile(`content/${language}/${year}/${file}`, 'utf-8'); - let chapter = file.replace('.md', ''); + try { + let markdown = await fs.readFile(`content/${language}/${year}/${file}`, 'utf-8'); + let chapter = file.replace('.md', ''); - console.log(`\n Generating chapter: ${language}, ${year}, ${chapter}`); + console.log(`\n Generating chapter: ${language}, ${year}, ${chapter}`); - let { metadata, body, toc } = await parse_file(markdown); + let { metadata, body, toc } = await parse_file(markdown); - await write_template(language, year, chapter, metadata, body, toc); + await write_template(language, year, chapter, metadata, body, toc); + } catch (error) { + console.error(error); + console.error(' Failed to generate chapter, moving onto the next one. '); + } } } } @@ -49,7 +54,7 @@ const write_template = async (language, year, chapter, metadata, body, toc) => { const path = `templates/${language}/${year}/chapters/${chapter}.html`; let html = await ejs.renderFile(template, { metadata, body, toc }); - let fomatted_html = prettier.format(html, { parser: "html" }); + let fomatted_html = prettier.format(html, { parser: 'html' }); await fs.outputFile(path, fomatted_html, 'utf8'); From eb25cc8dd706193501ee5bc51f0b46929fa756f3 Mon Sep 17 00:00:00 2001 From: Mike Geyser <mikegeyser@gmail.com> Date: Tue, 29 Oct 2019 09:22:55 +0200 Subject: [PATCH 12/15] Combined both sets of comments so that jinja will strip the body of the code comment, but prettier won't try and reformat it. --- src/templates/en/2019/chapter.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/templates/en/2019/chapter.html b/src/templates/en/2019/chapter.html index 266aa0ccbf8..522a316131a 100644 --- a/src/templates/en/2019/chapter.html +++ b/src/templates/en/2019/chapter.html @@ -1,4 +1,4 @@ -<!-- IMPORTANT! +<!--{# IMPORTANT! 
- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: @@ -6,7 +6,7 @@ - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script ---> +#}--> {% extends "en/2019/base_chapter.html" %} From c52dbc77cbb6b594808a4a37e9eb568830abc7a2 Mon Sep 17 00:00:00 2001 From: Rick Viscomi <rviscomi@gmail.com> Date: Tue, 29 Oct 2019 13:54:18 -0400 Subject: [PATCH 13/15] generate chapters --- src/templates/en/2019/chapters/http2.html | 1721 ++++++++++++----- src/templates/en/2019/chapters/markup.html | 1043 +++++++--- .../en/2019/chapters/performance.html | 796 ++++++-- src/templates/en/2019/chapters/pwa.html | 616 ++++-- src/templates/en/2019/chapters/seo.html | 831 ++++++++ .../en/2019/chapters/third-parties.html | 625 ++++-- 6 files changed, 4423 insertions(+), 1209 deletions(-) create mode 100644 src/templates/en/2019/chapters/seo.html diff --git a/src/templates/en/2019/chapters/http2.html b/src/templates/en/2019/chapters/http2.html index 4899295953f..8cc58b7e4d0 100644 --- a/src/templates/en/2019/chapters/http2.html +++ b/src/templates/en/2019/chapters/http2.html @@ -1,540 +1,1209 @@ -{# IMPORTANT! +<!--{# IMPORTANT! -- `chapter.html` is a "template for templates" used by the `generate_chapters.py` script, hence the strange template syntax (eg, double braces `{% ... %}`) +- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: - translate the corresponding language-specific templates (eg `src/templates/<lang>/<year>/chapter.html`) - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script -#} +#}--> -{% extends "en/2019/base_chapter.html" %} +{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} +<link rel="stylesheet" href="/static/css/chapter.css" /> +{% endblock %} {% set metadata = +{"part_number":"IV","chapter_number":20,"title":"HTTP/2","authors":["tunetheweb"],"reviewers":["bagder"," +rmarx"," dotjs"]} %} {% block main %} +<aside> + <ul> + <li> + <a href="#introduction">Introduction</a> + </li> -{% block styles %} -{{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css"> -{% endblock %} + <li> + <a href="#what-is-http2">What is HTTP/2?</a> + </li> + + <li> + <a href="#adoption-of-http2">Adoption of HTTP/2</a> + </li> + + <li> + <a href="#impact-of-http2">Impact of HTTP/2</a> + </li> + + <li> + <a href="#http2-push">HTTP/2 Push</a> + </li> -{% set metadata = {'part_number': 'IV', 'chapter_number': 20, 'title': 'HTTP/2', 'authors': ['bazzadp'], 'reviewers': ['bagder', 'rmarx', 'dotjs']} %} + <li> + <a href="#issues">Issues</a> + </li> + + <li> + <a href="#http3">HTTP/3</a> + </li> + + <li> + <a href="#conclusion">Conclusion</a> + </li> + </ul> +</aside> -{% block main %} <section 
class="main"> <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" class="chapter-hero"> - <p><img src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images//2019/20_HTTP_2/hero_xl.jpg" alt=""></p> -<h2>Introduction</h2> -<p>HTTP/2 was the first major update to the main transport protocol of the web in nearly 20 years. It arrived with a wealth of expectations: it promised a free performance boost with no downsides. More than that - we could stop doing all the hacks and work arounds that HTTP/1.1 forced us into to get around its inefficiencies. Bundling, spriting, inlining and even sharding domains would all become anti-patterns in an HTTP/2 world, giving performance by default. This meant even those without the skills and resources to concentrate on web performance would suddenly have performant websites. The reality has been, as ever, a little more nuanced than that. It has been over four years since its formal approval as a <a href="https://tools.ietf.org/html/rfc7540">standard in May 2015 as RFC 7540</a>, so now is a good time to look over how this relatively new technology has fared in the real world.</p> -<h2>What is HTTP/2?</h2> -<p>For those not familiar with the technology a bit of background is helpful to make the most of the metrics and findings in this chapter. Up until recently HTTP has always been a text-based protocol. An HTTP client like a web browser opened a TCP connection to a server, and then sent an HTTP command like <code>GET /index.html</code> to ask for a resource. This was enhanced in HTTP/1.0 to add <em>HTTP headers</em> so various pieces of meta data could be made in addition to the request (what browser it is, formats it understands...etc.). These HTTP headers were also text-based and separated by newline characters. Servers parsed the incoming requests by reading the request and any HTTP headers line by line, and then the server responded, with its own HTTP response headers and the actual resource being requested. The protocol seemed simple, but that also meant certain limitations - the main one being that HTTP was basically synchronous: once an HTTP request had been sent, the whole TCP connection was basically off limits for anything else until the response had come back and been read and processed. This was incredibly inefficient and required multiple TCP connections (browsers typically use 6) to allow a limited form of parallelization. That in itself brings its own issues as TCP connections take time and resources to set up and get to full efficiency, especially when using HTTPS which is standard nowadays. HTTP/1.1 improved it somewhat allowing reuse of TCP connections for subsequent requests but still did not solve the parallelization issue.</p> -<p>Despite HTTP being text-based, the reality is that it was rarely used to transport text, at least in it's raw format. While it was true that HTTP headers were still text, the payloads themselves often were not. Text files like HTML, JS and CSS are usually <a href="#">compressed</a> for transport into a binary format using gzip, brotli or similar and non-text files like images, videos... etc. are served in their own formats. The whole HTTP message is then often wrapped in HTTPS to encrypt the messages for security reasons. So, the web had basically moved on from text-based transport a long time ago, but HTTP had not. 
One reason for this stagnation was because it was so difficult to introduce any breaking changes to such a ubiquitous protocol like HTTP (previous efforts had tried and failed). Many routers, firewalls and other middleboxes understand HTTP and would react badly to major changes to it. Upgrading them all to support a new version is simply not possible.</p> -<p>In 2009 Google announced they were working on an alternative to the text-based HTTP called SPDY. This would take advantage of the fact that HTTP messages were often encrypted in HTTPS which prevents them being read and interfered with en route. Google controlled one of the most popular browsers (Chrome) and some of the most popular websites (Google, YouTube, Gmail...etc.) - so both ends of the connection. Google's idea was to pack HTTP messages into a proprietary format, send them across the internet, and then unpacked them on the other side. The proprietary format (SPDY) was binary-based rather than text-based which solved some of the main performance problems with HTTP/1.1 by allowing more efficient use of a single TCP connection, negating the need to open the 6 connections that had become the norm under HTTP/1.1. By using SPDY in the real world they were able to prove it was more performant for real users, and not just because of some lab-based experimental results. After rolling out SPDY to all Google websites, other servers and browser started implementing it, and then it was time to standardize this proprietary format into an internet standard and thus HTTP/2 was born.</p> -<p>HTTP/2 has the following key concepts:</p> -<ul> -<li>Binary format</li> -<li>Multiplexing</li> -<li>Flow Control</li> -<li>Prioritization</li> -<li>Header compression</li> -<li>Push</li> -</ul> -<p><em>Binary format</em>, means that HTTP/2 messages are wrapped into <em>frames</em> of a pre-defined format. This means HTTP messages are easier to parse and no longer require scanning for newline characters. This is better for security as there <a href="https://www.owasp.org/index.php/HTTP_Response_Splitting">were a number of exploits for previous versions of HTTP</a>. It also means HTTP/2 connections can be <em>multiplexed</em>: different frames for different <em>streams</em> can be sent on the same connection without interfering with each other as each Frame includes a Stream Identifier and its length. Multiplexing allows much more efficient use of a single TCP connection without the overhead of opening additional connections. Ideally we would open a single connection per domain (<a href="https://daniel.haxx.se/blog/2016/08/18/http2-connection-coalescing/">or even for multiple domains</a>!).</p> -<p>Having separate streams does introduce some complexities along with some potential benefits. HTTP/2 needs the concept of <em>flow control</em> to allow the different streams to send data at different rates, whereas previously, with only one response in flight at any one time, this was controlled at a connection level by TCP flow control. <em>Prioritization</em> similarly allows multiple requests to be sent together but with the most important requests getting more of the bandwidth.</p> -<p>Finally, HTTP/2 introduced two new concepts: <em>header compression</em> allowed those text-based HTTP headers to be sent more efficiently (using an HTTP/2-specific <em><a href="https://tools.ietf.org/html/rfc7541">HPACK</a></em> format for security reasons) and <em>HTTP/2 push</em> allowed more than one response to be sent in answer to a request. 
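To make the binary framing described above a little more concrete, here is a minimal sketch (not taken from the chapter's analysis) that unpacks the fixed 9-byte HTTP/2 frame header defined in RFC 7540, section 4.1 - a 24-bit payload length, an 8-bit type, an 8-bit flags field and a 31-bit stream identifier - using only Python's standard library. The sample bytes are invented purely for illustration.

```python
import struct

def parse_frame_header(data: bytes) -> dict:
    """Unpack the fixed 9-byte HTTP/2 frame header (RFC 7540, section 4.1)."""
    if len(data) < 9:
        raise ValueError("an HTTP/2 frame header is always 9 bytes")
    length = int.from_bytes(data[0:3], "big")        # 24-bit payload length
    frame_type, flags = data[3], data[4]             # 8-bit type, 8-bit flags
    stream_id = struct.unpack(">I", data[5:9])[0] & 0x7FFFFFFF  # drop the reserved bit
    return {"length": length, "type": frame_type, "flags": flags, "stream_id": stream_id}

# Hypothetical HEADERS frame header: 13-byte payload, type 0x1 (HEADERS),
# flags 0x4 (END_HEADERS), on stream 1.
sample = bytes([0x00, 0x00, 0x0D, 0x01, 0x04, 0x00, 0x00, 0x00, 0x01])
print(parse_frame_header(sample))  # {'length': 13, 'type': 1, 'flags': 4, 'stream_id': 1}
```

It is exactly this explicit length and stream identifier on every frame that makes multiplexing workable on one connection. Back now to the second of those new concepts, HTTP/2 push.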
This allowed the server to "push" resources before a client was even aware it needed them. Push was supposed to solve the performance workaround of having to inline resources like CSS and JavaScript directly into HTML to prevent holding up the page while those resources were requested. With HTTP/2 the CSS and JavaScript could remain as external files but be pushed along with the initial HTML, so they were available immediately. Subsequent page requests would not push these resources, since they would now be cached, and so would not waste bandwidth.</p> -<p>This whistle-stop tour of HTTP/2 gives the main history and concepts of the newish protocol. As should be apparent from this explanation, the main benefit of HTTP/2 is to address performance limitations of the HTTP/1.1 protocol. There were also security improvements as well - perhaps most importantly in being to address performance issues of using HTTPS since HTTP/2, even over HTTPS, is <a href="https://www.httpvshttps.com/">often much faster than plain HTTP</a>. Other than the web browser packing the HTTP messages into the new binary format, and the web server unpacking it at the other side, the core basics of HTTP itself stayed roughly the same. This means web applications do not need to make any changes to support HTTP/2 as the browser and server take care of this. Turning it on should be a free performance boost and because of this adoption should be relatively easy. Of course, there are ways web developers can optimize for HTTP/2 to take full advantage of how it differs.</p> -<h2>Adoption of HTTP/2</h2> -<p>As mentioned above, Internet protocols are often difficult to adopt since they are ingrained into so much of the infrastructure that makes up the internet. This makes introducing any changes slow and difficult. IPv6 for example has been around for 20 years but has <a href="https://www.google.com/intl/en/ipv6/statistics.html">struggled to be adopted</a>. HTTP/2 however, was different as it was effectively hidden in HTTPS (at least for the browser uses cases) removing barriers to adoption as long as both the browser and server supported it. Browser support has been very strong for some time and the advent of auto updating <em>evergreen</em> browsers has meant that an estimated <a href="https://caniuse.com/#feat=http2">95% of global users support HTTP/2 now</a>. For this Web Almanac we use HTTP Archive which runs a Chrome web crawler on the approximately 5 million top websites (on both Desktop and Mobile with a slightly different set for each). 
This shows that HTTP/2 usage is now the majority protocol - an impressive feat just 4 short years after formal standardization:</p> -<p><img src="https://github.com/bazzadp/almanac.httparchive.org/raw/http2_chapter/src/static/images/2019/20_HTTP_2/http2usage.png" alt=""></p> -<p><strong>Figure 1 - <a href="https://httparchive.org/reports/state-of-the-web#h2">HTTP/2 usage by request</a></strong></p> -<p>Looking at the breakdown of all HTTP versions by request we see the following:</p> -<table> -<thead><tr> -<th>Protocol</th> -<th>Desktop</th> -<th>Mobile</th> -<th>Both</th> -</tr> -</thead> -<tbody> -<tr> -<td></td> -<td>5.60%</td> -<td>0.57%</td> -<td>2.97%</td> -</tr> -<tr> -<td>HTTP/0.9</td> -<td>0.00%</td> -<td>0.00%</td> -<td>0.00%</td> -</tr> -<tr> -<td>HTTP/1.0</td> -<td>0.08%</td> -<td>0.05%</td> -<td>0.06%</td> -</tr> -<tr> -<td>HTTP/1.1</td> -<td>40.36%</td> -<td>45.01%</td> -<td>42.79%</td> -</tr> -<tr> -<td>HTTP/2</td> -<td>53.96%</td> -<td>54.37%</td> -<td>54.18%</td> -</tr> -</tbody> -</table> -<p><strong>Figure 2 - HTTP version usage by request</strong></p> -<p>This shows that HTTP/1.1 and HTTP/2 are the versions used by the vast majority of requests as expected. There are only a very small number of requests on the older HTTP/1.0 and HTTP/0.9 protocols. Annoyingly there is a larger percentage where the protocol was not correctly tracked by the HTTP Archive crawl, particularly on desktop. Digging into this has shown various reasons, some of which I can explain and some of which I can't. Based on spot checks they mostly appear to be HTTP/1.1 requests and, assuming they are, desktop and mobile usage is similar. Despite there being a little larger percentage of noise than I'd like, it doesn't alter the overall message being conveyed here. Other than that, the mobile/desktop similarity is not unexpected - the HTTP Archive crawls using Chrome which supports HTTP/2 for both desktop and mobile. Real world usage may have slightly different stats with some older usage of browsers on both but even then support is widespread so I would not expect a large variation between desktop and mobile.</p> -<p>At present the HTTP Archive does not track HTTP over QUIC (soon to be standardized as HTTP/3) separately, so these are listed under HTTP/2 but we'll look at other ways of measuring that later in this chapter.</p> -<p>Looking at the number of requests will skew the results somewhat due to popular requests. For example, many sites load Google Analytics, which does support HTTP/2, and so would show as an HTTP/2 request even if the embedding site itself does not support HTTP/2. On the other hand, popular websites (that tend to support HTTP/2) are also underrepresented in the above stats as they are only measured once (e.g. google.com and obscuresite.com are given equal weighting). There are lies, damn lies and statistics. However, looking at other sources (for example the <a href="https://telemetry.mozilla.org/new-pipeline/dist.html#!cumulative=0&measure=HTTP_RESPONSE_VERSION">Mozilla telemetry</a> which looks at real-world usage through the Firefox browser) shows similar statistics.</p> -<p>It is still interesting to look at home pages only to get a rough figure on the number of sites that support HTTP/2 (at least on their home page). 
Figure 3 shows less support than overall requests, as expected, at around 36%:</p> -<table> -<thead><tr> -<th>Protocol</th> -<th>Desktop</th> -<th>Mobile</th> -<th>Both</th> -</tr> -</thead> -<tbody> -<tr> -<td></td> -<td>0.09%</td> -<td>0.08%</td> -<td>0.08%</td> -</tr> -<tr> -<td>HTTP/1.0</td> -<td>0.09%</td> -<td>0.08%</td> -<td>0.09%</td> -</tr> -<tr> -<td>HTTP/1.1</td> -<td>62.36%</td> -<td>63.92%</td> -<td>63.22%</td> -</tr> -<tr> -<td>HTTP/2</td> -<td>37.46%</td> -<td>35.92%</td> -<td>36.61%</td> -</tr> -</tbody> -</table> -<p><strong>Figure 3 - HTTP version usage for home pages</strong></p> -<p>HTTP/2 is only supported by browsers over HTTPS, even though officially HTTP/2 can be used over HTTPS or over unencrypted non-HTTPS connections. As mentioned previously, hiding the new protocol in encrypted HTTPS connections prevents networking appliances which do not understand this new protocol from interfering with (or rejecting!) its usage. Additionally, the HTTPS handshake allows an easy method of the client and server agreeing to use HTTP/2. The web is moving to HTTPS and HTTP/2 turns the traditional argument of HTTPS being bad for performance almost completely on its head. Not every site has made the transition to HTTPS, so HTTP/2 will not even be available to those that have not. Looking at just those sites that use HTTPS, we do see a higher percentage support HTTP/2 at around 55% - similar to the first <em>all requests</em> statistic we started with:</p> -<table> -<thead><tr> -<th>Protocol</th> -<th>Desktop</th> -<th>Mobile</th> -<th>Both</th> -</tr> -</thead> -<tbody> -<tr> -<td></td> -<td>0.09%</td> -<td>0.10%</td> -<td>0.09%</td> -</tr> -<tr> -<td>HTTP/1.0</td> -<td>0.06%</td> -<td>0.06%</td> -<td>0.06%</td> -</tr> -<tr> -<td>HTTP/1.1</td> -<td>45.81%</td> -<td>44.31%</td> -<td>45.01%</td> -</tr> -<tr> -<td>HTTP/2</td> -<td>54.04%</td> -<td>55.53%</td> -<td>54.83%</td> -</tr> -</tbody> -</table> -<p><strong>Figure 4 - HTTP version usage for HTTPS home pages</strong></p> -<p>We have shown that browser support is strong, and there is a safe road to adoption, so why does every site (or at least every HTTPS site) not support HTTP/2? Well here we come to the final item for support we have not measured yet: server support. This is more problematic than browser support as, unlike modern browsers, servers often do not automatically upgrade to the latest version. Even when the server is regularly maintained and patched that will often just apply security patches rather than new features like HTTP/2. 
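Because the protocol is agreed during the TLS handshake via ALPN, whether a given server will actually speak HTTP/2 to a browser can be checked from the outside. A rough sketch using only Python's standard library (the hostname is a placeholder, not one of the crawled sites) offers h2 and reports what the server selects:

```python
import socket
import ssl

def negotiated_protocol(host: str, port: int = 443) -> str:
    """Offer h2 and http/1.1 via ALPN and return whichever the server selects."""
    ctx = ssl.create_default_context()
    ctx.set_alpn_protocols(["h2", "http/1.1"])
    with socket.create_connection((host, port), timeout=5) as sock:
        with ctx.wrap_socket(sock, server_hostname=host) as tls:
            # selected_alpn_protocol() is None when the server ignores ALPN entirely
            return tls.selected_alpn_protocol() or "http/1.1 (no ALPN)"

print(negotiated_protocol("example.com"))  # placeholder host; prints "h2" for an HTTP/2-capable server
```

A server that never answers h2 here ends up in the HTTP/1.1 rows above, however well patched it otherwise is.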
Let us look first at the server HTTP header for those sites that do support HTTP/2:</p> -<table> -<thead><tr> -<th>Server</th> -<th>Desktop</th> -<th>Mobile</th> -<th>Both</th> -</tr> -</thead> -<tbody> -<tr> -<td>nginx</td> -<td>34.04%</td> -<td>32.48%</td> -<td>33.19%</td> -</tr> -<tr> -<td>cloudflare</td> -<td>23.76%</td> -<td>22.29%</td> -<td>22.97%</td> -</tr> -<tr> -<td>Apache</td> -<td>17.31%</td> -<td>19.11%</td> -<td>18.28%</td> -</tr> -<tr> -<td></td> -<td>4.56%</td> -<td>5.13%</td> -<td>4.87%</td> -</tr> -<tr> -<td>LiteSpeed</td> -<td>4.11%</td> -<td>4.97%</td> -<td>4.57%</td> -</tr> -<tr> -<td>GSE</td> -<td>2.16%</td> -<td>3.73%</td> -<td>3.01%</td> -</tr> -<tr> -<td>Microsoft-IIS</td> -<td>3.09%</td> -<td>2.66%</td> -<td>2.86%</td> -</tr> -<tr> -<td>openresty</td> -<td>2.15%</td> -<td>2.01%</td> -<td>2.07%</td> -</tr> -<tr> -<td>...</td> -<td>...</td> -<td>...</td> -<td>...</td> -</tr> -</tbody> -</table> -<p><strong>Figure 5 - Servers used for HTTP/2</strong></p> -<p>Nginx provides package repos that allow ease of installing or upgrading to the latest version, so it is no surprise to see it leading the way here. Cloudflare is the <a href="#">most popular CDNs</a> and enables HTTP/2 by default so again it is also not surprising to see this as a large percentage of HTTP/2 sites. Incidently, Cloudflare uses <a href="https://blog.cloudflare.com/nginx-structural-enhancements-for-http-2-performance/">a heavily customised version of nginx as their web server</a>. After this we see Apache at around 20% of usage, followed by some servers who choose to hide what they are and then the smaller players (LiteSpeed, IIS, Google Servlet Engine and openresty - which is nginx based).</p> -<p>What is more interesting is those sites that that do <em>not</em> support HTTP/2:</p> -<table> -<thead><tr> -<th>Server</th> -<th>Desktop</th> -<th>Mobile</th> -<th>Both</th> -</tr> -</thead> -<tbody> -<tr> -<td>Apache</td> -<td>46.76%</td> -<td>46.84%</td> -<td>46.80%</td> -</tr> -<tr> -<td>nginx</td> -<td>21.12%</td> -<td>21.33%</td> -<td>21.24%</td> -</tr> -<tr> -<td>Microsoft-IIS</td> -<td>11.30%</td> -<td>9.60%</td> -<td>10.36%</td> -</tr> -<tr> -<td></td> -<td>7.96%</td> -<td>7.59%</td> -<td>7.75%</td> -</tr> -<tr> -<td>GSE</td> -<td>1.90%</td> -<td>3.84%</td> -<td>2.98%</td> -</tr> -<tr> -<td>cloudflare</td> -<td>2.44%</td> -<td>2.48%</td> -<td>2.46%</td> -</tr> -<tr> -<td>LiteSpeed</td> -<td>1.02%</td> -<td>1.63%</td> -<td>1.36%</td> -</tr> -<tr> -<td>openresty</td> -<td>1.22%</td> -<td>1.36%</td> -<td>1.30%</td> -</tr> -<tr> -<td>...</td> -<td>...</td> -<td>...</td> -<td>...</td> -</tr> -</tbody> -</table> -<p><strong>Figure 6 - Servers used for HTTP/1.1 or lower</strong></p> -<p>Some of this will be non-HTTPS traffic that would use HTTP/1.1 even if the server supported HTTP/2, but a bigger issue is those that do not support HTTP/2. In these stats we see a much greater share for Apache and IIS which are likely running older versions. For Apache in particular it is often not easy to add HTTP/2 support to an existing installation as Apache does not provide an official repository to install this from. This often means resorting to compiling from source or trusting a third-party repo - neither of which is particularly appealing to many administrators. Only the latest versions of Linux distributions (RHEL and CentOS 8, Ubuntu 18 and Debian 9) come with a version of Apache which supports HTTP/2 and many servers are not running those yet. 
On the Microsoft side only Windows Server 2016 and above supports HTTP/2 so again those running older versions cannot support this in IIS. Merging these two stats together we can see the percentage of installs, of each server, that uses HTTP/2:</p> -<table> -<thead><tr> -<th>Server</th> -<th>Desktop</th> -<th>Mobile</th> -</tr> -</thead> -<tbody> -<tr> -<td>cloudflare</td> -<td>85.40%</td> -<td>83.46%</td> -</tr> -<tr> -<td>LiteSpeed</td> -<td>70.80%</td> -<td>63.08%</td> -</tr> -<tr> -<td>openresty</td> -<td>51.41%</td> -<td>45.24%</td> -</tr> -<tr> -<td>nginx</td> -<td>49.23%</td> -<td>46.19%</td> -</tr> -<tr> -<td>GSE</td> -<td>40.54%</td> -<td>35.25%</td> -</tr> -<tr> -<td></td> -<td>25.57%</td> -<td>27.49%</td> -</tr> -<tr> -<td>Apache</td> -<td>18.09%</td> -<td>18.56%</td> -</tr> -<tr> -<td>Microsoft-IIS</td> -<td>14.10%</td> -<td>13.47%</td> -</tr> -<tr> -<td>...</td> -<td>...</td> -<td>...</td> -</tr> -</tbody> -</table> -<p><strong>Figure 7 - percentage installs of each server used to provide HTTP/2</strong></p> -<p>It's clear Apache and IIS fall way behind with 18% and 14% of their installed based supporting HTTP/2, and this has to be at least in part, a consequence of it being more difficult to upgrade them. A full operating system upgrade is often required for many to get this support easily. Hopefully this will get easier as new versions of operating systems become the norm. None of this is a comment on the HTTP/2 implementations here (<a href="https://twitter.com/tunetheweb/status/988196156697169920?s=20">I happen to think Apache has one of the best implementations</a>), but more in the ease of enabling HTTP/2 in each of these servers - or lack thereof.</p> -<h2>Impact of HTTP/2</h2> -<p>The impact of HTTP/2 is a much more difficult to measure statistic, especially using the HTTP Archive methodology. Ideally sites should be crawled with both HTTP/1.1 and HTTP/2 and the difference measured but that is not possible with the statistics we are investigating here. Additionally, measuring whether the average HTTP/2 site is faster than the average HTTP/1.1 site introduces too many other variables that I feel requires a more exhaustive study than we can cover here.</p> -<p>One impact that can be measured is in the changing use of HTTP now we are in an HTTP/2 world. Multiple connections were a work around with HTTP/1.1 to allow a limited form of parallelization, but this is in fact the opposite of what usually works best with HTTP/2. A single connection reduces the overhead of TCP setup, TCP slow start, HTTPS negotiation and also allows the potential of cross-request prioritization. The HTTP Archive measures the number of TCP connections per page and that is dropping steadily as more sites support HTTP/2 and use its single connection instead of 6 separate connections:</p> -<p><img src="https://github.com/bazzadp/almanac.httparchive.org/raw/http2_chapter/src/static/images/2019/20_HTTP_2/TCPconnections.png" alt=""></p> -<p><strong>Figure 8 - <a href="https://httparchive.org/reports/state-of-the-web#tcp">TCP connections per page</a></strong></p> -<p>Bundling assets to obtain fewer requests was another HTTP/1.1 workaround that went by many names: bundling, concatenation, packaging, spriting, ... etc. 
It is less necessary when using HTTP/2 as there is less overhead with requests but it should be noted that requests are not free in HTTP/2 and <a href="https://engineering.khanacademy.org/posts/js-packaging-http2.htm">those that experimented with removing bundling completely have noticed a loss in performance</a>. Looking at the number of requests loaded by page over time, we do see a slight decrease in requests, rather than the expected increase:</p> -<p><img src="https://github.com/bazzadp/almanac.httparchive.org/raw/http2_chapter/src/static/images/2019/20_HTTP_2/numresources.png" alt=""></p> -<p><strong>Figure 9 - <a href="https://httparchive.org/reports/state-of-the-web#reqTotal">Total Requests per page</a></strong></p> -<p>This low rate of change can perhaps be attributed to the aforementioned observations that bundling cannot be removed (at least completely) without a negative performance impact and that many build tools currently bundle for historical reasons based on HTTP/1.1 recommendations. It is also likely that many sites may not be willing to penalize HTTP/1.1 users by undoing their HTTP/1.1 performance hacks just yet, or at least that they do not have the confidence (or time!) to feel this is worthwhile. That the number of requests is staying roughly static, and against the background of an ever increasing <a href="#">page weight</a> is interesting though perhaps not really related to HTTP/2.</p> -<h2>HTTP/2 Push</h2> -<p>HTTP/2 push has a mixed history despite being a much-hyped new feature of HTTP/2. The other features were basically under the hood performance improvements, but push was a brand-new concept that completely broke the single request to single response nature of HTTP up until then. It allowed extra responses to be returned: when you asked for the web page, the server could respond with the HTML page as usual, but then also send you the critical CSS and JavaScript, thus avoiding any additional round trips for certain resources. It would in theory allow us to stop inlining CSS and JavaScript into our HTML and yet still get the same performance gains of doing it. After solving that, it could potentially lead to all sorts of new and interesting use cases.</p> -<p>The reality has been... well, a bit disappointing. HTTP/2 push has proved much harder than originally envisaged to use effectively. Some of this has been due to <a href="https://jakearchibald.com/2017/h2-push-tougher-than-i-thought/">the complexity of how HTTP/2 push works</a>, and the implementation issues due to that. A bigger concern is that push can quite easily cause, rather than solve, performance issues. Over-pushing is a real risk. Often the browser is in the best place to decide <em>what</em> to request, and just as crucially <em>when</em> to request it but HTTP/2 push puts that responsibility on the server. Pushing resources that a browser already has in its cache, is a waste of bandwidth (though in my opinion so is inlining CSS but that gets must less of a hard time about that than HTTP/2 push!). <a href="https://lists.w3.org/Archives/Public/ietf-http-wg/2019JanMar/0033.html">Proposals to inform the server about the status of the browser cache have stalled</a> especially on privacy concerns. Even without that problem, there are other potential issues if push is not used correctly. 
For example, pushing large images and therefore holding up the sending of critical CSS and JavaScript will lead to slower websites than if you'd not pushed at all!</p> -<p>There has also been very little evidence to date that push, even when implemented correctly, results in the performance increase it promised. This is an area that again the HTTP Archive is not best placed to answer, due to the nature of how it runs (a monthly crawl of popular sites using Chrome in one state) so we won't delve into it too much here, but suffice to say that the performance gains are far from clear cut and the potential problems are real.</p> -<p>Putting that aside let's look at the usage of HTTP/2 push:</p> -<table> -<thead><tr> -<th>Client</th> -<th>Sites Using HTTP/2 Push</th> -<th>Sites Using HTTP/2 Push (%)</th> -</tr> -</thead> -<tbody> -<tr> -<td>Desktop</td> -<td>22,581</td> -<td>0.52%</td> -</tr> -<tr> -<td>Mobile</td> -<td>31,452</td> -<td>0.59%</td> -</tr> -</tbody> -</table> -<p><strong>Figure 10 - Sites using HTTP/2 push</strong></p> -<p>These statistics show that the uptake of HTTP/2 push is very low - most likely because of the issues described previously. However, when sites do use push, they tend to use it a lot rather than for one or two assets, as shown in Figure 11:</p> -<table> -<thead><tr> -<th>Client</th> -<th>Avg Pushed Requests</th> -<th>Avg KB Pushed</th> -</tr> -</thead> -<tbody> -<tr> -<td>Desktop</td> -<td>7.86</td> -<td>162.38</td> -</tr> -<tr> -<td>Mobile</td> -<td>6.35</td> -<td>122.78</td> -</tr> -</tbody> -</table> -<p><strong>Figure 11 - How much is pushed when it is used</strong></p> -<p>This is a concern as previous advice has been to be conservative with push and to <a href="https://docs.google.com/document/d/1K0NykTXBbbbTlv60t5MyJvXjqKGsCVNYHyLEXIxYMv0/edit">"push just enough resources to fill idle network time, and no more"</a>. The above statistics suggest that many resources, of a significant combined size, are pushed. Looking at what is pushed we see the data in Figure 12:</p> -<p><img src="https://github.com/bazzadp/almanac.httparchive.org/raw/http2_chapter/src/static/images/2019/20_HTTP_2/whatpushisusedfor.png" alt=""></p> -<p><strong>Figure 12 - What asset types is push used for?</strong></p> -<p>JavaScript and then CSS are the overwhelming majority of pushed items, both by volume and by bytes. After this there is a ragtag assortment of images, fonts, data, ...etc. At the tail end we see around 100 sites pushing video - which may be intentional or may be a sign of over-pushing the wrong types of assets!</p> -<p>One concern raised by some is that HTTP/2 implementations have repurposed the preload HTTP link header as a signal to push. One of the most popular uses of the preload <a href="#">resource hint</a> is to inform the browser of late-discovered resources, like fonts and images, that the browser will not see until the CSS for example has been requested, downloaded and parsed. If these are now pushed based on that header, there was a concern that reusing this may result in a lot of unintended pushes. However, the relatively low usage of fonts and images may mean that risk is not being seen as much as was feared. <code><link rel="preload" ... ></code> tags are often used in the HTML rather than HTTP link headers and the meta tags are not a signal to push. 
Statistics in the <a href="#">resource hint</a> chapter show that less than 1% of sites use the preload HTTP link header, and about the same amount use preconnection which has no meaning in HTTP/2, so this would suggest this is not so much of an issue. Though there are a number of fonts and other assets being pushed, which may be a signal of this. As a counter argument to those complaints, if an asset is important enough to preload, then it could be argued these assets should be pushed if possible as browsers treat a preload hints as very high priority requests anyway. Any performance concern is therefore (again arguably) at the overuse of preload, rather than the resulting HTTP/2 push that happens because of this.</p> -<p>To get around this unintended push, you can provide the <code>nopush</code> attribute in your preload header:</p> -<pre><code>link: </assets/jquery.js>; rel=preload; as=script; nopush -</code></pre> -<p>It looks like 5% of preload HTTP headers do make use of this attribute, which is higher than I would have expected as I would have considered this a niche optimization. Then again, so is the use of preload HTTP headers and/or HTTP/2 push itself!</p> -<h2>Issues</h2> -<p>HTTP/2 is mostly a seamless upgrade that, once your server supports it, you can switch on with no need to change your website or application. Of course, you can optimize for HTTP/2 or stop using HTTP/1.1 workarounds as much, but in general a site will usually work without needing any changes - but just be faster. There are a couple of gotchas to be aware of however that can impact any upgrade and some sites have found these out the hard way.</p> -<p>One cause of issues in HTTP/2 is the poor support of HTTP/2 prioritization. This feature allows multiple requests in progress to make the appropriate use of the connection. This is especially important since HTTP/2 has massively increased the number of requests that can be running on the same connection. 100 or 128 parallel requests limits are common in server implementations. Previously the browser had a max of 6 connections per domain and so used its skill and judgement to decide how best to use those connections. Now it rarely needs to queue and can send all requests as soon as it knows about them. This then can lead to the bandwidth being "wasted" on lower priority requests while critical requests are delayed (and incidentally <a href="https://www.lucidchart.com/techblog/2019/04/10/why-turning-on-http2-was-a-mistake/">can also lead to swamping your backend server with more requests than it is used to!</a>). HTTP/2 has a complex prioritization model (too complex many say - hence why it is being reconsidered for HTTP/3!) but few servers honor that properly. This can be because their HTTP/2 implementations are not up to scratch or because of so called <em>bufferbloat</em> where the responses are already en route before the server realizes there is a higher priority request. Due to the varying nature of servers, TCP stacks and locations it is difficult to measure this for most sites, but with CDNs this should be more consistent. <a href="https://twitter.com/patmeenan">Patrick Meenan</a> created <a href="https://github.com/pmeenan/http2priorities/tree/master/stand-alone">an example test page</a> which deliberately tries to download a load of low-priority, off-screen, images, before requesting some high priority on-screen images. 
A good HTTP/2 server should be able to recognize this and send the high priority images shortly after requested, at the expense of the lower priority images. A poor HTTP/2 server will just respond in the request order and ignore any priority signals. <a href="https://twitter.com/AndyDavies">Andy Davies</a> has <a href="https://github.com/andydavies/http2-prioritization-issues">a page tracking status of various CDNs for Patrick's test</a>. The HTTP Archive identifies when a CDN is used as part of its crawl and merging these two datasets that gives us the results shown in Figure 13:</p> -<table> -<thead><tr> -<th>CDN</th> -<th>Prioritizes Correctly?</th> -<th>Desktop</th> -<th>Mobile</th> -<th>Both</th> -</tr> -</thead> -<tbody> -<tr> -<td>Not using CDN</td> -<td>Unknown</td> -<td>57.81%</td> -<td>60.41%</td> -<td>59.21%</td> -</tr> -<tr> -<td>Cloudflare</td> -<td>Pass</td> -<td>23.15%</td> -<td>21.77%</td> -<td>22.40%</td> -</tr> -<tr> -<td>Google</td> -<td>Fail</td> -<td>6.67%</td> -<td>7.11%</td> -<td>6.90%</td> -</tr> -<tr> -<td>Amazon CloudFront</td> -<td>Fail</td> -<td>2.83%</td> -<td>2.38%</td> -<td>2.59%</td> -</tr> -<tr> -<td>Fastly</td> -<td>Pass</td> -<td>2.40%</td> -<td>1.77%</td> -<td>2.06%</td> -</tr> -<tr> -<td>Akamai</td> -<td>Pass</td> -<td>1.79%</td> -<td>1.50%</td> -<td>1.64%</td> -</tr> -<tr> -<td></td> -<td>Unknown</td> -<td>1.32%</td> -<td>1.58%</td> -<td>1.46%</td> -</tr> -<tr> -<td>WordPress</td> -<td>Pass</td> -<td>1.12%</td> -<td>0.99%</td> -<td>1.05%</td> -</tr> -<tr> -<td>Sucuri Firewall</td> -<td>Fail</td> -<td>0.88%</td> -<td>0.75%</td> -<td>0.81%</td> -</tr> -<tr> -<td>Incapsula</td> -<td>Fail</td> -<td>0.39%</td> -<td>0.34%</td> -<td>0.36%</td> -</tr> -<tr> -<td>Netlify</td> -<td>Fail</td> -<td>0.23%</td> -<td>0.15%</td> -<td>0.19%</td> -</tr> -<tr> -<td>OVH CDN</td> -<td>Unknown</td> -<td>0.19%</td> -<td>0.18%</td> -<td>0.18%</td> -</tr> -</tbody> -</table> -<p><strong>Figure 13 - HTTP/2 prioritization support in common CDNs</strong></p> -<p>This shows that a not insignificant portion of traffic is subject to the identified issue. How much of a problem this is, depends on exactly how your page loads and whether high priority resources are discovered late or not for your site, but it does show another complexity to take into considerations.</p> -<p>Another issue is with the <code>upgrade</code> HTTP header being used incorrectly. Web servers can respond to requests with an <code>upgrade</code> HTTP header suggesting that it supports a better protocol that the client might wish to use (e.g. advertise HTTP/2 to a client only using HTTP/1.1). You might think this would be useful as a way of informing the browser it supports HTTP/2 but since browsers only support HTTP/2 over HTTPS and since use of HTTP/2 can be negotiated through the HTTPS handshake, the use of this <code>upgrade</code> header for advertising HTTP/2 is pretty limited (to browsers at least). Worse than that, is when a server sends an upgrade header in error. This could be because an HTTP/2 supporting backend server is sending the header and then an HTTP/1.1-only edge server is blindly forwarding to the client. Apache emits the <code>upgrade</code> header when mod_http2 is enabled but HTTP/2 is not being used, and a nginx instance sitting in front of such an Apache happily forwards this header even when nginx does not support HTTP/2. This false advertising then leads to clients trying (and failing!) to use HTTP/2 as they are advised to. 
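One way to spot this kind of false advertising is simply to look at the response headers a site sends to a plain HTTP/1.1 client. The rough sketch below, again standard-library Python with a placeholder hostname, fetches the home page (<code>http.client</code> only speaks HTTP/1.1) and prints any <code>upgrade</code> header so it can be compared with what ALPN actually negotiates in the earlier sketch:

```python
import http.client

def report_upgrade_header(host: str) -> None:
    """Fetch the home page over HTTPS (as HTTP/1.1) and show any 'upgrade' response header."""
    conn = http.client.HTTPSConnection(host, timeout=5)
    conn.request("GET", "/", headers={"User-Agent": "upgrade-header-check"})
    response = conn.getresponse()
    upgrade = response.getheader("upgrade")  # header lookup is case-insensitive
    conn.close()
    if upgrade:
        print(f"{host} advertises: upgrade: {upgrade}")
    else:
        print(f"{host} sends no upgrade header")

report_upgrade_header("example.com")  # placeholder host
```

A site that already negotiates h2 over ALPN yet still emits <code>upgrade: h2</code>, or that emits it without supporting HTTP/2 at all, falls into the misconfigured buckets measured next.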
108 sites use HTTP/2 and yet suggest upgrading to HTTP/2 in this <code>upgrade</code> header. A further 12,767 sites on desktop (15,235 on mobile) suggest upgrading an HTTP/1.1 connection delivered over HTTPS to HTTP/2 when it's clear this was not available, or it would have been used already. These are a small minority of the 4.3 million sites crawled on desktop and 5.3 million sites crawled on mobile for these stats, but it shows that this was still an issue affecting a number of sites out there. Browsers handle this inconsistently, with Safari in particular attempting to upgrade and then getting itself in a mess and refusing to display the site at all. All this is before we get into sites recommending upgrading to <code>http1.0</code>, <code>http://1.1</code> or even <code>-all,+TLSv1.3,+TLSv1.2</code> (clearly some typos in web server configurations going on here!).</p> -<p>There are further implementation issues we could look at. For example, HTTP/2 is much stricter about HTTP header names, rejecting the whole request if you respond with spaces, colons or other invalid HTTP header names. The header names are also converted to lowercase, which catches some by surprise if their application assumes a certain capitalization (which was never guaranteed previously as <a href="https://tools.ietf.org/html/rfc7230#section-3.2">HTTP/1.1 specifically states the header names are case insensitive</a>, but still some have depended on this). The HTTP Archive could potentially be used to identify these issues as well, though some of them will not be apparent on the home page so we did not delve into that this year.</p> -<h2>HTTP/3</h2> -<p>The world does not stand still and despite HTTP/2 not having even reached its official 5th birthday, people are already seeing it as old news and getting more excited about its successor: HTTP/3. HTTP/3 builds on the concepts of HTTP/2 but moves it from the TCP connections that HTTP has always used to a UDP-based protocol called QUIC. This allows us to fix one edge case where HTTP/2 is slower than HTTP/1.1: when there is high packet loss, the guaranteed delivery nature of TCP holds up and throttles back all streams. It also allows us to address some TCP and HTTPS inefficiencies such as consolidating on one handshake for both, and supporting many ideas for TCP that have proven hard to implement in real life (TCP fast open, 0-RTT, ...etc.). HTTP/3 also cleans up some overlap between TCP and HTTP/2 (e.g. flow control being implemented in both layers) but conceptually it is very similar to HTTP/2. Web developers who understand and have optimized for HTTP/2 should have to make no further changes for HTTP/3. Server operators will have more work to do however, as the differences between TCP and QUIC are much more groundbreaking. These differences will make implementation harder, so the roll out of HTTP/3 may take considerably longer than HTTP/2 and initially be limited to those with certain expertise in the field (e.g. CDNs).</p> -<p>QUIC has been implemented by Google for a number of years and it is now undergoing a similar standardization process to the one SPDY went through on its way to HTTP/2. At the end of 2018 it was decided to name the HTTP part of QUIC as HTTP/3 (in Google's version of QUIC it was simply known as HTTP/2, even though it was not exactly the same as regular HTTP/2). QUIC has ambitions beyond just HTTP but for the moment it is the use case being worked on. 
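Since QUIC runs over a separate UDP connection, support for it is advertised rather than negotiated in-band, via the <code>alt-svc</code> header discussed below. A crude but quick way to spot it is simply to look for that header; the sketch below follows the same pattern as the earlier ones and again uses a placeholder hostname:

```python
import http.client

def check_alt_svc(host: str) -> None:
    """Print the alt-svc response header, which advertises QUIC/HTTP/3 endpoints when present."""
    conn = http.client.HTTPSConnection(host, timeout=5)
    conn.request("GET", "/")
    alt_svc = conn.getresponse().getheader("alt-svc")
    conn.close()
    if alt_svc is None:
        print(f"{host}: no alt-svc header")
    elif "h3" in alt_svc or "quic" in alt_svc:
        print(f"{host}: advertises QUIC/HTTP/3 -> {alt_svc}")
    else:
        print(f"{host}: alt-svc present, but no QUIC/HTTP/3 -> {alt_svc}")

check_alt_svc("www.google.com")  # placeholder; Google properties have long advertised QUIC here
```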
Just as this chapter was being written, <a href="https://blog.cloudflare.com/http3-the-past-present-and-future/">Cloudflare, Chrome and Firefox all announced HTTP/3 support</a> despite the fact that HTTP/3 is still not formally complete or approved as a standard yet. This is welcome as QUIC support has been somewhat lacking outside of Google until recently and definitely lags SPDY and HTTP/2 support from a similar stage of standardization.</p> -<p>Because HTTP/3 uses QUIC over UDP rather than TCP it makes the discovery of HTTP/3 support a bigger challenge than HTTP/2 discovery. With HTTP/2 we can mostly use the HTTPS handshake, but as HTTP/3 is on a completely different connection that is not an option here. HTTP/2 did also use the <code>upgrade</code> HTTP header to inform of HTTP/2 support, and although that was not that useful for HTTP/2, a similar mechanism has been put in place for QUIC that is more useful. The <em>alternative services</em> HTTP header (<code>alt-svc</code>) advertises alternative protocols that can be used on completely different connections (as opposed to alternative protocols that can be used on this connection - which is what the <code>upgrade</code> HTTP header is used for). Analysis of this header shows that 7.67% of desktop sites and 8.38% of mobile sites already support QUIC (which roughly represents Google percentage of traffic unsurprisingly enough as it has been using this for a while), and 0.04% are already supporting <code>h3</code> (meaning HTTP/3) in this field. I would imagine by next year’s Almanac this number will have increased significantly.</p> -<h2>Conclusion</h2> -<p>This analysis of the available statistics in HTTP Archive has shown what many of us in the HTTP community were already aware of: HTTP/2 is here and proving very popular. It is already the dominant protocol in terms of number of request but has not quite overtaken HTTP/1.1 in terms of number of sites supported. The long tail of the internet means that it often takes an exponentially longer time to make noticeable gains on the less well-maintained sites than on the high profile, high volume sites.</p> -<p>We've also talked about how it is (still!) not easy to get HTTP/2 support in some installations. Server developers, operating system distributors and end customers all have a part to play in pushing to make that easier. Tying software to operating systems always lengthens deployment time - and in fact one of the very reasons for QUIC is to break a similar barrier with deploying TCP changes. In many instances there is no real reason to tie web server versions to operating systems. Apache (to use one of the more popular examples) will run with HTTP/2 support in older operating systems but getting an up to date version on to the server should not require the expertise or risk it currently does. Nginx does very well here hosting repositories for the common Linux flavors to make installation easier and if the Apache team (or the Linux distribution vendors) do not offer something similar, then I can only see Apache's usage continuing to shrink as it struggles to hold relevance and shake its reputation as old and slow - based on older installs - even though up to date versions have one of the best HTTP/2 implementations. I see that as less of an issue for IIS since it is usually the preferred web server on the Windows side.</p> -<p>Other than that, HTTP/2 has been a relatively easy upgrade path - which is why it has the strong uptake it has already seen. 
For the most part, it is a painless switch on and therefore, for most, it has turned out to be a hassle-free performance increase that requires little thought once your server supports it. The devil is in the details though (as always), and small differences between server implementations can result in better or worse HTTP/2 usage and ultimately end user experience. There have also been a number of bugs and even <a href="https://github.com/Netflix/security-bulletins/blob/master/advisories/third-party/2019-002.md">security issues</a>, as is to be expected with any new protocol. Ensuring you are using a strong, up to date, well maintained implementation of any newish protocol like HTTP/2 will ensure you stay on top of these issues. However, that can take expertise and managing. The roll out of QUIC and HTTP/3 will likely be even more complicated and require more expertise. Perhaps this is best left to third party service providers like CDNs who have this expertise and can give your site easy access to these features? However, even when left to the experts, this is not a sure thing (as the prioritization statistics show), but if you choose your server provider wisely and engage with them on what your priorities are, then it should be an easier implementation. And on that note it would be great if the CDNs prioritized the issue highlighted above (pun definitely intended!), though I suspect with the advent of a new prioritization method in HTTP/3, many will hold tight. The next year will prove yet more interesting times in the HTTP world.</p> - + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="chapter-hero" + /> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images//2019/20_HTTP_2/hero_xl.jpg" + alt="" + /> + </p> + <h2 id="introduction">Introduction</h2> + <p> + HTTP/2 was the first major update to the main transport protocol of the web + in nearly 20 years. It arrived with a wealth of expectations: it promised a + free performance boost with no downsides. More than that - we could stop + doing all the hacks and work arounds that HTTP/1.1 forced us into to get + around its inefficiencies. Bundling, spriting, inlining and even sharding + domains would all become anti-patterns in an HTTP/2 world, giving + performance by default. This meant even those without the skills and + resources to concentrate on web performance would suddenly have performant + websites. The reality has been, as ever, a little more nuanced than that. It + has been over four years since its formal approval as a + <a href="https://tools.ietf.org/html/rfc7540" + >standard in May 2015 as RFC 7540</a + >, so now is a good time to look over how this relatively new technology has + fared in the real world. + </p> + <h2 id="what-is-http2">What is HTTP/2?</h2> + <p> + For those not familiar with the technology a bit of background is helpful to + make the most of the metrics and findings in this chapter. Up until recently + HTTP has always been a text-based protocol. An HTTP client like a web + browser opened a TCP connection to a server, and then sent an HTTP command + like <code>GET /index.html</code> to ask for a resource. This was enhanced + in HTTP/1.0 to add <em>HTTP headers</em> so various pieces of meta data + could be made in addition to the request (what browser it is, formats it + understands…etc.). These HTTP headers were also text-based and separated by + newline characters. 
Servers parsed the incoming requests by reading the + request and any HTTP headers line by line, and then the server responded, + with its own HTTP response headers and the actual resource being requested. + The protocol seemed simple, but that also meant certain limitations - the + main one being that HTTP was basically synchronous: once an HTTP request had + been sent, the whole TCP connection was basically off limits for anything + else until the response had come back and been read and processed. This was + incredibly inefficient and required multiple TCP connections (browsers + typically use 6) to allow a limited form of parallelization. That in itself + brings its own issues as TCP connections take time and resources to set up + and get to full efficiency, especially when using HTTPS which is standard + nowadays. HTTP/1.1 improved it somewhat allowing reuse of TCP connections + for subsequent requests but still did not solve the parallelization issue. + </p> + <p> + Despite HTTP being text-based, the reality is that it was rarely used to + transport text, at least in it's raw format. While it was true that HTTP + headers were still text, the payloads themselves often were not. Text files + like HTML, JS and CSS are usually <a href="#">compressed</a> for transport + into a binary format using gzip, brotli or similar and non-text files like + images, videos… etc. are served in their own formats. The whole HTTP message + is then often wrapped in HTTPS to encrypt the messages for security reasons. + So, the web had basically moved on from text-based transport a long time + ago, but HTTP had not. One reason for this stagnation was because it was so + difficult to introduce any breaking changes to such a ubiquitous protocol + like HTTP (previous efforts had tried and failed). Many routers, firewalls + and other middleboxes understand HTTP and would react badly to major changes + to it. Upgrading them all to support a new version is simply not possible. + </p> + <p> + In 2009 Google announced they were working on an alternative to the + text-based HTTP called SPDY. This would take advantage of the fact that HTTP + messages were often encrypted in HTTPS which prevents them being read and + interfered with en route. Google controlled one of the most popular browsers + (Chrome) and some of the most popular websites (Google, YouTube, Gmail…etc.) + - so both ends of the connection. Google's idea was to pack HTTP messages + into a proprietary format, send them across the internet, and then unpacked + them on the other side. The proprietary format (SPDY) was binary-based + rather than text-based which solved some of the main performance problems + with HTTP/1.1 by allowing more efficient use of a single TCP connection, + negating the need to open the 6 connections that had become the norm under + HTTP/1.1. By using SPDY in the real world they were able to prove it was + more performant for real users, and not just because of some lab-based + experimental results. After rolling out SPDY to all Google websites, other + servers and browser started implementing it, and then it was time to + standardize this proprietary format into an internet standard and thus + HTTP/2 was born. 
+ </p> + <p>HTTP/2 has the following key concepts:</p> + <ul> + <li>Binary format</li> + <li>Multiplexing</li> + <li>Flow Control</li> + <li>Prioritization</li> + <li>Header compression</li> + <li>Push</li> + </ul> + <p> + <em>Binary format</em>, means that HTTP/2 messages are wrapped into + <em>frames</em> of a pre-defined format. This means HTTP messages are easier + to parse and no longer require scanning for newline characters. This is + better for security as there + <a href="https://www.owasp.org/index.php/HTTP_Response_Splitting" + >were a number of exploits for previous versions of HTTP</a + >. It also means HTTP/2 connections can be <em>multiplexed</em>: different + frames for different <em>streams</em> can be sent on the same connection + without interfering with each other as each Frame includes a Stream + Identifier and its length. Multiplexing allows much more efficient use of a + single TCP connection without the overhead of opening additional + connections. Ideally we would open a single connection per domain (<a + href="https://daniel.haxx.se/blog/2016/08/18/http2-connection-coalescing/" + >or even for multiple domains</a + >!). + </p> + <p> + Having separate streams does introduce some complexities along with some + potential benefits. HTTP/2 needs the concept of <em>flow control</em> to + allow the different streams to send data at different rates, whereas + previously, with only one response in flight at any one time, this was + controlled at a connection level by TCP flow control. + <em>Prioritization</em> similarly allows multiple requests to be sent + together but with the most important requests getting more of the bandwidth. + </p> + <p> + Finally, HTTP/2 introduced two new concepts: + <em>header compression</em> allowed those text-based HTTP headers to be sent + more efficiently (using an HTTP/2-specific + <em><a href="https://tools.ietf.org/html/rfc7541">HPACK</a></em> format for + security reasons) and <em>HTTP/2 push</em> allowed more than one response to + be sent in answer to a request. This allowed the server to "push" resources + before a client was even aware it needed them. Push was supposed to solve + the performance workaround of having to inline resources like CSS and + JavaScript directly into HTML to prevent holding up the page while those + resources were requested. With HTTP/2 the CSS and JavaScript could remain as + external files but be pushed along with the initial HTML, so they were + available immediately. Subsequent page requests would not push these + resources, since they would now be cached, and so would not waste bandwidth. + </p> + <p> + This whistle-stop tour of HTTP/2 gives the main history and concepts of the + newish protocol. As should be apparent from this explanation, the main + benefit of HTTP/2 is to address performance limitations of the HTTP/1.1 + protocol. There were also security improvements as well - perhaps most + importantly in being to address performance issues of using HTTPS since + HTTP/2, even over HTTPS, is + <a href="https://www.httpvshttps.com/">often much faster than plain HTTP</a + >. Other than the web browser packing the HTTP messages into the new binary + format, and the web server unpacking it at the other side, the core basics + of HTTP itself stayed roughly the same. This means web applications do not + need to make any changes to support HTTP/2 as the browser and server take + care of this. Turning it on should be a free performance boost and because + of this adoption should be relatively easy. 
Of course, there are ways web + developers can optimize for HTTP/2 to take full advantage of how it differs. + </p> + <h2 id="adoption-of-http2">Adoption of HTTP/2</h2> + <p> + As mentioned above, Internet protocols are often difficult to adopt since + they are ingrained into so much of the infrastructure that makes up the + internet. This makes introducing any changes slow and difficult. IPv6 for + example has been around for 20 years but has + <a href="https://www.google.com/intl/en/ipv6/statistics.html" + >struggled to be adopted</a + >. HTTP/2 however, was different as it was effectively hidden in HTTPS (at + least for the browser uses cases) removing barriers to adoption as long as + both the browser and server supported it. Browser support has been very + strong for some time and the advent of auto updating + <em>evergreen</em> browsers has meant that an estimated + <a href="https://caniuse.com/#feat=http2" + >95% of global users support HTTP/2 now</a + >. For this Web Almanac we use HTTP Archive which runs a Chrome web crawler + on the approximately 5 million top websites (on both Desktop and Mobile with + a slightly different set for each). This shows that HTTP/2 usage is now the + majority protocol - an impressive feat just 4 short years after formal + standardization: + </p> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/http2usage.png" + alt="" + /> + </p> + <p> + <strong + >Figure 1 - + <a href="https://httparchive.org/reports/state-of-the-web#h2" + >HTTP/2 usage by request</a + ></strong + > + </p> + <p> + Looking at the breakdown of all HTTP versions by request we see the + following: + </p> + <table> + <thead> + <tr> + <th id="protocol">Protocol</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td></td> + <td>5.60%</td> + <td>0.57%</td> + <td>2.97%</td> + </tr> + <tr> + <td>HTTP/0.9</td> + <td>0.00%</td> + <td>0.00%</td> + <td>0.00%</td> + </tr> + <tr> + <td>HTTP/1.0</td> + <td>0.08%</td> + <td>0.05%</td> + <td>0.06%</td> + </tr> + <tr> + <td>HTTP/1.1</td> + <td>40.36%</td> + <td>45.01%</td> + <td>42.79%</td> + </tr> + <tr> + <td>HTTP/2</td> + <td>53.96%</td> + <td>54.37%</td> + <td>54.18%</td> + </tr> + </tbody> + </table> + <p><strong>Figure 2 - HTTP version usage by request</strong></p> + <p> + This shows that HTTP/1.1 and HTTP/2 are the versions used by the vast + majority of requests as expected. There are only a very small number of + requests on the older HTTP/1.0 and HTTP/0.9 protocols. Annoyingly there is a + larger percentage where the protocol was not correctly tracked by the HTTP + Archive crawl, particularly on desktop. Digging into this has shown various + reasons, some of which I can explain and some of which I can't. Based on + spot checks they mostly appear to be HTTP/1.1 requests and, assuming they + are, desktop and mobile usage is similar. Despite there being a little + larger percentage of noise than I'd like, it doesn't alter the overall + message being conveyed here. Other than that, the mobile/desktop similarity + is not unexpected - the HTTP Archive crawls using Chrome which supports + HTTP/2 for both desktop and mobile. Real world usage may have slightly + different stats with some older usage of browsers on both but even then + support is widespread so I would not expect a large variation between + desktop and mobile. 
+ </p> + <p> + At present the HTTP Archive does not track HTTP over QUIC (soon to be + standardized as HTTP/3) separately, so these are listed under HTTP/2 but + we'll look at other ways of measuring that later in this chapter. + </p> + <p> + Looking at the number of requests will skew the results somewhat due to + popular requests. For example, many sites load Google Analytics, which does + support HTTP/2, and so would show as an HTTP/2 request even if the embedding + site itself does not support HTTP/2. On the other hand, popular websites + (that tend to support HTTP/2) are also underrepresented in the above stats + as they are only measured once (e.g. google.com and obscuresite.com are + given equal weighting). There are lies, damn lies and statistics. However, + looking at other sources (for example the + <a + href="https://telemetry.mozilla.org/new-pipeline/dist.html#!cumulative=0&measure=HTTP_RESPONSE_VERSION" + >Mozilla telemetry</a + > + which looks at real-world usage through the Firefox browser) shows similar + statistics. + </p> + <p> + It is still interesting to look at home pages only to get a rough figure on + the number of sites that support HTTP/2 (at least on their home page). + Figure 3 shows less support than overall requests, as expected, at around + 36%: + </p> + <table> + <thead> + <tr> + <th id="protocol">Protocol</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td></td> + <td>0.09%</td> + <td>0.08%</td> + <td>0.08%</td> + </tr> + <tr> + <td>HTTP/1.0</td> + <td>0.09%</td> + <td>0.08%</td> + <td>0.09%</td> + </tr> + <tr> + <td>HTTP/1.1</td> + <td>62.36%</td> + <td>63.92%</td> + <td>63.22%</td> + </tr> + <tr> + <td>HTTP/2</td> + <td>37.46%</td> + <td>35.92%</td> + <td>36.61%</td> + </tr> + </tbody> + </table> + <p><strong>Figure 3 - HTTP version usage for home pages</strong></p> + <p> + HTTP/2 is only supported by browsers over HTTPS, even though officially + HTTP/2 can be used over HTTPS or over unencrypted non-HTTPS connections. As + mentioned previously, hiding the new protocol in encrypted HTTPS connections + prevents networking appliances which do not understand this new protocol + from interfering with (or rejecting!) its usage. Additionally, the HTTPS + handshake allows an easy method of the client and server agreeing to use + HTTP/2. The web is moving to HTTPS and HTTP/2 turns the traditional argument + of HTTPS being bad for performance almost completely on its head. Not every + site has made the transition to HTTPS, so HTTP/2 will not even be available + to those that have not. 
Looking at just those sites that use HTTPS, we do + see a higher percentage support HTTP/2 at around 55% - similar to the first + <em>all requests</em> statistic we started with: + </p> + <table> + <thead> + <tr> + <th id="protocol">Protocol</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td></td> + <td>0.09%</td> + <td>0.10%</td> + <td>0.09%</td> + </tr> + <tr> + <td>HTTP/1.0</td> + <td>0.06%</td> + <td>0.06%</td> + <td>0.06%</td> + </tr> + <tr> + <td>HTTP/1.1</td> + <td>45.81%</td> + <td>44.31%</td> + <td>45.01%</td> + </tr> + <tr> + <td>HTTP/2</td> + <td>54.04%</td> + <td>55.53%</td> + <td>54.83%</td> + </tr> + </tbody> + </table> + <p><strong>Figure 4 - HTTP version usage for HTTPS home pages</strong></p> + <p> + We have shown that browser support is strong, and there is a safe road to + adoption, so why does every site (or at least every HTTPS site) not support + HTTP/2? Well here we come to the final item for support we have not measured + yet: server support. This is more problematic than browser support as, + unlike modern browsers, servers often do not automatically upgrade to the + latest version. Even when the server is regularly maintained and patched + that will often just apply security patches rather than new features like + HTTP/2. Let us look first at the server HTTP header for those sites that do + support HTTP/2: + </p> + <table> + <thead> + <tr> + <th id="server">Server</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td>nginx</td> + <td>34.04%</td> + <td>32.48%</td> + <td>33.19%</td> + </tr> + <tr> + <td>cloudflare</td> + <td>23.76%</td> + <td>22.29%</td> + <td>22.97%</td> + </tr> + <tr> + <td>Apache</td> + <td>17.31%</td> + <td>19.11%</td> + <td>18.28%</td> + </tr> + <tr> + <td></td> + <td>4.56%</td> + <td>5.13%</td> + <td>4.87%</td> + </tr> + <tr> + <td>LiteSpeed</td> + <td>4.11%</td> + <td>4.97%</td> + <td>4.57%</td> + </tr> + <tr> + <td>GSE</td> + <td>2.16%</td> + <td>3.73%</td> + <td>3.01%</td> + </tr> + <tr> + <td>Microsoft-IIS</td> + <td>3.09%</td> + <td>2.66%</td> + <td>2.86%</td> + </tr> + <tr> + <td>openresty</td> + <td>2.15%</td> + <td>2.01%</td> + <td>2.07%</td> + </tr> + <tr> + <td>…</td> + <td>…</td> + <td>…</td> + <td>…</td> + </tr> + </tbody> + </table> + <p><strong>Figure 5 - Servers used for HTTP/2</strong></p> + <p> + Nginx provides package repos that allow ease of installing or upgrading to + the latest version, so it is no surprise to see it leading the way here. + Cloudflare is the <a href="#">most popular CDNs</a> and enables HTTP/2 by + default so again it is also not surprising to see this as a large percentage + of HTTP/2 sites. Incidently, Cloudflare uses + <a + href="https://blog.cloudflare.com/nginx-structural-enhancements-for-http-2-performance/" + >a heavily customised version of nginx as their web server</a + >. After this we see Apache at around 20% of usage, followed by some servers + who choose to hide what they are and then the smaller players (LiteSpeed, + IIS, Google Servlet Engine and openresty - which is nginx based). 
+  </p>
+  <p>
+    What is more interesting is those sites that do <em>not</em> support
+    HTTP/2:
+  </p>
+  <table>
+    <thead>
+      <tr><th id="server">Server</th><th id="desktop">Desktop</th><th id="mobile">Mobile</th><th id="both">Both</th></tr>
+    </thead>
+    <tbody>
+      <tr><td>Apache</td><td>46.76%</td><td>46.84%</td><td>46.80%</td></tr>
+      <tr><td>nginx</td><td>21.12%</td><td>21.33%</td><td>21.24%</td></tr>
+      <tr><td>Microsoft-IIS</td><td>11.30%</td><td>9.60%</td><td>10.36%</td></tr>
+      <tr><td></td><td>7.96%</td><td>7.59%</td><td>7.75%</td></tr>
+      <tr><td>GSE</td><td>1.90%</td><td>3.84%</td><td>2.98%</td></tr>
+      <tr><td>cloudflare</td><td>2.44%</td><td>2.48%</td><td>2.46%</td></tr>
+      <tr><td>LiteSpeed</td><td>1.02%</td><td>1.63%</td><td>1.36%</td></tr>
+      <tr><td>openresty</td><td>1.22%</td><td>1.36%</td><td>1.30%</td></tr>
+      <tr><td>…</td><td>…</td><td>…</td><td>…</td></tr>
+    </tbody>
+  </table>
+  <p><strong>Figure 6 - Servers used for HTTP/1.1 or lower</strong></p>
+  <p>
+    Some of this will be non-HTTPS traffic that would use HTTP/1.1 even if the
+    server supported HTTP/2, but a bigger issue is servers that do not support
+    HTTP/2 at all. In these stats we see a much greater share for Apache and
+    IIS, which are likely running older versions. For Apache in particular it
+    is often not easy to add HTTP/2 support to an existing installation, as
+    Apache does not provide an official repository to install this from. This
+    often means resorting to compiling from source or trusting a third-party
+    repo - neither of which is particularly appealing to many administrators.
+    Only the latest versions of Linux distributions (RHEL and CentOS 8, Ubuntu
+    18 and Debian 9) come with a version of Apache which supports HTTP/2, and
+    many servers are not running those yet. On the Microsoft side, only Windows
+    Server 2016 and above supports HTTP/2, so again those running older
+    versions cannot support this in IIS. Merging these two stats together, we
+    can see the percentage of installs of each server that use HTTP/2:
+  </p>
+  <table>
+    <thead>
+      <tr><th id="server">Server</th><th id="desktop">Desktop</th><th id="mobile">Mobile</th></tr>
+    </thead>
+    <tbody>
+      <tr><td>cloudflare</td><td>85.40%</td><td>83.46%</td></tr>
+      <tr><td>LiteSpeed</td><td>70.80%</td><td>63.08%</td></tr>
+      <tr><td>openresty</td><td>51.41%</td><td>45.24%</td></tr>
+      <tr><td>nginx</td><td>49.23%</td><td>46.19%</td></tr>
+      <tr><td>GSE</td><td>40.54%</td><td>35.25%</td></tr>
+      <tr><td></td><td>25.57%</td><td>27.49%</td></tr>
+      <tr><td>Apache</td><td>18.09%</td><td>18.56%</td></tr>
+      <tr><td>Microsoft-IIS</td><td>14.10%</td><td>13.47%</td></tr>
+      <tr><td>…</td><td>…</td><td>…</td></tr>
+    </tbody>
+  </table>
+  <p>
+    <strong>Figure 7 - Percentage of installs of each server that use HTTP/2</strong>
+  </p>
+  <p>
+    It's clear Apache and IIS fall way behind, with 18% and 14% of their
+    installed base supporting HTTP/2, and this has to be, at least in part, a
+    consequence of them being more difficult to upgrade. A full operating
+    system upgrade is often required for many to get this support easily.
+    Hopefully this will get easier as new versions of operating systems become
+    the norm.
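+  </p>
+  <p>
+    To be clear, it is not the configuration itself that is hard: once a recent
+    enough Apache (2.4.17 or later, built with mod_http2) is actually available
+    on the machine, switching HTTP/2 on is typically just a couple of
+    directives. A sketch (the module path may differ per distribution):
+  </p>
+  <pre><code># enable the HTTP/2 module and prefer h2 on TLS connections
+LoadModule http2_module modules/mod_http2.so
+Protocols h2 http/1.1</code></pre>
+  <p>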
None of this is a comment on the HTTP/2 implementations here (<a + href="https://twitter.com/tunetheweb/status/988196156697169920?s=20" + >I happen to think Apache has one of the best implementations</a + >), but more in the ease of enabling HTTP/2 in each of these servers - or + lack thereof. + </p> + <h2 id="impact-of-http2">Impact of HTTP/2</h2> + <p> + The impact of HTTP/2 is a much more difficult to measure statistic, + especially using the HTTP Archive methodology. Ideally sites should be + crawled with both HTTP/1.1 and HTTP/2 and the difference measured but that + is not possible with the statistics we are investigating here. Additionally, + measuring whether the average HTTP/2 site is faster than the average + HTTP/1.1 site introduces too many other variables that I feel requires a + more exhaustive study than we can cover here. + </p> + <p> + One impact that can be measured is in the changing use of HTTP now we are in + an HTTP/2 world. Multiple connections were a work around with HTTP/1.1 to + allow a limited form of parallelization, but this is in fact the opposite of + what usually works best with HTTP/2. A single connection reduces the + overhead of TCP setup, TCP slow start, HTTPS negotiation and also allows the + potential of cross-request prioritization. The HTTP Archive measures the + number of TCP connections per page and that is dropping steadily as more + sites support HTTP/2 and use its single connection instead of 6 separate + connections: + </p> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/TCPconnections.png" + alt="" + /> + </p> + <p> + <strong + >Figure 8 - + <a href="https://httparchive.org/reports/state-of-the-web#tcp" + >TCP connections per page</a + ></strong + > + </p> + <p> + Bundling assets to obtain fewer requests was another HTTP/1.1 workaround + that went by many names: bundling, concatenation, packaging, spriting, … + etc. It is less necessary when using HTTP/2 as there is less overhead with + requests but it should be noted that requests are not free in HTTP/2 and + <a href="https://engineering.khanacademy.org/posts/js-packaging-http2.htm" + >those that experimented with removing bundling completely have noticed a + loss in performance</a + >. Looking at the number of requests loaded by page over time, we do see a + slight decrease in requests, rather than the expected increase: + </p> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/numresources.png" + alt="" + /> + </p> + <p> + <strong + >Figure 9 - + <a href="https://httparchive.org/reports/state-of-the-web#reqTotal" + >Total Requests per page</a + ></strong + > + </p> + <p> + This low rate of change can perhaps be attributed to the aforementioned + observations that bundling cannot be removed (at least completely) without a + negative performance impact and that many build tools currently bundle for + historical reasons based on HTTP/1.1 recommendations. It is also likely that + many sites may not be willing to penalize HTTP/1.1 users by undoing their + HTTP/1.1 performance hacks just yet, or at least that they do not have the + confidence (or time!) to feel this is worthwhile. That the number of + requests is staying roughly static, and against the background of an ever + increasing <a href="#">page weight</a> is interesting though perhaps not + really related to HTTP/2. 
+  </p>
+  <h2 id="http2-push">HTTP/2 Push</h2>
+  <p>
+    HTTP/2 push has a mixed history, despite being a much-hyped new feature of
+    HTTP/2. The other features were basically under-the-hood performance
+    improvements, but push was a brand-new concept that completely broke the
+    single request to single response nature of HTTP up until then. It allowed
+    extra responses to be returned: when you asked for the web page, the server
+    could respond with the HTML page as usual, but then also send you the
+    critical CSS and JavaScript, thus avoiding any additional round trips for
+    certain resources. It would in theory allow us to stop inlining CSS and
+    JavaScript into our HTML and yet still get the same performance gains as
+    doing so. After solving that, it could potentially lead to all sorts of new
+    and interesting use cases.
+  </p>
+  <p>
+    The reality has been… well, a bit disappointing. HTTP/2 push has proved much
+    harder than originally envisaged to use effectively. Some of this has been
+    due to
+    <a href="https://jakearchibald.com/2017/h2-push-tougher-than-i-thought/">the complexity of how HTTP/2 push works</a>,
+    and the implementation issues due to that. A bigger concern is that push can
+    quite easily cause, rather than solve, performance issues. Over-pushing is a
+    real risk. Often the browser is in the best place to decide <em>what</em> to
+    request, and just as crucially <em>when</em> to request it, but HTTP/2 push
+    puts that responsibility on the server. Pushing resources that a browser
+    already has in its cache is a waste of bandwidth (though in my opinion so is
+    inlining CSS, but that gets much less of a hard time than HTTP/2 push!).
+    <a href="https://lists.w3.org/Archives/Public/ietf-http-wg/2019JanMar/0033.html">Proposals to inform the server about the status of the browser cache have stalled</a>,
+    especially on privacy concerns. Even without that problem, there are other
+    potential issues if push is not used correctly. For example, pushing large
+    images and therefore holding up the sending of critical CSS and JavaScript
+    will lead to slower websites than if you'd not pushed at all!
+  </p>
+  <p>
+    There has also been very little evidence to date that push, even when
+    implemented correctly, results in the performance increase it promised. This
+    is an area that, again, the HTTP Archive is not best placed to answer, due
+    to the nature of how it runs (a monthly crawl of popular sites using Chrome
+    in one state), so we won't delve into it too much here, but suffice to say
+    that the performance gains are far from clear cut and the potential problems
+    are real.
+  </p>
+  <p>Putting that aside, let's look at the usage of HTTP/2 push:</p>
+  <table>
+    <thead>
+      <tr><th id="client">Client</th><th id="sites_using_http/2_push">Sites Using HTTP/2 Push</th><th id="sites_using_http/2_push_(%)">Sites Using HTTP/2 Push (%)</th></tr>
+    </thead>
+    <tbody>
+      <tr><td>Desktop</td><td>22,581</td><td>0.52%</td></tr>
+      <tr><td>Mobile</td><td>31,452</td><td>0.59%</td></tr>
+    </tbody>
+  </table>
+  <p><strong>Figure 10 - Sites using HTTP/2 push</strong></p>
+  <p>
+    These stats show that the uptake of HTTP/2 push is very low - most likely
+    because of the issues described previously.
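+  </p>
+  <p>
+    For context, "using push" here means the server has been explicitly told
+    what to push. As a sketch of what that typically looks like, these are the
+    two common approaches in nginx (1.13.9 or later), with placeholder paths -
+    either naming resources directly, or converting the application's preload
+    link headers into pushes:
+  </p>
+  <pre><code>location = / {
+    http2_push /css/critical.css;
+    http2_push /js/app.js;
+}
+# or: treat preload link headers from the backend as push instructions
+http2_push_preload on;</code></pre>
+  <p>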
+    However, when sites do use push, they tend to use it a lot, rather than for
+    one or two assets, as shown in Figure 11:
+  </p>
+  <table>
+    <thead>
+      <tr><th id="client">Client</th><th id="avg_pushed_requests">Avg Pushed Requests</th><th id="avg_kb_pushed">Avg KB Pushed</th></tr>
+    </thead>
+    <tbody>
+      <tr><td>Desktop</td><td>7.86</td><td>162.38</td></tr>
+      <tr><td>Mobile</td><td>6.35</td><td>122.78</td></tr>
+    </tbody>
+  </table>
+  <p><strong>Figure 11 - How much is pushed when it is used</strong></p>
+  <p>
+    This is a concern as previous advice has been to be conservative with push
+    and to
+    <a href="https://docs.google.com/document/d/1K0NykTXBbbbTlv60t5MyJvXjqKGsCVNYHyLEXIxYMv0/edit">"push just enough resources to fill idle network time, and no more"</a>.
+    The above statistics suggest many resources, of a significant combined size,
+    are pushed. Looking at what is pushed, we see the data in Figure 12:
+  </p>
+  <p>
+    <img src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/whatpushisusedfor.png" alt="" />
+  </p>
+  <p><strong>Figure 12 - What asset types is push used for?</strong></p>
+  <p>
+    JavaScript and then CSS are the overwhelming majority of pushed items, both
+    by volume and by bytes. After this there is a ragtag assortment of images,
+    fonts, data, …etc. At the tail end we see around 100 sites pushing video -
+    which may be intentional or may be a sign of over-pushing the wrong types of
+    assets!
+  </p>
+  <p>
+    One concern raised by some is that HTTP/2 implementations have repurposed
+    the preload HTTP link header as a signal to push. One of the most popular
+    uses of the preload <a href="#">resource hint</a> is to inform the browser
+    of late-discovered resources like fonts and images, which the browser will
+    not see until, for example, the CSS has been requested, downloaded and
+    parsed. If these are now pushed based on that header, there was a concern
+    that reusing this may result in a lot of unintended pushes. However, the
+    relatively low usage of fonts and images may mean that risk is not being
+    seen as much as was feared. <code>&lt;link rel="preload" ... &gt;</code>
+    tags are often used in the HTML rather than HTTP link headers, and these
+    tags are not a signal to push. Statistics in the
+    <a href="#">resource hint</a> chapter show that less than 1% of sites use
+    the preload HTTP link header, and about the same amount use preconnect,
+    which has no meaning in HTTP/2, so this would suggest this is not so much of
+    an issue. Though there are a number of fonts and other assets being pushed,
+    which may be a signal of this. As a counter argument to those complaints, if
+    an asset is important enough to preload, then it could be argued these
+    assets should be pushed if possible, as browsers treat preload hints as very
+    high priority requests anyway. Any performance concern is therefore (again
+    arguably) with the overuse of preload, rather than the resulting HTTP/2 push
+    that happens because of this.
+  </p>
+  <p>
+    To get around this unintended push, you can provide the
+    <code>nopush</code> attribute in your preload header:
+  </p>
+  <pre><code>link: &lt;/assets/jquery.js&gt;; rel=preload; as=script; nopush</code></pre>
+  <p>
+    It looks like 5% of preload HTTP headers do make use of this attribute,
+    which is higher than I would have expected as I would have considered this a
+    niche optimization. Then again, so is the use of preload HTTP headers and/or
+    HTTP/2 push itself!
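+  </p>
+  <p>
+    If you want to see what, if anything, a given site pushes to you, the
+    nghttp client from the nghttp2 project is one quick way to look - a sketch,
+    with example.com as a placeholder (<code>-a</code> fetches the page's
+    assets, <code>-n</code> discards the bodies and <code>-s</code> prints
+    per-stream statistics, in which pushed resources are flagged):
+  </p>
+  <pre><code>nghttp -ans https://example.com/</code></pre>
+  <p>
+    Chrome's DevTools also label pushed responses in the Network panel, which
+    may be the easier route for a quick spot check.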
+ </p> + <h2 id="issues">Issues</h2> + <p> + HTTP/2 is mostly a seamless upgrade that, once your server supports it, you + can switch on with no need to change your website or application. Of course, + you can optimize for HTTP/2 or stop using HTTP/1.1 workarounds as much, but + in general a site will usually work without needing any changes - but just + be faster. There are a couple of gotchas to be aware of however that can + impact any upgrade and some sites have found these out the hard way. + </p> + <p> + One cause of issues in HTTP/2 is the poor support of HTTP/2 prioritization. + This feature allows multiple requests in progress to make the appropriate + use of the connection. This is especially important since HTTP/2 has + massively increased the number of requests that can be running on the same + connection. 100 or 128 parallel requests limits are common in server + implementations. Previously the browser had a max of 6 connections per + domain and so used its skill and judgement to decide how best to use those + connections. Now it rarely needs to queue and can send all requests as soon + as it knows about them. This then can lead to the bandwidth being "wasted" + on lower priority requests while critical requests are delayed (and + incidentally + <a + href="https://www.lucidchart.com/techblog/2019/04/10/why-turning-on-http2-was-a-mistake/" + >can also lead to swamping your backend server with more requests than it + is used to!</a + >). HTTP/2 has a complex prioritization model (too complex many say - hence + why it is being reconsidered for HTTP/3!) but few servers honor that + properly. This can be because their HTTP/2 implementations are not up to + scratch or because of so called <em>bufferbloat</em> where the responses are + already en route before the server realizes there is a higher priority + request. Due to the varying nature of servers, TCP stacks and locations it + is difficult to measure this for most sites, but with CDNs this should be + more consistent. + <a href="https://twitter.com/patmeenan">Patrick Meenan</a> created + <a href="https://github.com/pmeenan/http2priorities/tree/master/stand-alone" + >an example test page</a + > + which deliberately tries to download a load of low-priority, off-screen, + images, before requesting some high priority on-screen images. A good HTTP/2 + server should be able to recognize this and send the high priority images + shortly after requested, at the expense of the lower priority images. A poor + HTTP/2 server will just respond in the request order and ignore any priority + signals. <a href="https://twitter.com/AndyDavies">Andy Davies</a> has + <a href="https://github.com/andydavies/http2-prioritization-issues" + >a page tracking status of various CDNs for Patrick's test</a + >. 
The HTTP Archive identifies when a CDN is used as part of its crawl and + merging these two datasets that gives us the results shown in Figure 13: + </p> + <table> + <thead> + <tr> + <th id="cdn">CDN</th> + <th id="prioritizes_correctly?">Prioritizes Correctly?</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td>Not using CDN</td> + <td>Unknown</td> + <td>57.81%</td> + <td>60.41%</td> + <td>59.21%</td> + </tr> + <tr> + <td>Cloudflare</td> + <td>Pass</td> + <td>23.15%</td> + <td>21.77%</td> + <td>22.40%</td> + </tr> + <tr> + <td>Google</td> + <td>Fail</td> + <td>6.67%</td> + <td>7.11%</td> + <td>6.90%</td> + </tr> + <tr> + <td>Amazon CloudFront</td> + <td>Fail</td> + <td>2.83%</td> + <td>2.38%</td> + <td>2.59%</td> + </tr> + <tr> + <td>Fastly</td> + <td>Pass</td> + <td>2.40%</td> + <td>1.77%</td> + <td>2.06%</td> + </tr> + <tr> + <td>Akamai</td> + <td>Pass</td> + <td>1.79%</td> + <td>1.50%</td> + <td>1.64%</td> + </tr> + <tr> + <td></td> + <td>Unknown</td> + <td>1.32%</td> + <td>1.58%</td> + <td>1.46%</td> + </tr> + <tr> + <td>WordPress</td> + <td>Pass</td> + <td>1.12%</td> + <td>0.99%</td> + <td>1.05%</td> + </tr> + <tr> + <td>Sucuri Firewall</td> + <td>Fail</td> + <td>0.88%</td> + <td>0.75%</td> + <td>0.81%</td> + </tr> + <tr> + <td>Incapsula</td> + <td>Fail</td> + <td>0.39%</td> + <td>0.34%</td> + <td>0.36%</td> + </tr> + <tr> + <td>Netlify</td> + <td>Fail</td> + <td>0.23%</td> + <td>0.15%</td> + <td>0.19%</td> + </tr> + <tr> + <td>OVH CDN</td> + <td>Unknown</td> + <td>0.19%</td> + <td>0.18%</td> + <td>0.18%</td> + </tr> + </tbody> + </table> + <p> + <strong>Figure 13 - HTTP/2 prioritization support in common CDNs</strong> + </p> + <p> + This shows that a not insignificant portion of traffic is subject to the + identified issue. How much of a problem this is, depends on exactly how your + page loads and whether high priority resources are discovered late or not + for your site, but it does show another complexity to take into + considerations. + </p> + <p> + Another issue is with the <code>upgrade</code> HTTP header being used + incorrectly. Web servers can respond to requests with an + <code>upgrade</code> HTTP header suggesting that it supports a better + protocol that the client might wish to use (e.g. advertise HTTP/2 to a + client only using HTTP/1.1). You might think this would be useful as a way + of informing the browser it supports HTTP/2 but since browsers only support + HTTP/2 over HTTPS and since use of HTTP/2 can be negotiated through the + HTTPS handshake, the use of this <code>upgrade</code> header for advertising + HTTP/2 is pretty limited (to browsers at least). Worse than that, is when a + server sends an upgrade header in error. This could be because an HTTP/2 + supporting backend server is sending the header and then an HTTP/1.1-only + edge server is blindly forwarding to the client. Apache emits the + <code>upgrade</code> header when mod_http2 is enabled but HTTP/2 is not + being used, and a nginx instance sitting in front of such an Apache happily + forwards this header even when nginx does not support HTTP/2. This false + advertising then leads to clients trying (and failing!) to use HTTP/2 as + they are advised to. 108 site use HTTP/2 and yet suggest upgrading to HTTP/2 + in this <code>upgrade</code> header. 
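+  </p>
+  <p>
+    If your own stack is affected, the fix is either to stop the backend
+    advertising an upgrade it cannot honour, or to strip the header at the
+    edge. A sketch of the latter for an nginx reverse proxy sitting in front of
+    such an Apache (the backend name is a placeholder):
+  </p>
+  <pre><code>location / {
+    proxy_pass http://backend;
+    # do not forward the backend's misleading Upgrade header
+    proxy_hide_header Upgrade;
+}</code></pre>
+  <p>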
+    A further 12,767 sites on desktop (15,235 on mobile) suggest upgrading an
+    HTTP/1.1 connection delivered over HTTPS to HTTP/2 when it's clear this was
+    not available, or it would have been used already. These are a small
+    minority of the 4.3 million sites crawled on desktop and 5.3 million sites
+    crawled on mobile for these stats, but it shows that this was still an
+    issue affecting a number of sites out there. Browsers handle this
+    inconsistently, with Safari in particular attempting to upgrade and then
+    getting itself in a mess and refusing to display the site at all. All this
+    is before we get into sites recommending upgrading to <code>http1.0</code>,
+    <code>http://1.1</code> or even <code>-all,+TLSv1.3,+TLSv1.2</code>
+    (clearly some typos in web server configurations going on here!).
+  </p>
+  <p>
+    There are further implementation issues we could look at. For example,
+    HTTP/2 is much stricter about HTTP header names, rejecting the whole
+    request if you respond with header names containing spaces, colons or other
+    invalid characters. The header names are also converted to lowercase, which
+    catches some by surprise if their application assumes a certain
+    capitalization (which was never guaranteed previously, as
+    <a href="https://tools.ietf.org/html/rfc7230#section-3.2">HTTP/1.1 specifically states the header names are case insensitive</a>,
+    but still some have depended on this). The HTTP Archive could potentially
+    be used to identify these issues as well, though some of them will not be
+    apparent on the home page, so we did not delve into that this year.
+  </p>
+  <h2 id="http3">HTTP/3</h2>
+  <p>
+    The world does not stand still and, despite HTTP/2 not having even reached
+    its official 5th birthday, people are already seeing it as old news and
+    getting more excited about its successor: HTTP/3. HTTP/3 builds on the
+    concepts of HTTP/2, but moves from the TCP connections that HTTP has always
+    used to a UDP-based protocol called QUIC. This allows us to fix one edge
+    case where HTTP/2 is slower than HTTP/1.1: when there is high packet loss,
+    the guaranteed nature of TCP holds up, and throttles back, all streams. It
+    also allows us to address some TCP and HTTPS inefficiencies, such as
+    consolidating on one handshake for both, and supporting many ideas for TCP
+    that have proven hard to implement in real life (TCP fast open, 0-RTT,
+    …etc.). HTTP/3 also cleans up some overlap between TCP and HTTP/2 (e.g.
+    flow control being implemented in both layers) but conceptually it is very
+    similar to HTTP/2. Web developers who understand and have optimized for
+    HTTP/2 should not have to make any further changes for HTTP/3. Server
+    operators will have more work to do, however, as the differences between
+    TCP and QUIC are much more groundbreaking. These differences will make
+    implementation harder, so the roll out of HTTP/3 may take considerably
+    longer than HTTP/2 and initially be limited to those with certain expertise
+    in the field (e.g. CDNs).
+  </p>
+  <p>
+    QUIC has been implemented by Google for a number of years and it is now
+    undergoing a similar standardization process to the one SPDY went through
+    on its way to HTTP/2. At the end of 2018 it was decided to name the HTTP
+    part of QUIC HTTP/3 (in Google's version of QUIC it was simply known as
+    HTTP/2, even though it was not exactly the same as regular HTTP/2). QUIC
+    has ambitions beyond just HTTP, but for the moment it is the use case being
+    worked on.
Just + as this chapter was being written, + <a href="https://blog.cloudflare.com/http3-the-past-present-and-future/" + >Cloudflare, Chrome and Firefox all announced HTTP/3 support</a + > + despite the fact that HTTP/3 is still not formally complete or approved as a + standard yet. This is welcome as QUIC support has been somewhat lacking + outside of Google until recently and definitely lags SPDY and HTTP/2 support + from a similar stage of standardization. + </p> + <p> + Because HTTP/3 uses QUIC over UDP rather than TCP it makes the discovery of + HTTP/3 support a bigger challenge than HTTP/2 discovery. With HTTP/2 we can + mostly use the HTTPS handshake, but as HTTP/3 is on a completely different + connection that is not an option here. HTTP/2 did also use the + <code>upgrade</code> HTTP header to inform of HTTP/2 support, and although + that was not that useful for HTTP/2, a similar mechanism has been put in + place for QUIC that is more useful. The <em>alternative services</em> HTTP + header (<code>alt-svc</code>) advertises alternative protocols that can be + used on completely different connections (as opposed to alternative + protocols that can be used on this connection - which is what the + <code>upgrade</code> HTTP header is used for). Analysis of this header shows + that 7.67% of desktop sites and 8.38% of mobile sites already support QUIC + (which roughly represents Google percentage of traffic unsurprisingly enough + as it has been using this for a while), and 0.04% are already supporting + <code>h3</code> (meaning HTTP/3) in this field. I would imagine by next + year’s Almanac this number will have increased significantly. + </p> + <h2 id="conclusion">Conclusion</h2> + <p> + This analysis of the available statistics in HTTP Archive has shown what + many of us in the HTTP community were already aware of: HTTP/2 is here and + proving very popular. It is already the dominant protocol in terms of number + of request but has not quite overtaken HTTP/1.1 in terms of number of sites + supported. The long tail of the internet means that it often takes an + exponentially longer time to make noticeable gains on the less + well-maintained sites than on the high profile, high volume sites. + </p> + <p> + We've also talked about how it is (still!) not easy to get HTTP/2 support in + some installations. Server developers, operating system distributors and end + customers all have a part to play in pushing to make that easier. Tying + software to operating systems always lengthens deployment time - and in fact + one of the very reasons for QUIC is to break a similar barrier with + deploying TCP changes. In many instances there is no real reason to tie web + server versions to operating systems. Apache (to use one of the more popular + examples) will run with HTTP/2 support in older operating systems but + getting an up to date version on to the server should not require the + expertise or risk it currently does. Nginx does very well here hosting + repositories for the common Linux flavors to make installation easier and if + the Apache team (or the Linux distribution vendors) do not offer something + similar, then I can only see Apache's usage continuing to shrink as it + struggles to hold relevance and shake its reputation as old and slow - based + on older installs - even though up to date versions have one of the best + HTTP/2 implementations. I see that as less of an issue for IIS since it is + usually the preferred web server on the Windows side. 
+ </p> + <p> + Other than that, HTTP/2 has been a relatively easy upgrade path - which is + why it has the strong uptake it has already seen. For the most part, it is a + painless switch on and therefore, for most, it has turned out to be a + hassle-free performance increase that requires little thought once your + server supports it. The devil is in the details though (as always), and + small differences between server implementations can result in better or + worse HTTP/2 usage and ultimately end user experience. There have also been + a number of bugs and even + <a + href="https://github.com/Netflix/security-bulletins/blob/master/advisories/third-party/2019-002.md" + >security issues</a + >, as is to be expected with any new protocol. Ensuring you are using a + strong, up to date, well maintained implementation of any newish protocol + like HTTP/2 will ensure you stay on top of these issues. However, that can + take expertise and managing. The roll out of QUIC and HTTP/3 will likely be + even more complicated and require more expertise. Perhaps this is best left + to third party service providers like CDNs who have this expertise and can + give your site easy access to these features? However, even when left to the + experts, this is not a sure thing (as the prioritization statistics show), + but if you choose your server provider wisely and engage with them on what + your priorities are, then it should be an easier implementation. And on that + note it would be great if the CDNs prioritized the issue highlighted above + (pun definitely intended!), though I suspect with the advent of a new + prioritization method in HTTP/3, many will hold tight. The next year will + prove yet more interesting times in the HTTP world. + </p> </section> {% endblock %} diff --git a/src/templates/en/2019/chapters/markup.html b/src/templates/en/2019/chapters/markup.html index 1ac66c699a4..faca30ddccc 100644 --- a/src/templates/en/2019/chapters/markup.html +++ b/src/templates/en/2019/chapters/markup.html @@ -1,246 +1,827 @@ -{# IMPORTANT! +<!--{# IMPORTANT! -- `chapter.html` is a "template for templates" used by the `generate_chapters.py` script, hence the strange template syntax (eg, double braces `{% ... 
%}`) +- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: - translate the corresponding language-specific templates (eg `src/templates/<lang>/<year>/chapter.html`) - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script -#} +#}--> -{% extends "en/2019/base_chapter.html" %} +{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} +<link rel="stylesheet" href="/static/css/chapter.css" /> +{% endblock %} {% set metadata = +{"part_number":"I","chapter_number":3,"title":"Markup","authors":["bkardell"],"reviewers":["zcorpan"," +tomhodgins"," matthewp"]} %} {% block main %} +<aside> + <ul> + <li> + <a href="#methodology">Methodology</a> + </li> -{% block styles %} -{{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css"> -{% endblock %} + <li> + <a href="#top-elements-and-general-info">Top elements and general info</a> + + <ul> + <li> + <a href="#elements-per-page">Elements per page</a> + </li> + </ul> + </li> + + <li> + <a href="#custom-elements">Custom elements?</a> + </li> + + <li> + <a href="#perspective-on-value-and-usage" + >Perspective on Value and Usage</a + > + </li> + + <li> + <a href="#lots-of-data-real-dom-on-the-real-web" + >Lots of data: Real DOM on the Real Web</a + > -{% set metadata = {'part_number': 'I', 'chapter_number': 3, 'title': 'Markup', 'authors': ['bkardell'], 'reviewers': ['zcorpan', 'tomhodgins', 'matthewp']} %} + <ul> + <li> + <a href="#products-and-libraries-and-their-custom-markup" + >Products (and libraries) and their custom markup</a + > + </li> + + <li> + <a href="#common-use-cases-and-solutions" + >Common use cases and solutions</a + > + </li> + + <li> + <a href="#in-summary">In Summary</a> + </li> + </ul> + </li> + </ul> +</aside> -{% block main %} <section class="main"> <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" class="chapter-hero"> - <p>In 2005, Ian "Hixie" Hickson posted <a href="https://web.archive.org/web/20060203035414/http://code.google.com/webstats/index.html">some analysis of markup data</a> building upon various previous work. Much of this work aimed to investigate class names to see if there were common informal semantics that were being adopted by developers which it might make sense to standardize upon. Some of this research helped inform new elements in HTML5.</p> -<p>14 years later, it's time to take a fresh look. Since then, we've also had the introduction of Custom Elements and the <a href="https://extensiblewebmanifesto.org/">Extensible Web Manifesto</a> encouraging that we find better ways to pave the cowpaths by allowing developers to explore the space of elements themselves and allow standards bodies to<a href="https://bkardell.com/blog/Dropping-The-F-Bomb-On-Standards.html"> act more like dictionary editors</a>. 
Unlike CSS class names which might be used for anything, we can be far more certain that authors who used a non-standard <em>element</em> really intended this to be an element.</p> -<p>As of July 2019, the HTTP Archive has begun collecting all used <em>element</em> names in the DOM for about 4.4 million desktop home pages, and about 5.3 million mobile home pages which we can now begin to research and dissect.</p> -<p>This crawl encountered <em>over 5000 distinct non-standard element names</em> in these pages, so we capped the total distinct number of elements that we count to the 'top' (explained below) 5048.</p> -<h2>Methodology</h2> -<p>Names of elements on each page were collected from the DOM itself, post initial run of JavaScript.</p> -<p>Looking at a raw frequency count isn't especially helpful, even for standard elements: About 25% of all elements encountered are <code><div></code>. About 17% are <code><a></code>, about 10.6% are <code><span></code> -- and those are the only elements that account for more than 10% of occurrences. Languages are <a href="https://www.youtube.com/watch?v=fCn8zs912OE">generally like this</a>, a small number of terms are astoundingly used by comparison. Further, when we start looking at non-standard elements for uptake, this would be very misleading as one site could use a certain element a thousand times and thus make it look artificially very popular.</p> -<p>Instead, as in Hixie's original study, what we will look at is how many sites include each element at least once in their homepage (Note: This is, itself, not without some potential biases. Popular products can be used by several sites, which introduce non-standard markup, even 'invisibly' to individual authors. Thus, care must be taken to acknowledge that usage doesn't necessarily imply direct author knowledge and conscious adoption as much as it does the servicing of a common need, in a common way. During our research, we found several examples of this, some we will call out.)</p> -<h2>Top elements and general info</h2> -<p>In 2005, Hixie's survey listed the top few most commonly used elements on pages. The top 3 were <code>html</code>, <code>head</code> and <code>body</code> which he noted as interesting because they are optional and created by the parser if omitted. Given that we use the post-parsed DOM, they'll show up universally in our data. Thus, we'll begin with the 4th most used element. 
Below is a comparison of the data from then to now (I've included the frequency comparison here as well just for fun).</p> -<table> - <tr> - <td>2005 (per site)</td> - <td>2019 (per site)</td> - <td>2019 (frequency)</td> - </tr> - <tr> - <td>title</td> - <td>title</td> - <td>div</td> - </tr> - <tr> - <td>a</td> - <td>meta</td> - <td>a</td> - </tr> - <tr> - <td>img</td> - <td>a</td> - <td>span</td> - </tr> - <tr> - <td>meta</td> - <td>div</td> - <td>li</td> - </tr> - <tr> - <td>br</td> - <td>link</td> - <td>img</td> - </tr> - <tr> - <td>table</td> - <td>script</td> - <td>script</td> - </tr> - <tr> - <td>td</td> - <td>img</td> - <td>p</td> - </tr> - <tr> - <td>tr</td> - <td>span</td> - <td>option</td> - </tr> -</table><h3>Elements per page</h3> -<p>Comparing data to that of Hixie's report from 2005 shows that the average size of DOM trees has gotten bigger.</p> -<table> - <tr> - <td>2005</td> - <td>2019</td> - </tr> - <tr> - <td> - <img src="/static/images/2019/03_Markup/hixie_elements_per_page.png" width="300px"> - </td> - <td> - <iframe width="600" height="371" seamless frameborder="0" scrolling="no" src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=2141583176&format=interactive"></iframe> - </td> - </tr> -</table><p>And also that both the average number of types of elements per page has increased, as well as the maximum numbers of unique elements that we encounter...</p> -<table> - <tr> - <td>2005</td> - <td>2019</td> - </tr> - <tr> - <td> - <img src="/static/images/2019/03_Markup/hixie_element_types_per_page.png"width="300px"> - </td> - <td> - <iframe width="600" height="371" seamless frameborder="0" scrolling="no" src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1500675289&format=interactive"></iframe> - </td> - </tr> -</table><h2>Custom elements?</h2> -<p>Most of the elements we recorded are custom (as in simply 'not standard'), but discussing which elements are and are not custom can get a little challenging. Written down in some spec or proposal somewhere are, actually, quite a few elements. For purposes here, we considered 244 elements as standard (though, some of them are deprecated or unsupported):</p> -<ul> -<li>145 Elements from HTML</li> -<li>68 Elements from SVG</li> -<li>31 Elements from MathML</li> -</ul> -<p>In practice, we encountered only 214 of these:</p> -<ul> -<li>137 from HTML</li> -<li>54 from SVG</li> -<li>23 from MathML</li> -</ul> -<p>In the desktop dataset we collected data for the top 4,834 non-standard elements that we encountered. Of these:</p> -<ul> -<li>155 (3.21%) are identifiable as very probable markup or escaping errors (they contain characters in the parsed tag name which imply that the markup is broken)</li> -<li>341 (7.05%) use XML-style colon namespacing (though, as HTML, they don't use actual XML namespaces)</li> -<li>3207 (66.44%) are valid custom element names</li> -<li>1211 (25.05%) are in the global namespace (non-standard, having neither dash, nor colon)<ul> -<li>216 of these we have flagged as <em>possible </em>typos as they are longer than 2 characters and have a Levenshtein distance of 1 from some standard element name like <code><cript></code>,<code><spsn></code> or <code><artice></code>. 
Some of these (like <code><jdiv></code>), however, are certainly intentional.</li> -</ul> -</li> -</ul> -<p>Additionally, 15% of desktop pages and 16% of mobile pages contain deprecated elements (NOTE: A lot of this is very likely due to the use of products rather than individual authors continuing to manually create this markup.), here's the most common 10 and the number of pages they appear on in each set...</p> -<table> - <tr> - <td>element</td> - <td>mobile</td> - <td>desktop</td> - </tr> - <tr> - <td><code><center><code></td> - <td>7.96%</td> - <td>8.30%</td> - </tr> - <tr> - <td><code><font></code></td> - <td>7.80%</td> - <td>8.01%</td> - </tr> - <tr> - <td><code><marquee></code></td> - <td>1.20%</td> - <td>1.07%</td> - </tr> - <tr> - <td><code><nobr></code></td> - <td>0.55%</td> - <td>0.71%</td> - </tr> - <tr> - <td><code><big></code></td> - <td>0.47%</td> - <td>0.53%</td> - </tr> - <tr> - <td><code><frame></code></td> - <td>0.35%</td> - <td>0.39%</td> - </tr> - <tr> - <td><code><frameset></code></td> - <td>0.39%</td> - <td>0.35%</td> - </tr> - <tr> - <td><code><strike></code></td> - <td>0.27%</td> - <td>0.32%</td> - </tr> - <tr> - <td><code><noframes></code></td> - <td>0.27%</td> - <td>0.25%</td> - </tr> -</table><p>Most of these can seem like very small numbers, but perspective matters.</p> -<h2>Perspective on Value and Usage</h2> -<p>In order to discuss numbers about the use of elements (standard, deprecated or custom), we first need to establish some perspective.</p> -<p>The top 150 element names, counting the number of pages where they appear, are shown in this chart:</p> -<iframe width="877" height="588" seamless frameborder="0" scrolling="no" src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1694360298&format=interactive"></iframe><p>Note how quickly use drops off.</p> -<p>11 elements occur in over 90% <code><html></code>, <code><head></code>, <code><body></code>, <code><title></code>, <code><meta></code>, <code><a></code>,<code><div></code>, <code><link></code>, <code><script></code>, <code><img></code> and <code><span></code>.</p> -<p>Only 15 more elements occur in at least 50% of the home pages (<code><ul></code>, <code><li></code>, <code><p></code>, <code><style></code>, <code><input></code>, <code><br></code>, <code><form></code>, <code><h2></code>, <code><h1></code>, <code><iframe></code>, <code><h3></code>, <code><button></code>, <code><footer></code>, <code><header></code>, <code><nav></code> are the others).</p> -<p>And only 40 more elements occur on more than 5% of pages.</p> -<p>Even <code><video></code>, for example, doesn't make that cut. It appears on only 4.21% of pages in the dataset (on desktop, only 3.03% on mobile). While these numbers sound very low, 4.21% is actually <em>quite</em> popular by comparison. In fact, only 98 elements occur on more than 1% of pages.</p> -<p>It's interesting, then, to look at what the distribution of these elements looks like and which ones have more than 1% use. Below is a chart that shows the rank of each element and which category they fall into. I've separated the data points into discrete sets simply so that they can be viewed (otherwise there just aren't enough pixels to capture all that data), but they represent a single 'line' of popularity - the left-most being the most common, the right-most being the least common. 
The arrow points to the end of elements that appear in more than 1% of the pages.</p> -<p>(( TODO: there is a corresponding image in the google doc <a href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit">https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a> ))</p> -<p>You can observe two things here: First, that the set of elements that have more than 1% use are not exclusively HTML. In fact, <em>27 of the most popular 100 elements aren't even HTML</em> - they are SVG! And there are <em>non-standard tags at or very near that cutoff too</em>! Second, note that a whole lot of HTML elements are used by less than 1% of pages.</p> -<p>So, are all of those elements used by less than 1% of pages "useless?". Definitely not. This is why establishing perspective matters. There are around <a href="https://www.websitehostingrating.com/internet-statistics-facts/">2 billion web sites on the web</a>. If something appears on 0.1% of all websites in our dataset, we can extrapolate that this represents perhaps <em>2 million web sites</em> in the whole web. Even 0.01% extrapolates to a <em>two hundred of thousand of sites</em>. This is also why removing support for elements, even very old ones which we think aren't great ideas, is a very rare occurrence: Breaking hundreds of thousands or millions of sites just isn't a thing that browser vendors can do lightly.</p> -<p>So, lots of elements, even the native ones, have less than 1% use and are still very important and successful. <code><code></code>, for example, is an element that I both use and encounter a lot. It's definitely useful and important - and yet it is used on only 0.57% of these pages. Part of this is skewed based on what we are measuring - home pages are generally <em>less likely</em> to include certain kinds of things (like <code><code></code> for example): They serve a less general purpose than, for example, headings, paragraphs, links and lists, however, the data is generally useful.</p> -<p>We also collected information about which pages contained an author defined (not native) <code>.shadowRoot</code> -- About 0.22% of the pages on desktop, and 0.15% on mobile. This might not sound like a lot, but it is roughly 6.5k sites in the mobile dataset and 10k sites on the desktop and is more than several HTML elements. <code><summary></code> for example, has about equivalent use on the desktop and it is the 146th most popular element.</p> -<p><code><datalist></code> appeared in 0.04% of homepages, it is the 201st most popular element.</p> -<p>In fact, over 15% of elements we're counting as defined by HTML are outside the top 200 in the desktop dataset . <code><meter></code> is the least popular "HTML5 era" element (2004-2011, before HTML moved to a Living Standard model): It is around the 1000th most popular element. <code><slot></code>, the most recently introduced element (April 2016), is only around the 1400th most popular element.</p> -<h2>Lots of data: Real DOM on the Real Web</h2> -<p>With this perspective in mind about what use of native/standard features looks like in the dataset, let's talk about the non-standard stuff.</p> -<p>You might expect that lots of elements we recorded are used only on a single domain, but in fact, no element we're talking about in this list of 5048 elements is used on only a single domain. The least number of domains an element in our dataset appears in is 15. About a fifth of them occur on more than 100 domains. 
About 7% occur on more than 1000 domains.</p> -<p>To help analyze the data, I hacked together a <a href="https://rainy-periwinkle.glitch.me">little tool with Glitch</a> - where possible I link my observations to a page containing the data. You can use this tool yourself, and please share a permalink back with the <a href="https://twitter.com/HTTPArchive">@HTTPAchive</a> along with your observations (Tommy Hodgins has also built a similar <a href="https://github.com/tomhodgins/hade">CLI tool</a> which you can use to explore).</p> -<p>Let's look at some data...</p> -<h3>Products (and libraries) and their custom markup</h3> -<p>As in Hixie's original research, it seems that several of the extremely popular ones have more to do with being a part of popular <em>products than themselves being universally adopted</em>. Many of the ones <a href="https://web.archive.org/web/20060203031245/http://code.google.com/webstats/2005-12/editors.html">Ian Hickson mentioned 14 years ago</a> seem to have dwindled, but not disappeared, but some are still pretty huge.</p> -<p>Those he mentioned as being pervasive and created by <a href="https://en.wikipedia.org/wiki/Claris_Home_Page">Claris Home Page</a> (whose last stable release was 21 years ago) still appeared on over 100 domains. <a href="https://rainy-periwinkle.glitch.me/permalink/28b0b7abb3980af793a2f63b484e7815365b91c04ae625dd4170389cc1ab0a52.html"><code><x-claris-window></code>, for example still appears on 130 mobile domains</a> (desktop is similar). Some of the <code><actinic:*></code> elements he mentioned appear on even more: <a href="https://rainy-periwinkle.glitch.me/permalink/30dfca0fde9fad9b2ec58b12cb2b0271a272fb5c8970cd40e316adc728a09d19.html"><code>actinic:basehref</code>, still shows up on 154 pages in the desktop data</a>. (These come from British e-commerce provider <a href="https://www.oxatis.co.uk">Oxatis</a>).</p> -<p>Macromedia's elements seem to have largely disappeared, <a href="https://rainy-periwinkle.glitch.me/permalink/17d49e765c4f1bfef2a3bd183ee0961fe40f0623d2b9ddf885ee35e1f251d14c.html">only one appears at all on our list, and on only 22 domains</a>, however Adobe's Go-Live tags like <a href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html"><code><csscriptdict></code></a> <a href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html">still appear on 640 domains in the desktop dataset</a>.</p> -<p><a href="https://rainy-periwinkle.glitch.me/permalink/bc8f154a95dfe06a6d0fdb099b6c8df61727b2289141a0ef16dc17b2b57d3068.html"><code><o:p></code> (created by Microsoft Office) still appears in ~0.46% of desktop pages</a> (that's over 20k domains) and <a href="https://rainy-periwinkle.glitch.me/permalink/66f75e1fd2b8e62a1e77033601d9f65516df3ff8cb1896ce37fbdb932853d5c5.html">0.32% of mobile page</a> (more than a lot of standard HTML elements).</p> -<p>But there are plenty of newcomers that weren't in Hixie's original report too, and with even bigger numbers...</p> -<p><a href="https://rainy-periwinkle.glitch.me/permalink/e8bf0130c4f29b28a97b3c525c09a9a423c31c0c813ae0bd1f227bd74ddec03d.html"><code><ym-measure></code> is used on more than 1% of pages (both desktop and mobile)</a> -- that's <em>huge</em> -- putting it in the top 100. 
It's a tag injected by Yandex's <a href="https://metrica.yandex.com/about">Metrica</a> analytics <a href="https://www.npmjs.com/package/yandex-metrica-watch">package</a>.</p> -<p><a href="https://rainy-periwinkle.glitch.me/permalink/a532f18bbfd1b565b460776a64fa9a2cdd1aa4cd2ae0d37eb2facc02bfacb40c.html"><code><g:plusone></code> from Google's now defunct Google Plus occurs on over 21k domains (both desktop and mobile)</a>.</p> -<p><a href="https://rainy-periwinkle.glitch.me/permalink/2e2f63858f7715ef84d28625344066480365adba8da8e6ca1a00dfdde105669a.html"><code><fb:like></code> occurs on ~13.8k</a> (mobile, <a href="https://rainy-periwinkle.glitch.me/permalink/a9aceaee7fbe82b3156caf79f48d7ef6b42729bce637f6683dc6c287df52cd5b.html">12.8k on desktop</a>) and <a href="https://rainy-periwinkle.glitch.me/permalink/5a964079ac2a3ec1b4f552503addd406d02ec4ddb4955e61f54971c27b461984.html"><code><fb:like-box></code> occurs on 7.8k</a> (mobile, <a href="https://rainy-periwinkle.glitch.me/permalink/cc56280bb2d659b4426050b0c135b5c15b8ea4f8090756b567c564dac1f0659b.html">7k on desktop</a>)</p> -<p>And <a href="https://rainy-periwinkle.glitch.me/permalink/6997d689f56fe77e5ce345cfb570adbd42d802393f4cc175a1b974833a0e3cb5.html"><code><app-root></code> (generally a framework like Angular) appears on 8.2k mobile sites</a> (<a href="https://rainy-periwinkle.glitch.me/permalink/ee3c9dfbcab568e97c7318d9795b9ecbde0605f247b19b68793afc837796aa5c.html">8.5k on desktop</a>).</p> -<p>Comparing these to a few of the native HTML elements that are below the 5% bar, for perspective, looks something like this (note -- varies slightly based on dataset).</p> -<p>(( TOOD: there is a corresponding image in the google doc <a href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit">https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a> ))</p> -<p>You could draw interesting observations like these all day long.</p> -<p>Here's one that's a little different: Productization causing popularity is evident in outright errors as well. <a href="https://rainy-periwinkle.glitch.me/permalink/3214f840b6ae3ef1074291f60fa1be4b9d9df401fe0190bfaff4bb078c8614a5.html"><code><pclass="ddc-font-size-large"></code> was a parsed tag name which occurred in our dataset in over 1000 sites</a>. This was thanks to a missing space in a popular 'as a service' kind of product. Happily, we reported this error during our research and it was quickly fixed.</p> -<p>In his original paper, Hixie mentions that "The good thing, if we can be forgiven for trying to remain optimistic in the face of all this non-standard markup, is that at least these elements are all clearly using vendor-specific names. This massively reduces the likelihood that standards bodies will invent elements and attributes that clash with any of them." However, as mentioned above, this is not universal. Over 25% of the non-standard elements that we captured don't use any kind of namespacing strategy to avoid polluting the global namespace. Here is <a href="https://rainy-periwinkle.glitch.me/permalink/53567ec94b328de965eb821010b8b5935b0e0ba316e833267dc04f1fb3b53bd5.html">a list of 1157 elements like that from the mobile dataset</a>. Many of those, as you can see, are probably non-problematic as they are obscure names, misspellings and so on -- but at least a few probably present some challenges. 
You'll note, for example, that <code><toast></code> (which Googlers <a href="https://www.chromestatus.com/feature/5674896879255552">recently tried to propose as <code><std-toast></code></a>) appears in this list.</p> -<p>Among the probably not challenging, but popular ones are some interesting entries:</p> -<p><a href="https://rainy-periwinkle.glitch.me/permalink/2ba66fb067dce29ecca276201c37e01aa7fe7c191e6be9f36dd59224f9a36e16.html"><code><ymaps></code> (from yahoo maps) appears on ~12.5k mobile sites</a> (<a href="https://rainy-periwinkle.glitch.me/permalink/7f365899dc8a5341ed5c234162ee4eb187e99a23fc28cdea31af2322029d8b48.html">~8.3k desktop</a>)</p> -<p><a href="https://rainy-periwinkle.glitch.me/permalink/5cfe2db53aadf5049e32cf7db0f7f6d8d2f1d4926d06467d9bdcd0842d943a17.html"><code><cufon></code> and <code><cufontext></code> from a font replacement library from 2008, appear on ~10.5k of mobile pages</a> (~<a href="https://rainy-periwinkle.glitch.me/permalink/c9371b2f13e7e6ff74553f7918c18807cd9222024d970699e493b2935608a5f2.html">8.7k desktop</a>)</p> -<p>There is also <a href="https://rainy-periwinkle.glitch.me/permalink/976b0cf78c73d125644d347be9e93e51d3a9112e31a283259c35942bda06e989.html">the <code><jdiv></code> element appears to be injected by Jivo chat, a popular chat solution which appears on ~40.3k of mobile sites</a> (<a href="https://rainy-periwinkle.glitch.me/permalink/98fb3bf4f44c33edabc05439b10a374a121dbbfc5f83af65e00e859039b13acd.html">~37.6k of desktop pages -- that's roughly ~0.86%)</a>!</p> -<p>Placing these into our same chart as above for perspective looks something like this (again, it varies slightly based on the dataset)</p> -<p>(( TODO: there is a corresponding image in the google doc <a href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit">https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a> ))</p> -<p>The interesting thing about these is that they also introduce a few other ways that our tool can come in very handy: If we're interested in exploring the space of the data, a very specific tag name is just one possible measure. It's definitely the strongest indicator if we can find good slang developing. However, what if that's not all we're interested in?</p> -<h3>Common use cases and solutions</h3> -<p>What if, for example, we were interested in people solving common use cases? This could be because we're looking for solutions to use cases that we currently have ourselves, or for researching more broadly what common use cases people are solving with an eye toward incubating some standardization effort. Let's take a common example: Tabs. Over the years there have been a lot of requests for things like tabs. We can use a fuzzy search here and find that there are <a href="https://rainy-periwinkle.glitch.me/permalink/c6d39f24d61d811b55fc032806cade9f0be437dcb2f5735a4291adb04aa7a0ea.html">many variants of tabs</a>. 
It's a little harder to count use here since we can't as easily distinguish if two elements appear on the same page, so the count provided there conservatively simply takes the one with the largest count -- in most cases the real number of domains is probably significantly larger.</p> -<p>There are also <a href="https://rainy-periwinkle.glitch.me/permalink/e573cf279bf1d2f0f98a90f0d7e507ac8dbd3e570336b20c6befc9370146220b.html">lots of accordions</a>, <a href="https://rainy-periwinkle.glitch.me/permalink/0bb74b808e7850a441fc9b93b61abf053efc28f05e0a1bc2382937e3b78695d9.html">dialogs</a>, at least <a href="https://rainy-periwinkle.glitch.me/permalink/651e592cb2957c14cdb43d6610b6acf696272b2fbd0d58a74c283e5ad4c79a12.html">65 variants of carousels</a>, lots of stuff about <a href="https://rainy-periwinkle.glitch.me/permalink/981967b19a9346ac466482c51b35c49fc1c1cc66177ede440ab3ee51a7912187.html">'popups'</a>, at least <a href="https://rainy-periwinkle.glitch.me/permalink/2e6827af7c9d2530cb3d2f39a3f904091c523c2ead14daccd4a41428f34da5e8.html">27 variants of toggles and switches</a>, and so on.</p> -<p>Perhaps we could research why we need <a href="https://rainy-periwinkle.glitch.me/permalink/5ae67c941395ca3125e42909c2c3881e27cb49cfa9aaf1cf59471e3779435339.html">92 variants of button related elements that aren't a native button</a>, for example, and try to fill the native gap.</p> -<p>If we notice popular things pop up (like <code><jdiv></code>, solving chat) we can take knowledge of things we know (like, that that is what <code><jdiv></code> is about, or <code><olark></code>) and try to look <a href="https://rainy-periwinkle.glitch.me/permalink/db8fc0e58d2d46d2e2a251ed13e3daab39eba864e46d14d69cc114ab5d684b00.html">at at least 43 things we've built for tackling that</a> and follow connections to survey the space.</p> -<h3>In Summary</h3> -<p>So, there's lots of data here, but to summarize:</p> -<ul> -<li>Pages have more elements than they did 14 years ago -- both on average and max.</li> -<li>The lifetime of things on home pages is <em>very</em> long. Deprecating or discontinuing things doesn't make them go away, and it might never.</li> -<li>There is a lot of broken markup out there in the wild (misspelled tags, missing spaces, bad escaping, misunderstandings)</li> -<li>Measuring what 'useful' means is tricky -- lots of native elements don't pass the 5% bar, or even the 1% bar, but lots of custom ones do -- and for lots of reasons. Passing 1% should definitely grab our attention at least, but perhaps so should 0.5% because that is, according to the data, comparatively <em>very</em> successful.</li> -<li>There is already a ton of custom markup out there. 
It comes in a lot of forms, but elements containing a dash definitely seem to have taken off.</li> -<li>We need to increasingly study this data and come up with good observations to help find and pave the cowpaths.</li> -</ul> -<p>That last one is where you come in: We'd love to tap into the creativity and curiosity of the larger community to help explore this data using some of the tools (like <a href="https://rainy-periwinkle.glitch.me/">https://rainy-periwinkle.glitch.me/</a>) -- please share your interesting observations and help build our commons of knowledge and understanding.</p> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="chapter-hero" + /> + <p> + In 2005, Ian "Hixie" Hickson posted + <a + href="https://web.archive.org/web/20060203035414/http://code.google.com/webstats/index.html" + >some analysis of markup data</a + > + building upon various previous work. Much of this work aimed to investigate + class names to see if there were common informal semantics that were being + adopted by developers which it might make sense to standardize upon. Some of + this research helped inform new elements in HTML5. + </p> + <p> + 14 years later, it's time to take a fresh look. Since then, we've also had + the introduction of Custom Elements and the + <a href="https://extensiblewebmanifesto.org/">Extensible Web Manifesto</a> + encouraging that we find better ways to pave the cowpaths by allowing + developers to explore the space of elements themselves and allow standards + bodies to<a + href="https://bkardell.com/blog/Dropping-The-F-Bomb-On-Standards.html" + > + act more like dictionary editors</a + >. Unlike CSS class names which might be used for anything, we can be far + more certain that authors who used a non-standard <em>element</em> really + intended this to be an element. + </p> + <p> + As of July 2019, the HTTP Archive has begun collecting all used + <em>element</em> names in the DOM for about 4.4 million desktop home pages, + and about 5.3 million mobile home pages which we can now begin to research + and dissect. + </p> + <p> + This crawl encountered + <em>over 5000 distinct non-standard element names</em> in these pages, so we + capped the total distinct number of elements that we count to the 'top' + (explained below) 5048. + </p> + <h2 id="methodology">Methodology</h2> + <p> + Names of elements on each page were collected from the DOM itself, post + initial run of JavaScript. + </p> + <p> + Looking at a raw frequency count isn't especially helpful, even for standard + elements: About 25% of all elements encountered are + <code><div></code>. About 17% are <code><a></code>, about 10.6% + are <code><span></code> -- and those are the only elements that + account for more than 10% of occurrences. Languages are + <a href="https://www.youtube.com/watch?v=fCn8zs912OE">generally like this</a + >, a small number of terms are astoundingly used by comparison. Further, + when we start looking at non-standard elements for uptake, this would be + very misleading as one site could use a certain element a thousand times and + thus make it look artificially very popular. + </p> + <p> + Instead, as in Hixie's original study, what we will look at is how many + sites include each element at least once in their homepage (Note: This is, + itself, not without some potential biases. Popular products can be used by + several sites, which introduce non-standard markup, even 'invisibly' to + individual authors. 
Thus, care must be taken to acknowledge that usage + doesn't necessarily imply direct author knowledge and conscious adoption as + much as it does the servicing of a common need, in a common way. During our + research, we found several examples of this, some we will call out.) + </p> + <h2 id="top-elements-and-general-info">Top elements and general info</h2> + <p> + In 2005, Hixie's survey listed the top few most commonly used elements on + pages. The top 3 were <code>html</code>, <code>head</code> and + <code>body</code> which he noted as interesting because they are optional + and created by the parser if omitted. Given that we use the post-parsed DOM, + they'll show up universally in our data. Thus, we'll begin with the 4th most + used element. Below is a comparison of the data from then to now (I've + included the frequency comparison here as well just for fun). + </p> + <table> + <tr> + <td>2005 (per site)</td> + <td>2019 (per site)</td> + <td>2019 (frequency)</td> + </tr> + <tr> + <td>title</td> + <td>title</td> + <td>div</td> + </tr> + <tr> + <td>a</td> + <td>meta</td> + <td>a</td> + </tr> + <tr> + <td>img</td> + <td>a</td> + <td>span</td> + </tr> + <tr> + <td>meta</td> + <td>div</td> + <td>li</td> + </tr> + <tr> + <td>br</td> + <td>link</td> + <td>img</td> + </tr> + <tr> + <td>table</td> + <td>script</td> + <td>script</td> + </tr> + <tr> + <td>td</td> + <td>img</td> + <td>p</td> + </tr> + <tr> + <td>tr</td> + <td>span</td> + <td>option</td> + </tr> + </table> + + <h3 id="elements-per-page">Elements per page</h3> + <p> + Comparing data to that of Hixie's report from 2005 shows that the average + size of DOM trees has gotten bigger. + </p> + <table> + <tr> + <td>2005</td> + <td>2019</td> + </tr> + <tr> + <td> + <img + src="/static/images/2019/03_Markup/hixie_elements_per_page.png" + width="300px" + /> + </td> + <td> + <iframe + width="600" + height="371" + seamless + frameborder="0" + scrolling="no" + src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=2141583176&format=interactive" + ></iframe> + </td> + </tr> + </table> + + <p> + And also that both the average number of types of elements per page has + increased, as well as the maximum numbers of unique elements that we + encounter… + </p> + <table> + <tr> + <td>2005</td> + <td>2019</td> + </tr> + <tr> + <td> + <img + src="/static/images/2019/03_Markup/hixie_element_types_per_page.png" + width="300px" + /> + </td> + <td> + <iframe + width="600" + height="371" + seamless + frameborder="0" + scrolling="no" + src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1500675289&format=interactive" + ></iframe> + </td> + </tr> + </table> + + <h2 id="custom-elements">Custom elements?</h2> + <p> + Most of the elements we recorded are custom (as in simply 'not standard'), + but discussing which elements are and are not custom can get a little + challenging. Written down in some spec or proposal somewhere are, actually, + quite a few elements. 
For purposes here, we considered 244 elements as
+    standard (though, some of them are deprecated or unsupported):
+  </p>
+  <ul>
+    <li>145 Elements from HTML</li>
+    <li>68 Elements from SVG</li>
+    <li>31 Elements from MathML</li>
+  </ul>
+  <p>In practice, we encountered only 214 of these:</p>
+  <ul>
+    <li>137 from HTML</li>
+    <li>54 from SVG</li>
+    <li>23 from MathML</li>
+  </ul>
+  <p>
+    In the desktop dataset we collected data for the top 4,834 non-standard
+    elements that we encountered. Of these:
+  </p>
+  <ul>
+    <li>
+      155 (3.21%) are identifiable as very probable markup or escaping errors
+      (they contain characters in the parsed tag name which imply that the
+      markup is broken)
+    </li>
+    <li>
+      341 (7.05%) use XML-style colon namespacing (though, as HTML, they don't
+      use actual XML namespaces)
+    </li>
+    <li>3207 (66.44%) are valid custom element names</li>
+    <li>
+      1211 (25.05%) are in the global namespace (non-standard, having neither
+      dash, nor colon)
+      <ul>
+        <li>
+          216 of these we have flagged as <em>possible</em> typos as they are
+          longer than 2 characters and have a Levenshtein distance of 1 from
+          some standard element name like <code><cript></code>,
+          <code><spsn></code> or <code><artice></code> (a sketch of this check
+          follows the list below). Some of these (like
+          <code><jdiv></code>), however, are certainly intentional.
+        </li>
+      </ul>
+    </li>
+  </ul>
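+  <p>
+    To make that flagging heuristic concrete, here is a rough sketch in
+    JavaScript (only a sketch, not the exact query used for this chapter) of
+    how a parsed tag name could be marked as a probable typo. The
+    <code>standardNames</code> list is a stand-in for the full set of 244
+    standard names described above.
+  </p>
+  <pre><code>
+// Sketch only: flag names longer than 2 characters that are within an
+// edit (Levenshtein) distance of 1 of some standard element name.
+const standardNames = ['script', 'span', 'article', 'div' /* , ...all 244 */];
+
+function withinDistanceOne(a, b) {
+  if (Math.abs(a.length - b.length) > 1) return false;
+  let i = 0, j = 0, edits = 0;
+  while (i !== a.length && j !== b.length) {
+    if (a[i] === b[j]) { i++; j++; continue; }
+    if (++edits > 1) return false;
+    if (a.length > b.length) i++;      // a has an extra character
+    else if (b.length > a.length) j++; // a is missing a character
+    else { i++; j++; }                 // one character substituted
+  }
+  return edits + (a.length - i) + (b.length - j) &lt;= 1;
+}
+
+function isProbableTypo(name) {
+  return name.length > 2 &&
+    !standardNames.includes(name) &&
+    standardNames.some(std => withinDistanceOne(name, std));
+}
+
+isProbableTypo('cript'); // true -- one edit away from 'script'
+isProbableTypo('jdiv');  // also true, although jdiv is intentional
+</code></pre>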
+  <p>
+    Additionally, 15% of desktop pages and 16% of mobile pages contain
+    deprecated elements (NOTE: A lot of this is very likely due to the use of
+    products rather than individual authors continuing to manually create this
+    markup.). Here are the most common and the percent of pages they appear on
+    in each set…
+  </p>
+  <table>
+    <tr>
+      <td>element</td>
+      <td>mobile</td>
+      <td>desktop</td>
+    </tr>
+    <tr>
+      <td><code><center></code></td>
+      <td>7.96%</td>
+      <td>8.30%</td>
+    </tr>
+    <tr>
+      <td><code><font></code></td>
+      <td>7.80%</td>
+      <td>8.01%</td>
+    </tr>
+    <tr>
+      <td><code><marquee></code></td>
+      <td>1.20%</td>
+      <td>1.07%</td>
+    </tr>
+    <tr>
+      <td><code><nobr></code></td>
+      <td>0.55%</td>
+      <td>0.71%</td>
+    </tr>
+    <tr>
+      <td><code><big></code></td>
+      <td>0.47%</td>
+      <td>0.53%</td>
+    </tr>
+    <tr>
+      <td><code><frame></code></td>
+      <td>0.35%</td>
+      <td>0.39%</td>
+    </tr>
+    <tr>
+      <td><code><frameset></code></td>
+      <td>0.39%</td>
+      <td>0.35%</td>
+    </tr>
+    <tr>
+      <td><code><strike></code></td>
+      <td>0.27%</td>
+      <td>0.32%</td>
+    </tr>
+    <tr>
+      <td><code><noframes></code></td>
+      <td>0.27%</td>
+      <td>0.25%</td>
+    </tr>
+  </table>
+  <p>
+    Most of these can seem like very small numbers, but perspective matters.
+  </p>
+  <h2 id="perspective-on-value-and-usage">Perspective on Value and Usage</h2>
+  <p>
+    In order to discuss numbers about the use of elements (standard, deprecated
+    or custom), we first need to establish some perspective.
+  </p>
+  <p>
+    The top 150 element names, counting the number of pages where they appear,
+    are shown in this chart:
+  </p>
+  <iframe
+    width="877"
+    height="588"
+    seamless
+    frameborder="0"
+    scrolling="no"
+    src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1694360298&format=interactive"
+  ></iframe>
+  <p>Note how quickly use drops off.</p>
+  <p>
+    11 elements occur on over 90% of pages: <code><html></code>,
+    <code><head></code>, <code><body></code>,
+    <code><title></code>, <code><meta></code>,
+    <code><a></code>, <code><div></code>, <code><link></code>,
+    <code><script></code>, <code><img></code> and
+    <code><span></code>.
+  </p>
+  <p>
+    Only 15 more elements occur in at least 50% of the home pages
+    (<code><ul></code>, <code><li></code>, <code><p></code>,
+    <code><style></code>, <code><input></code>,
+    <code><br></code>, <code><form></code>, <code><h2></code>,
+    <code><h1></code>, <code><iframe></code>,
+    <code><h3></code>, <code><button></code>,
+    <code><footer></code>, <code><header></code>,
+    <code><nav></code> are the others).
+  </p>
+  <p>And only 40 more elements occur on more than 5% of pages.</p>
+  <p>
+    Even <code><video></code>, for example, doesn't make that cut. It
+    appears on only 4.21% of pages in the dataset (on desktop, only 3.03% on
+    mobile). While these numbers sound very low, 4.21% is actually
+    <em>quite</em> popular by comparison. In fact, only 98 elements occur on
+    more than 1% of pages.
+  </p>
+  <p>
+    It's interesting, then, to look at what the distribution of these elements
+    looks like and which ones have more than 1% use. Below is a chart that
+    shows the rank of each element and which category they fall into. I've
+    separated the data points into discrete sets simply so that they can be
+    viewed (otherwise there just aren't enough pixels to capture all that
+    data), but they represent a single 'line' of popularity - the left-most
+    being the most common, the right-most being the least common. The arrow
+    points to the end of elements that appear in more than 1% of the pages.
+  </p>
+  <p>
+    (( TODO: there is a corresponding image in the google doc
+    <a
+      href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit"
+      >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a
+    >
+    ))
+  </p>
+  <p>
+    You can observe two things here: First, that the set of elements that have
+    more than 1% use are not exclusively HTML. In fact,
+    <em>27 of the most popular 100 elements aren't even HTML</em> - they are
+    SVG! And there are
+    <em>non-standard tags at or very near that cutoff too</em>! Second, note
+    that a whole lot of HTML elements are used by less than 1% of pages.
+  </p>
+  <p>
+    So, are all of those elements used by less than 1% of pages "useless"?
+    Definitely not. This is why establishing perspective matters. There are
+    around
+    <a href="https://www.websitehostingrating.com/internet-statistics-facts/"
+      >2 billion web sites on the web</a
+    >. If something appears on 0.1% of all websites in our dataset, we can
+    extrapolate that this represents perhaps <em>2 million web sites</em> in
+    the whole web. Even 0.01% extrapolates to
+    <em>two hundred thousand sites</em>. This is also why removing support
+    for elements, even very old ones which we think aren't great ideas, is a
+    very rare occurrence: Breaking hundreds of thousands or millions of sites
+    just isn't a thing that browser vendors can do lightly.
+  </p>
+  <p>
+    So, lots of elements, even the native ones, have less than 1% use and are
+    still very important and successful. <code><code></code>, for example,
+    is an element that I both use and encounter a lot. It's definitely useful
+    and important - and yet it is used on only 0.57% of these pages. Part of
+    this is skewed by what we are measuring - home pages are generally
+    <em>less likely</em> to include certain kinds of things (like
+    <code><code></code> for example): They serve a less general purpose
+    than, for example, headings, paragraphs, links and lists; even so, the
+    data is generally useful.
+  </p>
+  <p>
+    We also collected information about which pages contained an author-defined
+    (not native) <code>.shadowRoot</code> -- about 0.22% of the pages on
+    desktop, and 0.15% on mobile. This might not sound like a lot, but it is
+    roughly 6.5k sites in the mobile dataset and 10k sites on the desktop,
+    which is more than several HTML elements can claim.
+    <code><summary></code>, for example, has about equivalent use on the
+    desktop and it is the 146th most popular element.
+  </p>
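+  <p>
+    As a rough illustration (and only that -- this is a sketch, not the actual
+    crawler code), a minimal in-page check for this might look something like
+    the following; note that it can only see <em>open</em> shadow roots:
+  </p>
+  <pre><code>
+// Sketch only: after scripts have run, does any element on the page
+// carry an author-defined (open) shadow root? Closed shadow roots and
+// built-in (user agent) shadow roots are not exposed via .shadowRoot.
+const hasAuthorShadowRoot =
+  Array.from(document.querySelectorAll('*'))
+    .some(el => el.shadowRoot !== null);
+console.log(hasAuthorShadowRoot);
+</code></pre>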
+  <p>
+    <code><datalist></code> appeared in 0.04% of homepages; it is the
+    201st most popular element.
+  </p>
+  <p>
+    In fact, over 15% of elements we're counting as defined by HTML are outside
+    the top 200 in the desktop dataset. <code><meter></code> is the least
+    popular "HTML5 era" element (2004-2011, before HTML moved to a Living
+    Standard model): It is around the 1000th most popular element.
+    <code><slot></code>, the most recently introduced element (April
+    2016), is only around the 1400th most popular element.
+  </p>
+  <h2 id="lots-of-data-real-dom-on-the-real-web">
+    Lots of data: Real DOM on the Real Web
+  </h2>
+  <p>
+    With this perspective in mind about what use of native/standard features
+    looks like in the dataset, let's talk about the non-standard stuff.
+  </p>
+  <p>
+    You might expect that lots of elements we recorded are used only on a
+    single domain, but in fact, no element we're talking about in this list of
+    5048 elements is used on only a single domain. The smallest number of
+    domains on which any element in our dataset appears is 15. About a fifth
+    of them occur on more than 100 domains. About 7% occur on more than 1000
+    domains.
+  </p>
+  <p>
+    To help analyze the data, I hacked together a
+    <a href="https://rainy-periwinkle.glitch.me">little tool with Glitch</a> -
+    where possible I link my observations to a page containing the data. You
+    can use this tool yourself, and please share a permalink back with the
+    <a href="https://twitter.com/HTTPArchive">@HTTPArchive</a> along with your
+    observations (Tommy Hodgins has also built a similar
+    <a href="https://github.com/tomhodgins/hade">CLI tool</a> which you can
+    use to explore).
+  </p>
+  <p>Let's look at some data…</p>
+  <h3 id="products-and-libraries-and-their-custom-markup">
+    Products (and libraries) and their custom markup
+  </h3>
+  <p>
+    As in Hixie's original research, it seems that several of the extremely
+    popular ones have more to do with being a part of popular
+    <em>products than themselves being universally adopted</em>. Many of the
+    ones
+    <a
+      href="https://web.archive.org/web/20060203031245/http://code.google.com/webstats/2005-12/editors.html"
+      >Ian Hickson mentioned 14 years ago</a
+    >
+    seem to have dwindled but not disappeared, and some are still pretty huge.
+  </p>
+  <p>
+    Those he mentioned as being pervasive and created by
+    <a href="https://en.wikipedia.org/wiki/Claris_Home_Page"
+      >Claris Home Page</a
+    >
+    (whose last stable release was 21 years ago) still appeared on over 100
+    domains.
+    <a
+      href="https://rainy-periwinkle.glitch.me/permalink/28b0b7abb3980af793a2f63b484e7815365b91c04ae625dd4170389cc1ab0a52.html"
+      ><code><x-claris-window></code>, for example, still appears on 130
+      mobile domains</a
+    >
+    (desktop is similar). 
Some of the <code><actinic:*></code> elements he + mentioned appear on even more: + <a + href="https://rainy-periwinkle.glitch.me/permalink/30dfca0fde9fad9b2ec58b12cb2b0271a272fb5c8970cd40e316adc728a09d19.html" + ><code>actinic:basehref</code>, still shows up on 154 pages in the desktop + data</a + >. (These come from British e-commerce provider + <a href="https://www.oxatis.co.uk">Oxatis</a>). + </p> + <p> + Macromedia's elements seem to have largely disappeared, + <a + href="https://rainy-periwinkle.glitch.me/permalink/17d49e765c4f1bfef2a3bd183ee0961fe40f0623d2b9ddf885ee35e1f251d14c.html" + >only one appears at all on our list, and on only 22 domains</a + >, however Adobe's Go-Live tags like + <a + href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html" + ><code><csscriptdict></code></a + > + <a + href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html" + >still appear on 640 domains in the desktop dataset</a + >. + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/bc8f154a95dfe06a6d0fdb099b6c8df61727b2289141a0ef16dc17b2b57d3068.html" + ><code><o:p></code> (created by Microsoft Office) still appears in + ~0.46% of desktop pages</a + > + (that's over 20k domains) and + <a + href="https://rainy-periwinkle.glitch.me/permalink/66f75e1fd2b8e62a1e77033601d9f65516df3ff8cb1896ce37fbdb932853d5c5.html" + >0.32% of mobile page</a + > + (more than a lot of standard HTML elements). + </p> + <p> + But there are plenty of newcomers that weren't in Hixie's original report + too, and with even bigger numbers… + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/e8bf0130c4f29b28a97b3c525c09a9a423c31c0c813ae0bd1f227bd74ddec03d.html" + ><code><ym-measure></code> is used on more than 1% of pages (both + desktop and mobile)</a + > + -- that's <em>huge</em> -- putting it in the top 100. It's a tag injected by + Yandex's <a href="https://metrica.yandex.com/about">Metrica</a> analytics + <a href="https://www.npmjs.com/package/yandex-metrica-watch">package</a>. + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/a532f18bbfd1b565b460776a64fa9a2cdd1aa4cd2ae0d37eb2facc02bfacb40c.html" + ><code><g:plusone></code> from Google's now defunct Google Plus + occurs on over 21k domains (both desktop and mobile)</a + >. + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/2e2f63858f7715ef84d28625344066480365adba8da8e6ca1a00dfdde105669a.html" + ><code><fb:like></code> occurs on ~13.8k</a + > + (mobile, + <a + href="https://rainy-periwinkle.glitch.me/permalink/a9aceaee7fbe82b3156caf79f48d7ef6b42729bce637f6683dc6c287df52cd5b.html" + >12.8k on desktop</a + >) and + <a + href="https://rainy-periwinkle.glitch.me/permalink/5a964079ac2a3ec1b4f552503addd406d02ec4ddb4955e61f54971c27b461984.html" + ><code><fb:like-box></code> occurs on 7.8k</a + > + (mobile, + <a + href="https://rainy-periwinkle.glitch.me/permalink/cc56280bb2d659b4426050b0c135b5c15b8ea4f8090756b567c564dac1f0659b.html" + >7k on desktop</a + >) + </p> + <p> + And + <a + href="https://rainy-periwinkle.glitch.me/permalink/6997d689f56fe77e5ce345cfb570adbd42d802393f4cc175a1b974833a0e3cb5.html" + ><code><app-root></code> (generally a framework like Angular) + appears on 8.2k mobile sites</a + > + (<a + href="https://rainy-periwinkle.glitch.me/permalink/ee3c9dfbcab568e97c7318d9795b9ecbde0605f247b19b68793afc837796aa5c.html" + >8.5k on desktop</a + >). 
+ </p> + <p> + Comparing these to a few of the native HTML elements that are below the 5% + bar, for perspective, looks something like this (note -- varies slightly + based on dataset). + </p> + <p> + (( TOOD: there is a corresponding image in the google doc + <a + href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" + >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a + > + )) + </p> + <p>You could draw interesting observations like these all day long.</p> + <p> + Here's one that's a little different: Productization causing popularity is + evident in outright errors as well. + <a + href="https://rainy-periwinkle.glitch.me/permalink/3214f840b6ae3ef1074291f60fa1be4b9d9df401fe0190bfaff4bb078c8614a5.html" + ><code><pclass="ddc-font-size-large"></code> was a parsed tag name + which occurred in our dataset in over 1000 sites</a + >. This was thanks to a missing space in a popular 'as a service' kind of + product. Happily, we reported this error during our research and it was + quickly fixed. + </p> + <p> + In his original paper, Hixie mentions that "The good thing, if we can be + forgiven for trying to remain optimistic in the face of all this + non-standard markup, is that at least these elements are all clearly using + vendor-specific names. This massively reduces the likelihood that standards + bodies will invent elements and attributes that clash with any of them." + However, as mentioned above, this is not universal. Over 25% of the + non-standard elements that we captured don't use any kind of namespacing + strategy to avoid polluting the global namespace. Here is + <a + href="https://rainy-periwinkle.glitch.me/permalink/53567ec94b328de965eb821010b8b5935b0e0ba316e833267dc04f1fb3b53bd5.html" + >a list of 1157 elements like that from the mobile dataset</a + >. Many of those, as you can see, are probably non-problematic as they are + obscure names, misspellings and so on -- but at least a few probably present + some challenges. You'll note, for example, that + <code><toast></code> (which Googlers + <a href="https://www.chromestatus.com/feature/5674896879255552" + >recently tried to propose as <code><std-toast></code></a + >) appears in this list. 
+ </p> + <p> + Among the probably not challenging, but popular ones are some interesting + entries: + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/2ba66fb067dce29ecca276201c37e01aa7fe7c191e6be9f36dd59224f9a36e16.html" + ><code><ymaps></code> (from yahoo maps) appears on ~12.5k mobile + sites</a + > + (<a + href="https://rainy-periwinkle.glitch.me/permalink/7f365899dc8a5341ed5c234162ee4eb187e99a23fc28cdea31af2322029d8b48.html" + >~8.3k desktop</a + >) + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/5cfe2db53aadf5049e32cf7db0f7f6d8d2f1d4926d06467d9bdcd0842d943a17.html" + ><code><cufon></code> and <code><cufontext></code> from a font + replacement library from 2008, appear on ~10.5k of mobile pages</a + > + (~<a + href="https://rainy-periwinkle.glitch.me/permalink/c9371b2f13e7e6ff74553f7918c18807cd9222024d970699e493b2935608a5f2.html" + >8.7k desktop</a + >) + </p> + <p> + There is also + <a + href="https://rainy-periwinkle.glitch.me/permalink/976b0cf78c73d125644d347be9e93e51d3a9112e31a283259c35942bda06e989.html" + >the <code><jdiv></code> element appears to be injected by Jivo + chat, a popular chat solution which appears on ~40.3k of mobile sites</a + > + (<a + href="https://rainy-periwinkle.glitch.me/permalink/98fb3bf4f44c33edabc05439b10a374a121dbbfc5f83af65e00e859039b13acd.html" + >~37.6k of desktop pages -- that's roughly ~0.86%)</a + >! + </p> + <p> + Placing these into our same chart as above for perspective looks something + like this (again, it varies slightly based on the dataset) + </p> + <p> + (( TODO: there is a corresponding image in the google doc + <a + href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" + >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a + > + )) + </p> + <p> + The interesting thing about these is that they also introduce a few other + ways that our tool can come in very handy: If we're interested in exploring + the space of the data, a very specific tag name is just one possible + measure. It's definitely the strongest indicator if we can find good slang + developing. However, what if that's not all we're interested in? + </p> + <h3 id="common-use-cases-and-solutions">Common use cases and solutions</h3> + <p> + What if, for example, we were interested in people solving common use cases? + This could be because we're looking for solutions to use cases that we + currently have ourselves, or for researching more broadly what common use + cases people are solving with an eye toward incubating some standardization + effort. Let's take a common example: Tabs. Over the years there have been a + lot of requests for things like tabs. We can use a fuzzy search here and + find that there are + <a + href="https://rainy-periwinkle.glitch.me/permalink/c6d39f24d61d811b55fc032806cade9f0be437dcb2f5735a4291adb04aa7a0ea.html" + >many variants of tabs</a + >. It's a little harder to count use here since we can't as easily + distinguish if two elements appear on the same page, so the count provided + there conservatively simply takes the one with the largest count -- in most + cases the real number of domains is probably significantly larger. 
+ </p> + <p> + There are also + <a + href="https://rainy-periwinkle.glitch.me/permalink/e573cf279bf1d2f0f98a90f0d7e507ac8dbd3e570336b20c6befc9370146220b.html" + >lots of accordions</a + >, + <a + href="https://rainy-periwinkle.glitch.me/permalink/0bb74b808e7850a441fc9b93b61abf053efc28f05e0a1bc2382937e3b78695d9.html" + >dialogs</a + >, at least + <a + href="https://rainy-periwinkle.glitch.me/permalink/651e592cb2957c14cdb43d6610b6acf696272b2fbd0d58a74c283e5ad4c79a12.html" + >65 variants of carousels</a + >, lots of stuff about + <a + href="https://rainy-periwinkle.glitch.me/permalink/981967b19a9346ac466482c51b35c49fc1c1cc66177ede440ab3ee51a7912187.html" + >'popups'</a + >, at least + <a + href="https://rainy-periwinkle.glitch.me/permalink/2e6827af7c9d2530cb3d2f39a3f904091c523c2ead14daccd4a41428f34da5e8.html" + >27 variants of toggles and switches</a + >, and so on. + </p> + <p> + Perhaps we could research why we need + <a + href="https://rainy-periwinkle.glitch.me/permalink/5ae67c941395ca3125e42909c2c3881e27cb49cfa9aaf1cf59471e3779435339.html" + >92 variants of button related elements that aren't a native button</a + >, for example, and try to fill the native gap. + </p> + <p> + If we notice popular things pop up (like <code><jdiv></code>, solving + chat) we can take knowledge of things we know (like, that that is what + <code><jdiv></code> is about, or <code><olark></code>) and try + to look + <a + href="https://rainy-periwinkle.glitch.me/permalink/db8fc0e58d2d46d2e2a251ed13e3daab39eba864e46d14d69cc114ab5d684b00.html" + >at at least 43 things we've built for tackling that</a + > + and follow connections to survey the space. + </p> + <h3 id="in-summary">In Summary</h3> + <p>So, there's lots of data here, but to summarize:</p> + <ul> + <li> + Pages have more elements than they did 14 years ago -- both on average and + max. + </li> + <li> + The lifetime of things on home pages is <em>very</em> long. Deprecating or + discontinuing things doesn't make them go away, and it might never. + </li> + <li> + There is a lot of broken markup out there in the wild (misspelled tags, + missing spaces, bad escaping, misunderstandings) + </li> + <li> + Measuring what 'useful' means is tricky -- lots of native elements don't + pass the 5% bar, or even the 1% bar, but lots of custom ones do -- and for + lots of reasons. Passing 1% should definitely grab our attention at least, + but perhaps so should 0.5% because that is, according to the data, + comparatively <em>very</em> successful. + </li> + <li> + There is already a ton of custom markup out there. It comes in a lot of + forms, but elements containing a dash definitely seem to have taken off. + </li> + <li> + We need to increasingly study this data and come up with good observations + to help find and pave the cowpaths. + </li> + </ul> + <p> + That last one is where you come in: We'd love to tap into the creativity and + curiosity of the larger community to help explore this data using some of + the tools (like + <a href="https://rainy-periwinkle.glitch.me/" + >https://rainy-periwinkle.glitch.me/</a + >) -- please share your interesting observations and help build our commons + of knowledge and understanding. + </p> </section> {% endblock %} diff --git a/src/templates/en/2019/chapters/performance.html b/src/templates/en/2019/chapters/performance.html index 15424de0c2c..8515049fc80 100644 --- a/src/templates/en/2019/chapters/performance.html +++ b/src/templates/en/2019/chapters/performance.html @@ -1,192 +1,634 @@ -{# IMPORTANT! +<!--{# IMPORTANT! 
-- `chapter.html` is a "template for templates" used by the `generate_chapters.py` script, hence the strange template syntax (eg, double braces `{% ... %}`) +- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: - translate the corresponding language-specific templates (eg `src/templates/<lang>/<year>/chapter.html`) - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script -#} +#}--> -{% extends "en/2019/base_chapter.html" %} +{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} +<link rel="stylesheet" href="/static/css/chapter.css" /> +{% endblock %} {% set metadata = +{"part_number":"II","chapter_number":7,"title":"Performance","authors":["rviscomi"],"reviewers":["JMPerez"," +obto"," sergeychernyshev"," zeman"]} %} {% block main %} +<aside> + <ul> + <li> + <a href="#intro">Intro</a> + </li> -{% block styles %} -{{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css"> -{% endblock %} + <li> + <a href="#the-state-of-performance">The state of performance</a> + + <ul> + <li> + <a href="#fcp">FCP</a> + + <ul> + <li> + <a href="#fcp-by-device">FCP by device</a> + </li> + + <li> + <a href="#fcp-by-ect">FCP by ECT</a> + </li> + + <li> + <a href="#fcp-by-geo">FCP by geo</a> + </li> + </ul> + </li> + + <li> + <a href="#ttfb">TTFB</a> + + <ul> + <li> + <a href="#ttfb-by-geo">TTFB by geo</a> + </li> + </ul> + </li> + + <li> + <a href="#fid">FID</a> -{% set metadata = {'part_number': 'II', 'chapter_number': 7, 'title': 'Performance', 'authors': ['rviscomi'], 'reviewers': ['JMPerez', 'obto', 'sergeychernyshev', 'zeman']} %} + <ul> + <li> + <a href="#fid-by-device">FID by device</a> + </li> + + <li> + <a href="#fid-by-ect">FID by ECT</a> + </li> + + <li> + <a href="#fid-by-geo">FID by geo</a> + </li> + </ul> + </li> + </ul> + </li> + + <li> + <a href="#conclusion">Conclusion</a> + </li> + </ul> +</aside> -{% block main %} <section class="main"> <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" class="chapter-hero"> - <h2>Intro</h2> -<p>Performance is a visceral part of the user experience. For many websites, an improvement to the user experience by speeding up the page load time aligns with an improvement to conversion rates. Conversely, when performance is poor, users don't convert as often and have even been observed <a href="https://blog.fullstory.com/rage-clicks-turn-analytics-into-actionable-insights/">rage clicking</a> on the page in frustration. For example, <a href="https://wpostats.com/">WPO Stats</a> is a large collection of case studies showing the correlation between performance and business metrics.</p> -<p>There are many ways to quantify web performance. The most important thing is to measure what actually matters to users. Events like <code>onload</code> or <code>DOM content loaded</code> may not necessarily reflect what users experience visually. 
For example, an email client might have a very fast onload event but the only thing loaded is the interstitial progress bar, meanwhile the inbox contents are loading asynchronously. The loading metric that matters for this web app is the "time to inbox", and focusing on the <code>onload</code> event may be misleading. For that reason this chapter will look at more modern and universally applicable paint, load, and interactivity metrics to try to capture how users are actually experiencing the page.</p> -<p>There are two kinds of performance data: lab and field. You may have heard these referred to as synthetic and real-user measurement (or RUM). Measuring performance in the lab ensures that each website is tested under common conditions like browser, connection speed, physical location, cache state, etc. This guarantee of consistency makes each website comparable with one another. On the other hand, measuring performance in the field represents how users actually experience the web in all of the infinite combinations of conditions that we could never capture in the lab. For the purposes of this chapter and understanding real-world user experiences, we'll look at field data.</p> -<h2>The state of performance</h2> -<p>The <a href="https://httparchive.org/">HTTP Archive</a> is an entirely lab-based tool, so to get field data we need to make use of a different dataset. In this section we're using the <a href="http://bit.ly/chrome-ux-report">Chrome UX Report</a> (CrUX), a public dataset from Google that consists of all the same websites as the HTTP Archive and aggregates how Chrome users actually experience them. Experiences are categorized by:</p> -<ul> -<li>the form factor of the users' devices<ul> -<li>desktop</li> -<li>phone</li> -<li>tablet</li> -</ul> -</li> -<li>users' effective connection type (ECT) in mobile terms<ul> -<li>offline</li> -<li>slow 2G</li> -<li>2G</li> -<li>3G</li> -<li>4G</li> -</ul> -</li> -<li>users' geographic location</li> -</ul> -<p>Experiences are measured monthly including paint, load, and interactivity metrics. The first metric we'll look at is <a href="https://developers.google.com/web/fundamentals/performance/user-centric-performance-metrics#first_paint_and_first_contentful_paint">First Contentful Paint</a> (FCP). This is the time users spend waiting for the page to display something useful to the screen, like an image or text. Next, we'll look at look at a loading metric, <a href="https://web.dev/time-to-first-byte">Time to First Byte</a> (TTFB). This is a measure of how long the web page took from the time of the user's navigation until they received the first byte of the response. And finally, the last field metric we'll look at is <a href="https://developers.google.com/web/updates/2018/05/first-input-delay">First Input Delay</a> (FID). This is a relatively new metric and one that represents parts of the UX other than loading performance. It measures the time from a user's first interaction with a page's UI until the time the browser's main thread is ready to process the event.</p> -<p>So let's dive in and see what kind of insights we can find.</p> -<h3>FCP</h3> -<figure> -// Chart: flame distribution of 07_01 -<figcaption>Figure 1. Distribution of websites' fast, average, and slow FCP performance.</figcaption> -</figure><p>In Figure 1 above you can see how FCP experiences are distributed across the web. 
Out of the millions of websites in the CrUX dataset, this chart compresses the distribution down to 1,000 websites where each vertical slice represents a single website. The chart is sorted by the percent of fast FCP experiences, which are those occurring in less than 1 second. Slow experiences occur in 2.5 seconds or more, and average experiences are everything in between. At the extremes of the chart, there are some websites with almost 100% fast experiences and some websites with almost 100% slow experiences. In between, websites have a combination of fast, average, and slow performance that seems to lean more towards fast or average than slow, which is good.</p> -<aside>Note that when a user experiences slow performance, it's hard to say what the reason might be. It could be that the website itself was built poorly and inefficiently. Or there could be other environmental factors like the user's slow connection, empty cache, etc. So when looking at this field data we prefer to say that the user experiences themselves are slow and not necessarily the websites.</aside><p>In order to categorize whether a website is sufficiently <strong>fast</strong> we will use the <a href="https://developers.google.com/speed/docs/insights/v5/about#categories">PageSpeed Insights</a> (PSI) methodology where at least 90% of the website's FCP experiences must be faster than 1 second. Similarly a sufficiently <strong>slow</strong> website has 10% or more FCP experiences slower than 2.5 seconds. We say a website has <strong>average</strong> performance when it doesn't meet either of these conditions.</p> -<figure> -// Chart: Bar distribution of 07_03 -<figcaption>Figure 2. Distribution of websites labelled as having fast, average, or slow FCP.</figcaption> -</figure><figure> - -Fast FCP | Average FCP | Slow FCP --- | -- | -- -2.17% | 37.55% | 60.28% - -<figcaption>Figure 3. Table of the percent of websites labelled as having fast, average, or slow FCP.</figcaption> -</figure><p>In Figures 2 and 3, the results show that only 2.17% of websites are considered fast while 60.28% of websites are considered slow. To help us understand how users experience FCP across different devices, let's segment by form factor.</p> -<h4>FCP by device</h4> -<figure> -// Chart: Flame distribution of 07_01b -<figcaption>Figure 4. Distribution of _desktop_ websites' fast, average, and slow FCP performance.</figcaption> -</figure><figure> -// Chart: Flame distribution of 07_01c -<figcaption>Figure 5. Distribution of _phone_ websites' fast, average, and slow FCP performance.</figcaption> -</figure><p>In Figures 4 and 5 above, the FCP distributions of 1,000 sample websites are broken down by desktop and phone. It's subtle, but the torso of the desktop fast FCP distribution appears to be more convex than the distribution for phone users. This visual approximation suggests that desktop users experience a higher overall proportion of fast FCP. To verify this we can apply the PSI methodology to each distribution.</p> -<figure> -// Chart: Bar distributions of 07_03b -<figcaption>Figure 6. Distribution of websites labelled as having fast, average, or slow FCP, broken down by device type.</figcaption> -</figure><figure> - -Device | Fast FCP | Average FCP | Slow FCP --- | -- | -- | -- -desktop | 2.80% | 39.40% | 57.80% -phone | 1.76% | 35.62% | 62.62% - -<figcaption>Figure 7. 
Table of websites labelled as having fast, average, or slow FCP, broken down by device type.</figcaption> -</figure><p>According to PSI's classification, 2.80% of websites have fast FCP experiences overall for desktop users, compared to 1.76% for mobile users. The entire distribution is skewed slightly faster for desktop experiences, with fewer slow websites and more in the fast and average category.</p> -<h4>FCP by ECT</h4> -<figure> -// Chart: Bar distribution of 07_03c -<figcaption>Figure 8. Distribution of websites labelled as having fast, average, or slow FCP, broken down by <abbr title="effective connection type">ECT</abbr>.</figcaption> -</figure><figure> - -Speed | Fast FCP | Average FCP | Slow FCP --- | -- | -- | -- -4G | 2.31 | 40.10 | 57.59 -3G | 0.04 | 3.48 | 96.49 -2G | 0.03 | 0.30 | 99.68 -slow-2G | 0.03 | 0.08 | 99.89 - -<figcaption>Figure 9. Table of the percent of websites labelled as having fast, average, or slow FCP, broken down by <abbr title="effective connection type">ECT</abbr>.</figcaption> -</figure><p>In Figures 8 and 9 above, FCP experiences are grouped by the ECT of the user experience. Interestingly, there is a correlation between ECT speed and the percent of websites serving fast FCP. As the ECT speeds decrease, the proportion of fast experiences approaches zero. 8.44% of websites serve fast FCP to users with 4G ECT while 57.59% of those websites serve slow FCP. 96.49% of websites serve slow FCP to users with 3G ECT, 99.68% to 2G ECT, and 99.89% to slow-2G ECT. These results suggest that websites almost never serve fast FCP consistently to users on slow connections.</p> -<h4>FCP by geo</h4> -<figure> -// Chart: Bar distribution of 07_03d -<figcaption>Figure 10. Distribution of websites labelled as having fast, average, or slow FCP, broken down by geo.</figcaption> -</figure><p>Finally, we can slice FCP by users' geography (geo). The chart above shows the top 23 geos having the highest number of distinct websites, an indicator of overall popularity of the open web. The geos are sorted by their percent of websites having sufficiently fast FCP experiences. At the top of the list are three <a href="https://en.wikipedia.org/wiki/Asia-Pacific">Asia-Pacific</a> (APAC) geos: Korea, Taiwan, and Japan. This could be explained by the availability of extremely <a href="https://en.wikipedia.org/wiki/List_of_countries_by_Internet_connection_speeds">fast network connection speeds in these regions</a>. Korea has 11.10% of websites meeting the fast FCP bar and only 28.00% rated as slow FCP. Recall that the global distribution of fast/average/slow websites is approximately 2/38/60, making Korea a significantly positive outlier.</p> -<p>Other APAC geos tell a different story. Thailand, Vietnam, Indonesia, and India all have fewer than 1% of fast websites. These geos also have more than double the proportion of slow websites than Korea.</p> -<h3>TTFB</h3> -<p><a href="https://web.dev/time-to-first-byte">Time to First Byte</a> (TTFB) is a measure of how long the web page took from the time of the user's navigation until they received the first byte of the response.</p> -<figure> -![Navigation Timing API diagram of the events in a page navigation](/static/images/2019/07_Performance/nav-timing.png) -<figcaption>Figure 11. Navigation Timing API diagram of the events in a page navigation.</figcaption> -</figure><p>To help explain TTFB and the many factors that affect it, let's borrow a diagram from the Navigation Timing API spec. 
In Figure 11 above, TTFB is the duration from <code>startTime</code> to <code>responseStart</code>, including everything in between: <code>unload</code>, <code>redirects</code>, <code>AppCache</code>, <code>DNS</code>, <code>SSL</code>, <code>TCP</code>, and the time the server spends handling the request. Given that context, let's see how users are experiencing this metric.</p> -<figure> -// Chart: Flame distribution of 07_07 -<figcaption>Figure 12. Distribution of websites' fast, average, and slow TTFB performance.</figcaption> -</figure><p>Similar to the previous FCP chart, this is a view of 1,000 representative samples ordered by fast TTFB. A <a href="https://developers.google.com/speed/docs/insights/Server#recommendations">fast TTFB</a> is one that happens in under 0.2 seconds (200 ms), a slow TTFB happens in 1 second or more, and everything in between is average.</p> -<p>Looking at the curve of the fast proportions, the shape is quite different from that of FCP. There are very few websites that have a fast TTFB greater than 75%, while more than half are below 25%.</p> -<p>Let's apply a TTFB speed label to each website, similar to the PSI methodology used above for FCP. If a website serves fast TTFB to 90% or more user experiences, it's labelled as <strong>fast</strong>. Otherwise if it serves <strong>slow</strong> TTFB to 10% or more user experiences, it's slow. If neither of those conditions apply, it's <strong>average</strong>.</p> -<figure> -// Chart: Bar distribution of 07_08 -<figcaption>Figure 13. Distribution of websites labelled as having fast, average, or slow TTFB.</figcaption> -</figure><figure> - -Fast TTFB | Average TTFB | Slow TTFB --- | -- | -- -0.13% | 30.67% | 69.20% - -<figcaption>Figure 14. Table of the percent of websites labelled as having fast, average, or slow TTFB.</figcaption> -</figure><p>69.20% of websites have slow TTFB. This is significant because TTFB is a blocker for all other performance metrics to follow. A user cannot possibly experience a fast FCP if the TTFB takes more than 1 second. Recall from the previous FCP section that about 98% of websites do not have fast FCP. Therefore the ~70% of websites that have slow TTFB are completely ineligible to be considered as having fast FCP.</p> -<h4>TTFB by geo</h4> -<figure> -// Chart: Bar distribution of 07_08d -<figcaption>Figure 15. Distribution of websites labelled as having fast, average, or slow TTFB, broken down by geo.</figcaption> -</figure><p>Now let's look at the percent of websites serving fast TTFB to users in different geos. APAC geos like Korea, Taiwan, and Japan are still outperforming users from the rest of the world. But no geo has more than 3% of websites with fast TTFB.</p> -<h3>FID</h3> -<p>The last field metric we'll look at is <a href="https://developers.google.com/web/updates/2018/05/first-input-delay">First Input Delay</a> (FID). This metric represents the time from a user's first interaction with a page's UI until the time the browser's main thread is ready to process the event. Note that this doesn't include the time applications spend actually handling the input. At worst, slow FID results in a page that appears unresponsive and a frustrating user experience.</p> -<p>Let's start by defining some thresholds. According to the <a href="https://developers.google.com/speed/docs/insights/v5/about#categories">PSI methodology</a>, a <strong>fast</strong> FID is one that happens in less than 50 ms. 
This gives the application enough time to handle the input event and provide feedback to the user in a time that feels instantaneous. A <strong>slow</strong> FID is one that happens in 250 ms or more. Everything in between is <strong>average</strong>.</p> -<figure> -// Chart: Flame distribution of 07_02 -<figcaption>Figure 16. Distribution of websites' fast, average, and slow FID performance.</figcaption> -</figure><p>You know the drill by now. This chart shows the distribution of 1,000 websites' fast, average, and slow FID. This is a dramatically different chart from the ones for FCP and TTFB. The curve of fast FID very slowly descends from 100% to 75% then takes a nosedive. The overwhelming majority of FID experiences are fast for most websites.</p> -<figure> -// Chart: Bar distribution of 07_04 -<figcaption>Figure 17. Distribution of websites labelled as having fast, average, or slow TTFB.</figcaption> -</figure><figure> - -Fast FID | Average FID | Slow FID --- | -- | -- -26.61% | 42.03% | 31.35% - -<figcaption>Figure 18. Table of the percent of websites labelled as having fast, average, or slow FID.</figcaption> -</figure><p>The PSI methodology for labelling a website as having sufficiently fast or slow FID is slightly different than that of FCP. For a site to be fast, 95% of its FID experiences must be fast. A site is slow if 5% of its FID experiences are slow.</p> -<p>The distribution of fast, average, and slow websites appears to be more balanced, with 26.61% of websites qualifying as fast and 31.35% as slow.</p> -<h4>FID by device</h4> -<figure> -// Chart: Flame distribution of 07_02b -<figcaption>Figure 19. Distribution of _desktop_ websites' fast, average, and slow FID performance.</figcaption> -</figure><figure> -// Chart: Flame distribution of 07_02c -<figcaption>Figure 20. Distribution of _phone_ websites' fast, average, and slow FID performance.</figcaption> -</figure><p>Breaking FID down by device, it becomes clear that there are two very different stories. Desktop users enjoy fast FID almost all the time. Sure there are some websites that throw out a slow experience now and then, but the results are predominantly fast. Mobile users, on the other hand, have what seem to be one of two experiences: pretty fast (but not quite as often as desktop) and almost never fast. The latter is experienced by users on only the tail ~10% of websites, but this is still a substantial difference.</p> -<figure> -// Chart: Bar distributions of 07_04b -<figcaption>Figure 21. Distribution of websites labelled as having fast, average, or slow FID, broken down by device type.</figcaption> -</figure><figure> - -Device | Fast FID | Average FID | Slow FID --- | -- | -- | -- -desktop | 70.32% | 23.20% | 6.48% -phone | 13.76% | 43.21% | 43.03% - -<figcaption>Figure 22. Table of websites labelled as having fast, average, or slow FID, broken down by device type.</figcaption> -</figure><p>When we apply the PSI labelling to desktop and phone experiences, the distinction becomes crystal clear. 70.32% of websites' FID experienced by desktop users are fast compared to 6.48% slow. For mobile experiences, 13.76% of websites are fast while 43.03% are slow.</p> -<h4>FID by ECT</h4> -<figure> -// Chart: Bar distribution of 07_04c -<figcaption>Figure 23. Distribution of websites labelled as having fast, average, or slow FID, broken down by <abbr title="effective connection type">ECT</abbr>.</figcaption> -</figure><p>On its face, FID seems like it would be driven primarily by CPU speed. 
It'd be reasonable to assume that the slower the device itself is, the higher the likelihood that it will be busy when the user attempts to interact with a web page, right?</p> -<p>The ECT results above seem to suggest a correlation between connection speed and FID performance. As users' effective connection speed decreases, the percent of websites on which they experience fast FID also decreases and slow FID increases. Interestingly, the percent of websites with average FID is about the same across ECTs.</p> -<h4>FID by geo</h4> -<figure> -// Chart: Bar distribution of 07_04d -<figcaption>Figure 24. Distribution of websites labelled as having fast, average, or slow FID, broken down by geo.</figcaption> -</figure><p>In this breakdown of FID by geographic location, Korea is out in front of everyone else again. But the top geos have some new faces: the US, Australia, and Canada are next with 35-40% of websites having fast FID.</p> -<p>As with the other geo-specific results, there are so many possible factors that could be contributing to the user experience. For example, perhaps wealthier geos are more privileged to be able to spend more money on better network infrastructure and its residents have more money to spend on desktops and/or high-end mobile phones.</p> -<h2>Conclusion</h2> -<p>Quantifying how fast a web page loads is an imperfect science that can't be represented by a single metric. Conventional metrics like <code>onload</code> can miss the mark entirely by measuring irrelevant or imperceptible parts of the user experience. User-perceived metrics like FCP and FID more faithfully convey what users see and feel. Even still, neither metric can be looked at in isolation to draw conclusions about whether the overall page load experience was fast or slow. Only by looking at many metrics holistically can we start to understand the performance for an individual website and the state of the web.</p> -<p>The data presented in this chapter showed that there is still a lot of work to do to meet the goals set for fast websites. Certain form factors, effective connection types, and geos do correlate with better user experiences, but we can't forget about the combinations of demographics with poor performance. In many cases, the web platform is used for business; making more money from improving conversion rates can be a huge motivator for speeding up a website. Ultimately, for all websites, performance is about delivering positive experiences to users in a way that doesn't impede, frustrate, or enrage them.</p> -<p>As the web gets another year older and our ability to measure how users experience it improves incrementally, I'm looking forward to developers having access to metrics that capture more of the holistic experience. FCP is very early on the timeline of showing useful content to users and newer metrics like <a href="https://web.dev/largest-contentful-paint">Largest Contentful Paint</a> (LCP) are emerging to improve our visibility into how page loads are perceived. Newer metrics from the <a href="https://web.dev/layout-instability-api">Layout Instability API</a> are giving us a new glimpse into the frustration users experience beyond page load. 
Equipped with these new metrics, the web in 2020 will become even more transparent, better understood, and give developers an advantage to make more meaningful progress to improve performance and contribute to positive user experiences.</p> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="chapter-hero" + /> + <h2 id="intro">Intro</h2> + <p> + Performance is a visceral part of the user experience. For + <a href="https://wpostats.com/">many websites</a>, an improvement to the + user experience by speeding up the page load time aligns with an improvement + to conversion rates. Conversely, when performance is poor, users don't + convert as often and have even been observed + <a + href="https://blog.fullstory.com/rage-clicks-turn-analytics-into-actionable-insights/" + >rage clicking</a + > + on the page in frustration. + </p> + <p> + There are many ways to quantify web performance. The most important thing is + to measure what actually matters to users. Events like + <code>onload</code> or <code>DOM content loaded</code> may not necessarily + reflect what users experience visually. For example, an email client might + have a very fast onload event but the only thing loaded is the interstitial + progress bar, meanwhile the inbox contents are loading asynchronously. The + loading metric that matters for this web app is the "time to inbox", and + focusing on the <code>onload</code> event may be misleading. For that reason + this chapter will look at more modern and universally applicable paint, + load, and interactivity metrics to try to capture how users are actually + experiencing the page. + </p> + <p> + There are two kinds of performance data: lab and field. You may have heard + these referred to as synthetic and real-user measurement (or RUM). Measuring + performance in the lab ensures that each website is tested under common + conditions like browser, connection speed, physical location, cache state, + etc. This guarantee of consistency makes each website comparable with one + another. On the other hand, measuring performance in the field represents + how users actually experience the web in all of the infinite combinations of + conditions that we could never capture in the lab. For the purposes of this + chapter and understanding real-world user experiences, we'll look at field + data. + </p> + <h2 id="the-state-of-performance">The state of performance</h2> + <p> + Almost all of the other chapters in the Web Almanac are based on data from + the <a href="https://httparchive.org/">HTTP Archive</a>. In order to capture + how real users experience the web, we need a different dataset. In this + section we're using the + <a href="http://bit.ly/chrome-ux-report">Chrome UX Report</a> (CrUX), a + public dataset from Google that consists of all the same websites as the + HTTP Archive and aggregates how Chrome users actually experience them. + Experiences are categorized by: + </p> + <ul> + <li> + the form factor of the users' devices + <ul> + <li>desktop</li> + <li>phone</li> + <li>tablet</li> + </ul> + </li> + <li> + users' effective connection type (ECT) in mobile terms + <ul> + <li>offline</li> + <li>slow 2G</li> + <li>2G</li> + <li>3G</li> + <li>4G</li> + </ul> + </li> + <li>users' geographic location</li> + </ul> + <p> + Experiences are measured monthly including paint, load, and interactivity + metrics. 
The first metric we'll look at is
+    <a
+      href="https://developers.google.com/web/fundamentals/performance/user-centric-performance-metrics#first_paint_and_first_contentful_paint"
+      >First Contentful Paint</a
+    >
+    (FCP). This is the time users spend waiting for the page to display
+    something useful to the screen, like an image or text. Next, we'll look at
+    a loading metric,
+    <a
+      href="https://csswizardry.com/2019/08/time-to-first-byte-what-it-is-and-why-it-matters/"
+      >Time to First Byte</a
+    >
+    (TTFB). This is a measure of how long the web page took from the time of
+    the user's navigation until they received the first byte of the response.
+    And finally, the last field metric we'll look at is
+    <a
+      href="https://developers.google.com/web/updates/2018/05/first-input-delay"
+      >First Input Delay</a
+    >
+    (FID). This is a relatively new metric and one that represents parts of the
+    UX other than loading performance. It measures the time from a user's first
+    interaction with a page's UI until the time the browser's main thread is
+    ready to process the event.
+  </p>
+  <p>So let's dive in and see what kind of insights we can find.</p>
+  <h3 id="fcp">FCP</h3>
+  <figure>
+    // Chart: flame distribution of 07_01
+    <figcaption>
+      Figure 1. Distribution of websites' fast, average, and slow FCP
+      performance.
+    </figcaption>
+  </figure>
+  <p>
+    In Figure 1 above you can see how FCP experiences are distributed across
+    the web. Out of the millions of websites in the CrUX dataset, this chart
+    compresses the distribution down to 1,000 websites where each vertical
+    slice represents a single website. The chart is sorted by the percent of
+    fast FCP experiences, which are those occurring in less than 1 second. Slow
+    experiences occur in 2.5 seconds or more, and average experiences are
+    everything in between. At the extremes of the chart, there are some
+    websites with almost 100% fast experiences and some websites with almost
+    100% slow experiences. In between, websites have a combination of fast,
+    average, and slow performance that seems to lean more towards fast or
+    average than slow, which is good.
+  </p>
+  <aside>
+    Note that when a user experiences slow performance, it's hard to say what
+    the reason might be. It could be that the website itself was built poorly
+    and inefficiently. Or there could be other environmental factors like the
+    user's slow connection, empty cache, etc. So when looking at this field
+    data we prefer to say that the user experiences themselves are slow and
+    not necessarily the websites.
+  </aside>
+  <p>
+    In order to categorize whether a website is sufficiently
+    <strong>fast</strong> we will use the
+    <a
+      href="https://developers.google.com/speed/docs/insights/v5/about#categories"
+      >PageSpeed Insights</a
+    >
+    (PSI) methodology where at least 90% of the website's FCP experiences must
+    be faster than 1 second. Similarly a sufficiently
+    <strong>slow</strong> website has 10% or more FCP experiences slower than
+    2.5 seconds. We say a website has <strong>average</strong> performance when
+    it doesn't meet either of these conditions.
+  </p>
+  <figure>
+    // Chart: Bar distribution of 07_03
+    <figcaption>
+      Figure 2. Distribution of websites labelled as having fast, average, or
+      slow FCP.
+    </figcaption>
+  </figure>
+  <figure>
+    Fast FCP | Average FCP | Slow FCP
+    -- | -- | --
+    2.17% | 37.55% | 60.28%
+
+    <figcaption>
+      Figure 3. Table of the percent of websites labelled as having fast,
+      average, or slow FCP.
+    </figcaption>
+  </figure>
+  <p>
+    In Figures 2 and 3, the results show that only 2.17% of websites are
+    considered fast while 60.28% of websites are considered slow. To help us
+    understand how users experience FCP across different devices, let's segment
+    by form factor.
+  </p>
+  <h4 id="fcp-by-device">FCP by device</h4>
+  <figure>
+    // Chart: Flame distribution of 07_01b
+    <figcaption>
+      Figure 4. Distribution of <em>desktop</em> websites' fast, average, and
+      slow FCP performance.
+    </figcaption>
+  </figure>
+  <figure>
+    // Chart: Flame distribution of 07_01c
+    <figcaption>
+      Figure 5. Distribution of <em>phone</em> websites' fast, average, and
+      slow FCP performance.
+    </figcaption>
+  </figure>
+  <p>
+    In Figures 4 and 5 above, the FCP distributions of 1,000 sample websites
+    are broken down by desktop and phone. It's subtle, but the torso of the
+    desktop fast FCP distribution appears to be more convex than the
+    distribution for phone users. This visual approximation suggests that
+    desktop users experience a higher overall proportion of fast FCP. To verify
+    this we can apply the PSI methodology to each distribution.
+  </p>
+  <figure>
+    // Chart: Bar distributions of 07_03b
+    <figcaption>
+      Figure 6. Distribution of websites labelled as having fast, average, or
+      slow FCP, broken down by device type.
+    </figcaption>
+  </figure>
+  <figure>
+    <table>
+      <thead>
+        <tr><th>Device</th><th>Fast FCP</th><th>Average FCP</th><th>Slow FCP</th></tr>
+      </thead>
+      <tbody>
+        <tr><td>desktop</td><td>2.80%</td><td>39.40%</td><td>57.80%</td></tr>
+        <tr><td>phone</td><td>1.76%</td><td>35.62%</td><td>62.62%</td></tr>
+      </tbody>
+    </table>
+    <figcaption>
+      Figure 7. Table of websites labelled as having fast, average, or slow
+      FCP, broken down by device type.
+    </figcaption>
+  </figure>
+  <p>
+    According to PSI's classification, 2.80% of websites have fast FCP
+    experiences overall for desktop users, compared to 1.76% for mobile users.
+    The entire distribution is skewed slightly faster for desktop experiences,
+    with fewer slow websites and more in the fast and average category.
+  </p>
+  <h4 id="fcp-by-ect">FCP by ECT</h4>
+  <figure>
+    // Chart: Bar distribution of 07_03c
+    <figcaption>
+      Figure 8. Distribution of websites labelled as having fast, average, or
+      slow FCP, broken down by
+      <abbr title="effective connection type">ECT</abbr>.
+    </figcaption>
+  </figure>
+  <figure>
+    <table>
+      <thead>
+        <tr><th>ECT</th><th>Fast FCP</th><th>Average FCP</th><th>Slow FCP</th></tr>
+      </thead>
+      <tbody>
+        <tr><td>4G</td><td>2.31%</td><td>40.10%</td><td>57.59%</td></tr>
+        <tr><td>3G</td><td>0.04%</td><td>3.48%</td><td>96.49%</td></tr>
+        <tr><td>2G</td><td>0.03%</td><td>0.30%</td><td>99.68%</td></tr>
+        <tr><td>slow-2G</td><td>0.03%</td><td>0.08%</td><td>99.89%</td></tr>
+      </tbody>
+    </table>
+    <figcaption>
+      Figure 9. Table of the percent of websites labelled as having fast,
+      average, or slow FCP, broken down by
+      <abbr title="effective connection type">ECT</abbr>.
+    </figcaption>
+  </figure>
+  <p>
+    In Figures 8 and 9 above, FCP experiences are grouped by the ECT of the
+    user experience. Interestingly, there is a correlation between ECT speed
+    and the percent of websites serving fast FCP. As the ECT speeds decrease,
+    the proportion of fast experiences approaches zero: 2.31% of websites serve
+    fast FCP to users with 4G ECT, while 57.59% of those websites serve slow
+    FCP. 96.49% of websites serve slow FCP to users with 3G ECT, 99.68% to 2G
+    ECT, and 99.89% to slow-2G ECT. These results suggest that websites almost
+    never serve fast FCP consistently to users on slow connections.
+  </p>
+  <h4 id="fcp-by-geo">FCP by geo</h4>
+  <figure>
+    // Chart: Bar distribution of 07_03d
+    <figcaption>
+      Figure 10. Distribution of websites labelled as having fast, average, or
+      slow FCP, broken down by geo.
+    </figcaption>
+  </figure>
+  <p>
+    Finally, we can slice FCP by users' geography (geo).
The chart above shows + the top 23 geos having the highest number of distinct websites, an indicator + of overall popularity of the open web. The geos are sorted by their percent + of websites having sufficiently fast FCP experiences. At the top of the list + are three + <a href="https://en.wikipedia.org/wiki/Asia-Pacific">Asia-Pacific</a> (APAC) + geos: Korea, Taiwan, and Japan. This could be explained by the availability + of extremely + <a + href="https://en.wikipedia.org/wiki/List_of_countries_by_Internet_connection_speeds" + >fast network connection speeds in these regions</a + >. Korea has 11.10% of websites meeting the fast FCP bar and only 28.00% + rated as slow FCP. Recall that the global distribution of fast/average/slow + websites is approximately 2/38/60, making Korea a significantly positive + outlier. + </p> + <p> + Other APAC geos tell a different story. Thailand, Vietnam, Indonesia, and + India all have fewer than 1% of fast websites. These geos also have more + than double the proportion of slow websites than Korea. + </p> + <h3 id="ttfb">TTFB</h3> + <p> + <a href="https://web.dev/time-to-first-byte">Time to First Byte</a> (TTFB) + is a measure of how long the web page took from the time of the user's + navigation until they received the first byte of the response. + </p> + <figure> + ![Navigation Timing API diagram of the events in a page + navigation](/static/images/2019/07_Performance/nav-timing.png) + <figcaption> + Figure 11. Navigation Timing API diagram of the events in a page + navigation. + </figcaption> + </figure> + <p> + To help explain TTFB and the many factors that affect it, let's borrow a + diagram from the Navigation Timing API spec. In Figure 11 above, TTFB is the + duration from <code>startTime</code> to <code>responseStart</code>, + including everything in between: <code>unload</code>, + <code>redirects</code>, <code>AppCache</code>, <code>DNS</code>, + <code>SSL</code>, <code>TCP</code>, and the time the server spends handling + the request. Given that context, let's see how users are experiencing this + metric. + </p> + <figure> + // Chart: Flame distribution of 07_07 + <figcaption> + Figure 12. Distribution of websites' fast, average, and slow TTFB + performance. + </figcaption> + </figure> + <p> + Similar to the previous FCP chart, this is a view of 1,000 representative + samples ordered by fast TTFB. A + <a + href="https://developers.google.com/speed/docs/insights/Server#recommendations" + >fast TTFB</a + > + is one that happens in under 0.2 seconds (200 ms), a slow TTFB happens in 1 + second or more, and everything in between is average. + </p> + <p> + Looking at the curve of the fast proportions, the shape is quite different + from that of FCP. There are very few websites that have a fast TTFB greater + than 75%, while more than half are below 25%. + </p> + <p> + Let's apply a TTFB speed label to each website, similar to the PSI + methodology used above for FCP. If a website serves fast TTFB to 90% or more + user experiences, it's labelled as <strong>fast</strong>. Otherwise if it + serves <strong>slow</strong> TTFB to 10% or more user experiences, it's + slow. If neither of those conditions apply, it's <strong>average</strong>. + </p> + <figure> + // Chart: Bar distribution of 07_08 + <figcaption> + Figure 13. Distribution of websites labelled as having fast, average, or + slow TTFB. + </figcaption> + </figure> + <figure> + Fast TTFB | Average TTFB | Slow TTFB -- | -- | -- 0.13% | 30.67% | 69.20% + + <figcaption> + Figure 14. 
Table of the percent of websites labelled as having fast,
+      average, or slow TTFB.
+    </figcaption>
+  </figure>
+  <p>
+    69.20% of websites have slow TTFB. This is significant because TTFB is a
+    blocker for all other performance metrics to follow. A user cannot possibly
+    experience a fast FCP if the TTFB takes more than 1 second. Recall from the
+    previous FCP section that about 98% of websites do not have fast FCP.
+    Therefore the ~70% of websites that have slow TTFB are completely
+    ineligible to be considered as having fast FCP.
+  </p>
+  <h4 id="ttfb-by-geo">TTFB by geo</h4>
+  <figure>
+    // Chart: Bar distribution of 07_08d
+    <figcaption>
+      Figure 15. Distribution of websites labelled as having fast, average, or
+      slow TTFB, broken down by geo.
+    </figcaption>
+  </figure>
+  <p>
+    Now let's look at the percent of websites serving fast TTFB to users in
+    different geos. APAC geos like Korea, Taiwan, and Japan are still
+    outperforming users from the rest of the world. But no geo has more than 3%
+    of websites with fast TTFB.
+  </p>
+  <h3 id="fid">FID</h3>
+  <p>
+    The last field metric we'll look at is
+    <a
+      href="https://developers.google.com/web/updates/2018/05/first-input-delay"
+      >First Input Delay</a
+    >
+    (FID). This metric represents the time from a user's first interaction with
+    a page's UI until the time the browser's main thread is ready to process
+    the event. Note that this doesn't include the time applications spend
+    actually handling the input. At worst, slow FID results in a page that
+    appears unresponsive and a frustrating user experience.
+  </p>
+  <p>
+    Let's start by defining some thresholds. According to the
+    <a
+      href="https://developers.google.com/speed/docs/insights/v5/about#categories"
+      >PSI methodology</a
+    >, a <strong>fast</strong> FID is one that happens in less than 50 ms. This
+    gives the application enough time to handle the input event and provide
+    feedback to the user in a time that feels instantaneous. A
+    <strong>slow</strong> FID is one that happens in 250 ms or more. Everything
+    in between is <strong>average</strong>.
+  </p>
+  <figure>
+    // Chart: Flame distribution of 07_02
+    <figcaption>
+      Figure 16. Distribution of websites' fast, average, and slow FID
+      performance.
+    </figcaption>
+  </figure>
+  <p>
+    You know the drill by now. This chart shows the distribution of 1,000
+    websites' fast, average, and slow FID. This is a dramatically different
+    chart from the ones for FCP and TTFB. The curve of fast FID very slowly
+    descends from 100% to 75% then takes a nosedive. The overwhelming majority
+    of FID experiences are fast for most websites.
+  </p>
+  <figure>
+    // Chart: Bar distribution of 07_04
+    <figcaption>
+      Figure 17. Distribution of websites labelled as having fast, average, or
+      slow FID.
+    </figcaption>
+  </figure>
+  <figure>
+    <table>
+      <thead>
+        <tr><th>Fast FID</th><th>Average FID</th><th>Slow FID</th></tr>
+      </thead>
+      <tbody>
+        <tr><td>26.61%</td><td>42.03%</td><td>31.35%</td></tr>
+      </tbody>
+    </table>
+    <figcaption>
+      Figure 18. Table of the percent of websites labelled as having fast,
+      average, or slow FID.
+    </figcaption>
+  </figure>
+  <p>
+    The PSI methodology for labelling a website as having sufficiently fast or
+    slow FID is slightly different from that of FCP. For a site to be fast, 95%
+    of its FID experiences must be fast. A site is slow if 5% or more of its
+    FID experiences are slow.
+  </p>
+  <p>
+    The distribution of fast, average, and slow websites appears to be more
+    balanced, with 26.61% of websites qualifying as fast and 31.35% as slow.
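+  </p>
+  <p>
+    To make the labelling concrete, the following is a small sketch (not the
+    actual PSI implementation) of how a website could be classified from its
+    share of fast and slow FID experiences, using the labelling rules just
+    described:
+  </p>
+  <pre><code>// Hypothetical helper illustrating the PSI-style FID labels described above.
+// The precedence when a site meets both conditions is an assumption here.
+function labelFidPerformance(pctFast, pctSlow) {
+  if (pctFast >= 95) return 'fast';
+  if (pctSlow >= 5) return 'slow';
+  return 'average';
+}
+
+// Example: labelFidPerformance(97.2, 1.1) === 'fast'</code></pre>
+  <p>
+    Applying labels like these to every website in the dataset is what produces
+    the fast, average, and slow shares discussed in this section.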
+ </p> + <h4 id="fid-by-device">FID by device</h4> + <figure> + // Chart: Flame distribution of 07_02b + <figcaption> + Figure 19. Distribution of _desktop_ websites' fast, average, and slow FID + performance. + </figcaption> + </figure> + <figure> + // Chart: Flame distribution of 07_02c + <figcaption> + Figure 20. Distribution of _phone_ websites' fast, average, and slow FID + performance. + </figcaption> + </figure> + <p> + Breaking FID down by device, it becomes clear that there are two very + different stories. Desktop users enjoy fast FID almost all the time. Sure + there are some websites that throw out a slow experience now and then, but + the results are predominantly fast. Mobile users, on the other hand, have + what seem to be one of two experiences: pretty fast (but not quite as often + as desktop) and almost never fast. The latter is experienced by users on + only the tail ~10% of websites, but this is still a substantial difference. + </p> + <figure> + // Chart: Bar distributions of 07_04b + <figcaption> + Figure 21. Distribution of websites labelled as having fast, average, or + slow FID, broken down by device type. + </figcaption> + </figure> + <figure> + Device | Fast FID | Average FID | Slow FID -- | -- | -- | -- desktop | + 70.32% | 23.20% | 6.48% phone | 13.76% | 43.21% | 43.03% + <figcaption> + Figure 22. Table of websites labelled as having fast, average, or slow + FID, broken down by device type. + </figcaption> + </figure> + <p> + When we apply the PSI labelling to desktop and phone experiences, the + distinction becomes crystal clear. 70.32% of websites' FID experienced by + desktop users are fast compared to 6.48% slow. For mobile experiences, + 13.76% of websites are fast while 43.03% are slow. + </p> + <h4 id="fid-by-ect">FID by ECT</h4> + <figure> + // Chart: Bar distribution of 07_04c + <figcaption> + Figure 23. Distribution of websites labelled as having fast, average, or + slow FID, broken down by + <abbr title="effective connection type">ECT</abbr>. + </figcaption> + </figure> + <p> + On its face, FID seems like it would be driven primarily by CPU speed. It'd + be reasonable to assume that the slower the device itself is, the higher the + likelihood that it will be busy when the user attempts to interact with a + web page, right? + </p> + <p> + The ECT results above seem to suggest a correlation between connection speed + and FID performance. As users' effective connection speed decreases, the + percent of websites on which they experience fast FID also decreases and + slow FID increases. Interestingly, the percent of websites with average FID + is about the same across ECTs. + </p> + <h4 id="fid-by-geo">FID by geo</h4> + <figure> + // Chart: Bar distribution of 07_04d + <figcaption> + Figure 24. Distribution of websites labelled as having fast, average, or + slow FID, broken down by geo. + </figcaption> + </figure> + <p> + In this breakdown of FID by geographic location, Korea is out in front of + everyone else again. But the top geos have some new faces: the US, + Australia, and Canada are next with 35-40% of websites having fast FID. + </p> + <p> + As with the other geo-specific results, there are so many possible factors + that could be contributing to the user experience. For example, perhaps + wealthier geos are more privileged to be able to spend more money on better + network infrastructure and its residents have more money to spend on + desktops and/or high-end mobile phones. 
+ </p> + <h2 id="conclusion">Conclusion</h2> + <p> + Quantifying how fast a web page loads is an imperfect science that can't be + represented by a single metric. Conventional metrics like + <code>onload</code> can miss the mark entirely by measuring irrelevant or + imperceptible parts of the user experience. User-perceived metrics like FCP + and FID more faithfully convey what users see and feel. Even still, neither + metric can be looked at in isolation to draw conclusions about whether the + overall page load experience was fast or slow. Only by looking at many + metrics holistically can we start to understand the performance for an + individual website and the state of the web. + </p> + <p> + The data presented in this chapter showed that there is still a lot of work + to do to meet the goals set for fast websites. Certain form factors, + effective connection types, and geos do correlate with better user + experiences, but we can't forget about the combinations of demographics with + poor performance. In many cases, the web platform is used for business; + making more money from improving conversion rates can be a huge motivator + for speeding up a website. Ultimately, for all websites, performance is + about delivering positive experiences to users in a way that doesn't impede, + frustrate, or enrage them. + </p> + <p> + As the web gets another year older and our ability to measure how users + experience it improves incrementally, I'm looking forward to developers + having access to metrics that capture more of the holistic experience. FCP + is very early on the timeline of showing useful content to users and newer + metrics like + <a href="https://web.dev/largest-contentful-paint" + >Largest Contentful Paint</a + > + (LCP) are emerging to improve our visibility into how page loads are + perceived. The + <a href="https://web.dev/layout-instability-api">Layout Instability API</a> + has also given us a novel glimpse into the frustration users experience + beyond page load. Equipped with these new metrics, the web in 2020 will + become even more transparent, better understood, and give developers an + advantage to make more meaningful progress to improve performance and + contribute to positive user experiences. + </p> </section> {% endblock %} diff --git a/src/templates/en/2019/chapters/pwa.html b/src/templates/en/2019/chapters/pwa.html index 7bb80a1b686..4d4e6470d55 100644 --- a/src/templates/en/2019/chapters/pwa.html +++ b/src/templates/en/2019/chapters/pwa.html @@ -1,188 +1,470 @@ -{# IMPORTANT! +<!--{# IMPORTANT! -- `chapter.html` is a "template for templates" used by the `generate_chapters.py` script, hence the strange template syntax (eg, double braces `{% ... 
%}`) +- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: - translate the corresponding language-specific templates (eg `src/templates/<lang>/<year>/chapter.html`) - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script -#} +#}--> -{% extends "en/2019/base_chapter.html" %} +{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} +<link rel="stylesheet" href="/static/css/chapter.css" /> +{% endblock %} {% set metadata = +{"part_number":"II","chapter_number":11,"title":"Progressive Web +Apps","authors":["tomayac"," jeffposnick"],"reviewers":["hyperpress"," +ahmadawais"]} %} {% block main %} +<aside> + <ul> + <li> + <a href="#introduction">Introduction</a> + </li> -{% block styles %} -{{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css"> -{% endblock %} + <li> + <a href="#service-workers">Service Workers</a> -{% set metadata = {'part_number': 'II', 'chapter_number': 11, 'title': 'Progressive Web Apps', 'authors': ['tomayac', 'jeffposnick'], 'reviewers': ['hyperpress', 'ahmadawais']} %} + <ul> + <li> + <a href="#service-worker-registrations-and-installability" + >Service Worker Registrations and Installability</a + > + </li> -{% block main %} -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" class="chapter-hero"> - <h2>Introduction</h2> -<p>Progressive Web Apps (PWA) are a new class of web applications, building on top of platform primitives -like the <a href="https://developer.mozilla.org/en/docs/Web/API/Service_Worker_API">Service Worker APIs</a>. -Service workers allow apps to support network-independent loading by acting as a network proxy, -intercepting your web app's outgoing requests, and replying with programmatic or cached responses. -Service workers can receive push notifications, and synchronize data in the background even when -the corresponding app is not running. Additional, service workers — together with -<a href="https://developer.mozilla.org/en-US/docs/Web/Manifest">Web App Manifests</a> — -allow users to install PWAs to their devices’ home screens.</p> -<p>Service workers were <a href="https://blog.chromium.org/2014/12/chrome-40-beta-powerful-offline-and.html">first implemented in Chrome 40</a>, -back in December 2014, and the term Progressive Web Apps was -<a href="https://infrequently.org/2015/06/progressive-apps-escaping-tabs-without-losing-our-soul/">coined by Frances Berriman and Alex Russell</a> -in 2015. As service workers are now finally <a href="https://jakearchibald.github.io/isserviceworkerready/">implemented in all major browsers</a>, -we were wondering how many PWAs are actually out there, and how do they make use of these new technologies? 
-Certain advanced APIs like <a href="https://developers.google.com/web/updates/2015/12/background-sync">Background Sync</a> -are currently still <a href="https://caniuse.com/#feat=background-sync">only available on Chromium-based browsers</a>, -so as an additional question, we looked into which features these PWAs actually use.</p> -<p>All data in the HTTP Archive can be <a href="https://github.com/HTTPArchive/legacy.httparchive.org/blob/master/docs/bigquery-gettingstarted.md">queried through BigQuery</a>, -where multiple tables are available in the <code>httparchive</code> project. As these tables tend to get fairly big, -they are partitioned, but multiple associated tables can be queried using the <a href="https://cloud.google.com/bigquery/docs/querying-wildcard-tables">wildcard symbol '*'</a>. -For our research, three families of tables are relevant:</p> -<ul> -<li><code>httparchive.lighthouse.*</code>, which contains data about <a href="https://developers.google.com/web/tools/lighthouse/">Lighthouse</a> runs. Note that Lighthouse data is only available for mobile pages.</li> -<li><code>httparchive.pages.*</code>, which contain the JSON-encoded parent documents’ HAR data.</li> -<li><code>httparchive.response_bodies.*</code>, which contains the raw response bodies of all resources and sub-resources of all sites in the archive.</li> -</ul> -<h2>Service Workers</h2> -<h3>Service Worker Registrations and Installability</h3> -<p>The first metric we explore are service worker installations. Looking at the data exposed through -feature counters in the HTTP Archive, we find that 0.44% of all desktop and 0.37% of all mobile pages -register a service worker, and both curves over time are steeply growing. Now this might not look overly -impressive, but taking traffic data from Chrome Platform Status into account, we can see that about -<a href="https://www.chromestatus.com/metrics/feature/timeline/popularity/990">a service worker controlled 15% of all page loads</a>, -which can be interpreted as popular, high-traffic sites increasingly having started to embrace service workers.</p> -<p><timeseries chart of 11_01b></p> -<p><strong>Figure 1:</strong> Service Worker installation over time for desktop and mobile</p> -<p>Looking at Lighthouse data in the HTTP Archive, 1.56% of mobile pages are <a href="https://developers.google.com/web/tools/lighthouse/audits/install-prompt">installable</a>, -that is, they pass Lighthouse’s <em>user can be prompted to install the web app</em> test. -Lighthouse tests currently are only available for mobile pages. To control the install experience, -0.82% of all desktop and 0.94% of all mobile pages use the <a href="https://w3c.github.io/manifest/#beforeinstallpromptevent-interface"><code>OnBeforeInstallPrompt</code> interface</a>.</p> -<h3>Service Worker Events</h3> -<p>In a service worker one can <a href="https://developers.google.com/web/fundamentals/primers/service-workers/lifecycle">listen for a number of events</a>:</p> -<ul> -<li><code>install</code>, which occurs upon service worker installation. </li> -<li><code>activate</code>, which occurs upon service worker activation. </li> -<li><code>fetch</code>, which occurs whenever a resource is fetched.</li> -<li><code>push</code>, which occurs when a push notification arrives.</li> -<li><code>notificationclick</code>, which occurs when a notification is being clicked.</li> -<li><code>notificationclose</code>, which occurs when a notification is being closed. 
-<code>- message</code>, which occurs when a message sent via <code>postMessage()</code> arrives.</li> -<li><code>sync</code>, which occurs when a Background Sync event occurs.</li> -</ul> -<p>We have examined which of these events are being listened to by service workers we could find in the HTTP Archive. -The results for mobile and desktop are very similar with <code>fetch</code>, <code>install</code>, and <code>activate</code> being the three -most popular events, followed by <code>notificationclick</code> and <code>push</code>. If we interpret these results, offline use -cases that service workers enable are the most attractive feature for app developers, far ahead of -push notifications. Due to its limited availability, and less common use case, background sync doesn’t -play a significant role at the moment.</p> -<p><bar chart of 11_03 mobile></p> -<p><strong>Figure 2a:</strong> Service worker events on mobile, ordered by decreasing frequency.</p> -<p><bar chart of 11_03 desktop></p> -<p><strong>Figure 2b:</strong> Service worker events on desktop, ordered by decreasing frequency.</p> -<h3>Service Worker File Sizes</h3> -<p>File size or lines of code are in general a bad proxy for the complexity of the task at hand. -In this case, however, it is definitely interesting to compare (compressed) file sizes of service workers -for mobile and desktop. The median service worker file on desktop is 895 bytes, whereas on mobile it’s 694 bytes. -Throughout all percentiles desktop service workers are larger than mobile service workers. -We note that these stats don’t account for dynamically imported scripts through the -<a href="https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts"><code>importScripts()</code></a> method, -which likely skews the results higher.</p> -<p><distribution of 11_03b mobile></p> -<p><strong>Figure 3a:</strong> Percentiles of service worker file sizes on mobile.</p> -<p><distribution of 11_03b desktop></p> -<p><strong>Figure 3b:</strong> Percentiles of service worker file sizes on desktop.</p> -<h2>Web App Manifests</h2> -<h3>Web App Manifest Properties</h3> -<p>The web app manifest is a simple JSON file that tells the browser about a web application -and how it should behave when installed on the user's mobile device or desktop. A typical -manifest file includes information about the app name, icons it should use, the start URL -it should open at when launched, and more. Only 1.54% of all encountered manifests were -invalid JSON, and the rest parsed correctly.</p> -<p>We looked at the different properties defined by the -<a href="https://w3c.github.io/manifest/#webappmanifest-dictionary">specification</a>, -and also considered non-standard proprietary properties. According to the Web App Manifest spec, -the following properties are allowed: <code>dir</code>, <code>lang</code>, <code>name</code>, <code>short_name</code>, <code>description</code>, -<code>icons</code>, <code>screenshots</code>, <code>categories</code>, <code>iarc_rating_id</code>, <code>start_url</code>, <code>display</code>, <code>orientation</code>, -<code>theme_color</code>, <code>background_color</code>, <code>scope</code>, <code>serviceworker</code>, <code>related_applications</code>, and -<code>prefer_related_applications</code>. The only property that we didn’t observe in the wild was -<code>iarc_rating_id</code>, which is a string that represents the International Age Rating Coalition (IARC) -certification code of the web application. 
It is intended to be used to determine which ages -the web application is appropriate for. The proprietary properties we encountered still -frequently were <code>gcm_sender_id</code> and <code>gcm_user_visible_only</code> from the legacy -Google Cloud Messaging (GCM) service. Interestingly there’re almost no differences between mobile -and desktop. On both platforms, however, there’s a long tail of properties that are not interpreted -by browsers but that contain potentially useful metadata like <code>author</code> or <code>version</code>. We also found -a non-trivial amount of mistyped properties, our favorite being <code>shot_name</code>. An interesting outlier -is the <code>serviceworker</code> property which is standard, but not implemented by any browser vendor — -nevertheless, it was found on 0.09% of all web app manifests used by mobile and desktop pages.</p> -<p><bar chart of 11_04 mobile></p> -<p><strong>Figure 4a:</strong> Web App Manifest properties ordered by decreasing popularity on mobile.</p> -<p><bar chart of 11_04 mobile></p> -<p><strong>Figure 4b:</strong> Web App Manifest properties ordered by decreasing popularity on desktop.</p> -<h3>Display Values</h3> -<p>Looking at the values developers set for the <code>display</code> property, it becomes immediately clear -that they want PWAs to be perceived as “proper” apps that don’t reveal their web technology origins. -By choosing <code>"standalone"</code>, they make sure no browser UI is shown to the end-user. This is reflected -by the majority of apps that make use of the <code>prefers_related_applications</code> property: more that 97% -of both mobile and desktop applications do <em>not</em> prefer native applications.</p> -<11_04c mobile> + <li> + <a href="#service-worker-events">Service Worker Events</a> + </li> -<p><strong>Figure 5a:</strong> Values for the <code>display</code> property on mobile.</p> -<11_04c desktop> + <li> + <a href="#service-worker-file-sizes">Service Worker File Sizes</a> + </li> + </ul> + </li> -<p><strong>Figure 5b:</strong> Values for the <code>display</code> property on desktop.</p> -<h3>Category Values</h3> -<p>The <code>categories</code> member describes the expected application categories to which the web application belongs. -It is only meant as a hint to catalogs or stores listing web applications, and it is expected that -these will make a best effort to find appropriate categories (or category) under which to list the -web application. There were not too many manifests that made use of the property, but it is -interesting to see the shift from <em>shopping</em> being the most popular category on mobile to <em>business</em>, -<em>technology</em>, and <em>web</em> (whatever may be meant with that) on desktop that share the first place evenly.</p> -<11_04d mobile> + <li> + <a href="#web-app-manifests">Web App Manifests</a> -<p><strong>Figure 6a:</strong> Values for the <code>categories</code> property on mobile.</p> -<11_04d desktop> + <ul> + <li> + <a href="#web-app-manifest-properties">Web App Manifest Properties</a> + </li> -<p><strong>Figure 6b:</strong> Values for the <code>categories</code> property on desktop.</p> -<h3>Icon Sizes</h3> -<p>Lighthouse <a href="https://developers.google.com/web/tools/lighthouse/audits/manifest-contains-192px-icon">requires</a> -at least an icon sized 192×192, but common favicon generation tools create a plethora of other sizes, too. 
-Lighthouse’s rule is probably the culprit for 192×192 being the most popular choice of icon size on both -desktop and mobile, despite <a href="https://developers.google.com/web/fundamentals/web-app-manifest#icons">Google’s documentation</a> -additionally explicitly recommending 512×512, which doesn’t show as a particularly prominent option.</p> -<11_04f mobile> + <li> + <a href="#display-values">Display Values</a> + </li> -<p><strong>Figure 7a:</strong> Popular icon sizes on mobile.</p> -<11_04f desktop> + <li> + <a href="#category-values">Category Values</a> + </li> -<p><strong>Figure 7b:</strong> Popular icon sizes on desktop.</p> -<h3>Orientation Values</h3> -<p>The valid values for the <code>orientation</code> property are <a href="https://www.w3.org/TR/screen-orientation/#dom-orientationlocktype">defined</a> -in the Screen Orientation API specification. Namely there are <code>"any"</code>, <code>"natural"</code>, <code>"landscape"</code>,<br> -<code>"portrait"</code>, <code>"portrait-primary"</code>, <code>"portrait-secondary"</code>, <code>"landscape-primary"</code>, and <code>"landscape-secondary"</code>. -Portrait orientation is the clear winner on both platforms, followed by any orientation.</p> -<11_04g mobile> + <li> + <a href="#icon-sizes">Icon Sizes</a> + </li> -<p><strong>Figure 8a:</strong> Popular orientation values on mobile.</p> -<11_04g desktop> + <li> + <a href="#orientation-values">Orientation Values</a> + </li> + </ul> + </li> -<p><strong>Figure 8b:</strong> Popular orientation values on desktop.</p> -<h2>Workbox</h2> -<p>Workbox is a set of libraries that help with common service worker use cases. For instance, Workbox has tools -that can plug in to your build process and generate a manifest of files, which are then precached by your -service worker. Workbox includes libraries to handle runtime caching, request routing, cache expiration, -background sync, and more.</p> -<p>Given the low-level nature of the service worker APIs, many developers have turned to Workbox as a way of -structuring their service worker logic into higher-level, reusable chunks of code. Workbox adoption is also -driven by its inclusion as a feature in a number of popular JavaScript framework starter kits, like -<a href="https://create-react-app.dev/"><code>create-react-app</code></a> and <a href="https://www.npmjs.com/package/@vue/cli-plugin-pwa">Vue's PWA plugin</a>.</p> -<p>The HTTP Archive shows that, out of the total population of sites that register a service worker, -12.71% of them are using at least one of the Workbox libraries. This percentage is roughly consistent -across desktop and mobile, with a slightly lower percentage (11.46%) on mobile compared to desktop (14.36%).</p> + <li> + <a href="#workbox">Workbox</a> + </li> + </ul> +</aside> +<section class="main"> + <h1 class="chapter-title">{{ metadata.get('title') }}</h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="chapter-hero" + /> + <h2 id="introduction">Introduction</h2> + <p> + Progressive Web Apps (PWA) are a new class of web applications, building on + top of platform primitives <br /> + like the + <a href="https://developer.mozilla.org/en/docs/Web/API/Service_Worker_API" + >Service Worker APIs</a + >. <br /> + Service workers allow apps to support network-independent loading by acting + as a network proxy, <br /> + intercepting your web app's outgoing requests, and replying with + programmatic or cached responses. 
<br />
+    Service workers can receive push notifications, and synchronize data in the
+    background even when <br />
+    the corresponding app is not running. Additionally, service workers —
+    together with <br />
+    <a href="https://developer.mozilla.org/en-US/docs/Web/Manifest"
+      >Web App Manifests</a
+    >
+    — <br />
+    allow users to install PWAs to their devices’ home screens.
+  </p>
+  <p>
+    Service workers were
+    <a
+      href="https://blog.chromium.org/2014/12/chrome-40-beta-powerful-offline-and.html"
+      >first implemented in Chrome 40</a
+    >, <br />
+    back in December 2014, and the term Progressive Web Apps was <br />
+    <a
+      href="https://infrequently.org/2015/06/progressive-apps-escaping-tabs-without-losing-our-soul/"
+      >coined by Frances Berriman and Alex Russell</a
+    >
+    <br />
+    in 2015. As service workers are now finally
+    <a href="https://jakearchibald.github.io/isserviceworkerready/"
+      >implemented in all major browsers</a
+    >, <br />
+    we were wondering how many PWAs are actually out there, and how they make
+    use of these new technologies. <br />
+    Certain advanced APIs like
+    <a href="https://developers.google.com/web/updates/2015/12/background-sync"
+      >Background Sync</a
+    >
+    <br />
+    are currently still
+    <a href="https://caniuse.com/#feat=background-sync"
+      >only available on Chromium-based browsers</a
+    >, <br />
+    so as an additional question, we looked into which features these PWAs
+    actually use.
+  </p>
+  <p>
+    All data in the HTTP Archive can be
+    <a
+      href="https://github.com/HTTPArchive/legacy.httparchive.org/blob/master/docs/bigquery-gettingstarted.md"
+      >queried through BigQuery</a
+    >,<br />
+    where multiple tables are available in the <code>httparchive</code> project.
+    As these tables tend to get fairly big, <br />
+    they are partitioned, but multiple associated tables can be queried using
+    the
+    <a href="https://cloud.google.com/bigquery/docs/querying-wildcard-tables"
+      >wildcard symbol '*'</a
+    >.<br />
+    For our research, three families of tables are relevant:
+  </p>
+  <ul>
+    <li>
+      <code>httparchive.lighthouse.*</code>, which contains data about
+      <a href="https://developers.google.com/web/tools/lighthouse/"
+        >Lighthouse</a
+      >
+      runs. Note that Lighthouse data is only available for mobile pages.
+    </li>
+    <li>
+      <code>httparchive.pages.*</code>, which contains the JSON-encoded parent
+      documents’ HAR data.
+    </li>
+    <li>
+      <code>httparchive.response_bodies.*</code>, which contains the raw
+      response bodies of all resources and sub-resources of all sites in the
+      archive.
+    </li>
+  </ul>
+  <h2 id="service-workers">Service Workers</h2>
+  <h3 id="service-worker-registrations-and-installability">
+    Service Worker Registrations and Installability
+  </h3>
+  <p>
+    The first metric we explore is service worker installations. Looking at the
+    data exposed through <br />
+    feature counters in the HTTP Archive, we find that 0.44% of all desktop and
+    0.37% of all mobile pages <br />
+    register a service worker, and both curves over time are steeply growing.
+    Now this might not look overly <br />
+    impressive, but taking traffic data from Chrome Platform Status into
+    account, we can see that <br />
+    <a
+      href="https://www.chromestatus.com/metrics/feature/timeline/popularity/990"
+      >a service worker controlled about 15% of all page loads</a
+    >,<br />
+    which can be interpreted as popular, high-traffic sites increasingly having
+    started to embrace service workers.
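+  </p>
+  <p>
+    For readers less familiar with the underlying API, registration is a single
+    call from the page, and the service worker file itself reacts to lifecycle
+    and network events. The following is a generic sketch of that pattern; the
+    file name and cached URLs are illustrative placeholders, not values taken
+    from the dataset:
+  </p>
+  <pre><code>// In the page: register a service worker ('/sw.js' is a placeholder path).
+if ('serviceWorker' in navigator) {
+  navigator.serviceWorker.register('/sw.js');
+}
+
+// In sw.js: the kind of event listeners counted later in this chapter.
+self.addEventListener('install', (event) => {
+  event.waitUntil(
+    caches.open('v1').then((cache) => cache.addAll(['/offline.html']))
+  );
+});
+
+self.addEventListener('fetch', (event) => {
+  event.respondWith(
+    caches.match(event.request).then((hit) => hit || fetch(event.request))
+  );
+});</code></pre>
+  <p>
+    With that pattern in mind, here is how registrations have been trending.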
+ </p> + <p><code><timeseries chart of 11_01b></code></p> + <p> + <strong>Figure 1:</strong> Service Worker installation over time for desktop + and mobile + </p> + <p> + Looking at Lighthouse data in the HTTP Archive, 1.56% of mobile pages are + <a + href="https://developers.google.com/web/tools/lighthouse/audits/install-prompt" + >installable</a + >,<br /> + that is, they pass Lighthouse’s + <em>user can be prompted to install the web app</em> test. <br /> + Lighthouse tests currently are only available for mobile pages. To control + the install experience, <br /> + 0.82% of all desktop and 0.94% of all mobile pages use the + <a href="https://w3c.github.io/manifest/#beforeinstallpromptevent-interface" + ><code>OnBeforeInstallPrompt</code> interface</a + >. + </p> + <h3 id="service-worker-events">Service Worker Events</h3> + <p> + In a service worker one can + <a + href="https://developers.google.com/web/fundamentals/primers/service-workers/lifecycle" + >listen for a number of events</a + >: + </p> + <ul> + <li> + <code>install</code>, which occurs upon service worker installation. + </li> + <li><code>activate</code>, which occurs upon service worker activation.</li> + <li><code>fetch</code>, which occurs whenever a resource is fetched.</li> + <li><code>push</code>, which occurs when a push notification arrives.</li> + <li> + <code>notificationclick</code>, which occurs when a notification is being + clicked. + </li> + <li> + <code>notificationclose</code>, which occurs when a notification is being + closed.<br /> + <code>- message</code>, which occurs when a message sent via + <code>postMessage()</code> arrives. + </li> + <li> + <code>sync</code>, which occurs when a Background Sync event occurs. + </li> + </ul> + <p> + We have examined which of these events are being listened to by service + workers we could find in the HTTP Archive. <br /> + The results for mobile and desktop are very similar with <code>fetch</code>, + <code>install</code>, and <code>activate</code> being the three <br /> + most popular events, followed by <code>notificationclick</code> and + <code>push</code>. If we interpret these results, offline use <br /> + cases that service workers enable are the most attractive feature for app + developers, far ahead of <br /> + push notifications. Due to its limited availability, and less common use + case, background sync doesn’t <br /> + play a significant role at the moment. + </p> + <p><code><bar chart of 11_03 mobile></code></p> + <p> + <strong>Figure 2a:</strong> Service worker events on mobile, ordered by + decreasing frequency. + </p> + <p><code><bar chart of 11_03 desktop></code></p> + <p> + <strong>Figure 2b:</strong> Service worker events on desktop, ordered by + decreasing frequency. + </p> + <h3 id="service-worker-file-sizes">Service Worker File Sizes</h3> + <p> + File size or lines of code are in general a bad proxy for the complexity of + the task at hand. <br /> + In this case, however, it is definitely interesting to compare (compressed) + file sizes of service workers <br /> + for mobile and desktop. The median service worker file on desktop is 895 + bytes, whereas on mobile it’s 694 bytes. <br /> + Throughout all percentiles desktop service workers are larger than mobile + service workers. 
<br /> + We note that these stats don’t account for dynamically imported scripts + through the <br /> + <a + href="https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts" + ><code>importScripts()</code></a + > + method, <br /> + which likely skews the results higher. + </p> + <p><code><distribution of 11_03b mobile></code></p> + <p> + <strong>Figure 3a:</strong> Percentiles of service worker file sizes on + mobile. + </p> + <p><code><distribution of 11_03b desktop></code></p> + <p> + <strong>Figure 3b:</strong> Percentiles of service worker file sizes on + desktop. + </p> + <h2 id="web-app-manifests">Web App Manifests</h2> + <h3 id="web-app-manifest-properties">Web App Manifest Properties</h3> + <p> + The web app manifest is a simple JSON file that tells the browser about a + web application <br /> + and how it should behave when installed on the user's mobile device or + desktop. A typical <br /> + manifest file includes information about the app name, icons it should use, + the start URL <br /> + it should open at when launched, and more. Only 1.54% of all encountered + manifests were <br /> + invalid JSON, and the rest parsed correctly. + </p> + <p> + We looked at the different properties defined by the <br /> + <a href="https://w3c.github.io/manifest/#webappmanifest-dictionary" + >specification</a + >, <br /> + and also considered non-standard proprietary properties. According to the + Web App Manifest spec, <br /> + the following properties are allowed: <code>dir</code>, <code>lang</code>, + <code>name</code>, <code>short_name</code>, <code>description</code>, <br /> + <code>icons</code>, <code>screenshots</code>, <code>categories</code>, + <code>iarc_rating_id</code>, <code>start_url</code>, <code>display</code>, + <code>orientation</code>, <br /> + <code>theme_color</code>, <code>background_color</code>, <code>scope</code>, + <code>serviceworker</code>, <code>related_applications</code>, and <br /> + <code>prefer_related_applications</code>. The only property that we didn’t + observe in the wild was <br /> + <code>iarc_rating_id</code>, which is a string that represents the + International Age Rating Coalition (IARC) <br /> + certification code of the web application. It is intended to be used to + determine which ages <br /> + the web application is appropriate for. The proprietary properties we + encountered still <br /> + frequently were <code>gcm_sender_id</code> and + <code>gcm_user_visible_only</code> from the legacy <br /> + Google Cloud Messaging (GCM) service. Interestingly there’re almost no + differences between mobile <br /> + and desktop. On both platforms, however, there’s a long tail of properties + that are not interpreted <br /> + by browsers but that contain potentially useful metadata like + <code>author</code> or <code>version</code>. We also found <br /> + a non-trivial amount of mistyped properties, our favorite being + <code>shot_name</code>. An interesting outlier <br /> + is the <code>serviceworker</code> property which is standard, but not + implemented by any browser vendor — <br /> + nevertheless, it was found on 0.09% of all web app manifests used by mobile + and desktop pages. + </p> + <p><code><bar chart of 11_04 mobile></code></p> + <p> + <strong>Figure 4a:</strong> Web App Manifest properties ordered by + decreasing popularity on mobile. + </p> + <p><code><bar chart of 11_04 mobile></code></p> + <p> + <strong>Figure 4b:</strong> Web App Manifest properties ordered by + decreasing popularity on desktop. 
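+  </p>
+  <p>
+    The property tallies in these figures can be approximated from a live page
+    as well. As a rough sketch (not the actual HTTP Archive custom metric), a
+    script could locate the linked manifest and list the properties it
+    declares:
+  </p>
+  <pre><code>// Sketch: find the page's linked manifest and list its top-level properties.
+// The property names in the comment below are examples, not dataset values.
+async function manifestProperties() {
+  const link = document.querySelector('link[rel="manifest"]');
+  if (!link) return [];
+  const response = await fetch(link.href);
+  const manifest = await response.json();
+  return Object.keys(manifest); // e.g. ['name', 'short_name', 'icons', 'start_url', 'display']
+}</code></pre>
+  <p>
+    Next, let's look at the values developers actually choose for some of these
+    properties.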
+ </p> + <h3 id="display-values">Display Values</h3> + <p> + Looking at the values developers set for the <code>display</code> property, + it becomes immediately clear <br /> + that they want PWAs to be perceived as “proper” apps that don’t reveal their + web technology origins. <br /> + By choosing <code>"standalone"</code>, they make sure no browser UI is shown + to the end-user. This is reflected <br /> + by the majority of apps that make use of the + <code>prefers_related_applications</code> property: more that 97% <br /> + of both mobile and desktop applications do <em>not</em> prefer native + applications. + </p> + <p><code><11_04c mobile></code></p> + <p> + <strong>Figure 5a:</strong> Values for the <code>display</code> property on + mobile. + </p> + <p><code><11_04c desktop></code></p> + <p> + <strong>Figure 5b:</strong> Values for the <code>display</code> property on + desktop. + </p> + <h3 id="category-values">Category Values</h3> + <p> + The <code>categories</code> member describes the expected application + categories to which the web application belongs. <br /> + It is only meant as a hint to catalogs or stores listing web applications, + and it is expected that <br /> + these will make a best effort to find appropriate categories (or category) + under which to list the <br /> + web application. There were not too many manifests that made use of the + property, but it is <br /> + interesting to see the shift from <em>shopping</em> being the most popular + category on mobile to <em>business</em>, <br /> + <em>technology</em>, and <em>web</em> (whatever may be meant with that) on + desktop that share the first place evenly. + </p> + <p><code><11_04d mobile></code></p> + <p> + <strong>Figure 6a:</strong> Values for the <code>categories</code> property + on mobile. + </p> + <p><code><11_04d desktop></code></p> + <p> + <strong>Figure 6b:</strong> Values for the <code>categories</code> property + on desktop. + </p> + <h3 id="icon-sizes">Icon Sizes</h3> + <p> + Lighthouse + <a + href="https://developers.google.com/web/tools/lighthouse/audits/manifest-contains-192px-icon" + >requires</a + > + <br /> + at least an icon sized 192×192, but common favicon generation tools create a + plethora of other sizes, too. <br /> + Lighthouse’s rule is probably the culprit for 192×192 being the most popular + choice of icon size on both <br /> + desktop and mobile, despite + <a + href="https://developers.google.com/web/fundamentals/web-app-manifest#icons" + >Google’s documentation</a + > + <br /> + additionally explicitly recommending 512×512, which doesn’t show as a + particularly prominent option. + </p> + <p><code><11_04f mobile></code></p> + <p><strong>Figure 7a:</strong> Popular icon sizes on mobile.</p> + <p><code><11_04f desktop></code></p> + <p><strong>Figure 7b:</strong> Popular icon sizes on desktop.</p> + <h3 id="orientation-values">Orientation Values</h3> + <p> + The valid values for the <code>orientation</code> property are + <a href="https://www.w3.org/TR/screen-orientation/#dom-orientationlocktype" + >defined</a + ><br /> + in the Screen Orientation API specification. Namely there are + <code>"any"</code>, <code>"natural"</code>, <code>"landscape"</code>, <br /> + <code>"portrait"</code>, <code>"portrait-primary"</code>, + <code>"portrait-secondary"</code>, <code>"landscape-primary"</code>, and + <code>"landscape-secondary"</code>. <br /> + Portrait orientation is the clear winner on both platforms, followed by any + orientation. 
+ </p> + <p><code><11_04g mobile></code></p> + <p><strong>Figure 8a:</strong> Popular orientation values on mobile.</p> + <p><code><11_04g desktop></code></p> + <p><strong>Figure 8b:</strong> Popular orientation values on desktop.</p> + <h2 id="workbox">Workbox</h2> + <p> + Workbox is a set of libraries that help with common service worker use + cases. For instance, Workbox has tools <br /> + that can plug in to your build process and generate a manifest of files, + which are then precached by your <br /> + service worker. Workbox includes libraries to handle runtime caching, + request routing, cache expiration, <br /> + background sync, and more. + </p> + <p> + Given the low-level nature of the service worker APIs, many developers have + turned to Workbox as a way of <br /> + structuring their service worker logic into higher-level, reusable chunks of + code. Workbox adoption is also <br /> + driven by its inclusion as a feature in a number of popular JavaScript + framework starter kits, like <br /> + <a href="https://create-react-app.dev/"><code>create-react-app</code></a> + and + <a href="https://www.npmjs.com/package/@vue/cli-plugin-pwa" + >Vue's PWA plugin</a + >. + </p> + <p> + The HTTP Archive shows that, out of the total population of sites that + register a service worker, <br /> + 12.71% of them are using at least one of the Workbox libraries. This + percentage is roughly consistent <br /> + across desktop and mobile, with a slightly lower percentage (11.46%) on + mobile compared to desktop (14.36%). + </p> </section> {% endblock %} diff --git a/src/templates/en/2019/chapters/seo.html b/src/templates/en/2019/chapters/seo.html new file mode 100644 index 00000000000..6f8d50112c3 --- /dev/null +++ b/src/templates/en/2019/chapters/seo.html @@ -0,0 +1,831 @@ +<!--{# IMPORTANT! 
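+  </p>
+  <p>
+    To make that concrete, here is a hedged sketch of what a Workbox-powered
+    service worker often looks like when the library is loaded from its CDN
+    build; the version number, precache manifest variable, and route below are
+    illustrative assumptions rather than values observed in the dataset:
+  </p>
+  <pre><code>// Sketch of a Workbox-based service worker (version, URL, and route are
+// illustrative assumptions, not taken from the dataset).
+importScripts(
+  'https://storage.googleapis.com/workbox-cdn/releases/4.3.1/workbox-sw.js'
+);
+
+// Precache a build-time manifest of URLs (normally injected by tooling).
+workbox.precaching.precacheAndRoute(self.__precacheManifest || []);
+
+// Runtime caching: serve images cache-first.
+workbox.routing.registerRoute(
+  /\.(?:png|jpg|jpeg|svg|gif)$/,
+  new workbox.strategies.CacheFirst()
+);</code></pre>
+  <p>
+    As with the other sketches in this chapter, this is an illustration rather
+    than a recommended configuration; real setups vary widely from site to
+    site.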
+ +- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) +- if you want to modify `chapter.html`, you must also: + - translate the corresponding language-specific templates (eg `src/templates/<lang>/<year>/chapter.html`) + - run the generation script to update each chapter template +- if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): + - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script +#}--> + +{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} +<link rel="stylesheet" href="/static/css/chapter.css" /> +{% endblock %} {% set metadata = +{"part_number":"I","chapter_number":10,"title":"SEO","authors":["ymschaap"," +rachellcostello"," AVGP"],"reviewers":["clarkeclark"," andylimn"," voltek62"," +AymenLoukil"," catalinred"]} %} {% block main %} +<aside> + <ul> + <li> + <a href="#intro">Intro</a> + </li> + + <li> + <a href="#fundamentals">Fundamentals</a> + + <ul> + <li> + <a href="#content">Content</a> + + <ul> + <li> + <a href="#word-count">Word count</a> + </li> + + <li> + <a href="#headings">Headings</a> + </li> + </ul> + </li> + + <li> + <a href="#meta-tags">Meta tags</a> + + <ul> + <li> + <a href="#page-titles">Page titles</a> + </li> + + <li> + <a href="#meta-descriptions">Meta descriptions</a> + </li> + + <li> + <a href="#image-alt-tags">Image alt tags</a> + </li> + </ul> + </li> + + <li> + <a href="#indexability">Indexability</a> + + <ul> + <li> + <a href="#status-codes">Status codes</a> + </li> + + <li> + <a href="#noindex">Noindex</a> + </li> + + <li> + <a href="#canonicalization">Canonicalization</a> + </li> + + <li> + <a href="#robotstxt">robots.txt</a> + </li> + </ul> + </li> + + <li> + <a href="#linking">Linking</a> + </li> + </ul> + </li> + + <li> + <a href="#advanced">Advanced</a> + + <ul> + <li> + <a href="#speed">Speed</a> + </li> + + <li> + <a href="#structured-data">Structured data</a> + </li> + + <li> + <a href="#internationalization">Internationalization</a> + </li> + + <li> + <a href="#spa-crawlability">SPA crawlability</a> + </li> + + <li> + <a href="#amp">AMP</a> + </li> + + <li> + <a href="#security">Security</a> + </li> + </ul> + </li> + + <li> + <a href="#conclusion">Conclusion</a> + </li> + </ul> +</aside> + +<section class="main"> + <h1 class="chapter-title">{{ metadata.get('title') }}</h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="chapter-hero" + /> + <h2 id="intro">Intro</h2> + <p> + Search Engine Optimization (SEO) isn't just a hobby or a side project for + digital marketers, it is crucial for the success of a website. The primary + goal of SEO is to make sure that a website is optimized for the search + engine bots that need to crawl and index its pages, as well as for the users + that will be navigating the website and consuming its content. SEO impacts + everyone working on a website, from the developer who is building it, + through to the digital marketer who will need to promote it to new potential + customers. + </p> + <p> + Let's put the importance of SEO into perspective. Earlier this year, the SEO + industry looked on in horror (and fascination) as + <a href="https://www.bbc.co.uk/news/business-47877688" + >ASOS reported an 87% decrease in profits</a + > + after a "difficult year". 
The brand attributed their issues to a drop in + search engine rankings which occurred after they launched over 200 + microsites and significant changes to their website's navigation, among + other technical changes. Yikes. + </p> + <p> + The SEO chapter of the Web Almanac was created to analyze onsite elements of + the web that impact the crawling and indexing of content for search engines, + and ultimately, website performance. In this chapter, we'll take a look at + how well-equipped the top websites are to provide a great experience for + users and search engines, and which ones still have work to do. + </p> + <p> + Our analysis includes data from + <a href="https://developers.google.com/web/tools/lighthouse">Lighthouse</a>, + the + <a + href="https://developers.google.com/web/tools/chrome-user-experience-report" + >Chrome UX Report</a + >, and HTML element analysis. We focused on SEO fundamentals like + <code><title></code> elements, the different types of on-page links, + content and loading speed, but also the more technical aspects of SEO, + including indexability, structured data, internationalization and AMP across + over 5 million websites. + </p> + <p> + Our custom metrics provide insights that, up until now, have not been + exposed before. We are now able to make claims about the adoption and + implementation of elements such as the hreflang tag, rich results + eligibility, heading tag usage, and even anchor-based navigation for single + page apps. + </p> + <aside> + Note: Our data is limited to analyzing home pages only, and has not been + gathered from site-wide crawls. This will impact many metrics we'll discuss, + so we've added any relevant limitations in this case whenever we mention a + specific metric. + </aside> + <p> + Read on to find out more about the current state of the web and its search + engine friendliness. + </p> + <h2 id="fundamentals">Fundamentals</h2> + <p> + Search engines have a 3-step process: crawling, indexing, and ranking. To be + search engine-friendly, a page needs to be discoverable, understandable, and + contain quality content that would provide value to a user who is browsing + the search engine results pages (SERPs). + </p> + <p> + We wanted to analyze how much of the web is meeting the basic standards of + SEO best practices, so we assessed on-page elements such as body content, + meta tags, and internal linking. Let's take a look at the results. + </p> + <h3 id="content">Content</h3> + <p> + To be able to understand what a page is about and decide for which search + queries it provides the most relevant answers, a search engine must be able + to discover and access its content. What content are search engines + currently finding, however? To help answer this, we created two custom + metrics: word count and headings. + </p> + <h4 id="word-count">Word count</h4> + <p> + We assessed the content on the pages by looking for groups of at least 3 + words and counting how many were found in total. We found 2.73% of desktop + pages that didn't have any word groups, meaning that they have no body + content to help search engines understand what the website is about. + </p> + <p> + The median desktop home page has 346 words, and the median mobile home page + has a slightly lower word count at 306 words. This shows that mobile sites + do serve a bit less content to their users, but at over 300 words, this is + still a reasonable amount to read, especially for a home page which will + naturally contain less content than an article page, for example. 
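+  </p>
+  <p>
+    These counts come from a custom metric that looks for groups of at least 3
+    words in the rendered page; a simplified sketch of the idea (not the exact
+    metric used for this analysis) might look like this:
+  </p>
+  <pre><code>// Simplified sketch of a word-count custom metric: count only words that
+// appear in groups of at least 3 (not the exact Web Almanac implementation).
+function countBodyWords() {
+  const lines = (document.body.innerText || '').split(/\n+/);
+  let total = 0;
+  for (const line of lines) {
+    const words = line.trim().split(/\s+/).filter(Boolean);
+    if (words.length >= 3) {
+      total += words.length;
+    }
+  }
+  return total;
+}</code></pre>
+  <p>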
Overall + the distribution of words is broad, with between 22 words at the 10th + percentile and up to 1,361 at the 90th percentile. + </p> + <p> + <code + ><graph histogram number of words. Source: 10.09, column C, desktop + & mobile></code + > + </p> + <h4 id="headings">Headings</h4> + <p> + We also looked at whether pages are structured in a way that provides the + right context for the content they contain. Headings (<code>H1</code>, + <code>H2</code>, <code>H3</code>, etc) are used to format and structure a + page and make content easier to read and parse. Despite the importance on + headings, 10.67% of pages have no heading tags at all. + </p> + <p> + <code + ><graph histogram number of heading elements. Source: 10.09a, column + F></code + > + </p> + <p> + The median number of heading elements per page is 10, with 30 words (on + mobile) and 32 words (on desktop) used in headings. This implies that the + websites that utilize headings put significant effort in making sure that + their pages are readable, descriptive, and clearly outline the page + structure and context to search engine bots. + </p> + <p> + In terms of specific heading length, the median length of the first + <code>H1</code> element found on desktop is 19 characters. + </p> + <p> + For advice on how to handle <code>H1</code>s and headings for SEO and + accessibility, take a look at this + <a href="https://www.youtube.com/watch?v=zyqJJXWk0gk" + >video response by John Mueller</a + > + in the Ask Google Webmasters series. + </p> + <p> + <code + ><graph histogram h1 tag source: 10.16, column C, desktop & + mobile></code + > + </p> + <h3 id="meta-tags">Meta tags</h3> + <p> + Meta tags allow us to give specific instructions and information to search + engine bots about the different elements and content on a page. Certain meta + tags can convey things like the topical focus of a page, as well as how the + page should be crawled and indexed. We wanted to assess whether or not + websites were making the most of these opportunities that meta tags provide. + </p> + <h4 id="page-titles">Page titles</h4> + <p> + Page titles are an important way of communicating the purpose of a page to a + user or search engine. <code><title></code> tags are also used as + headings in the SERPS and as the title for the browser tab when visiting a + page, so it's no surprise to see that 97.1% of mobile pages have a document + title. + </p> + <p> + Even though + <a href="https://moz.com/learn/seo/title-tag" + >Google usually displays the first 50-60 characters of a page title</a + > + within a SERP, the median length of the <code><title></code> tag was + only 21 characters for mobile pages and 20 characters for desktop pages. + Even the 75th percentile is still below the cutoff length. This suggests + that some SEOs and content writers aren't making the most of the space + allocated to them by search engines for describing their home pages in the + SERPs. + </p> + <p> + <code + ><graph histogram length <title> Source: 10.07b, column C, + desktop & mobile></code + > + </p> + <h4 id="meta-descriptions">Meta descriptions</h4> + <p> + Compared to the <code><title></code> tag, fewer pages were detected to + have a meta description, as only 64.02% of mobile home pages have a meta + description. Considering that Google often rewrites meta descriptions in the + SERPs in response to the searcher's query, perhaps website owners place less + importance on including a meta description at all. 
+ </p> + <p> + The median meta description length was also lower than the + <a href="https://moz.com/learn/seo/meta-description" + >recommended length of 155-160 characters</a + >, with desktop pages having descriptions of 123 characters. Interestingly, + meta descriptions were consistently longer on mobile than on desktop, + despite mobile SERPs traditionally having a shorter pixel limit. This limit + has only been extended recently, so perhaps more website owners have been + testing the impact of having longer, more descriptive meta descriptions for + mobile results. + </p> + <p> + <code + ><graph histogram length <meta description> Source: 10.07c, + column C, desktop & mobile></code + > + </p> + <h4 id="image-alt-tags">Image alt tags</h4> + <p> + Considering the importance of alt text for SEO and accessibility, it is far + from ideal to see that only 46.71% of mobile pages use alt attributes on all + of their images. This means that there are still improvements to be made + with regard to making images across the web more accessible to users and + understandable for search engines. + </p> + <p> + Learn more about this in the + <a href="../accessibility">Accessibility chapter</a>. + </p> + <h3 id="indexability">Indexability</h3> + <p> + To show a page's content to users in the SERPs, search engine crawlers must + first be permitted to access and index that page. Some of the factors that + impact a search engine's ability to crawl and index pages include: + </p> + <ul> + <li>Status codes</li> + <li>Noindex tags</li> + <li>Canonical tags</li> + <li>The robots.txt file</li> + </ul> + <h4 id="status-codes">Status codes</h4> + <p> + It is recommended to maintain a <code>200 OK</code> status code for any + important pages that you want search engines to index. The majority of pages + tested were available for search engines to access, with 87.03% of initial + HTML requests on desktop returning a <code>200</code> status code. The + results were slightly lower for mobile pages, with only 82.95% of pages + returning a <code>200</code> status code. + </p> + <p> + The next most commonly found status code on mobile was <code>302</code>, a + temporary redirect, which was found on 10.45% of mobile pages. This was + higher than on desktop, with only 6.71% desktop home pages returning a + <code>302</code> status code. This could be due to the fact that the + <a + href="https://developers.google.com/search/mobile-sites/mobile-seo/separate-urls" + >mobile home pages were alternates</a + > + to an equivalent desktop page, such as on non-responsive sites that have + separate versions of the website for each device. + </p> + <aside>Note: Our results didn't include `4xx` or `5xx` status codes.</aside> + <h4 id="noindex">Noindex</h4> + <p> + A noindex tag can be served in the HTML <code><head></code> or in the + HTTP headers as an <code>X-Robots</code> tag. A noindex tag basically tells + a search engine not to include that page in its SERPs, but the page will + still be accessible for users when they are navigating through the website. + Noindex tags are usually added to duplicate versions of pages that serve the + same content, or low quality pages that provide no value to users coming to + a website from organic search, such as filtered or faceted pages or internal + search pages. 
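+    </p>
+    <p>
+      Because a stray noindex directive can quietly remove a page from the
+      SERPs, it is worth checking for explicitly. The sketch below looks for a
+      noindex value in robots meta tags; it is illustrative only, and a noindex
+      served via the <code>X-Robots-Tag</code> HTTP header would need to be
+      checked separately, for example in the network panel of your browser's
+      developer tools.
+    </p>
+    <pre><code>// Rough sketch: detect a noindex directive declared in robots meta tags.
+// An X-Robots-Tag HTTP header can also carry noindex and is not covered here.
+var noindexFound = false;
+document.querySelectorAll('meta[name="robots"], meta[name="googlebot"]').forEach(function (meta) {
+  var content = (meta.getAttribute('content') || '').toLowerCase();
+  if (content.indexOf('noindex') !== -1) {
+    noindexFound = true;
+  }
+});
+console.log('noindex found in robots meta tags:', noindexFound);</code></pre>
+    <p>
+      Running a check like this on important templates can catch accidental
+      noindex directives before they reach production.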
+    </p>
+    <p>
+      96.93% of mobile pages passed the
+      <a href="https://developers.google.com/web/tools/lighthouse/audits/indexing"
+        >Lighthouse indexing audit</a
+      >, meaning that these pages didn't contain a noindex tag in the
+      <code>&lt;head&gt;</code> or in the HTTP headers. However, this means that
+      3.07% of mobile home pages <em>did</em> have a noindex tag, which is cause
+      for concern, as it means that Google was prevented from indexing these
+      pages.
+    </p>
+    <aside>
+      The websites included in our research are sourced from the
+      <a
+        href="https://developers.google.com/web/tools/chrome-user-experience-report/#methodology"
+        >Chrome UX Report</a
+      >
+      dataset, which excludes websites that are not publicly discoverable. This
+      is a significant source of bias because we're unable to analyze sites that
+      Chrome determines to be non-public. Learn more about our
+      <a href="../methodology#websites">Methodology</a>.
+    </aside>
+    <h4 id="canonicalization">Canonicalization</h4>
+    <p>
+      Canonical tags are used to specify duplicate pages and their preferred
+      alternates, so that search engines can consolidate authority which might be
+      spread across multiple pages within the group onto one main page for
+      improved rankings.
+    </p>
+    <p>
+      48.34% of mobile home pages were
+      <a
+        href="https://developers.google.com/web/tools/lighthouse/audits/canonical"
+        >detected</a
+      >
+      to have a canonical tag. Self-referencing canonical tags aren't essential,
+      and canonical tags are usually required for duplicate pages. Home pages are
+      rarely duplicated anywhere else across the site, so it isn't surprising
+      that less than half of them have a canonical tag.
+    </p>
+    <h4 id="robotstxt">robots.txt</h4>
+    <p>
+      One of the most effective methods for controlling search engine crawling is
+      the
+      <a
+        href="https://www.deepcrawl.com/knowledge/technical-seo-library/robots-txt/"
+        >robots.txt file</a
+      >. This is a file that sits on the root domain of a website and specifies
+      which URLs and URL paths should be disallowed from being crawled by search
+      engines.
+    </p>
+    <p>
+      It was interesting to observe that only 72.16% of mobile sites have a valid
+      robots.txt,
+      <a href="https://developers.google.com/web/tools/lighthouse/audits/robots"
+        >according to Lighthouse</a
+      >. The key issues we found are split between the 22% of sites that have no
+      robots.txt file at all and the ~6% that serve an invalid robots.txt file,
+      thus failing the audit. While there are many valid reasons to not have a
+      robots.txt file, such as having a small website that doesn't struggle with
+      <a
+        href="https://webmasters.googleblog.com/2017/01/what-crawl-budget-means-for-googlebot.html"
+        >crawl budget issues</a
+      >, having an invalid robots.txt is cause for concern.
+    </p>
+    <h3 id="linking">Linking</h3>
+    <p>
+      Links are one of the most important attributes of a web page. They help
+      search engines discover new, relevant pages to add to their index and
+      navigate through websites. 96% of the web pages in our dataset contain at
+      least one internal link, and 93% contain at least one external link to
+      another domain. The small minority of pages that don't have any internal or
+      external links will be missing out on the immense value that links pass
+      through to target pages.
+    </p>
+    <p>
+      The number of internal and external links included on desktop pages was
+      consistently higher than the number found on mobile pages. The limited
+      space of a smaller viewport often causes fewer links to be included in the
+      design of a mobile page compared to desktop.
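+    </p>
+    <p>
+      A simple way to get a feel for these numbers on a single page is to group
+      its anchors by destination. The snippet below is a rough sketch of that
+      idea, counting same-site, external, and hash-only links from a browser
+      console; it will not match our crawl methodology exactly.
+    </p>
+    <pre><code>// Rough sketch: classify the links on the current page.
+var counts = { internal: 0, external: 0, hash: 0 };
+document.querySelectorAll('a[href]').forEach(function (link) {
+  var href = link.getAttribute('href') || '';
+  if (href.indexOf('#') === 0) {
+    counts.hash += 1; // anchor link within the same page
+  } else if (link.hostname === location.hostname) {
+    counts.internal += 1; // same-site (internal) link
+  } else {
+    counts.external += 1; // link to another domain
+  }
+});
+console.log(counts);</code></pre>
+    <p>
+      Comparing these counts between the desktop and mobile versions of a page
+      is an easy way to spot links that are missing from the mobile experience.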
+    </p>
+    <p>
+      It's important to bear in mind that fewer internal links on the mobile
+      version of a page
+      <a href="https://moz.com/blog/internal-linking-mobile-first-crawl-paths"
+        >might cause an issue</a
+      >
+      for your website. With
+      <a
+        href="https://www.deepcrawl.com/knowledge/white-papers/mobile-first-index-guide/"
+        >mobile-first indexing</a
+      >, which is now Google's default for new websites, if a page is only linked
+      from the desktop version and not present on the mobile version, search
+      engines will have a much harder time discovering and ranking it.
+    </p>
+    <p>
+      <code
+        >&lt;graph histogram count of links by type Source: 10.10, column C
+        desktop only&gt;</code
+      >
+    </p>
+    <p>
+      <code
+        >&lt;graph histogram count of links by type Source: 10.10, column D, E,
+        desktop only&gt;</code
+      >
+    </p>
+    <p>
+      The median desktop page includes 70 internal (same-site) links, whereas the
+      median mobile page has 60 internal links. The median number of external
+      links per page follows a similar trend, with desktop pages including 10
+      external links and mobile pages including 8.
+    </p>
+    <p>
+      Anchor links, which link to a certain scroll position on the same page, are
+      not very popular. Over 65% of home pages have no anchor links. This is
+      probably due to the fact that home pages don't usually contain any
+      long-form content.
+    </p>
+    <p>
+      There is good news from our analysis of the descriptive link text metric.
+      89.94% of mobile pages pass Lighthouse's
+      <a
+        href="https://developers.google.com/web/tools/lighthouse/audits/descriptive-link-text"
+        >descriptive link text audit</a
+      >. This means that these pages don't have generic "click here", "go",
+      "here" or "learn more" links, but use more meaningful link text which helps
+      users and search engines better understand the context of pages and how
+      they connect with one another.
+    </p>
+    <h2 id="advanced">Advanced</h2>
+    <p>
+      Having descriptive, useful content on a page that isn't being blocked from
+      search engines with a noindex tag or robots.txt directive isn't enough for
+      a website to succeed in organic search. Those are just the basics. There is
+      a lot more that can be done to enhance the performance of a website and its
+      appearance in SERPs.
+    </p>
+    <p>
+      Some of the more technically complex aspects that have been gaining
+      importance in successfully indexing and ranking websites include speed,
+      structured data, internationalization, security, and mobile friendliness.
+    </p>
+    <h3 id="speed">Speed</h3>
+    <p>
+      Mobile loading speed was first
+      <a
+        href="https://webmasters.googleblog.com/2018/01/using-page-speed-in-mobile-search.html"
+        >announced as a ranking factor</a
+      >
+      by Google in 2018. Speed isn't a new focus for Google though. Back in 2010,
+      it was
+      <a
+        href="https://webmasters.googleblog.com/2010/04/using-site-speed-in-web-search-ranking.html"
+        >revealed that speed had been introduced as a ranking signal</a
+      >.
+    </p>
+    <p>
+      A fast-loading website is also crucial for a good user experience. Users
+      who have to wait even a few seconds for a site to load tend to bounce and
+      try another result in the SERPs, one from a competitor that loads quickly
+      and meets their expectations of website performance.
+    </p>
+    <p>
+      Our analysis of load speed across the web is based on the
+      <a href="../methodology#chrome-ux-report">Chrome UX Report</a> (CrUX),
+      which collects data from real-world Chrome users.
This data shows that an + astonishing 63.47% of websites are labelled as <strong>slow</strong>. Split + by device, this picture is even bleaker for tablet (82.00%) and phone + (77.61%). In the context of our results, per the + <a + href="https://developers.google.com/speed/docs/insights/v5/about#categories" + >PageSpeed Insights classification system</a + >, a slow website is defined as having 10% of First Contentful Paint (FCP) + experiences taking over 2,500 ms or 5% of First Input Delay (FID) + experiences measuring over 250 ms. + </p> + <p> + <code + ><graph data 10.15b: CruX image similar to + [IMG](https://developers.google.com/web/updates/images/2018/08/crux-dash-fcp.png) + per device + speed label></code + > + </p> + <p> + Although the numbers are bleak for the speed of the web, the good news is + that SEO experts and tools have been focusing more and more on the technical + challenges of speeding up websites. You can learn more about the state of + web performance in the <a href="../performance">Performance chapter</a>. + </p> + <h3 id="structured-data">Structured data</h3> + <p> + Structured data allows website owners to add additional semantic data to + their web pages, by adding + <a href="https://en.wikipedia.org/wiki/JSON-LD">JSON-LD</a> snippets or + <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Microdata" + >Microdata</a + >, for example. Search engines parse this data to better understand these + pages and sometimes use the markup to display additional relevant + information in the search results. The most commonly found types of + structured data are + <a + href="https://developers.google.com/search/docs/data-types/review-snippet" + >reviews</a + >, + <a href="https://developers.google.com/search/docs/data-types/product" + >products</a + >, + <a + href="https://developers.google.com/search/docs/data-types/local-business" + >businesses</a + >, + <a href="https://developers.google.com/search/docs/data-types/movie" + >movies</a + >, and + <a href="https://developers.google.com/search/docs/guides/search-gallery" + >more</a + >. + </p> + <p> + The + <a href="https://developers.google.com/search/docs/guides/enhance-site" + >extra visibility</a + > + that structured data can provide for websites is interesting for site + owners, given that it can help to create more opportunities for traffic. For + example, the relatively new + <a href="https://developers.google.com/search/docs/data-types/faqpage" + >FAQ schema</a + > + will double the size of your snippet and the real estate of your site in the + SERP. + </p> + <p> + During our research, we found that only 14.67% of sites are eligible for + rich results on mobile. Interestingly, desktop site eligibility is slightly + lower at 12.46%. This suggests that there is a lot more that site owners can + be doing to optimize the way their home pages are appearing in search. + </p> + <p> + Among the sites with structured data markup, the five most prevalent types + are: + </p> + <ol> + <li><code>WebSite</code> (16.02%)</li> + <li><code>SearchAction</code> (14.35%)</li> + <li><code>Organization</code> (12.89%)</li> + <li><code>WebPage</code> (11.58%)</li> + <li><code>ImageObject</code> (5.35%)</li> + </ol> + <p> + Interestingly, one of the most popular data types that triggers a search + engine feature is <code>SearchAction</code>, which powers the + <a + href="https://developers.google.com/search/docs/data-types/sitelinks-searchbox" + >sitelinks searchbox</a + >. 
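+    </p>
+    <p>
+      If you want to see which structured data types your own home page
+      declares, the sketch below pulls the top-level <code>@type</code> values
+      out of any JSON-LD blocks on the page. It is a simplified illustration
+      that ignores Microdata, RDFa, and nested entities.
+    </p>
+    <pre><code>// Rough sketch: list the top-level JSON-LD @type values declared on a page.
+var types = [];
+document.querySelectorAll('script[type="application/ld+json"]').forEach(function (script) {
+  try {
+    var data = JSON.parse(script.textContent);
+    var items = Array.isArray(data) ? data : [data];
+    items.forEach(function (item) {
+      if (item) {
+        if (item['@type']) {
+          types.push(item['@type']);
+        }
+      }
+    });
+  } catch (e) {
+    console.warn('Skipping an invalid JSON-LD block');
+  }
+});
+console.log('JSON-LD types found:', types);</code></pre>
+    <p>
+      On many pages, a check like this will surface <code>WebSite</code> and
+      <code>SearchAction</code>, which matches the prevalence figures above.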
+ </p> + <p> + The top 5 markup types all lead to more visibility in Google's search + results, which might be the fuel for more widespread adoption of these types + of structured data. + </p> + <p> + Seeing as we only looked at home pages within this analysis, the results + might look very different if we were to consider interior pages, too. + </p> + <p> + Review stars are only found on 1.09% of the web's home pages (via + <a href="https://schema.org/AggregateRating">AggregateRating</a>). + </p> + <p> + Also, the newly introduced + <a href="https://schema.org/QAPage">QAPage</a> appeared only in 48 + instances, and the <a href="https://schema.org/FAQPage">FAQPage</a> at a + slightly higher frequency of 218 times. These last two counts are expected + to increase in the future as we run more crawls and dive deeper into Web + Almanac analysis. + </p> + <h3 id="internationalization">Internationalization</h3> + <p> + Internationalization is one of the most complex aspects of SEO, even + <a href="https://twitter.com/JohnMu/status/965507331369984002" + >according to some Google search employees</a + >. Internationalization in SEO focuses on serving the right content from a + website with multiple language or country versions, and making sure that + content is being targeted towards the specific language and location of the + user. + </p> + <p> + While 38.40% of desktop sites (33.79% on mobile) have the HTML lang + attribute set to English, only 7.43% (6.79% on mobile) of the sites also + contain an <code>hreflang</code> link to another language version. This + suggests that the vast majority of websites that we analyzed don't offer + separate versions of their home page that would require language targeting + -- unless these separate versions do exist, but haven't been configured + correctly. + </p> + <p> + <code + ><graph 10.04b - [do we want to chart this data, e.g. what does it + really mean for SEO?]></code + > + </p> + <p> + <code + ><include a chart of the languages and country combinations found, SEOs + will want to see this breakdown></code + > + </p> + <p> + Next to English, the most common languages are French, Spanish, and German. + These are followed by languages targeted towards specific geographies like + English for Americans (<code>en-us</code>) or more obscure combinations like + Spanish for the Irish (<code>es-ie</code>). + </p> + <p> + The analysis did not check for correct implementation, such as whether or + not the different language versions properly link to each other. However, + from looking at the low adoption of + <a + href="https://www.google.com/url?q=https://support.google.com/webmasters/answer/189077?hl%3Den&sa=D&ust=1570627963630000&usg=AFQjCNFwzwglsbysT9au_I-7ZQkwa-QvrA" + >having an x-default version as is recommended</a + > + (only 3.77% on desktop and 1.30% on mobile), this is an indicator that this + element is complex and not always easy to get right. + </p> + <h3 id="spa-crawlability">SPA crawlability</h3> + <p> + Single-page applications (SPAs) built with frameworks like React and Vue.js + come with their own SEO complexity. Websites using a hash-based navigation, + make it especially hard for search engines to properly crawl and index them. + For example, Google had an "AJAX crawling scheme" workaround that turned out + to be complex for search engines as well as developers, so it was + <a + href="https://webmasters.googleblog.com/2015/10/deprecating-our-ajax-crawling-scheme.html" + >deprecated in 2015</a + >. 
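+    </p>
+    <p>
+      Hash-based navigation is easy to spot on a given page. As a rough
+      illustration, the snippet below counts links whose <code>href</code>
+      begins with <code>#</code>; the analysis that follows also breaks these
+      results down by detected JavaScript framework.
+    </p>
+    <pre><code>// Rough sketch: count links that rely on hash (fragment-only) URLs.
+var hashLinks = 0;
+var totalLinks = 0;
+document.querySelectorAll('a[href]').forEach(function (link) {
+  totalLinks += 1;
+  if (link.getAttribute('href').indexOf('#') === 0) {
+    hashLinks += 1;
+  }
+});
+console.log(hashLinks + ' of ' + totalLinks + ' links use hash URLs');</code></pre>
+    <p>
+      Pages that rely heavily on such links for navigation should consider
+      History API-based routing so that each view has its own crawlable URL.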
+    </p>
+    <p>
+      The SPAs we tested served a relatively low number of links via hash URLs,
+      with 13.08% of React mobile pages using hash URLs for navigation, 8.15% of
+      mobile Vue.js pages using them, and 2.37% of mobile Angular pages using
+      them. These results were very similar for desktop pages too. This is
+      positive to see from an SEO perspective, considering the impact that hash
+      URLs can have on content discovery.
+    </p>
+    <p>
+      The higher number of hash URLs in React pages is surprising, especially in
+      contrast to the lower number found on Angular pages. Both frameworks
+      promote the adoption of routing packages where the
+      <a href="https://developer.mozilla.org/en-US/docs/Web/API/History"
+        >History API</a
+      >
+      is the default for links, instead of relying on hash URLs. Vue.js is
+      <a href="https://github.com/vuejs/rfcs/pull/40"
+        >considering moving to using the History API as the default</a
+      >
+      as well in version 3 of its <code>vue-router</code> package.
+    </p>
+    <h3 id="amp">AMP</h3>
+    <p>
+      AMP (formerly known as "Accelerated Mobile Pages") was first introduced in
+      2015 by Google as an open source HTML framework. It offers components and
+      infrastructure that give websites a faster experience for their users,
+      using optimizations such as caching, lazy loading, and optimized images.
+      Notably, Google adopted this for their search engine, where AMP pages are
+      also served from their own CDN. This feature later became a standards
+      proposal under the name
+      <a
+        href="https://wicg.github.io/webpackage/draft-yasskin-http-origin-signed-responses.html"
+        >Signed HTTP Exchanges</a
+      >.
+    </p>
+    <p>
+      Despite this, only 0.62% of mobile home pages contain a link to an AMP
+      version. Given the visibility this project has had, this suggests that
+      adoption has been relatively low. However, AMP can be more useful for
+      serving article pages, so our home page-focused analysis won't reflect
+      adoption across other page types.
+    </p>
+    <h3 id="security">Security</h3>
+    <p>
+      A strong online shift in recent years has been for the web to move to
+      HTTPS by default. HTTPS prevents website traffic from being intercepted on
+      public WiFi networks, for example, where user input data would otherwise be
+      transmitted insecurely. Google has been pushing for sites to adopt HTTPS,
+      and even introduced
+      <a
+        href="https://webmasters.googleblog.com/2014/08/https-as-ranking-signal.html"
+        >HTTPS as a ranking signal</a
+      >. Chrome also supported the move to secure pages by labelling non-HTTPS
+      pages as '<a
+        href="https://www.blog.google/products/chrome/milestone-chrome-security-marking-http-not-secure/"
+        >not secure</a
+      >' in the browser.
+    </p>
+    <p>
+      For more information and guidance from Google on the importance of HTTPS
+      and how to adopt it, please see
+      <a
+        href="https://developers.google.com/web/fundamentals/security/encrypt-in-transit/why-https"
+        >Why HTTPS Matters</a
+      >.
+    </p>
+    <p>
+      We found that 67.06% of websites on desktop are now served over HTTPS.
+      That still leaves around a third of websites that haven't migrated to
+      HTTPS and are serving non-secure pages to their users. This is a
+      significant number. Migrations can be hard work, so this could be a reason
+      why the adoption rate isn't higher, but an HTTPS migration usually requires
+      an SSL certificate and a simple change to the <code>.htaccess</code> file.
+      There's no real reason not to switch to HTTPS.
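+    </p>
+    <p>
+      If you are part-way through a migration, the sketch below is a quick,
+      illustrative way to confirm that a page is served over HTTPS and to list
+      any subresources that are still requested over plain HTTP. Note that it
+      only sees requests exposed through the Resource Timing API.
+    </p>
+    <pre><code>// Rough sketch: flag a non-HTTPS page and any insecure subresource requests.
+console.log('Page served over HTTPS:', location.protocol === 'https:');
+
+var insecure = [];
+performance.getEntriesByType('resource').forEach(function (entry) {
+  if (entry.name.indexOf('http://') === 0) {
+    insecure.push(entry.name);
+  }
+});
+console.log('Subresources still requested over HTTP:', insecure);</code></pre>
+    <p>
+      Cleaning up these mixed content requests is usually the fiddliest part of
+      an HTTPS migration, so it is worth doing before flipping any redirects.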
+ </p> + <p> + Google's + <a href="https://transparencyreport.google.com/https/overview" + >HTTPS Transparancy Report</a + > + reports a 90% adoption of HTTPS for the top 100 non-Google domains + (representing 25% of all website traffic worldwide). The difference between + this number and ours could be explained by the fact that relatively smaller + sites are adopting HTTPS at a slower rate. + </p> + <h2 id="conclusion">Conclusion</h2> + <p> + Through our analysis, we observed that the majority of websites are getting + the fundamentals right, in that their home pages are crawlable, indexable, + and include the key content required to rank well in search engines' results + pages. Not every person who owns a website will be aware of SEO at all, let + alone best practice guidelines, so it is promising to see that so many sites + have got the basics covered. + </p> + <p> + However, more sites are missing the mark than expected when it comes to some + of the more advanced aspects of SEO and accessibility. Site speed is one of + these factors that many websites are struggling with, especially on mobile. + This is a significant problem, as speed is one of the biggest contributors + to UX, which is something that can impact rankings. The number of websites + that aren't yet served over HTTPS is also problematic to see, considering + the importance of security and keeping user data safe. + </p> + <p> + There is a lot more that we can all be doing to learn about SEO best + practices and industry developments. This is essential due to the evolving + nature of the search industry and the rate at which changes happen. Search + engines make thousands of improvements to their algorithms each year, and we + need to keep up if we want our websites to reach more visitors in organic + search. + </p> +</section> +{% endblock %} diff --git a/src/templates/en/2019/chapters/third-parties.html b/src/templates/en/2019/chapters/third-parties.html index 744d58fd6bf..708558d3e7d 100644 --- a/src/templates/en/2019/chapters/third-parties.html +++ b/src/templates/en/2019/chapters/third-parties.html @@ -1,122 +1,531 @@ -{# IMPORTANT! +<!--{# IMPORTANT! -- `chapter.html` is a "template for templates" used by the `generate_chapters.py` script, hence the strange template syntax (eg, double braces `{% ... 
%}`) +- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) - if you want to modify `chapter.html`, you must also: - translate the corresponding language-specific templates (eg `src/templates/<lang>/<year>/chapter.html`) - run the generation script to update each chapter template - if you want to modify the chapter templates (eg `src/templates/<lang>/<year>/chapters/<chapter>.html`): - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script -#} +#}--> -{% extends "en/2019/base_chapter.html" %} +{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} +<link rel="stylesheet" href="/static/css/chapter.css" /> +{% endblock %} {% set metadata = +{"part_number":"II","chapter_number":5,"title":"Third +Parties","authors":["patrickhulce"],"reviewers":["simonhearne"," flowlabs"," +jasti"," zeman"]} %} {% block main %} +<aside> + <ul> + <li> + <a href="#chapter-5-third-parties">Chapter 5: Third Parties</a> -{% block styles %} -{{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css"> -{% endblock %} + <ul> + <li> + <a href="#introduction">Introduction</a> + </li> + + <li> + <a href="#definitions">Definitions</a> + + <ul> + <li> + <a href="#“third-party”">“Third Party”</a> + </li> + + <li> + <a href="#provider-categories">Provider Categories</a> + </li> + </ul> + </li> + + <li> + <a href="#data">Data</a> + + <ul> + <li> + <a href="#categories">Categories</a> + </li> + + <li> + <a href="#providers">Providers</a> + </li> + + <li> + <a href="#resource-types">Resource Types</a> + </li> + + <li> + <a href="#request-count">Request Count</a> + </li> + + <li> + <a href="#byte-weight">Byte Weight</a> + </li> -{% set metadata = {'part_number': 'II', 'chapter_number': 5, 'title': 'Third Parties', 'authors': ['patrickhulce'], 'reviewers': ['simonhearne', 'flowlabs', 'jasti', 'zeman']} %} + <li> + <a href="#script-execution">Script Execution</a> + </li> + </ul> + </li> + + <li> + <a href="#analysis">Analysis</a> + + <ul> + <li> + <a href="#usage-patterns">Usage Patterns</a> + </li> + + <li> + <a href="#generate-and-consume-data">Generate and Consume Data</a> + </li> + + <li> + <a href="#monetize-web-traffic">Monetize Web Traffic</a> + </li> + + <li> + <a href="#simplify-development">Simplify Development</a> + </li> + </ul> + </li> + + <li> + <a href="#repercussions">Repercussions</a> + + <ul> + <li> + <a href="#performance">Performance</a> + </li> + + <li> + <a href="#privacy">Privacy</a> + </li> + </ul> + </li> + + <li> + <a href="#caveats">Caveats</a> + </li> + </ul> + </li> + </ul> +</aside> -{% block main %} <section class="main"> <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" class="chapter-hero"> - <h1>Chapter 5: Third Parties</h1> -<h2>Introduction</h2> -<p>The open web is vast, linkable, and interoperable by design. The ability to grab someone else’s complex library and use it on your site with a single <code><link></code> or <code><script></code> element has supercharged developers’ productivity and enabled awesome new web experiences; on the flip side, the immense popularity of a select few third-party providers raises important performance and privacy concerns. 
This chapter examines the prevalence and impact of third-party code on the web in 2019, the web usage patterns that lead to the popularity of third-party solutions, and potential repercussions for the future of web performance and privacy.</p> -<h2>Definitions</h2> -<h3>“Third Party”</h3> -<p>A third party is an entity outside the primary site-user relationship, i.e. the aspects of the site not directly within the control of the site owner but present with their approval. For example, the Google Analytics script is an example of a common third-party resource.</p> -<p>Third-party resources are...</p> -<ul> -<li>Hosted on a shared and public origin</li> -<li>Widely used by a variety of sites</li> -<li>Uninfluenced by an individual site owner</li> -</ul> -<p>To match these goals as closely as possible, the formal definition used throughout this chapter of a third-party resource is a resource that originates from a domain whose resources can be found on at least 50 unique pages in the HTTPArchive dataset.</p> -<h3>Provider Categories</h3> -<p>This chapter divides third-party providers into one of these broad categories. A brief description is included below and the mapping of domain to category can be found in the <a href="https://github.com/patrickhulce/third-party-web/blob/8afa2d8cadddec8f0db39e7d715c07e85fb0f8ec/data/entities.json5">third-party-web repository</a>.</p> -<ul> -<li><strong>Ad</strong> - display and measurement of advertisements</li> -<li><strong>Analytics</strong> - tracking site visitor behavior</li> -<li><strong>CDN</strong> - providers that host public shared utilities or private content of their users</li> -<li><strong>Content</strong> - providers that facilitate publishers and host syndicated content</li> -<li><strong>Customer Success</strong> - support and customer relationship management functionality</li> -<li><strong>Hosting</strong> - providers that host the arbitrary content of their users</li> -<li><strong>Marketing</strong> - sales, lead generation, and email marketing functionality</li> -<li><strong>Social</strong> - social networks and their affiliated integrations</li> -<li><strong>Tag Manager</strong> - provider whose sole role is to manage the inclusion of other third parties</li> -<li><strong>Utility</strong> - code that aids the development objectives of the site owner</li> -<li><strong>Video</strong> - providers that host the arbitrary video content of their users</li> -<li><strong>Other</strong> - uncategorized or non-conforming activity</li> -</ul> -<h2>Data</h2> -<p>Third-party code is everywhere. 93% of pages include at least one third-party resource, 76% of pages issue a request to an analytics domain, the median page requests content from at least 9 <em>unique</em> third-party domains that represent 35% of their total network activity, and the most active 10% of pages issue a whopping 175 third-party requests or more. It’s not a stretch to say that third parties form the bedrock of the web.</p> -<p><insert stylized value of metric 05_01></p> -<p><insert stylized value of metric 05_02></p> -<h3>Categories</h3> -<p>If the ubiquity of third-party content is unsurprising, perhaps more interesting is the breakdown of third-party content by provider type.</p> -<p>While advertising might be the most user-visible example of third-party presence on the web, analytics providers are the most common third-party category with 76% of sites including at least one analytics request. 
CDNs at 63%, ads at 57%, and developer utilities like Sentry, Stripe, and Google Maps SDK at 56% follow up as a close second, third, and fourth for appearing on the most web properties. The popularity of these categories form the foundation of our web usage patterns identified later in the chapter.</p> -<p><insert graphic of metric 05_11></p> -<h3>Providers</h3> -<p>A relatively small set of providers dominate the third-party landscape, the top 100 domains account for 30% of network requests across the web. Powerhouses like Google, Facebook, and YouTube make the headlines here with full percentage points of share each, but smaller entities like Wix and Shopify command a substantial portion of third-party popularity as well.</p> -<p>While much could be said about every individual provider’s popularity and performance impact, this more opinionated analysis is left as an exercise for the reader and other purpose-built tools such as <a href="https://thirdpartyweb.today">third-party-web</a>.</p> -<p><insert table of metric 05_06></p> -<p><insert table of metric 05_09></p> -<h3>Resource Types</h3> -<p>The resource type breakdown of third-party content also lends insight into how third-party code is used across the web. While first-party requests are 56% images, 23% script, 14% CSS, and only 4% HTML, third-party requests skew more heavily toward script and HTML at 32% script, 34% images, 12% HTML, and only 6% CSS. While this suggests that third-party code is less frequently used to aid the design and instead used more frequently to facilitate or observe interactions than first-party code, a breakdown of resource types by party status tells a more nuanced story. While CSS and images are dominantly first-party at 70% and 64% respectively, fonts are largely served by third-party providers with only 28% being served from first-party sources. This concept of usage patterns is explored in more depth later in this chapter.</p> -<p><insert graphic of metric 05_03></p> -<p>Several other amusing factoids jump out from this data. Tracking pixels (image requests to analytics domains) make up 1.6% of all network requests, six times as many video requests are to social networks like Facebook and Twitter than dedicated video providers like YouTube and Vimeo (presumably because the default YouTube embed consists of HTML and a preview thumbnail but not an autoplaying video), and there are still more requests for first-party images than all scripts combined.</p> -<h3>Request Count</h3> -<p>49% of all requests are third-party. At 51%, first-party can still narrowly hold on to the crown in 2019 of comprising the majority of the web resources. Given that half of all the requests are third-party yet a small set of pages do not include any at all, the most active third-party users must be doing quite a bit more than their fair share. Indeed, at the 75th, 90th, and 99th percentiles we see nearly all of the page being comprised of third-party content. In fact, for some sites heavily relying on distributed WYSIWYG platforms like Wix and SquareSpace, the root document might be the sole first-party request!</p> -<p><insert graphic of metric 05_11></p> -<p>The number of requests issued by each third-party provider also varies considerably by category. While analytics are the most widespread third-party category across websites, they account for only 7% of all third-party network requests. Ads, on the other hand, are found on nearly 20% fewer sites yet make up 25% of all third-party network requests. 
Their outsized resource impact compared to their popularity will be a theme we continue to uncover in the remaining data.</p> -<h3>Byte Weight</h3> -<p>While 49% of requests are third-party, their share of the web in terms of bytes is quite a bit lower at only 28%. The same goes for the breakdown by multiple resource types. Third-party fonts make up 72% of all fonts, but they’re only 53% of font bytes; 74% of HTML requests, but only 39% of HTML bytes; 68% of video requests, but only 31% of video bytes. All this seems to suggest third-party providers are responsible stewards who keep their response sizes low, and, for the most part, that is in fact the case until you look at scripts.</p> -<p>Despite serving 57% of scripts, third parties comprise 64% of script bytes. meaning their scripts are larger on average than first-party scripts. This is an early warning sign for their performance impact to come in the next few sections.</p> -<p><insert graphic of metric 05_04></p> -<p><insert graphic of metric 05_12></p> -<p>As for specific third-party providers, the same juggernauts topping the request count leaderboards make their appearance in byte weight as well. The only few notable movements are the large, image-heavy providers such as YouTube, Shopify, and Twitter which climb to the top of the byte impact charts.</p> -<p><insert table of metric 05_07></p> -<h3>Script Execution</h3> -<p>57% of script execution time is from third-party scripts, and the top 100 domains already account for 48% of all script execution time on the web. This underscores just how large an impact a select few entities really have on web performance. This topic is explored more in depth in the <a href="#performance">Repercussions > Performance</a> section.</p> -<p><insert graphic of metric 05_05></p> -<p><insert graphic of metric 05_13></p> -<p>The category breakdowns among script execution largely follow that of resource counts. Here too advertising looms largest. Ad scripts comprise 25% of third-party script execution time with hosting and social providers in a distant tie for second at 12%.</p> -<p><insert table of metric 05_08></p> -<p><insert table of metric 05_10></p> -<p>While much could be said about every individual provider’s popularity and performance impact, this more opinionated analysis is left as an exercise for the reader and other purpose-built tools such as <a href="https://thirdpartyweb.today">third-party-web</a>.</p> -<h2>Analysis</h2> -<h3>Usage Patterns</h3> -<p>Why do site owners use third-party code? How did third-party content grow to be nearly half of all network requests? What is all of this code doing? Answers to these questions lie in the three primary usage patterns of third-party resources. Broadly, site owners reach for third parties to generate and consume data from their users, monetize their site experiences, and simplify web development.</p> -<h3>Generate and Consume Data</h3> -<p>Analytics is the most popular third-party category found across the web and yet is minimally user-visible. Consider the volume of information at play in the lifetime of a web visit; there’s user context, device, browser, connection quality, location, page interactions, session length, return visitor status, and more being generated continuously. It’s difficult, cumbersome, and expensive to maintain tools that warehouse, normalize, and analyze time series data of this magnitude. 
While nothing categorically necessitates that analytics fall into the domain of third-party providers, the widespread attractiveness of understanding your users, deep complexity of the problem space, and increasing emphasis on managing data respectfully and responsibly naturally surfaces analytics as a popular third-party usage pattern.</p> -<p>There’s also a flip side to user data though: consumption. While analytics is about generating data from your site’s visitors, other third-party resources focus on consuming data about your visitors that is known only by others. Social providers fall squarely into this usage pattern. A site owner <em>must</em> use Facebook resources if they wish to integrate information from a visitor’s Facebook profile into their site. As long as site owners are interested in personalizing their experience with widgets from social networks and leveraging the social networks of their visitors to increase their reach, social integrations are likely to remain the domain of third-party entities for the foreseeable future.</p> -<h3>Monetize</h3> -<p>The open model of the web does not always serve the financial interests of content creators to their liking and many site owners resort to monetizing their sites with advertising. Because building direct relationships with advertisers and negotiating pricing contracts is a relatively difficult and time-consuming process, this concern is largely handled by third-party providers performing targeted advertising and real-time bidding. Widespread negative public opinion, the popularity of ad blocking technology, and regulatory action in major global markets such as Europe pose the largest threat to the continued use of third-party providers for monetization. While it’s unlikely that site owners suddenly strike their own advertising deals or build bespoke ad networks, alternative monetization models like paywalls and experiments like Brave’s attention token have a real chance of shaking up the third-party ad landscape of the future.</p> -<h3>Simplify Development</h3> -<p>Above all, third-party resources are used to simplify the web development experience. Even previous usage patterns could arguably fall into this pattern as well. Whether analyzing user behavior, communicating with advertisers, or personalizing the user experience, third-party resources are used to make first-party development easier.</p> -<p>Hosting providers are the most extreme example of this pattern. Some of these providers even enable anyone on Earth to become a site owner with no technical expertise necessary. They provide hosting of assets, tools to build sites without coding experience, and domain registration services.</p> -<p>The remainder of third-party providers also tend to fall into this usage pattern. Whether it’s hosting of a utility library such as jQuery for usage by front-end developers cached on Cloudflare’s edge servers or a vast library of common fonts served from a popular Google CDN, third-party content is another way to give the site owner one fewer thing to worry about and, maybe, just maybe make the job of delivering a great experience a little bit easier.</p> -<h2>Repercussions</h2> -<h3>Performance</h3> -<p>The performance impact of third-party content is neither categorically good or bad. 
There are good and bad actors across the spectrum and different category types have varying levels of influence.</p> -<p>The good: shared third-party font and stylesheet utilities are, on average, delivered more efficiently than their first-party counterparts.</p> -<p>Utilities, CDNs, and Content categories are the brightest spots on the third-party performance landscape. They offer optimized versions of the same sort of content that would otherwise be served from first-party sources. Google Fonts and Typekit serve optimized fonts that are smaller on average than first-party fonts, Cloudflare CDN serves a minified version of open source libraries that might be accidentally served in development mode by some site owners, Google Maps SDK efficiently delivers complex maps that might otherwise be naively shipped as large images.</p> -<p>The bad: a very small set of entities represent a very large chunk of JavaScript execution time carrying out narrow set of functionality on pages.</p> -<p>Ads, social, hosting, and certain analytics providers represent the largest negative impact on web performance. While hosting providers deliver a majority of a site’s content and will understandably have a larger performance impact than other third-party categories, they also serve almost entirely static sites that demand very little JavaScript in most cases that should not justify the volume of script execution time. The other categories hurting performance though have even less of an excuse. They fill very narrow roles on each page they appear on and yet quickly take over a majority of resources. For example, the Facebook "Like" button and associated social widgets take up extraordinarily little screen real estate and are a fraction of most web experiences, and yet the median impact on pages with social third parties is nearly 20% of their total JavaScript execution time; similarily for analytics, these libraries do not directly contribute to the perceived user experience, and yet the 90th percentile impact on pages with analytics third parties is 44% of their total JavaScript execution time.</p> -<p>The silver lining of such a small number of entities enjoying such large market share is that a very limited and concentrated effort can have an enormous impact on the web as a whole. Performance improvements at just the top few hosting providers improve 2-3% of <em>all</em> web requests.</p> -<h3>Privacy</h3> -<p>The abundance of analytics providers and top heavy concentration of script execution raises two primary privacy concerns for site visitors: site owners are more interested in tracking their users than any other third-party use case and a handful of companies receive information on a large swath of web traffic.</p> -<p>The interest of site owners in understanding and analyzing user behavior is not malicious on its own, but the widespread and relatively behind-the-scenes nature of web analytics raises valid concerns, and users, companies, and lawmakers have taken notice in recent years with privacy regulation such as GDPR in Europe and the CCPA in California. 
Ensuring that developers handle user data responsibly, treat the user respectfully, and are transparent with what data is collected is key to keeping analytics the most popular third-party category and maintaining the symbiotic nature of analyzing user behavior to deliver future user value.</p> -<p>The top heavy concentration of script execution is great for the potential impact of performance improvements, but less exciting for the privacy ramifications. 29% of <em>all</em> script execution time across the web is just from scripts on domains owned by Google or Facebook. That’s a very large percentage of CPU time that is controlled by just two entities. It’s critical to ensure that the same privacy protections held to analytics providers be applied in these other ad, social, and developer utility categories as well.</p> -<h2>Caveats</h2> -<ul> -<li>All data presented here is based on a non-interactive, cold load. These values could start to look quite different after user interaction.</li> -<li>Third-party content served from a first-party domain is counted as first-party content. i.e. self-hosting Google Fonts or bootstrap.css will be considered first-party content.</li> -<li>First-party content served from a third-party domain is counted as third-party content. i.e. first-party images served over a third-party CDN will be considered third-party content.</li> -<li>Roughly 84% of all third-party domains by request volume have been identified and categorized. The remaining 16% fall into the “Other” category.</li> -</ul> - + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="chapter-hero" + /> + <h1 id="chapter-5-third-parties">Chapter 5: Third Parties</h1> + <h2 id="introduction">Introduction</h2> + <p> + The open web is vast, linkable, and interoperable by design. The ability to + grab someone else’s complex library and use it on your site with a single + <code><link></code> or <code><script></code> element has + supercharged developers’ productivity and enabled awesome new web + experiences. On the flip side, the immense popularity of a select few + third-party providers raises important performance and privacy concerns. + This chapter examines the prevalence and impact of third-party code on the + web in 2019, the web usage patterns that lead to the popularity of + third-party solutions, and potential repercussions for the future of web + performance and privacy. + </p> + <h2 id="definitions">Definitions</h2> + <h3 id="“third-party”">“Third Party”</h3> + <p> + A third party is an entity outside the primary site-user relationship, i.e. + the aspects of the site not directly within the control of the site owner + but present with their approval. For example, the Google Analytics script is + an example of a common third-party resource. + </p> + <p>Third-party resources are…</p> + <ul> + <li>Hosted on a shared and public origin</li> + <li>Widely used by a variety of sites</li> + <li>Uninfluenced by an individual site owner</li> + </ul> + <p> + To match these goals as closely as possible, the formal definition used + throughout this chapter of a third-party resource is a resource that + originates from a domain whose resources can be found on at least 50 unique + pages in the HTTPArchive dataset. + </p> + <h3 id="provider-categories">Provider Categories</h3> + <p> + This chapter divides third-party providers into one of these broad + categories. 
A brief description is included below and the mapping of domain + to category can be found in the + <a + href="https://github.com/patrickhulce/third-party-web/blob/8afa2d8cadddec8f0db39e7d715c07e85fb0f8ec/data/entities.json5" + >third-party-web repository</a + >. + </p> + <ul> + <li><strong>Ad</strong> - display and measurement of advertisements</li> + <li><strong>Analytics</strong> - tracking site visitor behavior</li> + <li> + <strong>CDN</strong> - providers that host public shared utilities or + private content of their users + </li> + <li> + <strong>Content</strong> - providers that facilitate publishers and host + syndicated content + </li> + <li> + <strong>Customer Success</strong> - support and customer relationship + management functionality + </li> + <li> + <strong>Hosting</strong> - providers that host the arbitrary content of + their users + </li> + <li> + <strong>Marketing</strong> - sales, lead generation, and email marketing + functionality + </li> + <li> + <strong>Social</strong> - social networks and their affiliated + integrations + </li> + <li> + <strong>Tag Manager</strong> - provider whose sole role is to manage the + inclusion of other third parties + </li> + <li> + <strong>Utility</strong> - code that aids the development objectives of + the site owner + </li> + <li> + <strong>Video</strong> - providers that host the arbitrary video content + of their users + </li> + <li><strong>Other</strong> - uncategorized or non-conforming activity</li> + </ul> + <h2 id="data">Data</h2> + <p> + Third-party code is everywhere. 93% of pages include at least one + third-party resource, 76% of pages issue a request to an analytics domain, + the median page requests content from at least 9 <em>unique</em> third-party + domains that represent 35% of their total network activity, and the most + active 10% of pages issue a whopping 175 third-party requests or more. It’s + not a stretch to say that third parties are an integral part of the web. + </p> + <p><code><insert stylized value of metric 05_01></code></p> + <p><code><insert stylized value of metric 05_02></code></p> + <h3 id="categories">Categories</h3> + <p> + If the ubiquity of third-party content is unsurprising, perhaps more + interesting is the breakdown of third-party content by provider type. + </p> + <p> + While advertising might be the most user-visible example of third-party + presence on the web, analytics providers are the most common third-party + category with 76% of sites including at least one analytics request. CDNs at + 63%, ads at 57%, and developer utilities like Sentry, Stripe, and Google + Maps SDK at 56% follow up as a close second, third, and fourth for appearing + on the most web properties. The popularity of these categories forms the + foundation of our web usage patterns identified later in the chapter. + </p> + <p><code><insert graphic of metric 05_11></code></p> + <h3 id="providers">Providers</h3> + <p> + A relatively small set of providers dominate the third-party landscape, the + top 100 domains account for 30% of network requests across the web. + Powerhouses like Google, Facebook, and YouTube make the headlines here with + full percentage points of share each, but smaller entities like Wix and + Shopify command a substantial portion of third-party popularity as well. 
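+    </p>
+    <p>
+      As a rough illustration of the definition used above, the following sketch
+      tallies how often each cross-origin domain appears in a list of request
+      URLs. It is not the query used against the HTTP Archive dataset, but the
+      idea is the same: domains that show up across many pages are the
+      candidates for the third-party list.
+    </p>
+    <pre><code>// Rough sketch: count requests to domains other than the page's own hostname.
+// A real analysis would also group subdomains belonging to the same site.
+function tallyThirdPartyDomains(pageHostname, requestUrls) {
+  var counts = {};
+  requestUrls.forEach(function (url) {
+    var hostname = new URL(url).hostname;
+    if (hostname !== pageHostname) {
+      counts[hostname] = (counts[hostname] || 0) + 1;
+    }
+  });
+  return counts;
+}
+
+console.log(tallyThirdPartyDomains('example.com', [
+  'https://example.com/app.js',
+  'https://www.google-analytics.com/analytics.js',
+  'https://fonts.googleapis.com/css?family=Roboto'
+]));</code></pre>
+    <p>
+      In practice, the mapping from domain to provider and category is curated
+      by hand, which is exactly what the third-party-web repository linked above
+      provides.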
+ </p> + <p> + While much could be said about every individual provider’s popularity and + performance impact, this more opinionated analysis is left as an exercise + for the reader and other purpose-built tools such as + <a href="https://thirdpartyweb.today">third-party-web</a>. + </p> + <p><code><insert table of metric 05_06></code></p> + <p><code><insert table of metric 05_09></code></p> + <h3 id="resource-types">Resource Types</h3> + <p> + The resource type breakdown of third-party content also lends insight into + how third-party code is used across the web. While first-party requests are + 56% images, 23% script, 14% CSS, and only 4% HTML, third-party requests skew + more heavily toward script and HTML at 32% script, 34% images, 12% HTML, and + only 6% CSS. While this suggests that third-party code is less frequently + used to aid the design and instead used more frequently to facilitate or + observe interactions than first-party code, a breakdown of resource types by + party status tells a more nuanced story. While CSS and images are dominantly + first-party at 70% and 64% respectively, fonts are largely served by + third-party providers with only 28% being served from first-party sources. + This concept of usage patterns is explored in more depth later in this + chapter. + </p> + <p><code><insert graphic of metric 05_03></code></p> + <p> + Several other amusing factoids jump out from this data. Tracking pixels + (image requests to analytics domains) make up 1.6% of all network requests, + six times as many video requests are to social networks like Facebook and + Twitter than dedicated video providers like YouTube and Vimeo (presumably + because the default YouTube embed consists of HTML and a preview thumbnail + but not an autoplaying video), and there are still more requests for + first-party images than all scripts combined. + </p> + <h3 id="request-count">Request Count</h3> + <p> + 49% of all requests are third-party. At 51%, first-party can still narrowly + hold on to the crown in 2019 of comprising the majority of the web + resources. Given that just under half of all the requests are third-party + yet a small set of pages do not include any at all, the most active + third-party users must be doing quite a bit more than their fair share. + Indeed, at the 75th, 90th, and 99th percentiles we see nearly all of the + page being comprised of third-party content. In fact, for some sites heavily + relying on distributed WYSIWYG platforms like Wix and SquareSpace, the root + document might be the sole first-party request! + </p> + <p><code><insert graphic of metric 05_11></code></p> + <p> + The number of requests issued by each third-party provider also varies + considerably by category. While analytics are the most widespread + third-party category across websites, they account for only 7% of all + third-party network requests. Ads, on the other hand, are found on nearly + 20% fewer sites yet make up 25% of all third-party network requests. Their + outsized resource impact compared to their popularity will be a theme we + continue to uncover in the remaining data. + </p> + <h3 id="byte-weight">Byte Weight</h3> + <p> + While 49% of requests are third-party, their share of the web in terms of + bytes is quite a bit lower at only 28%. The same goes for the breakdown by + multiple resource types. Third-party fonts make up 72% of all fonts, but + they’re only 53% of font bytes; 74% of HTML requests, but only 39% of HTML + bytes; 68% of video requests, but only 31% of video bytes. 
All this seems to + suggest third-party providers are responsible stewards who keep their + response sizes low, and, for the most part, that is in fact the case until + you look at scripts. + </p> + <p> + Despite serving 57% of scripts, third parties comprise 64% of script bytes. + meaning their scripts are larger on average than first-party scripts. This + is an early warning sign for their performance impact to come in the next + few sections. + </p> + <p><code><insert graphic of metric 05_04></code></p> + <p><code><insert graphic of metric 05_12></code></p> + <p> + As for specific third-party providers, the same juggernauts topping the + request count leaderboards make their appearance in byte weight as well. The + only few notable movements are the large, media-heavy providers such as + YouTube, Shopify, and Twitter which climb to the top of the byte impact + charts. + </p> + <p><code><insert table of metric 05_07></code></p> + <h3 id="script-execution">Script Execution</h3> + <p> + 57% of script execution time is from third-party scripts, and the top 100 + domains already account for 48% of all script execution time on the web. + This underscores just how large an impact a select few entities really have + on web performance. This topic is explored more in depth in the + <a href="#performance">Repercussions > Performance</a> section. + </p> + <p><code><insert graphic of metric 05_05></code></p> + <p><code><insert graphic of metric 05_13></code></p> + <p> + The category breakdowns among script execution largely follow that of + resource counts. Here too advertising looms largest. Ad scripts comprise 25% + of third-party script execution time with hosting and social providers in a + distant tie for second at 12%. + </p> + <p><code><insert table of metric 05_08></code></p> + <p><code><insert table of metric 05_10></code></p> + <p> + While much could be said about every individual provider’s popularity and + performance impact, this more opinionated analysis is left as an exercise + for the reader and other purpose-built tools such as the previously + mentioned <a href="https://thirdpartyweb.today">third-party-web</a>. + </p> + <h2 id="analysis">Analysis</h2> + <h3 id="usage-patterns">Usage Patterns</h3> + <p> + Why do site owners use third-party code? How did third-party content grow to + be nearly half of all network requests? What are all these requests doing? + Answers to these questions lie in the three primary usage patterns of + third-party resources. Broadly, site owners reach for third parties to + generate and consume data from their users, monetize their site experiences, + and simplify web development. + </p> + <h3 id="generate-and-consume-data">Generate and Consume Data</h3> + <p> + Analytics is the most popular third-party category found across the web and + yet is minimally user-visible. Consider the volume of information at play in + the lifetime of a web visit; there’s user context, device, browser, + connection quality, location, page interactions, session length, return + visitor status, and more being generated continuously. It’s difficult, + cumbersome, and expensive to maintain tools that warehouse, normalize, and + analyze time series data of this magnitude. 
While nothing categorically + necessitates that analytics fall into the domain of third-party providers, + the widespread attractiveness of understanding your users, deep complexity + of the problem space, and increasing emphasis on managing data respectfully + and responsibly naturally surfaces analytics as a popular third-party usage + pattern. + </p> + <p> + There’s also a flip side to user data though: consumption. While analytics + is about generating data from your site’s visitors, other third-party + resources focus on consuming data about your visitors that is known only by + others. Social providers fall squarely into this usage pattern. A site owner + <em>must</em> use Facebook resources if they wish to integrate information + from a visitor’s Facebook profile into their site. As long as site owners + are interested in personalizing their experience with widgets from social + networks and leveraging the social networks of their visitors to increase + their reach, social integrations are likely to remain the domain of + third-party entities for the foreseeable future. + </p> + <h3 id="monetize-web-traffic">Monetize Web Traffic</h3> + <p> + The open model of the web does not always serve the financial interests of + content creators to their liking and many site owners resort to monetizing + their sites with advertising. Because building direct relationships with + advertisers and negotiating pricing contracts is a relatively difficult and + time-consuming process, this concern is largely handled by third-party + providers performing targeted advertising and real-time bidding. Widespread + negative public opinion, the popularity of ad blocking technology, and + regulatory action in major global markets such as Europe pose the largest + threat to the continued use of third-party providers for monetization. While + it’s unlikely that site owners suddenly strike their own advertising deals + or build bespoke ad networks, alternative monetization models like paywalls + and experiments like Brave’s + <a href="https://basicattentiontoken.org/">Basic Attention Token</a> have a + real chance of shaking up the third-party ad landscape of the future. + </p> + <h3 id="simplify-development">Simplify Development</h3> + <p> + Above all, third-party resources are used to simplify the web development + experience. Even previous usage patterns could arguably fall into this + pattern as well. Whether analyzing user behavior, communicating with + advertisers, or personalizing the user experience, third-party resources are + used to make first-party development easier. + </p> + <p> + Hosting providers are the most extreme example of this pattern. Some of + these providers even enable anyone on Earth to become a site owner with no + technical expertise necessary. They provide hosting of assets, tools to + build sites without coding experience, and domain registration services. + </p> + <p> + The remainder of third-party providers also tend to fall into this usage + pattern. Whether it’s hosting of a utility library such as jQuery for usage + by front-end developers cached on Cloudflare’s edge servers or a vast + library of common fonts served from a popular Google CDN, third-party + content is another way to give the site owner one fewer thing to worry about + and, maybe, just maybe make the job of delivering a great experience a + little bit easier. 
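+ </p>
+ <p>
+ To make the first-party versus third-party distinction used throughout this
+ chapter a little more concrete, the short Python sketch below classifies a
+ request the same way the caveats at the end of this chapter describe: by
+ comparing the registrable domain of the request with that of the page. It is
+ purely illustrative - it assumes a naive "last two labels" notion of the
+ registrable domain, whereas a real analysis would use a Public Suffix List
+ aware parser and an entity dataset such as third-party-web.
+ </p>
+ <pre><code>from urllib.parse import urlparse
+
+ def registrable_domain(url):
+     # Naive registrable domain: the last two labels of the hostname.
+     # (A real analysis would consult the Public Suffix List, e.g. for .co.uk.)
+     host = urlparse(url).hostname or ""
+     return ".".join(host.split(".")[-2:])
+
+ def is_third_party(request_url, page_url):
+     # A request is third-party when its registrable domain differs from the page's.
+     return registrable_domain(request_url) != registrable_domain(page_url)
+
+ page = "https://www.example.com/"  # hypothetical page URL
+ print(is_third_party("https://code.jquery.com/jquery.min.js", page))  # True
+ print(is_third_party("https://www.example.com/css/site.css", page))   # False</code></pre>
+ <p>
+ Under this definition a self-hosted copy of jQuery or of a font file counts
+ as first-party, while the same bytes fetched from a shared CDN count as
+ third-party, which is exactly the distinction the data in this chapter
+ relies on.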
+ </p>
+ <h2 id="repercussions">Repercussions</h2>
+ <h3 id="performance">Performance</h3>
+ <p>
+ The performance impact of third-party content is neither categorically good
+ nor bad. There are good and bad actors across the spectrum and different
+ category types have varying levels of influence.
+ </p>
+ <p>
+ The good: shared third-party font and stylesheet utilities are, on average,
+ delivered more efficiently than their first-party counterparts.
+ </p>
+ <p>
+ Utilities, CDNs, and Content categories are the brightest spots on the
+ third-party performance landscape. They offer optimized versions of the same
+ sort of content that would otherwise be served from first-party sources.
+ Google Fonts and Typekit serve optimized fonts that are smaller on average
+ than first-party fonts, Cloudflare CDN serves a minified version of open
+ source libraries that might be accidentally served in development mode by
+ some site owners, and Google Maps SDK efficiently delivers complex maps that
+ might otherwise be naively shipped as large images.
+ </p>
+ <p>
+ The bad: a very small set of entities represent a very large chunk of
+ JavaScript execution time carrying out a narrow set of functionality on
+ pages.
+ </p>
+ <p>
+ Ads, social, hosting, and certain analytics providers represent the largest
+ negative impact on web performance. While hosting providers deliver a
+ majority of a site’s content and will understandably have a larger
+ performance impact than other third-party categories, they also serve almost
+ entirely static sites that demand very little JavaScript in most cases, which
+ should not justify the volume of script execution time. The other categories
+ hurting performance, though, have even less of an excuse. They fill very
+ narrow roles on each page they appear on and yet quickly take over a
+ majority of resources. For example, the Facebook "Like" button and
+ associated social widgets take up extraordinarily little screen real estate
+ and are a fraction of most web experiences, and yet the median impact on
+ pages with social third parties is nearly 20% of their total JavaScript
+ execution time. The situation is similar for analytics - tracking libraries
+ do not directly contribute to the perceived user experience, and yet the
+ 90th percentile impact on pages with analytics third parties is 44% of their
+ total JavaScript execution time.
+ </p>
+ <p>
+ The silver lining of such a small number of entities enjoying such large
+ market share is that a very limited and concentrated effort can have an
+ enormous impact on the web as a whole. Performance improvements at just the
+ top few hosting providers can improve 2-3% of <em>all</em> web requests.
+ </p>
+ <h3 id="privacy">Privacy</h3>
+ <p>
+ The abundance of analytics providers and the top-heavy concentration of
+ script execution raise two primary privacy concerns for site visitors: the
+ largest use case of third parties is for site owners to track their users,
+ and a handful of companies receive information on a large swath of web
+ traffic.
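+ </p>
+ <p>
+ As a rough illustration of how requests get attributed to companies in the
+ first place, the sketch below maps the hostnames contacted by a page onto
+ entities using a tiny, hand-made lookup table. The table and the hostnames
+ are hypothetical and far from complete - the figures in this chapter rely on
+ the much larger entity list maintained by
+ <a href="https://thirdpartyweb.today">third-party-web</a>.
+ </p>
+ <pre><code># Tiny, hand-made domain-to-entity mapping for illustration only.
+ ENTITIES = {
+     "google-analytics.com": "Google",
+     "doubleclick.net": "Google",
+     "connect.facebook.net": "Facebook",
+     "cdnjs.cloudflare.com": "Cloudflare",
+ }
+
+ def entities_contacted(request_hostnames):
+     # Return the set of known third-party entities a page's requests reach.
+     found = set()
+     for host in request_hostnames:
+         for domain, entity in ENTITIES.items():
+             if host == domain or host.endswith("." + domain):
+                 found.add(entity)
+     return found
+
+ # Hypothetical hostnames observed while loading a single page.
+ page_hosts = ["www.example.com", "www.google-analytics.com", "connect.facebook.net"]
+ print(entities_contacted(page_hosts))  # {'Google', 'Facebook'}</code></pre>
+ <p>
+ Repeated across millions of pages, this kind of attribution is what lies
+ behind the observation above: the same few entities show up in the request
+ logs of a very large share of the web.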
+ </p> + <p> + The interest of site owners in understanding and analyzing user behavior is + not malicious on its own, but the widespread and relatively + behind-the-scenes nature of web analytics raises valid concerns, and users, + companies, and lawmakers have taken notice in recent years with privacy + regulation such as + <a href="https://en.wikipedia.org/wiki/General_Data_Protection_Regulation" + >GDPR</a + > + in Europe and the + <a href="https://en.wikipedia.org/wiki/California_Consumer_Privacy_Act" + >CCPA</a + > + in California. Ensuring that developers handle user data responsibly, treat + the user respectfully, and are transparent with what data is collected is + key to keeping analytics the most popular third-party category and + maintaining the symbiotic nature of analyzing user behavior to deliver + future user value. + </p> + <p> + The top-heavy concentration of script execution is great for the potential + impact of performance improvements, but less exciting for the privacy + ramifications. 29% of <em>all</em> script execution time across the web is + just from scripts on domains owned by Google or Facebook. That’s a very + large percentage of CPU time that is controlled by just two entities. It’s + critical to ensure that the same privacy protections held to analytics + providers be applied in these other ad, social, and developer utility + categories as well. + </p> + <h2 id="caveats">Caveats</h2> + <ul> + <li> + All data presented here is based on a non-interactive, cold load. These + values could start to look quite different after user interaction. + </li> + <li> + Third-party content served from a first-party domain is counted as + first-party content. i.e. self-hosting Google Fonts or bootstrap.css will + be considered first-party content. + </li> + <li> + First-party content served from a third-party domain is counted as + third-party content. i.e. first-party images served over a third-party CDN + will be considered third-party content. + </li> + <li> + Roughly 84% of all third-party domains by request volume have been + identified and categorized. The remaining 16% fall into the “Other” + category. + </li> + </ul> </section> {% endblock %} From 0b2a54bc604da8884799b6551bf6dfa9d898b678 Mon Sep 17 00:00:00 2001 From: Mike Geyser <mikegeyser@gmail.com> Date: Tue, 29 Oct 2019 20:49:14 +0200 Subject: [PATCH 14/15] Moving the extends to the top of the template. --- src/templates/en/2019/chapter.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/templates/en/2019/chapter.html b/src/templates/en/2019/chapter.html index b1dd76c04d9..cabc2fd0b8f 100644 --- a/src/templates/en/2019/chapter.html +++ b/src/templates/en/2019/chapter.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! - `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,8 +10,6 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} - {% set metadata = <%- JSON.stringify(metadata) %> %} {% block main %} From 23747d5892c6a2db1f43532757fff073efa7dcc4 Mon Sep 17 00:00:00 2001 From: Mike Geyser <mikegeyser@gmail.com> Date: Tue, 29 Oct 2019 20:50:59 +0200 Subject: [PATCH 15/15] Regenerated the chapters. 
--- src/templates/en/2019/chapters/http2.html | 2475 +++++++++-------- src/templates/en/2019/chapters/markup.html | 1670 +++++------ .../en/2019/chapters/performance.html | 1248 +++++---- src/templates/en/2019/chapters/pwa.html | 949 ++++--- src/templates/en/2019/chapters/seo.html | 1621 ++++++----- .../en/2019/chapters/third-parties.html | 1040 +++---- 6 files changed, 4758 insertions(+), 4245 deletions(-) diff --git a/src/templates/en/2019/chapters/http2.html b/src/templates/en/2019/chapters/http2.html index 8cc58b7e4d0..c6c2fa7e285 100644 --- a/src/templates/en/2019/chapters/http2.html +++ b/src/templates/en/2019/chapters/http2.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! - `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,1202 +10,1299 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css" /> -{% endblock %} {% set metadata = +{% set metadata = {"part_number":"IV","chapter_number":20,"title":"HTTP/2","authors":["tunetheweb"],"reviewers":["bagder"," rmarx"," dotjs"]} %} {% block main %} -<aside> - <ul> - <li> - <a href="#introduction">Introduction</a> - </li> +<article id="chapter" class="main"> + <nav class="index"> + <div class="index-box floating-card"> + <h2 class="header">Index</h2> + <h2 class="header-mobile">Index</h2> + <ul> + <li> + <a href="#introduction">Introduction</a> + </li> + + <li> + <a href="#what-is-http2">What is HTTP/2?</a> + </li> + + <li> + <a href="#adoption-of-http2">Adoption of HTTP/2</a> + </li> + + <li> + <a href="#impact-of-http2">Impact of HTTP/2</a> + </li> + + <li> + <a href="#http2-push">HTTP/2 Push</a> + </li> + + <li> + <a href="#issues">Issues</a> + </li> - <li> - <a href="#what-is-http2">What is HTTP/2?</a> - </li> + <li> + <a href="#http3">HTTP/3</a> + </li> - <li> - <a href="#adoption-of-http2">Adoption of HTTP/2</a> - </li> + <li> + <a href="#conclusion">Conclusion</a> + </li> + </ul> + </div> + </nav> - <li> - <a href="#impact-of-http2">Impact of HTTP/2</a> - </li> + <section class="content"> + <section class="body"> + <h1> + <div class="decorative-line"></div> + {{ metadata.get('title') }} + <div class="decorative-line-mobile"></div> + </h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="content-banner" + /> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images//2019/20_HTTP_2/hero_xl.jpg" + alt="" + /> + </p> + <h2 id="introduction">Introduction</h2> + <p> + HTTP/2 was the first major update to the main transport protocol of the + web in nearly 20 years. It arrived with a wealth of expectations: it + promised a free performance boost with no downsides. More than that - we + could stop doing all the hacks and work arounds that HTTP/1.1 forced us + into to get around its inefficiencies. Bundling, spriting, inlining and + even sharding domains would all become anti-patterns in an HTTP/2 world, + giving performance by default. This meant even those without the skills + and resources to concentrate on web performance would suddenly have + performant websites. The reality has been, as ever, a little more + nuanced than that. 
It has been over four years since its formal approval + as a + <a href="https://tools.ietf.org/html/rfc7540" + >standard in May 2015 as RFC 7540</a + >, so now is a good time to look over how this relatively new technology + has fared in the real world. + </p> + <h2 id="what-is-http2">What is HTTP/2?</h2> + <p> + For those not familiar with the technology a bit of background is + helpful to make the most of the metrics and findings in this chapter. Up + until recently HTTP has always been a text-based protocol. An HTTP + client like a web browser opened a TCP connection to a server, and then + sent an HTTP command like <code>GET /index.html</code> to ask for a + resource. This was enhanced in HTTP/1.0 to add <em>HTTP headers</em> so + various pieces of meta data could be made in addition to the request + (what browser it is, formats it understands…etc.). These HTTP headers + were also text-based and separated by newline characters. Servers parsed + the incoming requests by reading the request and any HTTP headers line + by line, and then the server responded, with its own HTTP response + headers and the actual resource being requested. The protocol seemed + simple, but that also meant certain limitations - the main one being + that HTTP was basically synchronous: once an HTTP request had been sent, + the whole TCP connection was basically off limits for anything else + until the response had come back and been read and processed. This was + incredibly inefficient and required multiple TCP connections (browsers + typically use 6) to allow a limited form of parallelization. That in + itself brings its own issues as TCP connections take time and resources + to set up and get to full efficiency, especially when using HTTPS which + is standard nowadays. HTTP/1.1 improved it somewhat allowing reuse of + TCP connections for subsequent requests but still did not solve the + parallelization issue. + </p> + <p> + Despite HTTP being text-based, the reality is that it was rarely used to + transport text, at least in it's raw format. While it was true that HTTP + headers were still text, the payloads themselves often were not. Text + files like HTML, JS and CSS are usually <a href="#">compressed</a> for + transport into a binary format using gzip, brotli or similar and + non-text files like images, videos… etc. are served in their own + formats. The whole HTTP message is then often wrapped in HTTPS to + encrypt the messages for security reasons. So, the web had basically + moved on from text-based transport a long time ago, but HTTP had not. + One reason for this stagnation was because it was so difficult to + introduce any breaking changes to such a ubiquitous protocol like HTTP + (previous efforts had tried and failed). Many routers, firewalls and + other middleboxes understand HTTP and would react badly to major changes + to it. Upgrading them all to support a new version is simply not + possible. + </p> + <p> + In 2009 Google announced they were working on an alternative to the + text-based HTTP called SPDY. This would take advantage of the fact that + HTTP messages were often encrypted in HTTPS which prevents them being + read and interfered with en route. Google controlled one of the most + popular browsers (Chrome) and some of the most popular websites (Google, + YouTube, Gmail…etc.) - so both ends of the connection. Google's idea was + to pack HTTP messages into a proprietary format, send them across the + internet, and then unpacked them on the other side. 
The proprietary + format (SPDY) was binary-based rather than text-based which solved some + of the main performance problems with HTTP/1.1 by allowing more + efficient use of a single TCP connection, negating the need to open the + 6 connections that had become the norm under HTTP/1.1. By using SPDY in + the real world they were able to prove it was more performant for real + users, and not just because of some lab-based experimental results. + After rolling out SPDY to all Google websites, other servers and browser + started implementing it, and then it was time to standardize this + proprietary format into an internet standard and thus HTTP/2 was born. + </p> + <p>HTTP/2 has the following key concepts:</p> + <ul> + <li>Binary format</li> + <li>Multiplexing</li> + <li>Flow Control</li> + <li>Prioritization</li> + <li>Header compression</li> + <li>Push</li> + </ul> + <p> + <em>Binary format</em>, means that HTTP/2 messages are wrapped into + <em>frames</em> of a pre-defined format. This means HTTP messages are + easier to parse and no longer require scanning for newline characters. + This is better for security as there + <a href="https://www.owasp.org/index.php/HTTP_Response_Splitting" + >were a number of exploits for previous versions of HTTP</a + >. It also means HTTP/2 connections can be <em>multiplexed</em>: + different frames for different <em>streams</em> can be sent on the same + connection without interfering with each other as each Frame includes a + Stream Identifier and its length. Multiplexing allows much more + efficient use of a single TCP connection without the overhead of opening + additional connections. Ideally we would open a single connection per + domain (<a + href="https://daniel.haxx.se/blog/2016/08/18/http2-connection-coalescing/" + >or even for multiple domains</a + >!). + </p> + <p> + Having separate streams does introduce some complexities along with some + potential benefits. HTTP/2 needs the concept of <em>flow control</em> to + allow the different streams to send data at different rates, whereas + previously, with only one response in flight at any one time, this was + controlled at a connection level by TCP flow control. + <em>Prioritization</em> similarly allows multiple requests to be sent + together but with the most important requests getting more of the + bandwidth. + </p> + <p> + Finally, HTTP/2 introduced two new concepts: + <em>header compression</em> allowed those text-based HTTP headers to be + sent more efficiently (using an HTTP/2-specific + <em><a href="https://tools.ietf.org/html/rfc7541">HPACK</a></em> format + for security reasons) and <em>HTTP/2 push</em> allowed more than one + response to be sent in answer to a request. This allowed the server to + "push" resources before a client was even aware it needed them. Push was + supposed to solve the performance workaround of having to inline + resources like CSS and JavaScript directly into HTML to prevent holding + up the page while those resources were requested. With HTTP/2 the CSS + and JavaScript could remain as external files but be pushed along with + the initial HTML, so they were available immediately. Subsequent page + requests would not push these resources, since they would now be cached, + and so would not waste bandwidth. + </p> + <p> + This whistle-stop tour of HTTP/2 gives the main history and concepts of + the newish protocol. As should be apparent from this explanation, the + main benefit of HTTP/2 is to address performance limitations of the + HTTP/1.1 protocol. 
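+ </p>
+ <p>
+ To make the binary framing a little more concrete, the following purely
+ illustrative Python sketch (not taken from any real browser or server
+ implementation) unpacks the fixed 9-byte frame header defined in RFC 7540: a
+ 24-bit payload length, an 8-bit frame type, an 8-bit set of flags and a
+ 31-bit stream identifier.
+ </p>
+ <pre><code>FRAME_TYPES = {0x0: "DATA", 0x1: "HEADERS", 0x4: "SETTINGS",
+                0x5: "PUSH_PROMISE", 0x8: "WINDOW_UPDATE"}
+
+ def parse_frame_header(header):
+     # Fixed 9-byte HTTP/2 frame header (RFC 7540, section 4.1).
+     length = int.from_bytes(header[0:3], "big")                   # 24-bit payload length
+     frame_type = FRAME_TYPES.get(header[3], hex(header[3]))      # 8-bit frame type
+     flags = header[4]                                             # 8-bit flags
+     stream_id = int.from_bytes(header[5:9], "big") & 0x7FFFFFFF   # 31-bit stream identifier
+     return length, frame_type, flags, stream_id
+
+ # A hypothetical HEADERS frame header: 13-byte payload, END_HEADERS flag, stream 1.
+ print(parse_frame_header(bytes([0x00, 0x00, 0x0D, 0x01, 0x04, 0x00, 0x00, 0x00, 0x01])))</code></pre>
+ <p>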
+ There were also security improvements as well -
+ perhaps most importantly in being able to address performance issues of
+ using HTTPS since HTTP/2, even over HTTPS, is
+ <a href="https://www.httpvshttps.com/"
+ >often much faster than plain HTTP</a
+ >. Other than the web browser packing the HTTP messages into the new
+ binary format, and the web server unpacking it at the other side, the
+ core basics of HTTP itself stayed roughly the same. This means web
+ applications do not need to make any changes to support HTTP/2 as the
+ browser and server take care of this. Turning it on should be a free
+ performance boost and because of this adoption should be relatively
+ easy. Of course, there are ways web developers can optimize for HTTP/2
+ to take full advantage of how it differs.
+ </p>
+ <h2 id="adoption-of-http2">Adoption of HTTP/2</h2>
+ <p>
+ As mentioned above, Internet protocols are often difficult to adopt
+ since they are ingrained into so much of the infrastructure that makes
+ up the internet. This makes introducing any changes slow and difficult.
+ IPv6 for example has been around for 20 years but has
+ <a href="https://www.google.com/intl/en/ipv6/statistics.html"
+ >struggled to be adopted</a
+ >. HTTP/2, however, was different as it was effectively hidden in HTTPS
+ (at least for the browser use cases) removing barriers to adoption as
+ long as both the browser and server supported it. Browser support has
+ been very strong for some time and the advent of auto updating
+ <em>evergreen</em> browsers has meant that an estimated
+ <a href="https://caniuse.com/#feat=http2"
+ >95% of global users support HTTP/2 now</a
+ >. For this Web Almanac we use HTTP Archive, which runs a Chrome web
+ crawler on the approximately 5 million top websites (on both Desktop and
+ Mobile with a slightly different set for each). This shows that HTTP/2
+ is now the majority protocol - an impressive feat just 4 short
+ years after formal standardization:
+ </p>
+ <p>
+ <img
+ src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/http2usage.png"
+ alt=""
+ />
+ </p>
+ <p>
+ <strong
+ >Figure 1 -
+ <a href="https://httparchive.org/reports/state-of-the-web#h2"
+ >HTTP/2 usage by request</a
+ ></strong
+ >
+ </p>
+ <p>
+ Looking at the breakdown of all HTTP versions by request we see the
+ following:
+ </p>
+ <table>
+ <thead>
+ <tr>
+ <th id="protocol">Protocol</th>
+ <th id="desktop">Desktop</th>
+ <th id="mobile">Mobile</th>
+ <th id="both">Both</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td></td>
+ <td>5.60%</td>
+ <td>0.57%</td>
+ <td>2.97%</td>
+ </tr>
+ <tr>
+ <td>HTTP/0.9</td>
+ <td>0.00%</td>
+ <td>0.00%</td>
+ <td>0.00%</td>
+ </tr>
+ <tr>
+ <td>HTTP/1.0</td>
+ <td>0.08%</td>
+ <td>0.05%</td>
+ <td>0.06%</td>
+ </tr>
+ <tr>
+ <td>HTTP/1.1</td>
+ <td>40.36%</td>
+ <td>45.01%</td>
+ <td>42.79%</td>
+ </tr>
+ <tr>
+ <td>HTTP/2</td>
+ <td>53.96%</td>
+ <td>54.37%</td>
+ <td>54.18%</td>
+ </tr>
+ </tbody>
+ </table>
+ <p><strong>Figure 2 - HTTP version usage by request</strong></p>
+ <p>
+ This shows that HTTP/1.1 and HTTP/2 are the versions used by the vast
+ majority of requests as expected. There are only a very small number of
+ requests on the older HTTP/1.0 and HTTP/0.9 protocols. Annoyingly, there
+ is a larger percentage where the protocol was not correctly tracked by
+ the HTTP Archive crawl, particularly on desktop. Digging into this has
+ shown various reasons, some of which I can explain and some of which I
+ can't.
Based on spot checks they mostly appear to be HTTP/1.1 requests + and, assuming they are, desktop and mobile usage is similar. Despite + there being a little larger percentage of noise than I'd like, it + doesn't alter the overall message being conveyed here. Other than that, + the mobile/desktop similarity is not unexpected - the HTTP Archive + crawls using Chrome which supports HTTP/2 for both desktop and mobile. + Real world usage may have slightly different stats with some older usage + of browsers on both but even then support is widespread so I would not + expect a large variation between desktop and mobile. + </p> + <p> + At present the HTTP Archive does not track HTTP over QUIC (soon to be + standardized as HTTP/3) separately, so these are listed under HTTP/2 but + we'll look at other ways of measuring that later in this chapter. + </p> + <p> + Looking at the number of requests will skew the results somewhat due to + popular requests. For example, many sites load Google Analytics, which + does support HTTP/2, and so would show as an HTTP/2 request even if the + embedding site itself does not support HTTP/2. On the other hand, + popular websites (that tend to support HTTP/2) are also underrepresented + in the above stats as they are only measured once (e.g. google.com and + obscuresite.com are given equal weighting). There are lies, damn lies + and statistics. However, looking at other sources (for example the + <a + href="https://telemetry.mozilla.org/new-pipeline/dist.html#!cumulative=0&measure=HTTP_RESPONSE_VERSION" + >Mozilla telemetry</a + > + which looks at real-world usage through the Firefox browser) shows + similar statistics. + </p> + <p> + It is still interesting to look at home pages only to get a rough figure + on the number of sites that support HTTP/2 (at least on their home + page). Figure 3 shows less support than overall requests, as expected, + at around 36%: + </p> + <table> + <thead> + <tr> + <th id="protocol">Protocol</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td></td> + <td>0.09%</td> + <td>0.08%</td> + <td>0.08%</td> + </tr> + <tr> + <td>HTTP/1.0</td> + <td>0.09%</td> + <td>0.08%</td> + <td>0.09%</td> + </tr> + <tr> + <td>HTTP/1.1</td> + <td>62.36%</td> + <td>63.92%</td> + <td>63.22%</td> + </tr> + <tr> + <td>HTTP/2</td> + <td>37.46%</td> + <td>35.92%</td> + <td>36.61%</td> + </tr> + </tbody> + </table> + <p><strong>Figure 3 - HTTP version usage for home pages</strong></p> + <p> + HTTP/2 is only supported by browsers over HTTPS, even though officially + HTTP/2 can be used over HTTPS or over unencrypted non-HTTPS connections. + As mentioned previously, hiding the new protocol in encrypted HTTPS + connections prevents networking appliances which do not understand this + new protocol from interfering with (or rejecting!) its usage. + Additionally, the HTTPS handshake allows an easy method of the client + and server agreeing to use HTTP/2. The web is moving to HTTPS and HTTP/2 + turns the traditional argument of HTTPS being bad for performance almost + completely on its head. Not every site has made the transition to HTTPS, + so HTTP/2 will not even be available to those that have not. 
Looking at + just those sites that use HTTPS, we do see a higher percentage support + HTTP/2 at around 55% - similar to the first + <em>all requests</em> statistic we started with: + </p> + <table> + <thead> + <tr> + <th id="protocol">Protocol</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td></td> + <td>0.09%</td> + <td>0.10%</td> + <td>0.09%</td> + </tr> + <tr> + <td>HTTP/1.0</td> + <td>0.06%</td> + <td>0.06%</td> + <td>0.06%</td> + </tr> + <tr> + <td>HTTP/1.1</td> + <td>45.81%</td> + <td>44.31%</td> + <td>45.01%</td> + </tr> + <tr> + <td>HTTP/2</td> + <td>54.04%</td> + <td>55.53%</td> + <td>54.83%</td> + </tr> + </tbody> + </table> + <p><strong>Figure 4 - HTTP version usage for HTTPS home pages</strong></p> + <p> + We have shown that browser support is strong, and there is a safe road + to adoption, so why does every site (or at least every HTTPS site) not + support HTTP/2? Well here we come to the final item for support we have + not measured yet: server support. This is more problematic than browser + support as, unlike modern browsers, servers often do not automatically + upgrade to the latest version. Even when the server is regularly + maintained and patched that will often just apply security patches + rather than new features like HTTP/2. Let us look first at the server + HTTP header for those sites that do support HTTP/2: + </p> + <table> + <thead> + <tr> + <th id="server">Server</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td>nginx</td> + <td>34.04%</td> + <td>32.48%</td> + <td>33.19%</td> + </tr> + <tr> + <td>cloudflare</td> + <td>23.76%</td> + <td>22.29%</td> + <td>22.97%</td> + </tr> + <tr> + <td>Apache</td> + <td>17.31%</td> + <td>19.11%</td> + <td>18.28%</td> + </tr> + <tr> + <td></td> + <td>4.56%</td> + <td>5.13%</td> + <td>4.87%</td> + </tr> + <tr> + <td>LiteSpeed</td> + <td>4.11%</td> + <td>4.97%</td> + <td>4.57%</td> + </tr> + <tr> + <td>GSE</td> + <td>2.16%</td> + <td>3.73%</td> + <td>3.01%</td> + </tr> + <tr> + <td>Microsoft-IIS</td> + <td>3.09%</td> + <td>2.66%</td> + <td>2.86%</td> + </tr> + <tr> + <td>openresty</td> + <td>2.15%</td> + <td>2.01%</td> + <td>2.07%</td> + </tr> + <tr> + <td>…</td> + <td>…</td> + <td>…</td> + <td>…</td> + </tr> + </tbody> + </table> + <p><strong>Figure 5 - Servers used for HTTP/2</strong></p> + <p> + Nginx provides package repos that allow ease of installing or upgrading + to the latest version, so it is no surprise to see it leading the way + here. Cloudflare is the <a href="#">most popular CDNs</a> and enables + HTTP/2 by default so again it is also not surprising to see this as a + large percentage of HTTP/2 sites. Incidently, Cloudflare uses + <a + href="https://blog.cloudflare.com/nginx-structural-enhancements-for-http-2-performance/" + >a heavily customised version of nginx as their web server</a + >. After this we see Apache at around 20% of usage, followed by some + servers who choose to hide what they are and then the smaller players + (LiteSpeed, IIS, Google Servlet Engine and openresty - which is nginx + based). 
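+ </p>
+ <p>
+ Since browsers only negotiate HTTP/2 inside the HTTPS handshake (via the
+ ALPN extension), whether a particular server is willing to speak HTTP/2 can
+ be checked with nothing more than a TLS connection. The sketch below is a
+ purely illustrative Python example using only the standard library - it is
+ not how the HTTP Archive gathers its data, and the hostname is just a
+ placeholder.
+ </p>
+ <pre><code>import socket
+ import ssl
+
+ def negotiated_protocol(host, port=443):
+     # Offer h2 and http/1.1 via ALPN and report which one the server picks.
+     ctx = ssl.create_default_context()
+     ctx.set_alpn_protocols(["h2", "http/1.1"])
+     with socket.create_connection((host, port), timeout=5) as sock:
+         with ctx.wrap_socket(sock, server_hostname=host) as tls:
+             return tls.selected_alpn_protocol()  # "h2", "http/1.1" or None
+
+ print(negotiated_protocol("www.example.com"))  # hypothetical hostname</code></pre>
+ <p>
+ A result of <code>h2</code> only tells us that the server will negotiate
+ HTTP/2 for that hostname; as the rest of this chapter shows, it says nothing
+ about how well the implementation behind it behaves.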
+ </p> + <p> + What is more interesting is those sites that that do + <em>not</em> support HTTP/2: + </p> + <table> + <thead> + <tr> + <th id="server">Server</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td>Apache</td> + <td>46.76%</td> + <td>46.84%</td> + <td>46.80%</td> + </tr> + <tr> + <td>nginx</td> + <td>21.12%</td> + <td>21.33%</td> + <td>21.24%</td> + </tr> + <tr> + <td>Microsoft-IIS</td> + <td>11.30%</td> + <td>9.60%</td> + <td>10.36%</td> + </tr> + <tr> + <td></td> + <td>7.96%</td> + <td>7.59%</td> + <td>7.75%</td> + </tr> + <tr> + <td>GSE</td> + <td>1.90%</td> + <td>3.84%</td> + <td>2.98%</td> + </tr> + <tr> + <td>cloudflare</td> + <td>2.44%</td> + <td>2.48%</td> + <td>2.46%</td> + </tr> + <tr> + <td>LiteSpeed</td> + <td>1.02%</td> + <td>1.63%</td> + <td>1.36%</td> + </tr> + <tr> + <td>openresty</td> + <td>1.22%</td> + <td>1.36%</td> + <td>1.30%</td> + </tr> + <tr> + <td>…</td> + <td>…</td> + <td>…</td> + <td>…</td> + </tr> + </tbody> + </table> + <p><strong>Figure 6 - Servers used for HTTP/1.1 or lower</strong></p> + <p> + Some of this will be non-HTTPS traffic that would use HTTP/1.1 even if + the server supported HTTP/2, but a bigger issue is those that do not + support HTTP/2. In these stats we see a much greater share for Apache + and IIS which are likely running older versions. For Apache in + particular it is often not easy to add HTTP/2 support to an existing + installation as Apache does not provide an official repository to + install this from. This often means resorting to compiling from source + or trusting a third-party repo - neither of which is particularly + appealing to many administrators. Only the latest versions of Linux + distributions (RHEL and CentOS 8, Ubuntu 18 and Debian 9) come with a + version of Apache which supports HTTP/2 and many servers are not running + those yet. On the Microsoft side only Windows Server 2016 and above + supports HTTP/2 so again those running older versions cannot support + this in IIS. Merging these two stats together we can see the percentage + of installs, of each server, that uses HTTP/2: + </p> + <table> + <thead> + <tr> + <th id="server">Server</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + </tr> + </thead> + <tbody> + <tr> + <td>cloudflare</td> + <td>85.40%</td> + <td>83.46%</td> + </tr> + <tr> + <td>LiteSpeed</td> + <td>70.80%</td> + <td>63.08%</td> + </tr> + <tr> + <td>openresty</td> + <td>51.41%</td> + <td>45.24%</td> + </tr> + <tr> + <td>nginx</td> + <td>49.23%</td> + <td>46.19%</td> + </tr> + <tr> + <td>GSE</td> + <td>40.54%</td> + <td>35.25%</td> + </tr> + <tr> + <td></td> + <td>25.57%</td> + <td>27.49%</td> + </tr> + <tr> + <td>Apache</td> + <td>18.09%</td> + <td>18.56%</td> + </tr> + <tr> + <td>Microsoft-IIS</td> + <td>14.10%</td> + <td>13.47%</td> + </tr> + <tr> + <td>…</td> + <td>…</td> + <td>…</td> + </tr> + </tbody> + </table> + <p> + <strong + >Figure 7 - percentage installs of each server used to provide + HTTP/2</strong + > + </p> + <p> + It's clear Apache and IIS fall way behind with 18% and 14% of their + installed based supporting HTTP/2, and this has to be at least in part, + a consequence of it being more difficult to upgrade them. A full + operating system upgrade is often required for many to get this support + easily. Hopefully this will get easier as new versions of operating + systems become the norm. 
None of this is a comment on the HTTP/2 + implementations here (<a + href="https://twitter.com/tunetheweb/status/988196156697169920?s=20" + >I happen to think Apache has one of the best implementations</a + >), but more in the ease of enabling HTTP/2 in each of these servers - + or lack thereof. + </p> + <h2 id="impact-of-http2">Impact of HTTP/2</h2> + <p> + The impact of HTTP/2 is a much more difficult to measure statistic, + especially using the HTTP Archive methodology. Ideally sites should be + crawled with both HTTP/1.1 and HTTP/2 and the difference measured but + that is not possible with the statistics we are investigating here. + Additionally, measuring whether the average HTTP/2 site is faster than + the average HTTP/1.1 site introduces too many other variables that I + feel requires a more exhaustive study than we can cover here. + </p> + <p> + One impact that can be measured is in the changing use of HTTP now we + are in an HTTP/2 world. Multiple connections were a work around with + HTTP/1.1 to allow a limited form of parallelization, but this is in fact + the opposite of what usually works best with HTTP/2. A single connection + reduces the overhead of TCP setup, TCP slow start, HTTPS negotiation and + also allows the potential of cross-request prioritization. The HTTP + Archive measures the number of TCP connections per page and that is + dropping steadily as more sites support HTTP/2 and use its single + connection instead of 6 separate connections: + </p> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/TCPconnections.png" + alt="" + /> + </p> + <p> + <strong + >Figure 8 - + <a href="https://httparchive.org/reports/state-of-the-web#tcp" + >TCP connections per page</a + ></strong + > + </p> + <p> + Bundling assets to obtain fewer requests was another HTTP/1.1 workaround + that went by many names: bundling, concatenation, packaging, spriting, … + etc. It is less necessary when using HTTP/2 as there is less overhead + with requests but it should be noted that requests are not free in + HTTP/2 and + <a + href="https://engineering.khanacademy.org/posts/js-packaging-http2.htm" + >those that experimented with removing bundling completely have + noticed a loss in performance</a + >. Looking at the number of requests loaded by page over time, we do see + a slight decrease in requests, rather than the expected increase: + </p> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/numresources.png" + alt="" + /> + </p> + <p> + <strong + >Figure 9 - + <a href="https://httparchive.org/reports/state-of-the-web#reqTotal" + >Total Requests per page</a + ></strong + > + </p> + <p> + This low rate of change can perhaps be attributed to the aforementioned + observations that bundling cannot be removed (at least completely) + without a negative performance impact and that many build tools + currently bundle for historical reasons based on HTTP/1.1 + recommendations. It is also likely that many sites may not be willing to + penalize HTTP/1.1 users by undoing their HTTP/1.1 performance hacks just + yet, or at least that they do not have the confidence (or time!) to feel + this is worthwhile. That the number of requests is staying roughly + static, and against the background of an ever increasing + <a href="#">page weight</a> is interesting though perhaps not really + related to HTTP/2. 
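+ </p>
+ <p>
+ Both of the trends above - TCP connections per page and requests per page -
+ are easy to sanity check for an individual site. As a rough illustration,
+ and assuming the browser you export from includes the optional connection id
+ in its HAR output, a short Python sketch like the following counts them from
+ a saved capture; the file name is just a placeholder.
+ </p>
+ <pre><code>import json
+ from collections import Counter
+
+ def connection_stats(har_path):
+     # Count requests and the distinct TCP connections they were sent over.
+     with open(har_path) as f:
+         entries = json.load(f)["log"]["entries"]
+     connections = Counter(entry.get("connection", "unknown") for entry in entries)
+     return {"requests": len(entries), "connections": len(connections)}
+
+ print(connection_stats("example.har"))  # hypothetical HAR export</code></pre>
+ <p>
+ On an HTTP/1.1 site you would typically expect to see up to six connections
+ per domain; on a well-behaved HTTP/2 site the same requests should collapse
+ onto far fewer connections.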
+ </p>
+ <h2 id="http2-push">HTTP/2 Push</h2>
+ <p>
+ HTTP/2 push has a mixed history despite being a much-hyped new feature
+ of HTTP/2. The other features were basically under the hood performance
+ improvements, but push was a brand-new concept that completely broke the
+ single request to single response nature of HTTP up until then. It
+ allowed extra responses to be returned: when you asked for the web page,
+ the server could respond with the HTML page as usual, but then also send
+ you the critical CSS and JavaScript, thus avoiding any additional round
+ trips for certain resources. It would in theory allow us to stop
+ inlining CSS and JavaScript into our HTML and yet still get the same
+ performance gains of doing it. After solving that, it could potentially
+ lead to all sorts of new and interesting use cases.
+ </p>
+ <p>
+ The reality has been… well, a bit disappointing. HTTP/2 push has proved
+ much harder than originally envisaged to use effectively. Some of this
+ has been due to
+ <a href="https://jakearchibald.com/2017/h2-push-tougher-than-i-thought/"
+ >the complexity of how HTTP/2 push works</a
+ >, and the implementation issues due to that. A bigger concern is that
+ push can quite easily cause, rather than solve, performance issues.
+ Over-pushing is a real risk. Often the browser is in the best place to
+ decide <em>what</em> to request, and just as crucially <em>when</em> to
+ request it, but HTTP/2 push puts that responsibility on the server.
+ Pushing resources that a browser already has in its cache is a waste of
+ bandwidth (though in my opinion so is inlining CSS, but that gets much
+ less of a hard time than HTTP/2 push!).
+ <a
+ href="https://lists.w3.org/Archives/Public/ietf-http-wg/2019JanMar/0033.html"
+ >Proposals to inform the server about the status of the browser cache
+ have stalled</a
+ >
+ especially on privacy concerns. Even without that problem, there are
+ other potential issues if push is not used correctly. For example,
+ pushing large images and therefore holding up the sending of critical
+ CSS and JavaScript will lead to slower websites than if you'd not pushed
+ at all!
+ </p>
+ <p>
+ There has also been very little evidence to date that push, even when
+ implemented correctly, results in the performance increase it promised.
+ This is an area that again the HTTP Archive is not best placed to
+ answer, due to the nature of how it runs (a monthly crawl of popular sites
+ using Chrome in one state) so we won't delve into it too much here, but
+ suffice to say that the performance gains are far from clear cut and the
+ potential problems are real.
+ </p>
+ <p>Putting that aside, let's look at the usage of HTTP/2 push:</p>
+ <table>
+ <thead>
+ <tr>
+ <th id="client">Client</th>
+ <th id="sites_using_http/2_push">Sites Using HTTP/2 Push</th>
+ <th id="sites_using_http/2_push_(%)">
+ Sites Using HTTP/2 Push (%)
+ </th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>Desktop</td>
+ <td>22,581</td>
+ <td>0.52%</td>
+ </tr>
+ <tr>
+ <td>Mobile</td>
+ <td>31,452</td>
+ <td>0.59%</td>
+ </tr>
+ </tbody>
+ </table>
+ <p><strong>Figure 10 - Sites using HTTP/2 push</strong></p>
+ <p>
+ These statistics show that the uptake of HTTP/2 push is very low - most
+ likely because of the issues described previously.
However, when sites + do use push, then tend to use it a lot rather than for one or two assets + as shown in Figure 11: + </p> + <table> + <thead> + <tr> + <th id="client">Client</th> + <th id="avg_pushed_requests">Avg Pushed Requests</th> + <th id="avg_kb_pushed">Avg KB Pushed</th> + </tr> + </thead> + <tbody> + <tr> + <td>Desktop</td> + <td>7.86</td> + <td>162.38</td> + </tr> + <tr> + <td>Mobile</td> + <td>6.35</td> + <td>122.78</td> + </tr> + </tbody> + </table> + <p><strong>Figure 11 - How much is pushed when it is used</strong></p> + <p> + This is a concern as previous advice has been to be conservative with + push and to + <a + href="https://docs.google.com/document/d/1K0NykTXBbbbTlv60t5MyJvXjqKGsCVNYHyLEXIxYMv0/edit" + >"push just enough resources to fill idle network time, and no + more"</a + >. The above statistics suggest many resources, of a significant + combined size are pushed. Looking at what is pushed we see the data in + Figure 12: + </p> + <p> + <img + src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/whatpushisusedfor.png" + alt="" + /> + </p> + <p><strong>Figure 12 - What asset types is push used for?</strong></p> + <p> + JavaScript and then CSS are the overwhelming majority of pushed items, + both by volume and by bytes. After this there is a rag tag assortment of + images, fonts, data, …etc. At the tail end we see around 100 sites + pushing video - which may be intentional or may be a sign of + over-pushing the wrong types of assets! + </p> + <p> + One concern raised by some, is that HTTP/2 implementations have + repurposed the preload HTTP link header as a signal to push. One of the + most popular uses of the preload <a href="#">resource hint</a> is to + inform the browser of late-discovered resources like fonts and images, + that the browser will not see until the CSS for example has been + requested, downloaded and parsed. If these are now pushed based on that + header, there was a concern that reusing this may result in a lot of + unintended pushes. However, the relative low usage of fonts and images + may mean that risk is not being seen as much as was feared. + <code><link rel="preload" ... ></code> tags are often used in the + HTML rather than HTTP link headers and the meta tags are not a signal to + push. Statistics in the <a href="#">resource hint</a> chapter show that + less than 1% of sites use the preload HTTP link header, and about the + same amount use preconnection which has no meaning in HTTP/2, so this + would suggest this is not so much of an issue. Though there are a number + of fonts and other assets being pushed, which may be a signal of this. + As a counter argument to those complaints, if an asset is important + enough to preload, then it could be argued these assets should be pushed + if possible as browsers treat a preload hints as very high priority + requests anyway. Any performance concern is therefore (again arguably) + at the overuse of preload, rather than the resulting HTTP/2 push that + happens because of this. + </p> + <p> + To get around this unintended push, you can provide the + <code>nopush</code> attribute in your preload header: + </p> + <pre><code>link: </assets/jquery.js>; rel=preload; as=script; nopush</code></pre> + <p> + It looks like 5% of preload HTTP headers do make use of this attribute, + which is higher than I would have expected as I would have considered + this a niche optimization. 
Then again, so is the use of preload HTTP + headers and/or HTTP/2 push itself! + </p> + <h2 id="issues">Issues</h2> + <p> + HTTP/2 is mostly a seamless upgrade that, once your server supports it, + you can switch on with no need to change your website or application. Of + course, you can optimize for HTTP/2 or stop using HTTP/1.1 workarounds + as much, but in general a site will usually work without needing any + changes - but just be faster. There are a couple of gotchas to be aware + of however that can impact any upgrade and some sites have found these + out the hard way. + </p> + <p> + One cause of issues in HTTP/2 is the poor support of HTTP/2 + prioritization. This feature allows multiple requests in progress to + make the appropriate use of the connection. This is especially important + since HTTP/2 has massively increased the number of requests that can be + running on the same connection. 100 or 128 parallel requests limits are + common in server implementations. Previously the browser had a max of 6 + connections per domain and so used its skill and judgement to decide how + best to use those connections. Now it rarely needs to queue and can send + all requests as soon as it knows about them. This then can lead to the + bandwidth being "wasted" on lower priority requests while critical + requests are delayed (and incidentally + <a + href="https://www.lucidchart.com/techblog/2019/04/10/why-turning-on-http2-was-a-mistake/" + >can also lead to swamping your backend server with more requests than + it is used to!</a + >). HTTP/2 has a complex prioritization model (too complex many say - + hence why it is being reconsidered for HTTP/3!) but few servers honor + that properly. This can be because their HTTP/2 implementations are not + up to scratch or because of so called <em>bufferbloat</em> where the + responses are already en route before the server realizes there is a + higher priority request. Due to the varying nature of servers, TCP + stacks and locations it is difficult to measure this for most sites, but + with CDNs this should be more consistent. + <a href="https://twitter.com/patmeenan">Patrick Meenan</a> created + <a + href="https://github.com/pmeenan/http2priorities/tree/master/stand-alone" + >an example test page</a + > + which deliberately tries to download a load of low-priority, off-screen, + images, before requesting some high priority on-screen images. A good + HTTP/2 server should be able to recognize this and send the high + priority images shortly after requested, at the expense of the lower + priority images. A poor HTTP/2 server will just respond in the request + order and ignore any priority signals. + <a href="https://twitter.com/AndyDavies">Andy Davies</a> has + <a href="https://github.com/andydavies/http2-prioritization-issues" + >a page tracking status of various CDNs for Patrick's test</a + >. 
The HTTP Archive identifies when a CDN is used as part of its crawl + and merging these two datasets that gives us the results shown in Figure + 13: + </p> + <table> + <thead> + <tr> + <th id="cdn">CDN</th> + <th id="prioritizes_correctly?">Prioritizes Correctly?</th> + <th id="desktop">Desktop</th> + <th id="mobile">Mobile</th> + <th id="both">Both</th> + </tr> + </thead> + <tbody> + <tr> + <td>Not using CDN</td> + <td>Unknown</td> + <td>57.81%</td> + <td>60.41%</td> + <td>59.21%</td> + </tr> + <tr> + <td>Cloudflare</td> + <td>Pass</td> + <td>23.15%</td> + <td>21.77%</td> + <td>22.40%</td> + </tr> + <tr> + <td>Google</td> + <td>Fail</td> + <td>6.67%</td> + <td>7.11%</td> + <td>6.90%</td> + </tr> + <tr> + <td>Amazon CloudFront</td> + <td>Fail</td> + <td>2.83%</td> + <td>2.38%</td> + <td>2.59%</td> + </tr> + <tr> + <td>Fastly</td> + <td>Pass</td> + <td>2.40%</td> + <td>1.77%</td> + <td>2.06%</td> + </tr> + <tr> + <td>Akamai</td> + <td>Pass</td> + <td>1.79%</td> + <td>1.50%</td> + <td>1.64%</td> + </tr> + <tr> + <td></td> + <td>Unknown</td> + <td>1.32%</td> + <td>1.58%</td> + <td>1.46%</td> + </tr> + <tr> + <td>WordPress</td> + <td>Pass</td> + <td>1.12%</td> + <td>0.99%</td> + <td>1.05%</td> + </tr> + <tr> + <td>Sucuri Firewall</td> + <td>Fail</td> + <td>0.88%</td> + <td>0.75%</td> + <td>0.81%</td> + </tr> + <tr> + <td>Incapsula</td> + <td>Fail</td> + <td>0.39%</td> + <td>0.34%</td> + <td>0.36%</td> + </tr> + <tr> + <td>Netlify</td> + <td>Fail</td> + <td>0.23%</td> + <td>0.15%</td> + <td>0.19%</td> + </tr> + <tr> + <td>OVH CDN</td> + <td>Unknown</td> + <td>0.19%</td> + <td>0.18%</td> + <td>0.18%</td> + </tr> + </tbody> + </table> + <p> + <strong + >Figure 13 - HTTP/2 prioritization support in common CDNs</strong + > + </p> + <p> + This shows that a not insignificant portion of traffic is subject to the + identified issue. How much of a problem this is, depends on exactly how + your page loads and whether high priority resources are discovered late + or not for your site, but it does show another complexity to take into + considerations. + </p> + <p> + Another issue is with the <code>upgrade</code> HTTP header being used + incorrectly. Web servers can respond to requests with an + <code>upgrade</code> HTTP header suggesting that it supports a better + protocol that the client might wish to use (e.g. advertise HTTP/2 to a + client only using HTTP/1.1). You might think this would be useful as a + way of informing the browser it supports HTTP/2 but since browsers only + support HTTP/2 over HTTPS and since use of HTTP/2 can be negotiated + through the HTTPS handshake, the use of this <code>upgrade</code> header + for advertising HTTP/2 is pretty limited (to browsers at least). Worse + than that, is when a server sends an upgrade header in error. This could + be because an HTTP/2 supporting backend server is sending the header and + then an HTTP/1.1-only edge server is blindly forwarding to the client. + Apache emits the <code>upgrade</code> header when mod_http2 is enabled + but HTTP/2 is not being used, and a nginx instance sitting in front of + such an Apache happily forwards this header even when nginx does not + support HTTP/2. This false advertising then leads to clients trying (and + failing!) to use HTTP/2 as they are advised to. 108 site use HTTP/2 and + yet suggest upgrading to HTTP/2 in this <code>upgrade</code> header. 
A
+ further 12,767 sites on desktop (15,235 on mobile) suggest upgrading an
+ HTTP/1.1 connection delivered over HTTPS to HTTP/2 when it's clear this
+ was not available, or it would have been used already. These are a small
+ minority of the 4.3 million sites crawled on desktop and 5.3 million
+ sites crawled on mobile for these stats, but it shows that this was still
+ an issue affecting a number of sites out there. Browsers handle this
+ inconsistently, with Safari in particular attempting to upgrade and then
+ getting itself in a mess and refusing to display the site at all. All
+ this is before we get into sites recommending upgrading to
+ <code>http1.0</code>, <code>http://1.1</code> or even
+ <code>-all,+TLSv1.3,+TLSv1.2</code> (clearly some typos in web server
+ configurations going on here!).
+ </p>
+ <p>
+ There are further implementation issues we could look at. For example,
+ HTTP/2 is much stricter about HTTP header names, rejecting the whole
+ response if you respond with spaces, colons or other invalid HTTP header
+ names. The header names are also converted to lowercase which catches
+ some by surprise if their application assumes a certain capitalization
+ (which was never guaranteed previously as
+ <a href="https://tools.ietf.org/html/rfc7230#section-3.2"
+ >HTTP/1.1 specifically states the header names are case insensitive</a
+ >, but still some have depended on this). The HTTP Archive could
+ potentially be used to identify these issues as well, though some of
+ them will not be apparent on the home page so we did not delve into that
+ this year.
+ </p>
+ <h2 id="http3">HTTP/3</h2>
+ <p>
+ The world does not stand still and despite HTTP/2 not having even
+ reached its official 5th birthday, people are already seeing it as old
+ news and getting more excited about its successor: HTTP/3. HTTP/3 builds
+ on the concepts of HTTP/2 but moves it from working over TCP connections
+ that HTTP has always used to a UDP-based protocol called QUIC. This
+ allows us to fix one edge case where HTTP/2 is slower than HTTP/1.1,
+ when there is high packet loss and the guaranteed nature of TCP holds up
+ and throttles back all streams. It also allows us to address
+ some TCP and HTTPS inefficiencies such as consolidating on one handshake
+ for both, and supporting many ideas for TCP that have proven hard to
+ implement in real life (TCP fast open, 0-RTT, …etc.). HTTP/3 also cleans
+ up some overlap between TCP and HTTP/2 (e.g. flow control being
+ implemented in both layers) but conceptually it is very similar to
+ HTTP/2. Web developers who understand and have optimized for HTTP/2
+ should not need to make any further changes for HTTP/3. Server operators
+ will have more work to do, however, as the differences between TCP and
+ QUIC are much more groundbreaking. Those differences will make
+ implementation harder, so the roll out of HTTP/3 may take considerably
+ longer than HTTP/2 and initially be limited to those with certain
+ expertise in the field (e.g. CDNs).
+ </p>
+ <p>
+ QUIC has been implemented by Google for a number of years and it is now
+ undergoing a similar standardization process to the one SPDY went
+ through on its way to HTTP/2. At the end of 2018 it was decided to name
+ the HTTP part of QUIC as HTTP/3 (in Google's version of QUIC it was
+ simply known as HTTP/2, even though it was not exactly the same as
+ regular HTTP/2). QUIC has ambitions beyond just HTTP but for the moment
+ it is the use case being worked on.
Just as this chapter was being written, + <a href="https://blog.cloudflare.com/http3-the-past-present-and-future/" + >Cloudflare, Chrome and Firefox all announced HTTP/3 support</a + > + despite the fact that HTTP/3 is still not formally complete or approved + as a standard yet. This is welcome as QUIC support has been somewhat + lacking outside of Google until recently and definitely lags SPDY and + HTTP/2 support from a similar stage of standardization. + </p> + <p> + Because HTTP/3 uses QUIC over UDP rather than TCP it makes the discovery + of HTTP/3 support a bigger challenge than HTTP/2 discovery. With HTTP/2 + we can mostly use the HTTPS handshake, but as HTTP/3 is on a completely + different connection that is not an option here. HTTP/2 did also use the + <code>upgrade</code> HTTP header to inform of HTTP/2 support, and + although that was not that useful for HTTP/2, a similar mechanism has + been put in place for QUIC that is more useful. The + <em>alternative services</em> HTTP header (<code>alt-svc</code>) + advertises alternative protocols that can be used on completely + different connections (as opposed to alternative protocols that can be + used on this connection - which is what the <code>upgrade</code> HTTP + header is used for). Analysis of this header shows that 7.67% of desktop + sites and 8.38% of mobile sites already support QUIC (which roughly + represents Google percentage of traffic unsurprisingly enough as it has + been using this for a while), and 0.04% are already supporting + <code>h3</code> (meaning HTTP/3) in this field. I would imagine by next + year’s Almanac this number will have increased significantly. + </p> + <h2 id="conclusion">Conclusion</h2> + <p> + This analysis of the available statistics in HTTP Archive has shown what + many of us in the HTTP community were already aware of: HTTP/2 is here + and proving very popular. It is already the dominant protocol in terms + of number of request but has not quite overtaken HTTP/1.1 in terms of + number of sites supported. The long tail of the internet means that it + often takes an exponentially longer time to make noticeable gains on the + less well-maintained sites than on the high profile, high volume sites. + </p> + <p> + We've also talked about how it is (still!) not easy to get HTTP/2 + support in some installations. Server developers, operating system + distributors and end customers all have a part to play in pushing to + make that easier. Tying software to operating systems always lengthens + deployment time - and in fact one of the very reasons for QUIC is to + break a similar barrier with deploying TCP changes. In many instances + there is no real reason to tie web server versions to operating systems. + Apache (to use one of the more popular examples) will run with HTTP/2 + support in older operating systems but getting an up to date version on + to the server should not require the expertise or risk it currently + does. Nginx does very well here hosting repositories for the common + Linux flavors to make installation easier and if the Apache team (or the + Linux distribution vendors) do not offer something similar, then I can + only see Apache's usage continuing to shrink as it struggles to hold + relevance and shake its reputation as old and slow - based on older + installs - even though up to date versions have one of the best HTTP/2 + implementations. I see that as less of an issue for IIS since it is + usually the preferred web server on the Windows side. 
+ </p> + <p> + Other than that, HTTP/2 has been a relatively easy upgrade path - which + is why it has the strong uptake it has already seen. For the most part, + it is a painless switch on and therefore, for most, it has turned out to + be a hassle-free performance increase that requires little thought once + your server supports it. The devil is in the details though (as always), + and small differences between server implementations can result in + better or worse HTTP/2 usage and ultimately end user experience. There + have also been a number of bugs and even + <a + href="https://github.com/Netflix/security-bulletins/blob/master/advisories/third-party/2019-002.md" + >security issues</a + >, as is to be expected with any new protocol. Ensuring you are using a + strong, up to date, well maintained implementation of any newish + protocol like HTTP/2 will ensure you stay on top of these issues. + However, that can take expertise and managing. The roll out of QUIC and + HTTP/3 will likely be even more complicated and require more expertise. + Perhaps this is best left to third party service providers like CDNs who + have this expertise and can give your site easy access to these + features? However, even when left to the experts, this is not a sure + thing (as the prioritization statistics show), but if you choose your + server provider wisely and engage with them on what your priorities are, + then it should be an easier implementation. And on that note it would be + great if the CDNs prioritized the issue highlighted above (pun + definitely intended!), though I suspect with the advent of a new + prioritization method in HTTP/3, many will hold tight. The next year + will prove yet more interesting times in the HTTP world. + </p> + </section> + <section class="authors"> + <h4>Authors :</h4> + <ul> + <li> + <img + class="avatar" + alt="Author name" + src="https://www.gravatar.com/avatar/cf58fcc6995e15f35e42532c3775fed6.jpg?d=mp&s=200" + /> + <div class="info"> + <span class="name">Full Name</span> + <span class="social"> + <a class="twitter" href="https://twitter.com/rick_viscomi"> + <img src="/static/images/twitter.png" alt="Twitter account" /> + </a> - <li> - <a href="#http2-push">HTTP/2 Push</a> - </li> + <a class="github" href="https://github.com/rviscomi"> + <img src="/static/images/github.png" alt="github account" /> + </a> + </span> - <li> - <a href="#issues">Issues</a> - </li> + <div class="tagline"> + Tagline of contributor here + </div> + </div> + </li> + </ul> + </section> - <li> - <a href="#http3">HTTP/3</a> - </li> + <nav id="chapter-navigation"> + <a id="previous-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> - <li> - <a href="#conclusion">Conclusion</a> - </li> - </ul> -</aside> + <a id="next-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + </nav> + </section> +</article> -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img - src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" - class="chapter-hero" - /> - <p> - <img - src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images//2019/20_HTTP_2/hero_xl.jpg" - alt="" - /> - </p> - <h2 id="introduction">Introduction</h2> - <p> - 
HTTP/2 was the first major update to the main transport protocol of the web - in nearly 20 years. It arrived with a wealth of expectations: it promised a - free performance boost with no downsides. More than that - we could stop - doing all the hacks and work arounds that HTTP/1.1 forced us into to get - around its inefficiencies. Bundling, spriting, inlining and even sharding - domains would all become anti-patterns in an HTTP/2 world, giving - performance by default. This meant even those without the skills and - resources to concentrate on web performance would suddenly have performant - websites. The reality has been, as ever, a little more nuanced than that. It - has been over four years since its formal approval as a - <a href="https://tools.ietf.org/html/rfc7540" - >standard in May 2015 as RFC 7540</a - >, so now is a good time to look over how this relatively new technology has - fared in the real world. - </p> - <h2 id="what-is-http2">What is HTTP/2?</h2> - <p> - For those not familiar with the technology a bit of background is helpful to - make the most of the metrics and findings in this chapter. Up until recently - HTTP has always been a text-based protocol. An HTTP client like a web - browser opened a TCP connection to a server, and then sent an HTTP command - like <code>GET /index.html</code> to ask for a resource. This was enhanced - in HTTP/1.0 to add <em>HTTP headers</em> so various pieces of meta data - could be made in addition to the request (what browser it is, formats it - understands…etc.). These HTTP headers were also text-based and separated by - newline characters. Servers parsed the incoming requests by reading the - request and any HTTP headers line by line, and then the server responded, - with its own HTTP response headers and the actual resource being requested. - The protocol seemed simple, but that also meant certain limitations - the - main one being that HTTP was basically synchronous: once an HTTP request had - been sent, the whole TCP connection was basically off limits for anything - else until the response had come back and been read and processed. This was - incredibly inefficient and required multiple TCP connections (browsers - typically use 6) to allow a limited form of parallelization. That in itself - brings its own issues as TCP connections take time and resources to set up - and get to full efficiency, especially when using HTTPS which is standard - nowadays. HTTP/1.1 improved it somewhat allowing reuse of TCP connections - for subsequent requests but still did not solve the parallelization issue. - </p> - <p> - Despite HTTP being text-based, the reality is that it was rarely used to - transport text, at least in it's raw format. While it was true that HTTP - headers were still text, the payloads themselves often were not. Text files - like HTML, JS and CSS are usually <a href="#">compressed</a> for transport - into a binary format using gzip, brotli or similar and non-text files like - images, videos… etc. are served in their own formats. The whole HTTP message - is then often wrapped in HTTPS to encrypt the messages for security reasons. - So, the web had basically moved on from text-based transport a long time - ago, but HTTP had not. One reason for this stagnation was because it was so - difficult to introduce any breaking changes to such a ubiquitous protocol - like HTTP (previous efforts had tried and failed). Many routers, firewalls - and other middleboxes understand HTTP and would react badly to major changes - to it. 
Upgrading them all to support a new version is simply not possible. - </p> - <p> - In 2009 Google announced they were working on an alternative to the - text-based HTTP called SPDY. This would take advantage of the fact that HTTP - messages were often encrypted in HTTPS which prevents them being read and - interfered with en route. Google controlled one of the most popular browsers - (Chrome) and some of the most popular websites (Google, YouTube, Gmail…etc.) - - so both ends of the connection. Google's idea was to pack HTTP messages - into a proprietary format, send them across the internet, and then unpacked - them on the other side. The proprietary format (SPDY) was binary-based - rather than text-based which solved some of the main performance problems - with HTTP/1.1 by allowing more efficient use of a single TCP connection, - negating the need to open the 6 connections that had become the norm under - HTTP/1.1. By using SPDY in the real world they were able to prove it was - more performant for real users, and not just because of some lab-based - experimental results. After rolling out SPDY to all Google websites, other - servers and browser started implementing it, and then it was time to - standardize this proprietary format into an internet standard and thus - HTTP/2 was born. - </p> - <p>HTTP/2 has the following key concepts:</p> - <ul> - <li>Binary format</li> - <li>Multiplexing</li> - <li>Flow Control</li> - <li>Prioritization</li> - <li>Header compression</li> - <li>Push</li> - </ul> - <p> - <em>Binary format</em>, means that HTTP/2 messages are wrapped into - <em>frames</em> of a pre-defined format. This means HTTP messages are easier - to parse and no longer require scanning for newline characters. This is - better for security as there - <a href="https://www.owasp.org/index.php/HTTP_Response_Splitting" - >were a number of exploits for previous versions of HTTP</a - >. It also means HTTP/2 connections can be <em>multiplexed</em>: different - frames for different <em>streams</em> can be sent on the same connection - without interfering with each other as each Frame includes a Stream - Identifier and its length. Multiplexing allows much more efficient use of a - single TCP connection without the overhead of opening additional - connections. Ideally we would open a single connection per domain (<a - href="https://daniel.haxx.se/blog/2016/08/18/http2-connection-coalescing/" - >or even for multiple domains</a - >!). - </p> - <p> - Having separate streams does introduce some complexities along with some - potential benefits. HTTP/2 needs the concept of <em>flow control</em> to - allow the different streams to send data at different rates, whereas - previously, with only one response in flight at any one time, this was - controlled at a connection level by TCP flow control. - <em>Prioritization</em> similarly allows multiple requests to be sent - together but with the most important requests getting more of the bandwidth. - </p> - <p> - Finally, HTTP/2 introduced two new concepts: - <em>header compression</em> allowed those text-based HTTP headers to be sent - more efficiently (using an HTTP/2-specific - <em><a href="https://tools.ietf.org/html/rfc7541">HPACK</a></em> format for - security reasons) and <em>HTTP/2 push</em> allowed more than one response to - be sent in answer to a request. This allowed the server to "push" resources - before a client was even aware it needed them. 
Push was supposed to solve - the performance workaround of having to inline resources like CSS and - JavaScript directly into HTML to prevent holding up the page while those - resources were requested. With HTTP/2 the CSS and JavaScript could remain as - external files but be pushed along with the initial HTML, so they were - available immediately. Subsequent page requests would not push these - resources, since they would now be cached, and so would not waste bandwidth. - </p> - <p> - This whistle-stop tour of HTTP/2 gives the main history and concepts of the - newish protocol. As should be apparent from this explanation, the main - benefit of HTTP/2 is to address performance limitations of the HTTP/1.1 - protocol. There were also security improvements as well - perhaps most - importantly in being to address performance issues of using HTTPS since - HTTP/2, even over HTTPS, is - <a href="https://www.httpvshttps.com/">often much faster than plain HTTP</a - >. Other than the web browser packing the HTTP messages into the new binary - format, and the web server unpacking it at the other side, the core basics - of HTTP itself stayed roughly the same. This means web applications do not - need to make any changes to support HTTP/2 as the browser and server take - care of this. Turning it on should be a free performance boost and because - of this adoption should be relatively easy. Of course, there are ways web - developers can optimize for HTTP/2 to take full advantage of how it differs. - </p> - <h2 id="adoption-of-http2">Adoption of HTTP/2</h2> - <p> - As mentioned above, Internet protocols are often difficult to adopt since - they are ingrained into so much of the infrastructure that makes up the - internet. This makes introducing any changes slow and difficult. IPv6 for - example has been around for 20 years but has - <a href="https://www.google.com/intl/en/ipv6/statistics.html" - >struggled to be adopted</a - >. HTTP/2 however, was different as it was effectively hidden in HTTPS (at - least for the browser uses cases) removing barriers to adoption as long as - both the browser and server supported it. Browser support has been very - strong for some time and the advent of auto updating - <em>evergreen</em> browsers has meant that an estimated - <a href="https://caniuse.com/#feat=http2" - >95% of global users support HTTP/2 now</a - >. For this Web Almanac we use HTTP Archive which runs a Chrome web crawler - on the approximately 5 million top websites (on both Desktop and Mobile with - a slightly different set for each). 
This shows that HTTP/2 usage is now the - majority protocol - an impressive feat just 4 short years after formal - standardization: - </p> - <p> - <img - src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/http2usage.png" - alt="" - /> - </p> - <p> - <strong - >Figure 1 - - <a href="https://httparchive.org/reports/state-of-the-web#h2" - >HTTP/2 usage by request</a - ></strong - > - </p> - <p> - Looking at the breakdown of all HTTP versions by request we see the - following: - </p> - <table> - <thead> - <tr> - <th id="protocol">Protocol</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - <th id="both">Both</th> - </tr> - </thead> - <tbody> - <tr> - <td></td> - <td>5.60%</td> - <td>0.57%</td> - <td>2.97%</td> - </tr> - <tr> - <td>HTTP/0.9</td> - <td>0.00%</td> - <td>0.00%</td> - <td>0.00%</td> - </tr> - <tr> - <td>HTTP/1.0</td> - <td>0.08%</td> - <td>0.05%</td> - <td>0.06%</td> - </tr> - <tr> - <td>HTTP/1.1</td> - <td>40.36%</td> - <td>45.01%</td> - <td>42.79%</td> - </tr> - <tr> - <td>HTTP/2</td> - <td>53.96%</td> - <td>54.37%</td> - <td>54.18%</td> - </tr> - </tbody> - </table> - <p><strong>Figure 2 - HTTP version usage by request</strong></p> - <p> - This shows that HTTP/1.1 and HTTP/2 are the versions used by the vast - majority of requests as expected. There are only a very small number of - requests on the older HTTP/1.0 and HTTP/0.9 protocols. Annoyingly there is a - larger percentage where the protocol was not correctly tracked by the HTTP - Archive crawl, particularly on desktop. Digging into this has shown various - reasons, some of which I can explain and some of which I can't. Based on - spot checks they mostly appear to be HTTP/1.1 requests and, assuming they - are, desktop and mobile usage is similar. Despite there being a little - larger percentage of noise than I'd like, it doesn't alter the overall - message being conveyed here. Other than that, the mobile/desktop similarity - is not unexpected - the HTTP Archive crawls using Chrome which supports - HTTP/2 for both desktop and mobile. Real world usage may have slightly - different stats with some older usage of browsers on both but even then - support is widespread so I would not expect a large variation between - desktop and mobile. - </p> - <p> - At present the HTTP Archive does not track HTTP over QUIC (soon to be - standardized as HTTP/3) separately, so these are listed under HTTP/2 but - we'll look at other ways of measuring that later in this chapter. - </p> - <p> - Looking at the number of requests will skew the results somewhat due to - popular requests. For example, many sites load Google Analytics, which does - support HTTP/2, and so would show as an HTTP/2 request even if the embedding - site itself does not support HTTP/2. On the other hand, popular websites - (that tend to support HTTP/2) are also underrepresented in the above stats - as they are only measured once (e.g. google.com and obscuresite.com are - given equal weighting). There are lies, damn lies and statistics. However, - looking at other sources (for example the - <a - href="https://telemetry.mozilla.org/new-pipeline/dist.html#!cumulative=0&measure=HTTP_RESPONSE_VERSION" - >Mozilla telemetry</a - > - which looks at real-world usage through the Firefox browser) shows similar - statistics. - </p> - <p> - It is still interesting to look at home pages only to get a rough figure on - the number of sites that support HTTP/2 (at least on their home page). 
- Figure 3 shows less support than overall requests, as expected, at around - 36%: - </p> - <table> - <thead> - <tr> - <th id="protocol">Protocol</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - <th id="both">Both</th> - </tr> - </thead> - <tbody> - <tr> - <td></td> - <td>0.09%</td> - <td>0.08%</td> - <td>0.08%</td> - </tr> - <tr> - <td>HTTP/1.0</td> - <td>0.09%</td> - <td>0.08%</td> - <td>0.09%</td> - </tr> - <tr> - <td>HTTP/1.1</td> - <td>62.36%</td> - <td>63.92%</td> - <td>63.22%</td> - </tr> - <tr> - <td>HTTP/2</td> - <td>37.46%</td> - <td>35.92%</td> - <td>36.61%</td> - </tr> - </tbody> - </table> - <p><strong>Figure 3 - HTTP version usage for home pages</strong></p> - <p> - HTTP/2 is only supported by browsers over HTTPS, even though officially - HTTP/2 can be used over HTTPS or over unencrypted non-HTTPS connections. As - mentioned previously, hiding the new protocol in encrypted HTTPS connections - prevents networking appliances which do not understand this new protocol - from interfering with (or rejecting!) its usage. Additionally, the HTTPS - handshake allows an easy method of the client and server agreeing to use - HTTP/2. The web is moving to HTTPS and HTTP/2 turns the traditional argument - of HTTPS being bad for performance almost completely on its head. Not every - site has made the transition to HTTPS, so HTTP/2 will not even be available - to those that have not. Looking at just those sites that use HTTPS, we do - see a higher percentage support HTTP/2 at around 55% - similar to the first - <em>all requests</em> statistic we started with: - </p> - <table> - <thead> - <tr> - <th id="protocol">Protocol</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - <th id="both">Both</th> - </tr> - </thead> - <tbody> - <tr> - <td></td> - <td>0.09%</td> - <td>0.10%</td> - <td>0.09%</td> - </tr> - <tr> - <td>HTTP/1.0</td> - <td>0.06%</td> - <td>0.06%</td> - <td>0.06%</td> - </tr> - <tr> - <td>HTTP/1.1</td> - <td>45.81%</td> - <td>44.31%</td> - <td>45.01%</td> - </tr> - <tr> - <td>HTTP/2</td> - <td>54.04%</td> - <td>55.53%</td> - <td>54.83%</td> - </tr> - </tbody> - </table> - <p><strong>Figure 4 - HTTP version usage for HTTPS home pages</strong></p> - <p> - We have shown that browser support is strong, and there is a safe road to - adoption, so why does every site (or at least every HTTPS site) not support - HTTP/2? Well here we come to the final item for support we have not measured - yet: server support. This is more problematic than browser support as, - unlike modern browsers, servers often do not automatically upgrade to the - latest version. Even when the server is regularly maintained and patched - that will often just apply security patches rather than new features like - HTTP/2. 
Let us look first at the server HTTP header for those sites that do - support HTTP/2: - </p> - <table> - <thead> - <tr> - <th id="server">Server</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - <th id="both">Both</th> - </tr> - </thead> - <tbody> - <tr> - <td>nginx</td> - <td>34.04%</td> - <td>32.48%</td> - <td>33.19%</td> - </tr> - <tr> - <td>cloudflare</td> - <td>23.76%</td> - <td>22.29%</td> - <td>22.97%</td> - </tr> - <tr> - <td>Apache</td> - <td>17.31%</td> - <td>19.11%</td> - <td>18.28%</td> - </tr> - <tr> - <td></td> - <td>4.56%</td> - <td>5.13%</td> - <td>4.87%</td> - </tr> - <tr> - <td>LiteSpeed</td> - <td>4.11%</td> - <td>4.97%</td> - <td>4.57%</td> - </tr> - <tr> - <td>GSE</td> - <td>2.16%</td> - <td>3.73%</td> - <td>3.01%</td> - </tr> - <tr> - <td>Microsoft-IIS</td> - <td>3.09%</td> - <td>2.66%</td> - <td>2.86%</td> - </tr> - <tr> - <td>openresty</td> - <td>2.15%</td> - <td>2.01%</td> - <td>2.07%</td> - </tr> - <tr> - <td>…</td> - <td>…</td> - <td>…</td> - <td>…</td> - </tr> - </tbody> - </table> - <p><strong>Figure 5 - Servers used for HTTP/2</strong></p> - <p> - Nginx provides package repos that allow ease of installing or upgrading to - the latest version, so it is no surprise to see it leading the way here. - Cloudflare is the <a href="#">most popular CDNs</a> and enables HTTP/2 by - default so again it is also not surprising to see this as a large percentage - of HTTP/2 sites. Incidently, Cloudflare uses - <a - href="https://blog.cloudflare.com/nginx-structural-enhancements-for-http-2-performance/" - >a heavily customised version of nginx as their web server</a - >. After this we see Apache at around 20% of usage, followed by some servers - who choose to hide what they are and then the smaller players (LiteSpeed, - IIS, Google Servlet Engine and openresty - which is nginx based). - </p> - <p> - What is more interesting is those sites that that do <em>not</em> support - HTTP/2: - </p> - <table> - <thead> - <tr> - <th id="server">Server</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - <th id="both">Both</th> - </tr> - </thead> - <tbody> - <tr> - <td>Apache</td> - <td>46.76%</td> - <td>46.84%</td> - <td>46.80%</td> - </tr> - <tr> - <td>nginx</td> - <td>21.12%</td> - <td>21.33%</td> - <td>21.24%</td> - </tr> - <tr> - <td>Microsoft-IIS</td> - <td>11.30%</td> - <td>9.60%</td> - <td>10.36%</td> - </tr> - <tr> - <td></td> - <td>7.96%</td> - <td>7.59%</td> - <td>7.75%</td> - </tr> - <tr> - <td>GSE</td> - <td>1.90%</td> - <td>3.84%</td> - <td>2.98%</td> - </tr> - <tr> - <td>cloudflare</td> - <td>2.44%</td> - <td>2.48%</td> - <td>2.46%</td> - </tr> - <tr> - <td>LiteSpeed</td> - <td>1.02%</td> - <td>1.63%</td> - <td>1.36%</td> - </tr> - <tr> - <td>openresty</td> - <td>1.22%</td> - <td>1.36%</td> - <td>1.30%</td> - </tr> - <tr> - <td>…</td> - <td>…</td> - <td>…</td> - <td>…</td> - </tr> - </tbody> - </table> - <p><strong>Figure 6 - Servers used for HTTP/1.1 or lower</strong></p> - <p> - Some of this will be non-HTTPS traffic that would use HTTP/1.1 even if the - server supported HTTP/2, but a bigger issue is those that do not support - HTTP/2. In these stats we see a much greater share for Apache and IIS which - are likely running older versions. For Apache in particular it is often not - easy to add HTTP/2 support to an existing installation as Apache does not - provide an official repository to install this from. 
This often means - resorting to compiling from source or trusting a third-party repo - neither - of which is particularly appealing to many administrators. Only the latest - versions of Linux distributions (RHEL and CentOS 8, Ubuntu 18 and Debian 9) - come with a version of Apache which supports HTTP/2 and many servers are not - running those yet. On the Microsoft side only Windows Server 2016 and above - supports HTTP/2 so again those running older versions cannot support this in - IIS. Merging these two stats together we can see the percentage of installs, - of each server, that uses HTTP/2: - </p> - <table> - <thead> - <tr> - <th id="server">Server</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - </tr> - </thead> - <tbody> - <tr> - <td>cloudflare</td> - <td>85.40%</td> - <td>83.46%</td> - </tr> - <tr> - <td>LiteSpeed</td> - <td>70.80%</td> - <td>63.08%</td> - </tr> - <tr> - <td>openresty</td> - <td>51.41%</td> - <td>45.24%</td> - </tr> - <tr> - <td>nginx</td> - <td>49.23%</td> - <td>46.19%</td> - </tr> - <tr> - <td>GSE</td> - <td>40.54%</td> - <td>35.25%</td> - </tr> - <tr> - <td></td> - <td>25.57%</td> - <td>27.49%</td> - </tr> - <tr> - <td>Apache</td> - <td>18.09%</td> - <td>18.56%</td> - </tr> - <tr> - <td>Microsoft-IIS</td> - <td>14.10%</td> - <td>13.47%</td> - </tr> - <tr> - <td>…</td> - <td>…</td> - <td>…</td> - </tr> - </tbody> - </table> - <p> - <strong - >Figure 7 - percentage installs of each server used to provide - HTTP/2</strong - > - </p> - <p> - It's clear Apache and IIS fall way behind with 18% and 14% of their - installed based supporting HTTP/2, and this has to be at least in part, a - consequence of it being more difficult to upgrade them. A full operating - system upgrade is often required for many to get this support easily. - Hopefully this will get easier as new versions of operating systems become - the norm. None of this is a comment on the HTTP/2 implementations here (<a - href="https://twitter.com/tunetheweb/status/988196156697169920?s=20" - >I happen to think Apache has one of the best implementations</a - >), but more in the ease of enabling HTTP/2 in each of these servers - or - lack thereof. - </p> - <h2 id="impact-of-http2">Impact of HTTP/2</h2> - <p> - The impact of HTTP/2 is a much more difficult to measure statistic, - especially using the HTTP Archive methodology. Ideally sites should be - crawled with both HTTP/1.1 and HTTP/2 and the difference measured but that - is not possible with the statistics we are investigating here. Additionally, - measuring whether the average HTTP/2 site is faster than the average - HTTP/1.1 site introduces too many other variables that I feel requires a - more exhaustive study than we can cover here. - </p> - <p> - One impact that can be measured is in the changing use of HTTP now we are in - an HTTP/2 world. Multiple connections were a work around with HTTP/1.1 to - allow a limited form of parallelization, but this is in fact the opposite of - what usually works best with HTTP/2. A single connection reduces the - overhead of TCP setup, TCP slow start, HTTPS negotiation and also allows the - potential of cross-request prioritization. 
The HTTP Archive measures the - number of TCP connections per page and that is dropping steadily as more - sites support HTTP/2 and use its single connection instead of 6 separate - connections: - </p> - <p> - <img - src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/TCPconnections.png" - alt="" - /> - </p> - <p> - <strong - >Figure 8 - - <a href="https://httparchive.org/reports/state-of-the-web#tcp" - >TCP connections per page</a - ></strong - > - </p> - <p> - Bundling assets to obtain fewer requests was another HTTP/1.1 workaround - that went by many names: bundling, concatenation, packaging, spriting, … - etc. It is less necessary when using HTTP/2 as there is less overhead with - requests but it should be noted that requests are not free in HTTP/2 and - <a href="https://engineering.khanacademy.org/posts/js-packaging-http2.htm" - >those that experimented with removing bundling completely have noticed a - loss in performance</a - >. Looking at the number of requests loaded by page over time, we do see a - slight decrease in requests, rather than the expected increase: - </p> - <p> - <img - src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/numresources.png" - alt="" - /> - </p> - <p> - <strong - >Figure 9 - - <a href="https://httparchive.org/reports/state-of-the-web#reqTotal" - >Total Requests per page</a - ></strong - > - </p> - <p> - This low rate of change can perhaps be attributed to the aforementioned - observations that bundling cannot be removed (at least completely) without a - negative performance impact and that many build tools currently bundle for - historical reasons based on HTTP/1.1 recommendations. It is also likely that - many sites may not be willing to penalize HTTP/1.1 users by undoing their - HTTP/1.1 performance hacks just yet, or at least that they do not have the - confidence (or time!) to feel this is worthwhile. That the number of - requests is staying roughly static, and against the background of an ever - increasing <a href="#">page weight</a> is interesting though perhaps not - really related to HTTP/2. - </p> - <h2 id="http2-push">HTTP/2 Push</h2> - <p> - HTTP/2 push has a mixed history despite being a much-hyped new feature of - HTTP/2. The other features were basically under the hood performance - improvements, but push was a brand-new concept that completely broke the - single request to single response nature of HTTP up until then. It allowed - extra responses to be returned: when you asked for the web page, the server - could respond with the HTML page as usual, but then also send you the - critical CSS and JavaScript, thus avoiding any additional round trips for - certain resources. It would in theory allow us to stop inlining CSS and - JavaScript into our HTML and yet still get the same performance gains of - doing it. After solving that, it could potentially lead to all sorts of new - and interesting use cases. - </p> - <p> - The reality has been… well, a bit disappointing. HTTP/2 push has proved much - harder than originally envisaged to use effectively. Some of this has been - due to - <a href="https://jakearchibald.com/2017/h2-push-tougher-than-i-thought/" - >the complexity of how HTTP/2 push works</a - >, and the implementation issues due to that. A bigger concern is that push - can quite easily cause, rather than solve, performance issues. Over-pushing - is a real risk. 
Often the browser is in the best place to decide - <em>what</em> to request, and just as crucially <em>when</em> to request it - but HTTP/2 push puts that responsibility on the server. Pushing resources - that a browser already has in its cache, is a waste of bandwidth (though in - my opinion so is inlining CSS but that gets must less of a hard time about - that than HTTP/2 push!). - <a - href="https://lists.w3.org/Archives/Public/ietf-http-wg/2019JanMar/0033.html" - >Proposals to inform the server about the status of the browser cache have - stalled</a - > - especially on privacy concerns. Even without that problem, there are other - potential issues if push is not used correctly. For example, pushing large - images and therefore holding up the sending of critical CSS and JavaScript - will lead to slower websites than if you'd not pushed at all! - </p> - <p> - There has also been very little evidence to date that push, even when - implemented correctly, results in the performance increase it promised. This - is an area that again the HTTP Archive is not best placed to answer, due to - the nature of how it runs (a month crawl of popular sites using Chrome in - one state) so we won't delve into it too much here, but suffice to say that - the performance gains are far from clear cut and the potential problems are - real. - </p> - <p>Putting that aside let's look at the usage of HTTP/2 push:</p> - <table> - <thead> - <tr> - <th id="client">Client</th> - <th id="sites_using_http/2_push">Sites Using HTTP/2 Push</th> - <th id="sites_using_http/2_push_(%)">Sites Using HTTP/2 Push (%)</th> - </tr> - </thead> - <tbody> - <tr> - <td>Desktop</td> - <td>22,581</td> - <td>0.52%</td> - </tr> - <tr> - <td>Mobile</td> - <td>31,452</td> - <td>0.59%</td> - </tr> - </tbody> - </table> - <p><strong>Figure 10 - Sites using HTTP/2 push</strong></p> - <p> - These status show that the uptick of HTTP/2 push is very low - most likely - because of the issues described previously. However, when sites do use push, - then tend to use it a lot rather than for one or two assets as shown in - Figure 11: - </p> - <table> - <thead> - <tr> - <th id="client">Client</th> - <th id="avg_pushed_requests">Avg Pushed Requests</th> - <th id="avg_kb_pushed">Avg KB Pushed</th> - </tr> - </thead> - <tbody> - <tr> - <td>Desktop</td> - <td>7.86</td> - <td>162.38</td> - </tr> - <tr> - <td>Mobile</td> - <td>6.35</td> - <td>122.78</td> - </tr> - </tbody> - </table> - <p><strong>Figure 11 - How much is pushed when it is used</strong></p> - <p> - This is a concern as previous advice has been to be conservative with push - and to - <a - href="https://docs.google.com/document/d/1K0NykTXBbbbTlv60t5MyJvXjqKGsCVNYHyLEXIxYMv0/edit" - >"push just enough resources to fill idle network time, and no more"</a - >. The above statistics suggest many resources, of a significant combined - size are pushed. Looking at what is pushed we see the data in Figure 12: - </p> - <p> - <img - src="https://github.com/HTTPArchive/almanac.httparchive.org/raw/master/src/static/images/2019/20_HTTP_2/whatpushisusedfor.png" - alt="" - /> - </p> - <p><strong>Figure 12 - What asset types is push used for?</strong></p> - <p> - JavaScript and then CSS are the overwhelming majority of pushed items, both - by volume and by bytes. After this there is a rag tag assortment of images, - fonts, data, …etc. At the tail end we see around 100 sites pushing video - - which may be intentional or may be a sign of over-pushing the wrong types of - assets! 
- </p> - <p> - One concern raised by some, is that HTTP/2 implementations have repurposed - the preload HTTP link header as a signal to push. One of the most popular - uses of the preload <a href="#">resource hint</a> is to inform the browser - of late-discovered resources like fonts and images, that the browser will - not see until the CSS for example has been requested, downloaded and parsed. - If these are now pushed based on that header, there was a concern that - reusing this may result in a lot of unintended pushes. However, the relative - low usage of fonts and images may mean that risk is not being seen as much - as was feared. <code><link rel="preload" ... ></code> tags are often - used in the HTML rather than HTTP link headers and the meta tags are not a - signal to push. Statistics in the <a href="#">resource hint</a> chapter show - that less than 1% of sites use the preload HTTP link header, and about the - same amount use preconnection which has no meaning in HTTP/2, so this would - suggest this is not so much of an issue. Though there are a number of fonts - and other assets being pushed, which may be a signal of this. As a counter - argument to those complaints, if an asset is important enough to preload, - then it could be argued these assets should be pushed if possible as - browsers treat a preload hints as very high priority requests anyway. Any - performance concern is therefore (again arguably) at the overuse of preload, - rather than the resulting HTTP/2 push that happens because of this. - </p> - <p> - To get around this unintended push, you can provide the - <code>nopush</code> attribute in your preload header: - </p> - <pre><code>link: </assets/jquery.js>; rel=preload; as=script; nopush</code></pre> - <p> - It looks like 5% of preload HTTP headers do make use of this attribute, - which is higher than I would have expected as I would have considered this a - niche optimization. Then again, so is the use of preload HTTP headers and/or - HTTP/2 push itself! - </p> - <h2 id="issues">Issues</h2> - <p> - HTTP/2 is mostly a seamless upgrade that, once your server supports it, you - can switch on with no need to change your website or application. Of course, - you can optimize for HTTP/2 or stop using HTTP/1.1 workarounds as much, but - in general a site will usually work without needing any changes - but just - be faster. There are a couple of gotchas to be aware of however that can - impact any upgrade and some sites have found these out the hard way. - </p> - <p> - One cause of issues in HTTP/2 is the poor support of HTTP/2 prioritization. - This feature allows multiple requests in progress to make the appropriate - use of the connection. This is especially important since HTTP/2 has - massively increased the number of requests that can be running on the same - connection. 100 or 128 parallel requests limits are common in server - implementations. Previously the browser had a max of 6 connections per - domain and so used its skill and judgement to decide how best to use those - connections. Now it rarely needs to queue and can send all requests as soon - as it knows about them. This then can lead to the bandwidth being "wasted" - on lower priority requests while critical requests are delayed (and - incidentally - <a - href="https://www.lucidchart.com/techblog/2019/04/10/why-turning-on-http2-was-a-mistake/" - >can also lead to swamping your backend server with more requests than it - is used to!</a - >). 
HTTP/2 has a complex prioritization model (too complex many say - hence - why it is being reconsidered for HTTP/3!) but few servers honor that - properly. This can be because their HTTP/2 implementations are not up to - scratch or because of so called <em>bufferbloat</em> where the responses are - already en route before the server realizes there is a higher priority - request. Due to the varying nature of servers, TCP stacks and locations it - is difficult to measure this for most sites, but with CDNs this should be - more consistent. - <a href="https://twitter.com/patmeenan">Patrick Meenan</a> created - <a href="https://github.com/pmeenan/http2priorities/tree/master/stand-alone" - >an example test page</a - > - which deliberately tries to download a load of low-priority, off-screen, - images, before requesting some high priority on-screen images. A good HTTP/2 - server should be able to recognize this and send the high priority images - shortly after requested, at the expense of the lower priority images. A poor - HTTP/2 server will just respond in the request order and ignore any priority - signals. <a href="https://twitter.com/AndyDavies">Andy Davies</a> has - <a href="https://github.com/andydavies/http2-prioritization-issues" - >a page tracking status of various CDNs for Patrick's test</a - >. The HTTP Archive identifies when a CDN is used as part of its crawl and - merging these two datasets that gives us the results shown in Figure 13: - </p> - <table> - <thead> - <tr> - <th id="cdn">CDN</th> - <th id="prioritizes_correctly?">Prioritizes Correctly?</th> - <th id="desktop">Desktop</th> - <th id="mobile">Mobile</th> - <th id="both">Both</th> - </tr> - </thead> - <tbody> - <tr> - <td>Not using CDN</td> - <td>Unknown</td> - <td>57.81%</td> - <td>60.41%</td> - <td>59.21%</td> - </tr> - <tr> - <td>Cloudflare</td> - <td>Pass</td> - <td>23.15%</td> - <td>21.77%</td> - <td>22.40%</td> - </tr> - <tr> - <td>Google</td> - <td>Fail</td> - <td>6.67%</td> - <td>7.11%</td> - <td>6.90%</td> - </tr> - <tr> - <td>Amazon CloudFront</td> - <td>Fail</td> - <td>2.83%</td> - <td>2.38%</td> - <td>2.59%</td> - </tr> - <tr> - <td>Fastly</td> - <td>Pass</td> - <td>2.40%</td> - <td>1.77%</td> - <td>2.06%</td> - </tr> - <tr> - <td>Akamai</td> - <td>Pass</td> - <td>1.79%</td> - <td>1.50%</td> - <td>1.64%</td> - </tr> - <tr> - <td></td> - <td>Unknown</td> - <td>1.32%</td> - <td>1.58%</td> - <td>1.46%</td> - </tr> - <tr> - <td>WordPress</td> - <td>Pass</td> - <td>1.12%</td> - <td>0.99%</td> - <td>1.05%</td> - </tr> - <tr> - <td>Sucuri Firewall</td> - <td>Fail</td> - <td>0.88%</td> - <td>0.75%</td> - <td>0.81%</td> - </tr> - <tr> - <td>Incapsula</td> - <td>Fail</td> - <td>0.39%</td> - <td>0.34%</td> - <td>0.36%</td> - </tr> - <tr> - <td>Netlify</td> - <td>Fail</td> - <td>0.23%</td> - <td>0.15%</td> - <td>0.19%</td> - </tr> - <tr> - <td>OVH CDN</td> - <td>Unknown</td> - <td>0.19%</td> - <td>0.18%</td> - <td>0.18%</td> - </tr> - </tbody> - </table> - <p> - <strong>Figure 13 - HTTP/2 prioritization support in common CDNs</strong> - </p> - <p> - This shows that a not insignificant portion of traffic is subject to the - identified issue. How much of a problem this is, depends on exactly how your - page loads and whether high priority resources are discovered late or not - for your site, but it does show another complexity to take into - considerations. - </p> - <p> - Another issue is with the <code>upgrade</code> HTTP header being used - incorrectly. 
Web servers can respond to requests with an - <code>upgrade</code> HTTP header suggesting that it supports a better - protocol that the client might wish to use (e.g. advertise HTTP/2 to a - client only using HTTP/1.1). You might think this would be useful as a way - of informing the browser it supports HTTP/2 but since browsers only support - HTTP/2 over HTTPS and since use of HTTP/2 can be negotiated through the - HTTPS handshake, the use of this <code>upgrade</code> header for advertising - HTTP/2 is pretty limited (to browsers at least). Worse than that, is when a - server sends an upgrade header in error. This could be because an HTTP/2 - supporting backend server is sending the header and then an HTTP/1.1-only - edge server is blindly forwarding to the client. Apache emits the - <code>upgrade</code> header when mod_http2 is enabled but HTTP/2 is not - being used, and a nginx instance sitting in front of such an Apache happily - forwards this header even when nginx does not support HTTP/2. This false - advertising then leads to clients trying (and failing!) to use HTTP/2 as - they are advised to. 108 site use HTTP/2 and yet suggest upgrading to HTTP/2 - in this <code>upgrade</code> header. A further 12,767 sites on desktop - (15,235 on mobile) suggest upgrading an HTTP/1.1 connection delivered over - HTTPS to HTTP/2 when it's clear this was not available, or it would have - been used already. These are a small minority of the 4.3 million sites - crawled on desktop and 5.3 million sites crawled on mobile for these stats - but it shows that this was still an issue affecting a number of sites out - there. Browsers handle this inconsistently with Safari in particular - attempting to upgrade and then getting itself in a mess and refusing to - display the site at all. All this is before we get into sites recommending - upgrading to <code>http1.0</code>, <code>http://1.1</code> or even - <code>-all,+TLSv1.3,+TLSv1.2</code> (clearly some typos in web server - configurations going on here!). - </p> - <p> - There are further implementation issues we could look at. For example, - HTTP/2 is much stricter about HTTP header names, rejecting the whole request - if you respond with spaces, colons or other invalid HTTP header names. The - header names are also converted to lowercase which catches some by surprise - if their application assumes a certain capitalization (which was never - guaranteed previously as - <a href="https://tools.ietf.org/html/rfc7230#section-3.2" - >HTTP/1.1 specifically states the header names are case insensitive</a - >, but still some have depended on this). The HTTP Archive could potentially - be used to identify these issues as well, though some of them will not be - apparent on the home page so we did not delve into that this year. - </p> - <h2 id="http3">HTTP/3</h2> - <p> - The world does not stand still and despite HTTP/2 not having even reached - its official 5th birthday, people are already seeing it as old news and - getting more excited about its successor: HTTP/3. HTTP/3 builds on the - concepts of HTTP/2 but moves it from working over TCP connections that HTTP - has always used to a UDP-based protocol called QUIC. This allows us to fix - one edge case where HTTP/2 is slower then HTTP/1.1, when there is high - packet loss and the guaranteed nature of TCP holds up all streams and - throttles back all streams. 
It also allows us to address some TCP and HTTPS - inefficiencies such as consolidating on one handshake for both, and - supporting many ideas for TCP that have proven hard to implement in real - life (TCP fast open, 0-RTT, …etc.). HTTP/3 also cleans up some overlap - between TCP and HTTP/2 (e.g. flow control being implemented in both layers) - but conceptually it is very similar to HTTP/2. Web developers who understand - and have optimized for HTTP/2 should have to make no further changes for - HTTP/3. Server operators will have more work to do however as the - differences between TCP and QUIC are much more groundbreaking. They will - make implementation harder so the roll out of HTTP/3 may take considerably - longer than HTTP/2 and initially be limited to those with certain expertise - in the field (e.g. CDNs). - </p> - <p> - QUIC has been implemented by Google for a number of years and it is now - undergoing a similar standardization process that SDPY did on its way to - HTTP/2. At the end of 2018 it was decided to name the HTTP part of QUIC as - HTTP/3 (in Google/s version of QUIC is was simply known as HTTP/2 even - though it was not exactly the same as regular HTTP/2). QUIC has ambitions - beyond just HTTP but for the moment it is the use case being worked on. Just - as this chapter was being written, - <a href="https://blog.cloudflare.com/http3-the-past-present-and-future/" - >Cloudflare, Chrome and Firefox all announced HTTP/3 support</a - > - despite the fact that HTTP/3 is still not formally complete or approved as a - standard yet. This is welcome as QUIC support has been somewhat lacking - outside of Google until recently and definitely lags SPDY and HTTP/2 support - from a similar stage of standardization. - </p> - <p> - Because HTTP/3 uses QUIC over UDP rather than TCP it makes the discovery of - HTTP/3 support a bigger challenge than HTTP/2 discovery. With HTTP/2 we can - mostly use the HTTPS handshake, but as HTTP/3 is on a completely different - connection that is not an option here. HTTP/2 did also use the - <code>upgrade</code> HTTP header to inform of HTTP/2 support, and although - that was not that useful for HTTP/2, a similar mechanism has been put in - place for QUIC that is more useful. The <em>alternative services</em> HTTP - header (<code>alt-svc</code>) advertises alternative protocols that can be - used on completely different connections (as opposed to alternative - protocols that can be used on this connection - which is what the - <code>upgrade</code> HTTP header is used for). Analysis of this header shows - that 7.67% of desktop sites and 8.38% of mobile sites already support QUIC - (which roughly represents Google percentage of traffic unsurprisingly enough - as it has been using this for a while), and 0.04% are already supporting - <code>h3</code> (meaning HTTP/3) in this field. I would imagine by next - year’s Almanac this number will have increased significantly. - </p> - <h2 id="conclusion">Conclusion</h2> - <p> - This analysis of the available statistics in HTTP Archive has shown what - many of us in the HTTP community were already aware of: HTTP/2 is here and - proving very popular. It is already the dominant protocol in terms of number - of request but has not quite overtaken HTTP/1.1 in terms of number of sites - supported. The long tail of the internet means that it often takes an - exponentially longer time to make noticeable gains on the less - well-maintained sites than on the high profile, high volume sites. 
- </p> - <p> - We've also talked about how it is (still!) not easy to get HTTP/2 support in - some installations. Server developers, operating system distributors and end - customers all have a part to play in pushing to make that easier. Tying - software to operating systems always lengthens deployment time - and in fact - one of the very reasons for QUIC is to break a similar barrier with - deploying TCP changes. In many instances there is no real reason to tie web - server versions to operating systems. Apache (to use one of the more popular - examples) will run with HTTP/2 support in older operating systems but - getting an up to date version on to the server should not require the - expertise or risk it currently does. Nginx does very well here hosting - repositories for the common Linux flavors to make installation easier and if - the Apache team (or the Linux distribution vendors) do not offer something - similar, then I can only see Apache's usage continuing to shrink as it - struggles to hold relevance and shake its reputation as old and slow - based - on older installs - even though up to date versions have one of the best - HTTP/2 implementations. I see that as less of an issue for IIS since it is - usually the preferred web server on the Windows side. - </p> - <p> - Other than that, HTTP/2 has been a relatively easy upgrade path - which is - why it has the strong uptake it has already seen. For the most part, it is a - painless switch on and therefore, for most, it has turned out to be a - hassle-free performance increase that requires little thought once your - server supports it. The devil is in the details though (as always), and - small differences between server implementations can result in better or - worse HTTP/2 usage and ultimately end user experience. There have also been - a number of bugs and even - <a - href="https://github.com/Netflix/security-bulletins/blob/master/advisories/third-party/2019-002.md" - >security issues</a - >, as is to be expected with any new protocol. Ensuring you are using a - strong, up to date, well maintained implementation of any newish protocol - like HTTP/2 will ensure you stay on top of these issues. However, that can - take expertise and managing. The roll out of QUIC and HTTP/3 will likely be - even more complicated and require more expertise. Perhaps this is best left - to third party service providers like CDNs who have this expertise and can - give your site easy access to these features? However, even when left to the - experts, this is not a sure thing (as the prioritization statistics show), - but if you choose your server provider wisely and engage with them on what - your priorities are, then it should be an easier implementation. And on that - note it would be great if the CDNs prioritized the issue highlighted above - (pun definitely intended!), though I suspect with the advent of a new - prioritization method in HTTP/3, many will hold tight. The next year will - prove yet more interesting times in the HTTP world. - </p> -</section> {% endblock %} diff --git a/src/templates/en/2019/chapters/markup.html b/src/templates/en/2019/chapters/markup.html index faca30ddccc..c8179dbbfee 100644 --- a/src/templates/en/2019/chapters/markup.html +++ b/src/templates/en/2019/chapters/markup.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! 
- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,820 +10,910 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css" /> -{% endblock %} {% set metadata = +{% set metadata = {"part_number":"I","chapter_number":3,"title":"Markup","authors":["bkardell"],"reviewers":["zcorpan"," tomhodgins"," matthewp"]} %} {% block main %} -<aside> - <ul> - <li> - <a href="#methodology">Methodology</a> - </li> - - <li> - <a href="#top-elements-and-general-info">Top elements and general info</a> - +<article id="chapter" class="main"> + <nav class="index"> + <div class="index-box floating-card"> + <h2 class="header">Index</h2> + <h2 class="header-mobile">Index</h2> <ul> <li> - <a href="#elements-per-page">Elements per page</a> + <a href="#methodology">Methodology</a> </li> - </ul> - </li> - - <li> - <a href="#custom-elements">Custom elements?</a> - </li> - <li> - <a href="#perspective-on-value-and-usage" - >Perspective on Value and Usage</a - > - </li> + <li> + <a href="#top-elements-and-general-info" + >Top elements and general info</a + > - <li> - <a href="#lots-of-data-real-dom-on-the-real-web" - >Lots of data: Real DOM on the Real Web</a - > + <ul> + <li> + <a href="#elements-per-page">Elements per page</a> + </li> + </ul> + </li> - <ul> <li> - <a href="#products-and-libraries-and-their-custom-markup" - >Products (and libraries) and their custom markup</a - > + <a href="#custom-elements">Custom elements?</a> </li> <li> - <a href="#common-use-cases-and-solutions" - >Common use cases and solutions</a + <a href="#perspective-on-value-and-usage" + >Perspective on Value and Usage</a > </li> <li> - <a href="#in-summary">In Summary</a> + <a href="#lots-of-data-real-dom-on-the-real-web" + >Lots of data: Real DOM on the Real Web</a + > + + <ul> + <li> + <a href="#products-and-libraries-and-their-custom-markup" + >Products (and libraries) and their custom markup</a + > + </li> + + <li> + <a href="#common-use-cases-and-solutions" + >Common use cases and solutions</a + > + </li> + + <li> + <a href="#in-summary">In Summary</a> + </li> + </ul> </li> </ul> - </li> - </ul> -</aside> + </div> + </nav> -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img - src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" - class="chapter-hero" - /> - <p> - In 2005, Ian "Hixie" Hickson posted - <a - href="https://web.archive.org/web/20060203035414/http://code.google.com/webstats/index.html" - >some analysis of markup data</a - > - building upon various previous work. Much of this work aimed to investigate - class names to see if there were common informal semantics that were being - adopted by developers which it might make sense to standardize upon. Some of - this research helped inform new elements in HTML5. - </p> - <p> - 14 years later, it's time to take a fresh look. 
Since then, we've also had - the introduction of Custom Elements and the - <a href="https://extensiblewebmanifesto.org/">Extensible Web Manifesto</a> - encouraging that we find better ways to pave the cowpaths by allowing - developers to explore the space of elements themselves and allow standards - bodies to<a - href="https://bkardell.com/blog/Dropping-The-F-Bomb-On-Standards.html" - > - act more like dictionary editors</a - >. Unlike CSS class names which might be used for anything, we can be far - more certain that authors who used a non-standard <em>element</em> really - intended this to be an element. - </p> - <p> - As of July 2019, the HTTP Archive has begun collecting all used - <em>element</em> names in the DOM for about 4.4 million desktop home pages, - and about 5.3 million mobile home pages which we can now begin to research - and dissect. - </p> - <p> - This crawl encountered - <em>over 5000 distinct non-standard element names</em> in these pages, so we - capped the total distinct number of elements that we count to the 'top' - (explained below) 5048. - </p> - <h2 id="methodology">Methodology</h2> - <p> - Names of elements on each page were collected from the DOM itself, post - initial run of JavaScript. - </p> - <p> - Looking at a raw frequency count isn't especially helpful, even for standard - elements: About 25% of all elements encountered are - <code><div></code>. About 17% are <code><a></code>, about 10.6% - are <code><span></code> -- and those are the only elements that - account for more than 10% of occurrences. Languages are - <a href="https://www.youtube.com/watch?v=fCn8zs912OE">generally like this</a - >, a small number of terms are astoundingly used by comparison. Further, - when we start looking at non-standard elements for uptake, this would be - very misleading as one site could use a certain element a thousand times and - thus make it look artificially very popular. - </p> - <p> - Instead, as in Hixie's original study, what we will look at is how many - sites include each element at least once in their homepage (Note: This is, - itself, not without some potential biases. Popular products can be used by - several sites, which introduce non-standard markup, even 'invisibly' to - individual authors. Thus, care must be taken to acknowledge that usage - doesn't necessarily imply direct author knowledge and conscious adoption as - much as it does the servicing of a common need, in a common way. During our - research, we found several examples of this, some we will call out.) - </p> - <h2 id="top-elements-and-general-info">Top elements and general info</h2> - <p> - In 2005, Hixie's survey listed the top few most commonly used elements on - pages. The top 3 were <code>html</code>, <code>head</code> and - <code>body</code> which he noted as interesting because they are optional - and created by the parser if omitted. Given that we use the post-parsed DOM, - they'll show up universally in our data. Thus, we'll begin with the 4th most - used element. Below is a comparison of the data from then to now (I've - included the frequency comparison here as well just for fun). 
- </p> - <table> - <tr> - <td>2005 (per site)</td> - <td>2019 (per site)</td> - <td>2019 (frequency)</td> - </tr> - <tr> - <td>title</td> - <td>title</td> - <td>div</td> - </tr> - <tr> - <td>a</td> - <td>meta</td> - <td>a</td> - </tr> - <tr> - <td>img</td> - <td>a</td> - <td>span</td> - </tr> - <tr> - <td>meta</td> - <td>div</td> - <td>li</td> - </tr> - <tr> - <td>br</td> - <td>link</td> - <td>img</td> - </tr> - <tr> - <td>table</td> - <td>script</td> - <td>script</td> - </tr> - <tr> - <td>td</td> - <td>img</td> - <td>p</td> - </tr> - <tr> - <td>tr</td> - <td>span</td> - <td>option</td> - </tr> - </table> + <section class="content"> + <section class="body"> + <h1> + <div class="decorative-line"></div> + {{ metadata.get('title') }} + <div class="decorative-line-mobile"></div> + </h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="content-banner" + /> + <p> + In 2005, Ian "Hixie" Hickson posted + <a + href="https://web.archive.org/web/20060203035414/http://code.google.com/webstats/index.html" + >some analysis of markup data</a + > + building upon various previous work. Much of this work aimed to + investigate class names to see if there were common informal semantics + that were being adopted by developers which it might make sense to + standardize upon. Some of this research helped inform new elements in + HTML5. + </p> + <p> + 14 years later, it's time to take a fresh look. Since then, we've also + had the introduction of Custom Elements and the + <a href="https://extensiblewebmanifesto.org/" + >Extensible Web Manifesto</a + > + encouraging that we find better ways to pave the cowpaths by allowing + developers to explore the space of elements themselves and allow + standards bodies to<a + href="https://bkardell.com/blog/Dropping-The-F-Bomb-On-Standards.html" + > + act more like dictionary editors</a + >. Unlike CSS class names which might be used for anything, we can be + far more certain that authors who used a non-standard + <em>element</em> really intended this to be an element. + </p> + <p> + As of July 2019, the HTTP Archive has begun collecting all used + <em>element</em> names in the DOM for about 4.4 million desktop home + pages, and about 5.3 million mobile home pages which we can now begin to + research and dissect. + </p> + <p> + This crawl encountered + <em>over 5000 distinct non-standard element names</em> in these pages, + so we capped the total distinct number of elements that we count to the + 'top' (explained below) 5048. + </p> + <h2 id="methodology">Methodology</h2> + <p> + Names of elements on each page were collected from the DOM itself, post + initial run of JavaScript. + </p> + <p> + Looking at a raw frequency count isn't especially helpful, even for + standard elements: About 25% of all elements encountered are + <code><div></code>. About 17% are <code><a></code>, about + 10.6% are <code><span></code> -- and those are the only elements + that account for more than 10% of occurrences. Languages are + <a href="https://www.youtube.com/watch?v=fCn8zs912OE" + >generally like this</a + >, a small number of terms are astoundingly used by comparison. Further, + when we start looking at non-standard elements for uptake, this would be + very misleading as one site could use a certain element a thousand times + and thus make it look artificially very popular. 
+ </p> + <p> + Instead, as in Hixie's original study, what we will look at is how many + sites include each element at least once in their homepage (Note: This + is, itself, not without some potential biases. Popular products can be + used by several sites, which introduce non-standard markup, even + 'invisibly' to individual authors. Thus, care must be taken to + acknowledge that usage doesn't necessarily imply direct author knowledge + and conscious adoption as much as it does the servicing of a common + need, in a common way. During our research, we found several examples of + this, some we will call out.) + </p> + <h2 id="top-elements-and-general-info">Top elements and general info</h2> + <p> + In 2005, Hixie's survey listed the top few most commonly used elements + on pages. The top 3 were <code>html</code>, <code>head</code> and + <code>body</code> which he noted as interesting because they are + optional and created by the parser if omitted. Given that we use the + post-parsed DOM, they'll show up universally in our data. Thus, we'll + begin with the 4th most used element. Below is a comparison of the data + from then to now (I've included the frequency comparison here as well + just for fun). + </p> + <table> + <tr> + <td>2005 (per site)</td> + <td>2019 (per site)</td> + <td>2019 (frequency)</td> + </tr> + <tr> + <td>title</td> + <td>title</td> + <td>div</td> + </tr> + <tr> + <td>a</td> + <td>meta</td> + <td>a</td> + </tr> + <tr> + <td>img</td> + <td>a</td> + <td>span</td> + </tr> + <tr> + <td>meta</td> + <td>div</td> + <td>li</td> + </tr> + <tr> + <td>br</td> + <td>link</td> + <td>img</td> + </tr> + <tr> + <td>table</td> + <td>script</td> + <td>script</td> + </tr> + <tr> + <td>td</td> + <td>img</td> + <td>p</td> + </tr> + <tr> + <td>tr</td> + <td>span</td> + <td>option</td> + </tr> + </table> - <h3 id="elements-per-page">Elements per page</h3> - <p> - Comparing data to that of Hixie's report from 2005 shows that the average - size of DOM trees has gotten bigger. - </p> - <table> - <tr> - <td>2005</td> - <td>2019</td> - </tr> - <tr> - <td> - <img - src="/static/images/2019/03_Markup/hixie_elements_per_page.png" - width="300px" - /> - </td> - <td> - <iframe - width="600" - height="371" - seamless - frameborder="0" - scrolling="no" - src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=2141583176&format=interactive" - ></iframe> - </td> - </tr> - </table> + <h3 id="elements-per-page">Elements per page</h3> + <p> + Comparing data to that of Hixie's report from 2005 shows that the + average size of DOM trees has gotten bigger. 
+ </p> + <table> + <tr> + <td>2005</td> + <td>2019</td> + </tr> + <tr> + <td> + <img + src="/static/images/2019/03_Markup/hixie_elements_per_page.png" + width="300px" + /> + </td> + <td> + <iframe + width="600" + height="371" + seamless + frameborder="0" + scrolling="no" + src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=2141583176&format=interactive" + ></iframe> + </td> + </tr> + </table> - <p> - And also that both the average number of types of elements per page has - increased, as well as the maximum numbers of unique elements that we - encounter… - </p> - <table> - <tr> - <td>2005</td> - <td>2019</td> - </tr> - <tr> - <td> - <img - src="/static/images/2019/03_Markup/hixie_element_types_per_page.png" - width="300px" - /> - </td> - <td> - <iframe - width="600" - height="371" - seamless - frameborder="0" - scrolling="no" - src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1500675289&format=interactive" - ></iframe> - </td> - </tr> - </table> + <p> + And also that both the average number of types of elements per page has + increased, as well as the maximum numbers of unique elements that we + encounter… + </p> + <table> + <tr> + <td>2005</td> + <td>2019</td> + </tr> + <tr> + <td> + <img + src="/static/images/2019/03_Markup/hixie_element_types_per_page.png" + width="300px" + /> + </td> + <td> + <iframe + width="600" + height="371" + seamless + frameborder="0" + scrolling="no" + src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1500675289&format=interactive" + ></iframe> + </td> + </tr> + </table> - <h2 id="custom-elements">Custom elements?</h2> - <p> - Most of the elements we recorded are custom (as in simply 'not standard'), - but discussing which elements are and are not custom can get a little - challenging. Written down in some spec or proposal somewhere are, actually, - quite a few elements. For purposes here, we considered 244 elements as - standard (though, some of them are deprecated or unsupported): - </p> - <ul> - <li>145 Elements from HTML</li> - <li>68 Elements from SVG</li> - <li>31 Elements from MathML</li> - </ul> - <p>In practice, we encountered only 214 of these:</p> - <ul> - <li>137 from HTML</li> - <li>54 from SVG</li> - <li>23 from MathML</li> - </ul> - <p> - In the desktop dataset we collected data for the top 4,834 non-standard - elements that we encountered. Of these: - </p> - <ul> - <li> - 155 (3.21%) are identifiable as very probable markup or escaping errors - (they contain characters in the parsed tag name which imply that the - markup is broken) - </li> - <li> - 341 (7.05%) use XML-style colon namespacing (though, as HTML, they don't - use actual XML namespaces) - </li> - <li>3207 (66.44%) are valid custom element names</li> - <li> - 1211 (25.05%) are in the global namespace (non-standard, having neither - dash, nor colon) + <h2 id="custom-elements">Custom elements?</h2> + <p> + Most of the elements we recorded are custom (as in simply 'not + standard'), but discussing which elements are and are not custom can get + a little challenging. Written down in some spec or proposal somewhere + are, actually, quite a few elements. 
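As a rough sketch of how such names can be bucketed into the categories used in the breakdown a little further on (markup errors, XML-style colon namespacing, dash-containing valid custom element names, bare global-namespace names, and possible typos one edit away from a standard name), something like the following would do. The set standardNames (the standard HTML/SVG/MathML element names) is an assumption and is not defined here.

```js
// Sketch of the bucketing used in the breakdown below; not the analysis code
// itself. `standardNames` is assumed to be a precomputed Set of standard names.
function levenshtein(a, b) {
  const dp = Array.from({ length: a.length + 1 }, (_, i) => [i]);
  for (let j = 1; j <= b.length; j++) dp[0][j] = j;
  for (let i = 1; i <= a.length; i++) {
    for (let j = 1; j <= b.length; j++) {
      dp[i][j] = Math.min(
        dp[i - 1][j] + 1, // deletion
        dp[i][j - 1] + 1, // insertion
        dp[i - 1][j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1) // substitution
      );
    }
  }
  return dp[a.length][b.length];
}

function classify(name, standardNames) {
  if (standardNames.has(name)) return 'standard';
  // Crude heuristic for broken markup: characters that cannot appear in a
  // well-formed tag name (e.g. the '="' in a tag like <pclass="...">).
  if (/[^a-z0-9:._-]/.test(name)) return 'probable markup or escaping error';
  if (name.includes(':')) return 'XML-style colon namespacing';
  if (name.includes('-')) return 'valid custom element name';
  // Global namespace: neither dash nor colon. Flag names longer than two
  // characters that are one edit away from a standard name (like <cript> or
  // <artice>) as possible typos.
  const possibleTypo =
    name.length > 2 &&
    [...standardNames].some((std) => levenshtein(name, std) === 1);
  return possibleTypo ? 'global namespace, possible typo' : 'global namespace';
}
```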
For purposes here, we considered + 244 elements as standard (though, some of them are deprecated or + unsupported): + </p> + <ul> + <li>145 Elements from HTML</li> + <li>68 Elements from SVG</li> + <li>31 Elements from MathML</li> + </ul> + <p>In practice, we encountered only 214 of these:</p> + <ul> + <li>137 from HTML</li> + <li>54 from SVG</li> + <li>23 from MathML</li> + </ul> + <p> + In the desktop dataset we collected data for the top 4,834 non-standard + elements that we encountered. Of these: + </p> <ul> <li> - 216 of these we have flagged as *possible *typos as they are longer - than 2 characters and have a Levenshtein distance of 1 from some - standard element name like <code><cript></code>,<code - ><spsn></code - > - or <code><artice></code>. Some of these (like - <code><jdiv></code>), however, are certainly intentional. + 155 (3.21%) are identifiable as very probable markup or escaping + errors (they contain characters in the parsed tag name which imply + that the markup is broken) + </li> + <li> + 341 (7.05%) use XML-style colon namespacing (though, as HTML, they + don't use actual XML namespaces) + </li> + <li>3207 (66.44%) are valid custom element names</li> + <li> + 1211 (25.05%) are in the global namespace (non-standard, having + neither dash, nor colon) + <ul> + <li> + 216 of these we have flagged as *possible *typos as they are + longer than 2 characters and have a Levenshtein distance of 1 from + some standard element name like <code><cript></code>,<code + ><spsn></code + > + or <code><artice></code>. Some of these (like + <code><jdiv></code>), however, are certainly intentional. + </li> + </ul> </li> </ul> - </li> - </ul> - <p> - Additionally, 15% of desktop pages and 16% of mobile pages contain - deprecated elements (NOTE: A lot of this is very likely due to the use of - products rather than individual authors continuing to manually create this - markup.), here's the most common 10 and the number of pages they appear on - in each set… - </p> - <table> - <tr> - <td>element</td> - <td>mobile</td> - <td>desktop</td> - </tr> - <tr> - <td><code><center></code></td> - <td>7.96%</td> - <td>8.30%</td> - </tr> - <tr> - <td><code><font></code></td> - <td>7.80%</td> - <td>8.01%</td> - </tr> - <tr> - <td><code><marquee></code></td> - <td>1.20%</td> - <td>1.07%</td> - </tr> - <tr> - <td><code><nobr></code></td> - <td>0.55%</td> - <td>0.71%</td> - </tr> - <tr> - <td><code><big></code></td> - <td>0.47%</td> - <td>0.53%</td> - </tr> - <tr> - <td><code><frame></code></td> - <td>0.35%</td> - <td>0.39%</td> - </tr> - <tr> - <td><code><frameset></code></td> - <td>0.39%</td> - <td>0.35%</td> - </tr> - <tr> - <td><code><strike></code></td> - <td>0.27%</td> - <td>0.32%</td> - </tr> - <tr> - <td><code><noframes></code></td> - <td>0.27%</td> - <td>0.25%</td> - </tr> - </table> + <p> + Additionally, 15% of desktop pages and 16% of mobile pages contain + deprecated elements (NOTE: A lot of this is very likely due to the use + of products rather than individual authors continuing to manually create + this markup.), here's the most common 10 and the number of pages they + appear on in each set… + </p> + <table> + <tr> + <td>element</td> + <td>mobile</td> + <td>desktop</td> + </tr> + <tr> + <td><code><center></code></td> + <td>7.96%</td> + <td>8.30%</td> + </tr> + <tr> + <td><code><font></code></td> + <td>7.80%</td> + <td>8.01%</td> + </tr> + <tr> + <td><code><marquee></code></td> + <td>1.20%</td> + <td>1.07%</td> + </tr> + <tr> + <td><code><nobr></code></td> + <td>0.55%</td> + <td>0.71%</td> + 
</tr> + <tr> + <td><code><big></code></td> + <td>0.47%</td> + <td>0.53%</td> + </tr> + <tr> + <td><code><frame></code></td> + <td>0.35%</td> + <td>0.39%</td> + </tr> + <tr> + <td><code><frameset></code></td> + <td>0.39%</td> + <td>0.35%</td> + </tr> + <tr> + <td><code><strike></code></td> + <td>0.27%</td> + <td>0.32%</td> + </tr> + <tr> + <td><code><noframes></code></td> + <td>0.27%</td> + <td>0.25%</td> + </tr> + </table> + + <p> + Most of these can seem like very small numbers, but perspective matters. + </p> + <h2 id="perspective-on-value-and-usage"> + Perspective on Value and Usage + </h2> + <p> + In order to discuss numbers about the use of elements (standard, + deprecated or custom), we first need to establish some perspective. + </p> + <p> + The top 150 element names, counting the number of pages where they + appear, are shown in this chart: + </p> + <iframe + width="877" + height="588" + seamless + frameborder="0" + scrolling="no" + src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1694360298&format=interactive" + ></iframe> + <p>Note how quickly use drops off.</p> + <p> + 11 elements occur in over 90% <code><html></code>, + <code><head></code>, <code><body></code>, + <code><title></code>, <code><meta></code>, + <code><a></code>,<code><div></code>, + <code><link></code>, <code><script></code>, + <code><img></code> and <code><span></code>. + </p> + <p> + Only 15 more elements occur in at least 50% of the home pages + (<code><ul></code>, <code><li></code>, + <code><p></code>, <code><style></code>, + <code><input></code>, <code><br></code>, + <code><form></code>, <code><h2></code>, + <code><h1></code>, <code><iframe></code>, + <code><h3></code>, <code><button></code>, + <code><footer></code>, <code><header></code>, + <code><nav></code> are the others). + </p> + <p>And only 40 more elements occur on more than 5% of pages.</p> + <p> + Even <code><video></code>, for example, doesn't make that cut. It + appears on only 4.21% of pages in the dataset (on desktop, only 3.03% on + mobile). While these numbers sound very low, 4.21% is actually + <em>quite</em> popular by comparison. In fact, only 98 elements occur on + more than 1% of pages. + </p> + <p> + It's interesting, then, to look at what the distribution of these + elements looks like and which ones have more than 1% use. Below is a + chart that shows the rank of each element and which category they fall + into. I've separated the data points into discrete sets simply so that + they can be viewed (otherwise there just aren't enough pixels to capture + all that data), but they represent a single 'line' of popularity - the + left-most being the most common, the right-most being the least common. + The arrow points to the end of elements that appear in more than 1% of + the pages. + </p> + <p> + (( TODO: there is a corresponding image in the google doc + <a + href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" + >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a + > + )) + </p> + <p> + You can observe two things here: First, that the set of elements that + have more than 1% use are not exclusively HTML. In fact, + <em>27 of the most popular 100 elements aren't even HTML</em> - they are + SVG! And there are + <em>non-standard tags at or very near that cutoff too</em>! Second, note + that a whole lot of HTML elements are used by less than 1% of pages. 
+ </p> + <p> + So, are all of those elements used by less than 1% of pages "useless?". + Definitely not. This is why establishing perspective matters. There are + around + <a + href="https://www.websitehostingrating.com/internet-statistics-facts/" + >2 billion web sites on the web</a + >. If something appears on 0.1% of all websites in our dataset, we can + extrapolate that this represents perhaps <em>2 million web sites</em> in + the whole web. Even 0.01% extrapolates to a + <em>two hundred of thousand of sites</em>. This is also why removing + support for elements, even very old ones which we think aren't great + ideas, is a very rare occurrence: Breaking hundreds of thousands or + millions of sites just isn't a thing that browser vendors can do + lightly. + </p> + <p> + So, lots of elements, even the native ones, have less than 1% use and + are still very important and successful. <code><code></code>, for + example, is an element that I both use and encounter a lot. It's + definitely useful and important - and yet it is used on only 0.57% of + these pages. Part of this is skewed based on what we are measuring - + home pages are generally <em>less likely</em> to include certain kinds + of things (like <code><code></code> for example): They serve a + less general purpose than, for example, headings, paragraphs, links and + lists, however, the data is generally useful. + </p> + <p> + We also collected information about which pages contained an author + defined (not native) <code>.shadowRoot</code> -- About 0.22% of the + pages on desktop, and 0.15% on mobile. This might not sound like a lot, + but it is roughly 6.5k sites in the mobile dataset and 10k sites on the + desktop and is more than several HTML elements. + <code><summary></code> for example, has about equivalent use on + the desktop and it is the 146th most popular element. + </p> + <p> + <code><datalist></code> appeared in 0.04% of homepages, it is the + 201st most popular element. + </p> + <p> + In fact, over 15% of elements we're counting as defined by HTML are + outside the top 200 in the desktop dataset . + <code><meter></code> is the least popular "HTML5 era" element + (2004-2011, before HTML moved to a Living Standard model): It is around + the 1000th most popular element. <code><slot></code>, the most + recently introduced element (April 2016), is only around the 1400th most + popular element. + </p> + <h2 id="lots-of-data-real-dom-on-the-real-web"> + Lots of data: Real DOM on the Real Web + </h2> + <p> + With this perspective in mind about what use of native/standard features + looks like in the dataset, let's talk about the non-standard stuff. + </p> + <p> + You might expect that lots of elements we recorded are used only on a + single domain, but in fact, no element we're talking about in this list + of 5048 elements is used on only a single domain. The least number of + domains an element in our dataset appears in is 15. About a fifth of + them occur on more than 100 domains. About 7% occur on more than 1000 + domains. + </p> + <p> + To help analyze the data, I hacked together a + <a href="https://rainy-periwinkle.glitch.me">little tool with Glitch</a> + - where possible I link my observations to a page containing the data. 
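For the author-defined shadowRoot figure mentioned above, a minimal check could look like the sketch below; it only sees open shadow roots, since closed and built-in (user-agent) shadow roots are not exposed this way.

```js
// Illustrative check for an author-created shadow root on the page. Only open
// shadow roots are visible here; closed and user-agent shadow roots (e.g.
// inside <input>) return null from el.shadowRoot and are not counted.
function hasAuthorShadowRoot(doc = document) {
  return [...doc.querySelectorAll('*')].some((el) => el.shadowRoot !== null);
}
```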
+ You can use this tool yourself, and please share a permalink back with + the <a href="https://twitter.com/HTTPArchive">@HTTPAchive</a> along with + your observations (Tommy Hodgins has also built a similar + <a href="https://github.com/tomhodgins/hade">CLI tool</a> which you can + use to explore). + </p> + <p>Let's look at some data…</p> + <h3 id="products-and-libraries-and-their-custom-markup"> + Products (and libraries) and their custom markup + </h3> + <p> + As in Hixie's original research, it seems that several of the extremely + popular ones have more to do with being a part of popular + <em>products than themselves being universally adopted</em>. Many of the + ones + <a + href="https://web.archive.org/web/20060203031245/http://code.google.com/webstats/2005-12/editors.html" + >Ian Hickson mentioned 14 years ago</a + > + seem to have dwindled, but not disappeared, but some are still pretty + huge. + </p> + <p> + Those he mentioned as being pervasive and created by + <a href="https://en.wikipedia.org/wiki/Claris_Home_Page" + >Claris Home Page</a + > + (whose last stable release was 21 years ago) still appeared on over 100 + domains. + <a + href="https://rainy-periwinkle.glitch.me/permalink/28b0b7abb3980af793a2f63b484e7815365b91c04ae625dd4170389cc1ab0a52.html" + ><code><x-claris-window></code>, for example still appears on + 130 mobile domains</a + > + (desktop is similar). Some of the + <code><actinic:*></code> elements he mentioned appear on even + more: + <a + href="https://rainy-periwinkle.glitch.me/permalink/30dfca0fde9fad9b2ec58b12cb2b0271a272fb5c8970cd40e316adc728a09d19.html" + ><code>actinic:basehref</code>, still shows up on 154 pages in the + desktop data</a + >. (These come from British e-commerce provider + <a href="https://www.oxatis.co.uk">Oxatis</a>). + </p> + <p> + Macromedia's elements seem to have largely disappeared, + <a + href="https://rainy-periwinkle.glitch.me/permalink/17d49e765c4f1bfef2a3bd183ee0961fe40f0623d2b9ddf885ee35e1f251d14c.html" + >only one appears at all on our list, and on only 22 domains</a + >, however Adobe's Go-Live tags like + <a + href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html" + ><code><csscriptdict></code></a + > + <a + href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html" + >still appear on 640 domains in the desktop dataset</a + >. + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/bc8f154a95dfe06a6d0fdb099b6c8df61727b2289141a0ef16dc17b2b57d3068.html" + ><code><o:p></code> (created by Microsoft Office) still appears + in ~0.46% of desktop pages</a + > + (that's over 20k domains) and + <a + href="https://rainy-periwinkle.glitch.me/permalink/66f75e1fd2b8e62a1e77033601d9f65516df3ff8cb1896ce37fbdb932853d5c5.html" + >0.32% of mobile page</a + > + (more than a lot of standard HTML elements). + </p> + <p> + But there are plenty of newcomers that weren't in Hixie's original + report too, and with even bigger numbers… + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/e8bf0130c4f29b28a97b3c525c09a9a423c31c0c813ae0bd1f227bd74ddec03d.html" + ><code><ym-measure></code> is used on more than 1% of pages + (both desktop and mobile)</a + > + -- that's <em>huge</em> -- putting it in the top 100. 
It's a tag + injected by Yandex's + <a href="https://metrica.yandex.com/about">Metrica</a> analytics + <a href="https://www.npmjs.com/package/yandex-metrica-watch">package</a + >. + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/a532f18bbfd1b565b460776a64fa9a2cdd1aa4cd2ae0d37eb2facc02bfacb40c.html" + ><code><g:plusone></code> from Google's now defunct Google Plus + occurs on over 21k domains (both desktop and mobile)</a + >. + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/2e2f63858f7715ef84d28625344066480365adba8da8e6ca1a00dfdde105669a.html" + ><code><fb:like></code> occurs on ~13.8k</a + > + (mobile, + <a + href="https://rainy-periwinkle.glitch.me/permalink/a9aceaee7fbe82b3156caf79f48d7ef6b42729bce637f6683dc6c287df52cd5b.html" + >12.8k on desktop</a + >) and + <a + href="https://rainy-periwinkle.glitch.me/permalink/5a964079ac2a3ec1b4f552503addd406d02ec4ddb4955e61f54971c27b461984.html" + ><code><fb:like-box></code> occurs on 7.8k</a + > + (mobile, + <a + href="https://rainy-periwinkle.glitch.me/permalink/cc56280bb2d659b4426050b0c135b5c15b8ea4f8090756b567c564dac1f0659b.html" + >7k on desktop</a + >) + </p> + <p> + And + <a + href="https://rainy-periwinkle.glitch.me/permalink/6997d689f56fe77e5ce345cfb570adbd42d802393f4cc175a1b974833a0e3cb5.html" + ><code><app-root></code> (generally a framework like Angular) + appears on 8.2k mobile sites</a + > + (<a + href="https://rainy-periwinkle.glitch.me/permalink/ee3c9dfbcab568e97c7318d9795b9ecbde0605f247b19b68793afc837796aa5c.html" + >8.5k on desktop</a + >). + </p> + <p> + Comparing these to a few of the native HTML elements that are below the + 5% bar, for perspective, looks something like this (note -- varies + slightly based on dataset). + </p> + <p> + (( TOOD: there is a corresponding image in the google doc + <a + href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" + >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a + > + )) + </p> + <p>You could draw interesting observations like these all day long.</p> + <p> + Here's one that's a little different: Productization causing popularity + is evident in outright errors as well. + <a + href="https://rainy-periwinkle.glitch.me/permalink/3214f840b6ae3ef1074291f60fa1be4b9d9df401fe0190bfaff4bb078c8614a5.html" + ><code><pclass="ddc-font-size-large"></code> was a parsed tag + name which occurred in our dataset in over 1000 sites</a + >. This was thanks to a missing space in a popular 'as a service' kind + of product. Happily, we reported this error during our research and it + was quickly fixed. + </p> + <p> + In his original paper, Hixie mentions that "The good thing, if we can be + forgiven for trying to remain optimistic in the face of all this + non-standard markup, is that at least these elements are all clearly + using vendor-specific names. This massively reduces the likelihood that + standards bodies will invent elements and attributes that clash with any + of them." However, as mentioned above, this is not universal. Over 25% + of the non-standard elements that we captured don't use any kind of + namespacing strategy to avoid polluting the global namespace. Here is + <a + href="https://rainy-periwinkle.glitch.me/permalink/53567ec94b328de965eb821010b8b5935b0e0ba316e833267dc04f1fb3b53bd5.html" + >a list of 1157 elements like that from the mobile dataset</a + >. 
Many of those, as you can see, are probably non-problematic as they + are obscure names, misspellings and so on -- but at least a few probably + present some challenges. You'll note, for example, that + <code><toast></code> (which Googlers + <a href="https://www.chromestatus.com/feature/5674896879255552" + >recently tried to propose as <code><std-toast></code></a + >) appears in this list. + </p> + <p> + Among the probably not challenging, but popular ones are some + interesting entries: + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/2ba66fb067dce29ecca276201c37e01aa7fe7c191e6be9f36dd59224f9a36e16.html" + ><code><ymaps></code> (from yahoo maps) appears on ~12.5k mobile + sites</a + > + (<a + href="https://rainy-periwinkle.glitch.me/permalink/7f365899dc8a5341ed5c234162ee4eb187e99a23fc28cdea31af2322029d8b48.html" + >~8.3k desktop</a + >) + </p> + <p> + <a + href="https://rainy-periwinkle.glitch.me/permalink/5cfe2db53aadf5049e32cf7db0f7f6d8d2f1d4926d06467d9bdcd0842d943a17.html" + ><code><cufon></code> and <code><cufontext></code> from a + font replacement library from 2008, appear on ~10.5k of mobile + pages</a + > + (~<a + href="https://rainy-periwinkle.glitch.me/permalink/c9371b2f13e7e6ff74553f7918c18807cd9222024d970699e493b2935608a5f2.html" + >8.7k desktop</a + >) + </p> + <p> + There is also + <a + href="https://rainy-periwinkle.glitch.me/permalink/976b0cf78c73d125644d347be9e93e51d3a9112e31a283259c35942bda06e989.html" + >the <code><jdiv></code> element appears to be injected by Jivo + chat, a popular chat solution which appears on ~40.3k of mobile + sites</a + > + (<a + href="https://rainy-periwinkle.glitch.me/permalink/98fb3bf4f44c33edabc05439b10a374a121dbbfc5f83af65e00e859039b13acd.html" + >~37.6k of desktop pages -- that's roughly ~0.86%)</a + >! + </p> + <p> + Placing these into our same chart as above for perspective looks + something like this (again, it varies slightly based on the dataset) + </p> + <p> + (( TODO: there is a corresponding image in the google doc + <a + href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" + >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a + > + )) + </p> + <p> + The interesting thing about these is that they also introduce a few + other ways that our tool can come in very handy: If we're interested in + exploring the space of the data, a very specific tag name is just one + possible measure. It's definitely the strongest indicator if we can find + good slang developing. However, what if that's not all we're interested + in? + </p> + <h3 id="common-use-cases-and-solutions"> + Common use cases and solutions + </h3> + <p> + What if, for example, we were interested in people solving common use + cases? This could be because we're looking for solutions to use cases + that we currently have ourselves, or for researching more broadly what + common use cases people are solving with an eye toward incubating some + standardization effort. Let's take a common example: Tabs. Over the + years there have been a lot of requests for things like tabs. We can use + a fuzzy search here and find that there are + <a + href="https://rainy-periwinkle.glitch.me/permalink/c6d39f24d61d811b55fc032806cade9f0be437dcb2f5735a4291adb04aa7a0ea.html" + >many variants of tabs</a + >. 
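A hedged sketch of the kind of fuzzy filter meant here, assuming a map from non-standard element name to the number of domains using it (called nameToDomainCount below; it is not something defined in this chapter):

```js
// Illustrative fuzzy filter over collected element names. A plain substring
// match is crude (searching "tab" can also catch "table"-like names), so a
// real query would want something a little smarter.
function variantsOf(term, nameToDomainCount) {
  const needle = term.toLowerCase();
  return [...nameToDomainCount]
    .filter(([name]) => name.includes(needle))
    .sort(([, a], [, b]) => b - a); // most widely used first
}
```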
It's a little harder to count use here since we can't as easily + distinguish if two elements appear on the same page, so the count + provided there conservatively simply takes the one with the largest + count -- in most cases the real number of domains is probably + significantly larger. + </p> + <p> + There are also + <a + href="https://rainy-periwinkle.glitch.me/permalink/e573cf279bf1d2f0f98a90f0d7e507ac8dbd3e570336b20c6befc9370146220b.html" + >lots of accordions</a + >, + <a + href="https://rainy-periwinkle.glitch.me/permalink/0bb74b808e7850a441fc9b93b61abf053efc28f05e0a1bc2382937e3b78695d9.html" + >dialogs</a + >, at least + <a + href="https://rainy-periwinkle.glitch.me/permalink/651e592cb2957c14cdb43d6610b6acf696272b2fbd0d58a74c283e5ad4c79a12.html" + >65 variants of carousels</a + >, lots of stuff about + <a + href="https://rainy-periwinkle.glitch.me/permalink/981967b19a9346ac466482c51b35c49fc1c1cc66177ede440ab3ee51a7912187.html" + >'popups'</a + >, at least + <a + href="https://rainy-periwinkle.glitch.me/permalink/2e6827af7c9d2530cb3d2f39a3f904091c523c2ead14daccd4a41428f34da5e8.html" + >27 variants of toggles and switches</a + >, and so on. + </p> + <p> + Perhaps we could research why we need + <a + href="https://rainy-periwinkle.glitch.me/permalink/5ae67c941395ca3125e42909c2c3881e27cb49cfa9aaf1cf59471e3779435339.html" + >92 variants of button related elements that aren't a native button</a + >, for example, and try to fill the native gap. + </p> + <p> + If we notice popular things pop up (like <code><jdiv></code>, + solving chat) we can take knowledge of things we know (like, that that + is what <code><jdiv></code> is about, or + <code><olark></code>) and try to look + <a + href="https://rainy-periwinkle.glitch.me/permalink/db8fc0e58d2d46d2e2a251ed13e3daab39eba864e46d14d69cc114ab5d684b00.html" + >at at least 43 things we've built for tackling that</a + > + and follow connections to survey the space. + </p> + <h3 id="in-summary">In Summary</h3> + <p>So, there's lots of data here, but to summarize:</p> + <ul> + <li> + Pages have more elements than they did 14 years ago -- both on average + and max. + </li> + <li> + The lifetime of things on home pages is <em>very</em> long. + Deprecating or discontinuing things doesn't make them go away, and it + might never. + </li> + <li> + There is a lot of broken markup out there in the wild (misspelled + tags, missing spaces, bad escaping, misunderstandings) + </li> + <li> + Measuring what 'useful' means is tricky -- lots of native elements + don't pass the 5% bar, or even the 1% bar, but lots of custom ones do + -- and for lots of reasons. Passing 1% should definitely grab our + attention at least, but perhaps so should 0.5% because that is, + according to the data, comparatively <em>very</em> successful. + </li> + <li> + There is already a ton of custom markup out there. It comes in a lot + of forms, but elements containing a dash definitely seem to have taken + off. + </li> + <li> + We need to increasingly study this data and come up with good + observations to help find and pave the cowpaths. + </li> + </ul> + <p> + That last one is where you come in: We'd love to tap into the creativity + and curiosity of the larger community to help explore this data using + some of the tools (like + <a href="https://rainy-periwinkle.glitch.me/" + >https://rainy-periwinkle.glitch.me/</a + >) -- please share your interesting observations and help build our + commons of knowledge and understanding. 
+ </p> + </section> + <section class="authors"> + <h4>Authors :</h4> + <ul> + <li> + <img + class="avatar" + alt="Author name" + src="https://www.gravatar.com/avatar/cf58fcc6995e15f35e42532c3775fed6.jpg?d=mp&s=200" + /> + <div class="info"> + <span class="name">Full Name</span> + <span class="social"> + <a class="twitter" href="https://twitter.com/rick_viscomi"> + <img src="/static/images/twitter.png" alt="Twitter account" /> + </a> + + <a class="github" href="https://github.com/rviscomi"> + <img src="/static/images/github.png" alt="github account" /> + </a> + </span> + + <div class="tagline"> + Tagline of contributor here + </div> + </div> + </li> + </ul> + </section> + + <nav id="chapter-navigation"> + <a id="previous-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + + <a id="next-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + </nav> + </section> +</article> - <p> - Most of these can seem like very small numbers, but perspective matters. - </p> - <h2 id="perspective-on-value-and-usage">Perspective on Value and Usage</h2> - <p> - In order to discuss numbers about the use of elements (standard, deprecated - or custom), we first need to establish some perspective. - </p> - <p> - The top 150 element names, counting the number of pages where they appear, - are shown in this chart: - </p> - <iframe - width="877" - height="588" - seamless - frameborder="0" - scrolling="no" - src="https://docs.google.com/spreadsheets/d/e/2PACX-1vTbHgqcSepZye6DrCTpifFAUYxKT1hEO56585awyMips8oiPMLYu20GETuIE8mALkm814ObJyktEe2P/pubchart?oid=1694360298&format=interactive" - ></iframe> - <p>Note how quickly use drops off.</p> - <p> - 11 elements occur in over 90% <code><html></code>, - <code><head></code>, <code><body></code>, - <code><title></code>, <code><meta></code>, - <code><a></code>,<code><div></code>, <code><link></code>, - <code><script></code>, <code><img></code> and - <code><span></code>. - </p> - <p> - Only 15 more elements occur in at least 50% of the home pages - (<code><ul></code>, <code><li></code>, <code><p></code>, - <code><style></code>, <code><input></code>, - <code><br></code>, <code><form></code>, <code><h2></code>, - <code><h1></code>, <code><iframe></code>, - <code><h3></code>, <code><button></code>, - <code><footer></code>, <code><header></code>, - <code><nav></code> are the others). - </p> - <p>And only 40 more elements occur on more than 5% of pages.</p> - <p> - Even <code><video></code>, for example, doesn't make that cut. It - appears on only 4.21% of pages in the dataset (on desktop, only 3.03% on - mobile). While these numbers sound very low, 4.21% is actually - <em>quite</em> popular by comparison. In fact, only 98 elements occur on - more than 1% of pages. - </p> - <p> - It's interesting, then, to look at what the distribution of these elements - looks like and which ones have more than 1% use. Below is a chart that shows - the rank of each element and which category they fall into. I've separated - the data points into discrete sets simply so that they can be viewed - (otherwise there just aren't enough pixels to capture all that data), but - they represent a single 'line' of popularity - the left-most being the most - common, the right-most being the least common. 
The arrow points to the end - of elements that appear in more than 1% of the pages. - </p> - <p> - (( TODO: there is a corresponding image in the google doc - <a - href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" - >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a - > - )) - </p> - <p> - You can observe two things here: First, that the set of elements that have - more than 1% use are not exclusively HTML. In fact, - <em>27 of the most popular 100 elements aren't even HTML</em> - they are - SVG! And there are - <em>non-standard tags at or very near that cutoff too</em>! Second, note - that a whole lot of HTML elements are used by less than 1% of pages. - </p> - <p> - So, are all of those elements used by less than 1% of pages "useless?". - Definitely not. This is why establishing perspective matters. There are - around - <a href="https://www.websitehostingrating.com/internet-statistics-facts/" - >2 billion web sites on the web</a - >. If something appears on 0.1% of all websites in our dataset, we can - extrapolate that this represents perhaps <em>2 million web sites</em> in the - whole web. Even 0.01% extrapolates to a - <em>two hundred of thousand of sites</em>. This is also why removing support - for elements, even very old ones which we think aren't great ideas, is a - very rare occurrence: Breaking hundreds of thousands or millions of sites - just isn't a thing that browser vendors can do lightly. - </p> - <p> - So, lots of elements, even the native ones, have less than 1% use and are - still very important and successful. <code><code></code>, for example, - is an element that I both use and encounter a lot. It's definitely useful - and important - and yet it is used on only 0.57% of these pages. Part of - this is skewed based on what we are measuring - home pages are generally - <em>less likely</em> to include certain kinds of things (like - <code><code></code> for example): They serve a less general purpose - than, for example, headings, paragraphs, links and lists, however, the data - is generally useful. - </p> - <p> - We also collected information about which pages contained an author defined - (not native) <code>.shadowRoot</code> -- About 0.22% of the pages on - desktop, and 0.15% on mobile. This might not sound like a lot, but it is - roughly 6.5k sites in the mobile dataset and 10k sites on the desktop and is - more than several HTML elements. <code><summary></code> for example, - has about equivalent use on the desktop and it is the 146th most popular - element. - </p> - <p> - <code><datalist></code> appeared in 0.04% of homepages, it is the - 201st most popular element. - </p> - <p> - In fact, over 15% of elements we're counting as defined by HTML are outside - the top 200 in the desktop dataset . <code><meter></code> is the least - popular "HTML5 era" element (2004-2011, before HTML moved to a Living - Standard model): It is around the 1000th most popular element. - <code><slot></code>, the most recently introduced element (April - 2016), is only around the 1400th most popular element. - </p> - <h2 id="lots-of-data-real-dom-on-the-real-web"> - Lots of data: Real DOM on the Real Web - </h2> - <p> - With this perspective in mind about what use of native/standard features - looks like in the dataset, let's talk about the non-standard stuff. 
- </p> - <p> - You might expect that lots of elements we recorded are used only on a single - domain, but in fact, no element we're talking about in this list of 5048 - elements is used on only a single domain. The least number of domains an - element in our dataset appears in is 15. About a fifth of them occur on more - than 100 domains. About 7% occur on more than 1000 domains. - </p> - <p> - To help analyze the data, I hacked together a - <a href="https://rainy-periwinkle.glitch.me">little tool with Glitch</a> - - where possible I link my observations to a page containing the data. You can - use this tool yourself, and please share a permalink back with the - <a href="https://twitter.com/HTTPArchive">@HTTPAchive</a> along with your - observations (Tommy Hodgins has also built a similar - <a href="https://github.com/tomhodgins/hade">CLI tool</a> which you can use - to explore). - </p> - <p>Let's look at some data…</p> - <h3 id="products-and-libraries-and-their-custom-markup"> - Products (and libraries) and their custom markup - </h3> - <p> - As in Hixie's original research, it seems that several of the extremely - popular ones have more to do with being a part of popular - <em>products than themselves being universally adopted</em>. Many of the - ones - <a - href="https://web.archive.org/web/20060203031245/http://code.google.com/webstats/2005-12/editors.html" - >Ian Hickson mentioned 14 years ago</a - > - seem to have dwindled, but not disappeared, but some are still pretty huge. - </p> - <p> - Those he mentioned as being pervasive and created by - <a href="https://en.wikipedia.org/wiki/Claris_Home_Page" - >Claris Home Page</a - > - (whose last stable release was 21 years ago) still appeared on over 100 - domains. - <a - href="https://rainy-periwinkle.glitch.me/permalink/28b0b7abb3980af793a2f63b484e7815365b91c04ae625dd4170389cc1ab0a52.html" - ><code><x-claris-window></code>, for example still appears on 130 - mobile domains</a - > - (desktop is similar). Some of the <code><actinic:*></code> elements he - mentioned appear on even more: - <a - href="https://rainy-periwinkle.glitch.me/permalink/30dfca0fde9fad9b2ec58b12cb2b0271a272fb5c8970cd40e316adc728a09d19.html" - ><code>actinic:basehref</code>, still shows up on 154 pages in the desktop - data</a - >. (These come from British e-commerce provider - <a href="https://www.oxatis.co.uk">Oxatis</a>). - </p> - <p> - Macromedia's elements seem to have largely disappeared, - <a - href="https://rainy-periwinkle.glitch.me/permalink/17d49e765c4f1bfef2a3bd183ee0961fe40f0623d2b9ddf885ee35e1f251d14c.html" - >only one appears at all on our list, and on only 22 domains</a - >, however Adobe's Go-Live tags like - <a - href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html" - ><code><csscriptdict></code></a - > - <a - href="https://rainy-periwinkle.glitch.me/permalink/579abc77652df3ac2db1338d17aab0a8dc737b9d945510b562085d8522b18799.html" - >still appear on 640 domains in the desktop dataset</a - >. - </p> - <p> - <a - href="https://rainy-periwinkle.glitch.me/permalink/bc8f154a95dfe06a6d0fdb099b6c8df61727b2289141a0ef16dc17b2b57d3068.html" - ><code><o:p></code> (created by Microsoft Office) still appears in - ~0.46% of desktop pages</a - > - (that's over 20k domains) and - <a - href="https://rainy-periwinkle.glitch.me/permalink/66f75e1fd2b8e62a1e77033601d9f65516df3ff8cb1896ce37fbdb932853d5c5.html" - >0.32% of mobile page</a - > - (more than a lot of standard HTML elements). 
- </p> - <p> - But there are plenty of newcomers that weren't in Hixie's original report - too, and with even bigger numbers… - </p> - <p> - <a - href="https://rainy-periwinkle.glitch.me/permalink/e8bf0130c4f29b28a97b3c525c09a9a423c31c0c813ae0bd1f227bd74ddec03d.html" - ><code><ym-measure></code> is used on more than 1% of pages (both - desktop and mobile)</a - > - -- that's <em>huge</em> -- putting it in the top 100. It's a tag injected by - Yandex's <a href="https://metrica.yandex.com/about">Metrica</a> analytics - <a href="https://www.npmjs.com/package/yandex-metrica-watch">package</a>. - </p> - <p> - <a - href="https://rainy-periwinkle.glitch.me/permalink/a532f18bbfd1b565b460776a64fa9a2cdd1aa4cd2ae0d37eb2facc02bfacb40c.html" - ><code><g:plusone></code> from Google's now defunct Google Plus - occurs on over 21k domains (both desktop and mobile)</a - >. - </p> - <p> - <a - href="https://rainy-periwinkle.glitch.me/permalink/2e2f63858f7715ef84d28625344066480365adba8da8e6ca1a00dfdde105669a.html" - ><code><fb:like></code> occurs on ~13.8k</a - > - (mobile, - <a - href="https://rainy-periwinkle.glitch.me/permalink/a9aceaee7fbe82b3156caf79f48d7ef6b42729bce637f6683dc6c287df52cd5b.html" - >12.8k on desktop</a - >) and - <a - href="https://rainy-periwinkle.glitch.me/permalink/5a964079ac2a3ec1b4f552503addd406d02ec4ddb4955e61f54971c27b461984.html" - ><code><fb:like-box></code> occurs on 7.8k</a - > - (mobile, - <a - href="https://rainy-periwinkle.glitch.me/permalink/cc56280bb2d659b4426050b0c135b5c15b8ea4f8090756b567c564dac1f0659b.html" - >7k on desktop</a - >) - </p> - <p> - And - <a - href="https://rainy-periwinkle.glitch.me/permalink/6997d689f56fe77e5ce345cfb570adbd42d802393f4cc175a1b974833a0e3cb5.html" - ><code><app-root></code> (generally a framework like Angular) - appears on 8.2k mobile sites</a - > - (<a - href="https://rainy-periwinkle.glitch.me/permalink/ee3c9dfbcab568e97c7318d9795b9ecbde0605f247b19b68793afc837796aa5c.html" - >8.5k on desktop</a - >). - </p> - <p> - Comparing these to a few of the native HTML elements that are below the 5% - bar, for perspective, looks something like this (note -- varies slightly - based on dataset). - </p> - <p> - (( TOOD: there is a corresponding image in the google doc - <a - href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" - >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a - > - )) - </p> - <p>You could draw interesting observations like these all day long.</p> - <p> - Here's one that's a little different: Productization causing popularity is - evident in outright errors as well. - <a - href="https://rainy-periwinkle.glitch.me/permalink/3214f840b6ae3ef1074291f60fa1be4b9d9df401fe0190bfaff4bb078c8614a5.html" - ><code><pclass="ddc-font-size-large"></code> was a parsed tag name - which occurred in our dataset in over 1000 sites</a - >. This was thanks to a missing space in a popular 'as a service' kind of - product. Happily, we reported this error during our research and it was - quickly fixed. - </p> - <p> - In his original paper, Hixie mentions that "The good thing, if we can be - forgiven for trying to remain optimistic in the face of all this - non-standard markup, is that at least these elements are all clearly using - vendor-specific names. This massively reduces the likelihood that standards - bodies will invent elements and attributes that clash with any of them." - However, as mentioned above, this is not universal. 
Over 25% of the - non-standard elements that we captured don't use any kind of namespacing - strategy to avoid polluting the global namespace. Here is - <a - href="https://rainy-periwinkle.glitch.me/permalink/53567ec94b328de965eb821010b8b5935b0e0ba316e833267dc04f1fb3b53bd5.html" - >a list of 1157 elements like that from the mobile dataset</a - >. Many of those, as you can see, are probably non-problematic as they are - obscure names, misspellings and so on -- but at least a few probably present - some challenges. You'll note, for example, that - <code><toast></code> (which Googlers - <a href="https://www.chromestatus.com/feature/5674896879255552" - >recently tried to propose as <code><std-toast></code></a - >) appears in this list. - </p> - <p> - Among the probably not challenging, but popular ones are some interesting - entries: - </p> - <p> - <a - href="https://rainy-periwinkle.glitch.me/permalink/2ba66fb067dce29ecca276201c37e01aa7fe7c191e6be9f36dd59224f9a36e16.html" - ><code><ymaps></code> (from yahoo maps) appears on ~12.5k mobile - sites</a - > - (<a - href="https://rainy-periwinkle.glitch.me/permalink/7f365899dc8a5341ed5c234162ee4eb187e99a23fc28cdea31af2322029d8b48.html" - >~8.3k desktop</a - >) - </p> - <p> - <a - href="https://rainy-periwinkle.glitch.me/permalink/5cfe2db53aadf5049e32cf7db0f7f6d8d2f1d4926d06467d9bdcd0842d943a17.html" - ><code><cufon></code> and <code><cufontext></code> from a font - replacement library from 2008, appear on ~10.5k of mobile pages</a - > - (~<a - href="https://rainy-periwinkle.glitch.me/permalink/c9371b2f13e7e6ff74553f7918c18807cd9222024d970699e493b2935608a5f2.html" - >8.7k desktop</a - >) - </p> - <p> - There is also - <a - href="https://rainy-periwinkle.glitch.me/permalink/976b0cf78c73d125644d347be9e93e51d3a9112e31a283259c35942bda06e989.html" - >the <code><jdiv></code> element appears to be injected by Jivo - chat, a popular chat solution which appears on ~40.3k of mobile sites</a - > - (<a - href="https://rainy-periwinkle.glitch.me/permalink/98fb3bf4f44c33edabc05439b10a374a121dbbfc5f83af65e00e859039b13acd.html" - >~37.6k of desktop pages -- that's roughly ~0.86%)</a - >! - </p> - <p> - Placing these into our same chart as above for perspective looks something - like this (again, it varies slightly based on the dataset) - </p> - <p> - (( TODO: there is a corresponding image in the google doc - <a - href="https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit" - >https://docs.google.com/document/d/16TY_pV-FyW35DzuvdlaOiENz6o6PWYpl_RviU-HW7Qc/edit</a - > - )) - </p> - <p> - The interesting thing about these is that they also introduce a few other - ways that our tool can come in very handy: If we're interested in exploring - the space of the data, a very specific tag name is just one possible - measure. It's definitely the strongest indicator if we can find good slang - developing. However, what if that's not all we're interested in? - </p> - <h3 id="common-use-cases-and-solutions">Common use cases and solutions</h3> - <p> - What if, for example, we were interested in people solving common use cases? - This could be because we're looking for solutions to use cases that we - currently have ourselves, or for researching more broadly what common use - cases people are solving with an eye toward incubating some standardization - effort. Let's take a common example: Tabs. Over the years there have been a - lot of requests for things like tabs. 
We can use a fuzzy search here and - find that there are - <a - href="https://rainy-periwinkle.glitch.me/permalink/c6d39f24d61d811b55fc032806cade9f0be437dcb2f5735a4291adb04aa7a0ea.html" - >many variants of tabs</a - >. It's a little harder to count use here since we can't as easily - distinguish if two elements appear on the same page, so the count provided - there conservatively simply takes the one with the largest count -- in most - cases the real number of domains is probably significantly larger. - </p> - <p> - There are also - <a - href="https://rainy-periwinkle.glitch.me/permalink/e573cf279bf1d2f0f98a90f0d7e507ac8dbd3e570336b20c6befc9370146220b.html" - >lots of accordions</a - >, - <a - href="https://rainy-periwinkle.glitch.me/permalink/0bb74b808e7850a441fc9b93b61abf053efc28f05e0a1bc2382937e3b78695d9.html" - >dialogs</a - >, at least - <a - href="https://rainy-periwinkle.glitch.me/permalink/651e592cb2957c14cdb43d6610b6acf696272b2fbd0d58a74c283e5ad4c79a12.html" - >65 variants of carousels</a - >, lots of stuff about - <a - href="https://rainy-periwinkle.glitch.me/permalink/981967b19a9346ac466482c51b35c49fc1c1cc66177ede440ab3ee51a7912187.html" - >'popups'</a - >, at least - <a - href="https://rainy-periwinkle.glitch.me/permalink/2e6827af7c9d2530cb3d2f39a3f904091c523c2ead14daccd4a41428f34da5e8.html" - >27 variants of toggles and switches</a - >, and so on. - </p> - <p> - Perhaps we could research why we need - <a - href="https://rainy-periwinkle.glitch.me/permalink/5ae67c941395ca3125e42909c2c3881e27cb49cfa9aaf1cf59471e3779435339.html" - >92 variants of button related elements that aren't a native button</a - >, for example, and try to fill the native gap. - </p> - <p> - If we notice popular things pop up (like <code><jdiv></code>, solving - chat) we can take knowledge of things we know (like, that that is what - <code><jdiv></code> is about, or <code><olark></code>) and try - to look - <a - href="https://rainy-periwinkle.glitch.me/permalink/db8fc0e58d2d46d2e2a251ed13e3daab39eba864e46d14d69cc114ab5d684b00.html" - >at at least 43 things we've built for tackling that</a - > - and follow connections to survey the space. - </p> - <h3 id="in-summary">In Summary</h3> - <p>So, there's lots of data here, but to summarize:</p> - <ul> - <li> - Pages have more elements than they did 14 years ago -- both on average and - max. - </li> - <li> - The lifetime of things on home pages is <em>very</em> long. Deprecating or - discontinuing things doesn't make them go away, and it might never. - </li> - <li> - There is a lot of broken markup out there in the wild (misspelled tags, - missing spaces, bad escaping, misunderstandings) - </li> - <li> - Measuring what 'useful' means is tricky -- lots of native elements don't - pass the 5% bar, or even the 1% bar, but lots of custom ones do -- and for - lots of reasons. Passing 1% should definitely grab our attention at least, - but perhaps so should 0.5% because that is, according to the data, - comparatively <em>very</em> successful. - </li> - <li> - There is already a ton of custom markup out there. It comes in a lot of - forms, but elements containing a dash definitely seem to have taken off. - </li> - <li> - We need to increasingly study this data and come up with good observations - to help find and pave the cowpaths. 
- </li> - </ul> - <p> - That last one is where you come in: We'd love to tap into the creativity and - curiosity of the larger community to help explore this data using some of - the tools (like - <a href="https://rainy-periwinkle.glitch.me/" - >https://rainy-periwinkle.glitch.me/</a - >) -- please share your interesting observations and help build our commons - of knowledge and understanding. - </p> -</section> {% endblock %} diff --git a/src/templates/en/2019/chapters/performance.html b/src/templates/en/2019/chapters/performance.html index 8515049fc80..fca75b38484 100644 --- a/src/templates/en/2019/chapters/performance.html +++ b/src/templates/en/2019/chapters/performance.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! - `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,627 +10,697 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css" /> -{% endblock %} {% set metadata = +{% set metadata = {"part_number":"II","chapter_number":7,"title":"Performance","authors":["rviscomi"],"reviewers":["JMPerez"," obto"," sergeychernyshev"," zeman"]} %} {% block main %} -<aside> - <ul> - <li> - <a href="#intro">Intro</a> - </li> - - <li> - <a href="#the-state-of-performance">The state of performance</a> - +<article id="chapter" class="main"> + <nav class="index"> + <div class="index-box floating-card"> + <h2 class="header">Index</h2> + <h2 class="header-mobile">Index</h2> <ul> <li> - <a href="#fcp">FCP</a> + <a href="#intro">Intro</a> + </li> + + <li> + <a href="#the-state-of-performance">The state of performance</a> <ul> <li> - <a href="#fcp-by-device">FCP by device</a> + <a href="#fcp">FCP</a> + + <ul> + <li> + <a href="#fcp-by-device">FCP by device</a> + </li> + + <li> + <a href="#fcp-by-ect">FCP by ECT</a> + </li> + + <li> + <a href="#fcp-by-geo">FCP by geo</a> + </li> + </ul> </li> <li> - <a href="#fcp-by-ect">FCP by ECT</a> + <a href="#ttfb">TTFB</a> + + <ul> + <li> + <a href="#ttfb-by-geo">TTFB by geo</a> + </li> + </ul> </li> <li> - <a href="#fcp-by-geo">FCP by geo</a> + <a href="#fid">FID</a> + + <ul> + <li> + <a href="#fid-by-device">FID by device</a> + </li> + + <li> + <a href="#fid-by-ect">FID by ECT</a> + </li> + + <li> + <a href="#fid-by-geo">FID by geo</a> + </li> + </ul> </li> </ul> </li> <li> - <a href="#ttfb">TTFB</a> + <a href="#conclusion">Conclusion</a> + </li> + </ul> + </div> + </nav> + <section class="content"> + <section class="body"> + <h1> + <div class="decorative-line"></div> + {{ metadata.get('title') }} + <div class="decorative-line-mobile"></div> + </h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="content-banner" + /> + <h2 id="intro">Intro</h2> + <p> + Performance is a visceral part of the user experience. For + <a href="https://wpostats.com/">many websites</a>, an improvement to the + user experience by speeding up the page load time aligns with an + improvement to conversion rates. 
Conversely, when performance is poor, + users don't convert as often and have even been observed + <a + href="https://blog.fullstory.com/rage-clicks-turn-analytics-into-actionable-insights/" + >rage clicking</a + > + on the page in frustration. + </p> + <p> + There are many ways to quantify web performance. The most important + thing is to measure what actually matters to users. Events like + <code>onload</code> or <code>DOM content loaded</code> may not + necessarily reflect what users experience visually. For example, an + email client might have a very fast onload event but the only thing + loaded is the interstitial progress bar, meanwhile the inbox contents + are loading asynchronously. The loading metric that matters for this web + app is the "time to inbox", and focusing on the + <code>onload</code> event may be misleading. For that reason this + chapter will look at more modern and universally applicable paint, load, + and interactivity metrics to try to capture how users are actually + experiencing the page. + </p> + <p> + There are two kinds of performance data: lab and field. You may have + heard these referred to as synthetic and real-user measurement (or RUM). + Measuring performance in the lab ensures that each website is tested + under common conditions like browser, connection speed, physical + location, cache state, etc. This guarantee of consistency makes each + website comparable with one another. On the other hand, measuring + performance in the field represents how users actually experience the + web in all of the infinite combinations of conditions that we could + never capture in the lab. For the purposes of this chapter and + understanding real-world user experiences, we'll look at field data. + </p> + <h2 id="the-state-of-performance">The state of performance</h2> + <p> + Almost all of the other chapters in the Web Almanac are based on data + from the <a href="https://httparchive.org/">HTTP Archive</a>. In order + to capture how real users experience the web, we need a different + dataset. In this section we're using the + <a href="http://bit.ly/chrome-ux-report">Chrome UX Report</a> (CrUX), a + public dataset from Google that consists of all the same websites as the + HTTP Archive and aggregates how Chrome users actually experience them. + Experiences are categorized by: + </p> + <ul> + <li> + the form factor of the users' devices <ul> - <li> - <a href="#ttfb-by-geo">TTFB by geo</a> - </li> + <li>desktop</li> + <li>phone</li> + <li>tablet</li> </ul> </li> - <li> - <a href="#fid">FID</a> - + users' effective connection type (ECT) in mobile terms <ul> - <li> - <a href="#fid-by-device">FID by device</a> - </li> - - <li> - <a href="#fid-by-ect">FID by ECT</a> - </li> - - <li> - <a href="#fid-by-geo">FID by geo</a> - </li> + <li>offline</li> + <li>slow 2G</li> + <li>2G</li> + <li>3G</li> + <li>4G</li> </ul> </li> + <li>users' geographic location</li> </ul> - </li> + <p> + Experiences are measured monthly including paint, load, and + interactivity metrics. The first metric we'll look at is + <a + href="https://developers.google.com/web/fundamentals/performance/user-centric-performance-metrics#first_paint_and_first_contentful_paint" + >First Contentful Paint</a + > + (FCP). This is the time users spend waiting for the page to display + something useful to the screen, like an image or text. Next, we'll look + at look at a loading metric, + <a + href="https://csswizardry.com/2019/08/time-to-first-byte-what-it-is-and-why-it-matters/" + >Time to First Byte</a + > + (TTFB). 
This is a measure of how long the web page took from the time of + the user's navigation until they received the first byte of the + response. And finally, the last field metric we'll look at is + <a + href="https://developers.google.com/web/updates/2018/05/first-input-delay" + >First Input Delay</a + > + (FID). This is a relatively new metric and one that represents parts of + the UX other than loading performance. It measures the time from a + user's first interaction with a page's UI until the time the browser's + main thread is ready to process the event. + </p> + <p>So let's dive in and see what kind of insights we can find.</p> + <h3 id="fcp">FCP</h3> + <figure> + // Chart: flame distribution of 07_01 + <figcaption> + Figure 1. Distribution of websites' fast, average, and slow FCP + performance. + </figcaption> + </figure> + <p> + In Figure 1 above you can see how FCP experiences are distributed across + the web. Out of the millions of websites in the CrUX dataset, this chart + compresses the distribution down to 1,000 websites where each vertical + slice represents a single website. The chart is sorted by the percent of + fast FCP experiences, which are those occurring in less than 1 second. + Slow experiences occur in 2.5 seconds or more, and average experiences + are everything in between. At the extremes of the chart, there are some + websites with almost 100% fast experiences and some websites with almost + 100% slow experiences. In between, websites have a combination of fast, + average, and slow performance that seems to lean more towards fast or + average than slow, which is good. + </p> + <aside> + Note that when a user experiences slow performance, it's hard to say + what the reason might be. It could be that the website itself was built + poorly and inefficiently. Or there could be other environmental factors + like the user's slow connection, empty cache, etc. So when looking at + this field data we prefer to say that the user experiences themselves + are slow and not necessarily the websites. + </aside> + <p> + In order to categorize whether a website is sufficiently + <strong>fast</strong> we will use the + <a + href="https://developers.google.com/speed/docs/insights/v5/about#categories" + >PageSpeed Insights</a + > + (PSI) methodology where at least 90% of the website's FCP experiences + must be faster than 1 second. Similarly a sufficiently + <strong>slow</strong> website has 10% or more FCP experiences slower + than 2.5 seconds. We say a website has + <strong>average</strong> performance when it doesn't meet either of + these conditions. + </p> + <figure> + // Chart: Bar distribution of 07_03 + <figcaption> + Figure 2. Distribution of websites labelled as having fast, average, + or slow FCP. + </figcaption> + </figure> + <figure> + Fast FCP | Average FCP | Slow FCP -- | -- | -- 2.17% | 37.55% | 60.28% - <li> - <a href="#conclusion">Conclusion</a> - </li> - </ul> -</aside> + <figcaption> + Figure 3. Table of the percent of websites labelled as having fast, + average, or slow FCP. + </figcaption> + </figure> + <p> + In Figures 2 and 3, the results show that only 2.17% of websites are + considered fast while 60.28% of websites are considered slow. To help us + understand how users experience FCP across different devices, let's + segment by form factor. + </p> + <h4 id="fcp-by-device">FCP by device</h4> + <figure> + // Chart: Flame distribution of 07_01b + <figcaption> + Figure 4. Distribution of _desktop_ websites' fast, average, and slow + FCP performance. 
+ </figcaption> + </figure> + <figure> + // Chart: Flame distribution of 07_01c + <figcaption> + Figure 5. Distribution of _phone_ websites' fast, average, and slow + FCP performance. + </figcaption> + </figure> + <p> + In Figures 4 and 5 above, the FCP distributions of 1,000 sample websites + are broken down by desktop and phone. It's subtle, but the torso of the + desktop fast FCP distribution appears to be more convex than the + distribution for phone users. This visual approximation suggests that + desktop users experience a higher overall proportion of fast FCP. To + verify this we can apply the PSI methodology to each distribution. + </p> + <figure> + // Chart: Bar distributions of 07_03b + <figcaption> + Figure 6. Distribution of websites labelled as having fast, average, + or slow FCP, broken down by device type. + </figcaption> + </figure> + <figure> + Device | Fast FCP | Average FCP | Slow FCP -- | -- | -- | -- desktop | + 2.80% | 39.40% | 57.80% phone | 1.76% | 35.62% | 62.62% -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img - src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" - class="chapter-hero" - /> - <h2 id="intro">Intro</h2> - <p> - Performance is a visceral part of the user experience. For - <a href="https://wpostats.com/">many websites</a>, an improvement to the - user experience by speeding up the page load time aligns with an improvement - to conversion rates. Conversely, when performance is poor, users don't - convert as often and have even been observed - <a - href="https://blog.fullstory.com/rage-clicks-turn-analytics-into-actionable-insights/" - >rage clicking</a - > - on the page in frustration. - </p> - <p> - There are many ways to quantify web performance. The most important thing is - to measure what actually matters to users. Events like - <code>onload</code> or <code>DOM content loaded</code> may not necessarily - reflect what users experience visually. For example, an email client might - have a very fast onload event but the only thing loaded is the interstitial - progress bar, meanwhile the inbox contents are loading asynchronously. The - loading metric that matters for this web app is the "time to inbox", and - focusing on the <code>onload</code> event may be misleading. For that reason - this chapter will look at more modern and universally applicable paint, - load, and interactivity metrics to try to capture how users are actually - experiencing the page. - </p> - <p> - There are two kinds of performance data: lab and field. You may have heard - these referred to as synthetic and real-user measurement (or RUM). Measuring - performance in the lab ensures that each website is tested under common - conditions like browser, connection speed, physical location, cache state, - etc. This guarantee of consistency makes each website comparable with one - another. On the other hand, measuring performance in the field represents - how users actually experience the web in all of the infinite combinations of - conditions that we could never capture in the lab. For the purposes of this - chapter and understanding real-world user experiences, we'll look at field - data. - </p> - <h2 id="the-state-of-performance">The state of performance</h2> - <p> - Almost all of the other chapters in the Web Almanac are based on data from - the <a href="https://httparchive.org/">HTTP Archive</a>. In order to capture - how real users experience the web, we need a different dataset. 
In this - section we're using the - <a href="http://bit.ly/chrome-ux-report">Chrome UX Report</a> (CrUX), a - public dataset from Google that consists of all the same websites as the - HTTP Archive and aggregates how Chrome users actually experience them. - Experiences are categorized by: - </p> - <ul> - <li> - the form factor of the users' devices - <ul> - <li>desktop</li> - <li>phone</li> - <li>tablet</li> - </ul> - </li> - <li> - users' effective connection type (ECT) in mobile terms - <ul> - <li>offline</li> - <li>slow 2G</li> - <li>2G</li> - <li>3G</li> - <li>4G</li> - </ul> - </li> - <li>users' geographic location</li> - </ul> - <p> - Experiences are measured monthly including paint, load, and interactivity - metrics. The first metric we'll look at is - <a - href="https://developers.google.com/web/fundamentals/performance/user-centric-performance-metrics#first_paint_and_first_contentful_paint" - >First Contentful Paint</a - > - (FCP). This is the time users spend waiting for the page to display - something useful to the screen, like an image or text. Next, we'll look at - look at a loading metric, - <a - href="https://csswizardry.com/2019/08/time-to-first-byte-what-it-is-and-why-it-matters/" - >Time to First Byte</a - > - (TTFB). This is a measure of how long the web page took from the time of the - user's navigation until they received the first byte of the response. And - finally, the last field metric we'll look at is - <a - href="https://developers.google.com/web/updates/2018/05/first-input-delay" - >First Input Delay</a - > - (FID). This is a relatively new metric and one that represents parts of the - UX other than loading performance. It measures the time from a user's first - interaction with a page's UI until the time the browser's main thread is - ready to process the event. - </p> - <p>So let's dive in and see what kind of insights we can find.</p> - <h3 id="fcp">FCP</h3> - <figure> - // Chart: flame distribution of 07_01 - <figcaption> - Figure 1. Distribution of websites' fast, average, and slow FCP - performance. - </figcaption> - </figure> - <p> - In Figure 1 above you can see how FCP experiences are distributed across the - web. Out of the millions of websites in the CrUX dataset, this chart - compresses the distribution down to 1,000 websites where each vertical slice - represents a single website. The chart is sorted by the percent of fast FCP - experiences, which are those occurring in less than 1 second. Slow - experiences occur in 2.5 seconds or more, and average experiences are - everything in between. At the extremes of the chart, there are some websites - with almost 100% fast experiences and some websites with almost 100% slow - experiences. In between, websites have a combination of fast, average, and - slow performance that seems to lean more towards fast or average than slow, - which is good. - </p> - <aside> - Note that when a user experiences slow performance, it's hard to say what - the reason might be. It could be that the website itself was built poorly - and inefficiently. Or there could be other environmental factors like the - user's slow connection, empty cache, etc. So when looking at this field data - we prefer to say that the user experiences themselves are slow and not - necessarily the websites. 
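[Editor's note] For reference, the PSI-style fast/average/slow labelling described above can be expressed as a small helper. This is an editorial sketch of the thresholds (at least 90% fast experiences for a "fast" site, 10% or more slow experiences for a "slow" site), not code from the Almanac's analysis pipeline; the input property names are hypothetical.

```js
// Sketch of the PSI-style labelling: "fast" if at least 90% of a site's FCP
// experiences are fast, "slow" if at least 10% are slow, otherwise "average".
// fastShare and slowShare are hypothetical inputs, not CrUX field names.
function labelWebsite({ fastShare, slowShare }) {
  if (fastShare >= 0.9) return 'fast';
  if (slowShare >= 0.1) return 'slow';
  return 'average';
}

labelWebsite({ fastShare: 0.92, slowShare: 0.02 }); // 'fast'
labelWebsite({ fastShare: 0.40, slowShare: 0.25 }); // 'slow'
labelWebsite({ fastShare: 0.60, slowShare: 0.05 }); // 'average'
```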
- </aside> - <p> - In order to categorize whether a website is sufficiently - <strong>fast</strong> we will use the - <a - href="https://developers.google.com/speed/docs/insights/v5/about#categories" - >PageSpeed Insights</a - > - (PSI) methodology where at least 90% of the website's FCP experiences must - be faster than 1 second. Similarly a sufficiently - <strong>slow</strong> website has 10% or more FCP experiences slower than - 2.5 seconds. We say a website has <strong>average</strong> performance when - it doesn't meet either of these conditions. - </p> - <figure> - // Chart: Bar distribution of 07_03 - <figcaption> - Figure 2. Distribution of websites labelled as having fast, average, or - slow FCP. - </figcaption> - </figure> - <figure> - Fast FCP | Average FCP | Slow FCP -- | -- | -- 2.17% | 37.55% | 60.28% + <figcaption> + Figure 7. Table of websites labelled as having fast, average, or slow + FCP, broken down by device type. + </figcaption> + </figure> + <p> + According to PSI's classification, 2.80% of websites have fast FCP + experiences overall for desktop users, compared to 1.76% for mobile + users. The entire distribution is skewed slightly faster for desktop + experiences, with fewer slow websites and more in the fast and average + category. + </p> + <h4 id="fcp-by-ect">FCP by ECT</h4> + <figure> + // Chart: Bar distribution of 07_03c + <figcaption> + Figure 8. Distribution of websites labelled as having fast, average, + or slow FCP, broken down by + <abbr title="effective connection type">ECT</abbr>. + </figcaption> + </figure> + <figure> + Speed | Fast FCP | Average FCP | Slow FCP -- | -- | -- | -- 4G | 2.31 | + 40.10 | 57.59 3G | 0.04 | 3.48 | 96.49 2G | 0.03 | 0.30 | 99.68 slow-2G + | 0.03 | 0.08 | 99.89 + + <figcaption> + Figure 9. Table of the percent of websites labelled as having fast, + average, or slow FCP, broken down by + <abbr title="effective connection type">ECT</abbr>. + </figcaption> + </figure> + <p> + In Figures 8 and 9 above, FCP experiences are grouped by the ECT of the + user experience. Interestingly, there is a correlation between ECT speed + and the percent of websites serving fast FCP. As the ECT speeds + decrease, the proportion of fast experiences approaches zero. 8.44% of + websites serve fast FCP to users with 4G ECT while 57.59% of those + websites serve slow FCP. 96.49% of websites serve slow FCP to users with + 3G ECT, 99.68% to 2G ECT, and 99.89% to slow-2G ECT. These results + suggest that websites almost never serve fast FCP consistently to users + on slow connections. + </p> + <h4 id="fcp-by-geo">FCP by geo</h4> + <figure> + // Chart: Bar distribution of 07_03d + <figcaption> + Figure 10. Distribution of websites labelled as having fast, average, + or slow FCP, broken down by geo. + </figcaption> + </figure> + <p> + Finally, we can slice FCP by users' geography (geo). The chart above + shows the top 23 geos having the highest number of distinct websites, an + indicator of overall popularity of the open web. The geos are sorted by + their percent of websites having sufficiently fast FCP experiences. At + the top of the list are three + <a href="https://en.wikipedia.org/wiki/Asia-Pacific">Asia-Pacific</a> + (APAC) geos: Korea, Taiwan, and Japan. This could be explained by the + availability of extremely + <a + href="https://en.wikipedia.org/wiki/List_of_countries_by_Internet_connection_speeds" + >fast network connection speeds in these regions</a + >. 
Korea has 11.10% of websites meeting the fast FCP bar and only 28.00% + rated as slow FCP. Recall that the global distribution of + fast/average/slow websites is approximately 2/38/60, making Korea a + significantly positive outlier. + </p> + <p> + Other APAC geos tell a different story. Thailand, Vietnam, Indonesia, + and India all have fewer than 1% of fast websites. These geos also have + more than double the proportion of slow websites than Korea. + </p> + <h3 id="ttfb">TTFB</h3> + <p> + <a href="https://web.dev/time-to-first-byte">Time to First Byte</a> + (TTFB) is a measure of how long the web page took from the time of the + user's navigation until they received the first byte of the response. + </p> + <figure> + ![Navigation Timing API diagram of the events in a page + navigation](/static/images/2019/07_Performance/nav-timing.png) + <figcaption> + Figure 11. Navigation Timing API diagram of the events in a page + navigation. + </figcaption> + </figure> + <p> + To help explain TTFB and the many factors that affect it, let's borrow a + diagram from the Navigation Timing API spec. In Figure 11 above, TTFB is + the duration from <code>startTime</code> to <code>responseStart</code>, + including everything in between: <code>unload</code>, + <code>redirects</code>, <code>AppCache</code>, <code>DNS</code>, + <code>SSL</code>, <code>TCP</code>, and the time the server spends + handling the request. Given that context, let's see how users are + experiencing this metric. + </p> + <figure> + // Chart: Flame distribution of 07_07 + <figcaption> + Figure 12. Distribution of websites' fast, average, and slow TTFB + performance. + </figcaption> + </figure> + <p> + Similar to the previous FCP chart, this is a view of 1,000 + representative samples ordered by fast TTFB. A + <a + href="https://developers.google.com/speed/docs/insights/Server#recommendations" + >fast TTFB</a + > + is one that happens in under 0.2 seconds (200 ms), a slow TTFB happens + in 1 second or more, and everything in between is average. + </p> + <p> + Looking at the curve of the fast proportions, the shape is quite + different from that of FCP. There are very few websites that have a fast + TTFB greater than 75%, while more than half are below 25%. + </p> + <p> + Let's apply a TTFB speed label to each website, similar to the PSI + methodology used above for FCP. If a website serves fast TTFB to 90% or + more user experiences, it's labelled as <strong>fast</strong>. Otherwise + if it serves <strong>slow</strong> TTFB to 10% or more user experiences, + it's slow. If neither of those conditions apply, it's + <strong>average</strong>. + </p> + <figure> + // Chart: Bar distribution of 07_08 + <figcaption> + Figure 13. Distribution of websites labelled as having fast, average, + or slow TTFB. + </figcaption> + </figure> + <figure> + Fast TTFB | Average TTFB | Slow TTFB -- | -- | -- 0.13% | 30.67% | + 69.20% + + <figcaption> + Figure 14. Table of the percent of websites labelled as having fast, + average, or slow TTFB. + </figcaption> + </figure> + <p> + 69.20% of websites have slow TTFB. This is significant because TTFB is a + blocker for all other performance metrics to follow. A user cannot + possibly experience a fast FCP if the TTFB takes more than 1 second. + Recall from the previous FCP section that about 98% of websites do not + have fast FCP. Therefore the ~70% of websites that have slow TTFB are + completely ineligible to be considered as having fast FCP. 
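[Editor's note] To make the startTime-to-responseStart definition above concrete, here is a hedged sketch of reading TTFB for the current navigation from the Navigation Timing API. It is illustrative only and is not how CrUX gathers its field data.

```js
// Sketch: TTFB for the current page, i.e. the duration from startTime to
// responseStart on the navigation entry described above.
const [navEntry] = performance.getEntriesByType('navigation');
if (navEntry) {
  // startTime is 0 for the navigation entry, so this is effectively responseStart.
  const ttfb = navEntry.responseStart - navEntry.startTime;
  console.log('TTFB (ms):', ttfb);
}
```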
+ </p> + <h4 id="ttfb-by-geo">TTFB by geo</h4> + <figure> + // Chart: Bar distribution of 07_08d + <figcaption> + Figure 15. Distribution of websites labelled as having fast, average, + or slow TTFB, broken down by geo. + </figcaption> + </figure> + <p> + Now let's look at the percent of websites serving fast TTFB to users in + different geos. APAC geos like Korea, Taiwan, and Japan are still + outperforming users from the rest of the world. But no geo has more than + 3% of websites with fast TTFB. + </p> + <h3 id="fid">FID</h3> + <p> + The last field metric we'll look at is + <a + href="https://developers.google.com/web/updates/2018/05/first-input-delay" + >First Input Delay</a + > + (FID). This metric represents the time from a user's first interaction + with a page's UI until the time the browser's main thread is ready to + process the event. Note that this doesn't include the time applications + spend actually handling the input. At worst, slow FID results in a page + that appears unresponsive and a frustrating user experience. + </p> + <p> + Let's start by defining some thresholds. According to the + <a + href="https://developers.google.com/speed/docs/insights/v5/about#categories" + >PSI methodology</a + >, a <strong>fast</strong> FID is one that happens in less than 50 ms. + This gives the application enough time to handle the input event and + provide feedback to the user in a time that feels instantaneous. A + <strong>slow</strong> FID is one that happens in 250 ms or more. + Everything in between is <strong>average</strong>. + </p> + <figure> + // Chart: Flame distribution of 07_02 + <figcaption> + Figure 16. Distribution of websites' fast, average, and slow FID + performance. + </figcaption> + </figure> + <p> + You know the drill by now. This chart shows the distribution of 1,000 + websites' fast, average, and slow FID. This is a dramatically different + chart from the ones for FCP and TTFB. The curve of fast FID very slowly + descends from 100% to 75% then takes a nosedive. The overwhelming + majority of FID experiences are fast for most websites. + </p> + <figure> + // Chart: Bar distribution of 07_04 + <figcaption> + Figure 17. Distribution of websites labelled as having fast, average, + or slow TTFB. + </figcaption> + </figure> + <figure> + Fast FID | Average FID | Slow FID -- | -- | -- 26.61% | 42.03% | 31.35% - <figcaption> - Figure 3. Table of the percent of websites labelled as having fast, - average, or slow FCP. - </figcaption> - </figure> - <p> - In Figures 2 and 3, the results show that only 2.17% of websites are - considered fast while 60.28% of websites are considered slow. To help us - understand how users experience FCP across different devices, let's segment - by form factor. - </p> - <h4 id="fcp-by-device">FCP by device</h4> - <figure> - // Chart: Flame distribution of 07_01b - <figcaption> - Figure 4. Distribution of _desktop_ websites' fast, average, and slow FCP - performance. - </figcaption> - </figure> - <figure> - // Chart: Flame distribution of 07_01c - <figcaption> - Figure 5. Distribution of _phone_ websites' fast, average, and slow FCP - performance. - </figcaption> - </figure> - <p> - In Figures 4 and 5 above, the FCP distributions of 1,000 sample websites are - broken down by desktop and phone. It's subtle, but the torso of the desktop - fast FCP distribution appears to be more convex than the distribution for - phone users. This visual approximation suggests that desktop users - experience a higher overall proportion of fast FCP. 
To verify this we can - apply the PSI methodology to each distribution. - </p> - <figure> - // Chart: Bar distributions of 07_03b - <figcaption> - Figure 6. Distribution of websites labelled as having fast, average, or - slow FCP, broken down by device type. - </figcaption> - </figure> - <figure> - Device | Fast FCP | Average FCP | Slow FCP -- | -- | -- | -- desktop | 2.80% - | 39.40% | 57.80% phone | 1.76% | 35.62% | 62.62% + <figcaption> + Figure 18. Table of the percent of websites labelled as having fast, + average, or slow FID. + </figcaption> + </figure> + <p> + The PSI methodology for labelling a website as having sufficiently fast + or slow FID is slightly different than that of FCP. For a site to be + fast, 95% of its FID experiences must be fast. A site is slow if 5% of + its FID experiences are slow. + </p> + <p> + The distribution of fast, average, and slow websites appears to be more + balanced, with 26.61% of websites qualifying as fast and 31.35% as slow. + </p> + <h4 id="fid-by-device">FID by device</h4> + <figure> + // Chart: Flame distribution of 07_02b + <figcaption> + Figure 19. Distribution of _desktop_ websites' fast, average, and slow + FID performance. + </figcaption> + </figure> + <figure> + // Chart: Flame distribution of 07_02c + <figcaption> + Figure 20. Distribution of _phone_ websites' fast, average, and slow + FID performance. + </figcaption> + </figure> + <p> + Breaking FID down by device, it becomes clear that there are two very + different stories. Desktop users enjoy fast FID almost all the time. + Sure there are some websites that throw out a slow experience now and + then, but the results are predominantly fast. Mobile users, on the other + hand, have what seem to be one of two experiences: pretty fast (but not + quite as often as desktop) and almost never fast. The latter is + experienced by users on only the tail ~10% of websites, but this is + still a substantial difference. + </p> + <figure> + // Chart: Bar distributions of 07_04b + <figcaption> + Figure 21. Distribution of websites labelled as having fast, average, + or slow FID, broken down by device type. + </figcaption> + </figure> + <figure> + Device | Fast FID | Average FID | Slow FID -- | -- | -- | -- desktop | + 70.32% | 23.20% | 6.48% phone | 13.76% | 43.21% | 43.03% - <figcaption> - Figure 7. Table of websites labelled as having fast, average, or slow FCP, - broken down by device type. - </figcaption> - </figure> - <p> - According to PSI's classification, 2.80% of websites have fast FCP - experiences overall for desktop users, compared to 1.76% for mobile users. - The entire distribution is skewed slightly faster for desktop experiences, - with fewer slow websites and more in the fast and average category. - </p> - <h4 id="fcp-by-ect">FCP by ECT</h4> - <figure> - // Chart: Bar distribution of 07_03c - <figcaption> - Figure 8. Distribution of websites labelled as having fast, average, or - slow FCP, broken down by - <abbr title="effective connection type">ECT</abbr>. - </figcaption> - </figure> - <figure> - Speed | Fast FCP | Average FCP | Slow FCP -- | -- | -- | -- 4G | 2.31 | - 40.10 | 57.59 3G | 0.04 | 3.48 | 96.49 2G | 0.03 | 0.30 | 99.68 slow-2G | - 0.03 | 0.08 | 99.89 + <figcaption> + Figure 22. Table of websites labelled as having fast, average, or slow + FID, broken down by device type. + </figcaption> + </figure> + <p> + When we apply the PSI labelling to desktop and phone experiences, the + distinction becomes crystal clear. 
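[Editor's note] For context on where field FID values like the ones that follow come from, the sketch below shows one way a page can record its own first input delay via the Event Timing API's first-input entry, in browsers that expose it. This is an illustration, not the CrUX collection mechanism.

```js
// Sketch: observing First Input Delay in browsers that expose the Event
// Timing API. FID is the delay between the user's first interaction and the
// moment the main thread could begin processing it.
const fidObserver = new PerformanceObserver((list) => {
  const [firstInput] = list.getEntries();
  if (firstInput) {
    const fid = firstInput.processingStart - firstInput.startTime;
    console.log('FID (ms):', fid);
    fidObserver.disconnect();
  }
});
fidObserver.observe({ type: 'first-input', buffered: true });
```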
70.32% of websites' FID experienced + by desktop users are fast compared to 6.48% slow. For mobile + experiences, 13.76% of websites are fast while 43.03% are slow. + </p> + <h4 id="fid-by-ect">FID by ECT</h4> + <figure> + // Chart: Bar distribution of 07_04c + <figcaption> + Figure 23. Distribution of websites labelled as having fast, average, + or slow FID, broken down by + <abbr title="effective connection type">ECT</abbr>. + </figcaption> + </figure> + <p> + On its face, FID seems like it would be driven primarily by CPU speed. + It'd be reasonable to assume that the slower the device itself is, the + higher the likelihood that it will be busy when the user attempts to + interact with a web page, right? + </p> + <p> + The ECT results above seem to suggest a correlation between connection + speed and FID performance. As users' effective connection speed + decreases, the percent of websites on which they experience fast FID + also decreases and slow FID increases. Interestingly, the percent of + websites with average FID is about the same across ECTs. + </p> + <h4 id="fid-by-geo">FID by geo</h4> + <figure> + // Chart: Bar distribution of 07_04d + <figcaption> + Figure 24. Distribution of websites labelled as having fast, average, + or slow FID, broken down by geo. + </figcaption> + </figure> + <p> + In this breakdown of FID by geographic location, Korea is out in front + of everyone else again. But the top geos have some new faces: the US, + Australia, and Canada are next with 35-40% of websites having fast FID. + </p> + <p> + As with the other geo-specific results, there are so many possible + factors that could be contributing to the user experience. For example, + perhaps wealthier geos are more privileged to be able to spend more + money on better network infrastructure and its residents have more money + to spend on desktops and/or high-end mobile phones. + </p> + <h2 id="conclusion">Conclusion</h2> + <p> + Quantifying how fast a web page loads is an imperfect science that can't + be represented by a single metric. Conventional metrics like + <code>onload</code> can miss the mark entirely by measuring irrelevant + or imperceptible parts of the user experience. User-perceived metrics + like FCP and FID more faithfully convey what users see and feel. Even + still, neither metric can be looked at in isolation to draw conclusions + about whether the overall page load experience was fast or slow. Only by + looking at many metrics holistically can we start to understand the + performance for an individual website and the state of the web. + </p> + <p> + The data presented in this chapter showed that there is still a lot of + work to do to meet the goals set for fast websites. Certain form + factors, effective connection types, and geos do correlate with better + user experiences, but we can't forget about the combinations of + demographics with poor performance. In many cases, the web platform is + used for business; making more money from improving conversion rates can + be a huge motivator for speeding up a website. Ultimately, for all + websites, performance is about delivering positive experiences to users + in a way that doesn't impede, frustrate, or enrage them. + </p> + <p> + As the web gets another year older and our ability to measure how users + experience it improves incrementally, I'm looking forward to developers + having access to metrics that capture more of the holistic experience. 
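[Editor's note] The newer metrics discussed next in this conclusion (Largest Contentful Paint and the Layout Instability API) can already be observed with the same PerformanceObserver pattern used for FCP and FID. The sketch below is illustrative and assumes a browser that exposes the largest-contentful-paint and layout-shift entry types.

```js
// Sketch: observing LCP candidates and layout shifts in browsers that
// expose these entry types (Chromium-based browsers at the time of writing).
new PerformanceObserver((list) => {
  const entries = list.getEntries();
  const lastEntry = entries[entries.length - 1];
  console.log('LCP candidate (ms):', lastEntry.renderTime || lastEntry.loadTime);
}).observe({ type: 'largest-contentful-paint', buffered: true });

let shiftTotal = 0;
new PerformanceObserver((list) => {
  for (const entry of list.getEntries()) {
    // Shifts that follow recent user input are usually excluded.
    if (!entry.hadRecentInput) shiftTotal += entry.value;
  }
  console.log('Accumulated layout shift so far:', shiftTotal);
}).observe({ type: 'layout-shift', buffered: true });
```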
+ FCP is very early on the timeline of showing useful content to users and + newer metrics like + <a href="https://web.dev/largest-contentful-paint" + >Largest Contentful Paint</a + > + (LCP) are emerging to improve our visibility into how page loads are + perceived. The + <a href="https://web.dev/layout-instability-api" + >Layout Instability API</a + > + has also given us a novel glimpse into the frustration users experience + beyond page load. Equipped with these new metrics, the web in 2020 will + become even more transparent, better understood, and give developers an + advantage to make more meaningful progress to improve performance and + contribute to positive user experiences. + </p> + </section> + <section class="authors"> + <h4>Authors :</h4> + <ul> + <li> + <img + class="avatar" + alt="Author name" + src="https://www.gravatar.com/avatar/cf58fcc6995e15f35e42532c3775fed6.jpg?d=mp&s=200" + /> + <div class="info"> + <span class="name">Full Name</span> + <span class="social"> + <a class="twitter" href="https://twitter.com/rick_viscomi"> + <img src="/static/images/twitter.png" alt="Twitter account" /> + </a> + + <a class="github" href="https://github.com/rviscomi"> + <img src="/static/images/github.png" alt="github account" /> + </a> + </span> - <figcaption> - Figure 9. Table of the percent of websites labelled as having fast, - average, or slow FCP, broken down by - <abbr title="effective connection type">ECT</abbr>. - </figcaption> - </figure> - <p> - In Figures 8 and 9 above, FCP experiences are grouped by the ECT of the user - experience. Interestingly, there is a correlation between ECT speed and the - percent of websites serving fast FCP. As the ECT speeds decrease, the - proportion of fast experiences approaches zero. 8.44% of websites serve fast - FCP to users with 4G ECT while 57.59% of those websites serve slow FCP. - 96.49% of websites serve slow FCP to users with 3G ECT, 99.68% to 2G ECT, - and 99.89% to slow-2G ECT. These results suggest that websites almost never - serve fast FCP consistently to users on slow connections. - </p> - <h4 id="fcp-by-geo">FCP by geo</h4> - <figure> - // Chart: Bar distribution of 07_03d - <figcaption> - Figure 10. Distribution of websites labelled as having fast, average, or - slow FCP, broken down by geo. - </figcaption> - </figure> - <p> - Finally, we can slice FCP by users' geography (geo). The chart above shows - the top 23 geos having the highest number of distinct websites, an indicator - of overall popularity of the open web. The geos are sorted by their percent - of websites having sufficiently fast FCP experiences. At the top of the list - are three - <a href="https://en.wikipedia.org/wiki/Asia-Pacific">Asia-Pacific</a> (APAC) - geos: Korea, Taiwan, and Japan. This could be explained by the availability - of extremely - <a - href="https://en.wikipedia.org/wiki/List_of_countries_by_Internet_connection_speeds" - >fast network connection speeds in these regions</a - >. Korea has 11.10% of websites meeting the fast FCP bar and only 28.00% - rated as slow FCP. Recall that the global distribution of fast/average/slow - websites is approximately 2/38/60, making Korea a significantly positive - outlier. - </p> - <p> - Other APAC geos tell a different story. Thailand, Vietnam, Indonesia, and - India all have fewer than 1% of fast websites. These geos also have more - than double the proportion of slow websites than Korea. 
- </p> - <h3 id="ttfb">TTFB</h3> - <p> - <a href="https://web.dev/time-to-first-byte">Time to First Byte</a> (TTFB) - is a measure of how long the web page took from the time of the user's - navigation until they received the first byte of the response. - </p> - <figure> - ![Navigation Timing API diagram of the events in a page - navigation](/static/images/2019/07_Performance/nav-timing.png) - <figcaption> - Figure 11. Navigation Timing API diagram of the events in a page - navigation. - </figcaption> - </figure> - <p> - To help explain TTFB and the many factors that affect it, let's borrow a - diagram from the Navigation Timing API spec. In Figure 11 above, TTFB is the - duration from <code>startTime</code> to <code>responseStart</code>, - including everything in between: <code>unload</code>, - <code>redirects</code>, <code>AppCache</code>, <code>DNS</code>, - <code>SSL</code>, <code>TCP</code>, and the time the server spends handling - the request. Given that context, let's see how users are experiencing this - metric. - </p> - <figure> - // Chart: Flame distribution of 07_07 - <figcaption> - Figure 12. Distribution of websites' fast, average, and slow TTFB - performance. - </figcaption> - </figure> - <p> - Similar to the previous FCP chart, this is a view of 1,000 representative - samples ordered by fast TTFB. A - <a - href="https://developers.google.com/speed/docs/insights/Server#recommendations" - >fast TTFB</a - > - is one that happens in under 0.2 seconds (200 ms), a slow TTFB happens in 1 - second or more, and everything in between is average. - </p> - <p> - Looking at the curve of the fast proportions, the shape is quite different - from that of FCP. There are very few websites that have a fast TTFB greater - than 75%, while more than half are below 25%. - </p> - <p> - Let's apply a TTFB speed label to each website, similar to the PSI - methodology used above for FCP. If a website serves fast TTFB to 90% or more - user experiences, it's labelled as <strong>fast</strong>. Otherwise if it - serves <strong>slow</strong> TTFB to 10% or more user experiences, it's - slow. If neither of those conditions apply, it's <strong>average</strong>. - </p> - <figure> - // Chart: Bar distribution of 07_08 - <figcaption> - Figure 13. Distribution of websites labelled as having fast, average, or - slow TTFB. - </figcaption> - </figure> - <figure> - Fast TTFB | Average TTFB | Slow TTFB -- | -- | -- 0.13% | 30.67% | 69.20% + <div class="tagline"> + Tagline of contributor here + </div> + </div> + </li> + </ul> + </section> - <figcaption> - Figure 14. Table of the percent of websites labelled as having fast, - average, or slow TTFB. - </figcaption> - </figure> - <p> - 69.20% of websites have slow TTFB. This is significant because TTFB is a - blocker for all other performance metrics to follow. A user cannot possibly - experience a fast FCP if the TTFB takes more than 1 second. Recall from the - previous FCP section that about 98% of websites do not have fast FCP. - Therefore the ~70% of websites that have slow TTFB are completely ineligible - to be considered as having fast FCP. - </p> - <h4 id="ttfb-by-geo">TTFB by geo</h4> - <figure> - // Chart: Bar distribution of 07_08d - <figcaption> - Figure 15. Distribution of websites labelled as having fast, average, or - slow TTFB, broken down by geo. - </figcaption> - </figure> - <p> - Now let's look at the percent of websites serving fast TTFB to users in - different geos. 
APAC geos like Korea, Taiwan, and Japan are still - outperforming users from the rest of the world. But no geo has more than 3% - of websites with fast TTFB. - </p> - <h3 id="fid">FID</h3> - <p> - The last field metric we'll look at is - <a - href="https://developers.google.com/web/updates/2018/05/first-input-delay" - >First Input Delay</a - > - (FID). This metric represents the time from a user's first interaction with - a page's UI until the time the browser's main thread is ready to process the - event. Note that this doesn't include the time applications spend actually - handling the input. At worst, slow FID results in a page that appears - unresponsive and a frustrating user experience. - </p> - <p> - Let's start by defining some thresholds. According to the - <a - href="https://developers.google.com/speed/docs/insights/v5/about#categories" - >PSI methodology</a - >, a <strong>fast</strong> FID is one that happens in less than 50 ms. This - gives the application enough time to handle the input event and provide - feedback to the user in a time that feels instantaneous. A - <strong>slow</strong> FID is one that happens in 250 ms or more. Everything - in between is <strong>average</strong>. - </p> - <figure> - // Chart: Flame distribution of 07_02 - <figcaption> - Figure 16. Distribution of websites' fast, average, and slow FID - performance. - </figcaption> - </figure> - <p> - You know the drill by now. This chart shows the distribution of 1,000 - websites' fast, average, and slow FID. This is a dramatically different - chart from the ones for FCP and TTFB. The curve of fast FID very slowly - descends from 100% to 75% then takes a nosedive. The overwhelming majority - of FID experiences are fast for most websites. - </p> - <figure> - // Chart: Bar distribution of 07_04 - <figcaption> - Figure 17. Distribution of websites labelled as having fast, average, or - slow TTFB. - </figcaption> - </figure> - <figure> - Fast FID | Average FID | Slow FID -- | -- | -- 26.61% | 42.03% | 31.35% + <nav id="chapter-navigation"> + <a id="previous-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> - <figcaption> - Figure 18. Table of the percent of websites labelled as having fast, - average, or slow FID. - </figcaption> - </figure> - <p> - The PSI methodology for labelling a website as having sufficiently fast or - slow FID is slightly different than that of FCP. For a site to be fast, 95% - of its FID experiences must be fast. A site is slow if 5% of its FID - experiences are slow. - </p> - <p> - The distribution of fast, average, and slow websites appears to be more - balanced, with 26.61% of websites qualifying as fast and 31.35% as slow. - </p> - <h4 id="fid-by-device">FID by device</h4> - <figure> - // Chart: Flame distribution of 07_02b - <figcaption> - Figure 19. Distribution of _desktop_ websites' fast, average, and slow FID - performance. - </figcaption> - </figure> - <figure> - // Chart: Flame distribution of 07_02c - <figcaption> - Figure 20. Distribution of _phone_ websites' fast, average, and slow FID - performance. - </figcaption> - </figure> - <p> - Breaking FID down by device, it becomes clear that there are two very - different stories. Desktop users enjoy fast FID almost all the time. Sure - there are some websites that throw out a slow experience now and then, but - the results are predominantly fast. 
Mobile users, on the other hand, have - what seem to be one of two experiences: pretty fast (but not quite as often - as desktop) and almost never fast. The latter is experienced by users on - only the tail ~10% of websites, but this is still a substantial difference. - </p> - <figure> - // Chart: Bar distributions of 07_04b - <figcaption> - Figure 21. Distribution of websites labelled as having fast, average, or - slow FID, broken down by device type. - </figcaption> - </figure> - <figure> - Device | Fast FID | Average FID | Slow FID -- | -- | -- | -- desktop | - 70.32% | 23.20% | 6.48% phone | 13.76% | 43.21% | 43.03% + <a id="next-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + </nav> + </section> +</article> - <figcaption> - Figure 22. Table of websites labelled as having fast, average, or slow - FID, broken down by device type. - </figcaption> - </figure> - <p> - When we apply the PSI labelling to desktop and phone experiences, the - distinction becomes crystal clear. 70.32% of websites' FID experienced by - desktop users are fast compared to 6.48% slow. For mobile experiences, - 13.76% of websites are fast while 43.03% are slow. - </p> - <h4 id="fid-by-ect">FID by ECT</h4> - <figure> - // Chart: Bar distribution of 07_04c - <figcaption> - Figure 23. Distribution of websites labelled as having fast, average, or - slow FID, broken down by - <abbr title="effective connection type">ECT</abbr>. - </figcaption> - </figure> - <p> - On its face, FID seems like it would be driven primarily by CPU speed. It'd - be reasonable to assume that the slower the device itself is, the higher the - likelihood that it will be busy when the user attempts to interact with a - web page, right? - </p> - <p> - The ECT results above seem to suggest a correlation between connection speed - and FID performance. As users' effective connection speed decreases, the - percent of websites on which they experience fast FID also decreases and - slow FID increases. Interestingly, the percent of websites with average FID - is about the same across ECTs. - </p> - <h4 id="fid-by-geo">FID by geo</h4> - <figure> - // Chart: Bar distribution of 07_04d - <figcaption> - Figure 24. Distribution of websites labelled as having fast, average, or - slow FID, broken down by geo. - </figcaption> - </figure> - <p> - In this breakdown of FID by geographic location, Korea is out in front of - everyone else again. But the top geos have some new faces: the US, - Australia, and Canada are next with 35-40% of websites having fast FID. - </p> - <p> - As with the other geo-specific results, there are so many possible factors - that could be contributing to the user experience. For example, perhaps - wealthier geos are more privileged to be able to spend more money on better - network infrastructure and its residents have more money to spend on - desktops and/or high-end mobile phones. - </p> - <h2 id="conclusion">Conclusion</h2> - <p> - Quantifying how fast a web page loads is an imperfect science that can't be - represented by a single metric. Conventional metrics like - <code>onload</code> can miss the mark entirely by measuring irrelevant or - imperceptible parts of the user experience. User-perceived metrics like FCP - and FID more faithfully convey what users see and feel. 
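[Editor's note] Since the chapter is built on field (RUM) data, it may help to sketch how an individual site could report such user-perceived metrics for its own pages. The /analytics endpoint and payload shape below are hypothetical placeholders, and this is not how CrUX gathers its data.

```js
// Hypothetical sketch: reporting a field metric to your own analytics endpoint.
// The URL and payload shape are placeholders, not a real service.
function reportMetric(name, value) {
  const body = JSON.stringify({ name, value, page: location.pathname });
  // sendBeacon queues the request so it can survive the page being unloaded.
  if (!navigator.sendBeacon('/analytics', body)) {
    fetch('/analytics', { method: 'POST', body, keepalive: true });
  }
}

reportMetric('fcp', 1234.5);
```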
Even still, neither - metric can be looked at in isolation to draw conclusions about whether the - overall page load experience was fast or slow. Only by looking at many - metrics holistically can we start to understand the performance for an - individual website and the state of the web. - </p> - <p> - The data presented in this chapter showed that there is still a lot of work - to do to meet the goals set for fast websites. Certain form factors, - effective connection types, and geos do correlate with better user - experiences, but we can't forget about the combinations of demographics with - poor performance. In many cases, the web platform is used for business; - making more money from improving conversion rates can be a huge motivator - for speeding up a website. Ultimately, for all websites, performance is - about delivering positive experiences to users in a way that doesn't impede, - frustrate, or enrage them. - </p> - <p> - As the web gets another year older and our ability to measure how users - experience it improves incrementally, I'm looking forward to developers - having access to metrics that capture more of the holistic experience. FCP - is very early on the timeline of showing useful content to users and newer - metrics like - <a href="https://web.dev/largest-contentful-paint" - >Largest Contentful Paint</a - > - (LCP) are emerging to improve our visibility into how page loads are - perceived. The - <a href="https://web.dev/layout-instability-api">Layout Instability API</a> - has also given us a novel glimpse into the frustration users experience - beyond page load. Equipped with these new metrics, the web in 2020 will - become even more transparent, better understood, and give developers an - advantage to make more meaningful progress to improve performance and - contribute to positive user experiences. - </p> -</section> {% endblock %} diff --git a/src/templates/en/2019/chapters/pwa.html b/src/templates/en/2019/chapters/pwa.html index 4d4e6470d55..747c7385d82 100644 --- a/src/templates/en/2019/chapters/pwa.html +++ b/src/templates/en/2019/chapters/pwa.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! 
- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,463 +10,544 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css" /> -{% endblock %} {% set metadata = -{"part_number":"II","chapter_number":11,"title":"Progressive Web -Apps","authors":["tomayac"," jeffposnick"],"reviewers":["hyperpress"," +{% set metadata = {"part_number":"II","chapter_number":11,"title":"Progressive +Web Apps","authors":["tomayac"," jeffposnick"],"reviewers":["hyperpress"," ahmadawais"]} %} {% block main %} -<aside> - <ul> - <li> - <a href="#introduction">Introduction</a> - </li> - - <li> - <a href="#service-workers">Service Workers</a> - +<article id="chapter" class="main"> + <nav class="index"> + <div class="index-box floating-card"> + <h2 class="header">Index</h2> + <h2 class="header-mobile">Index</h2> <ul> <li> - <a href="#service-worker-registrations-and-installability" - >Service Worker Registrations and Installability</a - > + <a href="#introduction">Introduction</a> + </li> + + <li> + <a href="#service-workers">Service Workers</a> + + <ul> + <li> + <a href="#service-worker-registrations-and-installability" + >Service Worker Registrations and Installability</a + > + </li> + + <li> + <a href="#service-worker-events">Service Worker Events</a> + </li> + + <li> + <a href="#service-worker-file-sizes">Service Worker File Sizes</a> + </li> + </ul> </li> <li> - <a href="#service-worker-events">Service Worker Events</a> + <a href="#web-app-manifests">Web App Manifests</a> + + <ul> + <li> + <a href="#web-app-manifest-properties" + >Web App Manifest Properties</a + > + </li> + + <li> + <a href="#display-values">Display Values</a> + </li> + + <li> + <a href="#category-values">Category Values</a> + </li> + + <li> + <a href="#icon-sizes">Icon Sizes</a> + </li> + + <li> + <a href="#orientation-values">Orientation Values</a> + </li> + </ul> </li> <li> - <a href="#service-worker-file-sizes">Service Worker File Sizes</a> + <a href="#workbox">Workbox</a> </li> </ul> - </li> - - <li> - <a href="#web-app-manifests">Web App Manifests</a> + </div> + </nav> + <section class="content"> + <section class="body"> + <h1> + <div class="decorative-line"></div> + {{ metadata.get('title') }} + <div class="decorative-line-mobile"></div> + </h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="content-banner" + /> + <h2 id="introduction">Introduction</h2> + <p> + Progressive Web Apps (PWA) are a new class of web applications, building + on top of platform primitives <br /> + like the + <a + href="https://developer.mozilla.org/en/docs/Web/API/Service_Worker_API" + >Service Worker APIs</a + >. <br /> + Service workers allow apps to support network-independent loading by + acting as a network proxy, <br /> + intercepting your web app's outgoing requests, and replying with + programmatic or cached responses. <br /> + Service workers can receive push notifications, and synchronize data in + the background even when <br /> + the corresponding app is not running. 
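[Editor's note] As background for the registration numbers that follow, registering a service worker is a short call from the page. The sketch below is illustrative; "/sw.js" is a placeholder path.

```js
// Sketch: feature-detect and register a service worker. "/sw.js" is a
// placeholder path; the scope defaults to the script's directory.
if ('serviceWorker' in navigator) {
  navigator.serviceWorker.register('/sw.js')
    .then((registration) => console.log('Service worker scope:', registration.scope))
    .catch((error) => console.error('Service worker registration failed:', error));
}
```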
Additional, service workers — + together with <br /> + <a href="https://developer.mozilla.org/en-US/docs/Web/Manifest" + >Web App Manifests</a + > + — <br /> + allow users to install PWAs to their devices’ home screens. + </p> + <p> + Service workers were + <a + href="https://blog.chromium.org/2014/12/chrome-40-beta-powerful-offline-and.html" + >first implemented in Chrome 40</a + >, <br /> + back in December 2014, and the term Progressive Web Apps was <br /> + <a + href="https://infrequently.org/2015/06/progressive-apps-escaping-tabs-without-losing-our-soul/" + >coined by Frances Berriman and Alex Russell</a + > + <br /> + in 2015. As service workers are now finally + <a href="https://jakearchibald.github.io/isserviceworkerready/" + >implemented in all major browsers</a + >, <br /> + we were wondering how many PWAs are actually out there, and how do they + make use of these new technologies? <br /> + Certain advanced APIs like + <a + href="https://developers.google.com/web/updates/2015/12/background-sync" + >Background Sync</a + > + <br /> + are currently still + <a href="https://caniuse.com/#feat=background-sync" + >only available on Chromium-based browsers</a + >, <br /> + so as an additional question, we looked into which features these PWAs + actually use. + </p> + <p> + All data in the HTTP Archive can be + <a + href="https://github.com/HTTPArchive/legacy.httparchive.org/blob/master/docs/bigquery-gettingstarted.md" + >queried through BigQuery</a + >,<br /> + where multiple tables are available in the + <code>httparchive</code> project. As these tables tend to get fairly + big, <br /> + they are partitioned, but multiple associated tables can be queried + using the + <a + href="https://cloud.google.com/bigquery/docs/querying-wildcard-tables" + >wildcard symbol '*'</a + >.<br /> + For our research, three families of tables are relevant: + </p> <ul> <li> - <a href="#web-app-manifest-properties">Web App Manifest Properties</a> + <code>httparchive.lighthouse.*</code>, which contains data about + <a href="https://developers.google.com/web/tools/lighthouse/" + >Lighthouse</a + > + runs. Note that Lighthouse data is only available for mobile pages. </li> - <li> - <a href="#display-values">Display Values</a> + <code>httparchive.pages.*</code>, which contain the JSON-encoded + parent documents’ HAR data. </li> - <li> - <a href="#category-values">Category Values</a> + <code>httparchive.response_bodies.*</code>, which contains the raw + response bodies of all resources and sub-resources of all sites in the + archive. </li> - + </ul> + <h2 id="service-workers">Service Workers</h2> + <h3 id="service-worker-registrations-and-installability"> + Service Worker Registrations and Installability + </h3> + <p> + The first metric we explore are service worker installations. Looking at + the data exposed through <br /> + feature counters in the HTTP Archive, we find that 0.44% of all desktop + and 0.37% of all mobile pages <br /> + register a service worker, and both curves over time are steeply + growing. Now this might not look overly <br /> + impressive, but taking traffic data from Chrome Platform Status into + account, we can see that about <br /> + <a + href="https://www.chromestatus.com/metrics/feature/timeline/popularity/990" + >a service worker controlled 15% of all page loads</a + >,<br /> + which can be interpreted as popular, high-traffic sites increasingly + having started to embrace service workers. 
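[Editor's note] The distinction drawn above, between a page that registers a service worker and a page load that is controlled by one, can be checked from the page itself. A small illustrative sketch:

```js
// Sketch: distinguishing "registers a service worker" from "this page load
// is controlled by one" (the latter is what the traffic counter measures).
if ('serviceWorker' in navigator) {
  console.log('Controlled by a service worker:',
    Boolean(navigator.serviceWorker.controller));
  navigator.serviceWorker.getRegistrations()
    .then((regs) => console.log('Registrations for this origin:', regs.length));
}
```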
+ </p> + <p><code><timeseries chart of 11_01b></code></p> + <p> + <strong>Figure 1:</strong> Service Worker installation over time for + desktop and mobile + </p> + <p> + Looking at Lighthouse data in the HTTP Archive, 1.56% of mobile pages + are + <a + href="https://developers.google.com/web/tools/lighthouse/audits/install-prompt" + >installable</a + >,<br /> + that is, they pass Lighthouse’s + <em>user can be prompted to install the web app</em> test. <br /> + Lighthouse tests currently are only available for mobile pages. To + control the install experience, <br /> + 0.82% of all desktop and 0.94% of all mobile pages use the + <a + href="https://w3c.github.io/manifest/#beforeinstallpromptevent-interface" + ><code>OnBeforeInstallPrompt</code> interface</a + >. + </p> + <h3 id="service-worker-events">Service Worker Events</h3> + <p> + In a service worker one can + <a + href="https://developers.google.com/web/fundamentals/primers/service-workers/lifecycle" + >listen for a number of events</a + >: + </p> + <ul> <li> - <a href="#icon-sizes">Icon Sizes</a> + <code>install</code>, which occurs upon service worker installation. </li> - <li> - <a href="#orientation-values">Orientation Values</a> + <code>activate</code>, which occurs upon service worker activation. + </li> + <li> + <code>fetch</code>, which occurs whenever a resource is fetched. + </li> + <li> + <code>push</code>, which occurs when a push notification arrives. + </li> + <li> + <code>notificationclick</code>, which occurs when a notification is + being clicked. + </li> + <li> + <code>notificationclose</code>, which occurs when a notification is + being closed.<br /> + <code>- message</code>, which occurs when a message sent via + <code>postMessage()</code> arrives. + </li> + <li> + <code>sync</code>, which occurs when a Background Sync event occurs. + </li> + </ul> + <p> + We have examined which of these events are being listened to by service + workers we could find in the HTTP Archive. <br /> + The results for mobile and desktop are very similar with + <code>fetch</code>, <code>install</code>, and + <code>activate</code> being the three <br /> + most popular events, followed by <code>notificationclick</code> and + <code>push</code>. If we interpret these results, offline use <br /> + cases that service workers enable are the most attractive feature for + app developers, far ahead of <br /> + push notifications. Due to its limited availability, and less common use + case, background sync doesn’t <br /> + play a significant role at the moment. + </p> + <p><code><bar chart of 11_03 mobile></code></p> + <p> + <strong>Figure 2a:</strong> Service worker events on mobile, ordered by + decreasing frequency. + </p> + <p><code><bar chart of 11_03 desktop></code></p> + <p> + <strong>Figure 2b:</strong> Service worker events on desktop, ordered by + decreasing frequency. + </p> + <h3 id="service-worker-file-sizes">Service Worker File Sizes</h3> + <p> + File size or lines of code are in general a bad proxy for the complexity + of the task at hand. <br /> + In this case, however, it is definitely interesting to compare + (compressed) file sizes of service workers <br /> + for mobile and desktop. The median service worker file on desktop is 895 + bytes, whereas on mobile it’s 694 bytes. <br /> + Throughout all percentiles desktop service workers are larger than + mobile service workers. 
<br /> + We note that these stats don’t account for dynamically imported scripts + through the <br /> + <a + href="https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts" + ><code>importScripts()</code></a + > + method, <br /> + which likely skews the results higher. + </p> + <p><code><distribution of 11_03b mobile></code></p> + <p> + <strong>Figure 3a:</strong> Percentiles of service worker file sizes on + mobile. + </p> + <p><code><distribution of 11_03b desktop></code></p> + <p> + <strong>Figure 3b:</strong> Percentiles of service worker file sizes on + desktop. + </p> + <h2 id="web-app-manifests">Web App Manifests</h2> + <h3 id="web-app-manifest-properties">Web App Manifest Properties</h3> + <p> + The web app manifest is a simple JSON file that tells the browser about + a web application <br /> + and how it should behave when installed on the user's mobile device or + desktop. A typical <br /> + manifest file includes information about the app name, icons it should + use, the start URL <br /> + it should open at when launched, and more. Only 1.54% of all encountered + manifests were <br /> + invalid JSON, and the rest parsed correctly. + </p> + <p> + We looked at the different properties defined by the <br /> + <a href="https://w3c.github.io/manifest/#webappmanifest-dictionary" + >specification</a + >, <br /> + and also considered non-standard proprietary properties. According to + the Web App Manifest spec, <br /> + the following properties are allowed: <code>dir</code>, + <code>lang</code>, <code>name</code>, <code>short_name</code>, + <code>description</code>, <br /> + <code>icons</code>, <code>screenshots</code>, <code>categories</code>, + <code>iarc_rating_id</code>, <code>start_url</code>, + <code>display</code>, <code>orientation</code>, <br /> + <code>theme_color</code>, <code>background_color</code>, + <code>scope</code>, <code>serviceworker</code>, + <code>related_applications</code>, and <br /> + <code>prefer_related_applications</code>. The only property that we + didn’t observe in the wild was <br /> + <code>iarc_rating_id</code>, which is a string that represents the + International Age Rating Coalition (IARC) <br /> + certification code of the web application. It is intended to be used to + determine which ages <br /> + the web application is appropriate for. The proprietary properties we + encountered still <br /> + frequently were <code>gcm_sender_id</code> and + <code>gcm_user_visible_only</code> from the legacy <br /> + Google Cloud Messaging (GCM) service. Interestingly there’re almost no + differences between mobile <br /> + and desktop. On both platforms, however, there’s a long tail of + properties that are not interpreted <br /> + by browsers but that contain potentially useful metadata like + <code>author</code> or <code>version</code>. We also found <br /> + a non-trivial amount of mistyped properties, our favorite being + <code>shot_name</code>. An interesting outlier <br /> + is the <code>serviceworker</code> property which is standard, but not + implemented by any browser vendor — <br /> + nevertheless, it was found on 0.09% of all web app manifests used by + mobile and desktop pages. + </p> + <p><code><bar chart of 11_04 mobile></code></p> + <p> + <strong>Figure 4a:</strong> Web App Manifest properties ordered by + decreasing popularity on mobile. + </p> + <p><code><bar chart of 11_04 mobile></code></p> + <p> + <strong>Figure 4b:</strong> Web App Manifest properties ordered by + decreasing popularity on desktop. 
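[Editor's note] Looking back at the service worker events discussed earlier, with fetch, install, and activate being the most commonly listened-to, a minimal service worker using those three listeners might look like the sketch below. The cache name and precache list are placeholders, not from the chapter's dataset.

```js
// sw.js: minimal illustrative service worker using the three most common
// events reported above. The cache name and precache URLs are placeholders.
const CACHE = 'app-shell-v1';

self.addEventListener('install', (event) => {
  event.waitUntil(
    caches.open(CACHE).then((cache) => cache.addAll(['/', '/offline.html']))
  );
});

self.addEventListener('activate', (event) => {
  // Old caches are typically cleaned up here before taking control of clients.
  event.waitUntil(self.clients.claim());
});

self.addEventListener('fetch', (event) => {
  event.respondWith(
    caches.match(event.request).then((cached) => cached || fetch(event.request))
  );
});
```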
+ </p> + <h3 id="display-values">Display Values</h3> + <p> + Looking at the values developers set for the + <code>display</code> property, it becomes immediately clear <br /> + that they want PWAs to be perceived as “proper” apps that don’t reveal + their web technology origins. <br /> + By choosing <code>"standalone"</code>, they make sure no browser UI is + shown to the end-user. This is reflected <br /> + by the majority of apps that make use of the + <code>prefers_related_applications</code> property: more that 97% <br /> + of both mobile and desktop applications do <em>not</em> prefer native + applications. + </p> + <p><code><11_04c mobile></code></p> + <p> + <strong>Figure 5a:</strong> Values for the <code>display</code> property + on mobile. + </p> + <p><code><11_04c desktop></code></p> + <p> + <strong>Figure 5b:</strong> Values for the <code>display</code> property + on desktop. + </p> + <h3 id="category-values">Category Values</h3> + <p> + The <code>categories</code> member describes the expected application + categories to which the web application belongs. <br /> + It is only meant as a hint to catalogs or stores listing web + applications, and it is expected that <br /> + these will make a best effort to find appropriate categories (or + category) under which to list the <br /> + web application. There were not too many manifests that made use of the + property, but it is <br /> + interesting to see the shift from <em>shopping</em> being the most + popular category on mobile to <em>business</em>, <br /> + <em>technology</em>, and <em>web</em> (whatever may be meant with that) + on desktop that share the first place evenly. + </p> + <p><code><11_04d mobile></code></p> + <p> + <strong>Figure 6a:</strong> Values for the + <code>categories</code> property on mobile. + </p> + <p><code><11_04d desktop></code></p> + <p> + <strong>Figure 6b:</strong> Values for the + <code>categories</code> property on desktop. + </p> + <h3 id="icon-sizes">Icon Sizes</h3> + <p> + Lighthouse + <a + href="https://developers.google.com/web/tools/lighthouse/audits/manifest-contains-192px-icon" + >requires</a + > + <br /> + at least an icon sized 192×192, but common favicon generation tools + create a plethora of other sizes, too. <br /> + Lighthouse’s rule is probably the culprit for 192×192 being the most + popular choice of icon size on both <br /> + desktop and mobile, despite + <a + href="https://developers.google.com/web/fundamentals/web-app-manifest#icons" + >Google’s documentation</a + > + <br /> + additionally explicitly recommending 512×512, which doesn’t show as a + particularly prominent option. + </p> + <p><code><11_04f mobile></code></p> + <p><strong>Figure 7a:</strong> Popular icon sizes on mobile.</p> + <p><code><11_04f desktop></code></p> + <p><strong>Figure 7b:</strong> Popular icon sizes on desktop.</p> + <h3 id="orientation-values">Orientation Values</h3> + <p> + The valid values for the <code>orientation</code> property are + <a + href="https://www.w3.org/TR/screen-orientation/#dom-orientationlocktype" + >defined</a + ><br /> + in the Screen Orientation API specification. Namely there are + <code>"any"</code>, <code>"natural"</code>, <code>"landscape"</code>, + <br /> + <code>"portrait"</code>, <code>"portrait-primary"</code>, + <code>"portrait-secondary"</code>, <code>"landscape-primary"</code>, and + <code>"landscape-secondary"</code>. <br /> + Portrait orientation is the clear winner on both platforms, followed by + any orientation. 
+ </p> + <p><code><11_04g mobile></code></p> + <p><strong>Figure 8a:</strong> Popular orientation values on mobile.</p> + <p><code><11_04g desktop></code></p> + <p><strong>Figure 8b:</strong> Popular orientation values on desktop.</p> + <h2 id="workbox">Workbox</h2> + <p> + Workbox is a set of libraries that help with common service worker use + cases. For instance, Workbox has tools <br /> + that can plug in to your build process and generate a manifest of files, + which are then precached by your <br /> + service worker. Workbox includes libraries to handle runtime caching, + request routing, cache expiration, <br /> + background sync, and more. + </p> + <p> + Given the low-level nature of the service worker APIs, many developers + have turned to Workbox as a way of <br /> + structuring their service worker logic into higher-level, reusable + chunks of code. Workbox adoption is also <br /> + driven by its inclusion as a feature in a number of popular JavaScript + framework starter kits, like <br /> + <a href="https://create-react-app.dev/" + ><code>create-react-app</code></a + > + and + <a href="https://www.npmjs.com/package/@vue/cli-plugin-pwa" + >Vue's PWA plugin</a + >. + </p> + <p> + The HTTP Archive shows that, out of the total population of sites that + register a service worker, <br /> + 12.71% of them are using at least one of the Workbox libraries. This + percentage is roughly consistent <br /> + across desktop and mobile, with a slightly lower percentage (11.46%) on + mobile compared to desktop (14.36%). + </p> + </section> + <section class="authors"> + <h4>Authors :</h4> + <ul> + <li> + <img + class="avatar" + alt="Author name" + src="https://www.gravatar.com/avatar/cf58fcc6995e15f35e42532c3775fed6.jpg?d=mp&s=200" + /> + <div class="info"> + <span class="name">Full Name</span> + <span class="social"> + <a class="twitter" href="https://twitter.com/rick_viscomi"> + <img src="/static/images/twitter.png" alt="Twitter account" /> + </a> + + <a class="github" href="https://github.com/rviscomi"> + <img src="/static/images/github.png" alt="github account" /> + </a> + </span> + + <div class="tagline"> + Tagline of contributor here + </div> + </div> </li> </ul> - </li> + </section> + + <nav id="chapter-navigation"> + <a id="previous-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> - <li> - <a href="#workbox">Workbox</a> - </li> - </ul> -</aside> + <a id="next-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + </nav> + </section> +</article> -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img - src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" - class="chapter-hero" - /> - <h2 id="introduction">Introduction</h2> - <p> - Progressive Web Apps (PWA) are a new class of web applications, building on - top of platform primitives <br /> - like the - <a href="https://developer.mozilla.org/en/docs/Web/API/Service_Worker_API" - >Service Worker APIs</a - >. <br /> - Service workers allow apps to support network-independent loading by acting - as a network proxy, <br /> - intercepting your web app's outgoing requests, and replying with - programmatic or cached responses. 
<br /> - Service workers can receive push notifications, and synchronize data in the - background even when <br /> - the corresponding app is not running. Additional, service workers — together - with <br /> - <a href="https://developer.mozilla.org/en-US/docs/Web/Manifest" - >Web App Manifests</a - > - — <br /> - allow users to install PWAs to their devices’ home screens. - </p> - <p> - Service workers were - <a - href="https://blog.chromium.org/2014/12/chrome-40-beta-powerful-offline-and.html" - >first implemented in Chrome 40</a - >, <br /> - back in December 2014, and the term Progressive Web Apps was <br /> - <a - href="https://infrequently.org/2015/06/progressive-apps-escaping-tabs-without-losing-our-soul/" - >coined by Frances Berriman and Alex Russell</a - > - <br /> - in 2015. As service workers are now finally - <a href="https://jakearchibald.github.io/isserviceworkerready/" - >implemented in all major browsers</a - >, <br /> - we were wondering how many PWAs are actually out there, and how do they make - use of these new technologies? <br /> - Certain advanced APIs like - <a href="https://developers.google.com/web/updates/2015/12/background-sync" - >Background Sync</a - > - <br /> - are currently still - <a href="https://caniuse.com/#feat=background-sync" - >only available on Chromium-based browsers</a - >, <br /> - so as an additional question, we looked into which features these PWAs - actually use. - </p> - <p> - All data in the HTTP Archive can be - <a - href="https://github.com/HTTPArchive/legacy.httparchive.org/blob/master/docs/bigquery-gettingstarted.md" - >queried through BigQuery</a - >,<br /> - where multiple tables are available in the <code>httparchive</code> project. - As these tables tend to get fairly big, <br /> - they are partitioned, but multiple associated tables can be queried using - the - <a href="https://cloud.google.com/bigquery/docs/querying-wildcard-tables" - >wildcard symbol '*'</a - >.<br /> - For our research, three families of tables are relevant: - </p> - <ul> - <li> - <code>httparchive.lighthouse.*</code>, which contains data about - <a href="https://developers.google.com/web/tools/lighthouse/" - >Lighthouse</a - > - runs. Note that Lighthouse data is only available for mobile pages. - </li> - <li> - <code>httparchive.pages.*</code>, which contain the JSON-encoded parent - documents’ HAR data. - </li> - <li> - <code>httparchive.response_bodies.*</code>, which contains the raw - response bodies of all resources and sub-resources of all sites in the - archive. - </li> - </ul> - <h2 id="service-workers">Service Workers</h2> - <h3 id="service-worker-registrations-and-installability"> - Service Worker Registrations and Installability - </h3> - <p> - The first metric we explore are service worker installations. Looking at the - data exposed through <br /> - feature counters in the HTTP Archive, we find that 0.44% of all desktop and - 0.37% of all mobile pages <br /> - register a service worker, and both curves over time are steeply growing. - Now this might not look overly <br /> - impressive, but taking traffic data from Chrome Platform Status into - account, we can see that about <br /> - <a - href="https://www.chromestatus.com/metrics/feature/timeline/popularity/990" - >a service worker controlled 15% of all page loads</a - >,<br /> - which can be interpreted as popular, high-traffic sites increasingly having - started to embrace service workers. 
- </p> - <p><code><timeseries chart of 11_01b></code></p> - <p> - <strong>Figure 1:</strong> Service Worker installation over time for desktop - and mobile - </p> - <p> - Looking at Lighthouse data in the HTTP Archive, 1.56% of mobile pages are - <a - href="https://developers.google.com/web/tools/lighthouse/audits/install-prompt" - >installable</a - >,<br /> - that is, they pass Lighthouse’s - <em>user can be prompted to install the web app</em> test. <br /> - Lighthouse tests currently are only available for mobile pages. To control - the install experience, <br /> - 0.82% of all desktop and 0.94% of all mobile pages use the - <a href="https://w3c.github.io/manifest/#beforeinstallpromptevent-interface" - ><code>OnBeforeInstallPrompt</code> interface</a - >. - </p> - <h3 id="service-worker-events">Service Worker Events</h3> - <p> - In a service worker one can - <a - href="https://developers.google.com/web/fundamentals/primers/service-workers/lifecycle" - >listen for a number of events</a - >: - </p> - <ul> - <li> - <code>install</code>, which occurs upon service worker installation. - </li> - <li><code>activate</code>, which occurs upon service worker activation.</li> - <li><code>fetch</code>, which occurs whenever a resource is fetched.</li> - <li><code>push</code>, which occurs when a push notification arrives.</li> - <li> - <code>notificationclick</code>, which occurs when a notification is being - clicked. - </li> - <li> - <code>notificationclose</code>, which occurs when a notification is being - closed.<br /> - <code>- message</code>, which occurs when a message sent via - <code>postMessage()</code> arrives. - </li> - <li> - <code>sync</code>, which occurs when a Background Sync event occurs. - </li> - </ul> - <p> - We have examined which of these events are being listened to by service - workers we could find in the HTTP Archive. <br /> - The results for mobile and desktop are very similar with <code>fetch</code>, - <code>install</code>, and <code>activate</code> being the three <br /> - most popular events, followed by <code>notificationclick</code> and - <code>push</code>. If we interpret these results, offline use <br /> - cases that service workers enable are the most attractive feature for app - developers, far ahead of <br /> - push notifications. Due to its limited availability, and less common use - case, background sync doesn’t <br /> - play a significant role at the moment. - </p> - <p><code><bar chart of 11_03 mobile></code></p> - <p> - <strong>Figure 2a:</strong> Service worker events on mobile, ordered by - decreasing frequency. - </p> - <p><code><bar chart of 11_03 desktop></code></p> - <p> - <strong>Figure 2b:</strong> Service worker events on desktop, ordered by - decreasing frequency. - </p> - <h3 id="service-worker-file-sizes">Service Worker File Sizes</h3> - <p> - File size or lines of code are in general a bad proxy for the complexity of - the task at hand. <br /> - In this case, however, it is definitely interesting to compare (compressed) - file sizes of service workers <br /> - for mobile and desktop. The median service worker file on desktop is 895 - bytes, whereas on mobile it’s 694 bytes. <br /> - Throughout all percentiles desktop service workers are larger than mobile - service workers. 
<br /> - We note that these stats don’t account for dynamically imported scripts - through the <br /> - <a - href="https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts" - ><code>importScripts()</code></a - > - method, <br /> - which likely skews the results higher. - </p> - <p><code><distribution of 11_03b mobile></code></p> - <p> - <strong>Figure 3a:</strong> Percentiles of service worker file sizes on - mobile. - </p> - <p><code><distribution of 11_03b desktop></code></p> - <p> - <strong>Figure 3b:</strong> Percentiles of service worker file sizes on - desktop. - </p> - <h2 id="web-app-manifests">Web App Manifests</h2> - <h3 id="web-app-manifest-properties">Web App Manifest Properties</h3> - <p> - The web app manifest is a simple JSON file that tells the browser about a - web application <br /> - and how it should behave when installed on the user's mobile device or - desktop. A typical <br /> - manifest file includes information about the app name, icons it should use, - the start URL <br /> - it should open at when launched, and more. Only 1.54% of all encountered - manifests were <br /> - invalid JSON, and the rest parsed correctly. - </p> - <p> - We looked at the different properties defined by the <br /> - <a href="https://w3c.github.io/manifest/#webappmanifest-dictionary" - >specification</a - >, <br /> - and also considered non-standard proprietary properties. According to the - Web App Manifest spec, <br /> - the following properties are allowed: <code>dir</code>, <code>lang</code>, - <code>name</code>, <code>short_name</code>, <code>description</code>, <br /> - <code>icons</code>, <code>screenshots</code>, <code>categories</code>, - <code>iarc_rating_id</code>, <code>start_url</code>, <code>display</code>, - <code>orientation</code>, <br /> - <code>theme_color</code>, <code>background_color</code>, <code>scope</code>, - <code>serviceworker</code>, <code>related_applications</code>, and <br /> - <code>prefer_related_applications</code>. The only property that we didn’t - observe in the wild was <br /> - <code>iarc_rating_id</code>, which is a string that represents the - International Age Rating Coalition (IARC) <br /> - certification code of the web application. It is intended to be used to - determine which ages <br /> - the web application is appropriate for. The proprietary properties we - encountered still <br /> - frequently were <code>gcm_sender_id</code> and - <code>gcm_user_visible_only</code> from the legacy <br /> - Google Cloud Messaging (GCM) service. Interestingly there’re almost no - differences between mobile <br /> - and desktop. On both platforms, however, there’s a long tail of properties - that are not interpreted <br /> - by browsers but that contain potentially useful metadata like - <code>author</code> or <code>version</code>. We also found <br /> - a non-trivial amount of mistyped properties, our favorite being - <code>shot_name</code>. An interesting outlier <br /> - is the <code>serviceworker</code> property which is standard, but not - implemented by any browser vendor — <br /> - nevertheless, it was found on 0.09% of all web app manifests used by mobile - and desktop pages. - </p> - <p><code><bar chart of 11_04 mobile></code></p> - <p> - <strong>Figure 4a:</strong> Web App Manifest properties ordered by - decreasing popularity on mobile. - </p> - <p><code><bar chart of 11_04 mobile></code></p> - <p> - <strong>Figure 4b:</strong> Web App Manifest properties ordered by - decreasing popularity on desktop. 
- </p> - <h3 id="display-values">Display Values</h3> - <p> - Looking at the values developers set for the <code>display</code> property, - it becomes immediately clear <br /> - that they want PWAs to be perceived as “proper” apps that don’t reveal their - web technology origins. <br /> - By choosing <code>"standalone"</code>, they make sure no browser UI is shown - to the end-user. This is reflected <br /> - by the majority of apps that make use of the - <code>prefers_related_applications</code> property: more that 97% <br /> - of both mobile and desktop applications do <em>not</em> prefer native - applications. - </p> - <p><code><11_04c mobile></code></p> - <p> - <strong>Figure 5a:</strong> Values for the <code>display</code> property on - mobile. - </p> - <p><code><11_04c desktop></code></p> - <p> - <strong>Figure 5b:</strong> Values for the <code>display</code> property on - desktop. - </p> - <h3 id="category-values">Category Values</h3> - <p> - The <code>categories</code> member describes the expected application - categories to which the web application belongs. <br /> - It is only meant as a hint to catalogs or stores listing web applications, - and it is expected that <br /> - these will make a best effort to find appropriate categories (or category) - under which to list the <br /> - web application. There were not too many manifests that made use of the - property, but it is <br /> - interesting to see the shift from <em>shopping</em> being the most popular - category on mobile to <em>business</em>, <br /> - <em>technology</em>, and <em>web</em> (whatever may be meant with that) on - desktop that share the first place evenly. - </p> - <p><code><11_04d mobile></code></p> - <p> - <strong>Figure 6a:</strong> Values for the <code>categories</code> property - on mobile. - </p> - <p><code><11_04d desktop></code></p> - <p> - <strong>Figure 6b:</strong> Values for the <code>categories</code> property - on desktop. - </p> - <h3 id="icon-sizes">Icon Sizes</h3> - <p> - Lighthouse - <a - href="https://developers.google.com/web/tools/lighthouse/audits/manifest-contains-192px-icon" - >requires</a - > - <br /> - at least an icon sized 192×192, but common favicon generation tools create a - plethora of other sizes, too. <br /> - Lighthouse’s rule is probably the culprit for 192×192 being the most popular - choice of icon size on both <br /> - desktop and mobile, despite - <a - href="https://developers.google.com/web/fundamentals/web-app-manifest#icons" - >Google’s documentation</a - > - <br /> - additionally explicitly recommending 512×512, which doesn’t show as a - particularly prominent option. - </p> - <p><code><11_04f mobile></code></p> - <p><strong>Figure 7a:</strong> Popular icon sizes on mobile.</p> - <p><code><11_04f desktop></code></p> - <p><strong>Figure 7b:</strong> Popular icon sizes on desktop.</p> - <h3 id="orientation-values">Orientation Values</h3> - <p> - The valid values for the <code>orientation</code> property are - <a href="https://www.w3.org/TR/screen-orientation/#dom-orientationlocktype" - >defined</a - ><br /> - in the Screen Orientation API specification. Namely there are - <code>"any"</code>, <code>"natural"</code>, <code>"landscape"</code>, <br /> - <code>"portrait"</code>, <code>"portrait-primary"</code>, - <code>"portrait-secondary"</code>, <code>"landscape-primary"</code>, and - <code>"landscape-secondary"</code>. <br /> - Portrait orientation is the clear winner on both platforms, followed by any - orientation. 
- </p> - <p><code><11_04g mobile></code></p> - <p><strong>Figure 8a:</strong> Popular orientation values on mobile.</p> - <p><code><11_04g desktop></code></p> - <p><strong>Figure 8b:</strong> Popular orientation values on desktop.</p> - <h2 id="workbox">Workbox</h2> - <p> - Workbox is a set of libraries that help with common service worker use - cases. For instance, Workbox has tools <br /> - that can plug in to your build process and generate a manifest of files, - which are then precached by your <br /> - service worker. Workbox includes libraries to handle runtime caching, - request routing, cache expiration, <br /> - background sync, and more. - </p> - <p> - Given the low-level nature of the service worker APIs, many developers have - turned to Workbox as a way of <br /> - structuring their service worker logic into higher-level, reusable chunks of - code. Workbox adoption is also <br /> - driven by its inclusion as a feature in a number of popular JavaScript - framework starter kits, like <br /> - <a href="https://create-react-app.dev/"><code>create-react-app</code></a> - and - <a href="https://www.npmjs.com/package/@vue/cli-plugin-pwa" - >Vue's PWA plugin</a - >. - </p> - <p> - The HTTP Archive shows that, out of the total population of sites that - register a service worker, <br /> - 12.71% of them are using at least one of the Workbox libraries. This - percentage is roughly consistent <br /> - across desktop and mobile, with a slightly lower percentage (11.46%) on - mobile compared to desktop (14.36%). - </p> -</section> {% endblock %} diff --git a/src/templates/en/2019/chapters/seo.html b/src/templates/en/2019/chapters/seo.html index 6f8d50112c3..3137ef78a8d 100644 --- a/src/templates/en/2019/chapters/seo.html +++ b/src/templates/en/2019/chapters/seo.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! 
- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,824 +10,907 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css" /> -{% endblock %} {% set metadata = +{% set metadata = {"part_number":"I","chapter_number":10,"title":"SEO","authors":["ymschaap"," rachellcostello"," AVGP"],"reviewers":["clarkeclark"," andylimn"," voltek62"," AymenLoukil"," catalinred"]} %} {% block main %} -<aside> - <ul> - <li> - <a href="#intro">Intro</a> - </li> - - <li> - <a href="#fundamentals">Fundamentals</a> - +<article id="chapter" class="main"> + <nav class="index"> + <div class="index-box floating-card"> + <h2 class="header">Index</h2> + <h2 class="header-mobile">Index</h2> <ul> <li> - <a href="#content">Content</a> + <a href="#intro">Intro</a> + </li> + + <li> + <a href="#fundamentals">Fundamentals</a> <ul> <li> - <a href="#word-count">Word count</a> + <a href="#content">Content</a> + + <ul> + <li> + <a href="#word-count">Word count</a> + </li> + + <li> + <a href="#headings">Headings</a> + </li> + </ul> </li> <li> - <a href="#headings">Headings</a> - </li> - </ul> - </li> + <a href="#meta-tags">Meta tags</a> - <li> - <a href="#meta-tags">Meta tags</a> + <ul> + <li> + <a href="#page-titles">Page titles</a> + </li> - <ul> - <li> - <a href="#page-titles">Page titles</a> + <li> + <a href="#meta-descriptions">Meta descriptions</a> + </li> + + <li> + <a href="#image-alt-tags">Image alt tags</a> + </li> + </ul> </li> <li> - <a href="#meta-descriptions">Meta descriptions</a> + <a href="#indexability">Indexability</a> + + <ul> + <li> + <a href="#status-codes">Status codes</a> + </li> + + <li> + <a href="#noindex">Noindex</a> + </li> + + <li> + <a href="#canonicalization">Canonicalization</a> + </li> + + <li> + <a href="#robotstxt">robots.txt</a> + </li> + </ul> </li> <li> - <a href="#image-alt-tags">Image alt tags</a> + <a href="#linking">Linking</a> </li> </ul> </li> <li> - <a href="#indexability">Indexability</a> + <a href="#advanced">Advanced</a> <ul> <li> - <a href="#status-codes">Status codes</a> + <a href="#speed">Speed</a> + </li> + + <li> + <a href="#structured-data">Structured data</a> + </li> + + <li> + <a href="#internationalization">Internationalization</a> </li> <li> - <a href="#noindex">Noindex</a> + <a href="#spa-crawlability">SPA crawlability</a> </li> <li> - <a href="#canonicalization">Canonicalization</a> + <a href="#amp">AMP</a> </li> <li> - <a href="#robotstxt">robots.txt</a> + <a href="#security">Security</a> </li> </ul> </li> <li> - <a href="#linking">Linking</a> + <a href="#conclusion">Conclusion</a> </li> </ul> - </li> - - <li> - <a href="#advanced">Advanced</a> + </div> + </nav> + <section class="content"> + <section class="body"> + <h1> + <div class="decorative-line"></div> + {{ metadata.get('title') }} + <div class="decorative-line-mobile"></div> + </h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="content-banner" + /> + <h2 id="intro">Intro</h2> + <p> + Search Engine Optimization (SEO) isn't just a hobby or a side project + for digital marketers, it is crucial for the success of a website. 
The + primary goal of SEO is to make sure that a website is optimized for the + search engine bots that need to crawl and index its pages, as well as + for the users that will be navigating the website and consuming its + content. SEO impacts everyone working on a website, from the developer + who is building it, through to the digital marketer who will need to + promote it to new potential customers. + </p> + <p> + Let's put the importance of SEO into perspective. Earlier this year, the + SEO industry looked on in horror (and fascination) as + <a href="https://www.bbc.co.uk/news/business-47877688" + >ASOS reported an 87% decrease in profits</a + > + after a "difficult year". The brand attributed their issues to a drop in + search engine rankings which occurred after they launched over 200 + microsites and significant changes to their website's navigation, among + other technical changes. Yikes. + </p> + <p> + The SEO chapter of the Web Almanac was created to analyze onsite + elements of the web that impact the crawling and indexing of content for + search engines, and ultimately, website performance. In this chapter, + we'll take a look at how well-equipped the top websites are to provide a + great experience for users and search engines, and which ones still have + work to do. + </p> + <p> + Our analysis includes data from + <a href="https://developers.google.com/web/tools/lighthouse" + >Lighthouse</a + >, the + <a + href="https://developers.google.com/web/tools/chrome-user-experience-report" + >Chrome UX Report</a + >, and HTML element analysis. We focused on SEO fundamentals like + <code><title></code> elements, the different types of on-page + links, content and loading speed, but also the more technical aspects of + SEO, including indexability, structured data, internationalization and + AMP across over 5 million websites. + </p> + <p> + Our custom metrics provide insights that, up until now, have not been + exposed before. We are now able to make claims about the adoption and + implementation of elements such as the hreflang tag, rich results + eligibility, heading tag usage, and even anchor-based navigation for + single page apps. + </p> + <aside> + Note: Our data is limited to analyzing home pages only, and has not been + gathered from site-wide crawls. This will impact many metrics we'll + discuss, so we've added any relevant limitations in this case whenever + we mention a specific metric. + </aside> + <p> + Read on to find out more about the current state of the web and its + search engine friendliness. + </p> + <h2 id="fundamentals">Fundamentals</h2> + <p> + Search engines have a 3-step process: crawling, indexing, and ranking. + To be search engine-friendly, a page needs to be discoverable, + understandable, and contain quality content that would provide value to + a user who is browsing the search engine results pages (SERPs). + </p> + <p> + We wanted to analyze how much of the web is meeting the basic standards + of SEO best practices, so we assessed on-page elements such as body + content, meta tags, and internal linking. Let's take a look at the + results. + </p> + <h3 id="content">Content</h3> + <p> + To be able to understand what a page is about and decide for which + search queries it provides the most relevant answers, a search engine + must be able to discover and access its content. What content are search + engines currently finding, however? To help answer this, we created two + custom metrics: word count and headings. 
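+          </p>
+          <p>
+            Custom metrics of this kind are implemented as JavaScript run against
+            each page during the crawl. As a rough, simplified sketch of the idea
+            (this is not the exact metric code used), something along these lines
+            can be run in any page:
+          </p>
+          <pre><code>// Simplified sketch only; the real custom metrics are more involved.
+const headings = document.querySelectorAll('h1, h2, h3, h4, h5, h6').length;
+const words = (document.body.innerText.match(/\S+/g) || []).length;
+console.log(JSON.stringify({ headings, words }));</code></pre>
+          <p>
+            The results of applying these two ideas across the dataset are
+            discussed below.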
+ </p> + <h4 id="word-count">Word count</h4> + <p> + We assessed the content on the pages by looking for groups of at least 3 + words and counting how many were found in total. We found 2.73% of + desktop pages that didn't have any word groups, meaning that they have + no body content to help search engines understand what the website is + about. + </p> + <p> + The median desktop home page has 346 words, and the median mobile home + page has a slightly lower word count at 306 words. This shows that + mobile sites do serve a bit less content to their users, but at over 300 + words, this is still a reasonable amount to read, especially for a home + page which will naturally contain less content than an article page, for + example. Overall the distribution of words is broad, with between 22 + words at the 10th percentile and up to 1,361 at the 90th percentile. + </p> + <p> + <code + ><graph histogram number of words. Source: 10.09, column C, desktop + & mobile></code + > + </p> + <h4 id="headings">Headings</h4> + <p> + We also looked at whether pages are structured in a way that provides + the right context for the content they contain. Headings + (<code>H1</code>, <code>H2</code>, <code>H3</code>, etc) are used to + format and structure a page and make content easier to read and parse. + Despite the importance on headings, 10.67% of pages have no heading tags + at all. + </p> + <p> + <code + ><graph histogram number of heading elements. Source: 10.09a, + column F></code + > + </p> + <p> + The median number of heading elements per page is 10, with 30 words (on + mobile) and 32 words (on desktop) used in headings. This implies that + the websites that utilize headings put significant effort in making sure + that their pages are readable, descriptive, and clearly outline the page + structure and context to search engine bots. + </p> + <p> + In terms of specific heading length, the median length of the first + <code>H1</code> element found on desktop is 19 characters. + </p> + <p> + For advice on how to handle <code>H1</code>s and headings for SEO and + accessibility, take a look at this + <a href="https://www.youtube.com/watch?v=zyqJJXWk0gk" + >video response by John Mueller</a + > + in the Ask Google Webmasters series. + </p> + <p> + <code + ><graph histogram h1 tag source: 10.16, column C, desktop & + mobile></code + > + </p> + <h3 id="meta-tags">Meta tags</h3> + <p> + Meta tags allow us to give specific instructions and information to + search engine bots about the different elements and content on a page. + Certain meta tags can convey things like the topical focus of a page, as + well as how the page should be crawled and indexed. We wanted to assess + whether or not websites were making the most of these opportunities that + meta tags provide. + </p> + <h4 id="page-titles">Page titles</h4> + <p> + Page titles are an important way of communicating the purpose of a page + to a user or search engine. <code><title></code> tags are also + used as headings in the SERPS and as the title for the browser tab when + visiting a page, so it's no surprise to see that 97.1% of mobile pages + have a document title. + </p> + <p> + Even though + <a href="https://moz.com/learn/seo/title-tag" + >Google usually displays the first 50-60 characters of a page title</a + > + within a SERP, the median length of the <code><title></code> tag + was only 21 characters for mobile pages and 20 characters for desktop + pages. Even the 75th percentile is still below the cutoff length. 
This + suggests that some SEOs and content writers aren't making the most of + the space allocated to them by search engines for describing their home + pages in the SERPs. + </p> + <p> + <code + ><graph histogram length <title> Source: 10.07b, column C, + desktop & mobile></code + > + </p> + <h4 id="meta-descriptions">Meta descriptions</h4> + <p> + Compared to the <code><title></code> tag, fewer pages were + detected to have a meta description, as only 64.02% of mobile home pages + have a meta description. Considering that Google often rewrites meta + descriptions in the SERPs in response to the searcher's query, perhaps + website owners place less importance on including a meta description at + all. + </p> + <p> + The median meta description length was also lower than the + <a href="https://moz.com/learn/seo/meta-description" + >recommended length of 155-160 characters</a + >, with desktop pages having descriptions of 123 characters. + Interestingly, meta descriptions were consistently longer on mobile than + on desktop, despite mobile SERPs traditionally having a shorter pixel + limit. This limit has only been extended recently, so perhaps more + website owners have been testing the impact of having longer, more + descriptive meta descriptions for mobile results. + </p> + <p> + <code + ><graph histogram length <meta description> Source: 10.07c, + column C, desktop & mobile></code + > + </p> + <h4 id="image-alt-tags">Image alt tags</h4> + <p> + Considering the importance of alt text for SEO and accessibility, it is + far from ideal to see that only 46.71% of mobile pages use alt + attributes on all of their images. This means that there are still + improvements to be made with regard to making images across the web more + accessible to users and understandable for search engines. + </p> + <p> + Learn more about this in the + <a href="../accessibility">Accessibility chapter</a>. + </p> + <h3 id="indexability">Indexability</h3> + <p> + To show a page's content to users in the SERPs, search engine crawlers + must first be permitted to access and index that page. Some of the + factors that impact a search engine's ability to crawl and index pages + include: + </p> + <ul> + <li>Status codes</li> + <li>Noindex tags</li> + <li>Canonical tags</li> + <li>The robots.txt file</li> + </ul> + <h4 id="status-codes">Status codes</h4> + <p> + It is recommended to maintain a <code>200 OK</code> status code for any + important pages that you want search engines to index. The majority of + pages tested were available for search engines to access, with 87.03% of + initial HTML requests on desktop returning a <code>200</code> status + code. The results were slightly lower for mobile pages, with only 82.95% + of pages returning a <code>200</code> status code. + </p> + <p> + The next most commonly found status code on mobile was <code>302</code>, + a temporary redirect, which was found on 10.45% of mobile pages. This + was higher than on desktop, with only 6.71% desktop home pages returning + a <code>302</code> status code. This could be due to the fact that the + <a + href="https://developers.google.com/search/mobile-sites/mobile-seo/separate-urls" + >mobile home pages were alternates</a + > + to an equivalent desktop page, such as on non-responsive sites that have + separate versions of the website for each device. + </p> + <aside> + Note: Our results didn't include `4xx` or `5xx` status codes. 
+ </aside> + <h4 id="noindex">Noindex</h4> + <p> + A noindex tag can be served in the HTML <code><head></code> or in + the HTTP headers as an <code>X-Robots</code> tag. A noindex tag + basically tells a search engine not to include that page in its SERPs, + but the page will still be accessible for users when they are navigating + through the website. Noindex tags are usually added to duplicate + versions of pages that serve the same content, or low quality pages that + provide no value to users coming to a website from organic search, such + as filtered or faceted pages or internal search pages. + </p> + <p> + 96.93% of mobile pages passed the + <a + href="https://developers.google.com/web/tools/lighthouse/audits/indexing" + >Lighthouse indexing audit</a + >, meaning that these pages didn't contain a noindex tag in the + <code><head></code> or in the HTTP headers. However, this means + that 3.07% of mobile home pages <em>did</em> have a noindex tag, which + is cause for concern, meaning that Google was prevented from indexing + these pages. + </p> + <aside> + The websites included in our research are sourced from the [Chrome UX + Report](https://developers.google.com/web/tools/chrome-user-experience-report/#methodology) + dataset, which excludes websites that are not publicly discoverable. + This is a significant source of bias because we're unable to analyze + sites that Chrome determines to be non-public. Learn more about our + [Methodology](../methodology#websites). + </aside> + <h4 id="canonicalization">Canonicalization</h4> + <p> + Canonical tags are used to specify duplicate pages and their preferred + alternates, so that search engines can consolidate authority which might + be spread across multiple pages within the group onto one main page for + improved rankings. + </p> + <p> + 48.34% of mobile home pages were + <a + href="https://developers.google.com/web/tools/lighthouse/audits/canonical" + >detected</a + > + to have a canonical tag. Self-referencing canonical tags aren't + essential, and canonical tags are usually required for duplicate pages. + Home pages are rarely duplicated anywhere else across the site so seeing + that less than half of pages have a canonical tag isn't surprising. + </p> + <h4 id="robotstxt">robots.txt</h4> + <p> + One of the most effective methods for controlling search engine crawling + is the + <a + href="https://www.deepcrawl.com/knowledge/technical-seo-library/robots-txt/" + >robots.txt file</a + >. This is a file that sits on the root domain of a website and + specifies which URLs and URL paths should be disallowed from being + crawled by search engines. + </p> + <p> + It was interesting to observe that only 72.16% of mobile sites have a + valid robots.txt, + <a + href="https://developers.google.com/web/tools/lighthouse/audits/robots" + >according to Lighthouse</a + >. The key issues we found are split between 22% of sites having no + robots.txt file at all, and ~6% serving an invalid robots.txt file, and + thus failing the audit. While there are many valid reasons to not have a + robots.txt file, such as having a small website that doesn't struggle + with + <a + href="https://webmasters.googleblog.com/2017/01/what-crawl-budget-means-for-googlebot.html" + >crawl budget issues</a + >, having an invalid robots.txt is cause for concern. + </p> + <h3 id="linking">Linking</h3> + <p> + One of the most important attributes of a web page is links. 
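+          </p>
+          <p>
+            As a point of reference for the analysis below, the three kinds of
+            links discussed in this section look something like this in HTML (the
+            URLs and anchor text are placeholders):
+          </p>
+          <pre><code>&lt;!-- Internal (same-site) link with descriptive anchor text --&gt;
+&lt;a href="/pricing/"&gt;See our pricing plans&lt;/a&gt;
+
+&lt;!-- External link to another domain --&gt;
+&lt;a href="https://example.org/guide"&gt;A related guide on example.org&lt;/a&gt;
+
+&lt;!-- Anchor link to a scroll position on the same page --&gt;
+&lt;a href="#reviews"&gt;Jump to the reviews section&lt;/a&gt;</code></pre>
+          <p>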
Links help + search engines discover new, relevant pages to add to their index and + navigate through websites. 96% of the web pages in our dataset contain + at least one internal link, and 93% contain at least one external link + to another domain. The small minority of pages that don't have any + internal or external links will be missing out on the immense value that + links pass through to target pages. + </p> + <p> + The number of internal and external links included on desktop pages were + consistently higher than the number found on mobile pages. Often a + limited space on a smaller viewport causes fewer links to be included in + the design of a mobile page compared to desktop. + </p> + <p> + It's important to bear in mind that fewer internal links on the mobile + version of a page + <a href="https://moz.com/blog/internal-linking-mobile-first-crawl-paths" + >might cause an issue</a + > + for your website. With + <a + href="https://www.deepcrawl.com/knowledge/white-papers/mobile-first-index-guide/" + >mobile-first indexing</a + >, which for new websites is the default for Google, if a page is only + linked from the desktop version and not present on the mobile version, + search engines will have a much harder time discovering and ranking it. + </p> + <p> + <code + ><graph histogram count of links by type Source: 10.10, column C + desktop only></code + > + </p> + <p> + <code + ><graph histogram count of links by type Source: 10.10, column D, + E, desktop only></code + > + </p> + <p> + The median desktop page includes 70 internal (same-site) links, whereas + the median mobile page has 60 internal links. The median number of + external links per page follows a similar trend, with desktop pages + including 10 external links, and mobile pages including 8. + </p> + <p> + Anchor links, which link to a certain scroll position on the same page, + are not very popular. Over 65% of home pages have no anchor links. This + is probably due to the fact that home pages don't usually contain any + long-form content. + </p> + <p> + There is good news from our analysis of the descriptive link text + metric. 89.94% of mobile pages pass Lighthouse's + <a + href="https://developers.google.com/web/tools/lighthouse/audits/descriptive-link-text" + >descriptive link text audit</a + >. This means that these pages don't have generic "click here", "go", + "here" or "learn more" links, but use more meaningful link text which + helps users and search engines better understand the context of pages + and how they connect with one another. + </p> + <h2 id="advanced">Advanced</h2> + <p> + Having descriptive, useful content on a page that isn't being blocked + from search engines with a noindex tag or robots.txt directive isn't + enough for a website to succeed in organic search. Those are just the + basics. There is a lot more than can be done to enhance the performance + of a website and its appearance in SERPs. + </p> + <p> + Some of the more technically complex aspects that have been gaining + importance in successfully indexing and ranking websites include: speed, + structured data, internationalization, security, and mobile + friendliness. + </p> + <h3 id="speed">Speed</h3> + <p> + Mobile loading speed was first + <a + href="https://webmasters.googleblog.com/2018/01/using-page-speed-in-mobile-search.html" + >announced as a ranking factor</a + > + by Google in 2018. Speed isn't a new focus for Google though. 
Back in + 2010 it was + <a + href="https://webmasters.googleblog.com/2010/04/using-site-speed-in-web-search-ranking.html" + >revealed that speed had been introduced as a ranking signal</a + >. + </p> + <p> + A fast-loading website is also crucial for a good user experience. Users + that have to wait even a few seconds for a site to load have the + tendency to bounce and try another result from one of your competitors + in the SERPs that loads quickly and meets their expectations of website + performance. + </p> + <p> + The metrics we used for our analysis of load speed across the web is + based on the + <a href="../methodology#chrome-ux-report">Chrome UX Report</a> (CrUX), + which collects data from real-world Chrome users. This data shows that + an astonishing 63.47% of websites are labelled as <strong>slow</strong>. + Split by device, this picture is even bleaker for tablet (82.00%) and + phone (77.61%). In the context of our results, per the + <a + href="https://developers.google.com/speed/docs/insights/v5/about#categories" + >PageSpeed Insights classification system</a + >, a slow website is defined as having 10% of First Contentful Paint + (FCP) experiences taking over 2,500 ms or 5% of First Input Delay (FID) + experiences measuring over 250 ms. + </p> + <p> + <code + ><graph data 10.15b: CruX image similar to + [IMG](https://developers.google.com/web/updates/images/2018/08/crux-dash-fcp.png) + per device + speed label></code + > + </p> + <p> + Although the numbers are bleak for the speed of the web, the good news + is that SEO experts and tools have been focusing more and more on the + technical challenges of speeding up websites. You can learn more about + the state of web performance in the + <a href="../performance">Performance chapter</a>. + </p> + <h3 id="structured-data">Structured data</h3> + <p> + Structured data allows website owners to add additional semantic data to + their web pages, by adding + <a href="https://en.wikipedia.org/wiki/JSON-LD">JSON-LD</a> snippets or + <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Microdata" + >Microdata</a + >, for example. Search engines parse this data to better understand + these pages and sometimes use the markup to display additional relevant + information in the search results. The most commonly found types of + structured data are + <a + href="https://developers.google.com/search/docs/data-types/review-snippet" + >reviews</a + >, + <a href="https://developers.google.com/search/docs/data-types/product" + >products</a + >, + <a + href="https://developers.google.com/search/docs/data-types/local-business" + >businesses</a + >, + <a href="https://developers.google.com/search/docs/data-types/movie" + >movies</a + >, and + <a + href="https://developers.google.com/search/docs/guides/search-gallery" + >more</a + >. + </p> + <p> + The + <a href="https://developers.google.com/search/docs/guides/enhance-site" + >extra visibility</a + > + that structured data can provide for websites is interesting for site + owners, given that it can help to create more opportunities for traffic. + For example, the relatively new + <a href="https://developers.google.com/search/docs/data-types/faqpage" + >FAQ schema</a + > + will double the size of your snippet and the real estate of your site in + the SERP. + </p> + <p> + During our research, we found that only 14.67% of sites are eligible for + rich results on mobile. Interestingly, desktop site eligibility is + slightly lower at 12.46%. 
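+          </p>
+          <p>
+            As an illustration of what such markup looks like, one of the most
+            common patterns (discussed below) is a <code>WebSite</code> object
+            with a <code>SearchAction</code>, embedded in the page inside a
+            <code>&lt;script type="application/ld+json"&gt;</code> element. A
+            hypothetical example with placeholder URLs might be:
+          </p>
+          <pre><code>{
+  "@context": "https://schema.org",
+  "@type": "WebSite",
+  "url": "https://www.example.com/",
+  "potentialAction": {
+    "@type": "SearchAction",
+    "target": "https://www.example.com/search?q={search_term_string}",
+    "query-input": "required name=search_term_string"
+  }
+}</code></pre>
+          <p>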
This suggests that there is a lot more that + site owners can be doing to optimize the way their home pages are + appearing in search. + </p> + <p> + Among the sites with structured data markup, the five most prevalent + types are: + </p> + <ol> + <li><code>WebSite</code> (16.02%)</li> + <li><code>SearchAction</code> (14.35%)</li> + <li><code>Organization</code> (12.89%)</li> + <li><code>WebPage</code> (11.58%)</li> + <li><code>ImageObject</code> (5.35%)</li> + </ol> + <p> + Interestingly, one of the most popular data types that triggers a search + engine feature is <code>SearchAction</code>, which powers the + <a + href="https://developers.google.com/search/docs/data-types/sitelinks-searchbox" + >sitelinks searchbox</a + >. + </p> + <p> + The top 5 markup types all lead to more visibility in Google's search + results, which might be the fuel for more widespread adoption of these + types of structured data. + </p> + <p> + Seeing as we only looked at home pages within this analysis, the results + might look very different if we were to consider interior pages, too. + </p> + <p> + Review stars are only found on 1.09% of the web's home pages (via + <a href="https://schema.org/AggregateRating">AggregateRating</a>). + </p> + <p> + Also, the newly introduced + <a href="https://schema.org/QAPage">QAPage</a> appeared only in 48 + instances, and the <a href="https://schema.org/FAQPage">FAQPage</a> at a + slightly higher frequency of 218 times. These last two counts are + expected to increase in the future as we run more crawls and dive deeper + into Web Almanac analysis. + </p> + <h3 id="internationalization">Internationalization</h3> + <p> + Internationalization is one of the most complex aspects of SEO, even + <a href="https://twitter.com/JohnMu/status/965507331369984002" + >according to some Google search employees</a + >. Internationalization in SEO focuses on serving the right content from + a website with multiple language or country versions, and making sure + that content is being targeted towards the specific language and + location of the user. + </p> + <p> + While 38.40% of desktop sites (33.79% on mobile) have the HTML lang + attribute set to English, only 7.43% (6.79% on mobile) of the sites also + contain an <code>hreflang</code> link to another language version. This + suggests that the vast majority of websites that we analyzed don't offer + separate versions of their home page that would require language + targeting -- unless these separate versions do exist, but haven't been + configured correctly. + </p> + <p> + <code + ><graph 10.04b - [do we want to chart this data, e.g. what does it + really mean for SEO?]></code + > + </p> + <p> + <code + ><include a chart of the languages and country combinations found, + SEOs will want to see this breakdown></code + > + </p> + <p> + Next to English, the most common languages are French, Spanish, and + German. These are followed by languages targeted towards specific + geographies like English for Americans (<code>en-us</code>) or more + obscure combinations like Spanish for the Irish (<code>es-ie</code>). + </p> + <p> + The analysis did not check for correct implementation, such as whether + or not the different language versions properly link to each other. 
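+          </p>
+          <p>
+            For reference, a correct reciprocal implementation on an English (US)
+            home page might include annotations along these lines in the
+            <code>&lt;head&gt;</code>, with each alternate version linking back in
+            the same way (the domain is a placeholder):
+          </p>
+          <pre><code>&lt;link rel="alternate" hreflang="en-us" href="https://www.example.com/en-us/" /&gt;
+&lt;link rel="alternate" hreflang="es" href="https://www.example.com/es/" /&gt;
+&lt;link rel="alternate" hreflang="x-default" href="https://www.example.com/" /&gt;</code></pre>
+          <p>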
+ However, from looking at the low adoption of + <a + href="https://www.google.com/url?q=https://support.google.com/webmasters/answer/189077?hl%3Den&sa=D&ust=1570627963630000&usg=AFQjCNFwzwglsbysT9au_I-7ZQkwa-QvrA" + >having an x-default version as is recommended</a + > + (only 3.77% on desktop and 1.30% on mobile), this is an indicator that + this element is complex and not always easy to get right. + </p> + <h3 id="spa-crawlability">SPA crawlability</h3> + <p> + Single-page applications (SPAs) built with frameworks like React and + Vue.js come with their own SEO complexity. Websites using a hash-based + navigation, make it especially hard for search engines to properly crawl + and index them. For example, Google had an "AJAX crawling scheme" + workaround that turned out to be complex for search engines as well as + developers, so it was + <a + href="https://webmasters.googleblog.com/2015/10/deprecating-our-ajax-crawling-scheme.html" + >deprecated in 2015</a + >. + </p> + <p> + The number of SPAs that were tested had a relatively low number of links + served via hash URLs, with 13.08% of React mobile pages using hash URLs + for navigation, 8.15% of mobile Vue.js pages using them, and 2.37% of + mobile Angular pages using them. These results were very similar for + desktop pages too. This is positive to see from an SEO perspective, + considering the impact that hash URLs can have on content discovery. + </p> + <p> + The higher number of hash URLs in React pages is surprising, especially + in contrast to the lower number of hash URLs found on Angular pages. + Both frameworks promote the adoption of routing packages where the + <a href="https://developer.mozilla.org/en-US/docs/Web/API/History" + >History API</a + > + is the default for links, instead of relying on hash URLs. Vue.js is + <a href="https://github.com/vuejs/rfcs/pull/40" + >considering moving to using the History API as the default</a + > + as well in version 3 of their <code>vue-router</code> package. + </p> + <h3 id="amp">AMP</h3> + <p> + AMP (formerly known as "Accelerated Mobile Pages") was first introduced + in 2015 by Google as an open source HTML framework. It provides + components and infrastructure for websites to provide a faster + experience for users, by using optimizations such as caching, lazy + loading, and optimized images. Notably, Google adopted this for their + search engine, where AMP pages are also served from their own CDN. This + feature later became a standards proposal under the name + <a + href="https://wicg.github.io/webpackage/draft-yasskin-http-origin-signed-responses.html" + >Signed HTTP Exchanges</a + >. + </p> + <p> + Despite this, only 0.62% of mobile home pages contain a link to an AMP + version. Given the visibility this project has had, this suggests that + it has had a relatively low adoption. However, AMP can be more useful + for serving article pages, so our home page-focused analysis won't + reflect adoption across other page types. + </p> + <h3 id="security">Security</h3> + <p> + A strong online shift in recent years has been for the web to move to + HTTPS by default. HTTPS prevents website traffic from being intercepted + on public WiFi networks, for example, where user input data is then + transmitted unsecurely. Google have been pushing for sites to adopt + HTTPS, and even made + <a + href="https://webmasters.googleblog.com/2014/08/https-as-ranking-signal.html" + >HTTPS as a ranking signal</a + >. 
Chrome also supported the move to secure pages by labelling non-HTTPS
+            pages as ‘<a
+              href="https://www.blog.google/products/chrome/milestone-chrome-security-marking-http-not-secure/"
+              >not secure</a
+            >’ in the browser.
+          </p>
+          <p>
+            For more information and guidance from Google on the importance of HTTPS
+            and how to adopt it, please see
+            <a
+              href="https://developers.google.com/web/fundamentals/security/encrypt-in-transit/why-https"
+              >Why HTTPS Matters</a
+            >.
+          </p>
+          <p>
+            We found that 67.06% of websites on desktop are now served over HTTPS.
+            Roughly one-third of websites still haven't migrated to HTTPS and are
+            serving non-secure pages to their users. This is a significant number.
+            Migrations can be hard work, so this could be a reason why the adoption
+            rate isn't higher, but an HTTPS migration usually requires an SSL
+            certificate and a simple change to the <code>.htaccess</code> file.
+            There's no real reason not to switch to HTTPS.
+          </p>
+          <p>
+            Google's
+            <a href="https://transparencyreport.google.com/https/overview"
+              >HTTPS Transparency Report</a
+            >
+            reports a 90% adoption of HTTPS for the top 100 non-Google domains
+            (representing 25% of all website traffic worldwide). The difference
+            between this number and ours could be explained by the fact that
+            relatively smaller sites are adopting HTTPS at a slower rate.
+          </p>
+          <h2 id="conclusion">Conclusion</h2>
+          <p>
+            Through our analysis, we observed that the majority of websites are
+            getting the fundamentals right, in that their home pages are crawlable,
+            indexable, and include the key content required to rank well in search
+            engines' results pages. Not every person who owns a website will be
+            aware of SEO at all, let alone best practice guidelines, so it is
+            promising to see that so many sites have got the basics covered.
+          </p>
+          <p>
+            However, more sites are missing the mark than expected when it comes to
+            some of the more advanced aspects of SEO and accessibility. Site speed
+            is one of these factors that many websites are struggling with,
+            especially on mobile. This is a significant problem, as speed is one of
+            the biggest contributors to UX, which is something that can impact
+            rankings. The number of websites that aren't yet served over HTTPS is
+            also problematic to see, considering the importance of security and
+            keeping user data safe.
+          </p>
+          <p>
+            There is a lot more that we can all be doing to learn about SEO best
+            practices and industry developments. This is essential due to the
+            evolving nature of the search industry and the rate at which changes
+            happen. Search engines make thousands of improvements to their
+            algorithms each year, and we need to keep up if we want our websites to
+            reach more visitors in organic search.
+ </p> + </section> + <section class="authors"> + <h4>Authors :</h4> <ul> <li> - <a href="#speed">Speed</a> - </li> - - <li> - <a href="#structured-data">Structured data</a> - </li> - - <li> - <a href="#internationalization">Internationalization</a> - </li> - - <li> - <a href="#spa-crawlability">SPA crawlability</a> - </li> + <img + class="avatar" + alt="Author name" + src="https://www.gravatar.com/avatar/cf58fcc6995e15f35e42532c3775fed6.jpg?d=mp&s=200" + /> + <div class="info"> + <span class="name">Full Name</span> + <span class="social"> + <a class="twitter" href="https://twitter.com/rick_viscomi"> + <img src="/static/images/twitter.png" alt="Twitter account" /> + </a> - <li> - <a href="#amp">AMP</a> - </li> + <a class="github" href="https://github.com/rviscomi"> + <img src="/static/images/github.png" alt="github account" /> + </a> + </span> - <li> - <a href="#security">Security</a> + <div class="tagline"> + Tagline of contributor here + </div> + </div> </li> </ul> - </li> + </section> + + <nav id="chapter-navigation"> + <a id="previous-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> - <li> - <a href="#conclusion">Conclusion</a> - </li> - </ul> -</aside> + <a id="next-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + </nav> + </section> +</article> -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img - src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" - class="chapter-hero" - /> - <h2 id="intro">Intro</h2> - <p> - Search Engine Optimization (SEO) isn't just a hobby or a side project for - digital marketers, it is crucial for the success of a website. The primary - goal of SEO is to make sure that a website is optimized for the search - engine bots that need to crawl and index its pages, as well as for the users - that will be navigating the website and consuming its content. SEO impacts - everyone working on a website, from the developer who is building it, - through to the digital marketer who will need to promote it to new potential - customers. - </p> - <p> - Let's put the importance of SEO into perspective. Earlier this year, the SEO - industry looked on in horror (and fascination) as - <a href="https://www.bbc.co.uk/news/business-47877688" - >ASOS reported an 87% decrease in profits</a - > - after a "difficult year". The brand attributed their issues to a drop in - search engine rankings which occurred after they launched over 200 - microsites and significant changes to their website's navigation, among - other technical changes. Yikes. - </p> - <p> - The SEO chapter of the Web Almanac was created to analyze onsite elements of - the web that impact the crawling and indexing of content for search engines, - and ultimately, website performance. In this chapter, we'll take a look at - how well-equipped the top websites are to provide a great experience for - users and search engines, and which ones still have work to do. - </p> - <p> - Our analysis includes data from - <a href="https://developers.google.com/web/tools/lighthouse">Lighthouse</a>, - the - <a - href="https://developers.google.com/web/tools/chrome-user-experience-report" - >Chrome UX Report</a - >, and HTML element analysis. 
We focused on SEO fundamentals like - <code><title></code> elements, the different types of on-page links, - content and loading speed, but also the more technical aspects of SEO, - including indexability, structured data, internationalization and AMP across - over 5 million websites. - </p> - <p> - Our custom metrics provide insights that, up until now, have not been - exposed before. We are now able to make claims about the adoption and - implementation of elements such as the hreflang tag, rich results - eligibility, heading tag usage, and even anchor-based navigation for single - page apps. - </p> - <aside> - Note: Our data is limited to analyzing home pages only, and has not been - gathered from site-wide crawls. This will impact many metrics we'll discuss, - so we've added any relevant limitations in this case whenever we mention a - specific metric. - </aside> - <p> - Read on to find out more about the current state of the web and its search - engine friendliness. - </p> - <h2 id="fundamentals">Fundamentals</h2> - <p> - Search engines have a 3-step process: crawling, indexing, and ranking. To be - search engine-friendly, a page needs to be discoverable, understandable, and - contain quality content that would provide value to a user who is browsing - the search engine results pages (SERPs). - </p> - <p> - We wanted to analyze how much of the web is meeting the basic standards of - SEO best practices, so we assessed on-page elements such as body content, - meta tags, and internal linking. Let's take a look at the results. - </p> - <h3 id="content">Content</h3> - <p> - To be able to understand what a page is about and decide for which search - queries it provides the most relevant answers, a search engine must be able - to discover and access its content. What content are search engines - currently finding, however? To help answer this, we created two custom - metrics: word count and headings. - </p> - <h4 id="word-count">Word count</h4> - <p> - We assessed the content on the pages by looking for groups of at least 3 - words and counting how many were found in total. We found 2.73% of desktop - pages that didn't have any word groups, meaning that they have no body - content to help search engines understand what the website is about. - </p> - <p> - The median desktop home page has 346 words, and the median mobile home page - has a slightly lower word count at 306 words. This shows that mobile sites - do serve a bit less content to their users, but at over 300 words, this is - still a reasonable amount to read, especially for a home page which will - naturally contain less content than an article page, for example. Overall - the distribution of words is broad, with between 22 words at the 10th - percentile and up to 1,361 at the 90th percentile. - </p> - <p> - <code - ><graph histogram number of words. Source: 10.09, column C, desktop - & mobile></code - > - </p> - <h4 id="headings">Headings</h4> - <p> - We also looked at whether pages are structured in a way that provides the - right context for the content they contain. Headings (<code>H1</code>, - <code>H2</code>, <code>H3</code>, etc) are used to format and structure a - page and make content easier to read and parse. Despite the importance on - headings, 10.67% of pages have no heading tags at all. - </p> - <p> - <code - ><graph histogram number of heading elements. 
Source: 10.09a, column - F></code - > - </p> - <p> - The median number of heading elements per page is 10, with 30 words (on - mobile) and 32 words (on desktop) used in headings. This implies that the - websites that utilize headings put significant effort in making sure that - their pages are readable, descriptive, and clearly outline the page - structure and context to search engine bots. - </p> - <p> - In terms of specific heading length, the median length of the first - <code>H1</code> element found on desktop is 19 characters. - </p> - <p> - For advice on how to handle <code>H1</code>s and headings for SEO and - accessibility, take a look at this - <a href="https://www.youtube.com/watch?v=zyqJJXWk0gk" - >video response by John Mueller</a - > - in the Ask Google Webmasters series. - </p> - <p> - <code - ><graph histogram h1 tag source: 10.16, column C, desktop & - mobile></code - > - </p> - <h3 id="meta-tags">Meta tags</h3> - <p> - Meta tags allow us to give specific instructions and information to search - engine bots about the different elements and content on a page. Certain meta - tags can convey things like the topical focus of a page, as well as how the - page should be crawled and indexed. We wanted to assess whether or not - websites were making the most of these opportunities that meta tags provide. - </p> - <h4 id="page-titles">Page titles</h4> - <p> - Page titles are an important way of communicating the purpose of a page to a - user or search engine. <code><title></code> tags are also used as - headings in the SERPS and as the title for the browser tab when visiting a - page, so it's no surprise to see that 97.1% of mobile pages have a document - title. - </p> - <p> - Even though - <a href="https://moz.com/learn/seo/title-tag" - >Google usually displays the first 50-60 characters of a page title</a - > - within a SERP, the median length of the <code><title></code> tag was - only 21 characters for mobile pages and 20 characters for desktop pages. - Even the 75th percentile is still below the cutoff length. This suggests - that some SEOs and content writers aren't making the most of the space - allocated to them by search engines for describing their home pages in the - SERPs. - </p> - <p> - <code - ><graph histogram length <title> Source: 10.07b, column C, - desktop & mobile></code - > - </p> - <h4 id="meta-descriptions">Meta descriptions</h4> - <p> - Compared to the <code><title></code> tag, fewer pages were detected to - have a meta description, as only 64.02% of mobile home pages have a meta - description. Considering that Google often rewrites meta descriptions in the - SERPs in response to the searcher's query, perhaps website owners place less - importance on including a meta description at all. - </p> - <p> - The median meta description length was also lower than the - <a href="https://moz.com/learn/seo/meta-description" - >recommended length of 155-160 characters</a - >, with desktop pages having descriptions of 123 characters. Interestingly, - meta descriptions were consistently longer on mobile than on desktop, - despite mobile SERPs traditionally having a shorter pixel limit. This limit - has only been extended recently, so perhaps more website owners have been - testing the impact of having longer, more descriptive meta descriptions for - mobile results. 
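  </p>
  <p>
    To make the two elements just discussed concrete, here is an illustrative
    <code><head></code> snippet; the store and wording are invented purely for
    the example:
  </p>
  <pre><code><head>
  <title>Example Store: Handmade Ceramic Mugs & Tableware</title>
  <meta
    name="description"
    content="Browse our collection of handmade ceramic mugs, plates, and bowls. Each piece is fired in small batches in our studio and shipped worldwide."
  />
</head></code></pre>
  <p>
    A title and description of roughly these lengths fit within the display
    limits discussed above while still telling users and search engines what
    the page is about.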
- </p> - <p> - <code - ><graph histogram length <meta description> Source: 10.07c, - column C, desktop & mobile></code - > - </p> - <h4 id="image-alt-tags">Image alt tags</h4> - <p> - Considering the importance of alt text for SEO and accessibility, it is far - from ideal to see that only 46.71% of mobile pages use alt attributes on all - of their images. This means that there are still improvements to be made - with regard to making images across the web more accessible to users and - understandable for search engines. - </p> - <p> - Learn more about this in the - <a href="../accessibility">Accessibility chapter</a>. - </p> - <h3 id="indexability">Indexability</h3> - <p> - To show a page's content to users in the SERPs, search engine crawlers must - first be permitted to access and index that page. Some of the factors that - impact a search engine's ability to crawl and index pages include: - </p> - <ul> - <li>Status codes</li> - <li>Noindex tags</li> - <li>Canonical tags</li> - <li>The robots.txt file</li> - </ul> - <h4 id="status-codes">Status codes</h4> - <p> - It is recommended to maintain a <code>200 OK</code> status code for any - important pages that you want search engines to index. The majority of pages - tested were available for search engines to access, with 87.03% of initial - HTML requests on desktop returning a <code>200</code> status code. The - results were slightly lower for mobile pages, with only 82.95% of pages - returning a <code>200</code> status code. - </p> - <p> - The next most commonly found status code on mobile was <code>302</code>, a - temporary redirect, which was found on 10.45% of mobile pages. This was - higher than on desktop, with only 6.71% desktop home pages returning a - <code>302</code> status code. This could be due to the fact that the - <a - href="https://developers.google.com/search/mobile-sites/mobile-seo/separate-urls" - >mobile home pages were alternates</a - > - to an equivalent desktop page, such as on non-responsive sites that have - separate versions of the website for each device. - </p> - <aside>Note: Our results didn't include `4xx` or `5xx` status codes.</aside> - <h4 id="noindex">Noindex</h4> - <p> - A noindex tag can be served in the HTML <code><head></code> or in the - HTTP headers as an <code>X-Robots</code> tag. A noindex tag basically tells - a search engine not to include that page in its SERPs, but the page will - still be accessible for users when they are navigating through the website. - Noindex tags are usually added to duplicate versions of pages that serve the - same content, or low quality pages that provide no value to users coming to - a website from organic search, such as filtered or faceted pages or internal - search pages. - </p> - <p> - 96.93% of mobile pages passed the - <a href="https://developers.google.com/web/tools/lighthouse/audits/indexing" - >Lighthouse indexing audit</a - >, meaning that these pages didn't contain a noindex tag in the - <code><head></code> or in the HTTP headers. However, this means that - 3.07% of mobile home pages <em>did</em> have a noindex tag, which is cause - for concern, meaning that Google was prevented from indexing these pages. - </p> - <aside> - The websites included in our research are sourced from the [Chrome UX - Report](https://developers.google.com/web/tools/chrome-user-experience-report/#methodology) - dataset, which excludes websites that are not publicly discoverable. 
This is - a significant source of bias because we're unable to analyze sites that - Chrome determines to be non-public. Learn more about our - [Methodology](../methodology#websites). - </aside> - <h4 id="canonicalization">Canonicalization</h4> - <p> - Canonical tags are used to specify duplicate pages and their preferred - alternates, so that search engines can consolidate authority which might be - spread across multiple pages within the group onto one main page for - improved rankings. - </p> - <p> - 48.34% of mobile home pages were - <a - href="https://developers.google.com/web/tools/lighthouse/audits/canonical" - >detected</a - > - to have a canonical tag. Self-referencing canonical tags aren't essential, - and canonical tags are usually required for duplicate pages. Home pages are - rarely duplicated anywhere else across the site so seeing that less than - half of pages have a canonical tag isn't surprising. - </p> - <h4 id="robotstxt">robots.txt</h4> - <p> - One of the most effective methods for controlling search engine crawling is - the - <a - href="https://www.deepcrawl.com/knowledge/technical-seo-library/robots-txt/" - >robots.txt file</a - >. This is a file that sits on the root domain of a website and specifies - which URLs and URL paths should be disallowed from being crawled by search - engines. - </p> - <p> - It was interesting to observe that only 72.16% of mobile sites have a valid - robots.txt, - <a href="https://developers.google.com/web/tools/lighthouse/audits/robots" - >according to Lighthouse</a - >. The key issues we found are split between 22% of sites having no - robots.txt file at all, and ~6% serving an invalid robots.txt file, and thus - failing the audit. While there are many valid reasons to not have a - robots.txt file, such as having a small website that doesn't struggle with - <a - href="https://webmasters.googleblog.com/2017/01/what-crawl-budget-means-for-googlebot.html" - >crawl budget issues</a - >, having an invalid robots.txt is cause for concern. - </p> - <h3 id="linking">Linking</h3> - <p> - One of the most important attributes of a web page is links. Links help - search engines discover new, relevant pages to add to their index and - navigate through websites. 96% of the web pages in our dataset contain at - least one internal link, and 93% contain at least one external link to - another domain. The small minority of pages that don't have any internal or - external links will be missing out on the immense value that links pass - through to target pages. - </p> - <p> - The number of internal and external links included on desktop pages were - consistently higher than the number found on mobile pages. Often a limited - space on a smaller viewport causes fewer links to be included in the design - of a mobile page compared to desktop. - </p> - <p> - It's important to bear in mind that fewer internal links on the mobile - version of a page - <a href="https://moz.com/blog/internal-linking-mobile-first-crawl-paths" - >might cause an issue</a - > - for your website. With - <a - href="https://www.deepcrawl.com/knowledge/white-papers/mobile-first-index-guide/" - >mobile-first indexing</a - >, which for new websites is the default for Google, if a page is only - linked from the desktop version and not present on the mobile version, - search engines will have a much harder time discovering and ranking it. 
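  </p>
  <p>
    Returning to the robots.txt results above, it is worth remembering how
    little is needed for a file to pass. A minimal, valid robots.txt, with
    purely illustrative paths and domain, can be as simple as:
  </p>
  <pre><code>User-agent: *
Disallow: /internal-search/
Disallow: /checkout/

Sitemap: https://www.example.com/sitemap.xml</code></pre>
  <p>
    The format is deliberately forgiving, which makes the ~6% of sites serving
    an invalid file all the more notable.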
- </p> - <p> - <code - ><graph histogram count of links by type Source: 10.10, column C - desktop only></code - > - </p> - <p> - <code - ><graph histogram count of links by type Source: 10.10, column D, E, - desktop only></code - > - </p> - <p> - The median desktop page includes 70 internal (same-site) links, whereas the - median mobile page has 60 internal links. The median number of external - links per page follows a similar trend, with desktop pages including 10 - external links, and mobile pages including 8. - </p> - <p> - Anchor links, which link to a certain scroll position on the same page, are - not very popular. Over 65% of home pages have no anchor links. This is - probably due to the fact that home pages don't usually contain any long-form - content. - </p> - <p> - There is good news from our analysis of the descriptive link text metric. - 89.94% of mobile pages pass Lighthouse's - <a - href="https://developers.google.com/web/tools/lighthouse/audits/descriptive-link-text" - >descriptive link text audit</a - >. This means that these pages don't have generic "click here", "go", "here" - or "learn more" links, but use more meaningful link text which helps users - and search engines better understand the context of pages and how they - connect with one another. - </p> - <h2 id="advanced">Advanced</h2> - <p> - Having descriptive, useful content on a page that isn't being blocked from - search engines with a noindex tag or robots.txt directive isn't enough for a - website to succeed in organic search. Those are just the basics. There is a - lot more than can be done to enhance the performance of a website and its - appearance in SERPs. - </p> - <p> - Some of the more technically complex aspects that have been gaining - importance in successfully indexing and ranking websites include: speed, - structured data, internationalization, security, and mobile friendliness. - </p> - <h3 id="speed">Speed</h3> - <p> - Mobile loading speed was first - <a - href="https://webmasters.googleblog.com/2018/01/using-page-speed-in-mobile-search.html" - >announced as a ranking factor</a - > - by Google in 2018. Speed isn't a new focus for Google though. Back in 2010 - it was - <a - href="https://webmasters.googleblog.com/2010/04/using-site-speed-in-web-search-ranking.html" - >revealed that speed had been introduced as a ranking signal</a - >. - </p> - <p> - A fast-loading website is also crucial for a good user experience. Users - that have to wait even a few seconds for a site to load have the tendency to - bounce and try another result from one of your competitors in the SERPs that - loads quickly and meets their expectations of website performance. - </p> - <p> - The metrics we used for our analysis of load speed across the web is based - on the - <a href="../methodology#chrome-ux-report">Chrome UX Report</a> (CrUX), which - collects data from real-world Chrome users. This data shows that an - astonishing 63.47% of websites are labelled as <strong>slow</strong>. Split - by device, this picture is even bleaker for tablet (82.00%) and phone - (77.61%). In the context of our results, per the - <a - href="https://developers.google.com/speed/docs/insights/v5/about#categories" - >PageSpeed Insights classification system</a - >, a slow website is defined as having 10% of First Contentful Paint (FCP) - experiences taking over 2,500 ms or 5% of First Input Delay (FID) - experiences measuring over 250 ms. 
- </p> - <p> - <code - ><graph data 10.15b: CruX image similar to - [IMG](https://developers.google.com/web/updates/images/2018/08/crux-dash-fcp.png) - per device + speed label></code - > - </p> - <p> - Although the numbers are bleak for the speed of the web, the good news is - that SEO experts and tools have been focusing more and more on the technical - challenges of speeding up websites. You can learn more about the state of - web performance in the <a href="../performance">Performance chapter</a>. - </p> - <h3 id="structured-data">Structured data</h3> - <p> - Structured data allows website owners to add additional semantic data to - their web pages, by adding - <a href="https://en.wikipedia.org/wiki/JSON-LD">JSON-LD</a> snippets or - <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Microdata" - >Microdata</a - >, for example. Search engines parse this data to better understand these - pages and sometimes use the markup to display additional relevant - information in the search results. The most commonly found types of - structured data are - <a - href="https://developers.google.com/search/docs/data-types/review-snippet" - >reviews</a - >, - <a href="https://developers.google.com/search/docs/data-types/product" - >products</a - >, - <a - href="https://developers.google.com/search/docs/data-types/local-business" - >businesses</a - >, - <a href="https://developers.google.com/search/docs/data-types/movie" - >movies</a - >, and - <a href="https://developers.google.com/search/docs/guides/search-gallery" - >more</a - >. - </p> - <p> - The - <a href="https://developers.google.com/search/docs/guides/enhance-site" - >extra visibility</a - > - that structured data can provide for websites is interesting for site - owners, given that it can help to create more opportunities for traffic. For - example, the relatively new - <a href="https://developers.google.com/search/docs/data-types/faqpage" - >FAQ schema</a - > - will double the size of your snippet and the real estate of your site in the - SERP. - </p> - <p> - During our research, we found that only 14.67% of sites are eligible for - rich results on mobile. Interestingly, desktop site eligibility is slightly - lower at 12.46%. This suggests that there is a lot more that site owners can - be doing to optimize the way their home pages are appearing in search. - </p> - <p> - Among the sites with structured data markup, the five most prevalent types - are: - </p> - <ol> - <li><code>WebSite</code> (16.02%)</li> - <li><code>SearchAction</code> (14.35%)</li> - <li><code>Organization</code> (12.89%)</li> - <li><code>WebPage</code> (11.58%)</li> - <li><code>ImageObject</code> (5.35%)</li> - </ol> - <p> - Interestingly, one of the most popular data types that triggers a search - engine feature is <code>SearchAction</code>, which powers the - <a - href="https://developers.google.com/search/docs/data-types/sitelinks-searchbox" - >sitelinks searchbox</a - >. - </p> - <p> - The top 5 markup types all lead to more visibility in Google's search - results, which might be the fuel for more widespread adoption of these types - of structured data. - </p> - <p> - Seeing as we only looked at home pages within this analysis, the results - might look very different if we were to consider interior pages, too. - </p> - <p> - Review stars are only found on 1.09% of the web's home pages (via - <a href="https://schema.org/AggregateRating">AggregateRating</a>). 
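  </p>
  <p>
    As an illustration of the kind of markup being counted here, a minimal
    JSON-LD block for the <code>WebSite</code> and <code>SearchAction</code>
    types mentioned above, with a placeholder domain, looks like this:
  </p>
  <pre><code><script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "WebSite",
  "url": "https://www.example.com/",
  "potentialAction": {
    "@type": "SearchAction",
    "target": "https://www.example.com/search?q={search_term_string}",
    "query-input": "required name=search_term_string"
  }
}
</script></code></pre>
  <p>
    A few lines like these in a page's <code><head></code> are all it takes for
    a home page to describe itself as a searchable website to crawlers.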
- </p> - <p> - Also, the newly introduced - <a href="https://schema.org/QAPage">QAPage</a> appeared only in 48 - instances, and the <a href="https://schema.org/FAQPage">FAQPage</a> at a - slightly higher frequency of 218 times. These last two counts are expected - to increase in the future as we run more crawls and dive deeper into Web - Almanac analysis. - </p> - <h3 id="internationalization">Internationalization</h3> - <p> - Internationalization is one of the most complex aspects of SEO, even - <a href="https://twitter.com/JohnMu/status/965507331369984002" - >according to some Google search employees</a - >. Internationalization in SEO focuses on serving the right content from a - website with multiple language or country versions, and making sure that - content is being targeted towards the specific language and location of the - user. - </p> - <p> - While 38.40% of desktop sites (33.79% on mobile) have the HTML lang - attribute set to English, only 7.43% (6.79% on mobile) of the sites also - contain an <code>hreflang</code> link to another language version. This - suggests that the vast majority of websites that we analyzed don't offer - separate versions of their home page that would require language targeting - -- unless these separate versions do exist, but haven't been configured - correctly. - </p> - <p> - <code - ><graph 10.04b - [do we want to chart this data, e.g. what does it - really mean for SEO?]></code - > - </p> - <p> - <code - ><include a chart of the languages and country combinations found, SEOs - will want to see this breakdown></code - > - </p> - <p> - Next to English, the most common languages are French, Spanish, and German. - These are followed by languages targeted towards specific geographies like - English for Americans (<code>en-us</code>) or more obscure combinations like - Spanish for the Irish (<code>es-ie</code>). - </p> - <p> - The analysis did not check for correct implementation, such as whether or - not the different language versions properly link to each other. However, - from looking at the low adoption of - <a - href="https://www.google.com/url?q=https://support.google.com/webmasters/answer/189077?hl%3Den&sa=D&ust=1570627963630000&usg=AFQjCNFwzwglsbysT9au_I-7ZQkwa-QvrA" - >having an x-default version as is recommended</a - > - (only 3.77% on desktop and 1.30% on mobile), this is an indicator that this - element is complex and not always easy to get right. - </p> - <h3 id="spa-crawlability">SPA crawlability</h3> - <p> - Single-page applications (SPAs) built with frameworks like React and Vue.js - come with their own SEO complexity. Websites using a hash-based navigation, - make it especially hard for search engines to properly crawl and index them. - For example, Google had an "AJAX crawling scheme" workaround that turned out - to be complex for search engines as well as developers, so it was - <a - href="https://webmasters.googleblog.com/2015/10/deprecating-our-ajax-crawling-scheme.html" - >deprecated in 2015</a - >. - </p> - <p> - The number of SPAs that were tested had a relatively low number of links - served via hash URLs, with 13.08% of React mobile pages using hash URLs for - navigation, 8.15% of mobile Vue.js pages using them, and 2.37% of mobile - Angular pages using them. These results were very similar for desktop pages - too. This is positive to see from an SEO perspective, considering the impact - that hash URLs can have on content discovery. 
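  </p>
  <p>
    For context, the difference being measured comes down to how a link is
    written. With illustrative URLs, a hash-based route and a History API route
    look like this:
  </p>
  <pre><code><!-- Hash-based route: the fragment is never sent to the server
     and is generally ignored when the page is crawled -->
<a href="https://www.example.com/#/products">Products</a>

<!-- History API route: an ordinary, crawlable URL -->
<a href="https://www.example.com/products">Products</a></code></pre>
  <p>
    Only the second form gives search engines a distinct URL to discover,
    index, and rank.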
- </p> - <p> - The higher number of hash URLs in React pages is surprising, especially in - contrast to the lower number of hash URLs found on Angular pages. Both - frameworks promote the adoption of routing packages where the - <a href="https://developer.mozilla.org/en-US/docs/Web/API/History" - >History API</a - > - is the default for links, instead of relying on hash URLs. Vue.js is - <a href="https://github.com/vuejs/rfcs/pull/40" - >considering moving to using the History API as the default</a - > - as well in version 3 of their <code>vue-router</code> package. - </p> - <h3 id="amp">AMP</h3> - <p> - AMP (formerly known as "Accelerated Mobile Pages") was first introduced in - 2015 by Google as an open source HTML framework. It provides components and - infrastructure for websites to provide a faster experience for users, by - using optimizations such as caching, lazy loading, and optimized images. - Notably, Google adopted this for their search engine, where AMP pages are - also served from their own CDN. This feature later became a standards - proposal under the name - <a - href="https://wicg.github.io/webpackage/draft-yasskin-http-origin-signed-responses.html" - >Signed HTTP Exchanges</a - >. - </p> - <p> - Despite this, only 0.62% of mobile home pages contain a link to an AMP - version. Given the visibility this project has had, this suggests that it - has had a relatively low adoption. However, AMP can be more useful for - serving article pages, so our home page-focused analysis won't reflect - adoption across other page types. - </p> - <h3 id="security">Security</h3> - <p> - A strong online shift in recent years has been for the web to move to HTTPS - by default. HTTPS prevents website traffic from being intercepted on public - WiFi networks, for example, where user input data is then transmitted - unsecurely. Google have been pushing for sites to adopt HTTPS, and even made - <a - href="https://webmasters.googleblog.com/2014/08/https-as-ranking-signal.html" - >HTTPS as a ranking signal</a - >. Chrome also supported the move to secure pages by labelling non-HTTPS - pages as ‘<a - href="https://www.blog.google/products/chrome/milestone-chrome-security-marking-http-not-secure/" - >not secure</a - >' in the browser. - </p> - <p> - For more information and guidance from Google on the importance of HTTPS and - how to adopt it, please see - <a - href="https://developers.google.com/web/fundamentals/security/encrypt-in-transit/why-https" - >Why HTTPS Matters</a - >. - </p> - <p> - We found that 67.06% of websites on desktop are now served over HTTPS. Just - under half of websites still haven't migrated to HTTPS and are serving - non-secure pages to their users. This is a significant number. Migrations - can be hard work, so this could be a reason why the adoption rate isn't - higher, but an HTTPS migration usually require an SSL certificate and a - simple change to the <code>.htaccess</code> file. There's no real reason not - to switch to HTTPS. - </p> - <p> - Google's - <a href="https://transparencyreport.google.com/https/overview" - >HTTPS Transparancy Report</a - > - reports a 90% adoption of HTTPS for the top 100 non-Google domains - (representing 25% of all website traffic worldwide). The difference between - this number and ours could be explained by the fact that relatively smaller - sites are adopting HTTPS at a slower rate. 
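  </p>
  <p>
    As a rough sketch of the <code>.htaccess</code> change mentioned above, a
    typical Apache rewrite rule that sends all traffic to HTTPS (assuming
    <code>mod_rewrite</code> is enabled and example.com stands in for the real
    domain) is only three lines:
  </p>
  <pre><code>RewriteEngine On
RewriteCond %{HTTPS} off
RewriteRule ^(.*)$ https://example.com/$1 [L,R=301]</code></pre>
  <p>
    The exact directives vary by host and server setup, so this is an
    illustration rather than a drop-in snippet, but it shows how small the
    configuration side of a migration can be.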
- </p> - <h2 id="conclusion">Conclusion</h2> - <p> - Through our analysis, we observed that the majority of websites are getting - the fundamentals right, in that their home pages are crawlable, indexable, - and include the key content required to rank well in search engines' results - pages. Not every person who owns a website will be aware of SEO at all, let - alone best practice guidelines, so it is promising to see that so many sites - have got the basics covered. - </p> - <p> - However, more sites are missing the mark than expected when it comes to some - of the more advanced aspects of SEO and accessibility. Site speed is one of - these factors that many websites are struggling with, especially on mobile. - This is a significant problem, as speed is one of the biggest contributors - to UX, which is something that can impact rankings. The number of websites - that aren't yet served over HTTPS is also problematic to see, considering - the importance of security and keeping user data safe. - </p> - <p> - There is a lot more that we can all be doing to learn about SEO best - practices and industry developments. This is essential due to the evolving - nature of the search industry and the rate at which changes happen. Search - engines make thousands of improvements to their algorithms each year, and we - need to keep up if we want our websites to reach more visitors in organic - search. - </p> -</section> {% endblock %} diff --git a/src/templates/en/2019/chapters/third-parties.html b/src/templates/en/2019/chapters/third-parties.html index 708558d3e7d..710a4621ae9 100644 --- a/src/templates/en/2019/chapters/third-parties.html +++ b/src/templates/en/2019/chapters/third-parties.html @@ -1,3 +1,5 @@ +{% extends "en/2019/base_chapter.html" %} + <!--{# IMPORTANT! 
- `chapter.html` is a "template for templates" used by the `generate_chapters.js` script, hence the strange template syntax (eg, mixing ejs and jinja syntax) @@ -8,524 +10,604 @@ - make changes to the markdown content directly (`src/content/<lang>/<year>/<chapter>.md`) because any changes to the chapter templates will be overwritten by the generation script #}--> -{% extends "en/2019/base_chapter.html" %} {% block styles %} {{ super() }} -<link rel="stylesheet" href="/static/css/chapter.css" /> -{% endblock %} {% set metadata = -{"part_number":"II","chapter_number":5,"title":"Third +{% set metadata = {"part_number":"II","chapter_number":5,"title":"Third Parties","authors":["patrickhulce"],"reviewers":["simonhearne"," flowlabs"," jasti"," zeman"]} %} {% block main %} -<aside> - <ul> - <li> - <a href="#chapter-5-third-parties">Chapter 5: Third Parties</a> - +<article id="chapter" class="main"> + <nav class="index"> + <div class="index-box floating-card"> + <h2 class="header">Index</h2> + <h2 class="header-mobile">Index</h2> <ul> <li> - <a href="#introduction">Introduction</a> - </li> - - <li> - <a href="#definitions">Definitions</a> + <a href="#chapter-5-third-parties">Chapter 5: Third Parties</a> <ul> <li> - <a href="#“third-party”">“Third Party”</a> + <a href="#introduction">Introduction</a> </li> <li> - <a href="#provider-categories">Provider Categories</a> - </li> - </ul> - </li> + <a href="#definitions">Definitions</a> - <li> - <a href="#data">Data</a> + <ul> + <li> + <a href="#“third-party”">“Third Party”</a> + </li> - <ul> - <li> - <a href="#categories">Categories</a> + <li> + <a href="#provider-categories">Provider Categories</a> + </li> + </ul> </li> <li> - <a href="#providers">Providers</a> - </li> + <a href="#data">Data</a> - <li> - <a href="#resource-types">Resource Types</a> - </li> + <ul> + <li> + <a href="#categories">Categories</a> + </li> - <li> - <a href="#request-count">Request Count</a> - </li> + <li> + <a href="#providers">Providers</a> + </li> - <li> - <a href="#byte-weight">Byte Weight</a> - </li> + <li> + <a href="#resource-types">Resource Types</a> + </li> - <li> - <a href="#script-execution">Script Execution</a> - </li> - </ul> - </li> + <li> + <a href="#request-count">Request Count</a> + </li> - <li> - <a href="#analysis">Analysis</a> + <li> + <a href="#byte-weight">Byte Weight</a> + </li> - <ul> - <li> - <a href="#usage-patterns">Usage Patterns</a> + <li> + <a href="#script-execution">Script Execution</a> + </li> + </ul> </li> <li> - <a href="#generate-and-consume-data">Generate and Consume Data</a> - </li> + <a href="#analysis">Analysis</a> - <li> - <a href="#monetize-web-traffic">Monetize Web Traffic</a> + <ul> + <li> + <a href="#usage-patterns">Usage Patterns</a> + </li> + + <li> + <a href="#generate-and-consume-data" + >Generate and Consume Data</a + > + </li> + + <li> + <a href="#monetize-web-traffic">Monetize Web Traffic</a> + </li> + + <li> + <a href="#simplify-development">Simplify Development</a> + </li> + </ul> </li> <li> - <a href="#simplify-development">Simplify Development</a> - </li> - </ul> - </li> + <a href="#repercussions">Repercussions</a> - <li> - <a href="#repercussions">Repercussions</a> + <ul> + <li> + <a href="#performance">Performance</a> + </li> - <ul> - <li> - <a href="#performance">Performance</a> + <li> + <a href="#privacy">Privacy</a> + </li> + </ul> </li> <li> - <a href="#privacy">Privacy</a> + <a href="#caveats">Caveats</a> </li> </ul> </li> + </ul> + </div> + </nav> + <section class="content"> + <section class="body"> + <h1> + <div 
class="decorative-line"></div> + {{ metadata.get('title') }} + <div class="decorative-line-mobile"></div> + </h1> + <img + src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" + class="content-banner" + /> + <h1 id="chapter-5-third-parties">Chapter 5: Third Parties</h1> + <h2 id="introduction">Introduction</h2> + <p> + The open web is vast, linkable, and interoperable by design. The ability + to grab someone else’s complex library and use it on your site with a + single <code><link></code> or <code><script></code> element + has supercharged developers’ productivity and enabled awesome new web + experiences. On the flip side, the immense popularity of a select few + third-party providers raises important performance and privacy concerns. + This chapter examines the prevalence and impact of third-party code on + the web in 2019, the web usage patterns that lead to the popularity of + third-party solutions, and potential repercussions for the future of web + performance and privacy. + </p> + <h2 id="definitions">Definitions</h2> + <h3 id="“third-party”">“Third Party”</h3> + <p> + A third party is an entity outside the primary site-user relationship, + i.e. the aspects of the site not directly within the control of the site + owner but present with their approval. For example, the Google Analytics + script is an example of a common third-party resource. + </p> + <p>Third-party resources are…</p> + <ul> + <li>Hosted on a shared and public origin</li> + <li>Widely used by a variety of sites</li> + <li>Uninfluenced by an individual site owner</li> + </ul> + <p> + To match these goals as closely as possible, the formal definition used + throughout this chapter of a third-party resource is a resource that + originates from a domain whose resources can be found on at least 50 + unique pages in the HTTPArchive dataset. + </p> + <h3 id="provider-categories">Provider Categories</h3> + <p> + This chapter divides third-party providers into one of these broad + categories. A brief description is included below and the mapping of + domain to category can be found in the + <a + href="https://github.com/patrickhulce/third-party-web/blob/8afa2d8cadddec8f0db39e7d715c07e85fb0f8ec/data/entities.json5" + >third-party-web repository</a + >. 
+ </p> + <ul> + <li><strong>Ad</strong> - display and measurement of advertisements</li> + <li><strong>Analytics</strong> - tracking site visitor behavior</li> + <li> + <strong>CDN</strong> - providers that host public shared utilities or + private content of their users + </li> + <li> + <strong>Content</strong> - providers that facilitate publishers and + host syndicated content + </li> + <li> + <strong>Customer Success</strong> - support and customer relationship + management functionality + </li> + <li> + <strong>Hosting</strong> - providers that host the arbitrary content + of their users + </li> + <li> + <strong>Marketing</strong> - sales, lead generation, and email + marketing functionality + </li> + <li> + <strong>Social</strong> - social networks and their affiliated + integrations + </li> + <li> + <strong>Tag Manager</strong> - provider whose sole role is to manage + the inclusion of other third parties + </li> + <li> + <strong>Utility</strong> - code that aids the development objectives + of the site owner + </li> + <li> + <strong>Video</strong> - providers that host the arbitrary video + content of their users + </li> <li> - <a href="#caveats">Caveats</a> + <strong>Other</strong> - uncategorized or non-conforming activity </li> </ul> - </li> - </ul> -</aside> + <h2 id="data">Data</h2> + <p> + Third-party code is everywhere. 93% of pages include at least one + third-party resource, 76% of pages issue a request to an analytics + domain, the median page requests content from at least 9 + <em>unique</em> third-party domains that represent 35% of their total + network activity, and the most active 10% of pages issue a whopping 175 + third-party requests or more. It’s not a stretch to say that third + parties are an integral part of the web. + </p> + <p><code><insert stylized value of metric 05_01></code></p> + <p><code><insert stylized value of metric 05_02></code></p> + <h3 id="categories">Categories</h3> + <p> + If the ubiquity of third-party content is unsurprising, perhaps more + interesting is the breakdown of third-party content by provider type. + </p> + <p> + While advertising might be the most user-visible example of third-party + presence on the web, analytics providers are the most common third-party + category with 76% of sites including at least one analytics request. + CDNs at 63%, ads at 57%, and developer utilities like Sentry, Stripe, + and Google Maps SDK at 56% follow up as a close second, third, and + fourth for appearing on the most web properties. The popularity of these + categories forms the foundation of our web usage patterns identified + later in the chapter. + </p> + <p><code><insert graphic of metric 05_11></code></p> + <h3 id="providers">Providers</h3> + <p> + A relatively small set of providers dominate the third-party landscape, + the top 100 domains account for 30% of network requests across the web. + Powerhouses like Google, Facebook, and YouTube make the headlines here + with full percentage points of share each, but smaller entities like Wix + and Shopify command a substantial portion of third-party popularity as + well. + </p> + <p> + While much could be said about every individual provider’s popularity + and performance impact, this more opinionated analysis is left as an + exercise for the reader and other purpose-built tools such as + <a href="https://thirdpartyweb.today">third-party-web</a>. 
+ </p> + <p><code><insert table of metric 05_06></code></p> + <p><code><insert table of metric 05_09></code></p> + <h3 id="resource-types">Resource Types</h3> + <p> + The resource type breakdown of third-party content also lends insight + into how third-party code is used across the web. While first-party + requests are 56% images, 23% script, 14% CSS, and only 4% HTML, + third-party requests skew more heavily toward script and HTML at 32% + script, 34% images, 12% HTML, and only 6% CSS. While this suggests that + third-party code is less frequently used to aid the design and instead + used more frequently to facilitate or observe interactions than + first-party code, a breakdown of resource types by party status tells a + more nuanced story. While CSS and images are dominantly first-party at + 70% and 64% respectively, fonts are largely served by third-party + providers with only 28% being served from first-party sources. This + concept of usage patterns is explored in more depth later in this + chapter. + </p> + <p><code><insert graphic of metric 05_03></code></p> + <p> + Several other amusing factoids jump out from this data. Tracking pixels + (image requests to analytics domains) make up 1.6% of all network + requests, six times as many video requests are to social networks like + Facebook and Twitter than dedicated video providers like YouTube and + Vimeo (presumably because the default YouTube embed consists of HTML and + a preview thumbnail but not an autoplaying video), and there are still + more requests for first-party images than all scripts combined. + </p> + <h3 id="request-count">Request Count</h3> + <p> + 49% of all requests are third-party. At 51%, first-party can still + narrowly hold on to the crown in 2019 of comprising the majority of the + web resources. Given that just under half of all the requests are + third-party yet a small set of pages do not include any at all, the most + active third-party users must be doing quite a bit more than their fair + share. Indeed, at the 75th, 90th, and 99th percentiles we see nearly all + of the page being comprised of third-party content. In fact, for some + sites heavily relying on distributed WYSIWYG platforms like Wix and + SquareSpace, the root document might be the sole first-party request! + </p> + <p><code><insert graphic of metric 05_11></code></p> + <p> + The number of requests issued by each third-party provider also varies + considerably by category. While analytics are the most widespread + third-party category across websites, they account for only 7% of all + third-party network requests. Ads, on the other hand, are found on + nearly 20% fewer sites yet make up 25% of all third-party network + requests. Their outsized resource impact compared to their popularity + will be a theme we continue to uncover in the remaining data. + </p> + <h3 id="byte-weight">Byte Weight</h3> + <p> + While 49% of requests are third-party, their share of the web in terms + of bytes is quite a bit lower at only 28%. The same goes for the + breakdown by multiple resource types. Third-party fonts make up 72% of + all fonts, but they’re only 53% of font bytes; 74% of HTML requests, but + only 39% of HTML bytes; 68% of video requests, but only 31% of video + bytes. All this seems to suggest third-party providers are responsible + stewards who keep their response sizes low, and, for the most part, that + is in fact the case until you look at scripts. + </p> + <p> + Despite serving 57% of scripts, third parties comprise 64% of script + bytes. 
meaning their scripts are larger on average than first-party + scripts. This is an early warning sign for their performance impact to + come in the next few sections. + </p> + <p><code><insert graphic of metric 05_04></code></p> + <p><code><insert graphic of metric 05_12></code></p> + <p> + As for specific third-party providers, the same juggernauts topping the + request count leaderboards make their appearance in byte weight as well. + The only few notable movements are the large, media-heavy providers such + as YouTube, Shopify, and Twitter which climb to the top of the byte + impact charts. + </p> + <p><code><insert table of metric 05_07></code></p> + <h3 id="script-execution">Script Execution</h3> + <p> + 57% of script execution time is from third-party scripts, and the top + 100 domains already account for 48% of all script execution time on the + web. This underscores just how large an impact a select few entities + really have on web performance. This topic is explored more in depth in + the <a href="#performance">Repercussions > Performance</a> section. + </p> + <p><code><insert graphic of metric 05_05></code></p> + <p><code><insert graphic of metric 05_13></code></p> + <p> + The category breakdowns among script execution largely follow that of + resource counts. Here too advertising looms largest. Ad scripts comprise + 25% of third-party script execution time with hosting and social + providers in a distant tie for second at 12%. + </p> + <p><code><insert table of metric 05_08></code></p> + <p><code><insert table of metric 05_10></code></p> + <p> + While much could be said about every individual provider’s popularity + and performance impact, this more opinionated analysis is left as an + exercise for the reader and other purpose-built tools such as the + previously mentioned + <a href="https://thirdpartyweb.today">third-party-web</a>. + </p> + <h2 id="analysis">Analysis</h2> + <h3 id="usage-patterns">Usage Patterns</h3> + <p> + Why do site owners use third-party code? How did third-party content + grow to be nearly half of all network requests? What are all these + requests doing? Answers to these questions lie in the three primary + usage patterns of third-party resources. Broadly, site owners reach for + third parties to generate and consume data from their users, monetize + their site experiences, and simplify web development. + </p> + <h3 id="generate-and-consume-data">Generate and Consume Data</h3> + <p> + Analytics is the most popular third-party category found across the web + and yet is minimally user-visible. Consider the volume of information at + play in the lifetime of a web visit; there’s user context, device, + browser, connection quality, location, page interactions, session + length, return visitor status, and more being generated continuously. + It’s difficult, cumbersome, and expensive to maintain tools that + warehouse, normalize, and analyze time series data of this magnitude. + While nothing categorically necessitates that analytics fall into the + domain of third-party providers, the widespread attractiveness of + understanding your users, deep complexity of the problem space, and + increasing emphasis on managing data respectfully and responsibly + naturally surfaces analytics as a popular third-party usage pattern. + </p> + <p> + There’s also a flip side to user data though: consumption. 
While + analytics is about generating data from your site’s visitors, other + third-party resources focus on consuming data about your visitors that + is known only by others. Social providers fall squarely into this usage + pattern. A site owner <em>must</em> use Facebook resources if they wish + to integrate information from a visitor’s Facebook profile into their + site. As long as site owners are interested in personalizing their + experience with widgets from social networks and leveraging the social + networks of their visitors to increase their reach, social integrations + are likely to remain the domain of third-party entities for the + foreseeable future. + </p> + <h3 id="monetize-web-traffic">Monetize Web Traffic</h3> + <p> + The open model of the web does not always serve the financial interests + of content creators to their liking and many site owners resort to + monetizing their sites with advertising. Because building direct + relationships with advertisers and negotiating pricing contracts is a + relatively difficult and time-consuming process, this concern is largely + handled by third-party providers performing targeted advertising and + real-time bidding. Widespread negative public opinion, the popularity of + ad blocking technology, and regulatory action in major global markets + such as Europe pose the largest threat to the continued use of + third-party providers for monetization. While it’s unlikely that site + owners suddenly strike their own advertising deals or build bespoke ad + networks, alternative monetization models like paywalls and experiments + like Brave’s + <a href="https://basicattentiontoken.org/">Basic Attention Token</a> + have a real chance of shaking up the third-party ad landscape of the + future. + </p> + <h3 id="simplify-development">Simplify Development</h3> + <p> + Above all, third-party resources are used to simplify the web + development experience. Even previous usage patterns could arguably fall + into this pattern as well. Whether analyzing user behavior, + communicating with advertisers, or personalizing the user experience, + third-party resources are used to make first-party development easier. + </p> + <p> + Hosting providers are the most extreme example of this pattern. Some of + these providers even enable anyone on Earth to become a site owner with + no technical expertise necessary. They provide hosting of assets, tools + to build sites without coding experience, and domain registration + services. + </p> + <p> + The remainder of third-party providers also tend to fall into this usage + pattern. Whether it’s hosting of a utility library such as jQuery for + usage by front-end developers cached on Cloudflare’s edge servers or a + vast library of common fonts served from a popular Google CDN, + third-party content is another way to give the site owner one fewer + thing to worry about and, maybe, just maybe make the job of delivering a + great experience a little bit easier. + </p> + <h2 id="repercussions">Repercussions</h2> + <h3 id="performance">Performance</h3> + <p> + The performance impact of third-party content is neither categorically + good nor bad. There are good and bad actors across the spectrum and + different category types have varying levels of influence. + </p> + <p> + The good: shared third-party font and stylesheet utilities are, on + average, delivered more efficiently than their first-party counterparts. 
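      </p>
      <p>
        A typical example of the kind of shared utility in question is a hosted
        font stylesheet, such as this Google Fonts request (the font family
        chosen here is arbitrary):
      </p>
      <pre><code><link
  href="https://fonts.googleapis.com/css?family=Roboto:400,700&display=swap"
  rel="stylesheet"
/></code></pre>
      <p>
        Requests of this shape are what the third-party font and stylesheet
        figures earlier in the chapter are counting.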
+ </p> + <p> + Utilities, CDNs, and Content categories are the brightest spots on the + third-party performance landscape. They offer optimized versions of the + same sort of content that would otherwise be served from first-party + sources. Google Fonts and Typekit serve optimized fonts that are smaller + on average than first-party fonts, Cloudflare CDN serves a minified + version of open source libraries that might be accidentally served in + development mode by some site owners, Google Maps SDK efficiently + delivers complex maps that might otherwise be naively shipped as large + images. + </p> + <p> + The bad: a very small set of entities represent a very large chunk of + JavaScript execution time carrying out narrow set of functionality on + pages. + </p> + <p> + Ads, social, hosting, and certain analytics providers represent the + largest negative impact on web performance. While hosting providers + deliver a majority of a site’s content and will understandably have a + larger performance impact than other third-party categories, they also + serve almost entirely static sites that demand very little JavaScript in + most cases that should not justify the volume of script execution time. + The other categories hurting performance though have even less of an + excuse. They fill very narrow roles on each page they appear on and yet + quickly take over a majority of resources. For example, the Facebook + "Like" button and associated social widgets take up extraordinarily + little screen real estate and are a fraction of most web experiences, + and yet the median impact on pages with social third parties is nearly + 20% of their total JavaScript execution time. The situation is similar + for analytics - tracking libraries do not directly contribute to the + perceived user experience, and yet the 90th percentile impact on pages + with analytics third parties is 44% of their total JavaScript execution + time. + </p> + <p> + The silver lining of such a small number of entities enjoying such large + market share is that a very limited and concentrated effort can have an + enormous impact on the web as a whole. Performance improvements at just + the top few hosting providers can improve 2-3% of <em>all</em> web + requests. + </p> + <h3 id="privacy">Privacy</h3> + <p> + The abundance of analytics providers and top-heavy concentration of + script execution raises two primary privacy concerns for site visitors: + the largest use case of third-parties is for site owners to track their + users and a handful of companies receive information on a large swath of + web traffic. + </p> + <p> + The interest of site owners in understanding and analyzing user behavior + is not malicious on its own, but the widespread and relatively + behind-the-scenes nature of web analytics raises valid concerns, and + users, companies, and lawmakers have taken notice in recent years with + privacy regulation such as + <a + href="https://en.wikipedia.org/wiki/General_Data_Protection_Regulation" + >GDPR</a + > + in Europe and the + <a href="https://en.wikipedia.org/wiki/California_Consumer_Privacy_Act" + >CCPA</a + > + in California. Ensuring that developers handle user data responsibly, + treat the user respectfully, and are transparent with what data is + collected is key to keeping analytics the most popular third-party + category and maintaining the symbiotic nature of analyzing user behavior + to deliver future user value. 
+ </p> + <p> + The top-heavy concentration of script execution is great for the + potential impact of performance improvements, but less exciting for the + privacy ramifications. 29% of <em>all</em> script execution time across + the web is just from scripts on domains owned by Google or Facebook. + That’s a very large percentage of CPU time that is controlled by just + two entities. It’s critical to ensure that the same privacy protections + held to analytics providers be applied in these other ad, social, and + developer utility categories as well. + </p> + <h2 id="caveats">Caveats</h2> + <ul> + <li> + All data presented here is based on a non-interactive, cold load. + These values could start to look quite different after user + interaction. + </li> + <li> + Third-party content served from a first-party domain is counted as + first-party content. i.e. self-hosting Google Fonts or bootstrap.css + will be considered first-party content. + </li> + <li> + First-party content served from a third-party domain is counted as + third-party content. i.e. first-party images served over a third-party + CDN will be considered third-party content. + </li> + <li> + Roughly 84% of all third-party domains by request volume have been + identified and categorized. The remaining 16% fall into the “Other” + category. + </li> + </ul> + </section> + <section class="authors"> + <h4>Authors :</h4> + <ul> + <li> + <img + class="avatar" + alt="Author name" + src="https://www.gravatar.com/avatar/cf58fcc6995e15f35e42532c3775fed6.jpg?d=mp&s=200" + /> + <div class="info"> + <span class="name">Full Name</span> + <span class="social"> + <a class="twitter" href="https://twitter.com/rick_viscomi"> + <img src="/static/images/twitter.png" alt="Twitter account" /> + </a> + + <a class="github" href="https://github.com/rviscomi"> + <img src="/static/images/github.png" alt="github account" /> + </a> + </span> + + <div class="tagline"> + Tagline of contributor here + </div> + </div> + </li> + </ul> + </section> + + <nav id="chapter-navigation"> + <a id="previous-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + + <a id="next-chapter" href="#replace-with-url-to-chapter"> + <span class="arrow">⌃</span> + <span class="chapter-no"> + Chapter X + </span> + <span class="chapter-title"> + Chapter title + </span> + </a> + </nav> + </section> +</article> -<section class="main"> - <h1 class="chapter-title">{{ metadata.get('title') }}</h1> - <img - src="/static/images/2019/{{ get_chapter_image_dir(metadata) }}/hero_lg.jpg" - class="chapter-hero" - /> - <h1 id="chapter-5-third-parties">Chapter 5: Third Parties</h1> - <h2 id="introduction">Introduction</h2> - <p> - The open web is vast, linkable, and interoperable by design. The ability to - grab someone else’s complex library and use it on your site with a single - <code><link></code> or <code><script></code> element has - supercharged developers’ productivity and enabled awesome new web - experiences. On the flip side, the immense popularity of a select few - third-party providers raises important performance and privacy concerns. - This chapter examines the prevalence and impact of third-party code on the - web in 2019, the web usage patterns that lead to the popularity of - third-party solutions, and potential repercussions for the future of web - performance and privacy. 
- </p> - <h2 id="definitions">Definitions</h2> - <h3 id="“third-party”">“Third Party”</h3> - <p> - A third party is an entity outside the primary site-user relationship, i.e. - the aspects of the site not directly within the control of the site owner - but present with their approval. For example, the Google Analytics script is - an example of a common third-party resource. - </p> - <p>Third-party resources are…</p> - <ul> - <li>Hosted on a shared and public origin</li> - <li>Widely used by a variety of sites</li> - <li>Uninfluenced by an individual site owner</li> - </ul> - <p> - To match these goals as closely as possible, the formal definition used - throughout this chapter of a third-party resource is a resource that - originates from a domain whose resources can be found on at least 50 unique - pages in the HTTPArchive dataset. - </p> - <h3 id="provider-categories">Provider Categories</h3> - <p> - This chapter divides third-party providers into one of these broad - categories. A brief description is included below and the mapping of domain - to category can be found in the - <a - href="https://github.com/patrickhulce/third-party-web/blob/8afa2d8cadddec8f0db39e7d715c07e85fb0f8ec/data/entities.json5" - >third-party-web repository</a - >. - </p> - <ul> - <li><strong>Ad</strong> - display and measurement of advertisements</li> - <li><strong>Analytics</strong> - tracking site visitor behavior</li> - <li> - <strong>CDN</strong> - providers that host public shared utilities or - private content of their users - </li> - <li> - <strong>Content</strong> - providers that facilitate publishers and host - syndicated content - </li> - <li> - <strong>Customer Success</strong> - support and customer relationship - management functionality - </li> - <li> - <strong>Hosting</strong> - providers that host the arbitrary content of - their users - </li> - <li> - <strong>Marketing</strong> - sales, lead generation, and email marketing - functionality - </li> - <li> - <strong>Social</strong> - social networks and their affiliated - integrations - </li> - <li> - <strong>Tag Manager</strong> - provider whose sole role is to manage the - inclusion of other third parties - </li> - <li> - <strong>Utility</strong> - code that aids the development objectives of - the site owner - </li> - <li> - <strong>Video</strong> - providers that host the arbitrary video content - of their users - </li> - <li><strong>Other</strong> - uncategorized or non-conforming activity</li> - </ul> - <h2 id="data">Data</h2> - <p> - Third-party code is everywhere. 93% of pages include at least one - third-party resource, 76% of pages issue a request to an analytics domain, - the median page requests content from at least 9 <em>unique</em> third-party - domains that represent 35% of their total network activity, and the most - active 10% of pages issue a whopping 175 third-party requests or more. It’s - not a stretch to say that third parties are an integral part of the web. - </p> - <p><code><insert stylized value of metric 05_01></code></p> - <p><code><insert stylized value of metric 05_02></code></p> - <h3 id="categories">Categories</h3> - <p> - If the ubiquity of third-party content is unsurprising, perhaps more - interesting is the breakdown of third-party content by provider type. - </p> - <p> - While advertising might be the most user-visible example of third-party - presence on the web, analytics providers are the most common third-party - category with 76% of sites including at least one analytics request. 
- <h3 id="categories">Categories</h3>
- <p>
- If the ubiquity of third-party content is unsurprising, perhaps more
- interesting is the breakdown of third-party content by provider type.
- </p>
- <p>
- While advertising might be the most user-visible example of third-party
- presence on the web, analytics providers are the most common third-party
- category, with 76% of sites including at least one analytics request. CDNs
- at 63%, ads at 57%, and developer utilities like Sentry, Stripe, and Google
- Maps SDK at 56% follow as a close second, third, and fourth in appearing on
- the most web properties. The popularity of these categories forms the
- foundation of our web usage patterns identified later in the chapter.
- </p>
- <p><code><insert graphic of metric 05_11></code></p>
- <h3 id="providers">Providers</h3>
- <p>
- A relatively small set of providers dominate the third-party landscape: the
- top 100 domains account for 30% of network requests across the web.
- Powerhouses like Google, Facebook, and YouTube make the headlines here with
- full percentage points of share each, but smaller entities like Wix and
- Shopify command a substantial portion of third-party popularity as well.
- </p>
- <p>
- While much could be said about every individual provider’s popularity and
- performance impact, this more opinionated analysis is left as an exercise
- for the reader and other purpose-built tools such as
- <a href="https://thirdpartyweb.today">third-party-web</a>.
- </p>
- <p><code><insert table of metric 05_06></code></p>
- <p><code><insert table of metric 05_09></code></p>
- <h3 id="resource-types">Resource Types</h3>
- <p>
- The resource type breakdown of third-party content also lends insight into
- how third-party code is used across the web. While first-party requests are
- 56% images, 23% script, 14% CSS, and only 4% HTML, third-party requests skew
- more heavily toward script and HTML at 32% script, 34% images, 12% HTML, and
- only 6% CSS. This suggests that third-party code is used less often to aid
- design and more often to facilitate or observe interactions than first-party
- code, but a breakdown of resource types by party status tells a more nuanced
- story. While CSS and images are dominantly first-party at 70% and 64%
- respectively, fonts are largely served by third-party providers, with only
- 28% being served from first-party sources. This concept of usage patterns is
- explored in more depth later in this chapter.
- </p>
- <p><code><insert graphic of metric 05_03></code></p>
- <p>
- Several other amusing factoids jump out from this data. Tracking pixels
- (image requests to analytics domains) make up 1.6% of all network requests,
- six times as many video requests are to social networks like Facebook and
- Twitter as to dedicated video providers like YouTube and Vimeo (presumably
- because the default YouTube embed consists of HTML and a preview thumbnail
- but not an autoplaying video), and there are still more requests for
- first-party images than all scripts combined.
- </p>
- <h3 id="request-count">Request Count</h3>
- <p>
- 49% of all requests are third-party. At 51%, first-party requests can still
- narrowly hold on to the crown in 2019 as the majority of web resources.
- Given that just under half of all requests are third-party yet a small set
- of pages do not include any at all, the most active third-party users must
- be doing quite a bit more than their fair share. Indeed, at the 75th, 90th,
- and 99th percentiles we see nearly all of the page being made up of
- third-party content. In fact, for some sites heavily relying on distributed
- WYSIWYG platforms like Wix and SquareSpace, the root document might be the
- sole first-party request!
- </p>
- <p><code><insert graphic of metric 05_11></code></p>
- <p>
- The number of requests issued by each third-party provider also varies
- considerably by category. While analytics are the most widespread
- third-party category across websites, they account for only 7% of all
- third-party network requests. Ads, on the other hand, are found on nearly
- 20% fewer sites yet make up 25% of all third-party network requests. Their
- outsized resource impact compared to their popularity will be a theme we
- continue to uncover in the remaining data.
- </p>
- <h3 id="byte-weight">Byte Weight</h3>
- <p>
- While 49% of requests are third-party, their share of the web in terms of
- bytes is quite a bit lower at only 28%. The same goes for the breakdown by
- multiple resource types. Third-party fonts make up 72% of all fonts, but
- they’re only 53% of font bytes; 74% of HTML requests, but only 39% of HTML
- bytes; 68% of video requests, but only 31% of video bytes. All this seems to
- suggest third-party providers are responsible stewards who keep their
- response sizes low, and, for the most part, that is in fact the case until
- you look at scripts.
- </p>
- <p>
- Despite serving 57% of scripts, third parties comprise 64% of script bytes,
- meaning their scripts are larger on average than first-party scripts. This
- is an early warning sign for their performance impact to come in the next
- few sections.
- </p>
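The jump from a 57% share of script requests to a 64% share of script bytes is what implies the larger average size. A back-of-the-envelope check, using only the approximate shares quoted above (illustrative arithmetic, not an Almanac query):

```js
// Back-of-the-envelope check of "third-party scripts are larger on average".
// Shares are the approximate figures quoted above; absolute totals cancel out.
const thirdPartyShareOfScriptRequests = 0.57;
const thirdPartyShareOfScriptBytes = 0.64;

// Average script size is proportional to (share of bytes) / (share of requests).
const thirdPartyRelativeSize =
  thirdPartyShareOfScriptBytes / thirdPartyShareOfScriptRequests; // ≈ 1.12
const firstPartyRelativeSize =
  (1 - thirdPartyShareOfScriptBytes) / (1 - thirdPartyShareOfScriptRequests); // ≈ 0.84

// Under these shares, the average third-party script is roughly a third
// larger than the average first-party script (≈ 1.12 / 0.84 ≈ 1.34).
console.log((thirdPartyRelativeSize / firstPartyRelativeSize).toFixed(2));
```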
- <p><code><insert graphic of metric 05_04></code></p>
- <p><code><insert graphic of metric 05_12></code></p>
- <p>
- As for specific third-party providers, the same juggernauts topping the
- request count leaderboards make their appearance in byte weight as well. The
- only notable movements are the large, media-heavy providers such as YouTube,
- Shopify, and Twitter, which climb to the top of the byte impact charts.
- </p>
- <p><code><insert table of metric 05_07></code></p>
- <h3 id="script-execution">Script Execution</h3>
- <p>
- 57% of script execution time is from third-party scripts, and the top 100
- domains already account for 48% of all script execution time on the web.
- This underscores just how large an impact a select few entities really have
- on web performance. This topic is explored in more depth in the
- <a href="#performance">Repercussions > Performance</a> section.
- </p>
- <p><code><insert graphic of metric 05_05></code></p>
- <p><code><insert graphic of metric 05_13></code></p>
- <p>
- The category breakdowns among script execution largely follow those of
- resource counts. Here too advertising looms largest. Ad scripts comprise 25%
- of third-party script execution time, with hosting and social providers in a
- distant tie for second at 12%.
- </p>
- <p><code><insert table of metric 05_08></code></p>
- <p><code><insert table of metric 05_10></code></p>
- <p>
- While much could be said about every individual provider’s popularity and
- performance impact, this more opinionated analysis is left as an exercise
- for the reader and other purpose-built tools such as the previously
- mentioned <a href="https://thirdpartyweb.today">third-party-web</a>.
- </p>
- <h2 id="analysis">Analysis</h2>
- <h3 id="usage-patterns">Usage Patterns</h3>
- <p>
- Why do site owners use third-party code? How did third-party content grow to
- be nearly half of all network requests? What are all these requests doing?
- Answers to these questions lie in the three primary usage patterns of
- third-party resources. Broadly, site owners reach for third parties to
- generate and consume data from their users, monetize their site experiences,
- and simplify web development.
- </p>
- <h3 id="generate-and-consume-data">Generate and Consume Data</h3>
- <p>
- Analytics is the most popular third-party category found across the web and
- yet is minimally user-visible. Consider the volume of information at play in
- the lifetime of a web visit; there’s user context, device, browser,
- connection quality, location, page interactions, session length, return
- visitor status, and more being generated continuously. It’s difficult,
- cumbersome, and expensive to maintain tools that warehouse, normalize, and
- analyze time series data of this magnitude. While nothing categorically
- necessitates that analytics fall into the domain of third-party providers,
- the widespread attractiveness of understanding your users, the deep
- complexity of the problem space, and the increasing emphasis on managing
- data respectfully and responsibly naturally surface analytics as a popular
- third-party usage pattern.
- </p>
- <p>
- There’s also a flip side to user data though: consumption. While analytics
- is about generating data from your site’s visitors, other third-party
- resources focus on consuming data about your visitors that is known only by
- others. Social providers fall squarely into this usage pattern. A site owner
- <em>must</em> use Facebook resources if they wish to integrate information
- from a visitor’s Facebook profile into their site. As long as site owners
- are interested in personalizing their experience with widgets from social
- networks and leveraging the social networks of their visitors to increase
- their reach, social integrations are likely to remain the domain of
- third-party entities for the foreseeable future.
- </p>
- <h3 id="monetize-web-traffic">Monetize Web Traffic</h3>
- <p>
- The open model of the web does not always serve the financial interests of
- content creators to their liking, and many site owners resort to monetizing
- their sites with advertising. Because building direct relationships with
- advertisers and negotiating pricing contracts is a relatively difficult and
- time-consuming process, this concern is largely handled by third-party
- providers performing targeted advertising and real-time bidding. Widespread
- negative public opinion, the popularity of ad blocking technology, and
- regulatory action in major global markets such as Europe pose the largest
- threat to the continued use of third-party providers for monetization. While
- it’s unlikely that site owners will suddenly strike their own advertising
- deals or build bespoke ad networks, alternative monetization models like
- paywalls and experiments like Brave’s
- <a href="https://basicattentiontoken.org/">Basic Attention Token</a> have a
- real chance of shaking up the third-party ad landscape of the future.
- </p>
- <h3 id="simplify-development">Simplify Development</h3>
- <p>
- Above all, third-party resources are used to simplify the web development
- experience. Even the previous usage patterns could arguably fall into this
- pattern as well. Whether analyzing user behavior, communicating with
- advertisers, or personalizing the user experience, third-party resources are
- used to make first-party development easier.
- </p>
- <p>
- Hosting providers are the most extreme example of this pattern. Some of
- these providers even enable anyone on Earth to become a site owner with no
- technical expertise necessary. They provide hosting of assets, tools to
- build sites without coding experience, and domain registration services.
- </p>
- <p>
- The remainder of third-party providers also tend to fall into this usage
- pattern. Whether it’s a utility library such as jQuery cached on
- Cloudflare’s edge servers for use by front-end developers, or a vast library
- of common fonts served from a popular Google CDN, third-party content is
- another way to give the site owner one fewer thing to worry about and,
- maybe, just maybe, make the job of delivering a great experience a little
- bit easier.
- </p>
- <h2 id="repercussions">Repercussions</h2>
- <h3 id="performance">Performance</h3>
- <p>
- The performance impact of third-party content is neither categorically good
- nor bad. There are good and bad actors across the spectrum, and different
- category types have varying levels of influence.
- </p>
- <p>
- The good: shared third-party font and stylesheet utilities are, on average,
- delivered more efficiently than their first-party counterparts.
- </p>
- <p>
- The Utility, CDN, and Content categories are the brightest spots on the
- third-party performance landscape. They offer optimized versions of the same
- sort of content that would otherwise be served from first-party sources.
- Google Fonts and Typekit serve optimized fonts that are smaller on average
- than first-party fonts; Cloudflare’s CDN serves minified versions of open
- source libraries that might be accidentally served in development mode by
- some site owners; and the Google Maps SDK efficiently delivers complex maps
- that might otherwise be naively shipped as large images.
- </p>
- <p>
- The bad: a very small set of entities represent a very large chunk of
- JavaScript execution time, carrying out a narrow set of functionality on
- pages.
- </p>
- <p>
- Ads, social, hosting, and certain analytics providers represent the largest
- negative impact on web performance. While hosting providers deliver a
- majority of a site’s content and will understandably have a larger
- performance impact than other third-party categories, they also serve almost
- entirely static sites that, in most cases, demand very little JavaScript and
- should not justify the volume of script execution time. The other categories
- hurting performance, though, have even less of an excuse. They fill very
- narrow roles on each page they appear on and yet quickly take over a
- majority of resources. For example, the Facebook "Like" button and
- associated social widgets take up extraordinarily little screen real estate
- and are a fraction of most web experiences, and yet the median impact on
- pages with social third parties is nearly 20% of their total JavaScript
- execution time. The situation is similar for analytics: tracking libraries
- do not directly contribute to the perceived user experience, and yet the
- 90th percentile impact on pages with analytics third parties is 44% of their
- total JavaScript execution time.
- </p>
- <p>
- The silver lining of such a small number of entities enjoying such large
- market share is that a very limited and concentrated effort can have an
- enormous impact on the web as a whole. Performance improvements at just the
- top few hosting providers can improve 2-3% of <em>all</em> web requests.
- </p>
- <h3 id="privacy">Privacy</h3>
- <p>
- The abundance of analytics providers and the top-heavy concentration of
- script execution raise two primary privacy concerns for site visitors: the
- largest use case of third parties is for site owners to track their users,
- and a handful of companies receive information on a large swath of web
- traffic.
- </p>
- <p>
- The interest of site owners in understanding and analyzing user behavior is
- not malicious on its own, but the widespread and relatively
- behind-the-scenes nature of web analytics raises valid concerns, and users,
- companies, and lawmakers have taken notice in recent years with privacy
- regulation such as the
- <a href="https://en.wikipedia.org/wiki/General_Data_Protection_Regulation"
- >GDPR</a
- >
- in Europe and the
- <a href="https://en.wikipedia.org/wiki/California_Consumer_Privacy_Act"
- >CCPA</a
- >
- in California. Ensuring that developers handle user data responsibly, treat
- the user respectfully, and are transparent about what data is collected is
- key to keeping analytics the most popular third-party category and
- maintaining the symbiotic nature of analyzing user behavior to deliver
- future user value.
- </p>
- <p>
- The top-heavy concentration of script execution is great for the potential
- impact of performance improvements, but less exciting for the privacy
- ramifications. 29% of <em>all</em> script execution time across the web is
- just from scripts on domains owned by Google or Facebook. That’s a very
- large percentage of CPU time that is controlled by just two entities. It’s
- critical to ensure that the same privacy protections expected of analytics
- providers are applied to these other ad, social, and developer utility
- categories as well.
- </p>
- <h2 id="caveats">Caveats</h2>
- <ul>
- <li>
- All data presented here is based on a non-interactive, cold load. These
- values could start to look quite different after user interaction.
- </li>
- <li>
- Third-party content served from a first-party domain is counted as
- first-party content. For example, self-hosted Google Fonts or bootstrap.css
- is considered first-party content.
- </li>
- <li>
- First-party content served from a third-party domain is counted as
- third-party content. For example, first-party images served over a
- third-party CDN are considered third-party content.
- </li>
- <li>
- Roughly 84% of all third-party domains by request volume have been
- identified and categorized. The remaining 16% fall into the “Other”
- category.
- </li>
- </ul>
-</section> {% endblock %}