From 9b18df9dcbfcc9307812427619c8f3a9c6f34f31 Mon Sep 17 00:00:00 2001 From: Ali Hassan <24819103+thisalihassan@users.noreply.github.com> Date: Sat, 13 Apr 2024 17:57:12 +0500 Subject: [PATCH] url: implement parse method for safer URL parsing Implement the static parse method as per the WHATWG URL specification. Unlike the URL constructor, URL.parse does not throw on invalid input, instead returning null. This behavior allows safer parsing of URLs without the need for try-catch blocks around constructor calls. The implementation follows the steps outlined in the WHATWG URL standard, ensuring compatibility and consistency with web platform URL parsing APIs. Fixes: https://github.com/nodejs/node/issues/52208 Refs: https://github.com/whatwg/url/pull/825 PR-URL: https://github.com/nodejs/node/pull/52280 Reviewed-By: Yagiz Nizipli Reviewed-By: Matteo Collina Reviewed-By: Daniel Lemire Reviewed-By: Benjamin Gruenbaum --- lib/internal/url.js | 24 ++- src/node_url.cc | 11 +- test/fixtures/wpt/README.md | 2 +- .../wpt/url/resources/urltestdata.json | 185 ++++++++++++++++++ .../fixtures/wpt/url/url-statics-parse.any.js | 50 +++++ test/fixtures/wpt/versions.json | 2 +- 6 files changed, 268 insertions(+), 6 deletions(-) create mode 100644 test/fixtures/wpt/url/url-statics-parse.any.js diff --git a/lib/internal/url.js b/lib/internal/url.js index 0e69ff52b5edef..39d79392bd86bc 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -769,6 +769,14 @@ function isURL(self) { return Boolean(self?.href && self.protocol && self.auth === undefined && self.path === undefined); } +/** + * A unique symbol used as a private identifier to safely invoke the URL constructor + * with a special parsing behavior. When passed as the third argument to the URL + * constructor, it signals that the constructor should not throw an exception + * for invalid URL inputs. + */ +const kParseURLSymbol = Symbol('kParseURL'); + class URL { #context = new URLContext(); #searchParams; @@ -787,7 +795,7 @@ class URL { }; } - constructor(input, base = undefined) { + constructor(input, base = undefined, parseSymbol = undefined) { markTransferMode(this, false, false); if (arguments.length === 0) { @@ -801,7 +809,19 @@ class URL { base = `${base}`; } - this.#updateContext(bindingUrl.parse(input, base)); + const raiseException = parseSymbol !== kParseURLSymbol; + const href = bindingUrl.parse(input, base, raiseException); + if (href) { + this.#updateContext(href); + } + } + + static parse(input, base = undefined) { + if (arguments.length === 0) { + throw new ERR_MISSING_ARGS('url'); + } + const parsedURLObject = new URL(input, base, kParseURLSymbol); + return parsedURLObject.href ? parsedURLObject : null; } [inspect.custom](depth, opts) { diff --git a/src/node_url.cc b/src/node_url.cc index 95d15c78407359..74b639c23084b5 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -233,6 +233,9 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); // input // args[1] // base url + // args[2] // raise Exception + + const bool raise_exception = args.Length() > 2 && args[2]->IsTrue(); Realm* realm = Realm::GetCurrent(args); BindingData* binding_data = realm->GetBindingData(); @@ -245,16 +248,20 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { if (args[1]->IsString()) { base_ = Utf8Value(isolate, args[1]).ToString(); base = ada::parse(*base_); - if (!base) { + if (!base && raise_exception) { return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else if (!base) { + return; } base_pointer = &base.value(); } auto out = ada::parse(input.ToStringView(), base_pointer); - if (!out) { + if (!out && raise_exception) { return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else if (!out) { + return; } binding_data->UpdateComponents(out->get_components(), out->type); diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md index a4ca98dc7ffa4f..3a2bb782480ef6 100644 --- a/test/fixtures/wpt/README.md +++ b/test/fixtures/wpt/README.md @@ -28,7 +28,7 @@ Last update: - resource-timing: https://github.com/web-platform-tests/wpt/tree/22d38586d0/resource-timing - resources: https://github.com/web-platform-tests/wpt/tree/1e140d63ec/resources - streams: https://github.com/web-platform-tests/wpt/tree/3df6d94318/streams -- url: https://github.com/web-platform-tests/wpt/tree/c2d7e70b52/url +- url: https://github.com/web-platform-tests/wpt/tree/0f550ab9f5/url - user-timing: https://github.com/web-platform-tests/wpt/tree/5ae85bf826/user-timing - wasm/jsapi: https://github.com/web-platform-tests/wpt/tree/cde25e7e3c/wasm/jsapi - wasm/webapi: https://github.com/web-platform-tests/wpt/tree/fd1b23eeaa/wasm/webapi diff --git a/test/fixtures/wpt/url/resources/urltestdata.json b/test/fixtures/wpt/url/resources/urltestdata.json index 287a84b467a48b..9f1be0449c63d3 100644 --- a/test/fixtures/wpt/url/resources/urltestdata.json +++ b/test/fixtures/wpt/url/resources/urltestdata.json @@ -734,6 +734,36 @@ "search": "", "hash": "" }, + { + "input": "http://a:b@c\\", + "base": null, + "href": "http://a:b@c/", + "origin": "http://c", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c", + "hostname": "c", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://a@b\\c", + "base": null, + "href": "ws://a@b/c", + "origin": "ws://b", + "protocol": "ws:", + "username": "a", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, { "input": "foo:/", "base": "http://example.org/foo/bar", @@ -9529,5 +9559,160 @@ "pathname": "", "search": "", "hash": "" + }, + "Scheme relative path starting with multiple slashes", + { + "input": "///test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///\\//\\//test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "/\\/\\//example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///abcdef/../", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/\\//\\/a/../", + "base": "file:///", + "href": "file://////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "////", + "search": "", + "hash": "" + }, + { + "input": "//a/../", + "base": "file:///", + "href": "file://a/", + "protocol": "file:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/", + "search": "", + "hash": "" } ] diff --git a/test/fixtures/wpt/url/url-statics-parse.any.js b/test/fixtures/wpt/url/url-statics-parse.any.js new file mode 100644 index 00000000000000..0822e9da07af6a --- /dev/null +++ b/test/fixtures/wpt/url/url-statics-parse.any.js @@ -0,0 +1,50 @@ +// This intentionally does not use resources/urltestdata.json to preserve resources. +[ + { + "url": undefined, + "base": undefined, + "expected": false + }, + { + "url": "aaa:b", + "base": undefined, + "expected": true + }, + { + "url": undefined, + "base": "aaa:b", + "expected": false + }, + { + "url": "aaa:/b", + "base": undefined, + "expected": true + }, + { + "url": undefined, + "base": "aaa:/b", + "expected": true + }, + { + "url": "https://test:test", + "base": undefined, + "expected": false + }, + { + "url": "a", + "base": "https://b/", + "expected": true + } +].forEach(({ url, base, expected }) => { + test(() => { + if (expected == false) { + assert_equals(URL.parse(url, base), null); + } else { + assert_equals(URL.parse(url, base).href, new URL(url, base).href); + } + }, `URL.parse(${url}, ${base})`); +}); + +test(() => { + assert_not_equals(URL.parse("https://example/"), URL.parse("https://example/")); +}, `URL.parse() should return a unique object`); diff --git a/test/fixtures/wpt/versions.json b/test/fixtures/wpt/versions.json index a7c655125b5c22..5f65dd1a829c58 100644 --- a/test/fixtures/wpt/versions.json +++ b/test/fixtures/wpt/versions.json @@ -72,7 +72,7 @@ "path": "streams" }, "url": { - "commit": "c2d7e70b52cbd9a5b938aa32f37078d7a71e0b21", + "commit": "0f550ab9f5a07ed293926a306e914866164b346b", "path": "url" }, "user-timing": {