From 72a3cacc95339e1d15a09042d6acfc6da18cf413 Mon Sep 17 00:00:00 2001
From: Timothy Gu
Date: Tue, 14 Mar 2017 23:41:57 -0700
Subject: [PATCH] src: remove explicit UTF-8 validity check in url

This step was never part of the URL Standard's host parser algorithm,
and is rendered unnecessary after IDNA errors are no longer ignored.

PR-URL: https://github.com/nodejs/node/pull/12507
Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error"
Reviewed-By: James M Snell
---
 src/node_url.cc | 30 ------------------------------
 1 file changed, 30 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index d9213738e7f894..6cd78c2c6c04c8 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -15,11 +15,6 @@
 #include
 #include
 
-#if defined(NODE_HAVE_I18N_SUPPORT)
-#include
-#include
-#endif
-
 #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
 
 namespace node {
@@ -74,21 +69,6 @@ namespace url {
     output->assign(*buf, buf.length());
     return true;
   }
-
-  // Unfortunately there's not really a better way to do this.
-  // Iterate through each encoded codepoint and verify that
-  // it is a valid unicode codepoint.
-  static bool IsValidUTF8(std::string* input) {
-    const char* p = input->c_str();
-    int32_t len = input->length();
-    for (int32_t i = 0; i < len;) {
-      UChar32 c;
-      U8_NEXT_UNSAFE(p, i, c);
-      if (!U_IS_UNICODE_CHAR(c))
-        return false;
-    }
-    return true;
-  }
 #else
   // Intentional non-ops if ICU is not present.
   static inline bool ToUnicode(std::string* input, std::string* output) {
@@ -100,10 +80,6 @@ namespace url {
     *output = *input;
     return true;
   }
-
-  static bool IsValidUTF8(std::string* input) {
-    return true;
-  }
 #endif
 
   // If a UTF-16 character is a low/trailing surrogate.
@@ -355,12 +331,6 @@ namespace url {
     // First, we have to percent decode
     PercentDecode(input, length, &decoded);
 
-    // If there are any invalid UTF8 byte sequences, we have to fail.
-    // Unfortunately this means iterating through the string and checking
-    // each decoded codepoint.
-    if (!IsValidUTF8(&decoded))
-      goto end;
-
     // Then we have to punycode toASCII
     if (!ToASCII(&decoded, &decoded))
       goto end;