diff --git a/bazel/external/googleurl.patch b/bazel/external/googleurl.patch new file mode 100644 index 000000000000..cb5292f4ae80 --- /dev/null +++ b/bazel/external/googleurl.patch @@ -0,0 +1,162 @@ +# TODO(dio): Consider to remove compiler specific part of this patch when we solely compile the +# project using clang-cl. Tracked in https://github.com/envoyproxy/envoy/issues/11974. + +diff --git a/base/compiler_specific.h b/base/compiler_specific.h +index 0cd36dc..8c4cbd4 100644 +--- a/base/compiler_specific.h ++++ b/base/compiler_specific.h +@@ -7,10 +7,6 @@ + + #include "build/build_config.h" + +-#if defined(COMPILER_MSVC) && !defined(__clang__) +-#error "Only clang-cl is supported on Windows, see https://crbug.com/988071" +-#endif +- + // Annotate a variable indicating it's ok if the variable is not used. + // (Typically used to silence a compiler warning when the assignment + // is important for some other reason.) +@@ -55,8 +51,12 @@ + // prevent code folding, see gurl_base::debug::Alias. + // Use like: + // void NOT_TAIL_CALLED FooBar(); +-#if defined(__clang__) && __has_attribute(not_tail_called) ++#if defined(__clang__) ++#if defined(__has_attribute) ++#if __has_attribute(not_tail_called) + #define NOT_TAIL_CALLED __attribute__((not_tail_called)) ++#endif ++#endif + #else + #define NOT_TAIL_CALLED + #endif +@@ -226,7 +226,9 @@ + #endif + #endif + +-#if defined(__clang__) && __has_attribute(uninitialized) ++#if defined(__clang__) ++#if defined(__has_attribute) ++#if __has_attribute(uninitialized) + // Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for + // the specified variable. + // Library-wide alternative is +@@ -257,6 +259,8 @@ + // E.g. platform, bot, benchmark or test name in patch description or next to + // the attribute. + #define STACK_UNINITIALIZED __attribute__((uninitialized)) ++#endif ++#endif + #else + #define STACK_UNINITIALIZED + #endif + +# TODO(dio): Consider to remove the following patch when we have IDN-free optional build for URL +# library from the upstream Chromium project. This is tracked in: +# https://github.com/envoyproxy/envoy/issues/14743. + +diff --git a/url/BUILD b/url/BUILD +index f2ec8da..714f90e 100644 +--- a/url/BUILD ++++ b/url/BUILD +@@ -7,7 +7,6 @@ load("@rules_cc//cc:defs.bzl", "cc_library") + cc_library( + name = "url", + srcs = [ +- "gurl.cc", + "third_party/mozilla/url_parse.cc", + "url_canon.cc", + "url_canon_etc.cc", +@@ -26,17 +25,14 @@ cc_library( + "url_canon_stdstring.cc", + "url_canon_stdurl.cc", + "url_constants.cc", +- "url_idna_icu.cc", + "url_parse_file.cc", + "url_parse_internal.h", + "url_util.cc", + "url_util_internal.h", + ], + hdrs = [ +- "gurl.h", + "third_party/mozilla/url_parse.h", + "url_canon.h", +- "url_canon_icu.h", + "url_canon_ip.h", + "url_canon_stdstring.h", + "url_constants.h", +@@ -44,11 +40,10 @@ cc_library( + "url_util.h", + ], + copts = build_config.default_copts, +- linkopts = build_config.url_linkopts, + visibility = ["//visibility:public"], + deps = [ + "//base", + "//base/strings", + "//polyfills", +- ] + build_config.icuuc_deps, ++ ] + ) +diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc +index 28a7c38..dd18acf 100644 +--- a/url/url_canon_host.cc ++++ b/url/url_canon_host.cc +@@ -175,55 +175,7 @@ bool DoSimpleHost(const INCHAR* host, + + // Canonicalizes a host that requires IDN conversion. Returns true on success + bool DoIDNHost(const gurl_base::char16* src, int src_len, CanonOutput* output) { +- int original_output_len = output->length(); // So we can rewind below. +- +- // We need to escape URL before doing IDN conversion, since punicode strings +- // cannot be escaped after they are created. +- RawCanonOutputW url_escaped_host; +- bool has_non_ascii; +- DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii); +- if (url_escaped_host.length() > kMaxHostBufferLength) { +- AppendInvalidNarrowString(src, 0, src_len, output); +- return false; +- } +- +- StackBufferW wide_output; +- if (!IDNToASCII(url_escaped_host.data(), +- url_escaped_host.length(), +- &wide_output)) { +- // Some error, give up. This will write some reasonable looking +- // representation of the string to the output. +- AppendInvalidNarrowString(src, 0, src_len, output); +- return false; +- } +- +- // Now we check the ASCII output like a normal host. It will also handle +- // unescaping. Although we unescaped everything before this function call, if +- // somebody does %00 as fullwidth, ICU will convert this to ASCII. +- bool success = DoSimpleHost(wide_output.data(), +- wide_output.length(), +- output, &has_non_ascii); +- if (has_non_ascii) { +- // ICU generated something that DoSimpleHost didn't think looked like +- // ASCII. This is quite rare, but ICU might convert some characters to +- // percent signs which might generate new escape sequences which might in +- // turn be invalid. An example is U+FE6A "small percent" which ICU will +- // name prep into an ASCII percent and then we can interpret the following +- // characters as escaped characters. +- // +- // If DoSimpleHost didn't think the output was ASCII, just escape the +- // thing we gave ICU and give up. DoSimpleHost will have handled a further +- // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates +- // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't +- // do more (like handle escaped non-ASCII sequences). Handling the escaped +- // ASCII isn't strictly necessary, but DoSimpleHost handles this case +- // anyway so we handle it/ +- output->set_length(original_output_len); +- AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(), +- output); +- return false; +- } +- return success; ++ return false; + } + + // 8-bit convert host to its ASCII version: this converts the UTF-8 input to + diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl index e9e2beceb1f0..32c9eb4196cc 100644 --- a/bazel/repositories.bzl +++ b/bazel/repositories.bzl @@ -688,6 +688,8 @@ def _com_googlesource_quiche(): def _com_googlesource_googleurl(): external_http_archive( name = "com_googlesource_googleurl", + patches = ["@envoy//bazel/external:googleurl.patch"], + patch_args = ["-p1"], ) native.bind( name = "googleurl", diff --git a/bazel/repository_locations.bzl b/bazel/repository_locations.bzl index 8ce15bb5d239..51923dc70123 100644 --- a/bazel/repository_locations.bzl +++ b/bazel/repository_locations.bzl @@ -696,7 +696,7 @@ REPOSITORY_LOCATIONS_SPEC = dict( version = "ef0d23689e240e6c8de4c3a5296b209128c87373", sha256 = "d769283fed1319bca68bae8bdd47fbc3a7933999329eee850eff1f1ea61ce176", urls = ["https://storage.googleapis.com/quiche-envoy-integration/googleurl_{version}.tar.gz"], - use_category = ["dataplane_ext"], + use_category = ["controlplane", "dataplane_core"], extensions = [], release_date = "2020-07-30", cpe = "N/A", diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index e036a08636a4..7ff53ec03f37 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -14,6 +14,10 @@ Minor Behavior Changes ---------------------- *Changes that may cause incompatibilities for some users, but should not for most* +* http: allow to use path canonicalizer from `googleurl `_ + instead of `//source/common/chromium_url`. The new path canonicalizer is enabled by default. To + revert to the legacy path canonicalizer, enable the runtime flag + `envoy.reloadable_features.remove_forked_chromium_url`. * oauth filter: added the optional parameter :ref:`auth_scopes ` with default value of 'user' if not provided. Enables this value to be overridden in the Authorization request to the OAuth provider. * tcp: setting NODELAY in the base connection class. This should have no effect for TCP or HTTP proxying, but may improve throughput in other areas. This behavior can be temporarily reverted by setting `envoy.reloadable_features.always_nodelay` to false. * upstream: host weight changes now cause a full load balancer rebuild as opposed to happening diff --git a/source/common/http/BUILD b/source/common/http/BUILD index a1917b42809c..44cb4ef68b76 100644 --- a/source/common/http/BUILD +++ b/source/common/http/BUILD @@ -434,14 +434,25 @@ envoy_cc_library( name = "path_utility_lib", srcs = ["path_utility.cc"], hdrs = ["path_utility.h"], - external_deps = ["abseil_optional"], + external_deps = [ + "abseil_optional", + "googleurl", + ], deps = [ + ":legacy_path_canonicalizer", "//include/envoy/http:header_map_interface", - "//source/common/chromium_url", "//source/common/common:logger_lib", + "//source/common/runtime:runtime_features_lib", ], ) +envoy_cc_library( + name = "legacy_path_canonicalizer", + srcs = ["legacy_path_canonicalizer.cc"], + hdrs = ["legacy_path_canonicalizer.h"], + deps = ["//source/common/chromium_url"], +) + envoy_cc_library( name = "request_id_extension_lib", srcs = [ diff --git a/source/common/http/legacy_path_canonicalizer.cc b/source/common/http/legacy_path_canonicalizer.cc new file mode 100644 index 000000000000..920e81bef260 --- /dev/null +++ b/source/common/http/legacy_path_canonicalizer.cc @@ -0,0 +1,25 @@ +#include "common/http/legacy_path_canonicalizer.h" + +#include "common/chromium_url/url_canon.h" +#include "common/chromium_url/url_canon_stdstring.h" + +namespace Envoy { +namespace Http { + +absl::optional +LegacyPathCanonicalizer::canonicalizePath(absl::string_view original_path) { + std::string canonical_path; + chromium_url::Component in_component(0, original_path.size()); + chromium_url::Component out_component; + chromium_url::StdStringCanonOutput output(&canonical_path); + if (!chromium_url::CanonicalizePath(original_path.data(), in_component, &output, + &out_component)) { + return absl::nullopt; + } else { + output.Complete(); + return absl::make_optional(std::move(canonical_path)); + } +} + +} // namespace Http +} // namespace Envoy diff --git a/source/common/http/legacy_path_canonicalizer.h b/source/common/http/legacy_path_canonicalizer.h new file mode 100644 index 000000000000..9b0543309d86 --- /dev/null +++ b/source/common/http/legacy_path_canonicalizer.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" + +namespace Envoy { +namespace Http { + +/** + * Path canonicalizer based on //source/common/chromium_url. + */ +class LegacyPathCanonicalizer { +public: + // Returns the canonicalized path if successful. + static absl::optional canonicalizePath(absl::string_view original_path); +}; + +} // namespace Http +} // namespace Envoy diff --git a/source/common/http/path_utility.cc b/source/common/http/path_utility.cc index f12790b41103..988e82b9be82 100644 --- a/source/common/http/path_utility.cc +++ b/source/common/http/path_utility.cc @@ -1,29 +1,33 @@ #include "common/http/path_utility.h" -#include "common/chromium_url/url_canon.h" -#include "common/chromium_url/url_canon_stdstring.h" #include "common/common/logger.h" +#include "common/http/legacy_path_canonicalizer.h" +#include "common/runtime/runtime_features.h" #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "absl/types/optional.h" +#include "url/url_canon.h" +#include "url/url_canon_stdstring.h" namespace Envoy { namespace Http { namespace { absl::optional canonicalizePath(absl::string_view original_path) { - std::string canonical_path; - chromium_url::Component in_component(0, original_path.size()); - chromium_url::Component out_component; - chromium_url::StdStringCanonOutput output(&canonical_path); - if (!chromium_url::CanonicalizePath(original_path.data(), in_component, &output, - &out_component)) { - return absl::nullopt; - } else { - output.Complete(); - return absl::make_optional(std::move(canonical_path)); + if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.remove_forked_chromium_url")) { + std::string canonical_path; + url::Component in_component(0, original_path.size()); + url::Component out_component; + url::StdStringCanonOutput output(&canonical_path); + if (!url::CanonicalizePath(original_path.data(), in_component, &output, &out_component)) { + return absl::nullopt; + } else { + output.Complete(); + return absl::make_optional(std::move(canonical_path)); + } } + return LegacyPathCanonicalizer::canonicalizePath(original_path); } } // namespace diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 116ed1b2fb2d..9c5ea2805f03 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -77,6 +77,7 @@ constexpr const char* runtime_features[] = { "envoy.reloadable_features.prefer_quic_kernel_bpf_packet_routing", "envoy.reloadable_features.preserve_query_string_in_path_redirects", "envoy.reloadable_features.preserve_upstream_date", + "envoy.reloadable_features.remove_forked_chromium_url", "envoy.reloadable_features.require_ocsp_response_for_must_staple_certs", "envoy.reloadable_features.stop_faking_paths", "envoy.reloadable_features.strict_1xx_and_204_response_headers", diff --git a/test/common/http/BUILD b/test/common/http/BUILD index 5528633aa800..36d60dcddfb8 100644 --- a/test/common/http/BUILD +++ b/test/common/http/BUILD @@ -371,13 +371,22 @@ envoy_cc_test( ], ) +PATH_UTILITY_TEST_DEPS = [ + "//source/common/http:header_map_lib", + "//source/common/http:path_utility_lib", +] + envoy_cc_test( name = "path_utility_test", srcs = ["path_utility_test.cc"], - deps = [ - "//source/common/http:header_map_lib", - "//source/common/http:path_utility_lib", - ], + deps = PATH_UTILITY_TEST_DEPS, +) + +envoy_cc_test( + name = "legacy_path_utility_test", + srcs = ["path_utility_test.cc"], + args = ["--runtime-feature-disable-for-tests=envoy.reloadable_features.remove_forked_chromium_url"], + deps = PATH_UTILITY_TEST_DEPS, ) envoy_cc_test(