envoyproxy · yanavlasov · Jan 27, 2021 · Jan 6, 2021 · Jan 6, 2021 · Jan 6, 2021
diff --git a/bazel/external/googleurl.patch b/bazel/external/googleurl.patch
@@ -0,0 +1,158 @@
+# TODO(dio): Consider to remove compiler specific part of this patch when we solely compile the
+# project using clang-cl. Tracked in https://github.com/envoyproxy/envoy/issues/11974.
+
+diff --git a/base/compiler_specific.h b/base/compiler_specific.h
+index 0cd36dc..8c4cbd4 100644
+--- a/base/compiler_specific.h
++++ b/base/compiler_specific.h
+@@ -7,10 +7,6 @@
+
+ #include "build/build_config.h"
+
+-#if defined(COMPILER_MSVC) && !defined(__clang__)
+-#error "Only clang-cl is supported on Windows, see https://crbug.com/988071"
+-#endif
+-
+ // Annotate a variable indicating it's ok if the variable is not used.
+ // (Typically used to silence a compiler warning when the assignment
+ // is important for some other reason.)
+@@ -55,8 +51,12 @@
+ // prevent code folding, see gurl_base::debug::Alias.
+ // Use like:
+ //   void NOT_TAIL_CALLED FooBar();
+-#if defined(__clang__) && __has_attribute(not_tail_called)
++#if defined(__clang__)
++#if defined(__has_attribute)
++#if __has_attribute(not_tail_called)
+ #define NOT_TAIL_CALLED __attribute__((not_tail_called))
++#endif
++#endif
+ #else
+ #define NOT_TAIL_CALLED
+ #endif
+@@ -226,7 +226,9 @@
+ #endif
+ #endif
+
+-#if defined(__clang__) && __has_attribute(uninitialized)
++#if defined(__clang__)
++#if defined(__has_attribute)
++#if __has_attribute(uninitialized)
+ // Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for
+ // the specified variable.
+ // Library-wide alternative is
+@@ -257,6 +259,8 @@
+ // E.g. platform, bot, benchmark or test name in patch description or next to
+ // the attribute.
+ #define STACK_UNINITIALIZED __attribute__((uninitialized))
++#endif
++#endif
+ #else
+ #define STACK_UNINITIALIZED
+ #endif
+
+diff --git a/url/BUILD b/url/BUILD
+index f2ec8da..714f90e 100644
+--- a/url/BUILD
++++ b/url/BUILD
+@@ -7,7 +7,6 @@ load("@rules_cc//cc:defs.bzl", "cc_library")
+ cc_library(
+     name = "url",
+     srcs = [
+-        "gurl.cc",
+         "third_party/mozilla/url_parse.cc",
+         "url_canon.cc",
+         "url_canon_etc.cc",
+@@ -26,17 +25,14 @@ cc_library(
+         "url_canon_stdstring.cc",
+         "url_canon_stdurl.cc",
+         "url_constants.cc",
+-        "url_idna_icu.cc",
+         "url_parse_file.cc",
+         "url_parse_internal.h",
+         "url_util.cc",
+         "url_util_internal.h",
+     ],
+     hdrs = [
+-        "gurl.h",
+         "third_party/mozilla/url_parse.h",
+         "url_canon.h",
+-        "url_canon_icu.h",
+         "url_canon_ip.h",
+         "url_canon_stdstring.h",
+         "url_constants.h",
+@@ -44,11 +40,10 @@ cc_library(
+         "url_util.h",
+     ],
+     copts = build_config.default_copts,
+-    linkopts = build_config.url_linkopts,
+     visibility = ["//visibility:public"],
+     deps = [
+         "//base",
+         "//base/strings",
+         "//polyfills",
+-    ] + build_config.icuuc_deps,
++    ]
+ )
+diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
+index 28a7c38..dd18acf 100644
+--- a/url/url_canon_host.cc
++++ b/url/url_canon_host.cc
+@@ -175,55 +175,7 @@ bool DoSimpleHost(const INCHAR* host,
+
+ // Canonicalizes a host that requires IDN conversion. Returns true on success
+ bool DoIDNHost(const gurl_base::char16* src, int src_len, CanonOutput* output) {
+-  int original_output_len = output->length();  // So we can rewind below.
+-
+-  // We need to escape URL before doing IDN conversion, since punicode strings
+-  // cannot be escaped after they are created.
+-  RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
+-  bool has_non_ascii;
+-  DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii);
+-  if (url_escaped_host.length() > kMaxHostBufferLength) {
+-    AppendInvalidNarrowString(src, 0, src_len, output);
+-    return false;
+-  }
+-
+-  StackBufferW wide_output;
+-  if (!IDNToASCII(url_escaped_host.data(),
+-                  url_escaped_host.length(),
+-                  &wide_output)) {
+-    // Some error, give up. This will write some reasonable looking
+-    // representation of the string to the output.
+-    AppendInvalidNarrowString(src, 0, src_len, output);
+-    return false;
+-  }
+-
+-  // Now we check the ASCII output like a normal host. It will also handle
+-  // unescaping. Although we unescaped everything before this function call, if
+-  // somebody does %00 as fullwidth, ICU will convert this to ASCII.
+-  bool success = DoSimpleHost(wide_output.data(),
+-                              wide_output.length(),
+-                              output, &has_non_ascii);
+-  if (has_non_ascii) {
+-    // ICU generated something that DoSimpleHost didn't think looked like
+-    // ASCII. This is quite rare, but ICU might convert some characters to
+-    // percent signs which might generate new escape sequences which might in
+-    // turn be invalid. An example is U+FE6A "small percent" which ICU will
+-    // name prep into an ASCII percent and then we can interpret the following
+-    // characters as escaped characters.
+-    //
+-    // If DoSimpleHost didn't think the output was ASCII, just escape the
+-    // thing we gave ICU and give up. DoSimpleHost will have handled a further
+-    // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates
+-    // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't
+-    // do more (like handle escaped non-ASCII sequences). Handling the escaped
+-    // ASCII isn't strictly necessary, but DoSimpleHost handles this case
+-    // anyway so we handle it/
+-    output->set_length(original_output_len);
+-    AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
+-                              output);
+-    return false;
+-  }
+-  return success;
++  return false;
+ }
+
+ // 8-bit convert host to its ASCII version: this converts the UTF-8 input to
+
diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl
@@ -691,6 +691,8 @@ def _com_googlesource_quiche():
 def _com_googlesource_googleurl():
     external_http_archive(
         name = "com_googlesource_googleurl",
+        patches = ["@envoy//bazel/external:googleurl.patch"],
+        patch_args = ["-p1"],
     )
     native.bind(
         name = "googleurl",

diff --git a/bazel/repository_locations.bzl b/bazel/repository_locations.bzl
@@ -696,7 +696,7 @@ REPOSITORY_LOCATIONS_SPEC = dict(
         version = "ef0d23689e240e6c8de4c3a5296b209128c87373",
         sha256 = "d769283fed1319bca68bae8bdd47fbc3a7933999329eee850eff1f1ea61ce176",
         urls = ["https://storage.googleapis.com/quiche-envoy-integration/googleurl_{version}.tar.gz"],
-        use_category = ["dataplane_ext"],
+        use_category = ["controlplane", "dataplane_core"],
         extensions = [],
         release_date = "2020-07-30",
         cpe = "N/A",

diff --git a/source/common/http/BUILD b/source/common/http/BUILD
@@ -429,13 +429,23 @@ envoy_cc_library(
 
 envoy_cc_library(
     name = "path_utility_lib",
-    srcs = ["path_utility.cc"],
-    hdrs = ["path_utility.h"],
-    external_deps = ["abseil_optional"],
+    srcs = [
+        "deprecated_path_canonicalizer.cc",
+        "path_utility.cc",
+    ],
+    hdrs = [
+        "deprecated_path_canonicalizer.h",
+        "path_utility.h",
+    ],
+    external_deps = [
+        "abseil_optional",
+        "googleurl",
+    ],
     deps = [
         "//include/envoy/http:header_map_interface",
         "//source/common/chromium_url",
         "//source/common/common:logger_lib",
+        "//source/common/runtime:runtime_features_lib",
     ],
 )
 

diff --git a/source/common/http/deprecated_path_canonicalizer.cc b/source/common/http/deprecated_path_canonicalizer.cc
@@ -0,0 +1,25 @@
+#include "common/http/deprecated_path_canonicalizer.h"
+
+#include "common/chromium_url/url_canon.h"
+#include "common/chromium_url/url_canon_stdstring.h"
+
+namespace Envoy {
+namespace Http {
+
+absl::optional<std::string>
+DeprecatedPathCanonicalizer::canonicalizePath(absl::string_view original_path) {
+  std::string canonical_path;
+  chromium_url::Component in_component(0, original_path.size());
+  chromium_url::Component out_component;
+  chromium_url::StdStringCanonOutput output(&canonical_path);
+  if (!chromium_url::CanonicalizePath(original_path.data(), in_component, &output,
+                                      &out_component)) {
+    return absl::nullopt;
+  } else {
+    output.Complete();
+    return absl::make_optional(std::move(canonical_path));
+  }
+}
+
+} // namespace Http
+} // namespace Envoy
diff --git a/source/common/http/deprecated_path_canonicalizer.h b/source/common/http/deprecated_path_canonicalizer.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+
+namespace Envoy {
+namespace Http {
+
+/**
+ * Path canonicalizer based on //source/common/chromium_url.
+ */
+class DeprecatedPathCanonicalizer {
+public:
+  // Returns the canonicalized path if successful.
+  static absl::optional<std::string> canonicalizePath(absl::string_view original_path);
+};
+
+} // namespace Http
+} // namespace Envoy
diff --git a/source/common/http/path_utility.cc b/source/common/http/path_utility.cc
@@ -1,24 +1,28 @@
 #include "common/http/path_utility.h"
 
-#include "common/chromium_url/url_canon.h"
-#include "common/chromium_url/url_canon_stdstring.h"
 #include "common/common/logger.h"
+#include "common/http/deprecated_path_canonicalizer.h"
+#include "common/runtime/runtime_features.h"
 
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_split.h"
 #include "absl/types/optional.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
 
 namespace Envoy {
 namespace Http {
 
 namespace {
 absl::optional<std::string> canonicalizePath(absl::string_view original_path) {
+  if (Runtime::runtimeFeatureEnabled("envoy.deprecated_features.use_forked_chromium_url")) {
+    return DeprecatedPathCanonicalizer::canonicalizePath(original_path);
+  }
   std::string canonical_path;
-  chromium_url::Component in_component(0, original_path.size());
-  chromium_url::Component out_component;
-  chromium_url::StdStringCanonOutput output(&canonical_path);
-  if (!chromium_url::CanonicalizePath(original_path.data(), in_component, &output,
-                                      &out_component)) {
+  url::Component in_component(0, original_path.size());
+  url::Component out_component;
+  url::StdStringCanonOutput output(&canonical_path);
+  if (!url::CanonicalizePath(original_path.data(), in_component, &output, &out_component)) {
     return absl::nullopt;
   } else {
     output.Complete();

diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc
@@ -56,6 +56,7 @@ constexpr const char* runtime_features[] = {
     "envoy.reloadable_features.test_feature_true",
     // Begin alphabetically sorted section.
     "envoy.deprecated_features.allow_deprecated_extension_names",
+    "envoy.deprecated_features.use_forked_chromium_url",
     "envoy.reloadable_features.always_apply_route_header_rules",
     "envoy.reloadable_features.activate_timers_next_event_loop",
     "envoy.reloadable_features.allow_500_after_100",

diff --git a/test/common/http/BUILD b/test/common/http/BUILD
@@ -377,6 +377,7 @@ envoy_cc_test(
     deps = [
         "//source/common/http:header_map_lib",
         "//source/common/http:path_utility_lib",
+        "//test/test_common:test_runtime_lib",
     ],
 )
 

diff --git a/test/common/http/path_utility_test.cc b/test/common/http/path_utility_test.cc
@@ -3,6 +3,7 @@
 
 #include "common/http/path_utility.h"
 
+#include "test/test_common/test_runtime.h"
 #include "test/test_common/utility.h"
 
 #include "gtest/gtest.h"
@@ -26,8 +27,33 @@ class PathUtilityTest : public testing::Test {
   TestRequestHeaderMapImpl headers_;
 };
 
+// Already normalized path don't change using deprecated path canonicalizer.
+TEST_F(PathUtilityTest, DeprecatedAlreadyNormalPaths) {
+  const std::vector<std::string> normal_paths{"/xyz", "/x/y/z"};
+  for (const auto& path : normal_paths) {
+    auto& path_header = pathHeaderEntry(path);
+    const auto result = PathUtil::canonicalPath(headers_);
+    EXPECT_TRUE(result) << "original path: " << path;
+    EXPECT_EQ(path_header.value().getStringView(), absl::string_view(path));
+  }
+}
+
+// Invalid paths are rejected using deprecated path canonicalizer.
+TEST_F(PathUtilityTest, DeprecatedInvalidPaths) {
+  const std::vector<std::string> invalid_paths{"/xyz/.%00../abc", "/xyz/%00.%00./abc",
+                                               "/xyz/AAAAA%%0000/abc"};
+  for (const auto& path : invalid_paths) {
+    pathHeaderEntry(path);
+    EXPECT_FALSE(PathUtil::canonicalPath(headers_)) << "original path: " << path;
+  }
+}
+
 // Already normalized path don't change.
 TEST_F(PathUtilityTest, AlreadyNormalPaths) {
+  TestScopedRuntime scoped_runtime;
+  Runtime::LoaderSingleton::getExisting()->mergeValues(
+      {{"envoy.deprecated_features.use_forked_chromium_url", "false"}});
+
   const std::vector<std::string> normal_paths{"/xyz", "/x/y/z"};
   for (const auto& path : normal_paths) {
     auto& path_header = pathHeaderEntry(path);
@@ -39,6 +65,10 @@ TEST_F(PathUtilityTest, AlreadyNormalPaths) {
 
 // Invalid paths are rejected.
 TEST_F(PathUtilityTest, InvalidPaths) {
+  TestScopedRuntime scoped_runtime;
+  Runtime::LoaderSingleton::getExisting()->mergeValues(
+      {{"envoy.deprecated_features.use_forked_chromium_url", "false"}});
+
   const std::vector<std::string> invalid_paths{"/xyz/.%00../abc", "/xyz/%00.%00./abc",
                                                "/xyz/AAAAA%%0000/abc"};
   for (const auto& path : invalid_paths) {