-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
common/http: Remove chromium_url #14583
Changes from 12 commits
cb049e1
fdf1930
b7c9780
e87f793
3b7fa2a
2775392
2f750ee
3e5c57f
be0f0b8
6675cc6
f7d0931
d612982
77a50ab
adc14a9
afba654
d50439e
fb52add
07f962b
e4bd547
f205249
32046b6
c802db1
c636150
cfd25ef
aa0e901
5116978
2e90493
2b1322a
eedbe5e
b49d107
a2d9125
5b09a00
079ab7e
4271d7a
c4a7125
d245fb7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# TODO(dio): Consider to remove compiler specific part of this patch when we solely compile the | ||
# project using clang-cl. Tracked in https://github.com/envoyproxy/envoy/issues/11974. | ||
|
||
diff --git a/base/compiler_specific.h b/base/compiler_specific.h | ||
index 0cd36dc..8c4cbd4 100644 | ||
--- a/base/compiler_specific.h | ||
+++ b/base/compiler_specific.h | ||
@@ -7,10 +7,6 @@ | ||
|
||
#include "build/build_config.h" | ||
|
||
-#if defined(COMPILER_MSVC) && !defined(__clang__) | ||
-#error "Only clang-cl is supported on Windows, see https://crbug.com/988071" | ||
-#endif | ||
- | ||
// Annotate a variable indicating it's ok if the variable is not used. | ||
// (Typically used to silence a compiler warning when the assignment | ||
// is important for some other reason.) | ||
@@ -55,8 +51,12 @@ | ||
// prevent code folding, see gurl_base::debug::Alias. | ||
// Use like: | ||
// void NOT_TAIL_CALLED FooBar(); | ||
-#if defined(__clang__) && __has_attribute(not_tail_called) | ||
+#if defined(__clang__) | ||
+#if defined(__has_attribute) | ||
+#if __has_attribute(not_tail_called) | ||
#define NOT_TAIL_CALLED __attribute__((not_tail_called)) | ||
+#endif | ||
+#endif | ||
#else | ||
#define NOT_TAIL_CALLED | ||
#endif | ||
@@ -226,7 +226,9 @@ | ||
#endif | ||
#endif | ||
|
||
-#if defined(__clang__) && __has_attribute(uninitialized) | ||
+#if defined(__clang__) | ||
+#if defined(__has_attribute) | ||
+#if __has_attribute(uninitialized) | ||
// Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for | ||
// the specified variable. | ||
// Library-wide alternative is | ||
@@ -257,6 +259,8 @@ | ||
// E.g. platform, bot, benchmark or test name in patch description or next to | ||
// the attribute. | ||
#define STACK_UNINITIALIZED __attribute__((uninitialized)) | ||
+#endif | ||
+#endif | ||
#else | ||
#define STACK_UNINITIALIZED | ||
#endif | ||
|
||
diff --git a/url/BUILD b/url/BUILD | ||
index f2ec8da..714f90e 100644 | ||
--- a/url/BUILD | ||
+++ b/url/BUILD | ||
@@ -7,7 +7,6 @@ load("@rules_cc//cc:defs.bzl", "cc_library") | ||
cc_library( | ||
name = "url", | ||
srcs = [ | ||
- "gurl.cc", | ||
"third_party/mozilla/url_parse.cc", | ||
"url_canon.cc", | ||
"url_canon_etc.cc", | ||
@@ -26,17 +25,14 @@ cc_library( | ||
"url_canon_stdstring.cc", | ||
"url_canon_stdurl.cc", | ||
"url_constants.cc", | ||
- "url_idna_icu.cc", | ||
"url_parse_file.cc", | ||
"url_parse_internal.h", | ||
"url_util.cc", | ||
"url_util_internal.h", | ||
], | ||
hdrs = [ | ||
- "gurl.h", | ||
"third_party/mozilla/url_parse.h", | ||
"url_canon.h", | ||
- "url_canon_icu.h", | ||
"url_canon_ip.h", | ||
"url_canon_stdstring.h", | ||
"url_constants.h", | ||
@@ -44,11 +40,10 @@ cc_library( | ||
"url_util.h", | ||
], | ||
copts = build_config.default_copts, | ||
- linkopts = build_config.url_linkopts, | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
"//base", | ||
"//base/strings", | ||
"//polyfills", | ||
- ] + build_config.icuuc_deps, | ||
+ ] | ||
) | ||
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc | ||
index 28a7c38..dd18acf 100644 | ||
--- a/url/url_canon_host.cc | ||
+++ b/url/url_canon_host.cc | ||
@@ -175,55 +175,7 @@ bool DoSimpleHost(const INCHAR* host, | ||
|
||
// Canonicalizes a host that requires IDN conversion. Returns true on success | ||
bool DoIDNHost(const gurl_base::char16* src, int src_len, CanonOutput* output) { | ||
- int original_output_len = output->length(); // So we can rewind below. | ||
- | ||
- // We need to escape URL before doing IDN conversion, since punicode strings | ||
- // cannot be escaped after they are created. | ||
- RawCanonOutputW<kTempHostBufferLen> url_escaped_host; | ||
- bool has_non_ascii; | ||
- DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii); | ||
- if (url_escaped_host.length() > kMaxHostBufferLength) { | ||
- AppendInvalidNarrowString(src, 0, src_len, output); | ||
- return false; | ||
- } | ||
- | ||
- StackBufferW wide_output; | ||
- if (!IDNToASCII(url_escaped_host.data(), | ||
- url_escaped_host.length(), | ||
- &wide_output)) { | ||
- // Some error, give up. This will write some reasonable looking | ||
- // representation of the string to the output. | ||
- AppendInvalidNarrowString(src, 0, src_len, output); | ||
- return false; | ||
- } | ||
- | ||
- // Now we check the ASCII output like a normal host. It will also handle | ||
- // unescaping. Although we unescaped everything before this function call, if | ||
- // somebody does %00 as fullwidth, ICU will convert this to ASCII. | ||
- bool success = DoSimpleHost(wide_output.data(), | ||
- wide_output.length(), | ||
- output, &has_non_ascii); | ||
- if (has_non_ascii) { | ||
- // ICU generated something that DoSimpleHost didn't think looked like | ||
- // ASCII. This is quite rare, but ICU might convert some characters to | ||
- // percent signs which might generate new escape sequences which might in | ||
- // turn be invalid. An example is U+FE6A "small percent" which ICU will | ||
- // name prep into an ASCII percent and then we can interpret the following | ||
- // characters as escaped characters. | ||
- // | ||
- // If DoSimpleHost didn't think the output was ASCII, just escape the | ||
- // thing we gave ICU and give up. DoSimpleHost will have handled a further | ||
- // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates | ||
- // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't | ||
- // do more (like handle escaped non-ASCII sequences). Handling the escaped | ||
- // ASCII isn't strictly necessary, but DoSimpleHost handles this case | ||
- // anyway so we handle it/ | ||
- output->set_length(original_output_len); | ||
- AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(), | ||
- output); | ||
- return false; | ||
- } | ||
- return success; | ||
+ return false; | ||
} | ||
|
||
// 8-bit convert host to its ASCII version: this converts the UTF-8 input to | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#include "common/http/deprecated_path_canonicalizer.h" | ||
|
||
#include "common/chromium_url/url_canon.h" | ||
#include "common/chromium_url/url_canon_stdstring.h" | ||
|
||
namespace Envoy { | ||
namespace Http { | ||
|
||
absl::optional<std::string> | ||
DeprecatedPathCanonicalizer::canonicalizePath(absl::string_view original_path) { | ||
std::string canonical_path; | ||
chromium_url::Component in_component(0, original_path.size()); | ||
chromium_url::Component out_component; | ||
chromium_url::StdStringCanonOutput output(&canonical_path); | ||
if (!chromium_url::CanonicalizePath(original_path.data(), in_component, &output, | ||
&out_component)) { | ||
return absl::nullopt; | ||
} else { | ||
output.Complete(); | ||
return absl::make_optional(std::move(canonical_path)); | ||
} | ||
} | ||
|
||
} // namespace Http | ||
} // namespace Envoy |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#pragma once | ||
|
||
#include <string> | ||
|
||
#include "absl/strings/string_view.h" | ||
#include "absl/types/optional.h" | ||
|
||
namespace Envoy { | ||
namespace Http { | ||
|
||
/** | ||
* Path canonicalizer based on //source/common/chromium_url. | ||
*/ | ||
class DeprecatedPathCanonicalizer { | ||
public: | ||
// Returns the canonicalized path if successful. | ||
static absl::optional<std::string> canonicalizePath(absl::string_view original_path); | ||
}; | ||
|
||
} // namespace Http | ||
} // namespace Envoy |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
|
||
#include "common/http/path_utility.h" | ||
|
||
#include "test/test_common/test_runtime.h" | ||
#include "test/test_common/utility.h" | ||
|
||
#include "gtest/gtest.h" | ||
|
@@ -26,8 +27,33 @@ class PathUtilityTest : public testing::Test { | |
TestRequestHeaderMapImpl headers_; | ||
}; | ||
|
||
// Already normalized path don't change using deprecated path canonicalizer. | ||
TEST_F(PathUtilityTest, DeprecatedAlreadyNormalPaths) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you need to use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is added. I actually not sure if we should go with "reloadable" flag or "deprecated_features" semantically. cc. @htuch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it should be |
||
const std::vector<std::string> normal_paths{"/xyz", "/x/y/z"}; | ||
for (const auto& path : normal_paths) { | ||
auto& path_header = pathHeaderEntry(path); | ||
const auto result = PathUtil::canonicalPath(headers_); | ||
EXPECT_TRUE(result) << "original path: " << path; | ||
EXPECT_EQ(path_header.value().getStringView(), absl::string_view(path)); | ||
} | ||
} | ||
|
||
// Invalid paths are rejected using deprecated path canonicalizer. | ||
TEST_F(PathUtilityTest, DeprecatedInvalidPaths) { | ||
const std::vector<std::string> invalid_paths{"/xyz/.%00../abc", "/xyz/%00.%00./abc", | ||
"/xyz/AAAAA%%0000/abc"}; | ||
for (const auto& path : invalid_paths) { | ||
pathHeaderEntry(path); | ||
EXPECT_FALSE(PathUtil::canonicalPath(headers_)) << "original path: " << path; | ||
} | ||
} | ||
|
||
// Already normalized path don't change. | ||
TEST_F(PathUtilityTest, AlreadyNormalPaths) { | ||
TestScopedRuntime scoped_runtime; | ||
Runtime::LoaderSingleton::getExisting()->mergeValues( | ||
{{"envoy.deprecated_features.use_forked_chromium_url", "false"}}); | ||
|
||
const std::vector<std::string> normal_paths{"/xyz", "/x/y/z"}; | ||
for (const auto& path : normal_paths) { | ||
auto& path_header = pathHeaderEntry(path); | ||
|
@@ -39,6 +65,10 @@ TEST_F(PathUtilityTest, AlreadyNormalPaths) { | |
|
||
// Invalid paths are rejected. | ||
TEST_F(PathUtilityTest, InvalidPaths) { | ||
TestScopedRuntime scoped_runtime; | ||
Runtime::LoaderSingleton::getExisting()->mergeValues( | ||
{{"envoy.deprecated_features.use_forked_chromium_url", "false"}}); | ||
|
||
const std::vector<std::string> invalid_paths{"/xyz/.%00../abc", "/xyz/%00.%00./abc", | ||
"/xyz/AAAAA%%0000/abc"}; | ||
for (const auto& path : invalid_paths) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Coverage and fuzz_coverage resolves more symbols from
url
lib, hence it is needed to includeurl_canon_host.cc
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel the shimmed one is safer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@yanavlasov @dio thinking about this vs. the shim PR, I think ultimately, if we can upstream Chromium to give us an IDN-free optional build, that would be the ideal, since then we don't need to own a patch permanently. Shimming reduces the size of the patch, but in all cases with a patch we have a permanent patch ownership issue. LGTM modulo comments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I recall this patch is needed to make fuzzing test happy. I was going to make another pass to see if tests can be fixed to get rid of the patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the last bit of this patch is related to fuzzing test. I agree we should take a deeper look on tests and modify it.