Skip to content

Commit

Permalink
access_log: refactored SubstitutionFormatParser::parseCommand (envoyp…
Browse files Browse the repository at this point in the history
…roxy#16121)

Signed-off-by: Christoph Pakulski <[email protected]>
Signed-off-by: Gokul Nair <[email protected]>
  • Loading branch information
cpakulski authored and Gokul Nair committed May 6, 2021
1 parent d2f6f8f commit 7bbec8c
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 57 deletions.
69 changes: 28 additions & 41 deletions source/common/formatter/substitution_formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,45 +302,42 @@ void SubstitutionFormatParser::parseCommandHeader(const std::string& token, cons
std::string& main_header,
std::string& alternative_header,
absl::optional<size_t>& max_length) {
// subs is used only to check if there are more than 2 tokens separated by '?'.
std::vector<std::string> subs;
parseCommand(token, start, "?", main_header, subs, max_length);
if (subs.size() > 1) {
alternative_header = "";
parseCommand(token, start, '?', max_length, main_header, alternative_header, subs);
if (!subs.empty()) {
throw EnvoyException(
// Header format rules support only one alternative header.
// docs/root/configuration/access_log.rst#format-rules
absl::StrCat("More than 1 alternative header specified in token: ", token));
}
if (subs.size() == 1) {
alternative_header = subs.front();
} else {
alternative_header = "";
}

// The main and alternative header should not contain invalid characters {NUL, LR, CF}.
if (std::regex_search(main_header, getNewlinePattern()) ||
std::regex_search(alternative_header, getNewlinePattern())) {
throw EnvoyException("Invalid header configuration. Format string contains newline.");
}
}

void SubstitutionFormatParser::parseCommand(const std::string& token, const size_t start,
const std::string& separator, std::string& main,
std::vector<std::string>& sub_items,
absl::optional<size_t>& max_length) {
// TODO(dnoe): Convert this to use string_view throughout.
const size_t end_request = token.find(')', start);
sub_items.clear();
if (end_request != token.length() - 1) {
void SubstitutionFormatParser::tokenizeCommand(const std::string& command, const size_t start,
const char separator,
std::vector<absl::string_view>& tokens,
absl::optional<size_t>& max_length) {
const size_t end_request = command.find(')', start);
tokens.clear();
if (end_request != command.length() - 1) {
// Closing bracket is not found.
if (end_request == std::string::npos) {
throw EnvoyException(absl::StrCat("Closing bracket is missing in token: ", token));
throw EnvoyException(absl::StrCat("Closing bracket is missing in token: ", command));
}

// Closing bracket should be either last one or followed by ':' to denote limitation.
if (token[end_request + 1] != ':') {
throw EnvoyException(absl::StrCat("Incorrect position of ')' in token: ", token));
if (command[end_request + 1] != ':') {
throw EnvoyException(absl::StrCat("Incorrect position of ')' in token: ", command));
}

const auto length_str = absl::string_view(token).substr(end_request + 2);
const auto length_str = absl::string_view(command).substr(end_request + 2);
uint64_t length_value;

if (!absl::SimpleAtoi(length_str, &length_value)) {
Expand All @@ -350,20 +347,10 @@ void SubstitutionFormatParser::parseCommand(const std::string& token, const size
max_length = length_value;
}

const std::string name_data = token.substr(start, end_request - start);
if (!separator.empty()) {
const std::vector<std::string> keys = absl::StrSplit(name_data, separator);
if (!keys.empty()) {
// The main value is the first key
main = keys.at(0);
if (keys.size() > 1) {
// Sub items contain additional keys
sub_items.insert(sub_items.end(), keys.begin() + 1, keys.end());
}
}
} else {
main = name_data;
}
absl::string_view name_data(command);
name_data.remove_prefix(start);
name_data.remove_suffix(command.length() - end_request);
tokens = absl::StrSplit(name_data, separator);
}

std::vector<FormatterProviderPtr> SubstitutionFormatParser::parse(const std::string& format) {
Expand Down Expand Up @@ -406,30 +393,30 @@ FormatterProviderPtr SubstitutionFormatParser::parseBuiltinCommand(const std::st
std::vector<std::string> path;
const size_t start = DYNAMIC_META_TOKEN.size();

parseCommand(token, start, ":", filter_namespace, path, max_length);
parseCommand(token, start, ':', max_length, filter_namespace, path);
return std::make_unique<DynamicMetadataFormatter>(filter_namespace, path, max_length);
} else if (absl::StartsWith(token, CLUSTER_META_TOKEN)) {
std::string filter_namespace;
absl::optional<size_t> max_length;
std::vector<std::string> path;
const size_t start = CLUSTER_META_TOKEN.size();

parseCommand(token, start, ":", filter_namespace, path, max_length);
parseCommand(token, start, ':', max_length, filter_namespace, path);
return std::make_unique<ClusterMetadataFormatter>(filter_namespace, path, max_length);
} else if (absl::StartsWith(token, FILTER_STATE_TOKEN)) {
std::string key;
std::string key, serialize_type;
absl::optional<size_t> max_length;
std::vector<std::string> path;
std::string path;
const size_t start = FILTER_STATE_TOKEN.size();

parseCommand(token, start, ":", key, path, max_length);
parseCommand(token, start, ':', max_length, key, serialize_type);
if (key.empty()) {
throw EnvoyException("Invalid filter state configuration, key cannot be empty.");
}

const absl::string_view serialize_type =
!path.empty() ? path[path.size() - 1] : TYPED_SERIALIZATION;

if (serialize_type.empty()) {
serialize_type = TYPED_SERIALIZATION;
}
if (serialize_type != PLAIN_SERIALIZATION && serialize_type != TYPED_SERIALIZATION) {
throw EnvoyException("Invalid filter state serialize type, only support PLAIN/TYPED.");
}
Expand Down
62 changes: 47 additions & 15 deletions source/common/formatter/substitution_formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,29 +38,61 @@ class SubstitutionFormatParser {
absl::optional<size_t>& max_length);

/**
* General parse command utility. Will parse token from start position. Token is expected to end
* with ')'. An optional ":max_length" may be specified after the closing ')' char. Token may
* contain multiple values separated by "separator" string. First value will be populated in
* "main" and any additional sub values will be set in the vector "subitems". For example token
* of: "com.test.my_filter:test_object:inner_key):100" with separator of ":" will set the
* following:
* - main: com.test.my_filter
* - subitems: {test_object, inner_key}
* - max_length: 100
* General tokenize utility. Will parse command from start position. Command is expected to end
* with ')'. An optional ":max_length" may be specified after the closing ')' char. Command may
* contain multiple values separated by "separator" character. Those values will be places
* into tokens container. If no separator is found, entire command (up to ')') will be
* placed as only item in the container.
*
* @param token the token to parse
* @param command the command to parse
* @param start the index to start parsing from
* @param separator separator between values
* @param main the first value
* @param sub_items any additional values
* @param tokens values found in command separated by separator
* @param max_length optional max_length will be populated if specified
*
* TODO(glicht) Rewrite with a parser library. See:
* https://github.com/envoyproxy/envoy/issues/2967
*/
static void parseCommand(const std::string& token, const size_t start,
const std::string& separator, std::string& main,
std::vector<std::string>& sub_items, absl::optional<size_t>& max_length);
static void tokenizeCommand(const std::string& command, const size_t start, const char separator,
std::vector<absl::string_view>& tokens,
absl::optional<size_t>& max_length);

/* Variadic function template which invokes tokenizeCommand method to parse the
token command and assigns found tokens to sequence of params.
params must be a sequence of std::string& with optional container storing std::string. Here are
examples of params:
- std::string& token1
- std::string& token1, std::string& token2
- std::string& token1, std::string& token2, std::vector<std::string>& remaining
If command contains more tokens than number of passed params, unassigned tokens will be
ignored. If command contains less tokens than number of passed params, some params will be left
untouched.
*/
template <typename... Tokens>
static void parseCommand(const std::string& command, const size_t start, const char separator,
absl::optional<size_t>& max_length, Tokens&&... params) {
std::vector<absl::string_view> tokens;
tokenizeCommand(command, start, separator, tokens, max_length);
std::vector<absl::string_view>::iterator it = tokens.begin();
(
[&](auto& param) {
if (it != tokens.end()) {
if constexpr (std::is_same_v<typename std::remove_reference<decltype(param)>::type,
std::string>) {
// Compile time handler for std::string.
param = *it;
it++;
} else {
// Compile time handler for container type. It will catch all remaining tokens and
// move iterator to the end.
param.insert(param.begin(), it, tokens.end());
it = tokens.end();
}
}
}(params),
...);
}

/**
* Return a FormatterProviderPtr if a built-in command is parsed from the token. This method
Expand Down
10 changes: 10 additions & 0 deletions test/common/formatter/substitution_formatter_speed_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,14 @@ static void BM_TypedJsonAccessLogFormatter(benchmark::State& state) {
}
BENCHMARK(BM_TypedJsonAccessLogFormatter);

// NOLINTNEXTLINE(readability-identifier-naming)
static void BM_FormatterCommandParsing(benchmark::State& state) {
const std::string token = "(Listener:namespace:key):100";
std::string listener, names, key;
absl::optional<size_t> len;
for (auto _ : state) { // NOLINT: Silences warning about dead store
Formatter::SubstitutionFormatParser::parseCommand(token, 1, ':', len, listener, names, key);
}
}
BENCHMARK(BM_FormatterCommandParsing);
} // namespace Envoy
113 changes: 113 additions & 0 deletions test/common/formatter/substitution_formatter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,119 @@ class TestSerializedStringFilterState : public StreamInfo::FilterState::Object {
std::string raw_string_;
};

// Test tests command tokenize utility.
TEST(SubstitutionFormatParser, tokenizer) {
std::vector<absl::string_view> tokens;
absl::optional<size_t> max_length;

std::string command = "COMMAND(item1)";

// The second parameter indicates where command name and opening bracket ends.
// In this case "COMMAND(" ends at index 8 (counting from zero).
SubstitutionFormatParser::tokenizeCommand(command, 8, ':', tokens, max_length);
ASSERT_EQ(tokens.size(), 1);
ASSERT_EQ(tokens.at(0), "item1");
ASSERT_EQ(max_length, absl::nullopt);

command = "COMMAND(item1:item2:item3)";
SubstitutionFormatParser::tokenizeCommand(command, 8, ':', tokens, max_length);
ASSERT_EQ(tokens.size(), 3);
ASSERT_EQ(tokens.at(0), "item1");
ASSERT_EQ(tokens.at(1), "item2");
ASSERT_EQ(tokens.at(2), "item3");
ASSERT_EQ(max_length, absl::nullopt);

command = "COMMAND(item1:item2:item3:item4):234";
SubstitutionFormatParser::tokenizeCommand(command, 8, ':', tokens, max_length);
ASSERT_EQ(tokens.size(), 4);
ASSERT_EQ(tokens.at(0), "item1");
ASSERT_EQ(tokens.at(1), "item2");
ASSERT_EQ(tokens.at(2), "item3");
ASSERT_EQ(tokens.at(3), "item4");
ASSERT_TRUE(max_length.has_value());
ASSERT_EQ(max_length.value(), 234);

// Tokenizing the following commands should fail.
std::vector<std::string> wrong_commands = {
"COMMAND(item1:item2", // Missing closing bracket.
"COMMAND(item1:item2))", // Unexpected second closing bracket.
"COMMAND(item1:item2):", // Missing length field.
"COMMAND(item1:item2):LENGTH", // Length field must be integer.
"COMMAND(item1:item2):100:23", // Length field must be integer.
"COMMAND(item1:item2):100):23"}; // Extra fields after length.

for (const auto& test_command : wrong_commands) {
EXPECT_THROW(
SubstitutionFormatParser::tokenizeCommand(test_command, 8, ':', tokens, max_length),
EnvoyException)
<< test_command;
}
}

// Test tests multiple versions of variadic template method parseCommand
// extracting tokens.
TEST(SubstitutionFormatParser, commandParser) {
std::vector<absl::string_view> tokens;
absl::optional<size_t> max_length;
std::string token1;

std::string command = "COMMAND(item1)";
SubstitutionFormatParser::parseCommand(command, 8, ':', max_length, token1);
ASSERT_EQ(token1, "item1");
ASSERT_EQ(max_length, absl::nullopt);

std::string token2;
command = "COMMAND(item1:item2)";
SubstitutionFormatParser::parseCommand(command, 8, ':', max_length, token1, token2);
ASSERT_EQ(token1, "item1");
ASSERT_EQ(token2, "item2");
ASSERT_EQ(max_length, absl::nullopt);

// Three tokens with optional length.
std::string token3;
command = "COMMAND(item1?item2?item3):345";
SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, token3);
ASSERT_EQ(token1, "item1");
ASSERT_EQ(token2, "item2");
ASSERT_EQ(token3, "item3");
ASSERT_TRUE(max_length.has_value());
ASSERT_EQ(max_length.value(), 345);

// Command string has 4 tokens but 3 are expected.
// The first 3 will be read, the fourth will be ignored.
command = "COMMAND(item1?item2?item3?item4):345";
SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, token3);
ASSERT_EQ(token1, "item1");
ASSERT_EQ(token2, "item2");
ASSERT_EQ(token3, "item3");
ASSERT_TRUE(max_length.has_value());
ASSERT_EQ(max_length.value(), 345);

// Command string has 2 tokens but 3 are expected.
// The third extracted token should be empty.
command = "COMMAND(item1?item2):345";
token3.erase();
SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, token3);
ASSERT_EQ(token1, "item1");
ASSERT_EQ(token2, "item2");
ASSERT_TRUE(token3.empty());
ASSERT_TRUE(max_length.has_value());
ASSERT_EQ(max_length.value(), 345);

// Command string has 4 tokens. Get first 2 into the strings
// and remaining 2 into a vector of strings.
command = "COMMAND(item1?item2?item3?item4):345";
std::vector<std::string> bucket;
SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, bucket);
ASSERT_EQ(token1, "item1");
ASSERT_EQ(token2, "item2");
ASSERT_EQ(bucket.size(), 2);
ASSERT_EQ(bucket.at(0), "item3");
ASSERT_EQ(bucket.at(1), "item4");
ASSERT_TRUE(max_length.has_value());
ASSERT_EQ(max_length.value(), 345);
}

TEST(SubstitutionFormatUtilsTest, protocolToString) {
EXPECT_EQ("HTTP/1.0",
SubstitutionFormatUtils::protocolToString(Http::Protocol::Http10).value().get());
Expand Down
1 change: 0 additions & 1 deletion test/mocks/router/mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,6 @@ class MockRouteEntry : public RouteEntry {
MOCK_METHOD(absl::optional<std::chrono::milliseconds>, maxGrpcTimeout, (), (const));
MOCK_METHOD(absl::optional<std::chrono::milliseconds>, grpcTimeoutOffset, (), (const));
MOCK_METHOD(const VirtualCluster*, virtualCluster, (const Http::HeaderMap& headers), (const));
MOCK_METHOD(const std::string&, virtualHostName, (), (const));
MOCK_METHOD(const VirtualHost&, virtualHost, (), (const));
MOCK_METHOD(bool, autoHostRewrite, (), (const));
MOCK_METHOD((const std::multimap<std::string, std::string>&), opaqueConfig, (), (const));
Expand Down

0 comments on commit 7bbec8c

Please sign in to comment.