diff --git a/source/common/formatter/substitution_formatter.cc b/source/common/formatter/substitution_formatter.cc index 9360e6270960..6c1c48c51320 100644 --- a/source/common/formatter/substitution_formatter.cc +++ b/source/common/formatter/substitution_formatter.cc @@ -302,19 +302,17 @@ void SubstitutionFormatParser::parseCommandHeader(const std::string& token, cons std::string& main_header, std::string& alternative_header, absl::optional& max_length) { + // subs is used only to check if there are more than 2 tokens separated by '?'. std::vector subs; - parseCommand(token, start, "?", main_header, subs, max_length); - if (subs.size() > 1) { + alternative_header = ""; + parseCommand(token, start, '?', max_length, main_header, alternative_header, subs); + if (!subs.empty()) { throw EnvoyException( // Header format rules support only one alternative header. // docs/root/configuration/access_log.rst#format-rules absl::StrCat("More than 1 alternative header specified in token: ", token)); } - if (subs.size() == 1) { - alternative_header = subs.front(); - } else { - alternative_header = ""; - } + // The main and alternative header should not contain invalid characters {NUL, LR, CF}. if (std::regex_search(main_header, getNewlinePattern()) || std::regex_search(alternative_header, getNewlinePattern())) { @@ -322,25 +320,24 @@ void SubstitutionFormatParser::parseCommandHeader(const std::string& token, cons } } -void SubstitutionFormatParser::parseCommand(const std::string& token, const size_t start, - const std::string& separator, std::string& main, - std::vector& sub_items, - absl::optional& max_length) { - // TODO(dnoe): Convert this to use string_view throughout. - const size_t end_request = token.find(')', start); - sub_items.clear(); - if (end_request != token.length() - 1) { +void SubstitutionFormatParser::tokenizeCommand(const std::string& command, const size_t start, + const char separator, + std::vector& tokens, + absl::optional& max_length) { + const size_t end_request = command.find(')', start); + tokens.clear(); + if (end_request != command.length() - 1) { // Closing bracket is not found. if (end_request == std::string::npos) { - throw EnvoyException(absl::StrCat("Closing bracket is missing in token: ", token)); + throw EnvoyException(absl::StrCat("Closing bracket is missing in token: ", command)); } // Closing bracket should be either last one or followed by ':' to denote limitation. - if (token[end_request + 1] != ':') { - throw EnvoyException(absl::StrCat("Incorrect position of ')' in token: ", token)); + if (command[end_request + 1] != ':') { + throw EnvoyException(absl::StrCat("Incorrect position of ')' in token: ", command)); } - const auto length_str = absl::string_view(token).substr(end_request + 2); + const auto length_str = absl::string_view(command).substr(end_request + 2); uint64_t length_value; if (!absl::SimpleAtoi(length_str, &length_value)) { @@ -350,20 +347,10 @@ void SubstitutionFormatParser::parseCommand(const std::string& token, const size max_length = length_value; } - const std::string name_data = token.substr(start, end_request - start); - if (!separator.empty()) { - const std::vector keys = absl::StrSplit(name_data, separator); - if (!keys.empty()) { - // The main value is the first key - main = keys.at(0); - if (keys.size() > 1) { - // Sub items contain additional keys - sub_items.insert(sub_items.end(), keys.begin() + 1, keys.end()); - } - } - } else { - main = name_data; - } + absl::string_view name_data(command); + name_data.remove_prefix(start); + name_data.remove_suffix(command.length() - end_request); + tokens = absl::StrSplit(name_data, separator); } std::vector SubstitutionFormatParser::parse(const std::string& format) { @@ -406,7 +393,7 @@ FormatterProviderPtr SubstitutionFormatParser::parseBuiltinCommand(const std::st std::vector path; const size_t start = DYNAMIC_META_TOKEN.size(); - parseCommand(token, start, ":", filter_namespace, path, max_length); + parseCommand(token, start, ':', max_length, filter_namespace, path); return std::make_unique(filter_namespace, path, max_length); } else if (absl::StartsWith(token, CLUSTER_META_TOKEN)) { std::string filter_namespace; @@ -414,22 +401,22 @@ FormatterProviderPtr SubstitutionFormatParser::parseBuiltinCommand(const std::st std::vector path; const size_t start = CLUSTER_META_TOKEN.size(); - parseCommand(token, start, ":", filter_namespace, path, max_length); + parseCommand(token, start, ':', max_length, filter_namespace, path); return std::make_unique(filter_namespace, path, max_length); } else if (absl::StartsWith(token, FILTER_STATE_TOKEN)) { - std::string key; + std::string key, serialize_type; absl::optional max_length; - std::vector path; + std::string path; const size_t start = FILTER_STATE_TOKEN.size(); - parseCommand(token, start, ":", key, path, max_length); + parseCommand(token, start, ':', max_length, key, serialize_type); if (key.empty()) { throw EnvoyException("Invalid filter state configuration, key cannot be empty."); } - const absl::string_view serialize_type = - !path.empty() ? path[path.size() - 1] : TYPED_SERIALIZATION; - + if (serialize_type.empty()) { + serialize_type = TYPED_SERIALIZATION; + } if (serialize_type != PLAIN_SERIALIZATION && serialize_type != TYPED_SERIALIZATION) { throw EnvoyException("Invalid filter state serialize type, only support PLAIN/TYPED."); } diff --git a/source/common/formatter/substitution_formatter.h b/source/common/formatter/substitution_formatter.h index 32d33eab4d3b..2a3150be4f2f 100644 --- a/source/common/formatter/substitution_formatter.h +++ b/source/common/formatter/substitution_formatter.h @@ -38,29 +38,61 @@ class SubstitutionFormatParser { absl::optional& max_length); /** - * General parse command utility. Will parse token from start position. Token is expected to end - * with ')'. An optional ":max_length" may be specified after the closing ')' char. Token may - * contain multiple values separated by "separator" string. First value will be populated in - * "main" and any additional sub values will be set in the vector "subitems". For example token - * of: "com.test.my_filter:test_object:inner_key):100" with separator of ":" will set the - * following: - * - main: com.test.my_filter - * - subitems: {test_object, inner_key} - * - max_length: 100 + * General tokenize utility. Will parse command from start position. Command is expected to end + * with ')'. An optional ":max_length" may be specified after the closing ')' char. Command may + * contain multiple values separated by "separator" character. Those values will be places + * into tokens container. If no separator is found, entire command (up to ')') will be + * placed as only item in the container. * - * @param token the token to parse + * @param command the command to parse * @param start the index to start parsing from * @param separator separator between values - * @param main the first value - * @param sub_items any additional values + * @param tokens values found in command separated by separator * @param max_length optional max_length will be populated if specified * * TODO(glicht) Rewrite with a parser library. See: * https://github.com/envoyproxy/envoy/issues/2967 */ - static void parseCommand(const std::string& token, const size_t start, - const std::string& separator, std::string& main, - std::vector& sub_items, absl::optional& max_length); + static void tokenizeCommand(const std::string& command, const size_t start, const char separator, + std::vector& tokens, + absl::optional& max_length); + + /* Variadic function template which invokes tokenizeCommand method to parse the + token command and assigns found tokens to sequence of params. + params must be a sequence of std::string& with optional container storing std::string. Here are + examples of params: + - std::string& token1 + - std::string& token1, std::string& token2 + - std::string& token1, std::string& token2, std::vector& remaining + + If command contains more tokens than number of passed params, unassigned tokens will be + ignored. If command contains less tokens than number of passed params, some params will be left + untouched. + */ + template + static void parseCommand(const std::string& command, const size_t start, const char separator, + absl::optional& max_length, Tokens&&... params) { + std::vector tokens; + tokenizeCommand(command, start, separator, tokens, max_length); + std::vector::iterator it = tokens.begin(); + ( + [&](auto& param) { + if (it != tokens.end()) { + if constexpr (std::is_same_v::type, + std::string>) { + // Compile time handler for std::string. + param = *it; + it++; + } else { + // Compile time handler for container type. It will catch all remaining tokens and + // move iterator to the end. + param.insert(param.begin(), it, tokens.end()); + it = tokens.end(); + } + } + }(params), + ...); + } /** * Return a FormatterProviderPtr if a built-in command is parsed from the token. This method diff --git a/test/common/formatter/substitution_formatter_speed_test.cc b/test/common/formatter/substitution_formatter_speed_test.cc index 5da6fead0f89..8fbb0573e8c5 100644 --- a/test/common/formatter/substitution_formatter_speed_test.cc +++ b/test/common/formatter/substitution_formatter_speed_test.cc @@ -163,4 +163,14 @@ static void BM_TypedJsonAccessLogFormatter(benchmark::State& state) { } BENCHMARK(BM_TypedJsonAccessLogFormatter); +// NOLINTNEXTLINE(readability-identifier-naming) +static void BM_FormatterCommandParsing(benchmark::State& state) { + const std::string token = "(Listener:namespace:key):100"; + std::string listener, names, key; + absl::optional len; + for (auto _ : state) { // NOLINT: Silences warning about dead store + Formatter::SubstitutionFormatParser::parseCommand(token, 1, ':', len, listener, names, key); + } +} +BENCHMARK(BM_FormatterCommandParsing); } // namespace Envoy diff --git a/test/common/formatter/substitution_formatter_test.cc b/test/common/formatter/substitution_formatter_test.cc index e1c77c75dc18..df374eaef4a8 100644 --- a/test/common/formatter/substitution_formatter_test.cc +++ b/test/common/formatter/substitution_formatter_test.cc @@ -100,6 +100,119 @@ class TestSerializedStringFilterState : public StreamInfo::FilterState::Object { std::string raw_string_; }; +// Test tests command tokenize utility. +TEST(SubstitutionFormatParser, tokenizer) { + std::vector tokens; + absl::optional max_length; + + std::string command = "COMMAND(item1)"; + + // The second parameter indicates where command name and opening bracket ends. + // In this case "COMMAND(" ends at index 8 (counting from zero). + SubstitutionFormatParser::tokenizeCommand(command, 8, ':', tokens, max_length); + ASSERT_EQ(tokens.size(), 1); + ASSERT_EQ(tokens.at(0), "item1"); + ASSERT_EQ(max_length, absl::nullopt); + + command = "COMMAND(item1:item2:item3)"; + SubstitutionFormatParser::tokenizeCommand(command, 8, ':', tokens, max_length); + ASSERT_EQ(tokens.size(), 3); + ASSERT_EQ(tokens.at(0), "item1"); + ASSERT_EQ(tokens.at(1), "item2"); + ASSERT_EQ(tokens.at(2), "item3"); + ASSERT_EQ(max_length, absl::nullopt); + + command = "COMMAND(item1:item2:item3:item4):234"; + SubstitutionFormatParser::tokenizeCommand(command, 8, ':', tokens, max_length); + ASSERT_EQ(tokens.size(), 4); + ASSERT_EQ(tokens.at(0), "item1"); + ASSERT_EQ(tokens.at(1), "item2"); + ASSERT_EQ(tokens.at(2), "item3"); + ASSERT_EQ(tokens.at(3), "item4"); + ASSERT_TRUE(max_length.has_value()); + ASSERT_EQ(max_length.value(), 234); + + // Tokenizing the following commands should fail. + std::vector wrong_commands = { + "COMMAND(item1:item2", // Missing closing bracket. + "COMMAND(item1:item2))", // Unexpected second closing bracket. + "COMMAND(item1:item2):", // Missing length field. + "COMMAND(item1:item2):LENGTH", // Length field must be integer. + "COMMAND(item1:item2):100:23", // Length field must be integer. + "COMMAND(item1:item2):100):23"}; // Extra fields after length. + + for (const auto& test_command : wrong_commands) { + EXPECT_THROW( + SubstitutionFormatParser::tokenizeCommand(test_command, 8, ':', tokens, max_length), + EnvoyException) + << test_command; + } +} + +// Test tests multiple versions of variadic template method parseCommand +// extracting tokens. +TEST(SubstitutionFormatParser, commandParser) { + std::vector tokens; + absl::optional max_length; + std::string token1; + + std::string command = "COMMAND(item1)"; + SubstitutionFormatParser::parseCommand(command, 8, ':', max_length, token1); + ASSERT_EQ(token1, "item1"); + ASSERT_EQ(max_length, absl::nullopt); + + std::string token2; + command = "COMMAND(item1:item2)"; + SubstitutionFormatParser::parseCommand(command, 8, ':', max_length, token1, token2); + ASSERT_EQ(token1, "item1"); + ASSERT_EQ(token2, "item2"); + ASSERT_EQ(max_length, absl::nullopt); + + // Three tokens with optional length. + std::string token3; + command = "COMMAND(item1?item2?item3):345"; + SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, token3); + ASSERT_EQ(token1, "item1"); + ASSERT_EQ(token2, "item2"); + ASSERT_EQ(token3, "item3"); + ASSERT_TRUE(max_length.has_value()); + ASSERT_EQ(max_length.value(), 345); + + // Command string has 4 tokens but 3 are expected. + // The first 3 will be read, the fourth will be ignored. + command = "COMMAND(item1?item2?item3?item4):345"; + SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, token3); + ASSERT_EQ(token1, "item1"); + ASSERT_EQ(token2, "item2"); + ASSERT_EQ(token3, "item3"); + ASSERT_TRUE(max_length.has_value()); + ASSERT_EQ(max_length.value(), 345); + + // Command string has 2 tokens but 3 are expected. + // The third extracted token should be empty. + command = "COMMAND(item1?item2):345"; + token3.erase(); + SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, token3); + ASSERT_EQ(token1, "item1"); + ASSERT_EQ(token2, "item2"); + ASSERT_TRUE(token3.empty()); + ASSERT_TRUE(max_length.has_value()); + ASSERT_EQ(max_length.value(), 345); + + // Command string has 4 tokens. Get first 2 into the strings + // and remaining 2 into a vector of strings. + command = "COMMAND(item1?item2?item3?item4):345"; + std::vector bucket; + SubstitutionFormatParser::parseCommand(command, 8, '?', max_length, token1, token2, bucket); + ASSERT_EQ(token1, "item1"); + ASSERT_EQ(token2, "item2"); + ASSERT_EQ(bucket.size(), 2); + ASSERT_EQ(bucket.at(0), "item3"); + ASSERT_EQ(bucket.at(1), "item4"); + ASSERT_TRUE(max_length.has_value()); + ASSERT_EQ(max_length.value(), 345); +} + TEST(SubstitutionFormatUtilsTest, protocolToString) { EXPECT_EQ("HTTP/1.0", SubstitutionFormatUtils::protocolToString(Http::Protocol::Http10).value().get()); diff --git a/test/mocks/router/mocks.h b/test/mocks/router/mocks.h index 733217eb9110..13f56a239ef5 100644 --- a/test/mocks/router/mocks.h +++ b/test/mocks/router/mocks.h @@ -381,7 +381,6 @@ class MockRouteEntry : public RouteEntry { MOCK_METHOD(absl::optional, maxGrpcTimeout, (), (const)); MOCK_METHOD(absl::optional, grpcTimeoutOffset, (), (const)); MOCK_METHOD(const VirtualCluster*, virtualCluster, (const Http::HeaderMap& headers), (const)); - MOCK_METHOD(const std::string&, virtualHostName, (), (const)); MOCK_METHOD(const VirtualHost&, virtualHost, (), (const)); MOCK_METHOD(bool, autoHostRewrite, (), (const)); MOCK_METHOD((const std::multimap&), opaqueConfig, (), (const));