From de8c5059baf575aaafdcc42572f7b019824987ba Mon Sep 17 00:00:00 2001 From: Alex Konradi Date: Mon, 1 Feb 2021 13:46:58 -0500 Subject: [PATCH 1/4] config: fix crash when xDS endpoints receive unexpected typed config (#14709) When a listener config contains an Any proto with an unknown proto type, the config is rejected. If debug logging is turned on, though, Envoy first attempts to print the JSON representation of the config. This caused a RELEASE_ASSERT to trigger, crashing Envoy. This PR renames the unsafe proto-to-JSON conversion function and replaces its usage with the safe version in the listener manager to prevent the crash. Signed-off-by: Alex Konradi --- docs/root/version_history/current.rst | 1 + source/common/config/xds_context_params.cc | 2 +- .../formatter/substitution_formatter.cc | 7 +-- source/common/protobuf/protobuf.h | 1 + source/common/protobuf/utility.cc | 32 +++++++++---- source/common/protobuf/utility.h | 43 ++++++++++++++++-- source/common/tracing/http_tracer_impl.cc | 4 +- .../upstream/health_checker_base_impl.cc | 5 ++- .../common/upstream/outlier_detection_impl.cc | 10 +++-- source/common/upstream/subset_lb.cc | 3 +- source/extensions/common/tap/admin.cc | 2 +- .../extensions/common/tap/tap_config_base.cc | 2 +- .../http/aws_lambda/aws_lambda_filter.cc | 2 +- .../filters/http/squash/squash_filter.cc | 2 +- .../filters/network/dubbo_proxy/config.cc | 2 +- .../network/http_connection_manager/config.cc | 2 +- .../network/rocketmq_proxy/active_message.cc | 2 +- .../filters/network/rocketmq_proxy/codec.cc | 2 +- .../network/rocketmq_proxy/conn_manager.cc | 2 +- .../filters/network/thrift_proxy/config.cc | 2 +- .../extensions/tracers/dynamic_ot/config.cc | 3 +- .../extensions/tracers/xray/daemon_broker.cc | 4 +- source/extensions/tracers/xray/tracer.cc | 2 +- .../extensions/tracers/zipkin/span_buffer.cc | 2 +- .../tracers/zipkin/zipkin_core_types.h | 4 +- source/server/admin/clusters_handler.cc | 2 +- source/server/admin/config_dump_handler.cc | 2 +- source/server/admin/init_dump_handler.cc | 2 +- source/server/admin/listeners_handler.cc | 2 +- source/server/admin/runtime_handler.cc | 2 +- source/server/admin/server_info_handler.cc | 6 +-- source/server/admin/stats_handler.cc | 4 +- source/server/listener_manager_impl.cc | 16 +++---- .../filesystem_subscription_impl_test.cc | 10 +++-- test/common/json/json_fuzz_test.cc | 2 +- test/common/protobuf/utility_test.cc | 43 +++++++++++++++++- test/config/utility.cc | 2 +- test/config_test/deprecated_configs_test.cc | 6 +-- test/extensions/common/tap/common.cc | 2 +- .../ext_authz/ext_authz_integration_test.cc | 6 +-- .../ext_proc/ext_proc_integration_test.cc | 2 +- .../grpc_json_transcoder_integration_test.cc | 2 +- .../http/jwt_authn/filter_integration_test.cc | 2 +- .../http/oauth2/oauth_integration_test.cc | 2 +- .../ratelimit/ratelimit_integration_test.cc | 2 +- .../http/tap/tap_filter_integration_test.cc | 4 +- .../rocketmq_proxy/topic_route_test.cc | 2 +- .../tracers/zipkin/span_buffer_test.cc | 6 +-- test/integration/base_integration_test.cc | 4 +- .../listener_lds_integration_test.cc | 45 ++++++++++++++++--- test/integration/xds_integration_test.cc | 28 ++++++------ test/tools/router_check/router.cc | 2 +- 52 files changed, 241 insertions(+), 110 deletions(-) diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index 581fa3994eff..c6bcd134281a 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -36,6 +36,7 @@ Bug Fixes * grpc-web: fix 
local reply and non-proto-encoded gRPC response handling for small response bodies. This fix can be temporarily reverted by setting `envoy.reloadable_features.grpc_web_fix_non_proto_encoded_response_handling` to false. * http: disallowing "host:" in request_headers_to_add for behavioral consistency with rejecting :authority header. This behavior can be temporarily reverted by setting `envoy.reloadable_features.treat_host_like_authority` to false. * http: reverting a behavioral change where upstream connect timeouts were temporarily treated differently from other connection failures. The change back to the original behavior can be temporarily reverted by setting `envoy.reloadable_features.treat_upstream_connect_timeout_as_connect_failure` to false. +* listener: prevent crashing when an unknown listener config proto is received and debug logging is enabled. * upstream: fix handling of moving endpoints between priorities when active health checks are enabled. Previously moving to a higher numbered priority was a NOOP, and moving to a lower numbered priority caused an abort. Removed Config or Runtime diff --git a/source/common/config/xds_context_params.cc b/source/common/config/xds_context_params.cc index e4241f9777ce..d618364dcce5 100644 --- a/source/common/config/xds_context_params.cc +++ b/source/common/config/xds_context_params.cc @@ -42,7 +42,7 @@ const NodeContextRenderers& nodeParamCbs() { void mergeMetadataJson(Protobuf::Map& params, const ProtobufWkt::Struct& metadata, const std::string& prefix) { for (const auto& it : metadata.fields()) { - params[prefix + it.first] = MessageUtil::getJsonStringFromMessage(it.second); + params[prefix + it.first] = MessageUtil::getJsonStringFromMessageOrDie(it.second); } } diff --git a/source/common/formatter/substitution_formatter.cc b/source/common/formatter/substitution_formatter.cc index 2f4737bf3fb4..4de9149e9bf8 100644 --- a/source/common/formatter/substitution_formatter.cc +++ b/source/common/formatter/substitution_formatter.cc @@ -138,10 +138,11 @@ std::string JsonFormatterImpl::format(const Http::RequestHeaderMap& request_head const Http::ResponseTrailerMap& response_trailers, const StreamInfo::StreamInfo& stream_info, absl::string_view local_reply_body) const { - const auto output_struct = struct_formatter_.format( + const ProtobufWkt::Struct output_struct = struct_formatter_.format( request_headers, response_headers, response_trailers, stream_info, local_reply_body); - const std::string log_line = MessageUtil::getJsonStringFromMessage(output_struct, false, true); + const std::string log_line = + MessageUtil::getJsonStringFromMessageOrDie(output_struct, false, true); return absl::StrCat(log_line, "\n"); } @@ -1116,7 +1117,7 @@ MetadataFormatter::formatMetadata(const envoy::config::core::v3::Metadata& metad return absl::nullopt; } - std::string json = MessageUtil::getJsonStringFromMessage(value, false, true); + std::string json = MessageUtil::getJsonStringFromMessageOrDie(value, false, true); truncate(json, max_length_); return json; } diff --git a/source/common/protobuf/protobuf.h b/source/common/protobuf/protobuf.h index b92492de07fb..d07afcdb45ac 100644 --- a/source/common/protobuf/protobuf.h +++ b/source/common/protobuf/protobuf.h @@ -21,6 +21,7 @@ #include "google/protobuf/service.h" #include "google/protobuf/struct.pb.h" #include "google/protobuf/stubs/status.h" +#include "google/protobuf/stubs/statusor.h" #include "google/protobuf/text_format.h" #include "google/protobuf/util/field_mask_util.h" #include 
"google/protobuf/util/json_util.h" diff --git a/source/common/protobuf/utility.cc b/source/common/protobuf/utility.cc index abea1a600c80..eee9f4701a8e 100644 --- a/source/common/protobuf/utility.cc +++ b/source/common/protobuf/utility.cc @@ -589,10 +589,14 @@ void MessageUtil::checkForUnexpectedFields(const Protobuf::Message& message, std::string MessageUtil::getYamlStringFromMessage(const Protobuf::Message& message, const bool block_print, const bool always_print_primitive_fields) { - std::string json = getJsonStringFromMessage(message, false, always_print_primitive_fields); + + auto json_or_error = getJsonStringFromMessage(message, false, always_print_primitive_fields); + if (!json_or_error.ok()) { + throw EnvoyException(json_or_error.status().ToString()); + } YAML::Node node; try { - node = YAML::Load(json); + node = YAML::Load(json_or_error.value()); } catch (YAML::ParserException& e) { throw EnvoyException(e.what()); } catch (YAML::BadConversion& e) { @@ -612,9 +616,9 @@ std::string MessageUtil::getYamlStringFromMessage(const Protobuf::Message& messa return out.c_str(); } -std::string MessageUtil::getJsonStringFromMessage(const Protobuf::Message& message, - const bool pretty_print, - const bool always_print_primitive_fields) { +ProtobufUtil::StatusOr +MessageUtil::getJsonStringFromMessage(const Protobuf::Message& message, const bool pretty_print, + const bool always_print_primitive_fields) { Protobuf::util::JsonPrintOptions json_options; // By default, proto field names are converted to camelCase when the message is converted to JSON. // Setting this option makes debugging easier because it keeps field names consistent in JSON @@ -629,12 +633,23 @@ std::string MessageUtil::getJsonStringFromMessage(const Protobuf::Message& messa json_options.always_print_primitive_fields = true; } std::string json; - const auto status = Protobuf::util::MessageToJsonString(message, &json, json_options); - // This should always succeed unless something crash-worthy such as out-of-memory. - RELEASE_ASSERT(status.ok(), ""); + if (auto status = Protobuf::util::MessageToJsonString(message, &json, json_options); + !status.ok()) { + return status; + } return json; } +std::string MessageUtil::getJsonStringFromMessageOrError(const Protobuf::Message& message, + bool pretty_print, + bool always_print_primitive_fields) { + auto json_or_error = + getJsonStringFromMessage(message, pretty_print, always_print_primitive_fields); + return json_or_error.ok() ? std::move(json_or_error).value() + : fmt::format("Failed to convert protobuf message to JSON string: {}", + json_or_error.status().ToString()); +} + void MessageUtil::unpackTo(const ProtobufWkt::Any& any_message, Protobuf::Message& message) { // If we don't have a type URL match, try an earlier version. const absl::string_view any_full_name = @@ -1004,5 +1019,4 @@ void TimestampUtil::systemClockToTimestamp(const SystemTime system_clock_time, .time_since_epoch() .count())); } - } // namespace Envoy diff --git a/source/common/protobuf/utility.h b/source/common/protobuf/utility.h index e6480c2aee2f..d2871f3bafb0 100644 --- a/source/common/protobuf/utility.h +++ b/source/common/protobuf/utility.h @@ -422,16 +422,51 @@ class MessageUtil { const bool always_print_primitive_fields = false); /** - * Extract JSON as string from a google.protobuf.Message. + * Extract JSON as string from a google.protobuf.Message. 
Returns an error if the message cannot + * be represented as JSON, which can occur if it contains an Any proto with an unrecognized type + * URL or invalid data, or if memory cannot be allocated. + * @param message message of type type.googleapis.com/google.protobuf.Message. + * @param pretty_print whether the returned JSON should be formatted. + * @param always_print_primitive_fields whether to include primitive fields set to their default + * values, e.g. an int32 set to 0 or a bool set to false. + * @return ProtobufUtil::StatusOr of formatted JSON object, or an error status if + * conversion fails. + */ + static ProtobufUtil::StatusOr + getJsonStringFromMessage(const Protobuf::Message& message, bool pretty_print = false, + bool always_print_primitive_fields = false); + + /** + * Extract JSON as string from a google.protobuf.Message, crashing if the conversion to JSON + * fails. This method is safe so long as the message does not contain an Any proto with an + * unrecognized type or invalid data. * @param message message of type type.googleapis.com/google.protobuf.Message. * @param pretty_print whether the returned JSON should be formatted. * @param always_print_primitive_fields whether to include primitive fields set to their default * values, e.g. an int32 set to 0 or a bool set to false. * @return std::string of formatted JSON object. */ - static std::string getJsonStringFromMessage(const Protobuf::Message& message, - bool pretty_print = false, - bool always_print_primitive_fields = false); + static std::string getJsonStringFromMessageOrDie(const Protobuf::Message& message, + bool pretty_print = false, + bool always_print_primitive_fields = false) { + auto json_or_error = + getJsonStringFromMessage(message, pretty_print, always_print_primitive_fields); + RELEASE_ASSERT(json_or_error.ok(), json_or_error.status().ToString()); + return std::move(json_or_error).value(); + } + + /** + * Extract JSON as string from a google.protobuf.Message, returning some error string if the + * conversion to JSON fails. + * @param message message of type type.googleapis.com/google.protobuf.Message. + * @param pretty_print whether the returned JSON should be formatted. + * @param always_print_primitive_fields whether to include primitive fields set to their default + * values, e.g. an int32 set to 0 or a bool set to false. + * @return std::string of formatted JSON object, or an error message if conversion fails. + */ + static std::string getJsonStringFromMessageOrError(const Protobuf::Message& message, + bool pretty_print = false, + bool always_print_primitive_fields = false); /** * Utility method to create a Struct containing the passed in key/value strings. 
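To make the division of labor among the three helpers documented above concrete, here is a minimal usage sketch. It is not part of the diff: the Any payload, the wrapper function, and the log statements are illustrative assumptions, and it presumes MessageUtil, ProtobufWkt, and ENVOY_LOG_MISC from this tree are in scope. Call sites that can propagate a failure use the StatusOr-returning getJsonStringFromMessage(); best-effort call sites such as debug logging of untrusted xDS config use getJsonStringFromMessageOrError(); getJsonStringFromMessageOrDie() is reserved for messages that are known to convert cleanly.

// Sketch only; assumes the MessageUtil declarations above plus Envoy's misc logger macro.
void renderConfigExamples() {
  ProtobufWkt::Any any;
  any.set_type_url("type.googleapis.com/some.unknown.Type"); // hypothetical type URL
  any.set_value("opaque bytes");                             // undecodable payload

  // 1) Propagate the failure: the StatusOr carries the conversion error.
  const auto json_or_error = MessageUtil::getJsonStringFromMessage(any, /*pretty_print=*/true);
  if (!json_or_error.ok()) {
    ENVOY_LOG_MISC(debug, "cannot render message as JSON: {}",
                   json_or_error.status().ToString());
  }

  // 2) Best effort: an error string comes back instead of JSON, so logging a
  //    bad typed config can no longer trip a RELEASE_ASSERT.
  ENVOY_LOG_MISC(debug, "config: {}", MessageUtil::getJsonStringFromMessageOrError(any));

  // 3) Known-safe message: RELEASE_ASSERTs if conversion ever fails.
  ProtobufWkt::Struct safe;
  (*safe.mutable_fields())["key"].set_string_value("value");
  const std::string json = MessageUtil::getJsonStringFromMessageOrDie(safe);
  ENVOY_LOG_MISC(debug, "struct: {}", json);
}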
diff --git a/source/common/tracing/http_tracer_impl.cc b/source/common/tracing/http_tracer_impl.cc index 4b8e5e595fc9..8d2127a33544 100644 --- a/source/common/tracing/http_tracer_impl.cc +++ b/source/common/tracing/http_tracer_impl.cc @@ -352,10 +352,10 @@ void MetadataCustomTag::apply(Span& span, const CustomTagContext& ctx) const { span.setTag(tag(), value.string_value()); return; case ProtobufWkt::Value::kListValue: - span.setTag(tag(), MessageUtil::getJsonStringFromMessage(value.list_value())); + span.setTag(tag(), MessageUtil::getJsonStringFromMessageOrDie(value.list_value())); return; case ProtobufWkt::Value::kStructValue: - span.setTag(tag(), MessageUtil::getJsonStringFromMessage(value.struct_value())); + span.setTag(tag(), MessageUtil::getJsonStringFromMessageOrDie(value.struct_value())); return; default: break; diff --git a/source/common/upstream/health_checker_base_impl.cc b/source/common/upstream/health_checker_base_impl.cc index f6357559eec8..eae8e808244c 100644 --- a/source/common/upstream/health_checker_base_impl.cc +++ b/source/common/upstream/health_checker_base_impl.cc @@ -450,8 +450,9 @@ void HealthCheckEventLoggerImpl::createHealthCheckEvent( callback(event); // Make sure the type enums make it into the JSON - const auto json = MessageUtil::getJsonStringFromMessage(event, /* pretty_print */ false, - /* always_print_primitive_fields */ true); + const auto json = + MessageUtil::getJsonStringFromMessageOrError(event, /* pretty_print */ false, + /* always_print_primitive_fields */ true); file_->write(fmt::format("{}\n", json)); } } // namespace Upstream diff --git a/source/common/upstream/outlier_detection_impl.cc b/source/common/upstream/outlier_detection_impl.cc index cd9deefede3c..6cf0c3aade41 100644 --- a/source/common/upstream/outlier_detection_impl.cc +++ b/source/common/upstream/outlier_detection_impl.cc @@ -764,8 +764,9 @@ void EventLoggerImpl::logEject(const HostDescriptionConstSharedPtr& host, Detect event.mutable_eject_consecutive_event(); } - const auto json = MessageUtil::getJsonStringFromMessage(event, /* pretty_print */ false, - /* always_print_primitive_fields */ true); + const auto json = + MessageUtil::getJsonStringFromMessageOrError(event, /* pretty_print */ false, + /* always_print_primitive_fields */ true); file_->write(fmt::format("{}\n", json)); } @@ -777,8 +778,9 @@ void EventLoggerImpl::logUneject(const HostDescriptionConstSharedPtr& host) { event.set_action(envoy::data::cluster::v2alpha::UNEJECT); - const auto json = MessageUtil::getJsonStringFromMessage(event, /* pretty_print */ false, - /* always_print_primitive_fields */ true); + const auto json = + MessageUtil::getJsonStringFromMessageOrError(event, /* pretty_print */ false, + /* always_print_primitive_fields */ true); file_->write(fmt::format("{}\n", json)); } diff --git a/source/common/upstream/subset_lb.cc b/source/common/upstream/subset_lb.cc index 309a4ea70922..2ffd34bb79c1 100644 --- a/source/common/upstream/subset_lb.cc +++ b/source/common/upstream/subset_lb.cc @@ -623,7 +623,8 @@ std::string SubsetLoadBalancer::describeMetadata(const SubsetLoadBalancer::Subse first = false; } - buf << it.first << "=" << MessageUtil::getJsonStringFromMessage(it.second); + const ProtobufWkt::Value& value = it.second; + buf << it.first << "=" << MessageUtil::getJsonStringFromMessageOrDie(value); } return buf.str(); diff --git a/source/extensions/common/tap/admin.cc b/source/extensions/common/tap/admin.cc index dbc0d3f50207..ee52d24b0c17 100644 --- a/source/extensions/common/tap/admin.cc +++ 
b/source/extensions/common/tap/admin.cc @@ -126,7 +126,7 @@ void AdminHandler::AdminPerTapSinkHandle::submitTrace( switch (format) { case envoy::config::tap::v3::OutputSink::JSON_BODY_AS_STRING: case envoy::config::tap::v3::OutputSink::JSON_BODY_AS_BYTES: - output_string = MessageUtil::getJsonStringFromMessage(*trace, true, true); + output_string = MessageUtil::getJsonStringFromMessageOrError(*trace, true, true); break; default: NOT_REACHED_GCOVR_EXCL_LINE; diff --git a/source/extensions/common/tap/tap_config_base.cc b/source/extensions/common/tap/tap_config_base.cc index bee5f14b862a..5e7d77a1a15a 100644 --- a/source/extensions/common/tap/tap_config_base.cc +++ b/source/extensions/common/tap/tap_config_base.cc @@ -209,7 +209,7 @@ void FilePerTapSink::FilePerTapSinkHandle::submitTrace( break; case envoy::config::tap::v3::OutputSink::JSON_BODY_AS_BYTES: case envoy::config::tap::v3::OutputSink::JSON_BODY_AS_STRING: - output_file_ << MessageUtil::getJsonStringFromMessage(*trace, true, true); + output_file_ << MessageUtil::getJsonStringFromMessageOrError(*trace, true, true); break; default: NOT_REACHED_GCOVR_EXCL_LINE; diff --git a/source/extensions/filters/http/aws_lambda/aws_lambda_filter.cc b/source/extensions/filters/http/aws_lambda/aws_lambda_filter.cc index 8822bbf3a53f..b4a43217a978 100644 --- a/source/extensions/filters/http/aws_lambda/aws_lambda_filter.cc +++ b/source/extensions/filters/http/aws_lambda/aws_lambda_filter.cc @@ -310,7 +310,7 @@ void Filter::jsonizeRequest(Http::RequestHeaderMap const& headers, const Buffer: } MessageUtil::validate(json_req, ProtobufMessage::getStrictValidationVisitor()); - const std::string json_data = MessageUtil::getJsonStringFromMessage( + const std::string json_data = MessageUtil::getJsonStringFromMessageOrError( json_req, false /* pretty_print */, true /* always_print_primitive_fields */); out.add(json_data); } diff --git a/source/extensions/filters/http/squash/squash_filter.cc b/source/extensions/filters/http/squash/squash_filter.cc index bca43dceb5a3..4efc21a79401 100644 --- a/source/extensions/filters/http/squash/squash_filter.cc +++ b/source/extensions/filters/http/squash/squash_filter.cc @@ -50,7 +50,7 @@ SquashFilterConfig::SquashFilterConfig( std::string SquashFilterConfig::getAttachment(const ProtobufWkt::Struct& attachment_template) { ProtobufWkt::Struct attachment_json(attachment_template); updateTemplateInStruct(attachment_json); - return MessageUtil::getJsonStringFromMessage(attachment_json); + return MessageUtil::getJsonStringFromMessageOrDie(attachment_json); } void SquashFilterConfig::updateTemplateInStruct(ProtobufWkt::Struct& attachment_template) { diff --git a/source/extensions/filters/network/dubbo_proxy/config.cc b/source/extensions/filters/network/dubbo_proxy/config.cc index 76260b0eeeb7..0a6f49492152 100644 --- a/source/extensions/filters/network/dubbo_proxy/config.cc +++ b/source/extensions/filters/network/dubbo_proxy/config.cc @@ -139,7 +139,7 @@ void ConfigImpl::registerFilter(const DubboFilterConfig& proto_config) { ENVOY_LOG(debug, " dubbo filter #{}", filter_factories_.size()); ENVOY_LOG(debug, " name: {}", string_name); ENVOY_LOG(debug, " config: {}", - MessageUtil::getJsonStringFromMessage(proto_config.config(), true)); + MessageUtil::getJsonStringFromMessageOrError(proto_config.config(), true)); auto& factory = Envoy::Config::Utility::getAndCheckFactoryByName( diff --git a/source/extensions/filters/network/http_connection_manager/config.cc b/source/extensions/filters/network/http_connection_manager/config.cc index 
0429e1556145..0721a0cadc1f 100644 --- a/source/extensions/filters/network/http_connection_manager/config.cc +++ b/source/extensions/filters/network/http_connection_manager/config.cc @@ -516,7 +516,7 @@ void HttpConnectionManagerConfig::processFilter( callback, proto_config.name()); ENVOY_LOG(debug, " name: {}", filter_config_provider->name()); ENVOY_LOG(debug, " config: {}", - MessageUtil::getJsonStringFromMessage( + MessageUtil::getJsonStringFromMessageOrError( proto_config.has_typed_config() ? static_cast(proto_config.typed_config()) : static_cast( diff --git a/source/extensions/filters/network/rocketmq_proxy/active_message.cc b/source/extensions/filters/network/rocketmq_proxy/active_message.cc index efe82226eb4a..2a263fe752bb 100644 --- a/source/extensions/filters/network/rocketmq_proxy/active_message.cc +++ b/source/extensions/filters/network/rocketmq_proxy/active_message.cc @@ -209,7 +209,7 @@ void ActiveMessage::onQueryTopicRoute() { TopicRouteData topic_route_data(std::move(queue_data_list), std::move(broker_data_list)); ProtobufWkt::Struct data_struct; topic_route_data.encode(data_struct); - std::string json = MessageUtil::getJsonStringFromMessage(data_struct); + std::string json = MessageUtil::getJsonStringFromMessageOrDie(data_struct); ENVOY_LOG(trace, "Serialize TopicRouteData for {} OK:\n{}", cluster_name, json); RemotingCommandPtr response = std::make_unique( static_cast(ResponseCode::Success), downstreamRequest()->version(), diff --git a/source/extensions/filters/network/rocketmq_proxy/codec.cc b/source/extensions/filters/network/rocketmq_proxy/codec.cc index dd00abdfa330..99201e04f5c9 100644 --- a/source/extensions/filters/network/rocketmq_proxy/codec.cc +++ b/source/extensions/filters/network/rocketmq_proxy/codec.cc @@ -391,7 +391,7 @@ void Encoder::encode(const RemotingCommandPtr& command, Buffer::Instance& data) (*fields)["extFields"] = ext_fields_v; } - std::string json = MessageUtil::getJsonStringFromMessage(command_struct); + std::string json = MessageUtil::getJsonStringFromMessageOrDie(command_struct); int32_t frame_length = 4; int32_t header_length = json.size(); diff --git a/source/extensions/filters/network/rocketmq_proxy/conn_manager.cc b/source/extensions/filters/network/rocketmq_proxy/conn_manager.cc index 0748f80476ff..f25b57eac096 100644 --- a/source/extensions/filters/network/rocketmq_proxy/conn_manager.cc +++ b/source/extensions/filters/network/rocketmq_proxy/conn_manager.cc @@ -301,7 +301,7 @@ void ConnectionManager::onGetConsumerListByGroup(RemotingCommandPtr request) { RemotingCommandPtr response = std::make_unique( enumToSignedInt(ResponseCode::Success), request->version(), request->opaque()); response->markAsResponse(); - std::string json = MessageUtil::getJsonStringFromMessage(body_struct); + std::string json = MessageUtil::getJsonStringFromMessageOrDie(body_struct); response->body().add(json); ENVOY_LOG(trace, "GetConsumerListByGroup respond with body: {}", json); diff --git a/source/extensions/filters/network/thrift_proxy/config.cc b/source/extensions/filters/network/thrift_proxy/config.cc index 52b5030bc57f..8c7a967597c4 100644 --- a/source/extensions/filters/network/thrift_proxy/config.cc +++ b/source/extensions/filters/network/thrift_proxy/config.cc @@ -158,7 +158,7 @@ void ConfigImpl::processFilter( ENVOY_LOG(debug, " thrift filter #{}", filter_factories_.size()); ENVOY_LOG(debug, " name: {}", string_name); ENVOY_LOG(debug, " config: {}", - MessageUtil::getJsonStringFromMessage( + MessageUtil::getJsonStringFromMessageOrError( 
proto_config.has_typed_config() ? static_cast(proto_config.typed_config()) : static_cast( diff --git a/source/extensions/tracers/dynamic_ot/config.cc b/source/extensions/tracers/dynamic_ot/config.cc index f8ddf4ceeeb5..6e26631bb476 100644 --- a/source/extensions/tracers/dynamic_ot/config.cc +++ b/source/extensions/tracers/dynamic_ot/config.cc @@ -21,7 +21,8 @@ Tracing::HttpTracerSharedPtr DynamicOpenTracingTracerFactory::createHttpTracerTy const envoy::config::trace::v3::DynamicOtConfig& proto_config, Server::Configuration::TracerFactoryContext& context) { const std::string& library = proto_config.library(); - const std::string config = MessageUtil::getJsonStringFromMessage(proto_config.config()); + const ProtobufWkt::Struct& config_struct = proto_config.config(); + const std::string config = MessageUtil::getJsonStringFromMessageOrDie(config_struct); Tracing::DriverPtr dynamic_driver = std::make_unique( context.serverFactoryContext().scope(), library, config); return std::make_shared(std::move(dynamic_driver), diff --git a/source/extensions/tracers/xray/daemon_broker.cc b/source/extensions/tracers/xray/daemon_broker.cc index 82b8a88310c8..7af0945220fe 100644 --- a/source/extensions/tracers/xray/daemon_broker.cc +++ b/source/extensions/tracers/xray/daemon_broker.cc @@ -22,8 +22,8 @@ std::string createHeader(const std::string& format, uint32_t version) { source::extensions::tracers::xray::daemon::Header header; header.set_format(format); header.set_version(version); - return MessageUtil::getJsonStringFromMessage(header, false /* pretty_print */, - false /* always_print_primitive_fields */); + return MessageUtil::getJsonStringFromMessageOrDie(header, false /* pretty_print */, + false /* always_print_primitive_fields */); } } // namespace diff --git a/source/extensions/tracers/xray/tracer.cc b/source/extensions/tracers/xray/tracer.cc index 782387087400..684f36b8b5ea 100644 --- a/source/extensions/tracers/xray/tracer.cc +++ b/source/extensions/tracers/xray/tracer.cc @@ -92,7 +92,7 @@ void Span::finishSpan() { s.mutable_annotations()->insert({item.first, item.second}); } - const std::string json = MessageUtil::getJsonStringFromMessage( + const std::string json = MessageUtil::getJsonStringFromMessageOrDie( s, false /* pretty_print */, false /* always_print_primitive_fields */); broker_.send(json); diff --git a/source/extensions/tracers/zipkin/span_buffer.cc b/source/extensions/tracers/zipkin/span_buffer.cc index d40071c96182..51b094404a9a 100644 --- a/source/extensions/tracers/zipkin/span_buffer.cc +++ b/source/extensions/tracers/zipkin/span_buffer.cc @@ -80,7 +80,7 @@ std::string JsonV2Serializer::serialize(const std::vector& zipkin_spans) { out, absl::StrJoin( toListOfSpans(zipkin_span, replacements), ",", [&replacement_values](std::string* element, const ProtobufWkt::Struct& span) { - const std::string json = MessageUtil::getJsonStringFromMessage( + const std::string json = MessageUtil::getJsonStringFromMessageOrDie( span, /* pretty_print */ false, /* always_print_primitive_fields */ true); diff --git a/source/extensions/tracers/zipkin/zipkin_core_types.h b/source/extensions/tracers/zipkin/zipkin_core_types.h index 02f302301b52..b87573c8f7bf 100644 --- a/source/extensions/tracers/zipkin/zipkin_core_types.h +++ b/source/extensions/tracers/zipkin/zipkin_core_types.h @@ -50,8 +50,8 @@ class ZipkinBase { const std::string toJson() const { Util::Replacements replacements; return absl::StrReplaceAll( - MessageUtil::getJsonStringFromMessage(toStruct(replacements), /* pretty_print */ false, - /* 
always_print_primitive_fields */ true), + MessageUtil::getJsonStringFromMessageOrDie(toStruct(replacements), /* pretty_print */ false, + /* always_print_primitive_fields */ true), replacements); }; }; diff --git a/source/server/admin/clusters_handler.cc b/source/server/admin/clusters_handler.cc index 801045e63ecd..94c5f820b3ea 100644 --- a/source/server/admin/clusters_handler.cc +++ b/source/server/admin/clusters_handler.cc @@ -181,7 +181,7 @@ void ClustersHandler::writeClustersAsJson(Buffer::Instance& response) { } } } - response.add(MessageUtil::getJsonStringFromMessage(clusters, true)); // pretty-print + response.add(MessageUtil::getJsonStringFromMessageOrError(clusters, true)); // pretty-print } // TODO(efimki): Add support of text readouts stats. diff --git a/source/server/admin/config_dump_handler.cc b/source/server/admin/config_dump_handler.cc index e255f084757c..766b86bbe9d2 100644 --- a/source/server/admin/config_dump_handler.cc +++ b/source/server/admin/config_dump_handler.cc @@ -139,7 +139,7 @@ Http::Code ConfigDumpHandler::handlerConfigDump(absl::string_view url, MessageUtil::redact(dump); response_headers.setReferenceContentType(Http::Headers::get().ContentTypeValues.Json); - response.add(MessageUtil::getJsonStringFromMessage(dump, true)); // pretty-print + response.add(MessageUtil::getJsonStringFromMessageOrError(dump, true)); // pretty-print return Http::Code::OK; } diff --git a/source/server/admin/init_dump_handler.cc b/source/server/admin/init_dump_handler.cc index 8828b64e34e3..11e40cd4a466 100644 --- a/source/server/admin/init_dump_handler.cc +++ b/source/server/admin/init_dump_handler.cc @@ -29,7 +29,7 @@ Http::Code InitDumpHandler::handlerInitDump(absl::string_view url, MessageUtil::redact(dump); response_headers.setReferenceContentType(Http::Headers::get().ContentTypeValues.Json); - response.add(MessageUtil::getJsonStringFromMessage(dump, true)); // pretty-print + response.add(MessageUtil::getJsonStringFromMessageOrError(dump, true)); // pretty-print return Http::Code::OK; } diff --git a/source/server/admin/listeners_handler.cc b/source/server/admin/listeners_handler.cc index 93407d9eb6cc..510393a17b31 100644 --- a/source/server/admin/listeners_handler.cc +++ b/source/server/admin/listeners_handler.cc @@ -61,7 +61,7 @@ void ListenersHandler::writeListenersAsJson(Buffer::Instance& response) { Network::Utility::addressToProtobufAddress(*listener.get().listenSocketFactory().localAddress(), *listener_status.mutable_local_address()); } - response.add(MessageUtil::getJsonStringFromMessage(listeners, true)); // pretty-print + response.add(MessageUtil::getJsonStringFromMessageOrError(listeners, true)); // pretty-print } void ListenersHandler::writeListenersAsText(Buffer::Instance& response) { diff --git a/source/server/admin/runtime_handler.cc b/source/server/admin/runtime_handler.cc index 5719f4ac730e..9c5e4c74b752 100644 --- a/source/server/admin/runtime_handler.cc +++ b/source/server/admin/runtime_handler.cc @@ -74,7 +74,7 @@ Http::Code RuntimeHandler::handlerRuntime(absl::string_view url, (*fields)["layers"] = ValueUtil::listValue(layer_names); (*fields)["entries"] = ValueUtil::structValue(layer_entries); - response.add(MessageUtil::getJsonStringFromMessage(runtime, true, true)); + response.add(MessageUtil::getJsonStringFromMessageOrDie(runtime, true, true)); return Http::Code::OK; } diff --git a/source/server/admin/server_info_handler.cc b/source/server/admin/server_info_handler.cc index 86669949d288..8f5e35797019 100644 --- a/source/server/admin/server_info_handler.cc 
+++ b/source/server/admin/server_info_handler.cc @@ -30,7 +30,7 @@ Http::Code ServerInfoHandler::handlerCerts(absl::string_view, *cert_chain = *cert_details; } }); - response.add(MessageUtil::getJsonStringFromMessage(certificates, true, true)); + response.add(MessageUtil::getJsonStringFromMessageOrError(certificates, true, true)); return Http::Code::OK; } @@ -52,7 +52,7 @@ Http::Code ServerInfoHandler::handlerMemory(absl::string_view, memory.set_pageheap_unmapped(Memory::Stats::totalPageHeapUnmapped()); memory.set_pageheap_free(Memory::Stats::totalPageHeapFree()); memory.set_total_physical_bytes(Memory::Stats::totalPhysicalBytes()); - response.add(MessageUtil::getJsonStringFromMessage(memory, true, true)); // pretty-print + response.add(MessageUtil::getJsonStringFromMessageOrError(memory, true, true)); // pretty-print return Http::Code::OK; } @@ -89,7 +89,7 @@ Http::Code ServerInfoHandler::handlerServerInfo(absl::string_view, Http::Respons server_info.mutable_command_line_options(); *command_line_options = *server_.options().toCommandLineOptions(); server_info.mutable_node()->MergeFrom(server_.localInfo().node()); - response.add(MessageUtil::getJsonStringFromMessage(server_info, true, true)); + response.add(MessageUtil::getJsonStringFromMessageOrError(server_info, true, true)); headers.setReferenceContentType(Http::Headers::get().ContentTypeValues.Json); return Http::Code::OK; } diff --git a/source/server/admin/stats_handler.cc b/source/server/admin/stats_handler.cc index a211ef82f812..00837e2e393e 100644 --- a/source/server/admin/stats_handler.cc +++ b/source/server/admin/stats_handler.cc @@ -167,7 +167,7 @@ Http::Code StatsHandler::handlerContention(absl::string_view, mutex_stats.set_num_contentions(server_.mutexTracer()->numContentions()); mutex_stats.set_current_wait_cycles(server_.mutexTracer()->currentWaitCycles()); mutex_stats.set_lifetime_wait_cycles(server_.mutexTracer()->lifetimeWaitCycles()); - response.add(MessageUtil::getJsonStringFromMessage(mutex_stats, true, true)); + response.add(MessageUtil::getJsonStringFromMessageOrError(mutex_stats, true, true)); } else { response.add("Mutex contention tracing is not enabled. To enable, run Envoy with flag " "--enable-mutex-tracing."); @@ -253,7 +253,7 @@ StatsHandler::statsAsJson(const std::map& all_stats, auto* document_fields = document.mutable_fields(); (*document_fields)["stats"] = ValueUtil::listValue(stats_array); - return MessageUtil::getJsonStringFromMessage(document, pretty_print, true); + return MessageUtil::getJsonStringFromMessageOrDie(document, pretty_print, true); } } // namespace Server diff --git a/source/server/listener_manager_impl.cc b/source/server/listener_manager_impl.cc index a60451c6e67d..2804d56a2f0e 100644 --- a/source/server/listener_manager_impl.cc +++ b/source/server/listener_manager_impl.cc @@ -74,7 +74,6 @@ void fillState(envoy::admin::v3::ListenersConfigDump::DynamicListenerState& stat state.mutable_listener()->PackFrom(API_RECOVER_ORIGINAL(listener.config())); TimestampUtil::systemClockToTimestamp(listener.last_updated_, *(state.mutable_last_updated())); } - } // namespace bool ListenSocketCreationParams::operator==(const ListenSocketCreationParams& rhs) const { @@ -95,12 +94,11 @@ std::vector ProdListenerComponentFactory::createNetwor ENVOY_LOG(debug, " filter #{}:", i); ENVOY_LOG(debug, " name: {}", proto_config.name()); ENVOY_LOG(debug, " config: {}", - MessageUtil::getJsonStringFromMessage( + MessageUtil::getJsonStringFromMessageOrError( proto_config.has_typed_config() ? 
static_cast(proto_config.typed_config()) : static_cast( - proto_config.hidden_envoy_deprecated_config()), - true)); + proto_config.hidden_envoy_deprecated_config()))); // Now see if there is a factory that will accept the config. auto& factory = @@ -129,12 +127,11 @@ ProdListenerComponentFactory::createListenerFilterFactoryList_( ENVOY_LOG(debug, " filter #{}:", i); ENVOY_LOG(debug, " name: {}", proto_config.name()); ENVOY_LOG(debug, " config: {}", - MessageUtil::getJsonStringFromMessage( + MessageUtil::getJsonStringFromMessageOrError( proto_config.has_typed_config() ? static_cast(proto_config.typed_config()) : static_cast( - proto_config.hidden_envoy_deprecated_config()), - true)); + proto_config.hidden_envoy_deprecated_config()))); // Now see if there is a factory that will accept the config. auto& factory = @@ -158,12 +155,11 @@ ProdListenerComponentFactory::createUdpListenerFilterFactoryList_( ENVOY_LOG(debug, " filter #{}:", i); ENVOY_LOG(debug, " name: {}", proto_config.name()); ENVOY_LOG(debug, " config: {}", - MessageUtil::getJsonStringFromMessage( + MessageUtil::getJsonStringFromMessageOrError( proto_config.has_typed_config() ? static_cast(proto_config.typed_config()) : static_cast( - proto_config.hidden_envoy_deprecated_config()), - true)); + proto_config.hidden_envoy_deprecated_config()))); // Now see if there is a factory that will accept the config. auto& factory = diff --git a/test/common/config/filesystem_subscription_impl_test.cc b/test/common/config/filesystem_subscription_impl_test.cc index a60f565eeed4..b73b581b2aa5 100644 --- a/test/common/config/filesystem_subscription_impl_test.cc +++ b/test/common/config/filesystem_subscription_impl_test.cc @@ -176,14 +176,15 @@ version: resource.1 address: 0.0.0.0 port_value: 10000 )EOF"); - const std::string resource = fmt::format(R"EOF( + const std::string resource = + fmt::format(R"EOF( version: system.1 resource: "@type": type.googleapis.com/envoy.config.listener.v3.ListenerCollection entries: - inline_entry: {} )EOF", - MessageUtil::getJsonStringFromMessage(inline_entry)); + MessageUtil::getJsonStringFromMessageOrDie(inline_entry)); DecodedResourcesWrapper decoded_resources; decoded_resources.pushBack(std::make_unique(resource_decoder, inline_entry)); EXPECT_CALL(callbacks_, @@ -204,14 +205,15 @@ version: resource.2 address: 0.0.0.1 port_value: 10001 )EOF"); - const std::string resource_2 = fmt::format(R"EOF( + const std::string resource_2 = + fmt::format(R"EOF( version: system.2 resource: "@type": type.googleapis.com/envoy.config.listener.v3.ListenerCollection entries: - inline_entry: {} )EOF", - MessageUtil::getJsonStringFromMessage(inline_entry_2)); + MessageUtil::getJsonStringFromMessageOrDie(inline_entry_2)); { DecodedResourcesWrapper decoded_resources_2; decoded_resources_2.pushBack( diff --git a/test/common/json/json_fuzz_test.cc b/test/common/json/json_fuzz_test.cc index ba91bd1e8ec7..b85742c2c14f 100644 --- a/test/common/json/json_fuzz_test.cc +++ b/test/common/json/json_fuzz_test.cc @@ -21,7 +21,7 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { MessageUtil::loadFromJson(json_string, message); // We should be able to serialize, parse again and get the same result. 
ProtobufWkt::Struct message2; - MessageUtil::loadFromJson(MessageUtil::getJsonStringFromMessage(message), message2); + MessageUtil::loadFromJson(MessageUtil::getJsonStringFromMessageOrDie(message), message2); FUZZ_ASSERT(TestUtility::protoEqual(message, message2)); // MessageUtil::getYamlStringFromMessage automatically convert types, so we have to do another diff --git a/test/common/protobuf/utility_test.cc b/test/common/protobuf/utility_test.cc index eee8bffdfb23..af3c86e45e7a 100644 --- a/test/common/protobuf/utility_test.cc +++ b/test/common/protobuf/utility_test.cc @@ -37,6 +37,10 @@ using namespace std::chrono_literals; namespace Envoy { +using testing::AllOf; +using testing::HasSubstr; +using testing::Property; + class RuntimeStatsHelper : public TestScopedRuntime { public: RuntimeStatsHelper(bool allow_deprecated_v2_api = false) @@ -250,6 +254,36 @@ TEST_F(ProtobufUtilityTest, DowncastAndValidateUnknownFieldsNested) { "unknown field set {1}) has unknown fields"); } +TEST_F(ProtobufUtilityTest, JsonConvertAnyUnknownMessageType) { + ProtobufWkt::Any source_any; + source_any.set_type_url("type.googleapis.com/bad.type.url"); + source_any.set_value("asdf"); + EXPECT_THAT(MessageUtil::getJsonStringFromMessage(source_any, true).status(), + AllOf(Property(&ProtobufUtil::Status::ok, false), + Property(&ProtobufUtil::Status::ToString, testing::HasSubstr("bad.type.url")))); +} + +TEST_F(ProtobufUtilityTest, JsonConvertKnownGoodMessage) { + ProtobufWkt::Any source_any; + source_any.PackFrom(envoy::config::bootstrap::v3::Bootstrap::default_instance()); + EXPECT_THAT(MessageUtil::getJsonStringFromMessageOrDie(source_any, true), + testing::HasSubstr("@type")); +} + +TEST_F(ProtobufUtilityTest, JsonConvertOrErrorAnyWithUnknownMessageType) { + ProtobufWkt::Any source_any; + source_any.set_type_url("type.googleapis.com/bad.type.url"); + source_any.set_value("asdf"); + EXPECT_THAT(MessageUtil::getJsonStringFromMessageOrError(source_any), HasSubstr("unknown type")); +} + +TEST_F(ProtobufUtilityTest, JsonConvertOrDieAnyWithUnknownMessageType) { + ProtobufWkt::Any source_any; + source_any.set_type_url("type.googleapis.com/bad.type.url"); + source_any.set_value("asdf"); + EXPECT_DEATH(MessageUtil::getJsonStringFromMessageOrDie(source_any), "bad.type.url"); +} + TEST_F(ProtobufUtilityTest, LoadBinaryProtoFromFile) { envoy::config::bootstrap::v3::Bootstrap bootstrap; bootstrap.mutable_cluster_manager() @@ -1418,7 +1452,7 @@ TEST_F(ProtobufUtilityTest, JsonConvertCamelSnake) { ProtobufWkt::Struct json; TestUtility::jsonConvert(bootstrap, json); // Verify we can round-trip. This didn't cause the #3665 regression, but useful as a sanity check. - TestUtility::loadFromJson(MessageUtil::getJsonStringFromMessage(json, false), bootstrap); + TestUtility::loadFromJson(MessageUtil::getJsonStringFromMessageOrDie(json, false), bootstrap); // Verify we don't do a camel case conversion. 
EXPECT_EQ("foo", json.fields() .at("cluster_manager") @@ -1499,6 +1533,13 @@ flags_path: foo)EOF"; EXPECT_EQ(expected_yaml, "\n" + MessageUtil::getYamlStringFromMessage(bootstrap, true, false)); } +TEST_F(ProtobufUtilityTest, GetYamlStringFromProtoInvalidAny) { + ProtobufWkt::Any source_any; + source_any.set_type_url("type.googleapis.com/bad.type.url"); + source_any.set_value("asdf"); + EXPECT_THROW(MessageUtil::getYamlStringFromMessage(source_any, true), EnvoyException); +} + TEST(DurationUtilTest, OutOfRange) { { ProtobufWkt::Duration duration; diff --git a/test/config/utility.cc b/test/config/utility.cc index 928d9416738e..1430e0e5d115 100644 --- a/test/config/utility.cc +++ b/test/config/utility.cc @@ -1211,7 +1211,7 @@ void ConfigHelper::setLds(absl::string_view version_info) { const std::string lds_filename = bootstrap().dynamic_resources().lds_config().path(); std::string file = TestEnvironment::writeStringToFileForTest( - "new_lds_file", MessageUtil::getJsonStringFromMessage(lds)); + "new_lds_file", MessageUtil::getJsonStringFromMessageOrDie(lds)); TestEnvironment::renameFile(file, lds_filename); } diff --git a/test/config_test/deprecated_configs_test.cc b/test/config_test/deprecated_configs_test.cc index c0c68b9dd1d5..3bbf847f73cc 100644 --- a/test/config_test/deprecated_configs_test.cc +++ b/test/config_test/deprecated_configs_test.cc @@ -192,9 +192,9 @@ TEST(DeprecatedConfigsTest, DEPRECATED_FEATURE_TEST(LoadV2BootstrapJsonDeprecate )EOF"); EXPECT_EQ("{\"node\":{\"build_version\":\"foo\"}}", - MessageUtil::getJsonStringFromMessage(bootstrap, false, false)); + MessageUtil::getJsonStringFromMessageOrDie(bootstrap, false, false)); const std::string filename = TestEnvironment::writeStringToFileForTest( - "proto.json", MessageUtil::getJsonStringFromMessage(bootstrap, false, false)); + "proto.json", MessageUtil::getJsonStringFromMessageOrDie(bootstrap, false, false)); // Loading as previous version should work (after upgrade) API_NO_BOOST(envoy::config::bootstrap::v3::Bootstrap) proto_v2_from_file; @@ -216,7 +216,7 @@ TEST(DeprecatedConfigsTest, DEPRECATED_FEATURE_TEST(LoadV2BootstrapJsonDeprecate )EOF"); EXPECT_EQ("{\"node\":{\"hidden_envoy_deprecated_build_version\":\"foo\"}}", - MessageUtil::getJsonStringFromMessage(bootstrap_v3, false, false)); + MessageUtil::getJsonStringFromMessageOrDie(bootstrap_v3, false, false)); const std::string filename_v3 = TestEnvironment::writeStringToFileForTest( "proto_v3.json", MessageUtil::getYamlStringFromMessage(bootstrap_v3, false, false)); diff --git a/test/extensions/common/tap/common.cc b/test/extensions/common/tap/common.cc index 219910483166..eaae84bffa6b 100644 --- a/test/extensions/common/tap/common.cc +++ b/test/extensions/common/tap/common.cc @@ -10,7 +10,7 @@ namespace tap { namespace v3 { std::ostream& operator<<(std::ostream& os, const TraceWrapper& trace) { - return os << Envoy::MessageUtil::getJsonStringFromMessage(trace, true, false); + return os << Envoy::MessageUtil::getJsonStringFromMessageOrDie(trace, true, false); } } // namespace v3 diff --git a/test/extensions/filters/http/ext_authz/ext_authz_integration_test.cc b/test/extensions/filters/http/ext_authz/ext_authz_integration_test.cc index 13e8c40b378b..4336b54dd38f 100644 --- a/test/extensions/filters/http/ext_authz/ext_authz_integration_test.cc +++ b/test/extensions/filters/http/ext_authz/ext_authz_integration_test.cc @@ -65,7 +65,7 @@ class ExtAuthzGrpcIntegrationTest : public Grpc::VersionedGrpcClientIntegrationP envoy::config::listener::v3::Filter ext_authz_filter; 
ext_authz_filter.set_name(Extensions::HttpFilters::HttpFilterNames::get().ExtAuthorization); ext_authz_filter.mutable_typed_config()->PackFrom(proto_config_); - config_helper_.addFilter(MessageUtil::getJsonStringFromMessage(ext_authz_filter)); + config_helper_.addFilter(MessageUtil::getJsonStringFromMessageOrDie(ext_authz_filter)); }); } @@ -466,7 +466,7 @@ class ExtAuthzHttpIntegrationTest : public HttpIntegrationTest, ext_authz_filter.set_name(Extensions::HttpFilters::HttpFilterNames::get().ExtAuthorization); ext_authz_filter.mutable_typed_config()->PackFrom(proto_config_); - config_helper_.addFilter(MessageUtil::getJsonStringFromMessage(ext_authz_filter)); + config_helper_.addFilter(MessageUtil::getJsonStringFromMessageOrDie(ext_authz_filter)); }); } @@ -691,7 +691,7 @@ TEST_P(ExtAuthzLocalReplyIntegrationTest, DeniedHeaderTest) { envoy::config::listener::v3::Filter ext_authz_filter; ext_authz_filter.set_name(Extensions::HttpFilters::HttpFilterNames::get().ExtAuthorization); ext_authz_filter.mutable_typed_config()->PackFrom(proto_config); - config_helper_.addFilter(MessageUtil::getJsonStringFromMessage(ext_authz_filter)); + config_helper_.addFilter(MessageUtil::getJsonStringFromMessageOrDie(ext_authz_filter)); }); const std::string local_reply_yaml = R"EOF( diff --git a/test/extensions/filters/http/ext_proc/ext_proc_integration_test.cc b/test/extensions/filters/http/ext_proc/ext_proc_integration_test.cc index 1e91e1762d1b..cbe37df747e8 100644 --- a/test/extensions/filters/http/ext_proc/ext_proc_integration_test.cc +++ b/test/extensions/filters/http/ext_proc/ext_proc_integration_test.cc @@ -63,7 +63,7 @@ class ExtProcIntegrationTest : public HttpIntegrationTest, envoy::config::listener::v3::Filter ext_proc_filter; ext_proc_filter.set_name("envoy.filters.http.ext_proc"); ext_proc_filter.mutable_typed_config()->PackFrom(proto_config_); - config_helper_.addFilter(MessageUtil::getJsonStringFromMessage(ext_proc_filter)); + config_helper_.addFilter(MessageUtil::getJsonStringFromMessageOrDie(ext_proc_filter)); }); setDownstreamProtocol(Http::CodecClient::Type::HTTP2); } diff --git a/test/extensions/filters/http/grpc_json_transcoder/grpc_json_transcoder_integration_test.cc b/test/extensions/filters/http/grpc_json_transcoder/grpc_json_transcoder_integration_test.cc index 892dc30753b3..1ec49d47b490 100644 --- a/test/extensions/filters/http/grpc_json_transcoder/grpc_json_transcoder_integration_test.cc +++ b/test/extensions/filters/http/grpc_json_transcoder/grpc_json_transcoder_integration_test.cc @@ -831,7 +831,7 @@ std::string createLargeJson(int level) { (*next->mutable_struct_value()->mutable_fields())["k"] = val; cur = next; } - return MessageUtil::getJsonStringFromMessage(*cur, false, false); + return MessageUtil::getJsonStringFromMessageOrDie(*cur, false, false); } TEST_P(GrpcJsonTranscoderIntegrationTest, LargeStruct) { diff --git a/test/extensions/filters/http/jwt_authn/filter_integration_test.cc b/test/extensions/filters/http/jwt_authn/filter_integration_test.cc index bf0af7934ab3..1470a5d9da2a 100644 --- a/test/extensions/filters/http/jwt_authn/filter_integration_test.cc +++ b/test/extensions/filters/http/jwt_authn/filter_integration_test.cc @@ -75,7 +75,7 @@ std::string getAuthFilterConfig(const std::string& config_str, bool use_local_jw HttpFilter filter; filter.set_name(HttpFilterNames::get().JwtAuthn); filter.mutable_typed_config()->PackFrom(proto_config); - return MessageUtil::getJsonStringFromMessage(filter); + return MessageUtil::getJsonStringFromMessageOrDie(filter); } std::string 
getFilterConfig(bool use_local_jwks) { diff --git a/test/extensions/filters/http/oauth2/oauth_integration_test.cc b/test/extensions/filters/http/oauth2/oauth_integration_test.cc index 14ec8dca4bac..0811909f55b3 100644 --- a/test/extensions/filters/http/oauth2/oauth_integration_test.cc +++ b/test/extensions/filters/http/oauth2/oauth_integration_test.cc @@ -134,7 +134,7 @@ TEST_F(OauthIntegrationTest, AuthenticationFlow) { api_->timeSource().systemTime().time_since_epoch() + std::chrono::seconds(10)) .count()); - Buffer::OwnedImpl buffer(MessageUtil::getJsonStringFromMessage(oauth_response)); + Buffer::OwnedImpl buffer(MessageUtil::getJsonStringFromMessageOrDie(oauth_response)); upstream_request_->encodeData(buffer, true); // We should get an immediate redirect back. diff --git a/test/extensions/filters/http/ratelimit/ratelimit_integration_test.cc b/test/extensions/filters/http/ratelimit/ratelimit_integration_test.cc index 1a8e83aa5740..f41460bd6530 100644 --- a/test/extensions/filters/http/ratelimit/ratelimit_integration_test.cc +++ b/test/extensions/filters/http/ratelimit/ratelimit_integration_test.cc @@ -59,7 +59,7 @@ class RatelimitIntegrationTest : public Grpc::VersionedGrpcClientIntegrationPara envoy::config::listener::v3::Filter ratelimit_filter; ratelimit_filter.set_name("envoy.filters.http.ratelimit"); ratelimit_filter.mutable_typed_config()->PackFrom(proto_config_); - config_helper_.addFilter(MessageUtil::getJsonStringFromMessage(ratelimit_filter)); + config_helper_.addFilter(MessageUtil::getJsonStringFromMessageOrDie(ratelimit_filter)); }); config_helper_.addConfigModifier( [](envoy::extensions::filters::network::http_connection_manager::v3::HttpConnectionManager& diff --git a/test/extensions/filters/http/tap/tap_filter_integration_test.cc b/test/extensions/filters/http/tap/tap_filter_integration_test.cc index 6edcaa3f254e..05291efb0c94 100644 --- a/test/extensions/filters/http/tap/tap_filter_integration_test.cc +++ b/test/extensions/filters/http/tap/tap_filter_integration_test.cc @@ -361,8 +361,8 @@ config_id: test_config_id startAdminRequest(admin_request_yaml); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addFilter(admin_filter_config_); new_config_helper.renameListener("foo"); new_config_helper.setLds("1"); diff --git a/test/extensions/filters/network/rocketmq_proxy/topic_route_test.cc b/test/extensions/filters/network/rocketmq_proxy/topic_route_test.cc index a337b89ead69..f6e2f82c985f 100644 --- a/test/extensions/filters/network/rocketmq_proxy/topic_route_test.cc +++ b/test/extensions/filters/network/rocketmq_proxy/topic_route_test.cc @@ -64,7 +64,7 @@ TEST(TopicRouteDataTest, Serialization) { } ProtobufWkt::Struct doc; EXPECT_NO_THROW(topic_route_data.encode(doc)); - MessageUtil::getJsonStringFromMessage(doc); + MessageUtil::getJsonStringFromMessageOrDie(doc); } } // namespace RocketmqProxy diff --git a/test/extensions/tracers/zipkin/span_buffer_test.cc b/test/extensions/tracers/zipkin/span_buffer_test.cc index 210e9df37b25..7cfacdf1419b 100644 --- a/test/extensions/tracers/zipkin/span_buffer_test.cc +++ b/test/extensions/tracers/zipkin/span_buffer_test.cc @@ -407,7 +407,7 @@ TEST(ZipkinSpanBufferTest, TestSerializeTimestampInTheFuture) { (*objectWithScientificNotationFields)["timestamp"] = ValueUtil::numberValue( DEFAULT_TEST_TIMESTAMP); // the value of 
DEFAULT_TEST_TIMESTAMP is 1584324295476870. const auto objectWithScientificNotationJson = - MessageUtil::getJsonStringFromMessage(objectWithScientificNotation, false, true); + MessageUtil::getJsonStringFromMessageOrDie(objectWithScientificNotation, false, true); // Since we use ValueUtil::numberValue to set the timestamp, we expect to // see the value is rendered with scientific notation (1.58432429547687e+15). EXPECT_EQ(R"({"timestamp":1.58432429547687e+15})", objectWithScientificNotationJson); @@ -417,8 +417,8 @@ TEST(ZipkinSpanBufferTest, TestSerializeTimestampInTheFuture) { Util::Replacements replacements; (*objectFields)["timestamp"] = Util::uint64Value(DEFAULT_TEST_TIMESTAMP, "timestamp", replacements); - const auto objectJson = MessageUtil::getJsonStringFromMessage(object, false, true); - // We still have "1584324295476870" from MessageUtil::getJsonStringFromMessage here. + const auto objectJson = MessageUtil::getJsonStringFromMessageOrDie(object, false, true); + // We still have "1584324295476870" from MessageUtil::getJsonStringFromMessageOrDie here. EXPECT_EQ(R"({"timestamp":"1584324295476870"})", objectJson); // However, then the replacement correctly replaces "1584324295476870" with 1584324295476870 // (without quotes). diff --git a/test/integration/base_integration_test.cc b/test/integration/base_integration_test.cc index cda0a100119d..55b5bb99f66a 100644 --- a/test/integration/base_integration_test.cc +++ b/test/integration/base_integration_test.cc @@ -178,8 +178,8 @@ void BaseIntegrationTest::createEnvoy() { ProtobufWkt::Any* resource = lds.add_resources(); resource->PackFrom(listener); } - TestEnvironment::writeStringToFileForTest(lds_path, MessageUtil::getJsonStringFromMessage(lds), - true); + TestEnvironment::writeStringToFileForTest( + lds_path, MessageUtil::getJsonStringFromMessageOrDie(lds), true); // Now that the listeners have been written to the lds file, remove them from static resources // or they will not be reloadable. 
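The comments in the Zipkin span buffer test above hinge on one detail worth spelling out: google.protobuf.Value stores every number as a double, so a microsecond timestamp set with ValueUtil::numberValue serializes in scientific notation, while the Util::uint64Value/Util::Replacements pair first emits it as a quoted string and then strips the quotes. A minimal sketch of that behavior follows; it reuses the helpers from that test, assumes they are in scope, and uses the test's own timestamp constant.

// Sketch only; mirrors the span buffer test above.
ProtobufWkt::Struct obj;
(*obj.mutable_fields())["timestamp"] = ValueUtil::numberValue(1584324295476870);
// Doubles lose the integer form: {"timestamp":1.58432429547687e+15}.
const std::string as_number = MessageUtil::getJsonStringFromMessageOrDie(obj, false, true);

Util::Replacements replacements;
(*obj.mutable_fields())["timestamp"] =
    Util::uint64Value(1584324295476870, "timestamp", replacements);
// Serializes as {"timestamp":"1584324295476870"}; applying `replacements` with
// absl::StrReplaceAll afterwards (as the span buffer does) drops the quotes.
const std::string as_string = MessageUtil::getJsonStringFromMessageOrDie(obj, false, true);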
diff --git a/test/integration/listener_lds_integration_test.cc b/test/integration/listener_lds_integration_test.cc index cdcf60bf8331..58fe34d43cbd 100644 --- a/test/integration/listener_lds_integration_test.cc +++ b/test/integration/listener_lds_integration_test.cc @@ -148,20 +148,29 @@ class ListenerIntegrationTest : public HttpIntegrationTest, createStream(&lds_upstream_info_, getLdsFakeUpstream(), listener_name_); } - void sendLdsResponse(const std::vector& listener_configs, + void sendLdsResponse(const std::vector& listener_configs, const std::string& version) { API_NO_BOOST(envoy::api::v2::DiscoveryResponse) response; response.set_version_info(version); response.set_type_url(Config::TypeUrl::get().Listener); - for (const auto& listener_blob : listener_configs) { - const auto listener_config = - TestUtility::parseYaml(listener_blob); + for (const auto& listener_config : listener_configs) { response.add_resources()->PackFrom(listener_config); } ASSERT(lds_upstream_info_.stream_by_resource_name_[listener_name_] != nullptr); lds_upstream_info_.stream_by_resource_name_[listener_name_]->sendGrpcMessage(response); } + void sendLdsResponse(const std::vector& listener_configs, + const std::string& version) { + std::vector proto_configs; + proto_configs.reserve(listener_configs.size()); + for (const auto& listener_blob : listener_configs) { + proto_configs.emplace_back( + TestUtility::parseYaml(listener_blob)); + } + sendLdsResponse(proto_configs, version); + } + void sendRdsResponse(const std::string& route_config, const std::string& version) { API_NO_BOOST(envoy::api::v2::DiscoveryResponse) response; response.set_version_info(version); @@ -183,6 +192,32 @@ class ListenerIntegrationTest : public HttpIntegrationTest, INSTANTIATE_TEST_SUITE_P(IpVersionsAndGrpcTypes, ListenerIntegrationTest, GRPC_CLIENT_INTEGRATION_PARAMS); +// Tests that an update with an unknown filter config proto is rejected. +TEST_P(ListenerIntegrationTest, CleanlyRejectsUnknownFilterConfigProto) { + on_server_init_function_ = [&]() { + createLdsStream(); + envoy::config::listener::v3::Listener listener = + TestUtility::parseYaml(R"EOF( + name: fake_listener + address: + socket_address: + address: "::" + port_value: 4242 + filter_chains: + - filters: + - name: "filter_name" + )EOF"); + auto* typed_config = + listener.mutable_filter_chains(0)->mutable_filters(0)->mutable_typed_config(); + typed_config->set_type_url("type.googleapis.com/unknown.type.url"); + typed_config->set_value("non-empty config contents"); + sendLdsResponse({listener}, "1"); + }; + initialize(); + registerTestServerPorts({listener_name_}); + test_server_->waitForCounterGe("listener_manager.lds.update_rejected", 1); +} + // Tests that a LDS deletion before Server initManager been initialized will not block the Server // from starting. TEST_P(ListenerIntegrationTest, RemoveLastUninitializedListener) { @@ -200,7 +235,7 @@ TEST_P(ListenerIntegrationTest, RemoveLastUninitializedListener) { EXPECT_EQ(test_server_->server().listenerManager().listeners().size(), 1); // This actually deletes the only listener. 
- sendLdsResponse({}, "2"); + sendLdsResponse(std::vector{}, "2"); test_server_->waitForCounterGe("listener_manager.lds.update_success", 2); EXPECT_EQ(test_server_->server().listenerManager().listeners().size(), 0); // Server instance is ready now because the listener's destruction marked the listener diff --git a/test/integration/xds_integration_test.cc b/test/integration/xds_integration_test.cc index 5d62e6521d12..ecd01d1d7da7 100644 --- a/test/integration/xds_integration_test.cc +++ b/test/integration/xds_integration_test.cc @@ -187,8 +187,8 @@ TEST_P(LdsInplaceUpdateTcpProxyIntegrationTest, ReloadConfigDeletingFilterChain) FakeRawConnectionPtr fake_upstream_connection_1; ASSERT_TRUE(fake_upstreams_[1]->waitForRawConnection(fake_upstream_connection_1)); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addConfigModifier( [&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { auto* listener = bootstrap.mutable_static_resources()->mutable_listeners(0); @@ -229,8 +229,8 @@ TEST_P(LdsInplaceUpdateTcpProxyIntegrationTest, ReloadConfigAddingFilterChain) { FakeRawConnectionPtr fake_upstream_connection_0; ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection_0)); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addConfigModifier( [&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { auto* listener = bootstrap.mutable_static_resources()->mutable_listeners(0); @@ -398,8 +398,8 @@ TEST_P(LdsInplaceUpdateHttpIntegrationTest, ReloadConfigDeletingFilterChain) { c0->close(); c_default->close(); }); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addConfigModifier( [&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { auto* listener = bootstrap.mutable_static_resources()->mutable_listeners(0); @@ -427,8 +427,8 @@ TEST_P(LdsInplaceUpdateHttpIntegrationTest, ReloadConfigAddingFilterChain) { auto codec_client_0 = createHttpCodec("alpn0"); Cleanup cleanup0([c0 = codec_client_0.get()]() { c0->close(); }); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addConfigModifier([&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { auto* listener = bootstrap.mutable_static_resources()->mutable_listeners(0); @@ -465,8 +465,8 @@ TEST_P(LdsInplaceUpdateHttpIntegrationTest, ReloadConfigUpdatingDefaultFilterCha auto codec_client_default = createHttpCodec("alpndefault"); Cleanup cleanup0([c_default = codec_client_default.get()]() { c_default->close(); }); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); 
new_config_helper.addConfigModifier([&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { auto default_filter_chain = @@ -493,8 +493,8 @@ TEST_P(LdsInplaceUpdateHttpIntegrationTest, OverlappingFilterChainServesNewConne auto codec_client_0 = createHttpCodec("alpn0"); Cleanup cleanup([c0 = codec_client_0.get()]() { c0->close(); }); - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addConfigModifier( [&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { auto* listener = bootstrap.mutable_static_resources()->mutable_listeners(0); @@ -538,8 +538,8 @@ TEST_P(LdsIntegrationTest, ReloadConfig) { EXPECT_TRUE(response.find("HTTP/1.1 426 Upgrade Required\r\n") == 0); // Create a new config with HTTP/1.0 proxying. - ConfigHelper new_config_helper(version_, *api_, - MessageUtil::getJsonStringFromMessage(config_helper_.bootstrap())); + ConfigHelper new_config_helper( + version_, *api_, MessageUtil::getJsonStringFromMessageOrDie(config_helper_.bootstrap())); new_config_helper.addConfigModifier( [&](envoy::extensions::filters::network::http_connection_manager::v3::HttpConnectionManager& hcm) { diff --git a/test/tools/router_check/router.cc b/test/tools/router_check/router.cc index d468b338805e..73a5d1a0d633 100644 --- a/test/tools/router_check/router.cc +++ b/test/tools/router_check/router.cc @@ -203,7 +203,7 @@ RouterCheckTool::RouterCheckTool( Json::ObjectSharedPtr loadFromFile(const std::string& file_path, Api::Api& api) { std::string contents = api.fileSystem().fileReadToEnd(file_path); if (absl::EndsWith(file_path, ".yaml")) { - contents = MessageUtil::getJsonStringFromMessage(ValueUtil::loadFromYaml(contents)); + contents = MessageUtil::getJsonStringFromMessageOrDie(ValueUtil::loadFromYaml(contents)); } return Json::Factory::loadFromString(contents); } From 032a8e75928304e011833babea2ca231f32866dd Mon Sep 17 00:00:00 2001 From: Rei Shimizu Date: Tue, 2 Feb 2021 03:59:05 +0900 Subject: [PATCH 2/4] tracing: tracer integration with Apache SkyWalking SDK (#14493) Fix #13799. This integrates the external Apache SkyWalking SDK for C++ and removes the now-redundant in-tree implementation. The adopted library is https://github.com/skyAPM/cpp2sky. It has not been released yet and is still missing some pieces needed for a clean integration, so a working private fork of the SDK is currently maintained at https://github.com/Shikugawa/cpp2sky-envoy. Additional Description: cpp2sky dependency policy check list ### MUST - [x] Cloud Native Computing Foundation (CNCF) approved license - [x] Dependencies must not substantially increase the binary size unless they are optional (i.e. confined to specific extensions) - [x] No duplication of existing dependencies. The dependencies appear to be described [here](https://github.com/SkyAPM/cpp2sky/blob/main/bazel/repositories.bzl) - [x] Hosted on a git repository and the archive fetch must directly reference this repository. We will NOT support intermediate artifacts built by-hand located on GCS, S3, etc.
- [x] CVE history appears reasonable, no pathological CVE arcs - [x] Code review (ideally PRs) before merge - [x] Security vulnerability process exists, with contact details and reporting/disclosure process - [x] Tests run in CI ### SHOULD - [ ] > 1 contributor responsible for a non-trivial number of commits - [ ] High test coverage (also static/dynamic analysis, fuzzing) Parts which is used in Envoy have higher test coverage. But gRPC async segment reporter(which is not used in Envoy) has less test. - [ ] Envoy can obtain advanced notification of vulnerabilities or of security releases - [ ] Do other significant projects have shared fate by using this dependency? - [x] Releases (with release notes) - [x] Commits/releases in last 90 days Risk Level: Mid Testing: Unit/Integration Docs Changes: Release Notes: Platform Specific Features: Signed-off-by: Shikugawa --- CODEOWNERS | 2 +- api/bazel/repositories.bzl | 31 -- api/bazel/repository_locations.bzl | 11 - bazel/repositories.bzl | 13 + bazel/repository_locations.bzl | 27 ++ generated_api_shadow/bazel/repositories.bzl | 31 -- .../bazel/repository_locations.bzl | 11 - source/common/router/config_impl.h | 1 - source/extensions/tracers/skywalking/BUILD | 41 +-- .../skywalking/skywalking_client_config.cc | 43 --- .../skywalking/skywalking_client_config.h | 43 --- .../skywalking/skywalking_tracer_impl.cc | 84 +++-- .../skywalking/skywalking_tracer_impl.h | 26 +- .../tracers/skywalking/skywalking_types.cc | 175 --------- .../tracers/skywalking/skywalking_types.h | 313 ---------------- .../skywalking/trace_segment_reporter.cc | 96 +---- .../skywalking/trace_segment_reporter.h | 35 +- .../extensions/tracers/skywalking/tracer.cc | 93 +++-- source/extensions/tracers/skywalking/tracer.h | 115 +++--- test/extensions/tracers/skywalking/BUILD | 29 +- .../skywalking_client_config_test.cc | 100 ----- .../skywalking/skywalking_test_helper.h | 94 +++-- .../skywalking/skywalking_tracer_impl_test.cc | 129 +++---- .../skywalking/skywalking_types_test.cc | 343 ------------------ .../skywalking/trace_segment_reporter_test.cc | 79 ++-- .../tracers/skywalking/tracer_test.cc | 170 +++++---- test/per_file_coverage.sh | 2 +- 27 files changed, 497 insertions(+), 1640 deletions(-) delete mode 100644 source/extensions/tracers/skywalking/skywalking_client_config.cc delete mode 100644 source/extensions/tracers/skywalking/skywalking_client_config.h delete mode 100644 source/extensions/tracers/skywalking/skywalking_types.cc delete mode 100644 source/extensions/tracers/skywalking/skywalking_types.h delete mode 100644 test/extensions/tracers/skywalking/skywalking_client_config_test.cc delete mode 100644 test/extensions/tracers/skywalking/skywalking_types_test.cc diff --git a/CODEOWNERS b/CODEOWNERS index 7f684161d704..2469866d4162 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -55,7 +55,7 @@ extensions/filters/common/original_src @snowp @klarose # tracers.xray extension /*/extensions/tracers/xray @marcomagdy @lavignes @mattklein123 # tracers.skywalking extension -/*/extensions/tracers/skywalking @wbpcode @dio @lizan +/*/extensions/tracers/skywalking @wbpcode @dio @lizan @Shikugawa # mysql_proxy extension /*/extensions/filters/network/mysql_proxy @rshriram @venilnoronha @mattklein123 # postgres_proxy extension diff --git a/api/bazel/repositories.bzl b/api/bazel/repositories.bzl index 983f15967b28..a12a0ea98b3a 100644 --- a/api/bazel/repositories.bzl +++ b/api/bazel/repositories.bzl @@ -40,10 +40,6 @@ def api_dependencies(): name = "com_github_openzipkin_zipkinapi", 
build_file_content = ZIPKINAPI_BUILD_CONTENT, ) - external_http_archive( - name = "com_github_apache_skywalking_data_collect_protocol", - build_file_content = SKYWALKING_DATA_COLLECT_PROTOCOL_BUILD_CONTENT, - ) PROMETHEUSMETRICS_BUILD_CONTENT = """ load("@envoy_api//bazel:api_build_system.bzl", "api_cc_py_proto_library") @@ -105,30 +101,3 @@ go_proto_library( visibility = ["//visibility:public"], ) """ - -SKYWALKING_DATA_COLLECT_PROTOCOL_BUILD_CONTENT = """ -load("@rules_proto//proto:defs.bzl", "proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -proto_library( - name = "protocol", - srcs = [ - "common/Common.proto", - "language-agent/Tracing.proto", - ], - visibility = ["//visibility:public"], -) - -cc_proto_library( - name = "protocol_cc_proto", - deps = [":protocol"], - visibility = ["//visibility:public"], -) - -go_proto_library( - name = "protocol_go_proto", - proto = ":protocol", - visibility = ["//visibility:public"], -) -""" diff --git a/api/bazel/repository_locations.bzl b/api/bazel/repository_locations.bzl index d15978e7dd6c..f75bfdf651f0 100644 --- a/api/bazel/repository_locations.bzl +++ b/api/bazel/repository_locations.bzl @@ -96,15 +96,4 @@ REPOSITORY_LOCATIONS_SPEC = dict( release_date = "2020-08-17", use_category = ["api"], ), - com_github_apache_skywalking_data_collect_protocol = dict( - project_name = "SkyWalking API", - project_desc = "SkyWalking's language independent model and gRPC API Definitions", - project_url = "https://github.com/apache/skywalking-data-collect-protocol", - version = "8.1.0", - sha256 = "ebea8a6968722524d1bcc4426fb6a29907ddc2902aac7de1559012d3eee90cf9", - strip_prefix = "skywalking-data-collect-protocol-{version}", - urls = ["https://github.com/apache/skywalking-data-collect-protocol/archive/v{version}.tar.gz"], - release_date = "2020-07-29", - use_category = ["api"], - ), ) diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl index fbee3008700c..7f8f9fff3219 100644 --- a/bazel/repositories.bzl +++ b/bazel/repositories.bzl @@ -142,6 +142,7 @@ def envoy_dependencies(skip_targets = []): _com_github_luajit_luajit() _com_github_moonjit_moonjit() _com_github_nghttp2_nghttp2() + _com_github_skyapm_cpp2sky() _com_github_nodejs_http_parser() _com_github_tencent_rapidjson() _com_google_absl() @@ -418,6 +419,18 @@ def _com_github_datadog_dd_opentracing_cpp(): actual = "@com_github_datadog_dd_opentracing_cpp//:dd_opentracing_cpp", ) +def _com_github_skyapm_cpp2sky(): + external_http_archive( + name = "com_github_skyapm_cpp2sky", + ) + external_http_archive( + name = "skywalking_data_collect_protocol", + ) + native.bind( + name = "cpp2sky", + actual = "@com_github_skyapm_cpp2sky//source:cpp2sky_data_lib", + ) + def _com_github_tencent_rapidjson(): external_http_archive( name = "com_github_tencent_rapidjson", diff --git a/bazel/repository_locations.bzl b/bazel/repository_locations.bzl index b19b24a48c6b..6f7a51cb21ab 100644 --- a/bazel/repository_locations.bzl +++ b/bazel/repository_locations.bzl @@ -301,6 +301,33 @@ REPOSITORY_LOCATIONS_SPEC = dict( release_date = "2020-08-25", cpe = "N/A", ), + skywalking_data_collect_protocol = dict( + project_name = "skywalking-data-collect-protocol", + project_desc = "Data Collect Protocols of Apache SkyWalking", + project_url = "https://github.com/apache/skywalking-data-collect-protocol", + name = "skywalking_data_collect_protocol", + sha256 = "fa9ac679624217f30b6e8d5c450365386c610e2d08188a20f0340c3b14401833", 
+ urls = ["https://github.com/apache/skywalking-data-collect-protocol/archive/v8.3.0.zip"], + strip_prefix = "skywalking-data-collect-protocol-8.3.0", + version = "8.3.0", + use_category = ["observability_ext"], + extensions = ["envoy.tracers.skywalking"], + release_date = "2020-11-20", + cpe = "N/A", + ), + com_github_skyapm_cpp2sky = dict( + project_name = "cpp2sky", + project_desc = "C++ SDK for Apache SkyWalking", + project_url = "https://github.com/SkyAPM/cpp2sky", + sha256 = "a8d870bb4b1c4a05eae319f689d1948927f3f0a5b5fe524db73a4c04121a339a", + version = "0.1.1", + strip_prefix = "cpp2sky-{version}", + urls = ["https://github.com/SkyAPM/cpp2sky/archive/v{version}.tar.gz"], + use_category = ["observability_ext"], + extensions = ["envoy.tracers.skywalking"], + release_date = "2021-01-15", + cpe = "N/A", + ), com_github_datadog_dd_opentracing_cpp = dict( project_name = "Datadog OpenTracing C++ Client", project_desc = "Datadog OpenTracing C++ Client", diff --git a/generated_api_shadow/bazel/repositories.bzl b/generated_api_shadow/bazel/repositories.bzl index 983f15967b28..a12a0ea98b3a 100644 --- a/generated_api_shadow/bazel/repositories.bzl +++ b/generated_api_shadow/bazel/repositories.bzl @@ -40,10 +40,6 @@ def api_dependencies(): name = "com_github_openzipkin_zipkinapi", build_file_content = ZIPKINAPI_BUILD_CONTENT, ) - external_http_archive( - name = "com_github_apache_skywalking_data_collect_protocol", - build_file_content = SKYWALKING_DATA_COLLECT_PROTOCOL_BUILD_CONTENT, - ) PROMETHEUSMETRICS_BUILD_CONTENT = """ load("@envoy_api//bazel:api_build_system.bzl", "api_cc_py_proto_library") @@ -105,30 +101,3 @@ go_proto_library( visibility = ["//visibility:public"], ) """ - -SKYWALKING_DATA_COLLECT_PROTOCOL_BUILD_CONTENT = """ -load("@rules_proto//proto:defs.bzl", "proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") - -proto_library( - name = "protocol", - srcs = [ - "common/Common.proto", - "language-agent/Tracing.proto", - ], - visibility = ["//visibility:public"], -) - -cc_proto_library( - name = "protocol_cc_proto", - deps = [":protocol"], - visibility = ["//visibility:public"], -) - -go_proto_library( - name = "protocol_go_proto", - proto = ":protocol", - visibility = ["//visibility:public"], -) -""" diff --git a/generated_api_shadow/bazel/repository_locations.bzl b/generated_api_shadow/bazel/repository_locations.bzl index d15978e7dd6c..f75bfdf651f0 100644 --- a/generated_api_shadow/bazel/repository_locations.bzl +++ b/generated_api_shadow/bazel/repository_locations.bzl @@ -96,15 +96,4 @@ REPOSITORY_LOCATIONS_SPEC = dict( release_date = "2020-08-17", use_category = ["api"], ), - com_github_apache_skywalking_data_collect_protocol = dict( - project_name = "SkyWalking API", - project_desc = "SkyWalking's language independent model and gRPC API Definitions", - project_url = "https://github.com/apache/skywalking-data-collect-protocol", - version = "8.1.0", - sha256 = "ebea8a6968722524d1bcc4426fb6a29907ddc2902aac7de1559012d3eee90cf9", - strip_prefix = "skywalking-data-collect-protocol-{version}", - urls = ["https://github.com/apache/skywalking-data-collect-protocol/archive/v{version}.tar.gz"], - release_date = "2020-07-29", - use_category = ["api"], - ), ) diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h index 95215ea9bdc1..ff635c62ae91 100644 --- a/source/common/router/config_impl.h +++ b/source/common/router/config_impl.h @@ -220,7 
+220,6 @@ class VirtualHostImpl : public VirtualHost { struct VirtualClusterEntry : public StatNameProvider, public VirtualClusterBase { VirtualClusterEntry(const envoy::config::route::v3::VirtualCluster& virtual_cluster, Stats::Scope& scope, const VirtualClusterStatNames& stat_names); - std::vector headers_; }; diff --git a/source/extensions/tracers/skywalking/BUILD b/source/extensions/tracers/skywalking/BUILD index 5cf90c3f976f..158731e9c0ce 100644 --- a/source/extensions/tracers/skywalking/BUILD +++ b/source/extensions/tracers/skywalking/BUILD @@ -15,44 +15,14 @@ envoy_cc_library( name = "trace_segment_reporter_lib", srcs = ["trace_segment_reporter.cc"], hdrs = ["trace_segment_reporter.h"], + external_deps = [ + "cpp2sky", + ], deps = [ - ":skywalking_client_config_lib", ":skywalking_stats_lib", - ":skywalking_types_lib", "//include/envoy/grpc:async_client_manager_interface", "//source/common/common:backoff_lib", "//source/common/grpc:async_client_lib", - "@com_github_apache_skywalking_data_collect_protocol//:protocol_cc_proto", - "@envoy_api//envoy/config/trace/v3:pkg_cc_proto", - ], -) - -envoy_cc_library( - name = "skywalking_types_lib", - srcs = ["skywalking_types.cc"], - hdrs = ["skywalking_types.h"], - deps = [ - ":skywalking_stats_lib", - "//include/envoy/common:random_generator_interface", - "//include/envoy/common:time_interface", - "//include/envoy/http:header_map_interface", - "//include/envoy/tracing:http_tracer_interface", - "//source/common/common:base64_lib", - "//source/common/common:hex_lib", - "//source/common/common:utility_lib", - "@com_github_apache_skywalking_data_collect_protocol//:protocol_cc_proto", - ], -) - -envoy_cc_library( - name = "skywalking_client_config_lib", - srcs = ["skywalking_client_config.cc"], - hdrs = ["skywalking_client_config.h"], - deps = [ - "//include/envoy/secret:secret_provider_interface", - "//include/envoy/server:factory_context_interface", - "//include/envoy/server:tracer_config_interface", - "//source/common/config:datasource_lib", "@envoy_api//envoy/config/trace/v3:pkg_cc_proto", ], ) @@ -67,9 +37,10 @@ envoy_cc_library( "skywalking_tracer_impl.h", "tracer.h", ], + external_deps = [ + "cpp2sky", + ], deps = [ - ":skywalking_client_config_lib", - ":skywalking_types_lib", ":trace_segment_reporter_lib", "//include/envoy/common:time_interface", "//include/envoy/server:tracer_config_interface", diff --git a/source/extensions/tracers/skywalking/skywalking_client_config.cc b/source/extensions/tracers/skywalking/skywalking_client_config.cc deleted file mode 100644 index ed692b3f72bd..000000000000 --- a/source/extensions/tracers/skywalking/skywalking_client_config.cc +++ /dev/null @@ -1,43 +0,0 @@ -#include "extensions/tracers/skywalking/skywalking_client_config.h" - -#include "common/config/datasource.h" - -namespace Envoy { -namespace Extensions { -namespace Tracers { -namespace SkyWalking { - -constexpr uint32_t DEFAULT_DELAYED_SEGMENTS_CACHE_SIZE = 1024; - -// When the user does not provide any available configuration, in order to ensure that the service -// name and instance name are not empty, use this value as the default identifier. In practice, -// user should provide accurate configuration as much as possible to avoid using the default value. 
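The comment above describes a naming fallback that the deleted SkyWalkingClientConfig constructor below and the new Driver::loadConfig later in this patch both apply. A minimal sketch of that selection order (illustrative only; the helper name is hypothetical):

#include <string>

// An explicitly configured name wins; otherwise fall back to the local
// cluster/node name; otherwise use the "EnvoyProxy" default identifier.
std::string selectName(const std::string& configured, const std::string& local_fallback) {
  if (!configured.empty()) {
    return configured;
  }
  return local_fallback.empty() ? "EnvoyProxy" : local_fallback;
}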
-constexpr absl::string_view DEFAULT_SERVICE_AND_INSTANCE = "EnvoyProxy"; - -SkyWalkingClientConfig::SkyWalkingClientConfig(Server::Configuration::TracerFactoryContext& context, - const envoy::config::trace::v3::ClientConfig& config) - : factory_context_(context.serverFactoryContext()), - max_cache_size_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, max_cache_size, - DEFAULT_DELAYED_SEGMENTS_CACHE_SIZE)), - service_(config.service_name().empty() ? factory_context_.localInfo().clusterName().empty() - ? DEFAULT_SERVICE_AND_INSTANCE - : factory_context_.localInfo().clusterName() - : config.service_name()), - instance_(config.instance_name().empty() ? factory_context_.localInfo().nodeName().empty() - ? DEFAULT_SERVICE_AND_INSTANCE - : factory_context_.localInfo().nodeName() - : config.instance_name()) { - // Since the SDS API to get backend token is not supported yet, we can get the value of token - // from the backend_token field directly. If the user does not provide the configuration, the - // value of token is kept empty. - backend_token_ = config.backend_token(); -} - -// TODO(wbpcode): currently, backend authentication token can only be configured with inline string. -// It will be possible to get authentication through the SDS API later. -const std::string& SkyWalkingClientConfig::backendToken() const { return backend_token_; } - -} // namespace SkyWalking -} // namespace Tracers -} // namespace Extensions -} // namespace Envoy diff --git a/source/extensions/tracers/skywalking/skywalking_client_config.h b/source/extensions/tracers/skywalking/skywalking_client_config.h deleted file mode 100644 index 8983a1dbf758..000000000000 --- a/source/extensions/tracers/skywalking/skywalking_client_config.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include "envoy/config/trace/v3/skywalking.pb.h" -#include "envoy/secret/secret_provider.h" -#include "envoy/server/factory_context.h" -#include "envoy/server/tracer_config.h" - -#include "absl/synchronization/mutex.h" - -namespace Envoy { -namespace Extensions { -namespace Tracers { -namespace SkyWalking { - -class SkyWalkingClientConfig { -public: - SkyWalkingClientConfig(Server::Configuration::TracerFactoryContext& context, - const envoy::config::trace::v3::ClientConfig& config); - - uint32_t maxCacheSize() const { return max_cache_size_; } - - const std::string& service() const { return service_; } - const std::string& serviceInstance() const { return instance_; } - - const std::string& backendToken() const; - -private: - Server::Configuration::ServerFactoryContext& factory_context_; - - const uint32_t max_cache_size_{0}; - - const std::string service_; - const std::string instance_; - - std::string backend_token_; -}; - -using SkyWalkingClientConfigPtr = std::unique_ptr; - -} // namespace SkyWalking -} // namespace Tracers -} // namespace Extensions -} // namespace Envoy diff --git a/source/extensions/tracers/skywalking/skywalking_tracer_impl.cc b/source/extensions/tracers/skywalking/skywalking_tracer_impl.cc index 130cbd6a9801..ce07ab127bd6 100644 --- a/source/extensions/tracers/skywalking/skywalking_tracer_impl.cc +++ b/source/extensions/tracers/skywalking/skywalking_tracer_impl.cc @@ -6,29 +6,40 @@ #include "common/common/utility.h" #include "common/http/path_utility.h" -#include "extensions/tracers/skywalking/skywalking_types.h" +#include "cpp2sky/propagation.h" namespace Envoy { namespace Extensions { namespace Tracers { namespace SkyWalking { +namespace { +constexpr uint32_t DEFAULT_DELAYED_SEGMENTS_CACHE_SIZE = 1024; + +// When the user does 
not provide any available configuration, in order to ensure that the service +// name and instance name are not empty, use this value as the default identifier. In practice, +// user should provide accurate configuration as much as possible to avoid using the default value. +constexpr absl::string_view DEFAULT_SERVICE_AND_INSTANCE = "EnvoyProxy"; +} // namespace + +using cpp2sky::createSpanContext; +using cpp2sky::SpanContextPtr; +using cpp2sky::TracerException; + Driver::Driver(const envoy::config::trace::v3::SkyWalkingConfig& proto_config, Server::Configuration::TracerFactoryContext& context) : tracing_stats_{SKYWALKING_TRACER_STATS( POOL_COUNTER_PREFIX(context.serverFactoryContext().scope(), "tracing.skywalking."))}, - client_config_( - std::make_unique(context, proto_config.client_config())), - random_generator_(context.serverFactoryContext().api().randomGenerator()), tls_slot_ptr_(context.serverFactoryContext().threadLocal().allocateSlot()) { - + loadConfig(proto_config.client_config(), context.serverFactoryContext()); + segment_context_factory_ = createSegmentContextFactory(config_); auto& factory_context = context.serverFactoryContext(); tls_slot_ptr_->set([proto_config, &factory_context, this](Event::Dispatcher& dispatcher) { - TracerPtr tracer = std::make_unique(factory_context.timeSource()); - tracer->setReporter(std::make_unique( + TracerPtr tracer = std::make_unique(std::make_unique( factory_context.clusterManager().grpcAsyncClientManager().factoryForGrpcService( proto_config.grpc_service(), factory_context.scope(), false), - dispatcher, factory_context.api().randomGenerator(), tracing_stats_, *client_config_)); + dispatcher, factory_context.api().randomGenerator(), tracing_stats_, + config_.delayed_buffer_size(), config_.token())); return std::make_shared(std::move(tracer)); }); } @@ -37,24 +48,53 @@ Tracing::SpanPtr Driver::startSpan(const Tracing::Config& config, Http::RequestHeaderMap& request_headers, const std::string& operation_name, Envoy::SystemTime start_time, const Tracing::Decision decision) { - auto& tracer = *tls_slot_ptr_->getTyped().tracer_; + auto& tracer = tls_slot_ptr_->getTyped().tracer(); + SegmentContextPtr segment_context; + // TODO(shikugawa): support extension span header. + auto propagation_header = request_headers.get(skywalkingPropagationHeaderKey()); + if (propagation_header.empty()) { + segment_context = segment_context_factory_->create(); + // Sampling status is always true on SkyWalking. But with disabling skip_analysis, + // this span can't be analyzed. + if (!decision.traced) { + segment_context->setSkipAnalysis(); + } + } else { + auto header_value_string = propagation_header[0]->value().getStringView(); + try { + SpanContextPtr span_context = createSpanContext(header_value_string); + segment_context = segment_context_factory_->create(span_context); + } catch (TracerException& e) { + ENVOY_LOG(warn, "New SkyWalking Span/Segment cannot be created for error: {}", e.what()); + return std::make_unique(); + } + } - try { - SpanContextPtr previous_span_context = SpanContext::spanContextFromRequest(request_headers); - auto segment_context = std::make_shared(std::move(previous_span_context), - decision, random_generator_); + return tracer.startSpan(config, start_time, operation_name, segment_context, nullptr); +} - // Initialize fields of current span context. 
- segment_context->setService(client_config_->service()); - segment_context->setServiceInstance(client_config_->serviceInstance()); +void Driver::loadConfig(const envoy::config::trace::v3::ClientConfig& client_config, + Server::Configuration::ServerFactoryContext& server_factory_context) { + config_.set_service_name(!client_config.service_name().empty() + ? client_config.service_name() + : (!server_factory_context.localInfo().clusterName().empty() + ? server_factory_context.localInfo().clusterName() + : DEFAULT_SERVICE_AND_INSTANCE.data())); + config_.set_instance_name(!client_config.instance_name().empty() + ? client_config.service_name() + : (!server_factory_context.localInfo().nodeName().empty() + ? server_factory_context.localInfo().nodeName() + : DEFAULT_SERVICE_AND_INSTANCE.data())); + config_.set_token(client_config.backend_token()); + config_.set_delayed_buffer_size(PROTOBUF_GET_WRAPPED_OR_DEFAULT( + client_config, max_cache_size, DEFAULT_DELAYED_SEGMENTS_CACHE_SIZE)); +} - return tracer.startSpan(config, start_time, operation_name, std::move(segment_context), - nullptr); +Driver::TlsTracer::TlsTracer(TracerPtr tracer) : tracer_(std::move(tracer)) {} - } catch (const EnvoyException& e) { - ENVOY_LOG(warn, "New SkyWalking Span/Segment cannot be created for error: {}", e.what()); - return std::make_unique(); - } +Tracer& Driver::TlsTracer::tracer() { + ASSERT(tracer_); + return *tracer_; } } // namespace SkyWalking diff --git a/source/extensions/tracers/skywalking/skywalking_tracer_impl.h b/source/extensions/tracers/skywalking/skywalking_tracer_impl.h index f073461deb5d..e6639b5ad36c 100644 --- a/source/extensions/tracers/skywalking/skywalking_tracer_impl.h +++ b/source/extensions/tracers/skywalking/skywalking_tracer_impl.h @@ -7,14 +7,20 @@ #include "common/tracing/http_tracer_impl.h" -#include "extensions/tracers/skywalking/skywalking_client_config.h" #include "extensions/tracers/skywalking/tracer.h" +#include "cpp2sky/exception.h" +#include "cpp2sky/segment_context.h" + namespace Envoy { namespace Extensions { namespace Tracers { namespace SkyWalking { +using cpp2sky::SegmentContextFactoryPtr; +using cpp2sky::SegmentContextPtr; +using cpp2sky::TracerConfig; + class Driver : public Tracing::Driver, public Logger::Loggable { public: explicit Driver(const envoy::config::trace::v3::SkyWalkingConfig& config, @@ -25,19 +31,23 @@ class Driver : public Tracing::Driver, public Logger::Loggable; diff --git a/source/extensions/tracers/skywalking/skywalking_types.cc b/source/extensions/tracers/skywalking/skywalking_types.cc deleted file mode 100644 index 9c750884bc11..000000000000 --- a/source/extensions/tracers/skywalking/skywalking_types.cc +++ /dev/null @@ -1,175 +0,0 @@ -#include "extensions/tracers/skywalking/skywalking_types.h" - -#include "envoy/common/exception.h" - -#include "common/common/base64.h" -#include "common/common/empty_string.h" -#include "common/common/fmt.h" -#include "common/common/hex.h" -#include "common/common/utility.h" - -namespace Envoy { -namespace Extensions { -namespace Tracers { -namespace SkyWalking { - -namespace { - -// The standard header name is "sw8", as mentioned in: -// https://github.com/apache/skywalking/blob/v8.1.0/docs/en/protocols/Skywalking-Cross-Process-Propagation-Headers-Protocol-v3.md. 
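For orientation, the "sw8" value referenced above carries eight dash-separated fields, which the deleted spanContextFromRequest() below parses and the deleted injectContext() assembles. A standalone sketch of that assembly (illustrative only; buildSw8 is a hypothetical name, while Base64::encode and absl::StrCat are the same helpers the deleted code uses):

#include <string>

#include "absl/strings/str_cat.h"

#include "common/common/base64.h"

// Field order: sample flag, trace id, segment id, span id, service, service
// instance, endpoint (operation of the root span), and peer/target address.
// String fields are Base64-encoded; the two numeric fields are not.
std::string buildSw8(int sampled, const std::string& trace_id, const std::string& segment_id,
                     int span_id, const std::string& service, const std::string& instance,
                     const std::string& endpoint, const std::string& target_address) {
  auto b64 = [](const std::string& s) { return Envoy::Base64::encode(s.data(), s.length()); };
  return absl::StrCat(sampled, "-", b64(trace_id), "-", b64(segment_id), "-", span_id, "-",
                      b64(service), "-", b64(instance), "-", b64(endpoint), "-",
                      b64(target_address));
}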
-const Http::LowerCaseString& propagationHeader() { - CONSTRUCT_ON_FIRST_USE(Http::LowerCaseString, "sw8"); -} - -std::string generateId(Random::RandomGenerator& random_generator) { - return absl::StrCat(Hex::uint64ToHex(random_generator.random()), - Hex::uint64ToHex(random_generator.random())); -} - -std::string base64Encode(const absl::string_view input) { - return Base64::encode(input.data(), input.length()); -} - -// Decode and validate fields of propagation header. -std::string base64Decode(absl::string_view input) { - // The input can be Base64 string with or without padding. - std::string result = Base64::decodeWithoutPadding(input); - if (result.empty()) { - throw EnvoyException("Invalid propagation header for SkyWalking: parse error"); - } - return result; -} - -} // namespace - -SpanContextPtr SpanContext::spanContextFromRequest(Http::RequestHeaderMap& headers) { - auto propagation_header = headers.get(propagationHeader()); - if (propagation_header.empty()) { - // No propagation header then Envoy is first hop. - return nullptr; - } - - auto header_value_string = propagation_header[0]->value().getStringView(); - const auto parts = StringUtil::splitToken(header_value_string, "-", false, true); - // Reference: - // https://github.com/apache/skywalking/blob/v8.1.0/docs/en/protocols/Skywalking-Cross-Process-Propagation-Headers-Protocol-v3.md. - if (parts.size() != 8) { - throw EnvoyException( - fmt::format("Invalid propagation header for SkyWalking: {}", header_value_string)); - } - - SpanContextPtr previous_span_context = std::unique_ptr(new SpanContext()); - - // Parse and validate sampling flag. - if (parts[0] == "0") { - previous_span_context->sampled_ = 0; - } else if (parts[0] == "1") { - previous_span_context->sampled_ = 1; - } else { - throw EnvoyException(fmt::format("Invalid propagation header for SkyWalking: sampling flag can " - "only be '0' or '1' but '{}' was provided", - parts[0])); - } - - // Parse trace id. - previous_span_context->trace_id_ = base64Decode(parts[1]); - // Parse segment id. - previous_span_context->trace_segment_id_ = base64Decode(parts[2]); - - // Parse span id. - if (!absl::SimpleAtoi(parts[3], &previous_span_context->span_id_)) { - throw EnvoyException(fmt::format( - "Invalid propagation header for SkyWalking: connot convert '{}' to valid span id", - parts[3])); - } - - // Parse service. - previous_span_context->service_ = base64Decode(parts[4]); - // Parse service instance. - previous_span_context->service_instance_ = base64Decode(parts[5]); - // Parse endpoint. Operation Name of the first entry span in the previous segment. - previous_span_context->endpoint_ = base64Decode(parts[6]); - // Parse target address used at downstream side of this request. - previous_span_context->target_address_ = base64Decode(parts[7]); - - return previous_span_context; -} - -SegmentContext::SegmentContext(SpanContextPtr&& previous_span_context, Tracing::Decision decision, - Random::RandomGenerator& random_generator) - : previous_span_context_(std::move(previous_span_context)) { - - if (previous_span_context_) { - trace_id_ = previous_span_context_->trace_id_; - sampled_ = previous_span_context_->sampled_; - } else { - trace_id_ = generateId(random_generator); - sampled_ = decision.traced; - } - trace_segment_id_ = generateId(random_generator); - - // Some detailed log for debugging. - ENVOY_LOG(trace, "{} and create new SkyWalking segment:", - previous_span_context_ ? 
"Has previous span context" : "No previous span context"); - - ENVOY_LOG(trace, " Trace ID: {}", trace_id_); - ENVOY_LOG(trace, " Segment ID: {}", trace_segment_id_); - ENVOY_LOG(trace, " Sampled: {}", sampled_); -} - -SpanStore* SegmentContext::createSpanStore(const SpanStore* parent_span_store) { - ENVOY_LOG(trace, "Create new SpanStore object for current segment: {}", trace_segment_id_); - SpanStorePtr new_span_store = std::make_unique(this); - new_span_store->setSpanId(span_list_.size()); - if (!parent_span_store) { - // The parent SpanStore object does not exist. Create the root SpanStore object in the current - // segment. - new_span_store->setSampled(sampled_); - new_span_store->setParentSpanId(-1); - // First span of current segment for Envoy Proxy must be a Entry Span. It is created for - // downstream HTTP request. - new_span_store->setAsEntrySpan(true); - } else { - // Create child SpanStore object. - new_span_store->setSampled(parent_span_store->sampled()); - new_span_store->setParentSpanId(parent_span_store->spanId()); - new_span_store->setAsEntrySpan(false); - } - SpanStore* ref = new_span_store.get(); - span_list_.emplace_back(std::move(new_span_store)); - return ref; -} - -void SpanStore::injectContext(Http::RequestHeaderMap& request_headers) const { - ASSERT(segment_context_); - - // For SkyWalking Entry Span, Envoy does not need to inject tracing context into the request - // headers. - if (is_entry_span_) { - ENVOY_LOG(debug, "Skip tracing context injection for SkyWalking Entry Span"); - return; - } - - ENVOY_LOG(debug, "Inject or update SkyWalking propagation header in upstream request headers"); - const_cast(this)->setPeerAddress(std::string(request_headers.getHostValue())); - - ENVOY_LOG(trace, "'sw8' header: '({}) - ({}) - ({}) - ({}) - ({}) - ({}) - ({}) - ({})'", - sampled_, segment_context_->traceId(), segment_context_->traceSegmentId(), span_id_, - segment_context_->service(), segment_context_->serviceInstance(), - segment_context_->rootSpanStore()->operation(), peer_address_); - - // Reference: - // https://github.com/apache/skywalking/blob/v8.1.0/docs/en/protocols/Skywalking-Cross-Process-Propagation-Headers-Protocol-v3.md. 
- const auto value = absl::StrCat(sampled_, "-", base64Encode(segment_context_->traceId()), "-", - base64Encode(segment_context_->traceSegmentId()), "-", span_id_, - "-", base64Encode(segment_context_->service()), "-", - base64Encode(segment_context_->serviceInstance()), "-", - base64Encode(segment_context_->rootSpanStore()->operation()), "-", - base64Encode(peer_address_)); - request_headers.setReferenceKey(propagationHeader(), value); -} - -} // namespace SkyWalking -} // namespace Tracers -} // namespace Extensions -} // namespace Envoy diff --git a/source/extensions/tracers/skywalking/skywalking_types.h b/source/extensions/tracers/skywalking/skywalking_types.h deleted file mode 100644 index eacd9a94a075..000000000000 --- a/source/extensions/tracers/skywalking/skywalking_types.h +++ /dev/null @@ -1,313 +0,0 @@ -#pragma once - -#include -#include - -#include "envoy/common/random_generator.h" -#include "envoy/common/time.h" -#include "envoy/http/header_map.h" -#include "envoy/tracing/http_tracer.h" - -#include "language-agent/Tracing.pb.h" - -namespace Envoy { -namespace Extensions { -namespace Tracers { -namespace SkyWalking { - -class SegmentContext; -using SegmentContextSharedPtr = std::shared_ptr; - -class SpanStore; -using SpanStorePtr = std::unique_ptr; - -class SpanContext; -using SpanContextPtr = std::unique_ptr; - -class SpanContext : public Logger::Loggable { -public: - /* - * Parse the context of the previous span from the request and decide whether to sample it or - * not. - * - * @param headers The request headers. - * @return SpanContextPtr The previous span context parsed from request headers. - */ - static SpanContextPtr spanContextFromRequest(Http::RequestHeaderMap& headers); - - // Sampling flag. This field can only be 0 or 1. 1 means this trace need to be sampled and send to - // backend. - int sampled_{0}; - - // This span id points to the parent span in parent trace segment. - int span_id_{0}; - - std::string trace_id_; - - // This trace segment id points to the parent trace segment. - std::string trace_segment_id_; - - std::string service_; - std::string service_instance_; - - // Operation Name of the first entry span in the parent segment. - std::string endpoint_; - - // Target address used at client side of this request. The network address(not must be IP + port) - // used at client side to access this target service. - std::string target_address_; - -private: - // Private default constructor. We can only create SpanContext by 'spanContextFromRequest'. - SpanContext() = default; -}; - -class SegmentContext : public Logger::Loggable { -public: - /* - * Create a new segment context based on the previous span context that parsed from request - * headers. - * - * @param previous_span_context The previous span context. - * @param random_generator The random generator that used to create trace id and segment id. - * @param decision The tracing decision. - */ - SegmentContext(SpanContextPtr&& previous_span_context, Tracing::Decision decision, - Random::RandomGenerator& random_generator); - - /* - * Set service name. - * - * @param service The service name. - */ - void setService(const std::string& service) { service_ = service; } - - /* - * Set service instance name. - * - * @param service_instance The service instance name. - */ - void setServiceInstance(const std::string& service_instance) { - service_instance_ = service_instance; - } - - /* - * Create a new SpanStore object and return its pointer. 
The ownership of the newly created - * SpanStore object belongs to the current segment context. - * - * @param parent_store The pointer that point to parent SpanStore object. - * @return SpanStore* The pointer that point to newly created SpanStore object. - */ - SpanStore* createSpanStore(const SpanStore* parent_store); - - /* - * Get all SpanStore objects in the current segment. - */ - const std::vector& spanList() const { return span_list_; } - - /* - * Get root SpanStore object in the current segment. - */ - const SpanStore* rootSpanStore() { return span_list_.empty() ? nullptr : span_list_[0].get(); } - - int sampled() const { return sampled_; } - const std::string& traceId() const { return trace_id_; } - const std::string& traceSegmentId() const { return trace_segment_id_; } - - const std::string& service() const { return service_; } - const std::string& serviceInstance() const { return service_instance_; } - - SpanContext* previousSpanContext() const { return previous_span_context_.get(); } - -private: - int sampled_{0}; - // This value is unique in the entire tracing link. If previous_context is null, we will use - // random_generator to create a trace id. - std::string trace_id_; - // Envoy creates a new span when it accepts a new HTTP request. This span and all of its child - // spans belong to the same segment and share the segment id. - std::string trace_segment_id_; - - std::string service_; - std::string service_instance_; - - // The SegmentContext parsed from the request headers. If no propagation headers in request then - // this will be nullptr. - SpanContextPtr previous_span_context_; - - std::vector span_list_; -}; - -using Tag = std::pair; - -/* - * A helper class for the SkyWalking span and is used to store all span-related data, including span - * id, parent span id, tags and so on. Whenever we create a new span, we create a new SpanStore - * object. The new span will hold a pointer to the newly created SpanStore object and write data to - * it or get data from it. - */ -class SpanStore : public Logger::Loggable { -public: - /* - * Construct a SpanStore object using span context and time source. - * - * @param segment_context The pointer that point to current span context. This can not be null. - * @param time_source A time source to get the span end time. - */ - explicit SpanStore(SegmentContext* segment_context) : segment_context_(segment_context) {} - - /* - * Get operation name of span. - */ - const std::string& operation() const { return operation_; } - - /* - * Get peer address. The peer in SkyWalking is different with the tag value of 'peer.address'. The - * tag value of 'peer.address' in Envoy is downstream address and the peer in SkyWalking is - * upstream address. - */ - const std::string& peerAddress() const { return peer_address_; } - - /* - * Get span start time. - */ - uint64_t startTime() const { return start_time_; } - - /* - * Get span end time. - */ - uint64_t endTime() const { return end_time_; } - - /* - * Get span tags. - */ - const std::vector& tags() const { return tags_; } - - /* - * Get span logs. - */ - const std::vector& logs() const { return logs_; } - - /* - * Get span sampling flag. - */ - int sampled() const { return sampled_; } - - /* - * Get span id. - */ - int spanId() const { return span_id_; } - - /* - * Get parent span id. - */ - int parentSpanId() const { return parent_span_id_; } - - /* - * Determines if an error has occurred in the current span. 
- */ - bool isError() const { return is_error_; } - - /* - * Determines if the current span is an entry span. - * - * Reference: - * https://github.com/apache/skywalking/blob/v8.1.0/docs/en/protocols/Trace-Data-Protocol-v3.md - */ - bool isEntrySpan() const { return is_entry_span_; } - - /* - * Set span start time. This is the time when the HTTP request started, not the time when the span - * was created. - */ - void setStartTime(uint64_t start_time) { start_time_ = start_time; } - - /* - * Set span end time. It is meaningless for now. End time will be set by finish. - */ - void setEndTime(uint64_t end_time) { end_time_ = end_time; } - - /* - * Set operation name. - */ - void setOperation(const std::string& operation) { operation_ = operation; } - - /* - * Set peer address. In SkyWalking, the peer address is only set in Exit Span. And it should the - * upstream address. Since the upstream address cannot be obtained at the request stage, the - * request host is used instead. - */ - void setPeerAddress(const std::string& peer_address) { peer_address_ = peer_address; } - - /* - * Set if the current span has an error. - */ - void setAsError(bool is_error) { is_error_ = is_error; } - - /* - * Set if the current span is a entry span. - */ - void setAsEntrySpan(bool is_entry_span) { is_entry_span_ = is_entry_span; } - - /* - * Add a new tag entry to current span. - */ - void addTag(absl::string_view name, absl::string_view value) { tags_.emplace_back(name, value); } - - /* - * Add a new log entry to current span. Due to different data formats, log is temporarily not - * supported. - */ - void addLog(SystemTime, const std::string&) {} - - /* - * Set span id of current span. The span id in each segment is started from 0. When new span is - * created, its span id is the current max span id plus 1. - */ - void setSpanId(int span_id) { span_id_ = span_id; } - - /* - * Set parent span id. Notice that in SkyWalking, the parent span and the child span belong to the - * same segment. The first span of each segment has a parent span id of -1. - */ - void setParentSpanId(int parent_span_id) { parent_span_id_ = parent_span_id; } - - /* - * Set sampling flag. In general, the sampling flag of span is consistent with the current span - * context. - */ - void setSampled(int sampled) { sampled_ = sampled == 0 ? 0 : 1; } - - /* - * Inject current span context information to request headers. This will update original - * propagation headers. - * - * @param request_headers The request headers. - */ - void injectContext(Http::RequestHeaderMap& request_headers) const; - -private: - SegmentContext* segment_context_{nullptr}; - - int sampled_{0}; - - int span_id_{0}; - int parent_span_id_{-1}; - - uint64_t start_time_{0}; - uint64_t end_time_{0}; - - std::string operation_; - std::string peer_address_; - - bool is_error_{false}; - bool is_entry_span_{true}; - - std::vector tags_; - std::vector logs_; -}; - -} // namespace SkyWalking -} // namespace Tracers -} // namespace Extensions -} // namespace Envoy diff --git a/source/extensions/tracers/skywalking/trace_segment_reporter.cc b/source/extensions/tracers/skywalking/trace_segment_reporter.cc index 5ef0046dc800..eec47fd69c19 100644 --- a/source/extensions/tracers/skywalking/trace_segment_reporter.cc +++ b/source/extensions/tracers/skywalking/trace_segment_reporter.cc @@ -12,81 +12,18 @@ namespace { Http::RegisterCustomInlineHeader authentication_handle(Http::CustomHeaders::get().Authentication); -// Convert SegmentContext to SegmentObject. 
-TraceSegmentPtr toSegmentObject(const SegmentContext& segment_context) { - auto new_segment_ptr = std::make_unique(); - SegmentObject& segment_object = *new_segment_ptr; - - segment_object.set_traceid(segment_context.traceId()); - segment_object.set_tracesegmentid(segment_context.traceSegmentId()); - segment_object.set_service(segment_context.service()); - segment_object.set_serviceinstance(segment_context.serviceInstance()); - - for (const auto& span_store : segment_context.spanList()) { - if (!span_store->sampled()) { - continue; - } - auto* span = segment_object.mutable_spans()->Add(); - - span->set_spanlayer(SpanLayer::Http); - span->set_spantype(span_store->isEntrySpan() ? SpanType::Entry : SpanType::Exit); - // Please check - // https://github.com/apache/skywalking/blob/master/oap-server/server-bootstrap/src/main/resources/component-libraries.yml - // get more information. - span->set_componentid(9000); - - if (!span_store->peerAddress().empty() && span_store->isEntrySpan()) { - span->set_peer(span_store->peerAddress()); - } - - span->set_spanid(span_store->spanId()); - span->set_parentspanid(span_store->parentSpanId()); - - span->set_starttime(span_store->startTime()); - span->set_endtime(span_store->endTime()); - - span->set_iserror(span_store->isError()); - - span->set_operationname(span_store->operation()); - - auto& tags = *span->mutable_tags(); - tags.Reserve(span_store->tags().size()); - - for (auto& span_tag : span_store->tags()) { - KeyStringValuePair* new_tag = tags.Add(); - new_tag->set_key(span_tag.first); - new_tag->set_value(span_tag.second); - } - - SpanContext* previous_span_context = segment_context.previousSpanContext(); - - if (!previous_span_context || !span_store->isEntrySpan()) { - continue; - } - - auto* ref = span->mutable_refs()->Add(); - ref->set_traceid(previous_span_context->trace_id_); - ref->set_parenttracesegmentid(previous_span_context->trace_segment_id_); - ref->set_parentspanid(previous_span_context->span_id_); - ref->set_parentservice(previous_span_context->service_); - ref->set_parentserviceinstance(previous_span_context->service_instance_); - ref->set_parentendpoint(previous_span_context->endpoint_); - ref->set_networkaddressusedatpeer(previous_span_context->target_address_); - } - return new_segment_ptr; -} - } // namespace TraceSegmentReporter::TraceSegmentReporter(Grpc::AsyncClientFactoryPtr&& factory, Event::Dispatcher& dispatcher, Random::RandomGenerator& random_generator, SkyWalkingTracerStats& stats, - const SkyWalkingClientConfig& client_config) - : tracing_stats_(stats), client_config_(client_config), client_(factory->create()), + uint32_t delayed_buffer_size, const std::string& token) + : tracing_stats_(stats), client_(factory->create()), service_method_(*Protobuf::DescriptorPool::generated_pool()->FindMethodByName( "TraceSegmentReportService.collect")), - random_generator_(random_generator) { + random_generator_(random_generator), token_(token), + delayed_buffer_size_(delayed_buffer_size) { static constexpr uint32_t RetryInitialDelayMs = 500; static constexpr uint32_t RetryMaxDelayMs = 30000; @@ -97,28 +34,27 @@ TraceSegmentReporter::TraceSegmentReporter(Grpc::AsyncClientFactoryPtr&& factory establishNewStream(); } +TraceSegmentReporter::~TraceSegmentReporter() { closeStream(); } + void TraceSegmentReporter::onCreateInitialMetadata(Http::RequestHeaderMap& metadata) { - if (!client_config_.backendToken().empty()) { - metadata.setInline(authentication_handle.handle(), client_config_.backendToken()); + if (!token_.empty()) { + 
metadata.setInline(authentication_handle.handle(), token_); } } -void TraceSegmentReporter::report(const SegmentContext& segment_context) { - sendTraceSegment(toSegmentObject(segment_context)); -} - -void TraceSegmentReporter::sendTraceSegment(TraceSegmentPtr request) { - ASSERT(request); - ENVOY_LOG(trace, "Try to report segment to SkyWalking Server:\n{}", request->DebugString()); +void TraceSegmentReporter::report(SegmentContextPtr segment_context) { + ASSERT(segment_context); + auto request = segment_context->createSegmentObject(); + ENVOY_LOG(trace, "Try to report segment to SkyWalking Server:\n{}", request.DebugString()); if (stream_ != nullptr) { tracing_stats_.segments_sent_.inc(); - stream_->sendMessage(*request, false); + stream_->sendMessage(request, false); return; } // Null stream_ and cache segment data temporarily. - delayed_segments_cache_.emplace(std::move(request)); - if (delayed_segments_cache_.size() > client_config_.maxCacheSize()) { + delayed_segments_cache_.emplace(request); + if (delayed_segments_cache_.size() > delayed_buffer_size_) { tracing_stats_.segments_dropped_.inc(); delayed_segments_cache_.pop(); } @@ -129,7 +65,7 @@ void TraceSegmentReporter::flushTraceSegments() { while (!delayed_segments_cache_.empty() && stream_ != nullptr) { tracing_stats_.segments_sent_.inc(); tracing_stats_.segments_flushed_.inc(); - stream_->sendMessage(*delayed_segments_cache_.front(), false); + stream_->sendMessage(delayed_segments_cache_.front(), false); delayed_segments_cache_.pop(); } tracing_stats_.cache_flushed_.inc(); diff --git a/source/extensions/tracers/skywalking/trace_segment_reporter.h b/source/extensions/tracers/skywalking/trace_segment_reporter.h index fd70d819917b..35e4841fbb02 100644 --- a/source/extensions/tracers/skywalking/trace_segment_reporter.h +++ b/source/extensions/tracers/skywalking/trace_segment_reporter.h @@ -9,26 +9,25 @@ #include "common/common/backoff_strategy.h" #include "common/grpc/async_client_impl.h" -#include "extensions/tracers/skywalking/skywalking_client_config.h" #include "extensions/tracers/skywalking/skywalking_stats.h" -#include "extensions/tracers/skywalking/skywalking_types.h" -#include "language-agent/Tracing.pb.h" +#include "cpp2sky/segment_context.h" namespace Envoy { namespace Extensions { namespace Tracers { namespace SkyWalking { -using TraceSegmentPtr = std::unique_ptr; +using cpp2sky::SegmentContextPtr; class TraceSegmentReporter : public Logger::Loggable, public Grpc::AsyncStreamCallbacks { public: explicit TraceSegmentReporter(Grpc::AsyncClientFactoryPtr&& factory, Event::Dispatcher& dispatcher, Random::RandomGenerator& random, - SkyWalkingTracerStats& stats, - const SkyWalkingClientConfig& client_config); + SkyWalkingTracerStats& stats, uint32_t delayed_buffer_size, + const std::string& token); + ~TraceSegmentReporter() override; // Grpc::AsyncStreamCallbacks void onCreateInitialMetadata(Http::RequestHeaderMap& metadata) override; @@ -37,42 +36,30 @@ class TraceSegmentReporter : public Logger::Loggable, void onReceiveTrailingMetadata(Http::ResponseTrailerMapPtr&&) override {} void onRemoteClose(Grpc::Status::GrpcStatus, const std::string&) override; + void report(SegmentContextPtr segment_context); + +private: /* * Flush all cached segment objects to the back-end tracing service and close the GRPC stream. */ void closeStream(); - - /* - * Convert the current span context into a segment object and report it to the back-end tracing - * service through the GRPC stream. - * - * @param span_context The span context. 
- */ - void report(const SegmentContext& span_context); - -private: void flushTraceSegments(); - - void sendTraceSegment(TraceSegmentPtr request); void establishNewStream(); void handleFailure(); void setRetryTimer(); SkyWalkingTracerStats& tracing_stats_; - - const SkyWalkingClientConfig& client_config_; - Grpc::AsyncClient client_; Grpc::AsyncStream stream_{}; const Protobuf::MethodDescriptor& service_method_; - Random::RandomGenerator& random_generator_; // If the connection is unavailable when reporting data, the created SegmentObject will be cached // in the queue, and when a new connection is established, the cached data will be reported. - std::queue delayed_segments_cache_; - + std::queue delayed_segments_cache_; Event::TimerPtr retry_timer_; BackOffStrategyPtr backoff_strategy_; + std::string token_; + uint32_t delayed_buffer_size_{0}; }; using TraceSegmentReporterPtr = std::unique_ptr; diff --git a/source/extensions/tracers/skywalking/tracer.cc b/source/extensions/tracers/skywalking/tracer.cc index f3845b9c4213..9c6644ada262 100644 --- a/source/extensions/tracers/skywalking/tracer.cc +++ b/source/extensions/tracers/skywalking/tracer.cc @@ -1,81 +1,80 @@ #include "extensions/tracers/skywalking/tracer.h" -#include +#include namespace Envoy { namespace Extensions { namespace Tracers { namespace SkyWalking { -constexpr absl::string_view StatusCodeTag = "status_code"; -constexpr absl::string_view UrlTag = "url"; - namespace { - -uint64_t getTimestamp(SystemTime time) { - return std::chrono::duration_cast(time.time_since_epoch()).count(); -} - +static constexpr absl::string_view StatusCodeTag = "status_code"; +static constexpr absl::string_view UrlTag = "url"; } // namespace -Tracing::SpanPtr Tracer::startSpan(const Tracing::Config&, SystemTime start_time, - const std::string& operation, - SegmentContextSharedPtr segment_context, Span* parent) { - SpanStore* span_store = segment_context->createSpanStore(parent ? parent->spanStore() : nullptr); - - span_store->setStartTime(getTimestamp(start_time)); - - span_store->setOperation(operation); - - return std::make_unique(std::move(segment_context), span_store, *this); -} - -void Span::setOperation(absl::string_view operation) { - span_store_->setOperation(std::string(operation)); +const Http::LowerCaseString& skywalkingPropagationHeaderKey() { + CONSTRUCT_ON_FIRST_USE(Http::LowerCaseString, "sw8"); } void Span::setTag(absl::string_view name, absl::string_view value) { if (name == Tracing::Tags::get().HttpUrl) { - span_store_->addTag(UrlTag, value); - return; - } - - if (name == Tracing::Tags::get().HttpStatusCode) { - span_store_->addTag(StatusCodeTag, value); - return; + span_entity_->addTag(UrlTag.data(), value.data()); + } else if (name == Tracing::Tags::get().HttpStatusCode) { + span_entity_->addTag(StatusCodeTag.data(), value.data()); + } else if (name == Tracing::Tags::get().Error) { + span_entity_->setErrorStatus(); + span_entity_->addTag(name.data(), value.data()); + } else { + span_entity_->addTag(name.data(), value.data()); } +} - if (name == Tracing::Tags::get().Error) { - span_store_->setAsError(value == Tracing::Tags::get().True); +void Span::setSampled(bool do_sample) { + // Sampling status is always true on SkyWalking. But with disabling skip_analysis, + // this span can't be analyzed. + if (!do_sample) { + span_entity_->setSkipAnalysis(); } - - span_store_->addTag(name, value); } -// Logs in the SkyWalking format are temporarily unsupported. 
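TraceSegmentReporter::report() earlier in this patch keeps the bounded delayed-segment cache: while the gRPC stream is down, segments are queued, and once the queue grows past the configured size the oldest entry is dropped. A minimal standalone sketch of that drop-oldest pattern (names hypothetical):

#include <cstddef>
#include <queue>
#include <utility>

// Push an item into a FIFO cache bounded at max_size, discarding the oldest
// entry on overflow, mirroring how delayed_segments_cache_ is maintained.
template <class T> void enqueueBounded(std::queue<T>& cache, T item, size_t max_size) {
  cache.push(std::move(item));
  if (cache.size() > max_size) {
    cache.pop(); // Drop the oldest cached entry (counted as a dropped segment).
  }
}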
-void Span::log(SystemTime, const std::string&) {} +void Span::log(SystemTime, const std::string& event) { span_entity_->addLog(EMPTY_STRING, event); } void Span::finishSpan() { - span_store_->setEndTime(DateUtil::nowToMilliseconds(tracer_.time_source_)); - tryToReportSpan(); + span_entity_->endSpan(); + parent_tracer_.sendSegment(segment_context_); } void Span::injectContext(Http::RequestHeaderMap& request_headers) { - span_store_->injectContext(request_headers); + request_headers.setReferenceKey( + skywalkingPropagationHeaderKey(), + segment_context_->createSW8HeaderValue(std::string(request_headers.getHostValue()))); } -Tracing::SpanPtr Span::spawnChild(const Tracing::Config& config, const std::string& operation_name, - SystemTime start_time) { - // The new child span will share the same context with the parent span. - return tracer_.startSpan(config, start_time, operation_name, segment_context_, this); +Tracing::SpanPtr Span::spawnChild(const Tracing::Config&, const std::string& name, SystemTime) { + auto child_span = segment_context_->createCurrentSegmentSpan(span_entity_); + child_span->startSpan(name); + return std::make_unique(child_span, segment_context_, parent_tracer_); } -void Span::setSampled(bool sampled) { span_store_->setSampled(sampled ? 1 : 0); } +Tracer::Tracer(TraceSegmentReporterPtr reporter) : reporter_(std::move(reporter)) {} -std::string Span::getBaggage(absl::string_view) { return EMPTY_STRING; } - -void Span::setBaggage(absl::string_view, absl::string_view) {} +void Tracer::sendSegment(SegmentContextPtr segment_context) { + ASSERT(reporter_); + if (segment_context->readyToSend()) { + reporter_->report(std::move(segment_context)); + } +} +Tracing::SpanPtr Tracer::startSpan(const Tracing::Config&, SystemTime, const std::string& operation, + SegmentContextPtr segment_context, + CurrentSegmentSpanPtr parent) { + Tracing::SpanPtr span; + auto span_entity = parent != nullptr ? 
segment_context->createCurrentSegmentSpan(parent) + : segment_context->createCurrentSegmentRootSpan(); + span_entity->startSpan(operation); + span = std::make_unique(span_entity, segment_context, *this); + return span; +} } // namespace SkyWalking } // namespace Tracers } // namespace Extensions diff --git a/source/extensions/tracers/skywalking/tracer.h b/source/extensions/tracers/skywalking/tracer.h index c416b22b47ab..e55e4cbb6fff 100644 --- a/source/extensions/tracers/skywalking/tracer.h +++ b/source/extensions/tracers/skywalking/tracer.h @@ -1,42 +1,65 @@ #pragma once -#include #include -#include "envoy/common/pure.h" - -#include "common/common/empty_string.h" #include "common/tracing/http_tracer_impl.h" -#include "extensions/tracers/skywalking/skywalking_types.h" #include "extensions/tracers/skywalking/trace_segment_reporter.h" +#include "cpp2sky/segment_context.h" +#include "cpp2sky/tracer.h" +#include "cpp2sky/well_known_names.h" + namespace Envoy { namespace Extensions { namespace Tracers { namespace SkyWalking { -class Span; +using cpp2sky::CurrentSegmentSpanPtr; +using cpp2sky::SegmentContextPtr; +using SkywalkingTracer = cpp2sky::Tracer; + +const Http::LowerCaseString& skywalkingPropagationHeaderKey(); -class Tracer { +class Span : public Tracing::Span { public: - explicit Tracer(TimeSource& time_source) : time_source_(time_source) {} - virtual ~Tracer() { reporter_->closeStream(); } + Span(CurrentSegmentSpanPtr span_entity, SegmentContextPtr segment_context, + SkywalkingTracer& parent_tracer) + : parent_tracer_(parent_tracer), span_entity_(span_entity), + segment_context_(segment_context) {} - /* - * Set a trace segment reporter to the current Tracer. Whenever a SkyWalking segment ends, the - * reporter will be used to report segment data. - * - * @param reporter The unique ptr of trace segment reporter. - */ - void setReporter(TraceSegmentReporterPtr&& reporter) { reporter_ = std::move(reporter); } + // Tracing::Span + void setOperation(absl::string_view) override {} + void setTag(absl::string_view name, absl::string_view value) override; + void log(SystemTime timestam, const std::string& event) override; + void finishSpan() override; + void injectContext(Http::RequestHeaderMap& request_headers) override; + Tracing::SpanPtr spawnChild(const Tracing::Config& config, const std::string& name, + SystemTime start_time) override; + void setSampled(bool do_sample) override; + std::string getBaggage(absl::string_view) override { return EMPTY_STRING; } + void setBaggage(absl::string_view, absl::string_view) override {} + std::string getTraceIdAsHex() const override { return EMPTY_STRING; } + + const SegmentContextPtr segmentContext() { return segment_context_; } + const CurrentSegmentSpanPtr spanEntity() { return span_entity_; } + +private: + SkywalkingTracer& parent_tracer_; + CurrentSegmentSpanPtr span_entity_; + SegmentContextPtr segment_context_; +}; + +class Tracer : public SkywalkingTracer { +public: + Tracer(TraceSegmentReporterPtr reporter); /* * Report trace segment data to backend tracing service. * * @param segment_context The segment context. */ - void report(const SegmentContext& segment_context) { return reporter_->report(segment_context); } + void sendSegment(SegmentContextPtr segment_context) override; /* * Create a new span based on the segment context and parent span. @@ -52,10 +75,8 @@ class Tracer { * @return The unique ptr to the newly created span. 
*/ Tracing::SpanPtr startSpan(const Tracing::Config& config, SystemTime start_time, - const std::string& operation, SegmentContextSharedPtr segment_context, - Span* parent); - - TimeSource& time_source_; + const std::string& operation, SegmentContextPtr segment_context, + CurrentSegmentSpanPtr parent); private: TraceSegmentReporterPtr reporter_; @@ -63,58 +84,6 @@ class Tracer { using TracerPtr = std::unique_ptr; -class Span : public Tracing::Span { -public: - /* - * Constructor of span. - * - * @param segment_context The SkyWalking segment context. - * @param span_store Pointer to a SpanStore object. Whenever a new span is created, a new - * SpanStore object is created and stored in the segment context. This parameter can never be - * null. - * @param tracer Reference to tracer. - */ - Span(SegmentContextSharedPtr segment_context, SpanStore* span_store, Tracer& tracer) - : segment_context_(std::move(segment_context)), span_store_(span_store), tracer_(tracer) {} - - // Tracing::Span - void setOperation(absl::string_view operation) override; - void setTag(absl::string_view name, absl::string_view value) override; - void log(SystemTime timestamp, const std::string& event) override; - void finishSpan() override; - void injectContext(Http::RequestHeaderMap& request_headers) override; - Tracing::SpanPtr spawnChild(const Tracing::Config& config, const std::string& name, - SystemTime start_time) override; - void setSampled(bool sampled) override; - std::string getBaggage(absl::string_view key) override; - void setBaggage(absl::string_view key, absl::string_view value) override; - - // TODO: This method is unimplemented for OpenTracing. - std::string getTraceIdAsHex() const override { return EMPTY_STRING; }; - - /* - * Get pointer to corresponding SpanStore object. This method is mainly used in testing. Used to - * check the internal data of the span. - */ - SpanStore* spanStore() const { return span_store_; } - SegmentContext* segmentContext() const { return segment_context_.get(); } - -private: - void tryToReportSpan() { - // If the current span is the root span of the entire segment and its sampling flag is not - // false, the data for the entire segment is reported. Please ensure that the root span is the - // last span to end in the entire segment. 
- if (span_store_->sampled() && span_store_->spanId() == 0) { - tracer_.report(*segment_context_); - } - } - - SegmentContextSharedPtr segment_context_; - SpanStore* span_store_; - - Tracer& tracer_; -}; - } // namespace SkyWalking } // namespace Tracers } // namespace Extensions diff --git a/test/extensions/tracers/skywalking/BUILD b/test/extensions/tracers/skywalking/BUILD index b18f8cfe91dc..92710ca47ee5 100644 --- a/test/extensions/tracers/skywalking/BUILD +++ b/test/extensions/tracers/skywalking/BUILD @@ -24,31 +24,6 @@ envoy_extension_cc_test( ], ) -envoy_extension_cc_test( - name = "skywalking_client_config_test", - srcs = ["skywalking_client_config_test.cc"], - extension_name = "envoy.tracers.skywalking", - deps = [ - "//source/extensions/tracers/skywalking:skywalking_client_config_lib", - "//test/mocks:common_lib", - "//test/mocks/server:tracer_factory_context_mocks", - "//test/test_common:utility_lib", - ], -) - -envoy_extension_cc_test( - name = "skywalking_types_test", - srcs = ["skywalking_types_test.cc"], - extension_name = "envoy.tracers.skywalking", - deps = [ - ":skywalking_test_helper", - "//source/extensions/tracers/skywalking:skywalking_types_lib", - "//test/mocks:common_lib", - "//test/test_common:simulated_time_system_lib", - "//test/test_common:utility_lib", - ], -) - envoy_extension_cc_test( name = "trace_segment_reporter_test", srcs = ["trace_segment_reporter_test.cc"], @@ -70,10 +45,12 @@ envoy_extension_cc_test( name = "skywalking_test_helper", srcs = ["skywalking_test_helper.h"], extension_name = "envoy.tracers.skywalking", + external_deps = [ + "cpp2sky", + ], deps = [ "//source/common/common:base64_lib", "//source/common/common:hex_lib", - "//source/extensions/tracers/skywalking:skywalking_types_lib", "//test/test_common:utility_lib", ], ) diff --git a/test/extensions/tracers/skywalking/skywalking_client_config_test.cc b/test/extensions/tracers/skywalking/skywalking_client_config_test.cc deleted file mode 100644 index c0d4131a8e9d..000000000000 --- a/test/extensions/tracers/skywalking/skywalking_client_config_test.cc +++ /dev/null @@ -1,100 +0,0 @@ -#include "extensions/tracers/skywalking/skywalking_client_config.h" - -#include "test/mocks/common.h" -#include "test/mocks/server/tracer_factory_context.h" -#include "test/test_common/utility.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using testing::NiceMock; -using testing::ReturnRef; - -namespace Envoy { -namespace Extensions { -namespace Tracers { -namespace SkyWalking { -namespace { - -class SkyWalkingClientConfigTest : public testing::Test { -public: - void setupSkyWalkingClientConfig(const std::string& yaml_string) { - auto& local_info = context_.server_factory_context_.local_info_; - - ON_CALL(local_info, clusterName()).WillByDefault(ReturnRef(test_string)); - ON_CALL(local_info, nodeName()).WillByDefault(ReturnRef(test_string)); - - envoy::config::trace::v3::SkyWalkingConfig proto_config; - TestUtility::loadFromYaml(yaml_string, proto_config); - - client_config_ = - std::make_unique(context_, proto_config.client_config()); - } - -protected: - NiceMock context_; - - std::string test_string = "ABCDEFGHIJKLMN"; - - SkyWalkingClientConfigPtr client_config_; -}; - -// Test whether the default value can be set correctly when there is no proto client config -// provided. 
-TEST_F(SkyWalkingClientConfigTest, NoProtoClientConfigTest) { - const std::string yaml_string = R"EOF( - grpc_service: - envoy_grpc: - cluster_name: fake_cluster - )EOF"; - - setupSkyWalkingClientConfig(yaml_string); - - EXPECT_EQ(client_config_->service(), test_string); - EXPECT_EQ(client_config_->serviceInstance(), test_string); - EXPECT_EQ(client_config_->maxCacheSize(), 1024); - EXPECT_EQ(client_config_->backendToken(), ""); -} - -// Test whether the client config can work correctly when the proto client config is provided. -TEST_F(SkyWalkingClientConfigTest, WithProtoClientConfigTest) { - const std::string yaml_string = R"EOF( - grpc_service: - envoy_grpc: - cluster_name: fake_cluster - client_config: - backend_token: "FAKE_FAKE_FAKE_FAKE_FAKE_FAKE" - service_name: "FAKE_FAKE_FAKE" - instance_name: "FAKE_FAKE_FAKE" - max_cache_size: 2333 - )EOF"; - - setupSkyWalkingClientConfig(yaml_string); - - EXPECT_EQ(client_config_->service(), "FAKE_FAKE_FAKE"); - EXPECT_EQ(client_config_->serviceInstance(), "FAKE_FAKE_FAKE"); - EXPECT_EQ(client_config_->maxCacheSize(), 2333); - EXPECT_EQ(client_config_->backendToken(), "FAKE_FAKE_FAKE_FAKE_FAKE_FAKE"); -} - -// Test whether the client config can get default value for service name and instance name. -TEST_F(SkyWalkingClientConfigTest, BothLocalInfoAndClientConfigEmptyTest) { - test_string = ""; - - const std::string yaml_string = R"EOF( - grpc_service: - envoy_grpc: - cluster_name: fake_cluster - )EOF"; - - setupSkyWalkingClientConfig(yaml_string); - - EXPECT_EQ(client_config_->service(), "EnvoyProxy"); - EXPECT_EQ(client_config_->serviceInstance(), "EnvoyProxy"); -} - -} // namespace -} // namespace SkyWalking -} // namespace Tracers -} // namespace Extensions -} // namespace Envoy diff --git a/test/extensions/tracers/skywalking/skywalking_test_helper.h b/test/extensions/tracers/skywalking/skywalking_test_helper.h index e158bb535da8..e49ac3b3bd77 100644 --- a/test/extensions/tracers/skywalking/skywalking_test_helper.h +++ b/test/extensions/tracers/skywalking/skywalking_test_helper.h @@ -3,15 +3,24 @@ #include "common/common/base64.h" #include "common/common/hex.h" -#include "extensions/tracers/skywalking/skywalking_types.h" - #include "test/test_common/utility.h" +#include "cpp2sky/config.pb.h" +#include "cpp2sky/propagation.h" +#include "cpp2sky/segment_context.h" + namespace Envoy { namespace Extensions { namespace Tracers { namespace SkyWalking { +using cpp2sky::createSegmentContextFactory; +using cpp2sky::createSpanContext; +using cpp2sky::CurrentSegmentSpanPtr; +using cpp2sky::SegmentContextPtr; +using cpp2sky::SpanContextPtr; +using cpp2sky::TracerConfig; + /* * A simple helper class for auxiliary testing. Contains some simple static functions, such as * encoding, generating random id, creating SpanContext, etc. 
@@ -26,47 +35,72 @@ class SkyWalkingTestHelper { return Base64::encode(input.data(), input.length()); } - static SegmentContextSharedPtr createSegmentContext(bool sampled, std::string seed, - std::string prev_seed, - Random::RandomGenerator& random) { + static std::string createPropagatedSW8HeaderValue(bool do_sample, std::string seed) { + TracerConfig config; + config.set_service_name(seed + "#SERVICE"); + config.set_instance_name(seed + "#INSTANCE"); + auto segment_context_factory = createSegmentContextFactory(config); + auto segment_context = segment_context_factory->create(); + + auto span = segment_context->createCurrentSegmentRootSpan(); + span->startSpan(seed + "#OPERATION"); + span->setPeer(seed + "#ADDRESS"); + + if (!do_sample) { + span->setSkipAnalysis(); + } + + span->endSpan(); + + return segment_context->createSW8HeaderValue(seed + "#ENDPOINT"); + } + + static SegmentContextPtr createSegmentContext(bool sampled, std::string seed, + std::string prev_seed) { + TracerConfig config; + config.set_service_name(seed + "#SERVICE"); + config.set_instance_name(seed + "#INSTANCE"); + auto segment_context_factory = createSegmentContextFactory(config); + SpanContextPtr previous_span_context; if (!prev_seed.empty()) { - std::string header_value = - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", sampled ? 1 : 0, base64Encode(generateId(random)), - base64Encode(generateId(random)), random.random(), - base64Encode(prev_seed + "#SERVICE"), base64Encode(prev_seed + "#INSTANCE"), - base64Encode(prev_seed + "#ENDPOINT"), base64Encode(prev_seed + "#ADDRESS")); - - Http::TestRequestHeaderMapImpl request_headers{{"sw8", header_value}}; - previous_span_context = SpanContext::spanContextFromRequest(request_headers); + std::string header_value = createPropagatedSW8HeaderValue(sampled, prev_seed); + previous_span_context = createSpanContext(header_value); ASSERT(previous_span_context); } - Tracing::Decision decision; - decision.traced = sampled; - decision.reason = Tracing::Reason::Sampling; - - auto segment_context = - std::make_shared(std::move(previous_span_context), decision, random); - segment_context->setService(seed + "#SERVICE"); - segment_context->setServiceInstance(seed + "#INSTANCE"); + SegmentContextPtr segment_context; + if (previous_span_context) { + segment_context = segment_context_factory->create(previous_span_context); + } else { + segment_context = segment_context_factory->create(); + if (!sampled) { + segment_context->setSkipAnalysis(); + } + } return segment_context; } - static SpanStore* createSpanStore(SegmentContext* segment_context, SpanStore* parent_span_store, - std::string seed) { - SpanStore* span_store = segment_context->createSpanStore(parent_span_store); - - span_store->setAsError(false); - span_store->setOperation(seed + "#OPERATION"); - span_store->setPeerAddress("0.0.0.0"); - span_store->setStartTime(22222222); - span_store->setEndTime(33333333); + static CurrentSegmentSpanPtr createSpanStore(SegmentContextPtr segment_context, + CurrentSegmentSpanPtr parent_span_store, + std::string seed, bool sample = true) { + auto span_store = parent_span_store + ? 
segment_context->createCurrentSegmentSpan(parent_span_store) + : segment_context->createCurrentSegmentRootSpan(); + span_store->startSpan(seed + "#OPERATION"); + span_store->setPeer("0.0.0.0"); span_store->addTag(seed + "#TAG_KEY_A", seed + "#TAG_VALUE_A"); span_store->addTag(seed + "#TAG_KEY_B", seed + "#TAG_VALUE_B"); span_store->addTag(seed + "#TAG_KEY_C", seed + "#TAG_VALUE_C"); + + if (!sample) { + span_store->setSkipAnalysis(); + } + + span_store->endSpan(); + return span_store; } }; diff --git a/test/extensions/tracers/skywalking/skywalking_tracer_impl_test.cc b/test/extensions/tracers/skywalking/skywalking_tracer_impl_test.cc index cb5075665dcc..7fe3e637b6c9 100644 --- a/test/extensions/tracers/skywalking/skywalking_tracer_impl_test.cc +++ b/test/extensions/tracers/skywalking/skywalking_tracer_impl_test.cc @@ -49,12 +49,9 @@ class SkyWalkingDriverTest : public testing::Test { NiceMock context_; NiceMock mock_tracing_config_; Event::SimulatedTimeSystem time_system_; - std::unique_ptr> mock_stream_ptr_{nullptr}; - envoy::config::trace::v3::SkyWalkingConfig config_; std::string test_string = "ABCDEFGHIJKLMN"; - DriverPtr driver_; }; @@ -71,82 +68,92 @@ TEST_F(SkyWalkingDriverTest, SkyWalkingDriverStartSpanTestWithClientConfig) { )EOF"; setupSkyWalkingDriver(yaml_string); - std::string trace_id = - SkyWalkingTestHelper::generateId(context_.server_factory_context_.api_.random_); - std::string segment_id = - SkyWalkingTestHelper::generateId(context_.server_factory_context_.api_.random_); - - // Create new span segment with previous span context. - std::string previous_header_value = - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 0, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 233333, - SkyWalkingTestHelper::base64Encode("SERVICE"), - SkyWalkingTestHelper::base64Encode("INSTATNCE"), - SkyWalkingTestHelper::base64Encode("ENDPOINT"), - SkyWalkingTestHelper::base64Encode("ADDRESS")); - - Http::TestRequestHeaderMapImpl request_headers{{"sw8", previous_header_value}, - {":path", "/path"}, - {":method", "GET"}, - {":authority", "test.com"}}; - - ON_CALL(mock_tracing_config_, operationName()) - .WillByDefault(Return(Tracing::OperationName::Ingress)); - Tracing::Decision decision; decision.traced = true; + auto& factory_context = context_.server_factory_context_; - Tracing::SpanPtr org_span = driver_->startSpan(mock_tracing_config_, request_headers, "TEST_OP", - time_system_.systemTime(), decision); - EXPECT_NE(nullptr, org_span.get()); + { + auto previous_header_value = SkyWalkingTestHelper::createPropagatedSW8HeaderValue(false, ""); + Http::TestRequestHeaderMapImpl request_headers{{"sw8", previous_header_value}, + {":path", "/path"}, + {":method", "GET"}, + {":authority", "test.com"}}; + ON_CALL(mock_tracing_config_, operationName()) + .WillByDefault(Return(Tracing::OperationName::Ingress)); - Span* span = dynamic_cast(org_span.get()); - ASSERT(span); + Tracing::SpanPtr org_span = driver_->startSpan(mock_tracing_config_, request_headers, "TEST_OP", + time_system_.systemTime(), decision); + EXPECT_NE(nullptr, org_span.get()); - EXPECT_NE(nullptr, span->segmentContext()->previousSpanContext()); + Span* span = dynamic_cast(org_span.get()); + ASSERT(span); - EXPECT_EQ("FAKE_FAKE_FAKE", span->segmentContext()->service()); - EXPECT_EQ("FAKE_FAKE_FAKE", span->segmentContext()->serviceInstance()); + EXPECT_EQ("FAKE_FAKE_FAKE", span->segmentContext()->service()); + EXPECT_EQ("FAKE_FAKE_FAKE", span->segmentContext()->serviceInstance()); - // Tracing decision 
will be overwrite by sampling flag in propagation headers. - EXPECT_EQ(0, span->segmentContext()->sampled()); + // Tracing decision will be overwrite by skip analysis flag in propagation headers. + EXPECT_FALSE(span->segmentContext()->skipAnalysis()); - // Since the sampling flag is false, no segment data is reported. - span->finishSpan(); + // Since the sampling flag is false, no segment data is reported. + EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); + span->finishSpan(); - auto& factory_context = context_.server_factory_context_; - EXPECT_EQ(0U, factory_context.scope_.counter("tracing.skywalking.segments_sent").value()); + EXPECT_EQ(1U, factory_context.scope_.counter("tracing.skywalking.segments_sent").value()); + } - // Create new span segment with no previous span context. - Http::TestRequestHeaderMapImpl new_request_headers{ - {":path", "/path"}, {":method", "GET"}, {":authority", "test.com"}}; + { + // Create new span segment with no previous span context. + Http::TestRequestHeaderMapImpl new_request_headers{ + {":path", "/path"}, {":method", "GET"}, {":authority", "test.com"}}; - Tracing::SpanPtr org_new_span = driver_->startSpan(mock_tracing_config_, new_request_headers, "", - time_system_.systemTime(), decision); + Tracing::SpanPtr org_span = driver_->startSpan(mock_tracing_config_, new_request_headers, "", + time_system_.systemTime(), decision); - Span* new_span = dynamic_cast(org_new_span.get()); - ASSERT(new_span); + Span* span = dynamic_cast(org_span.get()); + ASSERT(span); - EXPECT_EQ(nullptr, new_span->segmentContext()->previousSpanContext()); + EXPECT_FALSE(span->segmentContext()->skipAnalysis()); - EXPECT_EQ(true, new_span->segmentContext()->sampled()); + EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); + span->finishSpan(); - EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); - new_span->finishSpan(); - EXPECT_EQ(1U, factory_context.scope_.counter("tracing.skywalking.segments_sent").value()); + EXPECT_EQ(2U, factory_context.scope_.counter("tracing.skywalking.segments_sent").value()); + } + + { + // Create new span segment with error propagation header. + Http::TestRequestHeaderMapImpl error_request_headers{ + {":path", "/path"}, + {":method", "GET"}, + {":authority", "test.com"}, + {"sw8", "xxxxxx-error-propagation-header"}}; + Tracing::SpanPtr org_null_span = + driver_->startSpan(mock_tracing_config_, error_request_headers, "TEST_OP", + time_system_.systemTime(), decision); + + EXPECT_EQ(nullptr, dynamic_cast(org_null_span.get())); + + auto& null_span = *org_null_span; + EXPECT_EQ(typeid(null_span).name(), typeid(Tracing::NullSpan).name()); + } - // Create new span segment with error propagation header. - Http::TestRequestHeaderMapImpl error_request_headers{{":path", "/path"}, - {":method", "GET"}, - {":authority", "test.com"}, - {"sw8", "xxxxxx-error-propagation-header"}}; - Tracing::SpanPtr org_null_span = driver_->startSpan( - mock_tracing_config_, error_request_headers, "TEST_OP", time_system_.systemTime(), decision); + { + // Create root segment span with disabled tracing. 
+ decision.traced = false; + Http::TestRequestHeaderMapImpl request_headers{ + {":path", "/path"}, {":method", "GET"}, {":authority", "test.com"}}; + Tracing::SpanPtr span = driver_->startSpan(mock_tracing_config_, request_headers, "TEST_OP", + time_system_.systemTime(), decision); + Span* new_span = dynamic_cast(span.get()); + ASSERT(new_span); - EXPECT_EQ(nullptr, dynamic_cast(org_null_span.get())); + EXPECT_TRUE(new_span->segmentContext()->skipAnalysis()); - auto& null_span = *org_null_span; - EXPECT_EQ(typeid(null_span).name(), typeid(Tracing::NullSpan).name()); + EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); + span->finishSpan(); + + EXPECT_EQ(3U, factory_context.scope_.counter("tracing.skywalking.segments_sent").value()); + } } TEST_F(SkyWalkingDriverTest, SkyWalkingDriverStartSpanTestNoClientConfig) { diff --git a/test/extensions/tracers/skywalking/skywalking_types_test.cc b/test/extensions/tracers/skywalking/skywalking_types_test.cc deleted file mode 100644 index eb1d3147558f..000000000000 --- a/test/extensions/tracers/skywalking/skywalking_types_test.cc +++ /dev/null @@ -1,343 +0,0 @@ -#include "common/common/base64.h" -#include "common/common/hex.h" - -#include "extensions/tracers/skywalking/skywalking_types.h" - -#include "test/extensions/tracers/skywalking/skywalking_test_helper.h" -#include "test/mocks/common.h" -#include "test/test_common/simulated_time_system.h" -#include "test/test_common/utility.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using testing::NiceMock; -using testing::Return; - -namespace Envoy { -namespace Extensions { -namespace Tracers { -namespace SkyWalking { -namespace { - -// Some constant strings for testing. -constexpr absl::string_view TEST_SERVICE = "EnvoyIngressForTest"; -constexpr absl::string_view TEST_INSTANCE = "node-2.3.4.5~ingress"; -constexpr absl::string_view TEST_ADDRESS = "255.255.255.255"; -constexpr absl::string_view TEST_ENDPOINT = "/POST/path/for/test"; - -// Test whether SpanContext can correctly parse data from propagation headers and throw exceptions -// when errors occur. -TEST(SpanContextTest, SpanContextCommonTest) { - NiceMock mock_random_generator; - ON_CALL(mock_random_generator, random()).WillByDefault(Return(uint64_t(23333))); - - std::string trace_id = SkyWalkingTestHelper::generateId(mock_random_generator); - std::string segment_id = SkyWalkingTestHelper::generateId(mock_random_generator); - - // No propagation header then previous span context will be null. - Http::TestRequestHeaderMapImpl headers_no_propagation; - auto null_span_context = SpanContext::spanContextFromRequest(headers_no_propagation); - EXPECT_EQ(nullptr, null_span_context.get()); - - // Create properly formatted propagation headers and test whether the propagation headers can be - // parsed correctly. - std::string header_value_with_right_format = - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 0, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 233333, - SkyWalkingTestHelper::base64Encode(TEST_SERVICE), - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS)); - - Http::TestRequestHeaderMapImpl headers_with_right_format{{"sw8", header_value_with_right_format}}; - - auto previous_span_context = SpanContext::spanContextFromRequest(headers_with_right_format); - EXPECT_NE(nullptr, previous_span_context.get()); - - // Verify that each field parsed from the propagation headers is correct. 
- EXPECT_EQ(previous_span_context->sampled_, 0); - EXPECT_EQ(previous_span_context->trace_id_, trace_id); - EXPECT_EQ(previous_span_context->trace_segment_id_, segment_id); - EXPECT_EQ(previous_span_context->span_id_, 233333); - EXPECT_EQ(previous_span_context->service_, TEST_SERVICE); - EXPECT_EQ(previous_span_context->service_instance_, TEST_INSTANCE); - EXPECT_EQ(previous_span_context->endpoint_, TEST_ENDPOINT); - EXPECT_EQ(previous_span_context->target_address_, TEST_ADDRESS); - - std::string header_value_with_sampled = - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 1, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 233333, - SkyWalkingTestHelper::base64Encode(TEST_SERVICE), - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS)); - - Http::TestRequestHeaderMapImpl headers_with_sampled{{"sw8", header_value_with_sampled}}; - - auto previous_span_context_with_sampled = - SpanContext::spanContextFromRequest(headers_with_sampled); - EXPECT_EQ(previous_span_context_with_sampled->sampled_, 1); - - // Test whether an exception can be correctly thrown when some fields are missing. - std::string header_value_lost_some_parts = - fmt::format("{}-{}-{}-{}-{}-{}", 0, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 3, - SkyWalkingTestHelper::base64Encode(TEST_SERVICE), - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE)); - - Http::TestRequestHeaderMapImpl headers_lost_some_parts{{"sw8", header_value_lost_some_parts}}; - - EXPECT_THROW_WITH_MESSAGE( - SpanContext::spanContextFromRequest(headers_lost_some_parts), EnvoyException, - fmt::format("Invalid propagation header for SkyWalking: {}", header_value_lost_some_parts)); - - // Test whether an exception can be correctly thrown when the sampling flag is wrong. - Http::TestRequestHeaderMapImpl headers_with_error_sampled{ - {"sw8", - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 3, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 3, - SkyWalkingTestHelper::base64Encode(TEST_SERVICE), - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS))}}; - - EXPECT_THROW_WITH_MESSAGE(SpanContext::spanContextFromRequest(headers_with_error_sampled), - EnvoyException, - "Invalid propagation header for SkyWalking: sampling flag can only be " - "'0' or '1' but '3' was provided"); - - // Test whether an exception can be correctly thrown when the span id format is wrong. - Http::TestRequestHeaderMapImpl headers_with_error_span_id{ - {"sw8", - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 1, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), "abc", - SkyWalkingTestHelper::base64Encode(TEST_SERVICE), - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS))}}; - - EXPECT_THROW_WITH_MESSAGE( - SpanContext::spanContextFromRequest(headers_with_error_span_id), EnvoyException, - "Invalid propagation header for SkyWalking: connot convert 'abc' to valid span id"); - - // Test whether an exception can be correctly thrown when a field is empty. 
- std::string header_value_with_empty_field = - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 1, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 4, "", - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS)); - Http::TestRequestHeaderMapImpl headers_with_empty_field{{"sw8", header_value_with_empty_field}}; - - EXPECT_THROW_WITH_MESSAGE( - SpanContext::spanContextFromRequest(headers_with_empty_field), EnvoyException, - fmt::format("Invalid propagation header for SkyWalking: {}", header_value_with_empty_field)); - - // Test whether an exception can be correctly thrown when a string is not properly encoded. - Http::TestRequestHeaderMapImpl headers_with_error_field{ - {"sw8", - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 1, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 4, "hhhhhhh", - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS))}}; - - EXPECT_THROW_WITH_MESSAGE(SpanContext::spanContextFromRequest(headers_with_error_field), - EnvoyException, - "Invalid propagation header for SkyWalking: parse error"); -} - -// Test whether the SegmentContext works normally when Envoy is the root node (Propagation headers -// does not exist). -TEST(SegmentContextTest, SegmentContextTestWithEmptyPreviousSpanContext) { - NiceMock mock_random_generator; - - ON_CALL(mock_random_generator, random()).WillByDefault(Return(233333)); - - SegmentContextSharedPtr segment_context = - SkyWalkingTestHelper::createSegmentContext(true, "NEW", "", mock_random_generator); - - // When previous span context is null, the value of the sampling flag depends on the tracing - // decision - EXPECT_EQ(segment_context->sampled(), 1); - // The SegmentContext will use random generator to create new trace id and new trace segment id. - EXPECT_EQ(segment_context->traceId(), SkyWalkingTestHelper::generateId(mock_random_generator)); - EXPECT_EQ(segment_context->traceSegmentId(), - SkyWalkingTestHelper::generateId(mock_random_generator)); - - EXPECT_EQ(segment_context->previousSpanContext(), nullptr); - - // Test whether the value of the fields can be set correctly and the value of the fields can be - // obtained correctly. - EXPECT_EQ(segment_context->service(), "NEW#SERVICE"); - segment_context->setService(std::string(TEST_SERVICE)); - EXPECT_EQ(segment_context->service(), TEST_SERVICE); - - EXPECT_EQ(segment_context->serviceInstance(), "NEW#INSTANCE"); - segment_context->setServiceInstance(std::string(TEST_INSTANCE)); - EXPECT_EQ(segment_context->serviceInstance(), TEST_INSTANCE); - - EXPECT_EQ(segment_context->rootSpanStore(), nullptr); - - // Test whether SegmentContext can correctly create SpanStore object with null parent SpanStore. - SpanStore* root_span = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), nullptr, "PARENT"); - EXPECT_NE(nullptr, root_span); - - // The span id of the first SpanStore in each SegmentContext is 0. Its parent span id is -1. - EXPECT_EQ(root_span->spanId(), 0); - EXPECT_EQ(root_span->parentSpanId(), -1); - - // Root span of current segment should be Entry Span. - EXPECT_EQ(root_span->isEntrySpan(), true); - - // Verify that the SpanStore object is correctly stored in the SegmentContext. 
- EXPECT_EQ(segment_context->spanList().size(), 1); - EXPECT_EQ(segment_context->spanList()[0].get(), root_span); - - // Test whether SegmentContext can correctly create SpanStore object with a parent SpanStore. - SpanStore* child_span = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), root_span, "CHILD"); - - EXPECT_NE(nullptr, child_span); - - EXPECT_EQ(child_span->spanId(), 1); - EXPECT_EQ(child_span->parentSpanId(), 0); - - // All child spans of current segment should be Exit Span. - EXPECT_EQ(child_span->isEntrySpan(), false); - - EXPECT_EQ(segment_context->spanList().size(), 2); - EXPECT_EQ(segment_context->spanList()[1].get(), child_span); -} - -// Test whether the SegmentContext can work normally when a previous span context exists. -TEST(SegmentContextTest, SegmentContextTestWithPreviousSpanContext) { - NiceMock mock_random_generator; - - ON_CALL(mock_random_generator, random()).WillByDefault(Return(23333)); - - std::string trace_id = SkyWalkingTestHelper::generateId(mock_random_generator); - std::string segment_id = SkyWalkingTestHelper::generateId(mock_random_generator); - - std::string header_value_with_right_format = - fmt::format("{}-{}-{}-{}-{}-{}-{}-{}", 0, SkyWalkingTestHelper::base64Encode(trace_id), - SkyWalkingTestHelper::base64Encode(segment_id), 233333, - SkyWalkingTestHelper::base64Encode(TEST_SERVICE), - SkyWalkingTestHelper::base64Encode(TEST_INSTANCE), - SkyWalkingTestHelper::base64Encode(TEST_ENDPOINT), - SkyWalkingTestHelper::base64Encode(TEST_ADDRESS)); - - Http::TestRequestHeaderMapImpl headers_with_right_format{{"sw8", header_value_with_right_format}}; - - auto previous_span_context = SpanContext::spanContextFromRequest(headers_with_right_format); - SpanContext* previous_span_context_bk = previous_span_context.get(); - - Tracing::Decision decision; - decision.traced = true; - - EXPECT_CALL(mock_random_generator, random()).WillRepeatedly(Return(666666)); - - SegmentContext segment_context(std::move(previous_span_context), decision, mock_random_generator); - - // When a previous span context exists, the sampling flag of the SegmentContext depends on - // previous span context rather than tracing decision. - EXPECT_EQ(segment_context.sampled(), 0); - - // When previous span context exists, the trace id of SegmentContext remains the same as that of - // previous span context. - EXPECT_EQ(segment_context.traceId(), trace_id); - // SegmentContext will always create a new trace segment id. - EXPECT_NE(segment_context.traceSegmentId(), segment_id); - - EXPECT_EQ(segment_context.previousSpanContext(), previous_span_context_bk); -} - -// Test whether SpanStore can work properly. -TEST(SpanStoreTest, SpanStoreCommonTest) { - NiceMock mock_random_generator; - - Event::SimulatedTimeSystem time_system; - Envoy::SystemTime now = time_system.systemTime(); - - ON_CALL(mock_random_generator, random()).WillByDefault(Return(23333)); - - // Create segment context and first span store. - SegmentContextSharedPtr segment_context = - SkyWalkingTestHelper::createSegmentContext(true, "CURR", "PREV", mock_random_generator); - SpanStore* root_store = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), nullptr, "ROOT"); - EXPECT_NE(nullptr, root_store); - EXPECT_EQ(3, root_store->tags().size()); - - root_store->addLog(now, "TestLogStringAndNeverBeStored"); - EXPECT_EQ(0, root_store->logs().size()); - - // The span id of the first SpanStore in each SegmentContext is 0. Its parent span id is -1. 
- EXPECT_EQ(0, root_store->spanId()); - EXPECT_EQ(-1, root_store->parentSpanId()); - - root_store->setSpanId(123); - EXPECT_EQ(123, root_store->spanId()); - root_store->setParentSpanId(234); - EXPECT_EQ(234, root_store->parentSpanId()); - - EXPECT_EQ(1, root_store->sampled()); - root_store->setSampled(0); - EXPECT_EQ(0, root_store->sampled()); - - // Test whether the value of the fields can be set correctly and the value of the fields can be - // obtained correctly. - EXPECT_EQ(true, root_store->isEntrySpan()); - root_store->setAsEntrySpan(false); - EXPECT_EQ(false, root_store->isEntrySpan()); - - EXPECT_EQ(false, root_store->isError()); - root_store->setAsError(true); - EXPECT_EQ(true, root_store->isError()); - - EXPECT_EQ("ROOT#OPERATION", root_store->operation()); - root_store->setOperation(""); - EXPECT_EQ("", root_store->operation()); - root_store->setOperation("oooooop"); - EXPECT_EQ("oooooop", root_store->operation()); - - EXPECT_EQ("0.0.0.0", root_store->peerAddress()); - root_store->setPeerAddress(std::string(TEST_ADDRESS)); - EXPECT_EQ(TEST_ADDRESS, root_store->peerAddress()); - - EXPECT_EQ(22222222, root_store->startTime()); - root_store->setStartTime(23333); - EXPECT_EQ(23333, root_store->startTime()); - - EXPECT_EQ(33333333, root_store->endTime()); - root_store->setEndTime(25555); - EXPECT_EQ(25555, root_store->endTime()); - - SpanStore* child_store = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), root_store, "CHILD"); - - // Test whether SpanStore can correctly inject propagation headers to request headers. - Http::TestRequestHeaderMapImpl request_headers_no_upstream{{":authority", "test.com"}}; - // Only child span (Exit Span) can inject context header to request headers. - child_store->injectContext(request_headers_no_upstream); - std::string expected_header_value = fmt::format( - "{}-{}-{}-{}-{}-{}-{}-{}", child_store->sampled(), - SkyWalkingTestHelper::base64Encode(SkyWalkingTestHelper::generateId(mock_random_generator)), - SkyWalkingTestHelper::base64Encode(SkyWalkingTestHelper::generateId(mock_random_generator)), - child_store->spanId(), SkyWalkingTestHelper::base64Encode("CURR#SERVICE"), - SkyWalkingTestHelper::base64Encode("CURR#INSTANCE"), - SkyWalkingTestHelper::base64Encode("oooooop"), - SkyWalkingTestHelper::base64Encode("test.com")); - - EXPECT_EQ(child_store->peerAddress(), "test.com"); - - EXPECT_EQ(request_headers_no_upstream.get_("sw8"), expected_header_value); -} - -} // namespace -} // namespace SkyWalking -} // namespace Tracers -} // namespace Extensions -} // namespace Envoy diff --git a/test/extensions/tracers/skywalking/trace_segment_reporter_test.cc b/test/extensions/tracers/skywalking/trace_segment_reporter_test.cc index fa3f59effdb5..67c3c6c2ce2d 100644 --- a/test/extensions/tracers/skywalking/trace_segment_reporter_test.cc +++ b/test/extensions/tracers/skywalking/trace_segment_reporter_test.cc @@ -48,34 +48,25 @@ class TraceSegmentReporterTest : public testing::Test { envoy::config::trace::v3::ClientConfig proto_client_config; TestUtility::loadFromYaml(yaml_string, proto_client_config); - client_config_ = std::make_unique(context_, proto_client_config); - reporter_ = std::make_unique(std::move(mock_client_factory), - mock_dispatcher_, mock_random_generator_, - tracing_stats_, *client_config_); + reporter_ = std::make_unique( + std::move(mock_client_factory), mock_dispatcher_, mock_random_generator_, tracing_stats_, + PROTOBUF_GET_WRAPPED_OR_DEFAULT(proto_client_config, max_cache_size, 1024), + proto_client_config.backend_token()); } 
protected: NiceMock context_; - NiceMock& mock_dispatcher_ = context_.server_factory_context_.dispatcher_; NiceMock& mock_random_generator_ = context_.server_factory_context_.api_.random_; Event::GlobalTimeSystem& mock_time_source_ = context_.server_factory_context_.time_system_; - NiceMock& mock_scope_ = context_.server_factory_context_.scope_; - NiceMock* mock_client_ptr_{nullptr}; - std::unique_ptr> mock_stream_ptr_{nullptr}; - NiceMock* timer_; Event::TimerCb timer_cb_; - std::string test_string = "ABCDEFGHIJKLMN"; - - SkyWalkingClientConfigPtr client_config_; - SkyWalkingTracerStats tracing_stats_{ SKYWALKING_TRACER_STATS(POOL_COUNTER_PREFIX(mock_scope_, "tracing.skywalking."))}; TraceSegmentReporterPtr reporter_; @@ -106,24 +97,21 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportTraceSegment) { setupTraceSegmentReporter("{}"); ON_CALL(mock_random_generator_, random()).WillByDefault(Return(23333)); - SegmentContextSharedPtr segment_context = - SkyWalkingTestHelper::createSegmentContext(true, "NEW", "PRE", mock_random_generator_); - SpanStore* parent_store = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), nullptr, "PARENT"); - // Parent span store has peer address. - parent_store->setPeerAddress("0.0.0.0"); + SegmentContextPtr segment_context = + SkyWalkingTestHelper::createSegmentContext(true, "NEW", "PRE"); + CurrentSegmentSpanPtr parent_store = + SkyWalkingTestHelper::createSpanStore(segment_context, nullptr, "PARENT"); - SpanStore* first_child_sptore = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), parent_store, "CHILD"); // Skip reporting the first child span. - first_child_sptore->setSampled(0); + CurrentSegmentSpanPtr first_child_sptore = + SkyWalkingTestHelper::createSpanStore(segment_context, parent_store, "CHILD", false); // Create second child span. - SkyWalkingTestHelper::createSpanStore(segment_context.get(), parent_store, "CHILD"); + SkyWalkingTestHelper::createSpanStore(segment_context, parent_store, "CHILD"); EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); - reporter_->report(*segment_context); + reporter_->report(segment_context); EXPECT_EQ(1U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_dropped").value()); @@ -131,12 +119,12 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportTraceSegment) { EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_flushed").value()); // Create a segment context with no previous span context. 
- SegmentContextSharedPtr second_segment_context = SkyWalkingTestHelper::createSegmentContext( - true, "SECOND_SEGMENT", "", mock_random_generator_); - SkyWalkingTestHelper::createSpanStore(second_segment_context.get(), nullptr, "PARENT"); + SegmentContextPtr second_segment_context = + SkyWalkingTestHelper::createSegmentContext(true, "SECOND_SEGMENT", ""); + SkyWalkingTestHelper::createSpanStore(second_segment_context, nullptr, "PARENT"); EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); - reporter_->report(*second_segment_context); + reporter_->report(second_segment_context); EXPECT_EQ(2U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_dropped").value()); @@ -148,15 +136,15 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportWithDefaultCache) { setupTraceSegmentReporter("{}"); ON_CALL(mock_random_generator_, random()).WillByDefault(Return(23333)); - SegmentContextSharedPtr segment_context = - SkyWalkingTestHelper::createSegmentContext(true, "NEW", "PRE", mock_random_generator_); - SpanStore* parent_store = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), nullptr, "PARENT"); - SkyWalkingTestHelper::createSpanStore(segment_context.get(), parent_store, "CHILD"); + SegmentContextPtr segment_context = + SkyWalkingTestHelper::createSegmentContext(true, "NEW", "PRE"); + CurrentSegmentSpanPtr parent_store = + SkyWalkingTestHelper::createSpanStore(segment_context, nullptr, "PARENT"); + SkyWalkingTestHelper::createSpanStore(segment_context, parent_store, "CHILD"); EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)).Times(1025); - reporter_->report(*segment_context); + reporter_->report(segment_context); EXPECT_EQ(1U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_dropped").value()); @@ -170,7 +158,7 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportWithDefaultCache) { // Try to report 10 segments. Due to the disconnection, the cache size is only 3. So 7 of the // segments will be discarded. for (int i = 0; i < 2048; i++) { - reporter_->report(*segment_context); + reporter_->report(segment_context); } EXPECT_EQ(1U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); @@ -198,15 +186,15 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportWithCacheConfig) { ON_CALL(mock_random_generator_, random()).WillByDefault(Return(23333)); - SegmentContextSharedPtr segment_context = - SkyWalkingTestHelper::createSegmentContext(true, "NEW", "PRE", mock_random_generator_); - SpanStore* parent_store = - SkyWalkingTestHelper::createSpanStore(segment_context.get(), nullptr, "PARENT"); - SkyWalkingTestHelper::createSpanStore(segment_context.get(), parent_store, "CHILD"); + SegmentContextPtr segment_context = + SkyWalkingTestHelper::createSegmentContext(true, "NEW", "PRE"); + CurrentSegmentSpanPtr parent_store = + SkyWalkingTestHelper::createSpanStore(segment_context, nullptr, "PARENT"); + SkyWalkingTestHelper::createSpanStore(segment_context, parent_store, "CHILD"); EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)).Times(4); - reporter_->report(*segment_context); + reporter_->report(segment_context); EXPECT_EQ(1U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_dropped").value()); @@ -220,7 +208,7 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportWithCacheConfig) { // Try to report 10 segments. 
Due to the disconnection, the cache size is only 3. So 7 of the // segments will be discarded. for (int i = 0; i < 10; i++) { - reporter_->report(*segment_context); + reporter_->report(segment_context); } EXPECT_EQ(1U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); @@ -239,6 +227,13 @@ TEST_F(TraceSegmentReporterTest, TraceSegmentReporterReportWithCacheConfig) { EXPECT_EQ(3U, mock_scope_.counter("tracing.skywalking.segments_flushed").value()); } +TEST_F(TraceSegmentReporterTest, CallAsyncCallbackAndNothingTodo) { + setupTraceSegmentReporter("{}"); + reporter_->onReceiveInitialMetadata(std::make_unique()); + reporter_->onReceiveTrailingMetadata(std::make_unique()); + reporter_->onReceiveMessage(std::make_unique()); +} + } // namespace } // namespace SkyWalking } // namespace Tracers diff --git a/test/extensions/tracers/skywalking/tracer_test.cc b/test/extensions/tracers/skywalking/tracer_test.cc index b6c169487b35..aee24271dfe9 100644 --- a/test/extensions/tracers/skywalking/tracer_test.cc +++ b/test/extensions/tracers/skywalking/tracer_test.cc @@ -1,4 +1,3 @@ -#include "extensions/tracers/skywalking/skywalking_client_config.h" #include "extensions/tracers/skywalking/tracer.h" #include "test/extensions/tracers/skywalking/skywalking_test_helper.h" @@ -46,35 +45,25 @@ class TracerTest : public testing::Test { envoy::config::trace::v3::ClientConfig proto_client_config; TestUtility::loadFromYaml(yaml_string, proto_client_config); - client_config_ = std::make_unique(context_, proto_client_config); - tracer_ = std::make_unique(mock_time_source_); - tracer_->setReporter(std::make_unique( + tracer_ = std::make_unique(std::make_unique( std::move(mock_client_factory), mock_dispatcher_, mock_random_generator_, tracing_stats_, - *client_config_)); + PROTOBUF_GET_WRAPPED_OR_DEFAULT(proto_client_config, max_cache_size, 1024), + proto_client_config.backend_token())); } protected: NiceMock mock_tracing_config_; - NiceMock context_; - NiceMock& mock_dispatcher_ = context_.server_factory_context_.dispatcher_; NiceMock& mock_random_generator_ = context_.server_factory_context_.api_.random_; Event::GlobalTimeSystem& mock_time_source_ = context_.server_factory_context_.time_system_; - NiceMock& mock_scope_ = context_.server_factory_context_.scope_; - std::unique_ptr> mock_stream_ptr_{nullptr}; - std::string test_string = "ABCDEFGHIJKLMN"; - - SkyWalkingClientConfigPtr client_config_; - SkyWalkingTracerStats tracing_stats_{ SKYWALKING_TRACER_STATS(POOL_COUNTER_PREFIX(mock_scope_, "tracing.skywalking."))}; - TracerPtr tracer_; }; @@ -85,103 +74,108 @@ TEST_F(TracerTest, TracerTestCreateNewSpanWithNoPropagationHeaders) { EXPECT_CALL(mock_random_generator_, random()).WillRepeatedly(Return(666666)); // Create a new SegmentContext. - SegmentContextSharedPtr segment_context = - SkyWalkingTestHelper::createSegmentContext(true, "CURR", "", mock_random_generator_); + auto segment_context = SkyWalkingTestHelper::createSegmentContext(true, "CURR", ""); Envoy::Tracing::SpanPtr org_span = tracer_->startSpan( mock_tracing_config_, mock_time_source_.systemTime(), "TEST_OP", segment_context, nullptr); - Span* span = dynamic_cast(org_span.get()); - - EXPECT_EQ(true, span->spanStore()->isEntrySpan()); - - EXPECT_EQ("", span->getBaggage("FakeStringAndNothingToDo")); - span->setBaggage("FakeStringAndNothingToDo", "FakeStringAndNothingToDo"); - - // This method is unimplemented and a noop. - ASSERT_EQ(span->getTraceIdAsHex(), ""); - - // Test whether the basic functions of Span are normal. 
- - span->setSampled(false); - EXPECT_EQ(false, span->spanStore()->sampled()); - - // The initial operation name is consistent with the 'operation' parameter in the 'startSpan' - // method call. - EXPECT_EQ("TEST_OP", span->spanStore()->operation()); - span->setOperation("op"); - EXPECT_EQ("op", span->spanStore()->operation()); - - // Test whether the tag can be set correctly. - span->setTag("TestTagKeyA", "TestTagValueA"); - span->setTag("TestTagKeyB", "TestTagValueB"); - EXPECT_EQ("TestTagValueA", span->spanStore()->tags().at(0).second); - EXPECT_EQ("TestTagValueB", span->spanStore()->tags().at(1).second); + { + Span* span = dynamic_cast(org_span.get()); + + EXPECT_TRUE(span->spanEntity()->spanType() == SpanType::Entry); + EXPECT_EQ("", span->getBaggage("FakeStringAndNothingToDo")); + span->setOperation("FakeStringAndNothingToDo"); + span->setBaggage("FakeStringAndNothingToDo", "FakeStringAndNothingToDo"); + // This method is unimplemented and a noop. + ASSERT_EQ(span->getTraceIdAsHex(), ""); + // Test whether the basic functions of Span are normal. + EXPECT_FALSE(span->spanEntity()->skipAnalysis()); + span->setSampled(false); + EXPECT_TRUE(span->spanEntity()->skipAnalysis()); + + // The initial operation name is consistent with the 'operation' parameter in the 'startSpan' + // method call. + EXPECT_EQ("TEST_OP", span->spanEntity()->operationName()); + + // Test whether the tag can be set correctly. + span->setTag("TestTagKeyA", "TestTagValueA"); + span->setTag("TestTagKeyB", "TestTagValueB"); + EXPECT_EQ("TestTagValueA", span->spanEntity()->tags().at(0).second); + EXPECT_EQ("TestTagValueB", span->spanEntity()->tags().at(1).second); + + // When setting the status code tag, the corresponding tag name will be rewritten as + // 'status_code'. + span->setTag(Tracing::Tags::get().HttpStatusCode, "200"); + EXPECT_EQ("status_code", span->spanEntity()->tags().at(2).first); + EXPECT_EQ("200", span->spanEntity()->tags().at(2).second); + + // When setting the error tag, the spanEntity object will also mark itself as an error. + span->setTag(Tracing::Tags::get().Error, Tracing::Tags::get().True); + EXPECT_EQ(Tracing::Tags::get().Error, span->spanEntity()->tags().at(3).first); + EXPECT_EQ(Tracing::Tags::get().True, span->spanEntity()->tags().at(3).second); + EXPECT_EQ(true, span->spanEntity()->errorStatus()); + + // When setting http url tag, the corresponding tag name will be rewritten as 'url'. + span->setTag(Tracing::Tags::get().HttpUrl, "http://test.com/test/path"); + EXPECT_EQ("url", span->spanEntity()->tags().at(4).first); + + span->log(SystemTime{std::chrono::duration(100)}, "abc"); + EXPECT_EQ(1, span->spanEntity()->logs().size()); + EXPECT_LT(0, span->spanEntity()->logs().at(0).time()); + EXPECT_EQ("abc", span->spanEntity()->logs().at(0).data().at(0).value()); + } - // When setting the status code tag, the corresponding tag name will be rewritten as - // 'status_code'. - span->setTag(Tracing::Tags::get().HttpStatusCode, "200"); - EXPECT_EQ("status_code", span->spanStore()->tags().at(2).first); - EXPECT_EQ("200", span->spanStore()->tags().at(2).second); + { + Envoy::Tracing::SpanPtr org_first_child_span = + org_span->spawnChild(mock_tracing_config_, "TestChild", mock_time_source_.systemTime()); - // When setting the error tag, the SpanStore object will also mark itself as an error. 
- span->setTag(Tracing::Tags::get().Error, Tracing::Tags::get().True); - EXPECT_EQ(Tracing::Tags::get().Error, span->spanStore()->tags().at(3).first); - EXPECT_EQ(Tracing::Tags::get().True, span->spanStore()->tags().at(3).second); - EXPECT_EQ(true, span->spanStore()->isError()); + Span* first_child_span = dynamic_cast(org_first_child_span.get()); - // When setting http url tag, the corresponding tag name will be rewritten as 'url'. - span->setTag(Tracing::Tags::get().HttpUrl, "http://test.com/test/path"); - EXPECT_EQ("url", span->spanStore()->tags().at(4).first); + EXPECT_TRUE(first_child_span->spanEntity()->spanType() == SpanType::Exit); - Envoy::Tracing::SpanPtr org_first_child_span = - span->spawnChild(mock_tracing_config_, "TestChild", mock_time_source_.systemTime()); - Span* first_child_span = dynamic_cast(org_first_child_span.get()); + EXPECT_FALSE(first_child_span->spanEntity()->skipAnalysis()); + EXPECT_EQ(1, first_child_span->spanEntity()->spanId()); + EXPECT_EQ(0, first_child_span->spanEntity()->parentSpanId()); - EXPECT_EQ(false, first_child_span->spanStore()->isEntrySpan()); + EXPECT_EQ("TestChild", first_child_span->spanEntity()->operationName()); - EXPECT_EQ(0, first_child_span->spanStore()->sampled()); - EXPECT_EQ(1, first_child_span->spanStore()->spanId()); - EXPECT_EQ(0, first_child_span->spanStore()->parentSpanId()); + first_child_span->finishSpan(); + EXPECT_NE(0, first_child_span->spanEntity()->endTime()); - EXPECT_EQ("TestChild", first_child_span->spanStore()->operation()); + Http::TestRequestHeaderMapImpl first_child_headers{{":authority", "test.com"}}; - Http::TestRequestHeaderMapImpl first_child_headers{{":authority", "test.com"}}; - std::string expected_header_value = fmt::format( - "{}-{}-{}-{}-{}-{}-{}-{}", 0, - SkyWalkingTestHelper::base64Encode(SkyWalkingTestHelper::generateId(mock_random_generator_)), - SkyWalkingTestHelper::base64Encode(SkyWalkingTestHelper::generateId(mock_random_generator_)), - 1, SkyWalkingTestHelper::base64Encode("CURR#SERVICE"), - SkyWalkingTestHelper::base64Encode("CURR#INSTANCE"), SkyWalkingTestHelper::base64Encode("op"), - SkyWalkingTestHelper::base64Encode("test.com")); + first_child_span->injectContext(first_child_headers); + auto sp = createSpanContext(first_child_headers.get_("sw8")); + EXPECT_EQ("CURR#SERVICE", sp->service()); + EXPECT_EQ("CURR#INSTANCE", sp->serviceInstance()); + EXPECT_EQ("TEST_OP", sp->endpoint()); + EXPECT_EQ("test.com", sp->targetAddress()); + } - first_child_span->injectContext(first_child_headers); - EXPECT_EQ(expected_header_value, first_child_headers.get_("sw8")); + segment_context->setSkipAnalysis(); - // Reset sampling flag to true. - span->setSampled(true); - Envoy::Tracing::SpanPtr org_second_child_span = - span->spawnChild(mock_tracing_config_, "TestChild", mock_time_source_.systemTime()); - Span* second_child_span = dynamic_cast(org_second_child_span.get()); + { + Envoy::Tracing::SpanPtr org_second_child_span = + org_span->spawnChild(mock_tracing_config_, "TestChild", mock_time_source_.systemTime()); + Span* second_child_span = dynamic_cast(org_second_child_span.get()); - EXPECT_EQ(1, second_child_span->spanStore()->sampled()); - EXPECT_EQ(2, second_child_span->spanStore()->spanId()); - EXPECT_EQ(0, second_child_span->spanStore()->parentSpanId()); + // SkipAnalysis is true by default with calling setSkipAnalysis() on segment_context. 
+ EXPECT_TRUE(second_child_span->spanEntity()->skipAnalysis()); + EXPECT_EQ(2, second_child_span->spanEntity()->spanId()); + EXPECT_EQ(0, second_child_span->spanEntity()->parentSpanId()); - EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); + second_child_span->finishSpan(); + EXPECT_NE(0, second_child_span->spanEntity()->endTime()); + } // When the child span ends, the data is not reported immediately, but the end time is set. - first_child_span->finishSpan(); - second_child_span->finishSpan(); - EXPECT_NE(0, first_child_span->spanStore()->endTime()); - EXPECT_NE(0, second_child_span->spanStore()->endTime()); - EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_dropped").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.cache_flushed").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_flushed").value()); // When the first span in the current segment ends, the entire segment is reported. - span->finishSpan(); + EXPECT_CALL(*mock_stream_ptr_, sendMessageRaw_(_, _)); + org_span->finishSpan(); EXPECT_EQ(1U, mock_scope_.counter("tracing.skywalking.segments_sent").value()); EXPECT_EQ(0U, mock_scope_.counter("tracing.skywalking.segments_dropped").value()); diff --git a/test/per_file_coverage.sh b/test/per_file_coverage.sh index 1151af522eea..c7f368a60cc9 100755 --- a/test/per_file_coverage.sh +++ b/test/per_file_coverage.sh @@ -52,7 +52,7 @@ declare -a KNOWN_LOW_COVERAGE=( "source/extensions/quic_listeners:85.0" "source/extensions/quic_listeners/quiche:84.8" "source/extensions/stat_sinks/statsd:85.2" -"source/extensions/tracers:96.4" +"source/extensions/tracers:96.3" "source/extensions/tracers/opencensus:91.6" "source/extensions/tracers/xray:94.0" "source/extensions/transport_sockets:95.1" From 241a9552fb1ab58378140342c0a26bca7026ba8f Mon Sep 17 00:00:00 2001 From: Greg Greenway Date: Mon, 1 Feb 2021 13:26:34 -0800 Subject: [PATCH 3/4] buffer: improve read reservations to efficiently handle multiple slices (#14054) Enable reading larger chunks from sockets in a single call without drastically increasing memory waste by implementing a system where reservations of multiple slices are made, and unused slices (after the read operation) are put into a small cache for re-use by the next read operation. The largest read operation changed from 16k to 128k. Watermark buffer limits are still enforced; large reads only happen if buffer limits allow and space is available. This improves performance in some high-throughput use cases. 
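For illustration, a minimal sketch (not part of this patch) of how a caller could drive the new
reserve/commit flow. Only Buffer::Instance::reserveForRead(), Reservation::slices(), numSlices(),
and commit() mirror the interface added below; readIntoBuffer() and the do_readv callback are
hypothetical stand-ins for an io-handle readv call:

    #include <cstdint>
    #include <functional>

    #include "envoy/buffer/buffer.h"

    namespace Envoy {

    // Hypothetical helper: fills the reserved slices via a readv-style callback and
    // commits only the bytes that were actually read.
    uint64_t readIntoBuffer(Buffer::Instance& buffer,
                            const std::function<int64_t(Buffer::RawSlice*, uint64_t)>& do_readv) {
      // The buffer, not the caller, decides how much space to reserve and how many
      // slices to hand out (up to Reservation::MAX_SLICES_).
      Buffer::Reservation reservation = buffer.reserveForRead();

      const int64_t bytes_read = do_readv(reservation.slices(), reservation.numSlices());
      if (bytes_read <= 0) {
        // Destroying the reservation without commit() simply discards the reserved
        // space; nothing is appended to the buffer.
        return 0;
      }

      // Commit only what was read; unused reserved slices go back to the small
      // cache for reuse by the next read, as described above.
      reservation.commit(static_cast<uint64_t>(bytes_read));
      return static_cast<uint64_t>(bytes_read);
    }

    } // namespace Envoy

The point of the sketch is the division of responsibility: the buffer sizes the reservation
(allowing reads up to 128k when watermark limits permit), and the caller commits only the bytes
the socket actually produced, so large read attempts do not inflate memory usage.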
Signed-off-by: Greg Greenway --- docs/root/version_history/current.rst | 1 + include/envoy/buffer/BUILD | 1 + include/envoy/buffer/buffer.h | 190 ++++++++++- include/envoy/network/io_handle.h | 6 +- source/common/buffer/buffer_impl.cc | 185 +++++----- source/common/buffer/buffer_impl.h | 128 ++++++- source/common/buffer/watermark_buffer.cc | 30 +- source/common/buffer/watermark_buffer.h | 6 +- source/common/common/utility.h | 22 +- source/common/grpc/common.cc | 20 +- source/common/http/http2/metadata_encoder.cc | 10 +- .../common/network/io_socket_handle_impl.cc | 17 +- source/common/network/io_socket_handle_impl.h | 3 +- source/common/network/raw_buffer_socket.cc | 3 +- source/common/network/utility.cc | 50 +-- .../quiche/envoy_quic_client_stream.cc | 7 +- .../quiche/envoy_quic_server_stream.cc | 7 +- .../platform/quic_mem_slice_storage_impl.cc | 19 +- .../quiche/quic_io_handle_wrapper.h | 3 +- .../stat_sinks/common/statsd/statsd.cc | 18 +- .../stat_sinks/common/statsd/statsd.h | 2 +- .../lightstep/lightstep_tracer_impl.cc | 10 +- .../transport_sockets/tls/ssl_socket.cc | 33 +- .../transport_sockets/tls/ssl_socket.h | 2 +- test/common/buffer/buffer_fuzz.cc | 72 ++-- test/common/buffer/buffer_speed_test.cc | 54 ++- test/common/buffer/owned_impl_test.cc | 323 +++++++++--------- test/common/buffer/watermark_buffer_test.cc | 32 +- test/common/common/utility_test.cc | 20 ++ test/common/http/http2/hpack_fuzz_test.cc | 9 +- .../postgres_proxy/postgres_decoder_test.cc | 7 +- .../transport_sockets/tls/ssl_socket_test.cc | 8 +- .../tls/tls_throughput_benchmark.cc | 24 +- .../socket_interface_integration_test.cc | 8 +- test/mocks/network/io_handle.h | 3 +- 35 files changed, 844 insertions(+), 489 deletions(-) diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index c6bcd134281a..b8135a031544 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -19,6 +19,7 @@ Minor Behavior Changes revert to the legacy path canonicalizer, enable the runtime flag `envoy.reloadable_features.remove_forked_chromium_url`. * oauth filter: added the optional parameter :ref:`auth_scopes ` with default value of 'user' if not provided. Enables this value to be overridden in the Authorization request to the OAuth provider. +* perf: allow reading more bytes per operation from raw sockets to improve performance. * tcp: setting NODELAY in the base connection class. This should have no effect for TCP or HTTP proxying, but may improve throughput in other areas. This behavior can be temporarily reverted by setting `envoy.reloadable_features.always_nodelay` to false. * upstream: host weight changes now cause a full load balancer rebuild as opposed to happening atomically inline. 
This change has been made to support load balancer pre-computation of data diff --git a/include/envoy/buffer/BUILD b/include/envoy/buffer/BUILD index 499ec8605a2e..9367bedb85ec 100644 --- a/include/envoy/buffer/BUILD +++ b/include/envoy/buffer/BUILD @@ -16,6 +16,7 @@ envoy_cc_library( ], deps = [ "//include/envoy/api:os_sys_calls_interface", + "//source/common/common:assert_lib", "//source/common/common:byte_order_lib", "//source/common/common:utility_lib", ], diff --git a/include/envoy/buffer/buffer.h b/include/envoy/buffer/buffer.h index d9404c47bc82..a4bdfc9459da 100644 --- a/include/envoy/buffer/buffer.h +++ b/include/envoy/buffer/buffer.h @@ -10,6 +10,7 @@ #include "envoy/common/platform.h" #include "envoy/common/pure.h" +#include "common/common/assert.h" #include "common/common/byte_order.h" #include "common/common/utility.h" @@ -72,6 +73,18 @@ class SliceData { using SliceDataPtr = std::unique_ptr; +class Reservation; +class ReservationSingleSlice; + +// Base class for an object to manage the ownership for slices in a `Reservation` or +// `ReservationSingleSlice`. +class ReservationSlicesOwner { +public: + virtual ~ReservationSlicesOwner() = default; +}; + +using ReservationSlicesOwnerPtr = std::unique_ptr; + /** * A basic buffer abstraction. */ @@ -129,16 +142,6 @@ class Instance { */ virtual void prepend(Instance& data) PURE; - /** - * Commit a set of slices originally obtained from reserve(). The number of slices should match - * the number obtained from reserve(). The size of each slice can also be altered. Commit must - * occur once following a reserve() without any mutating operations in between other than to the - * iovecs len_ fields. - * @param iovecs supplies the array of slices to commit. - * @param num_iovecs supplies the size of the slices array. - */ - virtual void commit(RawSlice* iovecs, uint64_t num_iovecs) PURE; - /** * Copy out a section of the buffer. * @param start supplies the buffer index to start copying from. @@ -202,13 +205,22 @@ class Instance { virtual void move(Instance& rhs, uint64_t length) PURE; /** - * Reserve space in the buffer. - * @param length supplies the amount of space to reserve. - * @param iovecs supplies the slices to fill with reserved memory. - * @param num_iovecs supplies the size of the slices array. - * @return the number of iovecs used to reserve the space. + * Reserve space in the buffer for reading into. The amount of space reserved is determined + * based on buffer settings and performance considerations. + * @return a `Reservation`, on which `commit()` can be called, or which can + * be destructed to discard any resources in the `Reservation`. */ - virtual uint64_t reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) PURE; + virtual Reservation reserveForRead() PURE; + + /** + * Reserve space in the buffer in a single slice. + * @param length the exact length of the reservation. + * @param separate_slice specifies whether the reserved space must be in a separate slice + * from any other data in this buffer. + * @return a `ReservationSingleSlice` which has exactly one slice in it. + */ + virtual ReservationSingleSlice reserveSingleSlice(uint64_t length, + bool separate_slice = false) PURE; /** * Search for an occurrence of data within the buffer. @@ -414,6 +426,17 @@ class Instance { * the low watermark. */ virtual bool highWatermarkTriggered() const PURE; + +private: + friend Reservation; + friend ReservationSingleSlice; + + /** + * Called by a `Reservation` to commit `length` bytes of the + * reservation. 
+ */ + virtual void commit(uint64_t length, absl::Span slices, + ReservationSlicesOwnerPtr slices_owner) PURE; }; using InstancePtr = std::unique_ptr; @@ -441,5 +464,140 @@ class WatermarkFactory { using WatermarkFactoryPtr = std::unique_ptr; using WatermarkFactorySharedPtr = std::shared_ptr; +/** + * Holds an in-progress addition to a buffer. + * + * @note For performance reasons, this class is passed by value to + * avoid an extra allocation, so it cannot have any virtual methods. + */ +class Reservation final { +public: + Reservation(Reservation&&) = default; + ~Reservation() = default; + + /** + * @return an array of `RawSlice` of length `numSlices()`. + */ + RawSlice* slices() { return slices_.data(); } + const RawSlice* slices() const { return slices_.data(); } + + /** + * @return the number of slices present. + */ + uint64_t numSlices() const { return slices_.size(); } + + /** + * @return the total length of the Reservation. + */ + uint64_t length() const { return length_; } + + /** + * Commits some or all of the data in the reservation. + * @param length supplies the number of bytes to commit. This must be + * less than or equal to the size of the `Reservation`. + * + * @note No other methods should be called on the object after `commit()` is called. + */ + void commit(uint64_t length) { + ENVOY_BUG(length <= length_, "commit() length must be <= size of the Reservation"); + ASSERT(length == 0 || !slices_.empty(), + "Reservation.commit() called on empty Reservation; possible double-commit()."); + buffer_.commit(length, absl::MakeSpan(slices_), std::move(slices_owner_)); + length_ = 0; + slices_.clear(); + ASSERT(slices_owner_ == nullptr); + } + + // Tuned to allow reads of 128k, using 16k slices. + static constexpr uint32_t MAX_SLICES_ = 8; + +private: + Reservation(Instance& buffer) : buffer_(buffer) {} + + // The buffer that created this `Reservation`. + Instance& buffer_; + + // The combined length of all slices in the Reservation. + uint64_t length_; + + // The RawSlices in the reservation, usable by operations such as `::readv()`. + absl::InlinedVector slices_; + + // An owner that can be set by the creator of the `Reservation` to free slices upon + // destruction. + ReservationSlicesOwnerPtr slices_owner_; + +public: + // The following are for use only by implementations of Buffer. Because c++ + // doesn't allow inheritance of friendship, these are just trying to make + // misuse easy to spot in a code review. + static Reservation bufferImplUseOnlyConstruct(Instance& buffer) { return Reservation(buffer); } + decltype(slices_)& bufferImplUseOnlySlices() { return slices_; } + ReservationSlicesOwnerPtr& bufferImplUseOnlySlicesOwner() { return slices_owner_; } + void bufferImplUseOnlySetLength(uint64_t length) { length_ = length; } +}; + +/** + * Holds an in-progress addition to a buffer, holding only a single slice. + * + * @note For performance reasons, this class is passed by value to + * avoid an extra allocation, so it cannot have any virtual methods. + */ +class ReservationSingleSlice final { +public: + ReservationSingleSlice(ReservationSingleSlice&&) = default; + ~ReservationSingleSlice() = default; + + /** + * @return the slice in the Reservation. + */ + RawSlice slice() const { return slice_; } + + /** + * @return the total length of the Reservation. + */ + uint64_t length() const { return slice_.len_; } + + /** + * Commits some or all of the data in the reservation. + * @param length supplies the number of bytes to commit. 
This must be + * less than or equal to the size of the `Reservation`. + * + * @note No other methods should be called on the object after `commit()` is called. + */ + void commit(uint64_t length) { + ENVOY_BUG(length <= slice_.len_, "commit() length must be <= size of the Reservation"); + ASSERT(length == 0 || slice_.mem_ != nullptr, + "Reservation.commit() called on empty Reservation; possible double-commit()."); + buffer_.commit(length, absl::MakeSpan(&slice_, 1), std::move(slice_owner_)); + slice_ = {nullptr, 0}; + ASSERT(slice_owner_ == nullptr); + } + +private: + ReservationSingleSlice(Instance& buffer) : buffer_(buffer) {} + + // The buffer that created this `Reservation`. + Instance& buffer_; + + // The RawSlice in the reservation, usable by anything needing the raw pointer + // and length to read into. + RawSlice slice_{}; + + // An owner that can be set by the creator of the `ReservationSingleSlice` to free the slice upon + // destruction. + ReservationSlicesOwnerPtr slice_owner_; + +public: + // The following are for use only by implementations of Buffer. Because c++ + // doesn't allow inheritance of friendship, these are just trying to make + // misuse easy to spot in a code review. + static ReservationSingleSlice bufferImplUseOnlyConstruct(Instance& buffer) { + return ReservationSingleSlice(buffer); + } + RawSlice& bufferImplUseOnlySlice() { return slice_; } + ReservationSlicesOwnerPtr& bufferImplUseOnlySliceOwner() { return slice_owner_; } +}; + } // namespace Buffer } // namespace Envoy diff --git a/include/envoy/network/io_handle.h b/include/envoy/network/io_handle.h index de530474592a..ab5289507fdc 100644 --- a/include/envoy/network/io_handle.h +++ b/include/envoy/network/io_handle.h @@ -70,11 +70,13 @@ class IoHandle { /** * Read from a io handle directly into buffer. * @param buffer supplies the buffer to read into. - * @param max_length supplies the maximum length to read. + * @param max_length supplies the maximum length to read. A value of absl::nullopt means to read + * as much data as possible, within the constraints of available buffer size. * @return a IoCallUint64Result with err_ = nullptr and rc_ = the number of bytes * read if successful, or err_ = some IoError for failure. If call failed, rc_ shouldn't be used. */ - virtual Api::IoCallUint64Result read(Buffer::Instance& buffer, uint64_t max_length) PURE; + virtual Api::IoCallUint64Result read(Buffer::Instance& buffer, + absl::optional max_length) PURE; /** * Write the data in slices out. diff --git a/source/common/buffer/buffer_impl.cc b/source/common/buffer/buffer_impl.cc index 3d2b27483466..420dd9cd1d2c 100644 --- a/source/common/buffer/buffer_impl.cc +++ b/source/common/buffer/buffer_impl.cc @@ -18,6 +18,8 @@ namespace { constexpr uint64_t CopyThreshold = 512; } // namespace +thread_local absl::InlinedVector Slice::free_list_; + void OwnedImpl::addImpl(const void* data, uint64_t size) { const char* src = static_cast(data); bool new_slice_needed = slices_.empty(); @@ -81,47 +83,6 @@ void OwnedImpl::prepend(Instance& data) { other.postProcess(); } -void OwnedImpl::commit(RawSlice* iovecs, uint64_t num_iovecs) { - if (num_iovecs == 0) { - return; - } - if (slices_.empty()) { - return; - } - // Find the slices in the buffer that correspond to the iovecs: - // First, scan backward from the end of the buffer to find the last slice containing - // any content. 
Reservations are made from the end of the buffer, and out-of-order commits - // aren't supported, so any slices before this point cannot match the iovecs being committed. - ssize_t slice_index = static_cast(slices_.size()) - 1; - while (slice_index >= 0 && slices_[slice_index].dataSize() == 0) { - slice_index--; - } - if (slice_index < 0) { - // There was no slice containing any data, so rewind the iterator at the first slice. - slice_index = 0; - } - - // Next, scan forward and attempt to match the slices against iovecs. - uint64_t num_slices_committed = 0; - while (num_slices_committed < num_iovecs) { - if (slices_[slice_index].commit(iovecs[num_slices_committed])) { - length_ += iovecs[num_slices_committed].len_; - num_slices_committed++; - } - slice_index++; - if (slice_index == static_cast(slices_.size())) { - break; - } - } - - // In case an extra slice was reserved, remove empty slices from the end of the buffer. - while (!slices_.empty() && slices_.back().dataSize() == 0) { - slices_.pop_back(); - } - - ASSERT(num_slices_committed > 0); -} - void OwnedImpl::copyOut(size_t start, uint64_t size, void* data) const { uint64_t bytes_to_skip = start; uint8_t* dest = static_cast(data); @@ -339,55 +300,119 @@ void OwnedImpl::move(Instance& rhs, uint64_t length) { other.postProcess(); } -uint64_t OwnedImpl::reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) { - if (num_iovecs == 0 || length == 0) { - return 0; +Reservation OwnedImpl::reserveForRead() { + return reserveWithMaxLength(default_read_reservation_size_); +} + +Reservation OwnedImpl::reserveWithMaxLength(uint64_t max_length) { + Reservation reservation = Reservation::bufferImplUseOnlyConstruct(*this); + if (max_length == 0) { + return reservation; } + + // Remove any empty slices at the end. + while (!slices_.empty() && slices_.back().dataSize() == 0) { + slices_.pop_back(); + } + + uint64_t bytes_remaining = max_length; + uint64_t reserved = 0; + auto& reservation_slices = reservation.bufferImplUseOnlySlices(); + auto slices_owner = std::make_unique(); + // Check whether there are any empty slices with reservable space at the end of the buffer. - size_t first_reservable_slice = slices_.size(); - while (first_reservable_slice > 0) { - if (slices_[first_reservable_slice - 1].reservableSize() == 0) { - break; - } - first_reservable_slice--; - if (slices_[first_reservable_slice].dataSize() != 0) { - // There is some content in this slice, so anything in front of it is non-reservable. + uint64_t reservable_size = slices_.empty() ? 0 : slices_.back().reservableSize(); + if (reservable_size >= max_length || reservable_size >= (Slice::default_slice_size_ / 8)) { + auto& last_slice = slices_.back(); + const uint64_t reservation_size = std::min(last_slice.reservableSize(), bytes_remaining); + auto slice = last_slice.reserve(reservation_size); + reservation_slices.push_back(slice); + slices_owner->owned_slices_.emplace_back(Slice()); + bytes_remaining -= slice.len_; + reserved += slice.len_; + } + + while (bytes_remaining != 0 && reservation_slices.size() < reservation.MAX_SLICES_) { + const uint64_t size = Slice::default_slice_size_; + + // If the next slice would go over the desired size, and the amount already reserved is already + // at least one full slice in size, stop allocating slices. This prevents returning a + // reservation larger than requested, which could go above the watermark limits for a watermark + // buffer, unless the size would be very small (less than 1 full slice). 
+ if (size > bytes_remaining && reserved >= size) { break; } + + Slice slice(size, slices_owner->free_list_); + const auto raw_slice = slice.reserve(size); + reservation_slices.push_back(raw_slice); + slices_owner->owned_slices_.emplace_back(std::move(slice)); + bytes_remaining -= std::min(raw_slice.len_, bytes_remaining); + reserved += raw_slice.len_; } - // Having found the sequence of reservable slices at the back of the buffer, reserve - // as much space as possible from each one. - uint64_t num_slices_used = 0; - uint64_t bytes_remaining = length; - size_t slice_index = first_reservable_slice; - while (slice_index < slices_.size() && bytes_remaining != 0 && num_slices_used < num_iovecs) { - auto& slice = slices_[slice_index]; - const uint64_t reservation_size = std::min(slice.reservableSize(), bytes_remaining); - if (num_slices_used + 1 == num_iovecs && reservation_size < bytes_remaining) { - // There is only one iovec left, and this next slice does not have enough space to - // complete the reservation. Stop iterating, with last one iovec still unpopulated, - // so the code following this loop can allocate a new slice to hold the rest of the - // reservation. - break; - } - iovecs[num_slices_used] = slice.reserve(reservation_size); - bytes_remaining -= iovecs[num_slices_used].len_; - num_slices_used++; - slice_index++; + ASSERT(reservation_slices.size() == slices_owner->owned_slices_.size()); + reservation.bufferImplUseOnlySlicesOwner() = std::move(slices_owner); + reservation.bufferImplUseOnlySetLength(reserved); + + return reservation; +} + +ReservationSingleSlice OwnedImpl::reserveSingleSlice(uint64_t length, bool separate_slice) { + ReservationSingleSlice reservation = ReservationSingleSlice::bufferImplUseOnlyConstruct(*this); + if (length == 0) { + return reservation; + } + + // Remove any empty slices at the end. + while (!slices_.empty() && slices_.back().dataSize() == 0) { + slices_.pop_back(); + } + + auto& reservation_slice = reservation.bufferImplUseOnlySlice(); + auto slice_owner = std::make_unique(); + + // Check whether there are any empty slices with reservable space at the end of the buffer. + uint64_t reservable_size = + (separate_slice || slices_.empty()) ? 0 : slices_.back().reservableSize(); + if (reservable_size >= length) { + reservation_slice = slices_.back().reserve(length); + } else { + Slice slice(length); + reservation_slice = slice.reserve(length); + slice_owner->owned_slice_ = std::move(slice); } - // If needed, allocate one more slice at the end to provide the remainder of the reservation. 
- if (bytes_remaining != 0) { - slices_.emplace_back(bytes_remaining); - iovecs[num_slices_used] = slices_.back().reserve(bytes_remaining); - bytes_remaining -= iovecs[num_slices_used].len_; - num_slices_used++; + reservation.bufferImplUseOnlySliceOwner() = std::move(slice_owner); + + return reservation; +} + +void OwnedImpl::commit(uint64_t length, absl::Span slices, + ReservationSlicesOwnerPtr slices_owner_base) { + if (length == 0) { + return; } - ASSERT(num_slices_used <= num_iovecs); - ASSERT(bytes_remaining == 0); - return num_slices_used; + ASSERT(dynamic_cast(slices_owner_base.get()) != nullptr); + std::unique_ptr slices_owner( + static_cast(slices_owner_base.release())); + + absl::Span owned_slices = slices_owner->ownedSlices(); + ASSERT(slices.size() == owned_slices.size()); + + uint64_t bytes_remaining = length; + for (uint32_t i = 0; i < slices.size() && bytes_remaining > 0; i++) { + Slice& owned_slice = owned_slices[i]; + if (owned_slice.data() != nullptr) { + slices_.emplace_back(std::move(owned_slice)); + } + slices[i].len_ = std::min(slices[i].len_, bytes_remaining); + bool success = slices_.back().commit(slices[i]); + ASSERT(success); + length_ += slices[i].len_; + bytes_remaining -= slices[i].len_; + } } ssize_t OwnedImpl::search(const void* data, uint64_t size, size_t start, size_t length) const { diff --git a/source/common/buffer/buffer_impl.h b/source/common/buffer/buffer_impl.h index c557107df050..b217d82195ed 100644 --- a/source/common/buffer/buffer_impl.h +++ b/source/common/buffer/buffer_impl.h @@ -33,6 +33,16 @@ namespace Buffer { class Slice { public: using Reservation = RawSlice; + using StoragePtr = std::unique_ptr; + + static constexpr uint32_t free_list_max_ = Buffer::Reservation::MAX_SLICES_; + using FreeListType = absl::InlinedVector; + class FreeListReference { + private: + FreeListReference(FreeListType& free_list) : free_list_(free_list) {} + FreeListType& free_list_; + friend class Slice; + }; /** * Create an empty Slice with 0 capacity. @@ -44,9 +54,9 @@ class Slice { * @param min_capacity number of bytes of space the slice should have. Actual capacity is rounded * up to the next multiple of 4kb. */ - Slice(uint64_t min_capacity) - : capacity_(sliceSize(min_capacity)), storage_(new uint8_t[capacity_]), base_(storage_.get()), - data_(0), reservable_(0) {} + Slice(uint64_t min_capacity, absl::optional free_list = absl::nullopt) + : capacity_(sliceSize(min_capacity)), storage_(newStorage(capacity_, free_list)), + base_(storage_.get()), data_(0), reservable_(0) {} /** * Create an immutable Slice that refers to an external buffer fragment. @@ -77,6 +87,7 @@ class Slice { if (this != &rhs) { callAndClearDrainTrackers(); + freeStorage(std::move(storage_), capacity_); storage_ = std::move(rhs.storage_); drain_trackers_ = std::move(rhs.drain_trackers_); base_ = rhs.base_; @@ -93,7 +104,15 @@ class Slice { return *this; } - ~Slice() { callAndClearDrainTrackers(); } + ~Slice() { + callAndClearDrainTrackers(); + freeStorage(std::move(storage_), capacity_); + } + + void freeStorage(FreeListReference free_list) { + callAndClearDrainTrackers(); + freeStorage(std::move(storage_), capacity_, free_list); + } /** * @return true if the data in the slice is mutable @@ -285,6 +304,10 @@ class Slice { drain_trackers_.clear(); } + static constexpr uint32_t default_slice_size_ = 16384; + + static FreeListReference freeList() { return FreeListReference(free_list_); } + protected: /** * Compute a slice size big enough to hold a specified amount of data. 
@@ -297,13 +320,56 @@ class Slice { return num_pages * PageSize; } + static StoragePtr newStorage(uint64_t capacity, absl::optional free_list_opt) { + ASSERT(sliceSize(default_slice_size_) == default_slice_size_, + "default_slice_size_ incompatible with sliceSize()"); + ASSERT(sliceSize(capacity) == capacity, + "newStorage should only be called on values returned from sliceSize()"); + ASSERT(!free_list_opt.has_value() || &free_list_opt->free_list_ == &free_list_); + + StoragePtr storage; + if (capacity == default_slice_size_ && free_list_opt.has_value()) { + FreeListType& free_list = free_list_opt->free_list_; + if (!free_list.empty()) { + storage = std::move(free_list.back()); + ASSERT(storage != nullptr); + ASSERT(free_list.back() == nullptr); + free_list.pop_back(); + return storage; + } + } + + storage.reset(new uint8_t[capacity]); + return storage; + } + + static void freeStorage(StoragePtr storage, uint64_t capacity, + absl::optional free_list_opt = absl::nullopt) { + if (storage == nullptr) { + return; + } + + if (capacity == default_slice_size_ && free_list_opt.has_value()) { + FreeListType& free_list = free_list_opt->free_list_; + if (free_list.size() < free_list_max_) { + free_list.emplace_back(std::move(storage)); + ASSERT(storage == nullptr); + return; + } + } + + storage.reset(); + } + + static thread_local FreeListType free_list_; + /** Length of the byte array that base_ points to. This is also the offset in bytes from the start * of the slice to the end of the Reservable section. */ uint64_t capacity_; /** Backing storage for mutable slices which own their own storage. This storage should never be * accessed directly; access base_ instead. */ - std::unique_ptr storage_; + StoragePtr storage_; /** Start of the slice. Points to storage_ iff the slice owns its own storage. */ uint8_t* base_{nullptr}; @@ -319,6 +385,8 @@ class Slice { std::list> drain_trackers_; }; +class OwnedImpl; + class SliceDataImpl : public SliceData { public: explicit SliceDataImpl(Slice&& slice) : slice_(std::move(slice)) {} @@ -330,6 +398,7 @@ class SliceDataImpl : public SliceData { } private: + friend OwnedImpl; Slice slice_; }; @@ -573,7 +642,6 @@ class OwnedImpl : public LibEventInstance { void add(const Instance& data) override; void prepend(absl::string_view data) override; void prepend(Instance& data) override; - void commit(RawSlice* iovecs, uint64_t num_iovecs) override; void copyOut(size_t start, uint64_t size, void* data) const override; void drain(uint64_t size) override; RawSliceVector getRawSlices(absl::optional max_slices = absl::nullopt) const override; @@ -583,7 +651,8 @@ class OwnedImpl : public LibEventInstance { void* linearize(uint32_t size) override; void move(Instance& rhs) override; void move(Instance& rhs, uint64_t length) override; - uint64_t reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) override; + Reservation reserveForRead() override; + ReservationSingleSlice reserveSingleSlice(uint64_t length, bool separate_slice = false) override; ssize_t search(const void* data, uint64_t size, size_t start, size_t length) const override; bool startsWith(absl::string_view data) const override; std::string toString() const override; @@ -618,6 +687,25 @@ class OwnedImpl : public LibEventInstance { */ std::vector describeSlicesForTest() const; + /** + * Create a reservation for reading with a non-default length. Used in benchmark tests. 
+ */ + Reservation reserveForReadWithLengthForTest(uint64_t length) { + return reserveWithMaxLength(length); + } + +protected: + static constexpr uint64_t default_read_reservation_size_ = + Reservation::MAX_SLICES_ * Slice::default_slice_size_; + + /** + * Create a reservation with a maximum length. + */ + Reservation reserveWithMaxLength(uint64_t max_length); + + void commit(uint64_t length, absl::Span slices, + ReservationSlicesOwnerPtr slices_owner) override; + private: /** * @param rhs another buffer @@ -641,6 +729,32 @@ class OwnedImpl : public LibEventInstance { /** Sum of the dataSize of all slices. */ OverflowDetectingUInt64 length_; + + struct OwnedImplReservationSlicesOwner : public ReservationSlicesOwner { + virtual absl::Span ownedSlices() PURE; + }; + + struct OwnedImplReservationSlicesOwnerMultiple : public OwnedImplReservationSlicesOwner { + // Optimization: get the thread_local freeList() once per Reservation, outside the loop. + OwnedImplReservationSlicesOwnerMultiple() : free_list_(Slice::freeList()) {} + + ~OwnedImplReservationSlicesOwnerMultiple() override { + while (!owned_slices_.empty()) { + owned_slices_.back().freeStorage(free_list_); + owned_slices_.pop_back(); + } + } + absl::Span ownedSlices() override { return absl::MakeSpan(owned_slices_); } + + Slice::FreeListReference free_list_; + absl::InlinedVector owned_slices_; + }; + + struct OwnedImplReservationSlicesOwnerSingle : public OwnedImplReservationSlicesOwner { + absl::Span ownedSlices() override { return absl::MakeSpan(&owned_slice_, 1); } + + Slice owned_slice_; + }; }; using BufferFragmentPtr = std::unique_ptr; diff --git a/source/common/buffer/watermark_buffer.cc b/source/common/buffer/watermark_buffer.cc index 0503266085b7..f7d95e5183f4 100644 --- a/source/common/buffer/watermark_buffer.cc +++ b/source/common/buffer/watermark_buffer.cc @@ -31,8 +31,9 @@ void WatermarkBuffer::prepend(Instance& data) { checkHighAndOverflowWatermarks(); } -void WatermarkBuffer::commit(RawSlice* iovecs, uint64_t num_iovecs) { - OwnedImpl::commit(iovecs, num_iovecs); +void WatermarkBuffer::commit(uint64_t length, absl::Span slices, + ReservationSlicesOwnerPtr slices_owner) { + OwnedImpl::commit(length, slices, std::move(slices_owner)); checkHighAndOverflowWatermarks(); } @@ -57,10 +58,27 @@ SliceDataPtr WatermarkBuffer::extractMutableFrontSlice() { return result; } -uint64_t WatermarkBuffer::reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) { - uint64_t bytes_reserved = OwnedImpl::reserve(length, iovecs, num_iovecs); - checkHighAndOverflowWatermarks(); - return bytes_reserved; +// Adjust the reservation size based on space available before hitting +// the high watermark to avoid overshooting by a lot and thus violating the limits +// the watermark is imposing. +Reservation WatermarkBuffer::reserveForRead() { + constexpr auto preferred_length = default_read_reservation_size_; + uint64_t adjusted_length = preferred_length; + + if (high_watermark_ > 0 && preferred_length > 0) { + const uint64_t current_length = OwnedImpl::length(); + if (current_length >= high_watermark_) { + // Always allow a read of at least some data. The API doesn't allow returning + // a zero-length reservation. 
+ adjusted_length = Slice::default_slice_size_; + } else { + const uint64_t available_length = high_watermark_ - current_length; + adjusted_length = IntUtil::roundUpToMultiple(available_length, Slice::default_slice_size_); + adjusted_length = std::min(adjusted_length, preferred_length); + } + } + + return OwnedImpl::reserveWithMaxLength(adjusted_length); } void WatermarkBuffer::appendSliceForTest(const void* data, uint64_t size) { diff --git a/source/common/buffer/watermark_buffer.h b/source/common/buffer/watermark_buffer.h index a84f20c21d5d..9150cdaf54f9 100644 --- a/source/common/buffer/watermark_buffer.h +++ b/source/common/buffer/watermark_buffer.h @@ -30,12 +30,11 @@ class WatermarkBuffer : public OwnedImpl { void add(const Instance& data) override; void prepend(absl::string_view data) override; void prepend(Instance& data) override; - void commit(RawSlice* iovecs, uint64_t num_iovecs) override; void drain(uint64_t size) override; void move(Instance& rhs) override; void move(Instance& rhs, uint64_t length) override; SliceDataPtr extractMutableFrontSlice() override; - uint64_t reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) override; + Reservation reserveForRead() override; void postProcess() override { checkLowWatermark(); } void appendSliceForTest(const void* data, uint64_t size) override; void appendSliceForTest(absl::string_view data) override; @@ -52,6 +51,9 @@ class WatermarkBuffer : public OwnedImpl { void checkLowWatermark(); private: + void commit(uint64_t length, absl::Span slices, + ReservationSlicesOwnerPtr slices_owner) override; + std::function below_low_watermark_; std::function above_high_watermark_; std::function above_overflow_watermark_; diff --git a/source/common/common/utility.h b/source/common/common/utility.h index c995fed2db38..84acc287b0f3 100644 --- a/source/common/common/utility.h +++ b/source/common/common/utility.h @@ -179,6 +179,24 @@ class DateUtil { static uint64_t nowToMilliseconds(TimeSource& time_source); }; +/** + * Utility routines for working with integers. + */ +class IntUtil { +public: + /** + * Round `val` up to the next multiple. Examples: + * roundUpToMultiple(3, 8) -> 8 + * roundUpToMultiple(9, 8) -> 16 + * roundUpToMultiple(8, 8) -> 8 + */ + static uint64_t roundUpToMultiple(uint64_t val, uint32_t multiple) { + ASSERT(multiple > 0); + ASSERT((val + multiple) >= val, "Unsigned overflow"); + return ((val + multiple - 1) / multiple) * multiple; + } +}; + /** * Utility routines for working with strings. */ @@ -619,8 +637,8 @@ template struct TrieLookupTable { } /** - * Finds the entry associated with the longest prefix. Complexity is O(min(longest key prefix, key - * length)) + * Finds the entry associated with the longest prefix. Complexity is O(min(longest key prefix, + * key length)). * @param key the key used to find. * @return the value matching the longest prefix based on the key. 
*/ diff --git a/source/common/grpc/common.cc b/source/common/grpc/common.cc index 8b7551d8bea2..549be3dbbc72 100644 --- a/source/common/grpc/common.cc +++ b/source/common/grpc/common.cc @@ -131,11 +131,9 @@ Buffer::InstancePtr Common::serializeToGrpcFrame(const Protobuf::Message& messag Buffer::InstancePtr body(new Buffer::OwnedImpl()); const uint32_t size = message.ByteSize(); const uint32_t alloc_size = size + 5; - Buffer::RawSlice iovec; - body->reserve(alloc_size, &iovec, 1); - ASSERT(iovec.len_ >= alloc_size); - iovec.len_ = alloc_size; - uint8_t* current = reinterpret_cast(iovec.mem_); + auto reservation = body->reserveSingleSlice(alloc_size); + ASSERT(reservation.slice().len_ >= alloc_size); + uint8_t* current = reinterpret_cast(reservation.slice().mem_); *current++ = 0; // flags const uint32_t nsize = htonl(size); std::memcpy(current, reinterpret_cast(&nsize), sizeof(uint32_t)); @@ -143,22 +141,20 @@ Buffer::InstancePtr Common::serializeToGrpcFrame(const Protobuf::Message& messag Protobuf::io::ArrayOutputStream stream(current, size, -1); Protobuf::io::CodedOutputStream codec_stream(&stream); message.SerializeWithCachedSizes(&codec_stream); - body->commit(&iovec, 1); + reservation.commit(alloc_size); return body; } Buffer::InstancePtr Common::serializeMessage(const Protobuf::Message& message) { auto body = std::make_unique(); const uint32_t size = message.ByteSize(); - Buffer::RawSlice iovec; - body->reserve(size, &iovec, 1); - ASSERT(iovec.len_ >= size); - iovec.len_ = size; - uint8_t* current = reinterpret_cast(iovec.mem_); + auto reservation = body->reserveSingleSlice(size); + ASSERT(reservation.slice().len_ >= size); + uint8_t* current = reinterpret_cast(reservation.slice().mem_); Protobuf::io::ArrayOutputStream stream(current, size, -1); Protobuf::io::CodedOutputStream codec_stream(&stream); message.SerializeWithCachedSizes(&codec_stream); - body->commit(&iovec, 1); + reservation.commit(size); return body; } diff --git a/source/common/http/http2/metadata_encoder.cc b/source/common/http/http2/metadata_encoder.cc index 5e5a6970a872..ac1c06af4f36 100644 --- a/source/common/http/http2/metadata_encoder.cc +++ b/source/common/http/http2/metadata_encoder.cc @@ -62,18 +62,16 @@ bool MetadataEncoder::createHeaderBlockUsingNghttp2(const MetadataMap& metadata_ ENVOY_LOG(error, "Payload size {} exceeds the max bound.", buflen); return false; } - Buffer::RawSlice iovec; - payload_.reserve(buflen, &iovec, 1); - ASSERT(iovec.len_ >= buflen); + auto reservation = payload_.reserveSingleSlice(buflen); + ASSERT(reservation.slice().len_ >= buflen); // Creates payload using nghttp2. 
- uint8_t* buf = reinterpret_cast(iovec.mem_); + uint8_t* buf = reinterpret_cast(reservation.slice().mem_); const ssize_t result = nghttp2_hd_deflate_hd(deflater_.get(), buf, buflen, nva.begin(), nvlen); RELEASE_ASSERT(result > 0, fmt::format("Failed to deflate metadata payload, with result {}.", result)); - iovec.len_ = result; - payload_.commit(&iovec, 1); + reservation.commit(result); return true; } diff --git a/source/common/network/io_socket_handle_impl.cc b/source/common/network/io_socket_handle_impl.cc index 508ce6400aac..b71a2273c96f 100644 --- a/source/common/network/io_socket_handle_impl.cc +++ b/source/common/network/io_socket_handle_impl.cc @@ -108,21 +108,18 @@ Api::IoCallUint64Result IoSocketHandleImpl::readv(uint64_t max_length, Buffer::R return result; } -Api::IoCallUint64Result IoSocketHandleImpl::read(Buffer::Instance& buffer, uint64_t max_length) { +Api::IoCallUint64Result IoSocketHandleImpl::read(Buffer::Instance& buffer, + absl::optional max_length_opt) { + const uint64_t max_length = max_length_opt.value_or(UINT64_MAX); if (max_length == 0) { return Api::ioCallUint64ResultNoError(); } - constexpr uint64_t MaxSlices = 2; - Buffer::RawSlice slices[MaxSlices]; - const uint64_t num_slices = buffer.reserve(max_length, slices, MaxSlices); - Api::IoCallUint64Result result = readv(max_length, slices, num_slices); + Buffer::Reservation reservation = buffer.reserveForRead(); + Api::IoCallUint64Result result = readv(std::min(reservation.length(), max_length), + reservation.slices(), reservation.numSlices()); uint64_t bytes_to_commit = result.ok() ? result.rc_ : 0; ASSERT(bytes_to_commit <= max_length); - for (uint64_t i = 0; i < num_slices; i++) { - slices[i].len_ = std::min(slices[i].len_, static_cast(bytes_to_commit)); - bytes_to_commit -= slices[i].len_; - } - buffer.commit(slices, num_slices); + reservation.commit(bytes_to_commit); // Emulated edge events need to registered if the socket operation did not complete // because the socket would block. diff --git a/source/common/network/io_socket_handle_impl.h b/source/common/network/io_socket_handle_impl.h index 41603976a736..7fbb2c39d274 100644 --- a/source/common/network/io_socket_handle_impl.h +++ b/source/common/network/io_socket_handle_impl.h @@ -33,7 +33,8 @@ class IoSocketHandleImpl : public IoHandle, protected Logger::Loggable max_length) override; Api::IoCallUint64Result writev(const Buffer::RawSlice* slices, uint64_t num_slice) override; diff --git a/source/common/network/raw_buffer_socket.cc b/source/common/network/raw_buffer_socket.cc index 65a79a1cb770..96f75a1c419b 100644 --- a/source/common/network/raw_buffer_socket.cc +++ b/source/common/network/raw_buffer_socket.cc @@ -18,8 +18,7 @@ IoResult RawBufferSocket::doRead(Buffer::Instance& buffer) { uint64_t bytes_read = 0; bool end_stream = false; do { - // 16K read is arbitrary. TODO(mattklein123) PERF: Tune the read size. 
- Api::IoCallUint64Result result = callbacks_->ioHandle().read(buffer, 16384); + Api::IoCallUint64Result result = callbacks_->ioHandle().read(buffer, absl::nullopt); if (result.ok()) { ENVOY_CONN_LOG(trace, "read returns: {}", callbacks_->connection(), result.rc_); diff --git a/source/common/network/utility.cc b/source/common/network/utility.cc index 61685068e650..14de3cdc92be 100644 --- a/source/common/network/utility.cc +++ b/source/common/network/utility.cc @@ -101,21 +101,14 @@ Api::IoCallUint64Result receiveMessage(uint64_t max_packet_size, Buffer::Instanc IoHandle::RecvMsgOutput& output, IoHandle& handle, const Address::Instance& local_address) { - Buffer::RawSlice slice; - const uint64_t num_slices = buffer->reserve(max_packet_size, &slice, 1); - ASSERT(num_slices == 1u); + auto reservation = buffer->reserveSingleSlice(max_packet_size); + Buffer::RawSlice slice = reservation.slice(); + Api::IoCallUint64Result result = handle.recvmsg(&slice, 1, local_address.ip()->port(), output); - Api::IoCallUint64Result result = - handle.recvmsg(&slice, num_slices, local_address.ip()->port(), output); - - if (!result.ok()) { - return result; + if (result.ok()) { + reservation.commit(std::min(max_packet_size, result.rc_)); } - // Adjust memory length and commit slice to buffer - slice.len_ = std::min(slice.len_, static_cast(result.rc_)); - buffer->commit(&slice, 1); - return result; } @@ -619,16 +612,27 @@ Api::IoCallUint64Result Utility::readFromSocket(IoHandle& handle, } if (handle.supportsMmsg()) { - const uint32_t num_packets_per_mmsg_call = 16u; - const uint32_t num_slices_per_packet = 1u; - absl::FixedArray buffers(num_packets_per_mmsg_call); + const auto max_packet_size = udp_packet_processor.maxPacketSize(); + + // Buffer::ReservationSingleSlice is always passed by value, and can only be constructed + // by Buffer::Instance::reserve(), so this is needed to keep a fixed array + // in which all elements are legally constructed. 
+ struct BufferAndReservation { + BufferAndReservation(uint64_t max_packet_size) + : buffer_(std::make_unique()), + reservation_(buffer_->reserveSingleSlice(max_packet_size, true)) {} + + Buffer::InstancePtr buffer_; + Buffer::ReservationSingleSlice reservation_; + }; + constexpr uint32_t num_packets_per_mmsg_call = 16u; + constexpr uint32_t num_slices_per_packet = 1u; + absl::InlinedVector buffers; RawSliceArrays slices(num_packets_per_mmsg_call, absl::FixedArray(num_slices_per_packet)); - for (uint32_t i = 0; i < num_packets_per_mmsg_call; ++i) { - buffers[i] = std::make_unique(); - const uint64_t num_slices = buffers[i]->reserve(udp_packet_processor.maxPacketSize(), - slices[i].data(), num_slices_per_packet); - ASSERT(num_slices == num_slices_per_packet); + for (uint32_t i = 0; i < num_packets_per_mmsg_call; i++) { + buffers.push_back(max_packet_size); + slices[i][0] = buffers[i].reservation_.slice(); } IoHandle::RecvMsgOutput output(num_packets_per_mmsg_call, packets_dropped); @@ -650,11 +654,9 @@ Api::IoCallUint64Result Utility::readFromSocket(IoHandle& handle, ENVOY_LOG_MISC(debug, "Receive a packet with {} bytes from {}", msg_len, output.msg_[i].peer_address_->asString()); - // Adjust used memory length and commit slice to buffer - slice->len_ = std::min(slice->len_, static_cast(msg_len)); - buffers[i]->commit(slice, 1); + buffers[i].reservation_.commit(std::min(max_packet_size, msg_len)); - passPayloadToProcessor(msg_len, std::move(buffers[i]), output.msg_[i].peer_address_, + passPayloadToProcessor(msg_len, std::move(buffers[i].buffer_), output.msg_[i].peer_address_, output.msg_[i].local_address_, udp_packet_processor, receive_time); } return result; diff --git a/source/extensions/quic_listeners/quiche/envoy_quic_client_stream.cc b/source/extensions/quic_listeners/quiche/envoy_quic_client_stream.cc index d296af0bd503..d57840cda4e4 100644 --- a/source/extensions/quic_listeners/quiche/envoy_quic_client_stream.cc +++ b/source/extensions/quic_listeners/quiche/envoy_quic_client_stream.cc @@ -188,12 +188,7 @@ void EnvoyQuicClientStream::OnBodyAvailable() { int num_regions = GetReadableRegions(&iov, 1); ASSERT(num_regions > 0); size_t bytes_read = iov.iov_len; - Buffer::RawSlice slice; - buffer->reserve(bytes_read, &slice, 1); - ASSERT(slice.len_ >= bytes_read); - slice.len_ = bytes_read; - memcpy(slice.mem_, iov.iov_base, iov.iov_len); - buffer->commit(&slice, 1); + buffer->add(iov.iov_base, bytes_read); MarkConsumed(bytes_read); } ASSERT(buffer->length() == 0 || !end_stream_decoded_); diff --git a/source/extensions/quic_listeners/quiche/envoy_quic_server_stream.cc b/source/extensions/quic_listeners/quiche/envoy_quic_server_stream.cc index 311966d60ce6..42dde7450303 100644 --- a/source/extensions/quic_listeners/quiche/envoy_quic_server_stream.cc +++ b/source/extensions/quic_listeners/quiche/envoy_quic_server_stream.cc @@ -182,12 +182,7 @@ void EnvoyQuicServerStream::OnBodyAvailable() { int num_regions = GetReadableRegions(&iov, 1); ASSERT(num_regions > 0); size_t bytes_read = iov.iov_len; - Buffer::RawSlice slice; - buffer->reserve(bytes_read, &slice, 1); - ASSERT(slice.len_ >= bytes_read); - slice.len_ = bytes_read; - memcpy(slice.mem_, iov.iov_base, iov.iov_len); - buffer->commit(&slice, 1); + buffer->add(iov.iov_base, bytes_read); MarkConsumed(bytes_read); } diff --git a/source/extensions/quic_listeners/quiche/platform/quic_mem_slice_storage_impl.cc b/source/extensions/quic_listeners/quiche/platform/quic_mem_slice_storage_impl.cc index 1c2c63b39e56..5105ec6815ab 100644 --- 
a/source/extensions/quic_listeners/quiche/platform/quic_mem_slice_storage_impl.cc +++ b/source/extensions/quic_listeners/quiche/platform/quic_mem_slice_storage_impl.cc @@ -30,19 +30,14 @@ QuicMemSliceStorageImpl::QuicMemSliceStorageImpl(const iovec* iov, int iov_count size_t io_offset = 0; while (io_offset < write_len) { size_t slice_len = std::min(write_len - io_offset, max_slice_len); - Envoy::Buffer::RawSlice slice; - // Populate a temporary buffer instance and then move it to |buffer_|. This is necessary because - // consecutive reserve/commit can return addresses in same slice which violates the restriction - // of |max_slice_len| when ToSpan() is called. - Envoy::Buffer::OwnedImpl buffer; - uint16_t num_slice = buffer.reserve(slice_len, &slice, 1); - ASSERT(num_slice == 1); - QuicUtils::CopyToBuffer(iov, iov_count, io_offset, slice_len, static_cast(slice.mem_)); + + // Use a separate slice so that we do not violate the restriction of |max_slice_len| when + // ToSpan() is called. + auto reservation = buffer_.reserveSingleSlice(slice_len, true); + QuicUtils::CopyToBuffer(iov, iov_count, io_offset, slice_len, + static_cast(reservation.slice().mem_)); io_offset += slice_len; - // OwnedImpl may return a slice longer than needed, trim it to requested length. - slice.len_ = slice_len; - buffer.commit(&slice, num_slice); - buffer_.move(buffer); + reservation.commit(slice_len); } } diff --git a/source/extensions/quic_listeners/quiche/quic_io_handle_wrapper.h b/source/extensions/quic_listeners/quiche/quic_io_handle_wrapper.h index fc273df6038a..a2f66d52e7a4 100644 --- a/source/extensions/quic_listeners/quiche/quic_io_handle_wrapper.h +++ b/source/extensions/quic_listeners/quiche/quic_io_handle_wrapper.h @@ -29,7 +29,8 @@ class QuicIoHandleWrapper : public Network::IoHandle { } return io_handle_.readv(max_length, slices, num_slice); } - Api::IoCallUint64Result read(Buffer::Instance& buffer, uint64_t max_length) override { + Api::IoCallUint64Result read(Buffer::Instance& buffer, + absl::optional max_length) override { if (closed_) { return Api::IoCallUint64Result(0, Api::IoErrorPtr(new Network::IoSocketError(EBADF), Network::IoSocketError::deleteIoError)); diff --git a/source/extensions/stat_sinks/common/statsd/statsd.cc b/source/extensions/stat_sinks/common/statsd/statsd.cc index e1be68c6fd33..a1ad60cf1607 100644 --- a/source/extensions/stat_sinks/common/statsd/statsd.cc +++ b/source/extensions/stat_sinks/common/statsd/statsd.cc @@ -188,12 +188,12 @@ TcpStatsdSink::TlsSink::~TlsSink() { void TcpStatsdSink::TlsSink::beginFlush(bool expect_empty_buffer) { ASSERT(!expect_empty_buffer || buffer_.length() == 0); ASSERT(current_slice_mem_ == nullptr); + ASSERT(!current_buffer_reservation_.has_value()); - uint64_t num_iovecs = buffer_.reserve(FLUSH_SLICE_SIZE_BYTES, ¤t_buffer_slice_, 1); - ASSERT(num_iovecs == 1); + current_buffer_reservation_.emplace(buffer_.reserveSingleSlice(FLUSH_SLICE_SIZE_BYTES)); - ASSERT(current_buffer_slice_.len_ >= FLUSH_SLICE_SIZE_BYTES); - current_slice_mem_ = reinterpret_cast(current_buffer_slice_.mem_); + ASSERT(current_buffer_reservation_->slice().len_ >= FLUSH_SLICE_SIZE_BYTES); + current_slice_mem_ = reinterpret_cast(current_buffer_reservation_->slice().mem_); } void TcpStatsdSink::TlsSink::commonFlush(const std::string& name, uint64_t value, char stat_type) { @@ -201,7 +201,7 @@ void TcpStatsdSink::TlsSink::commonFlush(const std::string& name, uint64_t value // 36 > 1 ("." 
after prefix) + 1 (":" after name) + 4 (postfix chars, e.g., "|ms\n") + 30 for // number (bigger than it will ever be) const uint32_t max_size = name.size() + parent_.getPrefix().size() + 36; - if (current_buffer_slice_.len_ - usedBuffer() < max_size) { + if (current_buffer_reservation_->slice().len_ - usedBuffer() < max_size) { endFlush(false); beginFlush(false); } @@ -234,8 +234,9 @@ void TcpStatsdSink::TlsSink::flushGauge(const std::string& name, uint64_t value) void TcpStatsdSink::TlsSink::endFlush(bool do_write) { ASSERT(current_slice_mem_ != nullptr); - current_buffer_slice_.len_ = usedBuffer(); - buffer_.commit(¤t_buffer_slice_, 1); + ASSERT(current_buffer_reservation_.has_value()); + current_buffer_reservation_->commit(usedBuffer()); + current_buffer_reservation_.reset(); current_slice_mem_ = nullptr; if (do_write) { write(buffer_); @@ -309,7 +310,8 @@ void TcpStatsdSink::TlsSink::write(Buffer::Instance& buffer) { uint64_t TcpStatsdSink::TlsSink::usedBuffer() const { ASSERT(current_slice_mem_ != nullptr); - return current_slice_mem_ - reinterpret_cast(current_buffer_slice_.mem_); + ASSERT(current_buffer_reservation_.has_value()); + return current_slice_mem_ - reinterpret_cast(current_buffer_reservation_->slice().mem_); } } // namespace Statsd diff --git a/source/extensions/stat_sinks/common/statsd/statsd.h b/source/extensions/stat_sinks/common/statsd/statsd.h index b7eb8bfac627..6abc53bc0661 100644 --- a/source/extensions/stat_sinks/common/statsd/statsd.h +++ b/source/extensions/stat_sinks/common/statsd/statsd.h @@ -135,7 +135,7 @@ class TcpStatsdSink : public Stats::Sink { Event::Dispatcher& dispatcher_; Network::ClientConnectionPtr connection_; Buffer::OwnedImpl buffer_; - Buffer::RawSlice current_buffer_slice_; + absl::optional current_buffer_reservation_; char* current_slice_mem_{}; }; diff --git a/source/extensions/tracers/lightstep/lightstep_tracer_impl.cc b/source/extensions/tracers/lightstep/lightstep_tracer_impl.cc index d82ae4c45453..952d24e4284f 100644 --- a/source/extensions/tracers/lightstep/lightstep_tracer_impl.cc +++ b/source/extensions/tracers/lightstep/lightstep_tracer_impl.cc @@ -24,12 +24,10 @@ namespace Lightstep { static void serializeGrpcMessage(const lightstep::BufferChain& buffer_chain, Buffer::Instance& body) { auto size = buffer_chain.num_bytes(); - Buffer::RawSlice iovec; - body.reserve(size, &iovec, 1); - ASSERT(iovec.len_ >= size); - iovec.len_ = size; - buffer_chain.CopyOut(static_cast(iovec.mem_), size); - body.commit(&iovec, 1); + auto reservation = body.reserveSingleSlice(size); + ASSERT(reservation.slice().len_ >= size); + buffer_chain.CopyOut(static_cast(reservation.slice().mem_), size); + reservation.commit(size); Grpc::Common::prependGrpcFrameHeader(body); } diff --git a/source/extensions/transport_sockets/tls/ssl_socket.cc b/source/extensions/transport_sockets/tls/ssl_socket.cc index 7c005f47c700..dbcb87543c7c 100644 --- a/source/extensions/transport_sockets/tls/ssl_socket.cc +++ b/source/extensions/transport_sockets/tls/ssl_socket.cc @@ -94,16 +94,13 @@ SslSocket::ReadResult SslSocket::sslReadIntoSlice(Buffer::RawSlice& slice) { ASSERT(static_cast(rc) <= remaining); mem += rc; remaining -= rc; - result.commit_slice_ = true; + result.bytes_read_ += rc; } else { result.error_ = absl::make_optional(rc); break; } } - if (result.commit_slice_) { - slice.len_ -= remaining; - } return result; } @@ -123,17 +120,11 @@ Network::IoResult SslSocket::doRead(Buffer::Instance& read_buffer) { PostIoAction action = PostIoAction::KeepOpen; uint64_t bytes_read 
= 0; while (keep_reading) { - // We use 2 slices here so that we can use the remainder of an existing buffer chain element - // if there is extra space. 16K read is arbitrary and can be tuned later. - Buffer::RawSlice slices[2]; - uint64_t slices_to_commit = 0; - uint64_t num_slices = read_buffer.reserve(16384, slices, 2); - for (uint64_t i = 0; i < num_slices; i++) { - auto result = sslReadIntoSlice(slices[i]); - if (result.commit_slice_) { - slices_to_commit++; - bytes_read += slices[i].len_; - } + uint64_t bytes_read_this_iteration = 0; + Buffer::Reservation reservation = read_buffer.reserveForRead(); + for (uint64_t i = 0; i < reservation.numSlices(); i++) { + auto result = sslReadIntoSlice(reservation.slices()[i]); + bytes_read_this_iteration += result.bytes_read_; if (result.error_.has_value()) { keep_reading = false; int err = SSL_get_error(rawSsl(), result.error_.value()); @@ -165,13 +156,13 @@ Network::IoResult SslSocket::doRead(Buffer::Instance& read_buffer) { } } - if (slices_to_commit > 0) { - read_buffer.commit(slices, slices_to_commit); - if (callbacks_->shouldDrainReadBuffer()) { - callbacks_->setTransportSocketIsReadable(); - keep_reading = false; - } + reservation.commit(bytes_read_this_iteration); + if (bytes_read_this_iteration > 0 && callbacks_->shouldDrainReadBuffer()) { + callbacks_->setTransportSocketIsReadable(); + keep_reading = false; } + + bytes_read += bytes_read_this_iteration; } ENVOY_CONN_LOG(trace, "ssl read {} bytes", callbacks_->connection(), bytes_read); diff --git a/source/extensions/transport_sockets/tls/ssl_socket.h b/source/extensions/transport_sockets/tls/ssl_socket.h index 47b13970026f..cb15d82fa935 100644 --- a/source/extensions/transport_sockets/tls/ssl_socket.h +++ b/source/extensions/transport_sockets/tls/ssl_socket.h @@ -76,7 +76,7 @@ class SslSocket : public Network::TransportSocket, private: struct ReadResult { - bool commit_slice_{}; + uint64_t bytes_read_{0}; absl::optional error_; }; ReadResult sslReadIntoSlice(Buffer::RawSlice& slice); diff --git a/test/common/buffer/buffer_fuzz.cc b/test/common/buffer/buffer_fuzz.cc index 8fa5a2098dcc..f5db5b6e75ab 100644 --- a/test/common/buffer/buffer_fuzz.cc +++ b/test/common/buffer/buffer_fuzz.cc @@ -105,11 +105,6 @@ class StringBuffer : public Buffer::Instance { src.size_ = 0; } - void commit(Buffer::RawSlice* iovecs, uint64_t num_iovecs) override { - FUZZ_ASSERT(num_iovecs == 1); - size_ += iovecs[0].len_; - } - void copyOut(size_t start, uint64_t size, void* data) const override { ::memcpy(data, this->start() + start, size); } @@ -146,12 +141,34 @@ class StringBuffer : public Buffer::Instance { src.size_ -= length; } - uint64_t reserve(uint64_t length, Buffer::RawSlice* iovecs, uint64_t num_iovecs) override { - FUZZ_ASSERT(num_iovecs > 0); + Buffer::Reservation reserveForRead() override { + auto reservation = Buffer::Reservation::bufferImplUseOnlyConstruct(*this); + Buffer::RawSlice slice; + slice.mem_ = mutableEnd(); + slice.len_ = data_.size() - (start_ + size_); + reservation.bufferImplUseOnlySlices().push_back(slice); + reservation.bufferImplUseOnlySetLength(slice.len_); + + return reservation; + } + + Buffer::ReservationSingleSlice reserveSingleSlice(uint64_t length, bool separate_slice) override { + ASSERT(!separate_slice); + FUZZ_ASSERT(start_ + size_ + length <= data_.size()); + + auto reservation = Buffer::ReservationSingleSlice::bufferImplUseOnlyConstruct(*this); + Buffer::RawSlice slice; + slice.mem_ = mutableEnd(); + slice.len_ = length; + reservation.bufferImplUseOnlySlice() = slice; 
+ + return reservation; + } + + void commit(uint64_t length, absl::Span, + Buffer::ReservationSlicesOwnerPtr) override { + size_ += length; FUZZ_ASSERT(start_ + size_ + length <= data_.size()); - iovecs[0].mem_ = mutableEnd(); - iovecs[0].len_ = length; - return 1; } ssize_t search(const void* data, uint64_t size, size_t start, size_t length) const override { @@ -257,31 +274,20 @@ uint32_t bufferAction(Context& ctxt, char insert_value, uint32_t max_alloc, Buff if (reserve_length == 0) { break; } - constexpr uint32_t reserve_slices = 16; - Buffer::RawSlice slices[reserve_slices]; - const uint32_t allocated_slices = target_buffer.reserve(reserve_length, slices, reserve_slices); - uint32_t allocated_length = 0; - for (uint32_t i = 0; i < allocated_slices; ++i) { - ::memset(slices[i].mem_, insert_value, slices[i].len_); - allocated_length += slices[i].len_; - } - FUZZ_ASSERT(reserve_length <= allocated_length); - const uint32_t target_length = - std::min(reserve_length, action.reserve_commit().commit_length()); - uint32_t shrink_length = allocated_length; - int32_t shrink_slice = allocated_slices - 1; - while (shrink_length > target_length) { - FUZZ_ASSERT(shrink_slice >= 0); - const uint32_t available = slices[shrink_slice].len_; - const uint32_t remainder = shrink_length - target_length; - if (available >= remainder) { - slices[shrink_slice].len_ -= remainder; - break; + if (reserve_length < 16384) { + auto reservation = target_buffer.reserveSingleSlice(reserve_length); + ::memset(reservation.slice().mem_, insert_value, reservation.slice().len_); + reservation.commit( + std::min(action.reserve_commit().commit_length(), reservation.length())); + } else { + Buffer::Reservation reservation = target_buffer.reserveForRead(); + for (uint32_t i = 0; i < reservation.numSlices(); ++i) { + ::memset(reservation.slices()[i].mem_, insert_value, reservation.slices()[i].len_); } - shrink_length -= available; - slices[shrink_slice--].len_ = 0; + const uint32_t target_length = + std::min(reservation.length(), action.reserve_commit().commit_length()); + reservation.commit(target_length); } - target_buffer.commit(slices, allocated_slices); break; } case test::common::buffer::Action::kCopyOut: { diff --git a/test/common/buffer/buffer_speed_test.cc b/test/common/buffer/buffer_speed_test.cc index 49240c69f356..653a5bea770a 100644 --- a/test/common/buffer/buffer_speed_test.cc +++ b/test/common/buffer/buffer_speed_test.cc @@ -16,6 +16,7 @@ void deleteFragment(const void*, size_t, const Buffer::BufferFragmentImpl* self) static void bufferCreateEmpty(benchmark::State& state) { uint64_t length = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); Buffer::OwnedImpl buffer; length += buffer.length(); } @@ -29,6 +30,7 @@ static void bufferCreate(benchmark::State& state) { const absl::string_view input(data); uint64_t length = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); Buffer::OwnedImpl buffer(input); length += buffer.length(); } @@ -42,6 +44,7 @@ static void bufferAddSmallIncrement(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.add(input); if (buffer.length() >= MaxBufferLength) { // Keep the test's memory usage from growing too large. 
@@ -62,6 +65,7 @@ static void bufferAddString(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.add(data); if (buffer.length() >= MaxBufferLength) { buffer.drain(buffer.length()); @@ -79,6 +83,7 @@ static void bufferAddBuffer(benchmark::State& state) { const Buffer::OwnedImpl to_add(data); Buffer::OwnedImpl buffer(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.add(to_add); if (buffer.length() >= MaxBufferLength) { buffer.drain(buffer.length()); @@ -94,6 +99,7 @@ static void bufferPrependString(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.prepend(data); if (buffer.length() >= MaxBufferLength) { buffer.drain(buffer.length()); @@ -109,6 +115,7 @@ static void bufferPrependBuffer(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); // The prepend method removes the content from its source buffer. To populate a new source // buffer every time without the overhead of a copy, we use an BufferFragment that references // (and never deletes) an external string. @@ -144,6 +151,7 @@ static void bufferDrain(benchmark::State& state) { size_t drain_cycle = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.add(to_add); buffer.drain(drain_size[drain_cycle]); drain_cycle++; @@ -159,6 +167,7 @@ static void bufferDrainSmallIncrement(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.drain(state.range(0)); if (buffer.length() == 0) { buffer.add(input); @@ -175,6 +184,7 @@ static void bufferMove(benchmark::State& state) { Buffer::OwnedImpl buffer1(input); Buffer::OwnedImpl buffer2(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer1.move(buffer2); // now buffer1 has 2 copies of the input, and buffer2 is empty. buffer2.move(buffer1, input.size()); // now buffer1 and buffer2 are the same size. 
} @@ -192,6 +202,7 @@ static void bufferMovePartial(benchmark::State& state) { Buffer::OwnedImpl buffer1(input); Buffer::OwnedImpl buffer2(input); for (auto _ : state) { + UNREFERENCED_PARAMETER(_); while (buffer2.length() != 0) { buffer1.move(buffer2, 1); } @@ -207,42 +218,45 @@ BENCHMARK(bufferMovePartial)->Arg(1)->Arg(4096)->Arg(16384)->Arg(65536); static void bufferReserveCommit(benchmark::State& state) { Buffer::OwnedImpl buffer; for (auto _ : state) { - constexpr uint64_t NumSlices = 2; - Buffer::RawSlice slices[NumSlices]; - uint64_t slices_used = buffer.reserve(state.range(0), slices, NumSlices); - uint64_t bytes_to_commit = 0; - for (uint64_t i = 0; i < slices_used; i++) { - bytes_to_commit += static_cast(slices[i].len_); - } - buffer.commit(slices, slices_used); + UNREFERENCED_PARAMETER(_); + auto size = state.range(0); + Buffer::Reservation reservation = buffer.reserveForReadWithLengthForTest(size); + reservation.commit(reservation.length()); if (buffer.length() >= MaxBufferLength) { buffer.drain(buffer.length()); } } benchmark::DoNotOptimize(buffer.length()); } -BENCHMARK(bufferReserveCommit)->Arg(1)->Arg(4096)->Arg(16384)->Arg(65536); +BENCHMARK(bufferReserveCommit) + ->Arg(1) + ->Arg(4 * 1024) + ->Arg(16 * 1024) + ->Arg(64 * 1024) + ->Arg(128 * 1024); // Test the reserve+commit cycle, for the common case where the reserved space is // only partially used (and therefore the commit size is smaller than the reservation size). static void bufferReserveCommitPartial(benchmark::State& state) { Buffer::OwnedImpl buffer; for (auto _ : state) { - constexpr uint64_t NumSlices = 2; - Buffer::RawSlice slices[NumSlices]; - uint64_t slices_used = buffer.reserve(state.range(0), slices, NumSlices); - ASSERT(slices_used > 0); + UNREFERENCED_PARAMETER(_); + auto size = state.range(0); + Buffer::Reservation reservation = buffer.reserveForReadWithLengthForTest(size); // Commit one byte from the first slice and nothing from any subsequent slice. - uint64_t bytes_to_commit = 1; - slices[0].len_ = bytes_to_commit; - buffer.commit(slices, 1); + reservation.commit(1); if (buffer.length() >= MaxBufferLength) { buffer.drain(buffer.length()); } } benchmark::DoNotOptimize(buffer.length()); } -BENCHMARK(bufferReserveCommitPartial)->Arg(1)->Arg(4096)->Arg(16384)->Arg(65536); +BENCHMARK(bufferReserveCommitPartial) + ->Arg(1) + ->Arg(4 * 1024) + ->Arg(16 * 1024) + ->Arg(64 * 1024) + ->Arg(128 * 1024); // Test the linearization of a buffer in the best case where the data is in one slice. 
static void bufferLinearizeSimple(benchmark::State& state) { @@ -250,6 +264,7 @@ static void bufferLinearizeSimple(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.drain(buffer.length()); auto fragment = std::make_unique(input.data(), input.size(), deleteFragment); @@ -267,6 +282,7 @@ static void bufferLinearizeGeneral(benchmark::State& state) { const absl::string_view input(data); Buffer::OwnedImpl buffer; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); buffer.drain(buffer.length()); do { auto fragment = @@ -291,6 +307,7 @@ static void bufferSearch(benchmark::State& state) { Buffer::OwnedImpl buffer(input); ssize_t result = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); result += buffer.search(Pattern.c_str(), Pattern.length(), 0, 0); } benchmark::DoNotOptimize(result); @@ -314,6 +331,7 @@ static void bufferSearchPartialMatch(benchmark::State& state) { Buffer::OwnedImpl buffer(input); ssize_t result = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); result += buffer.search(Pattern.c_str(), Pattern.length(), 0, 0); } benchmark::DoNotOptimize(result); @@ -333,6 +351,7 @@ static void bufferStartsWith(benchmark::State& state) { Buffer::OwnedImpl buffer(input); ssize_t result = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); if (!buffer.startsWith({Pattern.c_str(), Pattern.length()})) { result++; } @@ -356,6 +375,7 @@ static void bufferStartsWithMatch(benchmark::State& state) { Buffer::OwnedImpl buffer(input); ssize_t result = 0; for (auto _ : state) { + UNREFERENCED_PARAMETER(_); if (buffer.startsWith({Prefix.c_str(), Prefix.length()})) { result++; } diff --git a/test/common/buffer/owned_impl_test.cc b/test/common/buffer/owned_impl_test.cc index c23fca2602f9..52930e8adabc 100644 --- a/test/common/buffer/owned_impl_test.cc +++ b/test/common/buffer/owned_impl_test.cc @@ -24,17 +24,6 @@ class OwnedImplTest : public testing::Test { bool release_callback_called_ = false; protected: - static void clearReservation(Buffer::RawSlice* iovecs, uint64_t num_iovecs, OwnedImpl& buffer) { - for (uint64_t i = 0; i < num_iovecs; i++) { - iovecs[i].len_ = 0; - } - buffer.commit(iovecs, num_iovecs); - } - - static void commitReservation(Buffer::RawSlice* iovecs, uint64_t num_iovecs, OwnedImpl& buffer) { - buffer.commit(iovecs, num_iovecs); - } - static void expectSlices(std::vector> buffer_list, OwnedImpl& buffer) { const auto& buffer_slices = buffer.describeSlicesForTest(); ASSERT_EQ(buffer_list.size(), buffer_slices.size()); @@ -794,79 +783,88 @@ TEST_F(OwnedImplTest, ReserveCommit) { { Buffer::OwnedImpl buffer; - // A zero-byte reservation should fail. - static constexpr uint64_t NumIovecs = 16; - Buffer::RawSlice iovecs[NumIovecs]; - uint64_t num_reserved = buffer.reserve(0, iovecs, NumIovecs); - EXPECT_EQ(0, num_reserved); - clearReservation(iovecs, num_reserved, buffer); + // A zero-byte reservation should return an empty reservation. + { + auto reservation = buffer.reserveSingleSlice(0); + EXPECT_EQ(0, reservation.slice().len_); + EXPECT_EQ(0, reservation.length()); + } EXPECT_EQ(0, buffer.length()); // Test and commit a small reservation. This should succeed. - num_reserved = buffer.reserve(1, iovecs, NumIovecs); - EXPECT_EQ(1, num_reserved); - // The implementation might provide a bigger reservation than requested. 
- EXPECT_LE(1, iovecs[0].len_); - iovecs[0].len_ = 1; - commitReservation(iovecs, num_reserved, buffer); + { + auto reservation = buffer.reserveForRead(); + reservation.commit(1); + } EXPECT_EQ(1, buffer.length()); // Request a reservation that fits in the remaining space at the end of the last slice. - num_reserved = buffer.reserve(1, iovecs, NumIovecs); - EXPECT_EQ(1, num_reserved); - EXPECT_LE(1, iovecs[0].len_); - iovecs[0].len_ = 1; - const void* slice1 = iovecs[0].mem_; - clearReservation(iovecs, num_reserved, buffer); + const void* slice1; + { + auto reservation = buffer.reserveSingleSlice(1); + EXPECT_EQ(1, reservation.slice().len_); + slice1 = reservation.slice().mem_; + } // Request a reservation that is too large to fit in the remaining space at the end of // the last slice, and allow the buffer to use only one slice. This should result in the // creation of a new slice within the buffer. - num_reserved = buffer.reserve(4096, iovecs, 1); - EXPECT_EQ(1, num_reserved); - EXPECT_NE(slice1, iovecs[0].mem_); - clearReservation(iovecs, num_reserved, buffer); + { + auto reservation = buffer.reserveSingleSlice(16384); + EXPECT_EQ(16384, reservation.slice().len_); + EXPECT_NE(slice1, reservation.slice().mem_); + } // Request the same size reservation, but allow the buffer to use multiple slices. This // should result in the buffer creating a second slice and splitting the reservation between the // last two slices. - num_reserved = buffer.reserve(4096, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(slice1, iovecs[0].mem_); - clearReservation(iovecs, num_reserved, buffer); - - // Request a reservation that too big to fit in the existing slices. This should result - // in the creation of a third slice. - expectSlices({{1, 4095, 4096}}, buffer); - buffer.reserve(4096, iovecs, NumIovecs); - expectSlices({{1, 4095, 4096}, {0, 4096, 4096}}, buffer); - const void* slice2 = iovecs[1].mem_; - num_reserved = buffer.reserve(8192, iovecs, NumIovecs); - expectSlices({{1, 4095, 4096}, {0, 4096, 4096}, {0, 4096, 4096}}, buffer); - EXPECT_EQ(3, num_reserved); - EXPECT_EQ(slice1, iovecs[0].mem_); - EXPECT_EQ(slice2, iovecs[1].mem_); - clearReservation(iovecs, num_reserved, buffer); + { + expectSlices({{1, 16383, 16384}}, buffer); + auto reservation = buffer.reserveForRead(); + EXPECT_GE(reservation.numSlices(), 2); + EXPECT_GE(reservation.length(), 32767); + EXPECT_EQ(slice1, reservation.slices()[0].mem_); + EXPECT_EQ(16383, reservation.slices()[0].len_); + EXPECT_EQ(16384, reservation.slices()[1].len_); + } // Append a fragment to the buffer, and then request a small reservation. The buffer // should make a new slice to satisfy the reservation; it cannot safely use any of // the previously seen slices, because they are no longer at the end of the buffer. 
- expectSlices({{1, 4095, 4096}}, buffer); - buffer.addBufferFragment(fragment); - EXPECT_EQ(13, buffer.length()); - num_reserved = buffer.reserve(1, iovecs, NumIovecs); - expectSlices({{1, 4095, 4096}, {12, 0, 12}, {0, 4096, 4096}}, buffer); - EXPECT_EQ(1, num_reserved); - EXPECT_NE(slice1, iovecs[0].mem_); - commitReservation(iovecs, num_reserved, buffer); + { + expectSlices({{1, 16383, 16384}}, buffer); + buffer.addBufferFragment(fragment); + EXPECT_EQ(13, buffer.length()); + auto reservation = buffer.reserveForRead(); + EXPECT_NE(slice1, reservation.slices()[0].mem_); + reservation.commit(1); + expectSlices({{1, 16383, 16384}, {12, 0, 12}, {1, 16383, 16384}}, buffer); + } EXPECT_EQ(14, buffer.length()); } + + { + Buffer::OwnedImpl buffer; + uint64_t default_reservation_length; + uint64_t default_slice_length; + { + auto reservation = buffer.reserveForRead(); + default_reservation_length = reservation.length(); + default_slice_length = reservation.slices()[0].len_; + reservation.commit(default_slice_length / 2); + } + { + // Test that the Reservation size is capped at the available space in the Reservation + // inline storage, including using the end of a previous slice, no matter how big the request + // is. + auto reservation = buffer.reserveForReadWithLengthForTest(UINT64_MAX); + EXPECT_EQ(reservation.length(), default_reservation_length - (default_slice_length / 2)); + } + } } TEST_F(OwnedImplTest, ReserveCommitReuse) { Buffer::OwnedImpl buffer; - static constexpr uint64_t NumIovecs = 2; - Buffer::RawSlice iovecs[NumIovecs]; // Reserve 8KB and commit all but a few bytes of it, to ensure that // the last slice of the buffer can hold part but not all of the @@ -874,108 +872,121 @@ TEST_F(OwnedImplTest, ReserveCommitReuse) { // allocate more than the requested 8KB. In case the implementation // uses a power-of-two allocator, the subsequent reservations all // request 16KB. - uint64_t num_reserved = buffer.reserve(8200, iovecs, NumIovecs); - EXPECT_EQ(1, num_reserved); - iovecs[0].len_ = 8000; - buffer.commit(iovecs, 1); + { + auto reservation = buffer.reserveSingleSlice(8200); + reservation.commit(8000); + } EXPECT_EQ(8000, buffer.length()); - // Reserve 16KB. The resulting reservation should span 2 slices. + // Reserve some space. The resulting reservation should span 2 slices. // Commit part of the first slice and none of the second slice. - num_reserved = buffer.reserve(16384, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - const void* first_slice = iovecs[0].mem_; - iovecs[0].len_ = 1; - expectSlices({{8000, 4288, 12288}, {0, 12288, 12288}}, buffer); - buffer.commit(iovecs, 1); + const void* first_slice; + { + expectSlices({{8000, 4288, 12288}}, buffer); + auto reservation = buffer.reserveForRead(); + + // No additional slices are added to the buffer until `commit()` is called + // on the reservation. + expectSlices({{8000, 4288, 12288}}, buffer); + first_slice = reservation.slices()[0].mem_; + + EXPECT_GE(reservation.numSlices(), 2); + reservation.commit(1); + } EXPECT_EQ(8001, buffer.length()); - EXPECT_EQ(first_slice, iovecs[0].mem_); // The second slice is now released because there's nothing in the second slice. expectSlices({{8001, 4287, 12288}}, buffer); - // Reserve 16KB again. - num_reserved = buffer.reserve(16384, iovecs, NumIovecs); - expectSlices({{8001, 4287, 12288}, {0, 12288, 12288}}, buffer); - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(static_cast(first_slice) + 1, - static_cast(iovecs[0].mem_)); + // Reserve again. 
+ { + auto reservation = buffer.reserveForRead(); + EXPECT_GE(reservation.numSlices(), 2); + EXPECT_EQ(static_cast(first_slice) + 1, + static_cast(reservation.slices()[0].mem_)); + } + expectSlices({{8001, 4287, 12288}}, buffer); } -TEST_F(OwnedImplTest, ReserveReuse) { +// Test behavior when the size to commit() is larger than the reservation. +TEST_F(OwnedImplTest, ReserveOverCommit) { Buffer::OwnedImpl buffer; - static constexpr uint64_t NumIovecs = 2; - Buffer::RawSlice iovecs[NumIovecs]; - - // Reserve some space and leave it uncommitted. - uint64_t num_reserved = buffer.reserve(8200, iovecs, NumIovecs); - EXPECT_EQ(1, num_reserved); - const void* first_slice = iovecs[0].mem_; - - // Reserve more space and verify that it begins with the same slice from the last reservation. - num_reserved = buffer.reserve(16384, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(first_slice, iovecs[0].mem_); - const void* second_slice = iovecs[1].mem_; - - // Repeat the last reservation and verify that it yields the same slices. - num_reserved = buffer.reserve(16384, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(first_slice, iovecs[0].mem_); - EXPECT_EQ(second_slice, iovecs[1].mem_); - expectSlices({{0, 12288, 12288}, {0, 4096, 4096}}, buffer); - - // Request a larger reservation, verify that the second entry is replaced with a block with a - // larger size. - num_reserved = buffer.reserve(30000, iovecs, NumIovecs); - const void* third_slice = iovecs[1].mem_; - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(first_slice, iovecs[0].mem_); - EXPECT_EQ(12288, iovecs[0].len_); - EXPECT_NE(second_slice, iovecs[1].mem_); - EXPECT_EQ(30000 - iovecs[0].len_, iovecs[1].len_); - expectSlices({{0, 12288, 12288}, {0, 4096, 4096}, {0, 20480, 20480}}, buffer); - - // Repeating a the reservation request for a smaller block returns the previous entry. - num_reserved = buffer.reserve(16384, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(first_slice, iovecs[0].mem_); - EXPECT_EQ(second_slice, iovecs[1].mem_); - expectSlices({{0, 12288, 12288}, {0, 4096, 4096}, {0, 20480, 20480}}, buffer); - - // Repeat the larger reservation notice that it doesn't match the prior reservation for 30000 - // bytes. - num_reserved = buffer.reserve(30000, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - EXPECT_EQ(first_slice, iovecs[0].mem_); - EXPECT_EQ(12288, iovecs[0].len_); - EXPECT_NE(second_slice, iovecs[1].mem_); - EXPECT_NE(third_slice, iovecs[1].mem_); - EXPECT_EQ(30000 - iovecs[0].len_, iovecs[1].len_); - expectSlices({{0, 12288, 12288}, {0, 4096, 4096}, {0, 20480, 20480}, {0, 20480, 20480}}, buffer); - - // Commit the most recent reservation and verify the representation. - buffer.commit(iovecs, num_reserved); - expectSlices({{12288, 0, 12288}, {0, 4096, 4096}, {0, 20480, 20480}, {17712, 2768, 20480}}, - buffer); + auto reservation = buffer.reserveForRead(); + const auto reservation_length = reservation.length(); + const auto excess_length = reservation_length + 1; +#ifdef NDEBUG + reservation.commit(excess_length); + + // The length should be the Reservation length, not the value passed to commit. + EXPECT_EQ(reservation_length, buffer.length()); +#else + EXPECT_DEATH( + reservation.commit(excess_length), + "length <= length_. Details: commit\\(\\) length must be <= size of the Reservation"); +#endif +} - // Do another reservation. 
- num_reserved = buffer.reserve(16384, iovecs, NumIovecs); - EXPECT_EQ(2, num_reserved); - expectSlices({{12288, 0, 12288}, - {0, 4096, 4096}, - {0, 20480, 20480}, - {17712, 2768, 20480}, - {0, 16384, 16384}}, - buffer); +// Test behavior when the size to commit() is larger than the reservation. +TEST_F(OwnedImplTest, ReserveSingleOverCommit) { + Buffer::OwnedImpl buffer; + auto reservation = buffer.reserveSingleSlice(10); + const auto reservation_length = reservation.length(); + const auto excess_length = reservation_length + 1; +#ifdef NDEBUG + reservation.commit(excess_length); + + // The length should be the Reservation length, not the value passed to commit. + EXPECT_EQ(reservation_length, buffer.length()); +#else + EXPECT_DEATH( + reservation.commit(excess_length), + "length <= slice_.len_. Details: commit\\(\\) length must be <= size of the Reservation"); +#endif +} - // And commit. - buffer.commit(iovecs, num_reserved); - expectSlices({{12288, 0, 12288}, - {0, 4096, 4096}, - {0, 20480, 20480}, - {20480, 0, 20480}, - {13616, 2768, 16384}}, - buffer); +// Test functionality of the `freelist` (a performance optimization) +TEST_F(OwnedImplTest, SliceFreeList) { + Buffer::OwnedImpl b1, b2; + std::vector slices; + { + auto r = b1.reserveForRead(); + for (auto& slice : absl::MakeSpan(r.slices(), r.numSlices())) { + slices.push_back(slice.mem_); + } + r.commit(1); + EXPECT_EQ(slices[0], b1.getRawSlices()[0].mem_); + } + + { + auto r = b2.reserveForRead(); + EXPECT_EQ(slices[1], r.slices()[0].mem_); + r.commit(1); + EXPECT_EQ(slices[1], b2.getRawSlices()[0].mem_); + } + + b1.drain(1); + EXPECT_EQ(0, b1.getRawSlices().size()); + { + auto r = b2.reserveForRead(); + // slices()[0] is the partially used slice that is already part of this buffer. + EXPECT_EQ(slices[2], r.slices()[1].mem_); + } + { + auto r = b1.reserveForRead(); + EXPECT_EQ(slices[2], r.slices()[0].mem_); + } + { + // This causes an underflow in the `freelist` on creation, and overflows it on deletion. + auto r1 = b1.reserveForRead(); + auto r2 = b2.reserveForRead(); + for (auto& r1_slice : absl::MakeSpan(r1.slices(), r1.numSlices())) { + // r1 reservation does not contain the slice that is a part of b2. + EXPECT_NE(r1_slice.mem_, b2.getRawSlices()[0].mem_); + for (auto& r2_slice : absl::MakeSpan(r2.slices(), r2.numSlices())) { + // The two reservations do not share any slices. 
+ EXPECT_NE(r1_slice.mem_, r2_slice.mem_); + } + } + } } TEST_F(OwnedImplTest, Search) { @@ -1139,13 +1150,9 @@ TEST_F(OwnedImplTest, ReserveZeroCommit) { buf.addBufferFragment(frag); buf.prepend("bbbbb"); buf.add(""); - constexpr uint32_t reserve_slices = 16; - Buffer::RawSlice slices[reserve_slices]; - const uint32_t allocated_slices = buf.reserve(1280, slices, reserve_slices); - for (uint32_t i = 0; i < allocated_slices; ++i) { - slices[i].len_ = 0; - } - buf.commit(slices, allocated_slices); + expectSlices({{5, 0, 4096}, {0, 0, 0}}, buf); + { auto reservation = buf.reserveSingleSlice(1280); } + expectSlices({{5, 0, 4096}}, buf); os_fd_t pipe_fds[2] = {0, 0}; auto& os_sys_calls = Api::OsSysCallsSingleton::get(); #ifdef WIN32 @@ -1166,7 +1173,7 @@ TEST_F(OwnedImplTest, ReserveZeroCommit) { ASSERT_EQ(os_sys_calls.close(pipe_fds[1]).rc_, 0); ASSERT_EQ(previous_length, buf.search(data.data(), rc, previous_length, 0)); EXPECT_EQ("bbbbb", buf.toString().substr(0, 5)); - expectSlices({{5, 0, 4096}, {1953, 2143, 4096}}, buf); + expectSlices({{5, 0, 4096}, {1953, 14431, 16384}}, buf); } TEST_F(OwnedImplTest, ReadReserveAndCommit) { diff --git a/test/common/buffer/watermark_buffer_test.cc b/test/common/buffer/watermark_buffer_test.cc index 1ce8149f3dac..b007625b2cd8 100644 --- a/test/common/buffer/watermark_buffer_test.cc +++ b/test/common/buffer/watermark_buffer_test.cc @@ -121,13 +121,20 @@ TEST_F(WatermarkBufferTest, PrependBuffer) { TEST_F(WatermarkBufferTest, Commit) { buffer_.add(TEN_BYTES, 10); EXPECT_EQ(0, times_high_watermark_called_); - RawSlice out; - buffer_.reserve(10, &out, 1); - memcpy(out.mem_, &TEN_BYTES[0], 10); - out.len_ = 10; - buffer_.commit(&out, 1); + { + auto reservation = buffer_.reserveForRead(); + reservation.commit(10); + } EXPECT_EQ(1, times_high_watermark_called_); EXPECT_EQ(20, buffer_.length()); + + { + auto reservation = buffer_.reserveSingleSlice(10); + reservation.commit(10); + } + // Buffer is already above high watermark, so it won't be called a second time. + EXPECT_EQ(1, times_high_watermark_called_); + EXPECT_EQ(30, buffer_.length()); } TEST_F(WatermarkBufferTest, Drain) { @@ -474,10 +481,8 @@ TEST_F(WatermarkBufferTest, OverflowWatermarkDisabledOnVeryHighValue) { const uint32_t segment_denominator = 128; const uint32_t big_segment_len = std::numeric_limits::max() / segment_denominator + 1; for (uint32_t i = 0; i < segment_denominator; ++i) { - Buffer::RawSlice iovecs[2]; - uint64_t num_reserved = buffer1.reserve(big_segment_len, iovecs, 2); - EXPECT_GE(num_reserved, 1); - buffer1.commit(iovecs, num_reserved); + auto reservation = buffer1.reserveSingleSlice(big_segment_len); + reservation.commit(big_segment_len); } EXPECT_GT(buffer1.length(), std::numeric_limits::max()); EXPECT_LT(buffer1.length(), high_watermark_threshold * overflow_multiplier); @@ -486,12 +491,9 @@ TEST_F(WatermarkBufferTest, OverflowWatermarkDisabledOnVeryHighValue) { // Reserve and commit additional space on the buffer beyond the expected // high_watermark_threshold * overflow_multiplier threshold. 
- // Adding high_watermark_threshold * overflow_multiplier - buffer1.length() + 1 bytes - Buffer::RawSlice iovecs[2]; - uint64_t num_reserved = buffer1.reserve( - high_watermark_threshold * overflow_multiplier - buffer1.length() + 1, iovecs, 2); - EXPECT_GE(num_reserved, 1); - buffer1.commit(iovecs, num_reserved); + const uint64_t size = high_watermark_threshold * overflow_multiplier - buffer1.length() + 1; + auto reservation = buffer1.reserveSingleSlice(size); + reservation.commit(size); EXPECT_EQ(buffer1.length(), high_watermark_threshold * overflow_multiplier + 1); EXPECT_EQ(1, high_watermark_buffer1); EXPECT_EQ(0, overflow_watermark_buffer1); diff --git a/test/common/common/utility_test.cc b/test/common/common/utility_test.cc index 48b82909bfce..348a0ae75221 100644 --- a/test/common/common/utility_test.cc +++ b/test/common/common/utility_test.cc @@ -29,6 +29,26 @@ using testing::Not; namespace Envoy { +TEST(IntUtil, roundUpToMultiple) { + // Round up to non-power-of-2 + EXPECT_EQ(3, IntUtil::roundUpToMultiple(1, 3)); + EXPECT_EQ(3, IntUtil::roundUpToMultiple(3, 3)); + EXPECT_EQ(6, IntUtil::roundUpToMultiple(4, 3)); + EXPECT_EQ(6, IntUtil::roundUpToMultiple(5, 3)); + EXPECT_EQ(6, IntUtil::roundUpToMultiple(6, 3)); + EXPECT_EQ(21, IntUtil::roundUpToMultiple(20, 3)); + EXPECT_EQ(21, IntUtil::roundUpToMultiple(21, 3)); + + // Round up to power-of-2 + EXPECT_EQ(0, IntUtil::roundUpToMultiple(0, 4)); + EXPECT_EQ(4, IntUtil::roundUpToMultiple(3, 4)); + EXPECT_EQ(4, IntUtil::roundUpToMultiple(4, 4)); + EXPECT_EQ(8, IntUtil::roundUpToMultiple(5, 4)); + EXPECT_EQ(8, IntUtil::roundUpToMultiple(8, 4)); + EXPECT_EQ(24, IntUtil::roundUpToMultiple(21, 4)); + EXPECT_EQ(24, IntUtil::roundUpToMultiple(24, 4)); +} + TEST(StringUtil, strtoull) { uint64_t out; const char* rest; diff --git a/test/common/http/http2/hpack_fuzz_test.cc b/test/common/http/http2/hpack_fuzz_test.cc index 49fa83db3dbc..cd090faaaabe 100644 --- a/test/common/http/http2/hpack_fuzz_test.cc +++ b/test/common/http/http2/hpack_fuzz_test.cc @@ -41,20 +41,17 @@ Buffer::OwnedImpl encodeHeaders(nghttp2_hd_deflater* deflater, // Estimate the upper bound const size_t buflen = nghttp2_hd_deflate_bound(deflater, input_nv.data(), input_nv.size()); - Buffer::RawSlice iovec; Buffer::OwnedImpl payload; - payload.reserve(buflen, &iovec, 1); - ASSERT(iovec.len_ >= buflen); + auto reservation = payload.reserveSingleSlice(buflen); // Encode using nghttp2 - uint8_t* buf = reinterpret_cast(iovec.mem_); + uint8_t* buf = reinterpret_cast(reservation.slice().mem_); ASSERT(input_nv.data() != nullptr); const ssize_t result = nghttp2_hd_deflate_hd(deflater, buf, buflen, input_nv.data(), input_nv.size()); ASSERT(result >= 0, absl::StrCat("Failed to decode with result ", result)); - iovec.len_ = result; - payload.commit(&iovec, 1); + reservation.commit(result); return payload; } diff --git a/test/extensions/filters/network/postgres_proxy/postgres_decoder_test.cc b/test/extensions/filters/network/postgres_proxy/postgres_decoder_test.cc index b2c81d77de1f..f9ff8b482a6d 100644 --- a/test/extensions/filters/network/postgres_proxy/postgres_decoder_test.cc +++ b/test/extensions/filters/network/postgres_proxy/postgres_decoder_test.cc @@ -519,7 +519,6 @@ class FakeBuffer : public Buffer::Instance { MOCK_METHOD(void, add, (const Instance&), (override)); MOCK_METHOD(void, prepend, (absl::string_view), (override)); MOCK_METHOD(void, prepend, (Instance&), (override)); - MOCK_METHOD(void, commit, (Buffer::RawSlice*, uint64_t), (override)); MOCK_METHOD(void, copyOut, (size_t, 
uint64_t, void*), (const, override)); MOCK_METHOD(void, drain, (uint64_t), (override)); MOCK_METHOD(Buffer::RawSliceVector, getRawSlices, (absl::optional), (const, override)); @@ -529,7 +528,11 @@ class FakeBuffer : public Buffer::Instance { MOCK_METHOD(void*, linearize, (uint32_t), (override)); MOCK_METHOD(void, move, (Instance&), (override)); MOCK_METHOD(void, move, (Instance&, uint64_t), (override)); - MOCK_METHOD(uint64_t, reserve, (uint64_t, Buffer::RawSlice*, uint64_t), (override)); + MOCK_METHOD(Buffer::Reservation, reserveForRead, (), (override)); + MOCK_METHOD(Buffer::ReservationSingleSlice, reserveSingleSlice, (uint64_t, bool), (override)); + MOCK_METHOD(void, commit, + (uint64_t, absl::Span, Buffer::ReservationSlicesOwnerPtr), + (override)); MOCK_METHOD(ssize_t, search, (const void*, uint64_t, size_t, size_t), (const, override)); MOCK_METHOD(bool, startsWith, (absl::string_view), (const, override)); MOCK_METHOD(std::string, toString, (), (const, override)); diff --git a/test/extensions/transport_sockets/tls/ssl_socket_test.cc b/test/extensions/transport_sockets/tls/ssl_socket_test.cc index e080412caa3e..0de35c9b8b65 100644 --- a/test/extensions/transport_sockets/tls/ssl_socket_test.cc +++ b/test/extensions/transport_sockets/tls/ssl_socket_test.cc @@ -4887,13 +4887,9 @@ class SslReadBufferLimitTest : public SslSocketTest { for (uint32_t i = 0; i < num_writes; i++) { Buffer::OwnedImpl data(std::string(write_size, 'a')); - // Incredibly contrived way of making sure that the write buffer has an empty chain in it. if (reserve_write_space) { - Buffer::RawSlice iovecs[2]; - EXPECT_EQ(2UL, data.reserve(16384, iovecs, 2)); - iovecs[0].len_ = 0; - iovecs[1].len_ = 0; - data.commit(iovecs, 2); + data.appendSliceForTest(absl::string_view()); + ASSERT_EQ(0, data.describeSlicesForTest().back().data); } client_connection_->write(data, false); diff --git a/test/extensions/transport_sockets/tls/tls_throughput_benchmark.cc b/test/extensions/transport_sockets/tls/tls_throughput_benchmark.cc index 19ec111bebad..f1e90313b0ca 100644 --- a/test/extensions/transport_sockets/tls/tls_throughput_benchmark.cc +++ b/test/extensions/transport_sockets/tls/tls_throughput_benchmark.cc @@ -33,36 +33,30 @@ static void handleSslError(SSL* ssl, int err, bool is_server) { } static void appendSlice(Buffer::Instance& buffer, uint32_t size) { - Buffer::RawSlice slice; std::string data(size, 'a'); RELEASE_ASSERT(data.size() <= 16384, "short_slice_size can't be larger than full slice"); // A 16kb request currently has inline metadata, which makes it 16384+8. This gets rounded up // to the next page size. Request enough that there is no extra space, to ensure that this results // in a new slice. - buffer.reserve(16384, &slice, 1); + auto reservation = buffer.reserveSingleSlice(16384); - memcpy(slice.mem_, data.data(), data.size()); - slice.len_ = data.size(); - buffer.commit(&slice, 1); + memcpy(reservation.slice().mem_, data.data(), data.size()); + reservation.commit(data.size()); } // If move_slices is true, add full-sized slices using move similar to how HTTP codecs move data // from the filter chain buffer to the output buffer. Else, append full-sized slices directly to the // output buffer like socket read would do. -static void addFullSlices(Buffer::Instance& output_buffer, int num_slices, bool move_slices) { +static void addFullSlices(Buffer::Instance& output_buffer, unsigned num_slices, bool move_slices) { Buffer::OwnedImpl tmp_buf; Buffer::Instance* buffer = move_slices ? 
&tmp_buf : &output_buffer; - for (int i = 0; i < num_slices; i++) { - auto start_size = buffer->length(); - Buffer::RawSlice slices[2]; - auto num_slices = buffer->reserve(16384, slices, 2); - for (unsigned i = 0; i < num_slices; i++) { - memset(slices[i].mem_, 'a', slices[i].len_); - } - buffer->commit(slices, num_slices); - RELEASE_ASSERT(buffer->length() - start_size == 16384, "correct reserve/commit"); + const auto initial_slices = buffer->getRawSlices().size(); + while ((buffer->getRawSlices().size() - initial_slices) < num_slices) { + Buffer::Reservation reservation = buffer->reserveForRead(); + memset(reservation.slices()[0].mem_, 'a', reservation.slices()[0].len_); + reservation.commit(reservation.slices()[0].len_); } if (move_slices) { diff --git a/test/integration/socket_interface_integration_test.cc b/test/integration/socket_interface_integration_test.cc index bcb54fb2bfd0..235867e57739 100644 --- a/test/integration/socket_interface_integration_test.cc +++ b/test/integration/socket_interface_integration_test.cc @@ -136,13 +136,13 @@ TEST_P(SocketInterfaceIntegrationTest, UdpSendToInternalAddressWithSocketInterfa local_valid_address, nullptr); Buffer::OwnedImpl buffer; - Buffer::RawSlice iovec; - buffer.reserve(100, &iovec, 1); + auto reservation = buffer.reserveSingleSlice(100); + auto slice = reservation.slice(); auto result = - socket->ioHandle().sendmsg(&iovec, 1, 0, local_valid_address->ip(), *peer_internal_address); + socket->ioHandle().sendmsg(&slice, 1, 0, local_valid_address->ip(), *peer_internal_address); ASSERT_FALSE(result.ok()); ASSERT_EQ(result.err_->getErrorCode(), Api::IoError::IoErrorCode::NoSupport); } } // namespace -} // namespace Envoy \ No newline at end of file +} // namespace Envoy diff --git a/test/mocks/network/io_handle.h b/test/mocks/network/io_handle.h index 132f314048d1..c154269585cc 100644 --- a/test/mocks/network/io_handle.h +++ b/test/mocks/network/io_handle.h @@ -24,7 +24,8 @@ class MockIoHandle : public IoHandle { MOCK_METHOD(bool, isOpen, (), (const)); MOCK_METHOD(Api::IoCallUint64Result, readv, (uint64_t max_length, Buffer::RawSlice* slices, uint64_t num_slice)); - MOCK_METHOD(Api::IoCallUint64Result, read, (Buffer::Instance & buffer, uint64_t max_length)); + MOCK_METHOD(Api::IoCallUint64Result, read, + (Buffer::Instance & buffer, absl::optional max_length)); MOCK_METHOD(Api::IoCallUint64Result, writev, (const Buffer::RawSlice* slices, uint64_t num_slice)); MOCK_METHOD(Api::IoCallUint64Result, write, (Buffer::Instance & buffer)); From deed328494064bf28e09055c1cda4e3a3cdd6b67 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Mon, 1 Feb 2021 13:37:11 -0800 Subject: [PATCH 4/4] healthcheck: exclude hosts when receiving x-envoy-immediate-health-check-fail (#14772) * Send x-envoy-immediate-health-check-fail on all responses that the health check filter processes, not just non-HC responses. * Exclude hosts from load balancing when x-envoy-immediate-health-check-fail is received. * Can be reverted via the envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster feature flag. 
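For illustration, a minimal C++ sketch of how a data-plane caller is expected to use the new UnhealthyType argument once it has seen x-envoy-immediate-health-check-fail on a response. The wrapper function and its parameter are hypothetical; only the setUnhealthy() call mirrors the router change in the diff below, and header inspection is elided.

#include "envoy/upstream/health_check_host_monitor.h"
#include "envoy/upstream/upstream.h"

namespace Envoy {

// Hypothetical helper, for illustration only: invoked after an upstream response carried
// x-envoy-immediate-health-check-fail. Only the setUnhealthy() call reflects the interface
// introduced by this patch.
void markHostImmediatelyFailed(const Upstream::HostDescription& host) {
  // Fails the host for active HC and, with this change, also excludes it from load
  // balancing until it passes active health checking again.
  host.healthChecker().setUnhealthy(
      Upstream::HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail);
}

} // namespace Envoy

Whether the exclusion actually takes effect is gated by the envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster flag noted above.
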
Fixes https://github.com/envoyproxy/envoy/issues/9246 Signed-off-by: Matt Klein --- api/envoy/admin/v3/clusters.proto | 9 +- api/envoy/admin/v4alpha/clusters.proto | 9 +- api/envoy/config/cluster/v3/cluster.proto | 22 +--- .../config/cluster/v4alpha/cluster.proto | 22 +--- .../data/core/v3/health_check_event.proto | 1 + .../http/http_filters/health_check_filter.rst | 9 +- .../http/http_filters/router_filter.rst | 13 +- .../cluster_manager/cluster_stats.rst | 5 +- .../upstream/health_checking.rst | 12 +- .../upstream/load_balancing/excluded.rst | 29 +++++ .../load_balancing/load_balancing.rst | 1 + docs/root/version_history/current.rst | 15 +++ .../envoy/admin/v3/clusters.proto | 9 +- .../envoy/admin/v4alpha/clusters.proto | 9 +- .../envoy/config/cluster/v3/cluster.proto | 22 +--- .../config/cluster/v4alpha/cluster.proto | 22 +--- .../data/core/v3/health_check_event.proto | 1 + .../upstream/health_check_host_monitor.h | 12 +- include/envoy/upstream/upstream.h | 28 ++--- source/common/router/router.cc | 4 +- source/common/runtime/runtime_features.cc | 1 + .../upstream/health_checker_base_impl.cc | 44 +++++-- .../upstream/health_checker_base_impl.h | 5 +- source/common/upstream/health_checker_impl.cc | 19 ++- .../upstream/health_discovery_service.cc | 11 +- source/common/upstream/host_utility.cc | 14 +++ source/common/upstream/upstream_impl.cc | 25 ++-- source/common/upstream/upstream_impl.h | 10 +- .../filters/http/health_check/health_check.cc | 6 +- source/server/admin/clusters_handler.cc | 8 ++ test/common/router/router_test.cc | 22 ++-- .../upstream/health_checker_impl_test.cc | 118 +++++++++++++----- test/common/upstream/host_utility_test.cc | 3 +- .../upstream/logical_dns_cluster_test.cc | 9 +- test/common/upstream/upstream_impl_test.cc | 85 +++++++++++-- .../filters/http/health_check/BUILD | 1 + .../http/health_check/health_check_test.cc | 38 +++++- .../health_check_integration_test.cc | 53 +++++++- test/mocks/upstream/host.h | 4 +- 39 files changed, 505 insertions(+), 225 deletions(-) create mode 100644 docs/root/intro/arch_overview/upstream/load_balancing/excluded.rst diff --git a/api/envoy/admin/v3/clusters.proto b/api/envoy/admin/v3/clusters.proto index 8eeaec20becc..7a5e83c9aac4 100644 --- a/api/envoy/admin/v3/clusters.proto +++ b/api/envoy/admin/v3/clusters.proto @@ -139,7 +139,7 @@ message HostStatus { } // Health status for a host. -// [#next-free-field: 7] +// [#next-free-field: 9] message HostHealthStatus { option (udpa.annotations.versioning).previous_message_type = "envoy.admin.v2alpha.HostHealthStatus"; @@ -160,6 +160,13 @@ message HostHealthStatus { // The host has not yet been health checked. bool pending_active_hc = 6; + // The host should be excluded from panic, spillover, etc. calculations because it was explicitly + // taken out of rotation via protocol signal and is not meant to be routed to. + bool excluded_via_immediate_hc_fail = 7; + + // The host failed active HC due to timeout. + bool active_hc_timeout = 8; + // Health status as reported by EDS. Note: only HEALTHY and UNHEALTHY are currently supported // here. // [#comment:TODO(mrice32): pipe through remaining EDS health status possibilities.] diff --git a/api/envoy/admin/v4alpha/clusters.proto b/api/envoy/admin/v4alpha/clusters.proto index 10d920976930..cc4525576fb1 100644 --- a/api/envoy/admin/v4alpha/clusters.proto +++ b/api/envoy/admin/v4alpha/clusters.proto @@ -139,7 +139,7 @@ message HostStatus { } // Health status for a host. 
-// [#next-free-field: 7] +// [#next-free-field: 9] message HostHealthStatus { option (udpa.annotations.versioning).previous_message_type = "envoy.admin.v3.HostHealthStatus"; @@ -159,6 +159,13 @@ message HostHealthStatus { // The host has not yet been health checked. bool pending_active_hc = 6; + // The host should be excluded from panic, spillover, etc. calculations because it was explicitly + // taken out of rotation via protocol signal and is not meant to be routed to. + bool excluded_via_immediate_hc_fail = 7; + + // The host failed active HC due to timeout. + bool active_hc_timeout = 8; + // Health status as reported by EDS. Note: only HEALTHY and UNHEALTHY are currently supported // here. // [#comment:TODO(mrice32): pipe through remaining EDS health status possibilities.] diff --git a/api/envoy/config/cluster/v3/cluster.proto b/api/envoy/config/cluster/v3/cluster.proto index 90cd990a340a..9ef327990f5c 100644 --- a/api/envoy/config/cluster/v3/cluster.proto +++ b/api/envoy/config/cluster/v3/cluster.proto @@ -536,25 +536,9 @@ message Cluster { // https://github.com/envoyproxy/envoy/pull/3941. google.protobuf.Duration update_merge_window = 4; - // If set to true, Envoy will not consider new hosts when computing load balancing weights until - // they have been health checked for the first time. This will have no effect unless - // active health checking is also configured. - // - // Ignoring a host means that for any load balancing calculations that adjust weights based - // on the ratio of eligible hosts and total hosts (priority spillover, locality weighting and - // panic mode) Envoy will exclude these hosts in the denominator. - // - // For example, with hosts in two priorities P0 and P1, where P0 looks like - // {healthy, unhealthy (new), unhealthy (new)} - // and where P1 looks like - // {healthy, healthy} - // all traffic will still hit P0, as 1 / (3 - 2) = 1. - // - // Enabling this will allow scaling up the number of hosts for a given cluster without entering - // panic mode or triggering priority spillover, assuming the hosts pass the first health check. - // - // If panic mode is triggered, new hosts are still eligible for traffic; they simply do not - // contribute to the calculation when deciding whether panic mode is enabled or not. + // If set to true, Envoy will :ref:`exclude ` new hosts + // when computing load balancing weights until they have been health checked for the first time. + // This will have no effect unless active health checking is also configured. bool ignore_new_hosts_until_first_hc = 5; // If set to `true`, the cluster manager will drain all existing diff --git a/api/envoy/config/cluster/v4alpha/cluster.proto b/api/envoy/config/cluster/v4alpha/cluster.proto index 2d8aa4369b40..0b1fd62ba363 100644 --- a/api/envoy/config/cluster/v4alpha/cluster.proto +++ b/api/envoy/config/cluster/v4alpha/cluster.proto @@ -540,25 +540,9 @@ message Cluster { // https://github.com/envoyproxy/envoy/pull/3941. google.protobuf.Duration update_merge_window = 4; - // If set to true, Envoy will not consider new hosts when computing load balancing weights until - // they have been health checked for the first time. This will have no effect unless - // active health checking is also configured. - // - // Ignoring a host means that for any load balancing calculations that adjust weights based - // on the ratio of eligible hosts and total hosts (priority spillover, locality weighting and - // panic mode) Envoy will exclude these hosts in the denominator. 
- // - // For example, with hosts in two priorities P0 and P1, where P0 looks like - // {healthy, unhealthy (new), unhealthy (new)} - // and where P1 looks like - // {healthy, healthy} - // all traffic will still hit P0, as 1 / (3 - 2) = 1. - // - // Enabling this will allow scaling up the number of hosts for a given cluster without entering - // panic mode or triggering priority spillover, assuming the hosts pass the first health check. - // - // If panic mode is triggered, new hosts are still eligible for traffic; they simply do not - // contribute to the calculation when deciding whether panic mode is enabled or not. + // If set to true, Envoy will :ref:`exclude ` new hosts + // when computing load balancing weights until they have been health checked for the first time. + // This will have no effect unless active health checking is also configured. bool ignore_new_hosts_until_first_hc = 5; // If set to `true`, the cluster manager will drain all existing diff --git a/api/envoy/data/core/v3/health_check_event.proto b/api/envoy/data/core/v3/health_check_event.proto index 2b0f9d888f46..66624938dc4e 100644 --- a/api/envoy/data/core/v3/health_check_event.proto +++ b/api/envoy/data/core/v3/health_check_event.proto @@ -22,6 +22,7 @@ enum HealthCheckFailureType { ACTIVE = 0; PASSIVE = 1; NETWORK = 2; + NETWORK_TIMEOUT = 3; } enum HealthCheckerType { diff --git a/docs/root/configuration/http/http_filters/health_check_filter.rst b/docs/root/configuration/http/http_filters/health_check_filter.rst index 809b1fd42e98..a3941daa119b 100644 --- a/docs/root/configuration/http/http_filters/health_check_filter.rst +++ b/docs/root/configuration/http/http_filters/health_check_filter.rst @@ -11,7 +11,8 @@ Health check Note that the filter will automatically fail health checks and set the :ref:`x-envoy-immediate-health-check-fail - ` header if the - :ref:`/healthcheck/fail ` admin endpoint has been - called. (The :ref:`/healthcheck/ok ` admin endpoint - reverses this behavior). + ` header on all responses (both + health check and normal requests) if the :ref:`/healthcheck/fail + ` admin endpoint has been called. (The + :ref:`/healthcheck/ok ` admin endpoint reverses this + behavior). diff --git a/docs/root/configuration/http/http_filters/router_filter.rst b/docs/root/configuration/http/http_filters/router_filter.rst index 0c5ecb353783..4cd3659c2d56 100644 --- a/docs/root/configuration/http/http_filters/router_filter.rst +++ b/docs/root/configuration/http/http_filters/router_filter.rst @@ -227,7 +227,7 @@ x-envoy-upstream-rq-timeout-alt-response ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Setting this header will cause Envoy to set a 204 response code (instead of 504) in the event of a request timeout. -The actual value of the header is ignored; only its presence is considered. See also +The actual value of the header is ignored; only its presence is considered. See also :ref:`config_http_filters_router_x-envoy-upstream-rq-timeout-ms`. .. _config_http_filters_router_x-envoy-upstream-rq-timeout-ms: @@ -294,11 +294,12 @@ x-envoy-immediate-health-check-fail If the upstream host returns this header (set to any value), Envoy will immediately assume the upstream host has failed :ref:`active health checking ` (if the -cluster has been :ref:`configured ` for active health checking). -This can be used to fast fail an upstream host via standard data plane processing without waiting -for the next health check interval. The host can become healthy again via standard active health -checks. 
See the :ref:`health checking overview ` for more -information. +cluster has been :ref:`configured ` for active health checking) +and :ref:`exclude ` it from load balancing. This can be used +to fast fail an upstream host via standard data plane processing without waiting for the next health +check interval. The host can become healthy again via standard active health checks. See the +:ref:`active health checking fast failure overview ` for +more information. .. _config_http_filters_router_x-envoy-ratelimited: diff --git a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst index b6e5389e7da3..fc7390d35c72 100644 --- a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst +++ b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst @@ -64,7 +64,7 @@ Every cluster has a statistics tree rooted at *cluster..* with the followi upstream_rq_active, Gauge, Total active requests upstream_rq_pending_total, Counter, Total requests pending a connection pool connection upstream_rq_pending_overflow, Counter, Total requests that overflowed connection pool or requests (mainly for HTTP/2) circuit breaking and were failed - upstream_rq_pending_failure_eject, Counter, Total requests that were failed due to a connection pool connection failure or remote connection termination + upstream_rq_pending_failure_eject, Counter, Total requests that were failed due to a connection pool connection failure or remote connection termination upstream_rq_pending_active, Gauge, Total active requests pending a connection pool connection upstream_rq_cancelled, Counter, Total requests cancelled before obtaining a connection pool connection upstream_rq_maintenance_mode, Counter, Total requests that resulted in an immediate 503 due to :ref:`maintenance mode` @@ -87,7 +87,8 @@ Every cluster has a statistics tree rooted at *cluster..* with the followi upstream_internal_redirect_succeed_total, Counter, Total number of times internal redirects resulted in a second upstream request. membership_change, Counter, Total cluster membership changes membership_healthy, Gauge, Current cluster healthy total (inclusive of both health checking and outlier detection) - membership_degraded, Gauge, Current cluster degraded total + membership_degraded, Gauge, Current cluster :ref:`degraded ` total + membership_excluded, Gauge, Current cluster :ref:`excluded ` total membership_total, Gauge, Current cluster membership total retry_or_shadow_abandoned, Counter, Total number of times shadowing or retry buffering was canceled due to buffer limits config_reload, Counter, Total API fetches that resulted in a config reload due to a different config diff --git a/docs/root/intro/arch_overview/upstream/health_checking.rst b/docs/root/intro/arch_overview/upstream/health_checking.rst index d6b7bf9cc618..d1ec07bd7280 100644 --- a/docs/root/intro/arch_overview/upstream/health_checking.rst +++ b/docs/root/intro/arch_overview/upstream/health_checking.rst @@ -121,6 +121,8 @@ Further reading: * :ref:`/healthcheck/fail ` admin endpoint. * :ref:`/healthcheck/ok ` admin endpoint. +.. _arch_overview_health_checking_fast_failure: + Active health checking fast failure ----------------------------------- @@ -129,10 +131,12 @@ When using active health checking along with passive health checking (:ref:`outl large amount of active health checking traffic. 
In this case, it is still useful to be able to quickly drain an upstream host when using the :ref:`/healthcheck/fail ` admin endpoint. To support this, the :ref:`router -filter ` will respond to the :ref:`x-envoy-immediate-health-check-fail +filter ` *and* the HTTP active health checker will respond to the +:ref:`x-envoy-immediate-health-check-fail ` header. If this header is set by -an upstream host, Envoy will immediately mark the host as being failed for active health check. Note -that this only occurs if the host's cluster has active health checking :ref:`configured +an upstream host, Envoy will immediately mark the host as being failed for active health check and +:ref:`excluded ` from load balancing. Note that this only +occurs if the host's cluster has active health checking :ref:`configured `. The :ref:`health checking filter ` will automatically set this header if Envoy has been marked as failed via the :ref:`/healthcheck/fail ` admin @@ -152,7 +156,7 @@ is that overall configuration becomes more complicated as every health check URL The Envoy HTTP health checker supports the :ref:`service_name_matcher ` option. If this option is set, -the health checker additionally compares the value of the *x-envoy-upstream-healthchecked-cluster* +the health checker additionally compares the value of the *x-envoy-upstream-healthchecked-cluster* response header to *service_name_matcher*. If the values do not match, the health check does not pass. The upstream health check filter appends *x-envoy-upstream-healthchecked-cluster* to the response headers. The appended value is determined by the :option:`--service-cluster` command line option. diff --git a/docs/root/intro/arch_overview/upstream/load_balancing/excluded.rst b/docs/root/intro/arch_overview/upstream/load_balancing/excluded.rst new file mode 100644 index 000000000000..6147c8dbc35b --- /dev/null +++ b/docs/root/intro/arch_overview/upstream/load_balancing/excluded.rst @@ -0,0 +1,29 @@ +.. _arch_overview_load_balancing_excluded: + +Excluded endpoints +------------------ + +Certain conditions may cause Envoy to *exclude* endpoints from load balancing. Excluding a host +means that for any load balancing calculations that adjust weights based on the ratio of eligible +hosts and total hosts (priority spillover, locality weighting and panic mode) Envoy will exclude +these hosts in the denominator. + +For example, with hosts in two priorities P0 and P1, where P0 looks like {healthy, unhealthy +(excluded), unhealthy (excluded)} and where P1 looks like {healthy, healthy} all traffic will still +hit P0, as 1 / (3 - 2) = 1. + +Excluded hosts allow scaling up or down the number of hosts for a given cluster without entering +panic mode or triggering priority spillover. + +If panic mode is triggered, excluded hosts are still eligible for traffic; they simply do not +contribute to the calculation when deciding whether panic mode is enabled or not. + +Currently, the following two conditions can lead to a host being excluded when using active +health checking: + +* Using the :ref:`ignore_new_hosts_until_first_hc + ` cluster option. +* Receiving the :ref:`x-envoy-immediate-health-check-fail + ` header in a normal routed + response or in response to an :ref:`HTTP active health check + `. 
\ No newline at end of file diff --git a/docs/root/intro/arch_overview/upstream/load_balancing/load_balancing.rst b/docs/root/intro/arch_overview/upstream/load_balancing/load_balancing.rst index 27767f7fc5e3..36e0fddd3ca8 100644 --- a/docs/root/intro/arch_overview/upstream/load_balancing/load_balancing.rst +++ b/docs/root/intro/arch_overview/upstream/load_balancing/load_balancing.rst @@ -11,6 +11,7 @@ Load Balancing locality_weight overprovisioning panic_threshold + excluded original_dst zone_aware subsets diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index b8135a031544..fe3dd1b5e9ea 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -14,6 +14,21 @@ Minor Behavior Changes ---------------------- *Changes that may cause incompatibilities for some users, but should not for most* +* healthcheck: the :ref:`health check filter ` now sends the + :ref:`x-envoy-immediate-health-check-fail ` header + for all responses when Envoy is in the health check failed state. Additionally, receiving the + :ref:`x-envoy-immediate-health-check-fail ` + header (either in response to normal traffic or in response to an HTTP :ref:`active health check `) will + cause Envoy to immediately :ref:`exclude ` the host from + load balancing calculations. This has the useful property that such hosts, which are being + explicitly told to disable traffic, will not be counted for panic routing calculations. See the + excluded documentation for more information. This behavior can be temporarily reverted by setting + the `envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster` feature flag + to false. Note that the runtime flag covers *both* the health check filter responding with + `x-envoy-immediate-health-check-fail` in all cases (versus just non-HC requests) as well as + whether receiving `x-envoy-immediate-health-check-fail` will cause exclusion or not. Thus, + depending on the Envoy deployment, the feature flag may need to be flipped on both downstream + and upstream instances, depending on the reason. * http: allow to use path canonicalizer from `googleurl `_ instead of `//source/common/chromium_url`. The new path canonicalizer is enabled by default. To revert to the legacy path canonicalizer, enable the runtime flag diff --git a/generated_api_shadow/envoy/admin/v3/clusters.proto b/generated_api_shadow/envoy/admin/v3/clusters.proto index 8eeaec20becc..7a5e83c9aac4 100644 --- a/generated_api_shadow/envoy/admin/v3/clusters.proto +++ b/generated_api_shadow/envoy/admin/v3/clusters.proto @@ -139,7 +139,7 @@ message HostStatus { } // Health status for a host. -// [#next-free-field: 7] +// [#next-free-field: 9] message HostHealthStatus { option (udpa.annotations.versioning).previous_message_type = "envoy.admin.v2alpha.HostHealthStatus"; @@ -160,6 +160,13 @@ message HostHealthStatus { // The host has not yet been health checked. bool pending_active_hc = 6; + // The host should be excluded from panic, spillover, etc. calculations because it was explicitly + // taken out of rotation via protocol signal and is not meant to be routed to. + bool excluded_via_immediate_hc_fail = 7; + + // The host failed active HC due to timeout. + bool active_hc_timeout = 8; + // Health status as reported by EDS. Note: only HEALTHY and UNHEALTHY are currently supported // here. // [#comment:TODO(mrice32): pipe through remaining EDS health status possibilities.] 
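To make the denominator adjustment described in the new excluded-endpoints document above concrete, here is a self-contained C++ sketch. PriorityCounts and availabilityPercent are invented names for illustration; this deliberately ignores Envoy's real overprovisioning factor and priority-load machinery.

#include <algorithm>
#include <cstdint>
#include <iostream>

struct PriorityCounts {
  uint32_t healthy;
  uint32_t excluded; // e.g. hosts that sent x-envoy-immediate-health-check-fail
  uint32_t total;
};

// Availability as a percentage, with excluded hosts dropped from the denominator.
uint32_t availabilityPercent(const PriorityCounts& p) {
  const uint32_t denominator = p.total - std::min(p.excluded, p.total);
  if (denominator == 0) {
    return 0;
  }
  return std::min<uint32_t>(100, 100 * p.healthy / denominator);
}

int main() {
  // The excluded.rst example: P0 = {healthy, excluded, excluded}, P1 = {healthy, healthy}.
  PriorityCounts p0{1, 2, 3};
  PriorityCounts p1{2, 0, 2};
  // P0 evaluates to 1 / (3 - 2) = 100% available, so under priority spillover it would
  // still take all traffic.
  std::cout << "P0 availability: " << availabilityPercent(p0) << "%\n";
  std::cout << "P1 availability: " << availabilityPercent(p1) << "%\n";
  return 0;
}

With P0 reporting full availability, no load spills over to P1, matching the 1 / (3 - 2) = 1 example in the document above.
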
diff --git a/generated_api_shadow/envoy/admin/v4alpha/clusters.proto b/generated_api_shadow/envoy/admin/v4alpha/clusters.proto index 10d920976930..cc4525576fb1 100644 --- a/generated_api_shadow/envoy/admin/v4alpha/clusters.proto +++ b/generated_api_shadow/envoy/admin/v4alpha/clusters.proto @@ -139,7 +139,7 @@ message HostStatus { } // Health status for a host. -// [#next-free-field: 7] +// [#next-free-field: 9] message HostHealthStatus { option (udpa.annotations.versioning).previous_message_type = "envoy.admin.v3.HostHealthStatus"; @@ -159,6 +159,13 @@ message HostHealthStatus { // The host has not yet been health checked. bool pending_active_hc = 6; + // The host should be excluded from panic, spillover, etc. calculations because it was explicitly + // taken out of rotation via protocol signal and is not meant to be routed to. + bool excluded_via_immediate_hc_fail = 7; + + // The host failed active HC due to timeout. + bool active_hc_timeout = 8; + // Health status as reported by EDS. Note: only HEALTHY and UNHEALTHY are currently supported // here. // [#comment:TODO(mrice32): pipe through remaining EDS health status possibilities.] diff --git a/generated_api_shadow/envoy/config/cluster/v3/cluster.proto b/generated_api_shadow/envoy/config/cluster/v3/cluster.proto index f3859bf35d22..941e41012293 100644 --- a/generated_api_shadow/envoy/config/cluster/v3/cluster.proto +++ b/generated_api_shadow/envoy/config/cluster/v3/cluster.proto @@ -536,25 +536,9 @@ message Cluster { // https://github.com/envoyproxy/envoy/pull/3941. google.protobuf.Duration update_merge_window = 4; - // If set to true, Envoy will not consider new hosts when computing load balancing weights until - // they have been health checked for the first time. This will have no effect unless - // active health checking is also configured. - // - // Ignoring a host means that for any load balancing calculations that adjust weights based - // on the ratio of eligible hosts and total hosts (priority spillover, locality weighting and - // panic mode) Envoy will exclude these hosts in the denominator. - // - // For example, with hosts in two priorities P0 and P1, where P0 looks like - // {healthy, unhealthy (new), unhealthy (new)} - // and where P1 looks like - // {healthy, healthy} - // all traffic will still hit P0, as 1 / (3 - 2) = 1. - // - // Enabling this will allow scaling up the number of hosts for a given cluster without entering - // panic mode or triggering priority spillover, assuming the hosts pass the first health check. - // - // If panic mode is triggered, new hosts are still eligible for traffic; they simply do not - // contribute to the calculation when deciding whether panic mode is enabled or not. + // If set to true, Envoy will :ref:`exclude ` new hosts + // when computing load balancing weights until they have been health checked for the first time. + // This will have no effect unless active health checking is also configured. bool ignore_new_hosts_until_first_hc = 5; // If set to `true`, the cluster manager will drain all existing diff --git a/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto b/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto index 9fb018b4ee69..882bbf9963e7 100644 --- a/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto +++ b/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto @@ -541,25 +541,9 @@ message Cluster { // https://github.com/envoyproxy/envoy/pull/3941. 
google.protobuf.Duration update_merge_window = 4; - // If set to true, Envoy will not consider new hosts when computing load balancing weights until - // they have been health checked for the first time. This will have no effect unless - // active health checking is also configured. - // - // Ignoring a host means that for any load balancing calculations that adjust weights based - // on the ratio of eligible hosts and total hosts (priority spillover, locality weighting and - // panic mode) Envoy will exclude these hosts in the denominator. - // - // For example, with hosts in two priorities P0 and P1, where P0 looks like - // {healthy, unhealthy (new), unhealthy (new)} - // and where P1 looks like - // {healthy, healthy} - // all traffic will still hit P0, as 1 / (3 - 2) = 1. - // - // Enabling this will allow scaling up the number of hosts for a given cluster without entering - // panic mode or triggering priority spillover, assuming the hosts pass the first health check. - // - // If panic mode is triggered, new hosts are still eligible for traffic; they simply do not - // contribute to the calculation when deciding whether panic mode is enabled or not. + // If set to true, Envoy will :ref:`exclude ` new hosts + // when computing load balancing weights until they have been health checked for the first time. + // This will have no effect unless active health checking is also configured. bool ignore_new_hosts_until_first_hc = 5; // If set to `true`, the cluster manager will drain all existing diff --git a/generated_api_shadow/envoy/data/core/v3/health_check_event.proto b/generated_api_shadow/envoy/data/core/v3/health_check_event.proto index 2b0f9d888f46..66624938dc4e 100644 --- a/generated_api_shadow/envoy/data/core/v3/health_check_event.proto +++ b/generated_api_shadow/envoy/data/core/v3/health_check_event.proto @@ -22,6 +22,7 @@ enum HealthCheckFailureType { ACTIVE = 0; PASSIVE = 1; NETWORK = 2; + NETWORK_TIMEOUT = 3; } enum HealthCheckerType { diff --git a/include/envoy/upstream/health_check_host_monitor.h b/include/envoy/upstream/health_check_host_monitor.h index b32dcfa23d65..7bfd71872088 100644 --- a/include/envoy/upstream/health_check_host_monitor.h +++ b/include/envoy/upstream/health_check_host_monitor.h @@ -17,11 +17,21 @@ class HealthCheckHostMonitor { public: virtual ~HealthCheckHostMonitor() = default; + /** + * The reason the host is being set unhealthy via the monitor. + */ + enum class UnhealthyType { + // Protocol indication (e.g., x-envoy-immediate-health-check-fail) that the host should be + // immediately taken out of rotation. + ImmediateHealthCheckFail + }; + /** * Mark the host as unhealthy. Note that this may not be immediate as events may need to be * propagated between multiple threads. + * @param type specifies the reason the host is being marked unhealthy. */ - virtual void setUnhealthy() PURE; + virtual void setUnhealthy(UnhealthyType type) PURE; }; using HealthCheckHostMonitorPtr = std::unique_ptr; diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 74582b1c41a6..792047b0ee40 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -59,7 +59,13 @@ class Host : virtual public HostDescription { /* active HC. */ \ m(PENDING_DYNAMIC_REMOVAL, 0x20) \ /* The host is pending its initial active health check. */ \ - m(PENDING_ACTIVE_HC, 0x40) + m(PENDING_ACTIVE_HC, 0x40) \ + /* The host should be excluded from panic, spillover, etc. 
calculations */ \ + /* because it was explicitly taken out of rotation via protocol signal and */ \ + /* is not meant to be routed to. */ \ + m(EXCLUDED_VIA_IMMEDIATE_HC_FAIL, 0x80) \ + /* The host failed active HC due to timeout. */ \ + m(ACTIVE_HC_TIMEOUT, 0x100) // clang-format on #define DECLARE_ENUM(name, value) name = value, @@ -68,15 +74,6 @@ class Host : virtual public HostDescription { #undef DECLARE_ENUM - enum class ActiveHealthFailureType { - // The failure type is unknown, all hosts' failure types are initialized as UNKNOWN - UNKNOWN, - // The host is actively responding it's unhealthy - UNHEALTHY, - // The host is timing out - TIMEOUT, - }; - /** * @return host specific counters. */ @@ -156,17 +153,6 @@ class Host : virtual public HostDescription { */ virtual Health health() const PURE; - /** - * Returns the host's ActiveHealthFailureType. Types are specified in ActiveHealthFailureType. - */ - virtual ActiveHealthFailureType getActiveHealthFailureType() const PURE; - - /** - * Set the most recent health failure type for a host. Types are specified in - * ActiveHealthFailureType. - */ - virtual void setActiveHealthFailureType(ActiveHealthFailureType flag) PURE; - /** * Set the host's health checker monitor. Monitors are assumed to be thread safe, however * a new monitor must be installed before the host is used across threads. Thus, diff --git a/source/common/router/router.cc b/source/common/router/router.cc index c2f355e6da35..451af4404acb 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -12,6 +12,7 @@ #include "envoy/http/conn_pool.h" #include "envoy/runtime/runtime.h" #include "envoy/upstream/cluster_manager.h" +#include "envoy/upstream/health_check_host_monitor.h" #include "envoy/upstream/upstream.h" #include "common/common/assert.h" @@ -1197,7 +1198,8 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::ResponseHeaderMapPt } if (headers->EnvoyImmediateHealthCheckFail() != nullptr) { - upstream_request.upstreamHost()->healthChecker().setUnhealthy(); + upstream_request.upstreamHost()->healthChecker().setUnhealthy( + Upstream::HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); } bool could_not_retry = false; diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 0f5a70191912..2dd713892d31 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -67,6 +67,7 @@ constexpr const char* runtime_features[] = { "envoy.reloadable_features.grpc_web_fix_non_proto_encoded_response_handling", "envoy.reloadable_features.hcm_stream_error_on_invalid_message", "envoy.reloadable_features.health_check.graceful_goaway_handling", + "envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster", "envoy.reloadable_features.http_default_alpn", "envoy.reloadable_features.http_match_on_all_headers", "envoy.reloadable_features.http_set_copy_replace_all_headers", diff --git a/source/common/upstream/health_checker_base_impl.cc b/source/common/upstream/health_checker_base_impl.cc index eae8e808244c..2c22a199dddf 100644 --- a/source/common/upstream/health_checker_base_impl.cc +++ b/source/common/upstream/health_checker_base_impl.cc @@ -158,7 +158,6 @@ HealthCheckerImplBase::intervalWithJitter(uint64_t base_time_ms, void HealthCheckerImplBase::addHosts(const HostVector& hosts) { for (const HostSharedPtr& host : hosts) { active_sessions_[host] = makeSession(host); - 
host->setActiveHealthFailureType(Host::ActiveHealthFailureType::UNKNOWN); host->setHealthChecker( HealthCheckHostMonitorPtr{new HealthCheckHostMonitorImpl(shared_from_this(), host)}); active_sessions_[host]->start(); @@ -184,15 +183,20 @@ void HealthCheckerImplBase::runCallbacks(HostSharedPtr host, HealthTransition ch } } -void HealthCheckerImplBase::HealthCheckHostMonitorImpl::setUnhealthy() { +void HealthCheckerImplBase::HealthCheckHostMonitorImpl::setUnhealthy(UnhealthyType type) { // This is called cross thread. The cluster/health checker might already be gone. std::shared_ptr health_checker = health_checker_.lock(); if (health_checker) { - health_checker->setUnhealthyCrossThread(host_.lock()); + health_checker->setUnhealthyCrossThread(host_.lock(), type); } } -void HealthCheckerImplBase::setUnhealthyCrossThread(const HostSharedPtr& host) { +void HealthCheckerImplBase::setUnhealthyCrossThread(const HostSharedPtr& host, + HealthCheckHostMonitor::UnhealthyType type) { + if (type == HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail) { + host->healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL); + } + // The threading here is complex. The cluster owns the only strong reference to the health // checker. It might go away when we post to the main thread from a worker thread. To deal with // this we use the following sequence of events: @@ -267,6 +271,13 @@ void HealthCheckerImplBase::ActiveHealthCheckSession::handleSuccess(bool degrade // it to healthy. This makes startup faster with a small reduction in overall reliability // depending on the HC settings. if (first_check_ || ++num_healthy_ == parent_.healthy_threshold_) { + // If the host moves to healthy, clear active HC timeout, which may be toggled off and on + // while the host is unhealthy. + host_->healthFlagClear(Host::HealthFlag::ACTIVE_HC_TIMEOUT); + // A host that was told to exclude based on immediate failure, but is now passing, should + // no longer be excluded. + host_->healthFlagClear(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL); + host_->healthFlagClear(Host::HealthFlag::FAILED_ACTIVE_HC); parent_.incHealthy(); changed_state = HealthTransition::Changed; @@ -310,6 +321,14 @@ void HealthCheckerImplBase::ActiveHealthCheckSession::handleSuccess(bool degrade interval_timer_->enableTimer(parent_.interval(HealthState::Healthy, changed_state)); } +namespace { + +bool networkHealthCheckFailureType(envoy::data::core::v3::HealthCheckFailureType type) { + return type == envoy::data::core::v3::NETWORK || type == envoy::data::core::v3::NETWORK_TIMEOUT; +} + +} // namespace + HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy( envoy::data::core::v3::HealthCheckFailureType type) { // If we are unhealthy, reset the # of healthy to zero. @@ -317,8 +336,7 @@ HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy( HealthTransition changed_state = HealthTransition::Unchanged; if (!host_->healthFlagGet(Host::HealthFlag::FAILED_ACTIVE_HC)) { - if (type != envoy::data::core::v3::NETWORK || - ++num_unhealthy_ == parent_.unhealthy_threshold_) { + if (!networkHealthCheckFailureType(type) || ++num_unhealthy_ == parent_.unhealthy_threshold_) { host_->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); parent_.decHealthy(); changed_state = HealthTransition::Changed; @@ -330,6 +348,16 @@ HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy( } } + // In the case of network timeout and if the host is currently failed, set the timeout flag. 
+ // Otherwise clear it. This allows a host to toggle between timeout and failure if it's continuing + // to fail for different reasons. + if (type == envoy::data::core::v3::NETWORK_TIMEOUT && + host_->healthFlagGet(Host::HealthFlag::FAILED_ACTIVE_HC)) { + host_->healthFlagSet(Host::HealthFlag::ACTIVE_HC_TIMEOUT); + } else { + host_->healthFlagClear(Host::HealthFlag::ACTIVE_HC_TIMEOUT); + } + changed_state = clearPendingFlag(changed_state); if ((first_check_ || parent_.always_log_health_check_failures_) && parent_.event_logger_) { @@ -337,7 +365,7 @@ HealthTransition HealthCheckerImplBase::ActiveHealthCheckSession::setUnhealthy( } parent_.stats_.failure_.inc(); - if (type == envoy::data::core::v3::NETWORK) { + if (networkHealthCheckFailureType(type)) { parent_.stats_.network_failure_.inc(); } else if (type == envoy::data::core::v3::PASSIVE) { parent_.stats_.passive_failure_.inc(); @@ -381,7 +409,7 @@ void HealthCheckerImplBase::ActiveHealthCheckSession::onIntervalBase() { void HealthCheckerImplBase::ActiveHealthCheckSession::onTimeoutBase() { onTimeout(); - handleFailure(envoy::data::core::v3::NETWORK); + handleFailure(envoy::data::core::v3::NETWORK_TIMEOUT); } void HealthCheckerImplBase::ActiveHealthCheckSession::onInitialInterval() { diff --git a/source/common/upstream/health_checker_base_impl.h b/source/common/upstream/health_checker_base_impl.h index c1e4bb7affff..06f75218d4fa 100644 --- a/source/common/upstream/health_checker_base_impl.h +++ b/source/common/upstream/health_checker_base_impl.h @@ -137,7 +137,7 @@ class HealthCheckerImplBase : public HealthChecker, : health_checker_(health_checker), host_(host) {} // Upstream::HealthCheckHostMonitor - void setUnhealthy() override; + void setUnhealthy(UnhealthyType type) override; std::weak_ptr health_checker_; std::weak_ptr host_; @@ -154,7 +154,8 @@ class HealthCheckerImplBase : public HealthChecker, std::chrono::milliseconds interval_jitter) const; void onClusterMemberUpdate(const HostVector& hosts_added, const HostVector& hosts_removed); void runCallbacks(HostSharedPtr host, HealthTransition changed_state); - void setUnhealthyCrossThread(const HostSharedPtr& host); + void setUnhealthyCrossThread(const HostSharedPtr& host, + HealthCheckHostMonitor::UnhealthyType type); static std::shared_ptr initTransportSocketOptions(const envoy::config::core::v3::HealthCheck& config); static MetadataConstSharedPtr diff --git a/source/common/upstream/health_checker_impl.cc b/source/common/upstream/health_checker_impl.cc index be41d9064d16..5f6093811425 100644 --- a/source/common/upstream/health_checker_impl.cc +++ b/source/common/upstream/health_checker_impl.cc @@ -334,11 +334,20 @@ void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onGoAway( HttpHealthCheckerImpl::HttpActiveHealthCheckSession::HealthCheckResult HttpHealthCheckerImpl::HttpActiveHealthCheckSession::healthCheckResult() { - uint64_t response_code = Http::Utility::getResponseStatus(*response_headers_); + const uint64_t response_code = Http::Utility::getResponseStatus(*response_headers_); ENVOY_CONN_LOG(debug, "hc response={} health_flags={}", *client_, response_code, HostUtility::healthFlagsToString(*host_)); if (!parent_.http_status_checker_.inRange(response_code)) { + // If the HTTP response code would indicate failure AND the immediate health check + // failure header is set, exclude the host from LB. 
+    // TODO(mattklein123): We could consider doing this check for any HTTP response code, but this
+    // seems like the least surprising behavior and we could consider relaxing this in the future.
+    // TODO(mattklein123): This will not force a host set rebuild if the host was already failed.
+    // This is something we could do in the future but seems unnecessary right now.
+    if (response_headers_->EnvoyImmediateHealthCheckFail() != nullptr) {
+      host_->healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL);
+    }
     return HealthCheckResult::Failed;
   }
@@ -372,7 +381,6 @@ void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResponseComplete() {
     handleSuccess(true);
     break;
   case HealthCheckResult::Failed:
-    host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::UNHEALTHY);
     handleFailure(envoy::data::core::v3::ACTIVE);
     break;
   }
@@ -401,7 +409,6 @@ bool HttpHealthCheckerImpl::HttpActiveHealthCheckSession::shouldClose() const {
 void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onTimeout() {
   request_in_flight_ = false;
   if (client_) {
-    host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::TIMEOUT);
     ENVOY_CONN_LOG(debug, "connection/stream timeout health_flags={}", *client_,
                    HostUtility::healthFlagsToString(*host_));
@@ -489,6 +496,9 @@ void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onDeferredDelete() {
 void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onData(Buffer::Instance& data) {
   ENVOY_CONN_LOG(trace, "total pending buffer={}", *client_, data.length());
+  // TODO(lilika): The TCP health checker does generic pattern matching so we can't differentiate
+  // between wrong data and not enough data. We could likely do better here and figure out cases in
+  // which a match is not possible but that is not done now.
if (TcpHealthCheckMatcher::match(parent_.receive_bytes_, data)) { ENVOY_CONN_LOG(trace, "healthcheck passed", *client_); data.drain(data.length()); @@ -497,8 +507,6 @@ void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onData(Buffer::Instance& expect_close_ = true; client_->close(Network::ConnectionCloseType::NoFlush); } - } else { - host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::UNHEALTHY); } } @@ -563,7 +571,6 @@ void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onInterval() { void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onTimeout() { expect_close_ = true; - host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::TIMEOUT); client_->close(Network::ConnectionCloseType::NoFlush); } diff --git a/source/common/upstream/health_discovery_service.cc b/source/common/upstream/health_discovery_service.cc index 7b7c94ad24fe..3b91c16ca24e 100644 --- a/source/common/upstream/health_discovery_service.cc +++ b/source/common/upstream/health_discovery_service.cc @@ -128,17 +128,10 @@ envoy::service::health::v3::HealthCheckRequestOrEndpointHealthResponse HdsDelega if (host->health() == Host::Health::Healthy) { endpoint->set_health_status(envoy::config::core::v3::HEALTHY); } else { - switch (host->getActiveHealthFailureType()) { - case Host::ActiveHealthFailureType::TIMEOUT: + if (host->healthFlagGet(Host::HealthFlag::ACTIVE_HC_TIMEOUT)) { endpoint->set_health_status(envoy::config::core::v3::TIMEOUT); - break; - case Host::ActiveHealthFailureType::UNHEALTHY: - case Host::ActiveHealthFailureType::UNKNOWN: + } else { endpoint->set_health_status(envoy::config::core::v3::UNHEALTHY); - break; - default: - NOT_REACHED_GCOVR_EXCL_LINE; - break; } } diff --git a/source/common/upstream/host_utility.cc b/source/common/upstream/host_utility.cc index 969dbf1ad332..10df4ae94029 100644 --- a/source/common/upstream/host_utility.cc +++ b/source/common/upstream/host_utility.cc @@ -56,6 +56,20 @@ void setHealthFlag(Upstream::Host::HealthFlag flag, const Host& host, std::strin } break; } + + case Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL: { + if (host.healthFlagGet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)) { + health_status += "/excluded_via_immediate_hc_fail"; + } + break; + } + + case Host::HealthFlag::ACTIVE_HC_TIMEOUT: { + if (host.healthFlagGet(Host::HealthFlag::ACTIVE_HC_TIMEOUT)) { + health_status += "/active_hc_timeout"; + } + break; + } } } diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc index c4badf0f937a..436debbaaebf 100644 --- a/source/common/upstream/upstream_impl.cc +++ b/source/common/upstream/upstream_impl.cc @@ -956,6 +956,17 @@ ClusterImplBase::ClusterImplBase( }); } +namespace { + +bool excludeBasedOnHealthFlag(const Host& host) { + return host.healthFlagGet(Host::HealthFlag::PENDING_ACTIVE_HC) || + (host.healthFlagGet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL) && + Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster")); +} + +} // namespace + std::tuple ClusterImplBase::partitionHostList(const HostVector& hosts) { @@ -970,7 +981,7 @@ ClusterImplBase::partitionHostList(const HostVector& hosts) { if (host->health() == Host::Health::Degraded) { degraded_list->get().emplace_back(host); } - if (host->healthFlagGet(Host::HealthFlag::PENDING_ACTIVE_HC)) { + if (excludeBasedOnHealthFlag(*host)) { excluded_list->get().emplace_back(host); } } @@ -981,10 +992,10 @@ ClusterImplBase::partitionHostList(const HostVector& hosts) { std::tuple 
ClusterImplBase::partitionHostsPerLocality(const HostsPerLocality& hosts) { - auto filtered_clones = hosts.filter( - {[](const Host& host) { return host.health() == Host::Health::Healthy; }, - [](const Host& host) { return host.health() == Host::Health::Degraded; }, - [](const Host& host) { return host.healthFlagGet(Host::HealthFlag::PENDING_ACTIVE_HC); }}); + auto filtered_clones = + hosts.filter({[](const Host& host) { return host.health() == Host::Health::Healthy; }, + [](const Host& host) { return host.health() == Host::Health::Degraded; }, + [](const Host& host) { return excludeBasedOnHealthFlag(host); }}); return std::make_tuple(std::move(filtered_clones[0]), std::move(filtered_clones[1]), std::move(filtered_clones[2])); @@ -1393,13 +1404,11 @@ bool BaseDynamicClusterImpl::updateDynamicHostList( // Did hosts change? // - // Has the EDS health status changed the health of any endpoint? If so, we + // Have host attributes changed the health of any endpoint? If so, we // rebuild the hosts vectors. We only do this if the health status of an // endpoint has materially changed (e.g. if previously failing active health // checks, we just note it's now failing EDS health status but don't rebuild). // - // Likewise, if metadata for an endpoint changed we rebuild the hosts vectors. - // // TODO(htuch): We can be smarter about this potentially, and not force a full // host set update on health status change. The way this would work is to // implement a HealthChecker subclass that provides thread local health diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index 1c9fb7fa9961..6addfd626b99 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -69,7 +69,7 @@ namespace Upstream { class HealthCheckHostMonitorNullImpl : public HealthCheckHostMonitor { public: // Upstream::HealthCheckHostMonitor - void setUnhealthy() override {} + void setUnhealthy(UnhealthyType) override {} }; /** @@ -204,13 +204,6 @@ class HostImpl : public HostDescriptionImpl, bool healthFlagGet(HealthFlag flag) const override { return health_flags_ & enumToInt(flag); } void healthFlagSet(HealthFlag flag) final { health_flags_ |= enumToInt(flag); } - ActiveHealthFailureType getActiveHealthFailureType() const override { - return active_health_failure_type_; - } - void setActiveHealthFailureType(ActiveHealthFailureType type) override { - active_health_failure_type_ = type; - } - void setHealthChecker(HealthCheckHostMonitorPtr&& health_checker) override { health_checker_ = std::move(health_checker); } @@ -253,7 +246,6 @@ class HostImpl : public HostDescriptionImpl, void setEdsHealthFlag(envoy::config::core::v3::HealthStatus health_status); std::atomic health_flags_{}; - ActiveHealthFailureType active_health_failure_type_{}; std::atomic weight_; std::atomic used_; }; diff --git a/source/extensions/filters/http/health_check/health_check.cc b/source/extensions/filters/http/health_check/health_check.cc index ed1fe45cb240..65713a721c1c 100644 --- a/source/extensions/filters/http/health_check/health_check.cc +++ b/source/extensions/filters/http/health_check/health_check.cc @@ -104,7 +104,11 @@ Http::FilterHeadersStatus HealthCheckFilter::encodeHeaders(Http::ResponseHeaderM } headers.setEnvoyUpstreamHealthCheckedCluster(context_.localInfo().clusterName()); - } else if (context_.healthCheckFailed()) { + } + + if (context_.healthCheckFailed() && + Runtime::runtimeFeatureEnabled( + 
"envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster")) { headers.setReferenceEnvoyImmediateHealthCheckFail( Http::Headers::get().EnvoyImmediateHealthCheckFailValues.True); } diff --git a/source/server/admin/clusters_handler.cc b/source/server/admin/clusters_handler.cc index 94c5f820b3ea..e5ef9ef29205 100644 --- a/source/server/admin/clusters_handler.cc +++ b/source/server/admin/clusters_handler.cc @@ -94,6 +94,14 @@ void setHealthFlag(Upstream::Host::HealthFlag flag, const Upstream::Host& host, health_status.set_pending_active_hc( host.healthFlagGet(Upstream::Host::HealthFlag::PENDING_ACTIVE_HC)); break; + case Upstream::Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL: + health_status.set_excluded_via_immediate_hc_fail( + host.healthFlagGet(Upstream::Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)); + break; + case Upstream::Host::HealthFlag::ACTIVE_HC_TIMEOUT: + health_status.set_active_hc_timeout( + host.healthFlagGet(Upstream::Host::HealthFlag::ACTIVE_HC_TIMEOUT)); + break; } } diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc index 719eafcc1b26..7e4c73094e62 100644 --- a/test/common/router/router_test.cc +++ b/test/common/router/router_test.cc @@ -822,7 +822,8 @@ TEST_F(RouterTest, EnvoyAttemptCountInRequestUpdatedInRetries) { // Normal response. EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()).Times(0); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy(_)) + .Times(0); Http::ResponseHeaderMapPtr response_headers2( new Http::TestResponseHeaderMapImpl{{":status", "200"}}); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, @@ -944,7 +945,8 @@ TEST_F(RouterTest, EnvoyAttemptCountInResponseWithRetries) { // Normal response. EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()).Times(0); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy(_)) + .Times(0); Http::ResponseHeaderMapPtr response_headers2( new Http::TestResponseHeaderMapImpl{{":status", "200"}}); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, @@ -1076,7 +1078,8 @@ TEST_F(RouterTest, NoRetriesOverflow) { EXPECT_CALL(callbacks_.stream_info_, setResponseFlag(StreamInfo::ResponseFlag::UpstreamOverflow)); EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)) .WillOnce(Return(RetryStatus::NoOverflow)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()).Times(0); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy(_)) + .Times(0); Http::ResponseHeaderMapPtr response_headers2( new Http::TestResponseHeaderMapImpl{{":status", "503"}}); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, @@ -3583,7 +3586,8 @@ TEST_F(RouterTest, RetryUpstream5xx) { // Normal response. 
EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()).Times(0); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy(_)) + .Times(0); Http::ResponseHeaderMapPtr response_headers2( new Http::TestResponseHeaderMapImpl{{":status", "200"}}); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, @@ -3955,7 +3959,9 @@ TEST_F(RouterTest, RetryUpstream5xxNotComplete) { EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200)); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, putResponseTime(_)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()); + EXPECT_CALL( + cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, + setUnhealthy(Upstream::HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail)); Http::ResponseHeaderMapPtr response_headers2(new Http::TestResponseHeaderMapImpl{ {":status", "200"}, {"x-envoy-immediate-health-check-fail", "true"}}); response_decoder->decodeHeaders(std::move(response_headers2), true); @@ -4104,7 +4110,8 @@ TEST_F(RouterTest, RetryRespsectsMaxHostSelectionCount) { // Normal response. EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()).Times(0); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy(_)) + .Times(0); Http::ResponseHeaderMapPtr response_headers2( new Http::TestResponseHeaderMapImpl{{":status", "200"}}); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, @@ -4183,7 +4190,8 @@ TEST_F(RouterTest, RetryRespectsRetryHostPredicate) { // Normal response. 
EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No)); - EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy()).Times(0); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->health_checker_, setUnhealthy(_)) + .Times(0); Http::ResponseHeaderMapPtr response_headers2( new Http::TestResponseHeaderMapImpl{{":status", "200"}}); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_.host_->outlier_detector_, diff --git a/test/common/upstream/health_checker_impl_test.cc b/test/common/upstream/health_checker_impl_test.cc index 56bbe8360f57..e57fe6104857 100644 --- a/test/common/upstream/health_checker_impl_test.cc +++ b/test/common/upstream/health_checker_impl_test.cc @@ -8,6 +8,7 @@ #include "envoy/config/core/v3/health_check.pb.validate.h" #include "envoy/config/endpoint/v3/endpoint_components.pb.h" #include "envoy/data/core/v3/health_check_event.pb.h" +#include "envoy/upstream/health_check_host_monitor.h" #include "common/buffer/buffer_impl.h" #include "common/buffer/zero_copy_input_stream_impl.h" @@ -600,14 +601,13 @@ class HttpHealthCheckerImplTest : public Event::TestUsingSimulatedTime, void respond(size_t index, const std::string& code, bool conn_close, bool proxy_close = false, bool body = false, bool trailers = false, const absl::optional& service_cluster = absl::optional(), - bool degraded = false) { + bool degraded = false, bool immediate_hc_fail = false) { std::unique_ptr response_headers( new Http::TestResponseHeaderMapImpl{{":status", code}}); if (degraded) { response_headers->setEnvoyDegraded(""); } - if (service_cluster) { response_headers->addCopy(Http::Headers::get().EnvoyUpstreamHealthCheckedCluster, service_cluster.value()); @@ -618,6 +618,9 @@ class HttpHealthCheckerImplTest : public Event::TestUsingSimulatedTime, if (proxy_close) { response_headers->addCopy("proxy-connection", "close"); } + if (immediate_hc_fail) { + response_headers->setEnvoyImmediateHealthCheckFail("true"); + } test_sessions_[index]->stream_response_callbacks_->decodeHeaders(std::move(response_headers), !body && !trailers); @@ -1791,11 +1794,11 @@ TEST_F(HttpHealthCheckerImplTest, HttpFail) { respond(0, "503", false); EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::UNHEALTHY); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); expectStreamCreate(0); test_sessions_[0]->interval_timer_->invokeCallback(); @@ -1821,6 +1824,29 @@ TEST_F(HttpHealthCheckerImplTest, HttpFail) { EXPECT_EQ(Host::Health::Healthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); } +TEST_F(HttpHealthCheckerImplTest, ImmediateFailure) { + setupNoServiceValidationHC(); + cluster_->prioritySet().getMockHostSet(0)->hosts_ = { + makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())}; + expectSessionCreate(); + expectStreamCreate(0); + EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); + health_checker_->start(); + + EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed)); + EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _)); + 
EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(_, _)); + EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); + EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true)); + respond(0, "503", false, false, true, false, absl::nullopt, false, true); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)); + EXPECT_EQ(Host::Health::Unhealthy, + cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); +} + TEST_F(HttpHealthCheckerImplTest, HttpFailLogError) { setupNoServiceValidationHCAlwaysLogFailure(); cluster_->prioritySet().getMockHostSet(0)->hosts_ = { @@ -1841,8 +1867,6 @@ TEST_F(HttpHealthCheckerImplTest, HttpFailLogError) { EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::UNHEALTHY); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); expectStreamCreate(0); test_sessions_[0]->interval_timer_->invokeCallback(); @@ -1858,8 +1882,6 @@ TEST_F(HttpHealthCheckerImplTest, HttpFailLogError) { EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::UNHEALTHY); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); expectStreamCreate(0); test_sessions_[0]->interval_timer_->invokeCallback(); @@ -1937,9 +1959,8 @@ TEST_F(HttpHealthCheckerImplTest, Timeout) { test_sessions_[0]->timeout_timer_->invokeCallback(); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); - - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::TIMEOUT); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); } // Make sure that a timeout during a partial response works correctly. @@ -2009,9 +2030,8 @@ TEST_F(HttpHealthCheckerImplTest, TimeoutThenRemoteClose) { Host::HealthFlag::FAILED_ACTIVE_HC)); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); - - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::TIMEOUT); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); } TEST_F(HttpHealthCheckerImplTest, TimeoutAfterDisconnect) { @@ -2228,13 +2248,15 @@ TEST_F(HttpHealthCheckerImplTest, HealthCheckIntervals) { expectStreamCreate(0); test_sessions_[0]->interval_timer_->invokeCallback(); - // First failed check after a run o successful ones should respect unhealthy_edge_interval. A + // First failed check after a run of successful ones should respect unhealthy_edge_interval. A // timeout, being a network type failure, should respect unhealthy threshold before changing the // health state. 
EXPECT_CALL(*this, onHostStatus(_, HealthTransition::ChangePending)); EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(std::chrono::milliseconds(3000), _)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); test_sessions_[0]->timeout_timer_->invokeCallback(); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); // Needed after a network timeout. @@ -2247,6 +2269,8 @@ TEST_F(HttpHealthCheckerImplTest, HealthCheckIntervals) { EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(std::chrono::milliseconds(3000), _)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); test_sessions_[0]->timeout_timer_->invokeCallback(); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); // Needed after a network timeout. @@ -2262,6 +2286,8 @@ TEST_F(HttpHealthCheckerImplTest, HealthCheckIntervals) { EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(std::chrono::milliseconds(2000), _)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); test_sessions_[0]->timeout_timer_->invokeCallback(); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); // Needed after a network timeout. @@ -2288,6 +2314,8 @@ TEST_F(HttpHealthCheckerImplTest, HealthCheckIntervals) { EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(std::chrono::milliseconds(4000), _)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); respond(0, "200", false); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); // Needed after a response is sent. @@ -2298,6 +2326,8 @@ TEST_F(HttpHealthCheckerImplTest, HealthCheckIntervals) { EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(std::chrono::milliseconds(4000), _)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); respond(0, "200", false); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); // Needed after a response is sent. @@ -2311,6 +2341,8 @@ TEST_F(HttpHealthCheckerImplTest, HealthCheckIntervals) { EXPECT_CALL(*test_sessions_[0]->interval_timer_, enableTimer(std::chrono::milliseconds(1000), _)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, disableTimer()); respond(0, "200", false); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _)); // Needed after a response is sent. @@ -3481,10 +3513,10 @@ TEST_F(TcpHealthCheckerImplTest, WrongData) { // These are the expected metric results after testing. EXPECT_EQ(0UL, cluster_->info_->stats_store_.counter("health_check.success").value()); - // TODO(lilika): This should indicate a failure + // TODO(lilika): The TCP health checker does generic pattern matching so we can't differentiate + // between wrong data and not enough data. 
We could likely do better here and figure out cases in + // which a match is not possible but that is not done now. EXPECT_EQ(0UL, cluster_->info_->stats_store_.counter("health_check.failure").value()); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::UNHEALTHY); } TEST_F(TcpHealthCheckerImplTest, TimeoutThenRemoteClose) { @@ -3514,8 +3546,6 @@ TEST_F(TcpHealthCheckerImplTest, TimeoutThenRemoteClose) { EXPECT_CALL(*timeout_timer_, disableTimer()); EXPECT_CALL(*interval_timer_, enableTimer(_, _)); timeout_timer_->invokeCallback(); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::TIMEOUT); EXPECT_EQ(Host::Health::Healthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); expectClientCreate(); @@ -3575,8 +3605,8 @@ TEST_F(TcpHealthCheckerImplTest, Timeout) { EXPECT_CALL(*timeout_timer_, disableTimer()); EXPECT_CALL(*interval_timer_, enableTimer(_, _)); timeout_timer_->invokeCallback(); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::TIMEOUT); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); } @@ -3608,8 +3638,8 @@ TEST_F(TcpHealthCheckerImplTest, DoubleTimeout) { EXPECT_CALL(*timeout_timer_, disableTimer()); EXPECT_CALL(*interval_timer_, enableTimer(_, _)); timeout_timer_->invokeCallback(); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::TIMEOUT); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_EQ(Host::Health::Healthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); expectClientCreate(); @@ -3624,8 +3654,8 @@ TEST_F(TcpHealthCheckerImplTest, DoubleTimeout) { EXPECT_CALL(*timeout_timer_, disableTimer()); EXPECT_CALL(*interval_timer_, enableTimer(_, _)); timeout_timer_->invokeCallback(); - EXPECT_EQ(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->getActiveHealthFailureType(), - Host::ActiveHealthFailureType::TIMEOUT); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::ACTIVE_HC_TIMEOUT)); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); @@ -3751,10 +3781,14 @@ TEST_F(TcpHealthCheckerImplTest, PassiveFailure) { health_checker_->start(); // Do multiple passive failures. This will not reset the active HC timers. 
- cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy(); - cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy(); + cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); + cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); @@ -3765,11 +3799,29 @@ TEST_F(TcpHealthCheckerImplTest, PassiveFailure) { connection_->raiseEvent(Network::ConnectionEvent::Connected); EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_TRUE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)); EXPECT_EQ(Host::Health::Unhealthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); - EXPECT_EQ(1UL, cluster_->info_->stats_store_.counter("health_check.attempt").value()); - EXPECT_EQ(1UL, cluster_->info_->stats_store_.counter("health_check.success").value()); + // Bring back to healthy and check flag clearing. + expectClientCreate(); + EXPECT_CALL(*timeout_timer_, enableTimer(_, _)); + interval_timer_->invokeCallback(); + + EXPECT_CALL(*connection_, close(_)); + EXPECT_CALL(event_logger_, logAddHealthy(_, _, false)); + EXPECT_CALL(*timeout_timer_, disableTimer()); + EXPECT_CALL(*interval_timer_, enableTimer(_, _)); + connection_->raiseEvent(Network::ConnectionEvent::Connected); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_FALSE(cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthFlagGet( + Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL)); + EXPECT_EQ(Host::Health::Healthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); + + EXPECT_EQ(2UL, cluster_->info_->stats_store_.counter("health_check.attempt").value()); + EXPECT_EQ(2UL, cluster_->info_->stats_store_.counter("health_check.success").value()); EXPECT_EQ(2UL, cluster_->info_->stats_store_.counter("health_check.failure").value()); EXPECT_EQ(2UL, cluster_->info_->stats_store_.counter("health_check.passive_failure").value()); } @@ -3789,7 +3841,8 @@ TEST_F(TcpHealthCheckerImplTest, PassiveFailureCrossThreadRemoveHostRace) { // Do a passive failure. This will not reset the active HC timers. Event::PostCb post_cb; EXPECT_CALL(dispatcher_, post(_)).WillOnce(SaveArg<0>(&post_cb)); - cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy(); + cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); // Remove before the cross thread event comes in. EXPECT_CALL(*connection_, close(_)); @@ -3818,7 +3871,8 @@ TEST_F(TcpHealthCheckerImplTest, PassiveFailureCrossThreadRemoveClusterRace) { // Do a passive failure. This will not reset the active HC timers. 
Event::PostCb post_cb; EXPECT_CALL(dispatcher_, post(_)).WillOnce(SaveArg<0>(&post_cb)); - cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy(); + cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); // Remove before the cross thread event comes in. EXPECT_CALL(*connection_, close(_)); diff --git a/test/common/upstream/host_utility_test.cc b/test/common/upstream/host_utility_test.cc index ba0436d5eb92..3e4e81d7bbb1 100644 --- a/test/common/upstream/host_utility_test.cc +++ b/test/common/upstream/host_utility_test.cc @@ -44,7 +44,8 @@ TEST(HostUtilityTest, All) { HEALTH_FLAG_ENUM_VALUES(SET_HEALTH_FLAG) #undef SET_HEALTH_FLAG EXPECT_EQ("/failed_active_hc/failed_outlier_check/failed_eds_health/degraded_active_hc/" - "degraded_eds_health/pending_dynamic_removal/pending_active_hc", + "degraded_eds_health/pending_dynamic_removal/pending_active_hc/" + "excluded_via_immediate_hc_fail/active_hc_timeout", HostUtility::healthFlagsToString(*host)); } diff --git a/test/common/upstream/logical_dns_cluster_test.cc b/test/common/upstream/logical_dns_cluster_test.cc index db1511775067..caef2da1956b 100644 --- a/test/common/upstream/logical_dns_cluster_test.cc +++ b/test/common/upstream/logical_dns_cluster_test.cc @@ -7,6 +7,7 @@ #include "envoy/config/cluster/v3/cluster.pb.h" #include "envoy/config/core/v3/base.pb.h" #include "envoy/stats/scope.h" +#include "envoy/upstream/health_check_host_monitor.h" #include "common/network/utility.h" #include "common/singleton/manager_impl.h" @@ -137,7 +138,8 @@ class LogicalDnsClusterTest : public Event::TestUsingSimulatedTime, public testi EXPECT_TRUE(TestUtility::protoEqual(envoy::config::core::v3::Metadata::default_instance(), *data.host_description_->metadata())); data.host_description_->outlierDetector().putHttpResponseCode(200); - data.host_description_->healthChecker().setUnhealthy(); + data.host_description_->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); expectResolve(Network::DnsLookupFamily::V4Only, expected_address); resolve_timer_->invokeCallback(); @@ -290,7 +292,8 @@ TEST_P(LogicalDnsParamTest, ImmediateResolve) { EXPECT_EQ(1UL, cluster_->prioritySet().hostSetsPerPriority()[0]->healthyHosts().size()); EXPECT_EQ("foo.bar.com", cluster_->prioritySet().hostSetsPerPriority()[0]->hosts()[0]->hostname()); - cluster_->prioritySet().hostSetsPerPriority()[0]->hosts()[0]->healthChecker().setUnhealthy(); + cluster_->prioritySet().hostSetsPerPriority()[0]->hosts()[0]->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); tls_.shutdownThread(); } @@ -399,7 +402,7 @@ TEST_F(LogicalDnsClusterTest, BadConfig) { address: socket_address: address: foo.bar.com - port_value: 443 + port_value: 443 - endpoint: address: socket_address: diff --git a/test/common/upstream/upstream_impl_test.cc b/test/common/upstream/upstream_impl_test.cc index 7d031bd37506..a45d21b7c88d 100644 --- a/test/common/upstream/upstream_impl_test.cc +++ b/test/common/upstream/upstream_impl_test.cc @@ -13,6 +13,7 @@ #include "envoy/http/codec.h" #include "envoy/stats/scope.h" #include "envoy/upstream/cluster_manager.h" +#include "envoy/upstream/health_check_host_monitor.h" #include "envoy/upstream/upstream.h" #include "common/config/metadata.h" @@ -320,7 +321,7 @@ TEST_F(StrictDnsClusterImplTest, Basic) { address: socket_address: address: localhost2 - port_value: 11002 + port_value: 
11002 )EOF"; envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml); @@ -471,7 +472,7 @@ TEST_F(StrictDnsClusterImplTest, HostRemovalActiveHealthSkipped) { address: socket_address: address: foo.bar.com - port_value: 443 + port_value: 443 )EOF"; ResolverData resolver(*dns_resolver_, dispatcher_); @@ -1008,7 +1009,7 @@ TEST_F(StrictDnsClusterImplTest, CustomResolverFails) { socket_address: address: foo.bar.com port_value: 443 - resolver_name: customresolver + resolver_name: customresolver )EOF"; envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml); @@ -1167,7 +1168,7 @@ TEST_F(StrictDnsClusterImplTest, Http2UserDefinedSettingsParametersValidation) { address: socket_address: address: localhost1 - port_value: 11001 + port_value: 11001 - endpoint: address: socket_address: @@ -1661,7 +1662,7 @@ TEST_F(StaticClusterImplTest, OutlierDetector) { address: socket_address: address: 10.0.0.1 - port_value: 11001 + port_value: 11001 - endpoint: address: socket_address: @@ -1719,7 +1720,7 @@ TEST_F(StaticClusterImplTest, HealthyStat) { address: socket_address: address: 10.0.0.1 - port_value: 11001 + port_value: 11001 - endpoint: address: socket_address: @@ -1860,7 +1861,7 @@ TEST_F(StaticClusterImplTest, UrlConfig) { address: socket_address: address: 10.0.0.1 - port_value: 11001 + port_value: 11001 - endpoint: address: socket_address: @@ -1898,7 +1899,8 @@ TEST_F(StaticClusterImplTest, UrlConfig) { EXPECT_EQ(1UL, cluster.prioritySet().hostSetsPerPriority()[0]->hostsPerLocality().get().size()); EXPECT_EQ(1UL, cluster.prioritySet().hostSetsPerPriority()[0]->healthyHostsPerLocality().get().size()); - cluster.prioritySet().hostSetsPerPriority()[0]->hosts()[0]->healthChecker().setUnhealthy(); + cluster.prioritySet().hostSetsPerPriority()[0]->hosts()[0]->healthChecker().setUnhealthy( + HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail); } TEST_F(StaticClusterImplTest, UnsupportedLBType) { @@ -3350,19 +3352,77 @@ TEST(HostPartitionTest, PartitionHosts) { HostVector hosts{makeTestHost(info, "tcp://127.0.0.1:80", *time_source), makeTestHost(info, "tcp://127.0.0.1:81", *time_source), makeTestHost(info, "tcp://127.0.0.1:82", *time_source), - makeTestHost(info, "tcp://127.0.0.1:83", *time_source)}; + makeTestHost(info, "tcp://127.0.0.1:83", *time_source), + makeTestHost(info, "tcp://127.0.0.1:84", *time_source)}; hosts[0]->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); hosts[1]->healthFlagSet(Host::HealthFlag::DEGRADED_ACTIVE_HC); hosts[2]->healthFlagSet(Host::HealthFlag::PENDING_ACTIVE_HC); hosts[2]->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); + hosts[4]->healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL); + hosts[4]->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); - auto hosts_per_locality = makeHostsPerLocality({{hosts[0], hosts[1]}, {hosts[2], hosts[3]}}); + auto hosts_per_locality = + makeHostsPerLocality({{hosts[0], hosts[1]}, {hosts[2], hosts[3], hosts[4]}}); auto update_hosts_params = HostSetImpl::partitionHosts(std::make_shared(hosts), hosts_per_locality); - EXPECT_EQ(4, update_hosts_params.hosts->size()); + EXPECT_EQ(5, update_hosts_params.hosts->size()); + EXPECT_EQ(1, update_hosts_params.healthy_hosts->get().size()); + EXPECT_EQ(hosts[3], update_hosts_params.healthy_hosts->get()[0]); + EXPECT_EQ(1, update_hosts_params.degraded_hosts->get().size()); + EXPECT_EQ(hosts[1], update_hosts_params.degraded_hosts->get()[0]); + EXPECT_EQ(2, update_hosts_params.excluded_hosts->get().size()); + EXPECT_EQ(hosts[2], 
update_hosts_params.excluded_hosts->get()[0]); + EXPECT_EQ(hosts[4], update_hosts_params.excluded_hosts->get()[1]); + + EXPECT_EQ(2, update_hosts_params.hosts_per_locality->get()[0].size()); + EXPECT_EQ(3, update_hosts_params.hosts_per_locality->get()[1].size()); + + EXPECT_EQ(0, update_hosts_params.healthy_hosts_per_locality->get()[0].size()); + EXPECT_EQ(1, update_hosts_params.healthy_hosts_per_locality->get()[1].size()); + EXPECT_EQ(hosts[3], update_hosts_params.healthy_hosts_per_locality->get()[1][0]); + + EXPECT_EQ(1, update_hosts_params.degraded_hosts_per_locality->get()[0].size()); + EXPECT_EQ(0, update_hosts_params.degraded_hosts_per_locality->get()[1].size()); + EXPECT_EQ(hosts[1], update_hosts_params.degraded_hosts_per_locality->get()[0][0]); + + EXPECT_EQ(0, update_hosts_params.excluded_hosts_per_locality->get()[0].size()); + EXPECT_EQ(2, update_hosts_params.excluded_hosts_per_locality->get()[1].size()); + EXPECT_EQ(hosts[2], update_hosts_params.excluded_hosts_per_locality->get()[1][0]); + EXPECT_EQ(hosts[4], update_hosts_params.excluded_hosts_per_locality->get()[1][1]); +} + +// Verifies that partitionHosts correctly splits hosts based on their health flags when +// "envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster" is disabled. +TEST(HostPartitionTest, PartitionHostsImmediateFailureExcludeDisabled) { + TestScopedRuntime scoped_runtime; + Runtime::LoaderSingleton::getExisting()->mergeValues( + {{"envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster", "false"}}); + + std::shared_ptr info{new NiceMock()}; + auto time_source = std::make_unique>(); + HostVector hosts{makeTestHost(info, "tcp://127.0.0.1:80", *time_source), + makeTestHost(info, "tcp://127.0.0.1:81", *time_source), + makeTestHost(info, "tcp://127.0.0.1:82", *time_source), + makeTestHost(info, "tcp://127.0.0.1:83", *time_source), + makeTestHost(info, "tcp://127.0.0.1:84", *time_source)}; + + hosts[0]->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); + hosts[1]->healthFlagSet(Host::HealthFlag::DEGRADED_ACTIVE_HC); + hosts[2]->healthFlagSet(Host::HealthFlag::PENDING_ACTIVE_HC); + hosts[2]->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); + hosts[4]->healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL); + hosts[4]->healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC); + + auto hosts_per_locality = + makeHostsPerLocality({{hosts[0], hosts[1]}, {hosts[2], hosts[3], hosts[4]}}); + + auto update_hosts_params = + HostSetImpl::partitionHosts(std::make_shared(hosts), hosts_per_locality); + + EXPECT_EQ(5, update_hosts_params.hosts->size()); EXPECT_EQ(1, update_hosts_params.healthy_hosts->get().size()); EXPECT_EQ(hosts[3], update_hosts_params.healthy_hosts->get()[0]); EXPECT_EQ(1, update_hosts_params.degraded_hosts->get().size()); @@ -3371,7 +3431,7 @@ TEST(HostPartitionTest, PartitionHosts) { EXPECT_EQ(hosts[2], update_hosts_params.excluded_hosts->get()[0]); EXPECT_EQ(2, update_hosts_params.hosts_per_locality->get()[0].size()); - EXPECT_EQ(2, update_hosts_params.hosts_per_locality->get()[1].size()); + EXPECT_EQ(3, update_hosts_params.hosts_per_locality->get()[1].size()); EXPECT_EQ(0, update_hosts_params.healthy_hosts_per_locality->get()[0].size()); EXPECT_EQ(1, update_hosts_params.healthy_hosts_per_locality->get()[1].size()); @@ -3385,6 +3445,7 @@ TEST(HostPartitionTest, PartitionHosts) { EXPECT_EQ(1, update_hosts_params.excluded_hosts_per_locality->get()[1].size()); EXPECT_EQ(hosts[2], update_hosts_params.excluded_hosts_per_locality->get()[1][0]); } + } // 
namespace } // namespace Upstream } // namespace Envoy diff --git a/test/extensions/filters/http/health_check/BUILD b/test/extensions/filters/http/health_check/BUILD index 30beabc500f0..b2196bdcc5c8 100644 --- a/test/extensions/filters/http/health_check/BUILD +++ b/test/extensions/filters/http/health_check/BUILD @@ -20,6 +20,7 @@ envoy_extension_cc_test( "//source/common/http:header_utility_lib", "//source/extensions/filters/http/health_check:health_check_lib", "//test/mocks/server:factory_context_mocks", + "//test/test_common:test_runtime_lib", "//test/test_common:utility_lib", "@envoy_api//envoy/config/route/v3:pkg_cc_proto", ], diff --git a/test/extensions/filters/http/health_check/health_check_test.cc b/test/extensions/filters/http/health_check/health_check_test.cc index 959aeff11a82..47693910d0cd 100644 --- a/test/extensions/filters/http/health_check/health_check_test.cc +++ b/test/extensions/filters/http/health_check/health_check_test.cc @@ -12,6 +12,7 @@ #include "test/mocks/server/factory_context.h" #include "test/mocks/upstream/cluster_info.h" #include "test/test_common/printers.h" +#include "test/test_common/test_runtime.h" #include "test/test_common/utility.h" #include "gmock/gmock.h" @@ -229,7 +230,38 @@ TEST_F(HealthCheckFilterNoPassThroughTest, ComputedHealth) { } TEST_F(HealthCheckFilterNoPassThroughTest, HealthCheckFailedCallbackCalled) { - EXPECT_CALL(context_, healthCheckFailed()).WillOnce(Return(true)); + EXPECT_CALL(context_, healthCheckFailed()).Times(2).WillRepeatedly(Return(true)); + EXPECT_CALL(callbacks_.stream_info_, healthCheck(true)); + EXPECT_CALL(callbacks_.active_span_, setSampled(false)); + Http::TestResponseHeaderMapImpl health_check_response{{":status", "503"}}; + EXPECT_CALL(callbacks_, encodeHeaders_(HeaderMapEqualRef(&health_check_response), true)) + .Times(1) + .WillRepeatedly(Invoke([&](Http::ResponseHeaderMap& headers, bool end_stream) { + filter_->encodeHeaders(headers, end_stream); + EXPECT_EQ("cluster_name", headers.getEnvoyUpstreamHealthCheckedClusterValue()); + EXPECT_NE(nullptr, headers.EnvoyImmediateHealthCheckFail()); + })); + + EXPECT_CALL(callbacks_.stream_info_, + setResponseFlag(StreamInfo::ResponseFlag::FailedLocalHealthCheck)); + + EXPECT_EQ(Http::FilterHeadersStatus::StopIteration, + filter_->decodeHeaders(request_headers_, false)); + Buffer::OwnedImpl data("hello"); + EXPECT_EQ(Http::FilterDataStatus::StopIterationNoBuffer, filter_->decodeData(data, false)); + Http::TestRequestTrailerMapImpl request_trailers; + EXPECT_EQ(Http::FilterTrailersStatus::StopIteration, filter_->decodeTrailers(request_trailers)); +} + +// Verifies that header is not sent on HC requests when +// "envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster" is disabled. 
+TEST_F(HealthCheckFilterNoPassThroughTest, + HealthCheckFailedCallbackCalledImmediateFailureExcludeDisabled) { + TestScopedRuntime scoped_runtime; + Runtime::LoaderSingleton::getExisting()->mergeValues( + {{"envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster", "false"}}); + + EXPECT_CALL(context_, healthCheckFailed()).Times(2).WillRepeatedly(Return(true)); EXPECT_CALL(callbacks_.stream_info_, healthCheck(true)); EXPECT_CALL(callbacks_.active_span_, setSampled(false)); Http::TestResponseHeaderMapImpl health_check_response{{":status", "503"}}; @@ -253,7 +285,7 @@ TEST_F(HealthCheckFilterNoPassThroughTest, HealthCheckFailedCallbackCalled) { } TEST_F(HealthCheckFilterPassThroughTest, Ok) { - EXPECT_CALL(context_, healthCheckFailed()).WillOnce(Return(false)); + EXPECT_CALL(context_, healthCheckFailed()).Times(2).WillRepeatedly(Return(false)); EXPECT_CALL(callbacks_.stream_info_, healthCheck(true)); EXPECT_CALL(callbacks_.active_span_, setSampled(false)); EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0); @@ -265,7 +297,7 @@ TEST_F(HealthCheckFilterPassThroughTest, Ok) { } TEST_F(HealthCheckFilterPassThroughTest, OkWithContinue) { - EXPECT_CALL(context_, healthCheckFailed()).WillOnce(Return(false)); + EXPECT_CALL(context_, healthCheckFailed()).Times(2).WillRepeatedly(Return(false)); EXPECT_CALL(callbacks_.stream_info_, healthCheck(true)); EXPECT_CALL(callbacks_.active_span_, setSampled(false)); EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0); diff --git a/test/integration/health_check_integration_test.cc b/test/integration/health_check_integration_test.cc index f6a403805351..e280b3d2b4cf 100644 --- a/test/integration/health_check_integration_test.cc +++ b/test/integration/health_check_integration_test.cc @@ -248,6 +248,53 @@ TEST_P(HttpHealthCheckIntegrationTest, SingleEndpointUnhealthyHttp) { EXPECT_EQ(1, test_server_->counter("cluster.cluster_1.health_check.failure")->value()); } +// Verify that immediate health check fail causes cluster exclusion. +TEST_P(HttpHealthCheckIntegrationTest, SingleEndpointImmediateHealthcheckFailHttp) { + const uint32_t cluster_idx = 0; + initialize(); + initHttpHealthCheck(cluster_idx); + + EXPECT_EQ(1, test_server_->gauge("cluster.cluster_1.membership_total")->value()); + EXPECT_EQ(0, test_server_->gauge("cluster.cluster_1.membership_excluded")->value()); + EXPECT_EQ(0, test_server_->gauge("cluster.cluster_1.membership_healthy")->value()); + + // Endpoint responds to the health check with unhealthy status and immediate health check failure. + clusters_[cluster_idx].host_stream_->encodeHeaders( + Http::TestResponseHeaderMapImpl{{":status", "503"}, + {"x-envoy-immediate-health-check-fail", "true"}}, + false); + clusters_[cluster_idx].host_stream_->encodeData(1024, true); + + test_server_->waitForCounterGe("cluster.cluster_1.health_check.failure", 1); + EXPECT_EQ(0, test_server_->counter("cluster.cluster_1.health_check.success")->value()); + EXPECT_EQ(1, test_server_->counter("cluster.cluster_1.health_check.failure")->value()); + test_server_->waitForGaugeEq("cluster.cluster_1.membership_excluded", 1); + EXPECT_EQ(1, test_server_->gauge("cluster.cluster_1.membership_total")->value()); + EXPECT_EQ(0, test_server_->gauge("cluster.cluster_1.membership_healthy")->value()); + + // Advance time to cause another health check. This should remove the cluster exclusion. 
+ timeSystem().advanceTimeWait(std::chrono::milliseconds(500)); + + ASSERT_TRUE(clusters_[cluster_idx].host_fake_connection_->waitForNewStream( + *dispatcher_, clusters_[cluster_idx].host_stream_)); + ASSERT_TRUE(clusters_[cluster_idx].host_stream_->waitForEndStream(*dispatcher_)); + + EXPECT_EQ(clusters_[cluster_idx].host_stream_->headers().getPathValue(), "/healthcheck"); + EXPECT_EQ(clusters_[cluster_idx].host_stream_->headers().getMethodValue(), "GET"); + EXPECT_EQ(clusters_[cluster_idx].host_stream_->headers().getHostValue(), + clusters_[cluster_idx].name_); + + clusters_[cluster_idx].host_stream_->encodeHeaders( + Http::TestResponseHeaderMapImpl{{":status", "200"}}, true); + + test_server_->waitForCounterGe("cluster.cluster_1.health_check.success", 1); + EXPECT_EQ(1, test_server_->counter("cluster.cluster_1.health_check.success")->value()); + EXPECT_EQ(1, test_server_->counter("cluster.cluster_1.health_check.failure")->value()); + test_server_->waitForGaugeEq("cluster.cluster_1.membership_excluded", 0); + EXPECT_EQ(1, test_server_->gauge("cluster.cluster_1.membership_total")->value()); + EXPECT_EQ(1, test_server_->gauge("cluster.cluster_1.membership_healthy")->value()); +} + // Tests that no HTTP health check response results in timeout and unhealthy endpoint. TEST_P(HttpHealthCheckIntegrationTest, SingleEndpointTimeoutHttp) { const uint32_t cluster_idx = 0; @@ -368,7 +415,8 @@ TEST_P(RealTimeHttpHealthCheckIntegrationTest, SingleEndpointGoAwayErroSingleEnd EXPECT_EQ(1, test_server_->counter("cluster.cluster_1.health_check.failure")->value()); } -class TcpHealthCheckIntegrationTest : public testing::TestWithParam, +class TcpHealthCheckIntegrationTest : public Event::TestUsingSimulatedTime, + public testing::TestWithParam, public HealthCheckIntegrationTestBase { public: TcpHealthCheckIntegrationTest() : HealthCheckIntegrationTestBase(GetParam()) {} @@ -450,7 +498,8 @@ TEST_P(TcpHealthCheckIntegrationTest, SingleEndpointTimeoutTcp) { EXPECT_EQ(1, test_server_->counter("cluster.cluster_1.health_check.failure")->value()); } -class GrpcHealthCheckIntegrationTest : public testing::TestWithParam, +class GrpcHealthCheckIntegrationTest : public Event::TestUsingSimulatedTime, + public testing::TestWithParam, public HealthCheckIntegrationTestBase { public: GrpcHealthCheckIntegrationTest() : HealthCheckIntegrationTestBase(GetParam()) {} diff --git a/test/mocks/upstream/host.h b/test/mocks/upstream/host.h index 80ebfe3099b0..f5e81da11ac2 100644 --- a/test/mocks/upstream/host.h +++ b/test/mocks/upstream/host.h @@ -74,7 +74,7 @@ class MockHealthCheckHostMonitor : public HealthCheckHostMonitor { MockHealthCheckHostMonitor(); ~MockHealthCheckHostMonitor() override; - MOCK_METHOD(void, setUnhealthy, ()); + MOCK_METHOD(void, setUnhealthy, (UnhealthyType)); }; class MockHostDescription : public HostDescription { @@ -174,9 +174,7 @@ class MockHost : public Host { MOCK_METHOD(HealthCheckHostMonitor&, healthChecker, (), (const)); MOCK_METHOD(void, healthFlagClear, (HealthFlag flag)); MOCK_METHOD(bool, healthFlagGet, (HealthFlag flag), (const)); - MOCK_METHOD(ActiveHealthFailureType, getActiveHealthFailureType, (), (const)); MOCK_METHOD(void, healthFlagSet, (HealthFlag flag)); - MOCK_METHOD(void, setActiveHealthFailureType, (ActiveHealthFailureType type)); MOCK_METHOD(Host::Health, health, (), (const)); MOCK_METHOD(const std::string&, hostnameForHealthChecks, (), (const)); MOCK_METHOD(const std::string&, hostname, (), (const));
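For context on the interface change above: HealthCheckHostMonitor::setUnhealthy() now takes an UnhealthyType reason, so every implementation and call site has to state why the host is being taken out of rotation. A minimal sketch of the updated interface in use follows; NoopMonitor and markImmediatelyFailed() are hypothetical names used only for illustration (the real implementations are HealthCheckHostMonitorImpl and HealthCheckHostMonitorNullImpl in the hunks above).

#include "envoy/upstream/health_check_host_monitor.h"

namespace Envoy {
namespace Upstream {

// Hypothetical implementer: it must accept the new reason argument even if it ignores it,
// exactly like HealthCheckHostMonitorNullImpl in this patch.
class NoopMonitor : public HealthCheckHostMonitor {
public:
  void setUnhealthy(UnhealthyType /*type*/) override {}
};

// Hypothetical call site: callers now name the reason explicitly, as the router does when it
// sees x-envoy-immediate-health-check-fail on an upstream response.
void markImmediatelyFailed(HealthCheckHostMonitor& monitor) {
  monitor.setUnhealthy(HealthCheckHostMonitor::UnhealthyType::ImmediateHealthCheckFail);
}

} // namespace Upstream
} // namespace Envoy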
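The flag lifecycle introduced by this change can be summarized in one place: a failing active check whose response also carries x-envoy-immediate-health-check-fail leaves the host both FAILED_ACTIVE_HC and EXCLUDED_VIA_IMMEDIATE_HC_FAIL, a timed-out check on an already-failed host additionally records ACTIVE_HC_TIMEOUT, and the first passing check clears all three. The sketch below restates those transitions against the abstract Host interface; the helper names are illustrative only, and the real logic lives in healthCheckResult(), setUnhealthy() and handleSuccess() above.

#include "envoy/upstream/upstream.h"

namespace Envoy {
namespace Upstream {

// Illustrative only: what an immediate-fail response leaves behind on the host.
void onImmediateFailResponse(Host& host) {
  host.healthFlagSet(Host::HealthFlag::FAILED_ACTIVE_HC);
  host.healthFlagSet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL);
}

// Illustrative only: a timeout on an already-failed host also records the timeout reason,
// which HDS reports as TIMEOUT instead of UNHEALTHY.
void onTimeoutWhileFailed(Host& host) {
  host.healthFlagSet(Host::HealthFlag::ACTIVE_HC_TIMEOUT);
}

// Illustrative only: the first passing check clears every active-HC related flag, so the host
// rejoins the healthy/panic/spillover calculations.
void onFirstPassingCheck(Host& host) {
  host.healthFlagClear(Host::HealthFlag::ACTIVE_HC_TIMEOUT);
  host.healthFlagClear(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL);
  host.healthFlagClear(Host::HealthFlag::FAILED_ACTIVE_HC);
}

} // namespace Upstream
} // namespace Envoy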
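How the exclusion takes effect is split across two hunks: partitionHostList() and partitionHostsPerLocality() move a host into the excluded list when the predicate below matches, and the new runtime entry acts as a kill switch. Both fragments are lifted from the changes above and shown together only to make the interaction easier to follow; they are excerpts, not a standalone compilation unit, and the runtime override is the same one the new unit and filter tests merge to disable the behavior.

// From the anonymous namespace in source/common/upstream/upstream_impl.cc above.
bool excludeBasedOnHealthFlag(const Host& host) {
  return host.healthFlagGet(Host::HealthFlag::PENDING_ACTIVE_HC) ||
         (host.healthFlagGet(Host::HealthFlag::EXCLUDED_VIA_IMMEDIATE_HC_FAIL) &&
          Runtime::runtimeFeatureEnabled(
              "envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster"));
}

// Disabling the guard (as the new tests do) restores the previous behavior: the host is still
// marked failed, but it keeps counting toward the panic/spillover denominators.
Runtime::LoaderSingleton::getExisting()->mergeValues(
    {{"envoy.reloadable_features.health_check.immediate_failure_exclude_from_cluster", "false"}});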