Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into bufferDump
Browse files Browse the repository at this point in the history
Signed-off-by: Kevin Baichoo <[email protected]>
  • Loading branch information
KBaichoo committed Feb 1, 2021
2 parents 020ed25 + deed328 commit d3a50a5
Show file tree
Hide file tree
Showing 149 changed files with 2,087 additions and 2,464 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ extensions/filters/common/original_src @snowp @klarose
# tracers.xray extension
/*/extensions/tracers/xray @marcomagdy @lavignes @mattklein123
# tracers.skywalking extension
/*/extensions/tracers/skywalking @wbpcode @dio @lizan
/*/extensions/tracers/skywalking @wbpcode @dio @lizan @Shikugawa
# mysql_proxy extension
/*/extensions/filters/network/mysql_proxy @rshriram @venilnoronha @mattklein123
# postgres_proxy extension
Expand Down
31 changes: 0 additions & 31 deletions api/bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,6 @@ def api_dependencies():
name = "com_github_openzipkin_zipkinapi",
build_file_content = ZIPKINAPI_BUILD_CONTENT,
)
external_http_archive(
name = "com_github_apache_skywalking_data_collect_protocol",
build_file_content = SKYWALKING_DATA_COLLECT_PROTOCOL_BUILD_CONTENT,
)

PROMETHEUSMETRICS_BUILD_CONTENT = """
load("@envoy_api//bazel:api_build_system.bzl", "api_cc_py_proto_library")
Expand Down Expand Up @@ -105,30 +101,3 @@ go_proto_library(
visibility = ["//visibility:public"],
)
"""

SKYWALKING_DATA_COLLECT_PROTOCOL_BUILD_CONTENT = """
load("@rules_proto//proto:defs.bzl", "proto_library")
load("@rules_cc//cc:defs.bzl", "cc_proto_library")
load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
proto_library(
name = "protocol",
srcs = [
"common/Common.proto",
"language-agent/Tracing.proto",
],
visibility = ["//visibility:public"],
)
cc_proto_library(
name = "protocol_cc_proto",
deps = [":protocol"],
visibility = ["//visibility:public"],
)
go_proto_library(
name = "protocol_go_proto",
proto = ":protocol",
visibility = ["//visibility:public"],
)
"""
11 changes: 0 additions & 11 deletions api/bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,4 @@ REPOSITORY_LOCATIONS_SPEC = dict(
release_date = "2020-08-17",
use_category = ["api"],
),
com_github_apache_skywalking_data_collect_protocol = dict(
project_name = "SkyWalking API",
project_desc = "SkyWalking's language independent model and gRPC API Definitions",
project_url = "https://github.com/apache/skywalking-data-collect-protocol",
version = "8.1.0",
sha256 = "ebea8a6968722524d1bcc4426fb6a29907ddc2902aac7de1559012d3eee90cf9",
strip_prefix = "skywalking-data-collect-protocol-{version}",
urls = ["https://github.com/apache/skywalking-data-collect-protocol/archive/v{version}.tar.gz"],
release_date = "2020-07-29",
use_category = ["api"],
),
)
9 changes: 8 additions & 1 deletion api/envoy/admin/v3/clusters.proto
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ message HostStatus {
}

// Health status for a host.
// [#next-free-field: 7]
// [#next-free-field: 9]
message HostHealthStatus {
option (udpa.annotations.versioning).previous_message_type =
"envoy.admin.v2alpha.HostHealthStatus";
Expand All @@ -160,6 +160,13 @@ message HostHealthStatus {
// The host has not yet been health checked.
bool pending_active_hc = 6;

// The host should be excluded from panic, spillover, etc. calculations because it was explicitly
// taken out of rotation via a protocol signal and is not meant to be routed to.
bool excluded_via_immediate_hc_fail = 7;

// The host failed active HC due to timeout.
bool active_hc_timeout = 8;

// Health status as reported by EDS. Note: only HEALTHY and UNHEALTHY are currently supported
// here.
// [#comment:TODO(mrice32): pipe through remaining EDS health status possibilities.]
Expand Down
9 changes: 8 additions & 1 deletion api/envoy/admin/v4alpha/clusters.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 3 additions & 19 deletions api/envoy/config/cluster/v3/cluster.proto
Original file line number Diff line number Diff line change
Expand Up @@ -536,25 +536,9 @@ message Cluster {
// https://github.com/envoyproxy/envoy/pull/3941.
google.protobuf.Duration update_merge_window = 4;

// If set to true, Envoy will not consider new hosts when computing load balancing weights until
// they have been health checked for the first time. This will have no effect unless
// active health checking is also configured.
//
// Ignoring a host means that for any load balancing calculations that adjust weights based
// on the ratio of eligible hosts and total hosts (priority spillover, locality weighting and
// panic mode) Envoy will exclude these hosts in the denominator.
//
// For example, with hosts in two priorities P0 and P1, where P0 looks like
// {healthy, unhealthy (new), unhealthy (new)}
// and where P1 looks like
// {healthy, healthy}
// all traffic will still hit P0, as 1 / (3 - 2) = 1.
//
// Enabling this will allow scaling up the number of hosts for a given cluster without entering
// panic mode or triggering priority spillover, assuming the hosts pass the first health check.
//
// If panic mode is triggered, new hosts are still eligible for traffic; they simply do not
// contribute to the calculation when deciding whether panic mode is enabled or not.
// If set to true, Envoy will :ref:`exclude <arch_overview_load_balancing_excluded>` new hosts
// when computing load balancing weights until they have been health checked for the first time.
// This will have no effect unless active health checking is also configured.
bool ignore_new_hosts_until_first_hc = 5;

// If set to `true`, the cluster manager will drain all existing
Expand Down
22 changes: 3 additions & 19 deletions api/envoy/config/cluster/v4alpha/cluster.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions api/envoy/data/core/v3/health_check_event.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ enum HealthCheckFailureType {
ACTIVE = 0;
PASSIVE = 1;
NETWORK = 2;
NETWORK_TIMEOUT = 3;
}

enum HealthCheckerType {
Expand Down
13 changes: 13 additions & 0 deletions bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def envoy_dependencies(skip_targets = []):
_com_github_luajit_luajit()
_com_github_moonjit_moonjit()
_com_github_nghttp2_nghttp2()
_com_github_skyapm_cpp2sky()
_com_github_nodejs_http_parser()
_com_github_tencent_rapidjson()
_com_google_absl()
Expand Down Expand Up @@ -418,6 +419,18 @@ def _com_github_datadog_dd_opentracing_cpp():
actual = "@com_github_datadog_dd_opentracing_cpp//:dd_opentracing_cpp",
)

def _com_github_skyapm_cpp2sky():
external_http_archive(
name = "com_github_skyapm_cpp2sky",
)
external_http_archive(
name = "skywalking_data_collect_protocol",
)
native.bind(
name = "cpp2sky",
actual = "@com_github_skyapm_cpp2sky//source:cpp2sky_data_lib",
)

def _com_github_tencent_rapidjson():
external_http_archive(
name = "com_github_tencent_rapidjson",
Expand Down
27 changes: 27 additions & 0 deletions bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,33 @@ REPOSITORY_LOCATIONS_SPEC = dict(
release_date = "2020-08-25",
cpe = "N/A",
),
skywalking_data_collect_protocol = dict(
project_name = "skywalking-data-collect-protocol",
project_desc = "Data Collect Protocols of Apache SkyWalking",
project_url = "https://github.com/apache/skywalking-data-collect-protocol",
name = "skywalking_data_collect_protocol",
sha256 = "fa9ac679624217f30b6e8d5c450365386c610e2d08188a20f0340c3b14401833",
urls = ["https://github.com/apache/skywalking-data-collect-protocol/archive/v8.3.0.zip"],
strip_prefix = "skywalking-data-collect-protocol-8.3.0",
version = "8.3.0",
use_category = ["observability_ext"],
extensions = ["envoy.tracers.skywalking"],
release_date = "2020-11-20",
cpe = "N/A",
),
com_github_skyapm_cpp2sky = dict(
project_name = "cpp2sky",
project_desc = "C++ SDK for Apache SkyWalking",
project_url = "https://github.com/SkyAPM/cpp2sky",
sha256 = "a8d870bb4b1c4a05eae319f689d1948927f3f0a5b5fe524db73a4c04121a339a",
version = "0.1.1",
strip_prefix = "cpp2sky-{version}",
urls = ["https://github.com/SkyAPM/cpp2sky/archive/v{version}.tar.gz"],
use_category = ["observability_ext"],
extensions = ["envoy.tracers.skywalking"],
release_date = "2021-01-15",
cpe = "N/A",
),
com_github_datadog_dd_opentracing_cpp = dict(
project_name = "Datadog OpenTracing C++ Client",
project_desc = "Datadog OpenTracing C++ Client",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ Health check

Note that the filter will automatically fail health checks and set the
:ref:`x-envoy-immediate-health-check-fail
<config_http_filters_router_x-envoy-immediate-health-check-fail>` header if the
:ref:`/healthcheck/fail <operations_admin_interface_healthcheck_fail>` admin endpoint has been
called. (The :ref:`/healthcheck/ok <operations_admin_interface_healthcheck_ok>` admin endpoint
reverses this behavior).
<config_http_filters_router_x-envoy-immediate-health-check-fail>` header on all responses (both
health check and normal requests) if the :ref:`/healthcheck/fail
<operations_admin_interface_healthcheck_fail>` admin endpoint has been called. (The
:ref:`/healthcheck/ok <operations_admin_interface_healthcheck_ok>` admin endpoint reverses this
behavior).
13 changes: 7 additions & 6 deletions docs/root/configuration/http/http_filters/router_filter.rst
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ x-envoy-upstream-rq-timeout-alt-response
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Setting this header will cause Envoy to set a 204 response code (instead of 504) in the event of a request timeout.
The actual value of the header is ignored; only its presence is considered. See also
The actual value of the header is ignored; only its presence is considered. See also
:ref:`config_http_filters_router_x-envoy-upstream-rq-timeout-ms`.

.. _config_http_filters_router_x-envoy-upstream-rq-timeout-ms:
Expand Down Expand Up @@ -294,11 +294,12 @@ x-envoy-immediate-health-check-fail

If the upstream host returns this header (set to any value), Envoy will immediately assume the
upstream host has failed :ref:`active health checking <arch_overview_health_checking>` (if the
cluster has been :ref:`configured <config_cluster_manager_cluster_hc>` for active health checking).
This can be used to fast fail an upstream host via standard data plane processing without waiting
for the next health check interval. The host can become healthy again via standard active health
checks. See the :ref:`health checking overview <arch_overview_health_checking>` for more
information.
cluster has been :ref:`configured <config_cluster_manager_cluster_hc>` for active health checking)
and :ref:`exclude <arch_overview_load_balancing_excluded>` it from load balancing. This can be used
to fast fail an upstream host via standard data plane processing without waiting for the next health
check interval. The host can become healthy again via standard active health checks. See the
:ref:`active health checking fast failure overview <arch_overview_health_checking_fast_failure>` for
more information.

.. _config_http_filters_router_x-envoy-ratelimited:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Every cluster has a statistics tree rooted at *cluster.<name>.* with the followi
upstream_rq_active, Gauge, Total active requests
upstream_rq_pending_total, Counter, Total requests pending a connection pool connection
upstream_rq_pending_overflow, Counter, Total requests that overflowed connection pool or requests (mainly for HTTP/2) circuit breaking and were failed
upstream_rq_pending_failure_eject, Counter, Total requests that were failed due to a connection pool connection failure or remote connection termination
upstream_rq_pending_failure_eject, Counter, Total requests that were failed due to a connection pool connection failure or remote connection termination
upstream_rq_pending_active, Gauge, Total active requests pending a connection pool connection
upstream_rq_cancelled, Counter, Total requests cancelled before obtaining a connection pool connection
upstream_rq_maintenance_mode, Counter, Total requests that resulted in an immediate 503 due to :ref:`maintenance mode<config_http_filters_router_runtime_maintenance_mode>`
Expand All @@ -87,7 +87,8 @@ Every cluster has a statistics tree rooted at *cluster.<name>.* with the followi
upstream_internal_redirect_succeed_total, Counter, Total number of times internal redirects resulted in a second upstream request.
membership_change, Counter, Total cluster membership changes
membership_healthy, Gauge, Current cluster healthy total (inclusive of both health checking and outlier detection)
membership_degraded, Gauge, Current cluster degraded total
membership_degraded, Gauge, Current cluster :ref:`degraded <arch_overview_load_balancing_degraded>` total
membership_excluded, Gauge, Current cluster :ref:`excluded <arch_overview_load_balancing_excluded>` total
membership_total, Gauge, Current cluster membership total
retry_or_shadow_abandoned, Counter, Total number of times shadowing or retry buffering was canceled due to buffer limits
config_reload, Counter, Total API fetches that resulted in a config reload due to a different config
Expand Down
12 changes: 8 additions & 4 deletions docs/root/intro/arch_overview/upstream/health_checking.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ Further reading:
* :ref:`/healthcheck/fail <operations_admin_interface_healthcheck_fail>` admin endpoint.
* :ref:`/healthcheck/ok <operations_admin_interface_healthcheck_ok>` admin endpoint.

.. _arch_overview_health_checking_fast_failure:

Active health checking fast failure
-----------------------------------

Expand All @@ -129,10 +131,12 @@ When using active health checking along with passive health checking (:ref:`outl
large amount of active health checking traffic. In this case, it is still useful to be able to
quickly drain an upstream host when using the :ref:`/healthcheck/fail
<operations_admin_interface_healthcheck_fail>` admin endpoint. To support this, the :ref:`router
filter <config_http_filters_router>` will respond to the :ref:`x-envoy-immediate-health-check-fail
filter <config_http_filters_router>` *and* the HTTP active health checker will respond to the
:ref:`x-envoy-immediate-health-check-fail
<config_http_filters_router_x-envoy-immediate-health-check-fail>` header. If this header is set by
an upstream host, Envoy will immediately mark the host as being failed for active health check. Note
that this only occurs if the host's cluster has active health checking :ref:`configured
an upstream host, Envoy will immediately mark the host as being failed for active health check and
:ref:`excluded <arch_overview_load_balancing_excluded>` from load balancing. Note that this only
occurs if the host's cluster has active health checking :ref:`configured
<config_cluster_manager_cluster_hc>`. The :ref:`health checking filter
<config_http_filters_health_check>` will automatically set this header if Envoy has been marked as
failed via the :ref:`/healthcheck/fail <operations_admin_interface_healthcheck_fail>` admin
Expand All @@ -152,7 +156,7 @@ is that overall configuration becomes more complicated as every health check URL

The Envoy HTTP health checker supports the :ref:`service_name_matcher
<envoy_v3_api_field_config.core.v3.HealthCheck.HttpHealthCheck.service_name_matcher>` option. If this option is set,
the health checker additionally compares the value of the *x-envoy-upstream-healthchecked-cluster*
the health checker additionally compares the value of the *x-envoy-upstream-healthchecked-cluster*
response header to *service_name_matcher*. If the values do not match, the health check does not pass.
The upstream health check filter appends *x-envoy-upstream-healthchecked-cluster* to the response headers.
The appended value is determined by the :option:`--service-cluster` command line option.
Expand Down
29 changes: 29 additions & 0 deletions docs/root/intro/arch_overview/upstream/load_balancing/excluded.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
.. _arch_overview_load_balancing_excluded:

Excluded endpoints
------------------

Certain conditions may cause Envoy to *exclude* endpoints from load balancing. Excluding a host
means that for any load balancing calculations that adjust weights based on the ratio of eligible
hosts to total hosts (priority spillover, locality weighting and panic mode) Envoy will leave
these hosts out of the denominator.

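For example, with hosts in two priorities P0 and P1, where P0 looks like {healthy, unhealthy
(excluded), unhealthy (excluded)} and where P1 looks like {healthy, healthy}, all traffic will still
hit P0, as 1 / (3 - 2) = 1: the two excluded hosts are subtracted from P0's total of three, so its
single healthy host accounts for the whole priority.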

Excluded hosts allow scaling up or down the number of hosts for a given cluster without entering
panic mode or triggering priority spillover.

If panic mode is triggered, excluded hosts are still eligible for traffic; they simply do not
contribute to the calculation when deciding whether panic mode is enabled or not.

Currently, the following two conditions can lead to a host being excluded when using active
health checking:

* Using the :ref:`ignore_new_hosts_until_first_hc
<envoy_v3_api_field_config.cluster.v3.Cluster.CommonLbConfig.ignore_new_hosts_until_first_hc>` cluster option.
* Receiving the :ref:`x-envoy-immediate-health-check-fail
<config_http_filters_router_x-envoy-immediate-health-check-fail>` header in a normal routed
response or in response to an :ref:`HTTP active health check
<arch_overview_health_checking_fast_failure>`.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Load Balancing
locality_weight
overprovisioning
panic_threshold
excluded
original_dst
zone_aware
subsets
Loading

0 comments on commit d3a50a5

Please sign in to comment.