Skip to content

Commit

Permalink
Add v3 API metrics (#9468)
Browse files Browse the repository at this point in the history
* Enable envoy discovery service

* Attempt to move version to 1.18.2

* Bump version to 1.18.3

* Update server version info

* Remove pip3 and use pip

* Add pip to Envoy image

* Fix Envoy YAML for API v3

* Add API v3 env

* Add missing metrics

* Update e2e tests

* Fix metadata columns

* Remove assert all

* Assert v3 metrics are at least 0

* Fix metadata

* Fix bench

* Rename cluster_name tag

* Add v3 metrics description

Co-authored-by: HantingZHANG <[email protected]>
Co-authored-by: Luis Gonzalez <[email protected]>
  • Loading branch information
3 people authored and alexandre-normand committed Jun 23, 2021
1 parent 398d3f0 commit 62b6822
Show file tree
Hide file tree
Showing 30 changed files with 680 additions and 37 deletions.
66 changes: 66 additions & 0 deletions envoy/datadog_checks/envoy/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,14 @@
),
'method': 'monotonic_count',
},
'cluster_manager.cds.update_time': {
'tags': (
(),
(),
(),
),
'method': 'gauge',
},
'cluster_manager.cds.version': {
'tags': (
(),
Expand Down Expand Up @@ -603,6 +611,14 @@
),
'method': 'monotonic_count',
},
'http.rds.update_time': {
'tags': (
('stat_prefix', ),
('route_config_name', ),
(),
),
'method': 'gauge',
},
'http.rds.version': {
'tags': (
('stat_prefix', ),
Expand Down Expand Up @@ -1496,6 +1512,14 @@
),
'method': 'monotonic_count',
},
'listener_manager.lds.update_time': {
'tags': (
(),
(),
(),
),
'method': 'gauge',
},
'listener_manager.lds.version': {
'tags': (
(),
Expand Down Expand Up @@ -1566,6 +1590,13 @@
),
'method': 'monotonic_count',
},
'http.downstream_cx_http3_total': {
'tags': (
('stat_prefix', ),
(),
),
'method': 'monotonic_count',
},
'http.downstream_cx_destroy': {
'tags': (
('stat_prefix', ),
Expand Down Expand Up @@ -1643,6 +1674,13 @@
),
'method': 'gauge',
},
'http.downstream_cx_http3_active': {
'tags': (
('stat_prefix', ),
(),
),
'method': 'gauge',
},
'http.downstream_cx_protocol_error': {
'tags': (
('stat_prefix', ),
Expand Down Expand Up @@ -1734,6 +1772,13 @@
),
'method': 'monotonic_count',
},
'http.downstream_rq_http3_total': {
'tags': (
('stat_prefix', ),
(),
),
'method': 'monotonic_count',
},
'http.downstream_rq_active': {
'tags': (
('stat_prefix', ),
Expand Down Expand Up @@ -2063,6 +2108,13 @@
),
'method': 'monotonic_count',
},
'cluster.upstream_cx_http3_total': {
'tags': (
('envoy_cluster', ),
(),
),
'method': 'monotonic_count',
},
'cluster.upstream_cx_connect_fail': {
'tags': (
('envoy_cluster', ),
Expand Down Expand Up @@ -2273,6 +2325,13 @@
),
'method': 'monotonic_count',
},
'cluster.upstream_rq_max_duration_reached': {
'tags': (
('envoy_cluster', ),
(),
),
'method': 'monotonic_count',
},
'cluster.upstream_rq_timeout': {
'tags': (
('envoy_cluster', ),
Expand Down Expand Up @@ -3358,6 +3417,13 @@
),
'method': 'monotonic_count',
},
'sds.key_rotation_failed': {
'tags': (
('envoy_secret', ),
(),
),
'method': 'monotonic_count',
},
}
# fmt: on

Expand Down
14 changes: 14 additions & 0 deletions envoy/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ envoy.cluster_manager.cds.update_attempt,count,,request,,Total API fetches attem
envoy.cluster_manager.cds.update_success,count,,request,,Total API fetches completed successfully,1,envoy,cds successful api accesses
envoy.cluster_manager.cds.update_failure,count,,request,,Total API fetches that failed because of network errors,-1,envoy,cds failed api accesses
envoy.cluster_manager.cds.update_rejected,count,,request,,Total API fetches that failed because of schema/validation errors,-1,envoy,cds rejected api accesses
envoy.cluster_manager.cds.update_time,gauge,,millisecond,,Timestamp of the last successful API fetch attempt as milliseconds since the epoch,0,envoy,cds time api access
envoy.cluster_manager.cds.version,gauge,,item,,Hash of the contents from the last successful API fetch,0,envoy,
envoy.cluster_manager.cds.control_plane.connected_state,gauge,,connection,,A boolean (1 for connected and 0 for disconnected) that indicates the current connection state with management server,0,envoy,cds control plane state
envoy.cluster_manager.cds.control_plane.pending_requests,gauge,,request,,Total number of pending requests when the rate limit was enforced,0,envoy,cds pending control plane requests
Expand Down Expand Up @@ -168,6 +169,7 @@ envoy.listener_manager.lds.update_attempt,count,,request,,Total API fetches atte
envoy.listener_manager.lds.update_success,count,,request,,Total API fetches completed successfully,1,envoy,lds successful api accesses
envoy.listener_manager.lds.update_failure,count,,request,,Total API fetches that failed because of network errors,-1,envoy,lds failed api accesses
envoy.listener_manager.lds.update_rejected,count,,request,,Total API fetches that failed because of schema/validation errors,-1,envoy,lds rejected api accesses
envoy.listener_manager.lds.update_time,gauge,,millisecond,,Timestamp of the last successful API fetch attempt as milliseconds since the epoch,0,envoy,lds time api access
envoy.listener_manager.lds.version,gauge,,item,,Hash of the contents from the last successful API fetch,0,envoy,
envoy.listener_manager.lds.control_plane.connected_state,gauge,,connection,,A boolean (1 for connected and 0 for disconnected) that indicates the current connection state with management server,0,envoy,lds control plane state
envoy.listener_manager.lds.control_plane.pending_requests,gauge,,request,,Total number of pending requests when the rate limit was enforced,0,envoy,lds pending control plane requests
Expand All @@ -177,6 +179,7 @@ envoy.http.downstream_cx_ssl_total,count,,connection,,Total TLS connections,0,en
envoy.http.downstream_cx_http1_total,count,,connection,,Total HTTP/1.1 connections,0,envoy,
envoy.http.downstream_cx_websocket_total,count,,connection,,Total WebSocket connections,0,envoy,
envoy.http.downstream_cx_http2_total,count,,connection,,Total HTTP/2 connections,0,envoy,
envoy.http.downstream_cx_http3_total,count,,connection,,[API v3 only] Total HTTP/3 connections,0,envoy,
envoy.http.downstream_cx_destroy,count,,connection,,Total connections destroyed,0,envoy,
envoy.http.downstream_cx_destroy_remote,count,,connection,,Total connections destroyed due to remote close,0,envoy,
envoy.http.downstream_cx_destroy_local,count,,connection,,Total connections destroyed due to local close,0,envoy,
Expand All @@ -188,6 +191,7 @@ envoy.http.downstream_cx_ssl_active,gauge,,connection,,Total active TLS connecti
envoy.http.downstream_cx_http1_active,gauge,,connection,,Total active HTTP/1.1 connections,0,envoy,
envoy.http.downstream_cx_websocket_active,gauge,,connection,,Total active WebSocket connections,0,envoy,
envoy.http.downstream_cx_http2_active,gauge,,connection,,Total active HTTP/2 connections,0,envoy,
envoy.http.downstream_cx_http3_active,gauge,,connection,,[API v3 only] Total active HTTP/3 connections,0,envoy,
envoy.http.downstream_cx_protocol_error,count,,error,,Total protocol errors,-1,envoy,
envoy.http.downstream_cx_rx_bytes_total,count,,byte,,Total bytes received,0,envoy,
envoy.http.downstream_cx_rx_bytes_buffered,gauge,,byte,,Total received bytes currently buffered,0,envoy,
Expand All @@ -200,6 +204,7 @@ envoy.http.downstream_flow_control_resumed_reading_total,count,,occurrence,,Tota
envoy.http.downstream_rq_total,count,,request,,Total requests,0,envoy,
envoy.http.downstream_rq_http1_total,count,,request,,Total HTTP/1.1 requests,0,envoy,
envoy.http.downstream_rq_http2_total,count,,request,,Total HTTP/2 requests,0,envoy,
envoy.http.downstream_rq_http3_total,count,,request,,[API v3 only] Total HTTP/3 requests,0,envoy,
envoy.http.downstream_rq_active,gauge,,request,,Total active requests,0,envoy,
envoy.http.downstream_rq_response_before_rq_complete,count,,response,,Total responses sent before the request was complete,0,envoy,
envoy.http.downstream_rq_rx_reset,count,,request,,Total request resets received,0,envoy,
Expand Down Expand Up @@ -239,6 +244,7 @@ envoy.cluster.upstream_cx_total,count,,connection,,Total connections,0,envoy,
envoy.cluster.upstream_cx_active,gauge,,connection,,Total active connections,0,envoy,
envoy.cluster.upstream_cx_http1_total,count,,connection,,Total HTTP/1.1 connections,0,envoy,
envoy.cluster.upstream_cx_http2_total,count,,connection,,Total HTTP/2 connections,0,envoy,
envoy.cluster.upstream_cx_http3_total,count,,connection,,[API v3 only] Total HTTP/3 connections,0,envoy,
envoy.cluster.upstream_cx_connect_fail,count,,error,,Total connection failures,-1,envoy,
envoy.cluster.upstream_cx_connect_timeout,count,,timeout,,Total connection timeouts,-1,envoy,
envoy.cluster.upstream_cx_connect_attempts_exceeded,count,,error,,Total consecutive connection failures exceeding configured connection attempts,-1,envoy,
Expand Down Expand Up @@ -267,6 +273,7 @@ envoy.cluster.upstream_rq_pending_failure_eject,count,,request,,Total requests t
envoy.cluster.upstream_rq_pending_active,gauge,,request,,Total active requests pending a connection pool connection,-1,envoy,
envoy.cluster.upstream_rq_cancelled,count,,request,,Total requests cancelled before obtaining a connection pool connection,-1,envoy,
envoy.cluster.upstream_rq_maintenance_mode,count,,request,,Total requests that resulted in an immediate 503 due to maintenance mode,-1,envoy,
envoy.cluster.upstream_rq_max_duration_reached,count,,request,,Total requests closed due to max duration reached,0,envoy,
envoy.cluster.upstream_rq_timeout,count,,request,,Total requests that timed out waiting for a response,-1,envoy,
envoy.cluster.upstream_rq_per_try_timeout,count,,request,,Total requests that hit the per try timeout,-1,envoy,
envoy.cluster.upstream_rq_rx_reset,count,,request,,Total requests that were reset remotely,0,envoy,
Expand Down Expand Up @@ -332,6 +339,7 @@ envoy.cluster.http2.rx_reset,count,,message,,Total number of reset stream frames
envoy.cluster.http2.too_many_header_frames,count,,occurrence,,Total number of times an HTTP2 connection is reset due to receiving too many headers frames. Envoy currently supports proxying at most one header frame for 100-Continue one non-100 response code header frame and one frame with trailers.,-1,envoy,
envoy.cluster.http2.trailers,count,,item,,Total number of trailers seen on requests coming from downstream,0,envoy,
envoy.cluster.http2.tx_reset,count,,message,,Total number of reset stream frames transmitted by Envoy,0,envoy,
envoy.cluster.original_dst_host_invalid,count,,,,Total number of invalid hosts passed to original destination load balancer,0,envoy,
envoy.cluster.outlier_detection.ejections_enforced_total,count,,,,Number of enforced ejections due to any outlier type,-1,envoy,
envoy.cluster.outlier_detection.ejections_active,gauge,,,,Number of currently ejected hosts,-1,envoy,
envoy.cluster.outlier_detection.ejections_overflow,count,,,,Number of ejections aborted due to the max ejection %,-1,envoy,
Expand Down Expand Up @@ -544,6 +552,7 @@ envoy.listener.downstream_cx_length_ms.75percentile,gauge,,millisecond,,Connecti
envoy.listener.downstream_cx_length_ms.90percentile,gauge,,millisecond,,Connection length in milliseconds 90-percentile,-1,envoy,
envoy.listener.downstream_cx_length_ms.95percentile,gauge,,millisecond,,Connection length in milliseconds 95-percentile,-1,envoy,
envoy.listener.downstream_cx_length_ms.99percentile,gauge,,millisecond,,Connection length in milliseconds 99-percentile,-1,envoy,
envoy.listener.downstream_cx_length_ms.99_5percentile,gauge,,millisecond,,Connection length in milliseconds 99.5-percentile,-1,envoy,
envoy.listener.downstream_cx_length_ms.99_9percentile,gauge,,millisecond,,Connection length in milliseconds 99.9-percentile,-1,envoy,
envoy.listener.downstream_cx_length_ms.100percentile,gauge,,millisecond,,Connection length in milliseconds 100-percentile,-1,envoy,
envoy.http.downstream_cx_length_ms.0percentile,gauge,,millisecond,,Connection length in milliseconds 0-percentile,-1,envoy,
Expand All @@ -553,6 +562,7 @@ envoy.http.downstream_cx_length_ms.75percentile,gauge,,millisecond,,Connection l
envoy.http.downstream_cx_length_ms.90percentile,gauge,,millisecond,,Connection length in milliseconds 90-percentile,-1,envoy,
envoy.http.downstream_cx_length_ms.95percentile,gauge,,millisecond,,Connection length in milliseconds 95-percentile,-1,envoy,
envoy.http.downstream_cx_length_ms.99percentile,gauge,,millisecond,,Connection length in milliseconds 99-percentile,-1,envoy,
envoy.http.downstream_cx_length_ms.99_5percentile,gauge,,millisecond,,Connection length in milliseconds 99.5-percentile,-1,envoy,
envoy.http.downstream_cx_length_ms.99_9percentile,gauge,,millisecond,,Connection length in milliseconds 99.9-percentile,-1,envoy,
envoy.http.downstream_cx_length_ms.100percentile,gauge,,millisecond,,Connection length in milliseconds 100-percentile,-1,envoy,
envoy.http.downstream_rq_time.0percentile,gauge,,millisecond,,Request time in milliseconds 0-percentile,-1,envoy,
Expand All @@ -562,6 +572,7 @@ envoy.http.downstream_rq_time.75percentile,gauge,,millisecond,,Request time in m
envoy.http.downstream_rq_time.90percentile,gauge,,millisecond,,Request time in milliseconds 90-percentile,-1,envoy,
envoy.http.downstream_rq_time.95percentile,gauge,,millisecond,,Request time in milliseconds 95-percentile,-1,envoy,
envoy.http.downstream_rq_time.99percentile,gauge,,millisecond,,Request time in milliseconds 99-percentile,-1,envoy,
envoy.http.downstream_rq_time.99_5percentile,gauge,,millisecond,,Request time in milliseconds 99.5-percentile,-1,envoy,
envoy.http.downstream_rq_time.99_9percentile,gauge,,millisecond,,Request time in milliseconds 99.9-percentile,-1,envoy,
envoy.http.downstream_rq_time.100percentile,gauge,,millisecond,,Request time in milliseconds 100-percentile,-1,envoy,
envoy.cluster.upstream_cx_connect_ms.0percentile,gauge,,millisecond,,Connection establishment in milliseconds 0-percentile,-1,envoy,
Expand All @@ -571,6 +582,7 @@ envoy.cluster.upstream_cx_connect_ms.75percentile,gauge,,millisecond,,Connection
envoy.cluster.upstream_cx_connect_ms.90percentile,gauge,,millisecond,,Connection establishment in milliseconds 90-percentile,-1,envoy,
envoy.cluster.upstream_cx_connect_ms.95percentile,gauge,,millisecond,,Connection establishment in milliseconds 95-percentile,-1,envoy,
envoy.cluster.upstream_cx_connect_ms.99percentile,gauge,,millisecond,,Connection establishment in milliseconds 99-percentile,-1,envoy,
envoy.cluster.upstream_cx_connect_ms.99_5percentile,gauge,,millisecond,,Connection establishment in milliseconds 99.5-percentile,-1,envoy,
envoy.cluster.upstream_cx_connect_ms.99_9percentile,gauge,,millisecond,,Connection establishment in milliseconds 99.9-percentile,-1,envoy,
envoy.cluster.upstream_cx_connect_ms.100percentile,gauge,,millisecond,,Connection establishment in milliseconds 100-percentile,-1,envoy,
envoy.cluster.upstream_cx_length_ms.0percentile,gauge,,millisecond,,Connection length in milliseconds 0-percentile,0,envoy,
Expand All @@ -580,6 +592,7 @@ envoy.cluster.upstream_cx_length_ms.75percentile,gauge,,millisecond,,Connection
envoy.cluster.upstream_cx_length_ms.90percentile,gauge,,millisecond,,Connection length in milliseconds 90-percentile,0,envoy,
envoy.cluster.upstream_cx_length_ms.95percentile,gauge,,millisecond,,Connection length in milliseconds 95-percentile,0,envoy,
envoy.cluster.upstream_cx_length_ms.99percentile,gauge,,millisecond,,Connection length in milliseconds 99-percentile,0,envoy,
envoy.cluster.upstream_cx_length_ms.99_5percentile,gauge,,millisecond,,Connection length in milliseconds 99.5-percentile,0,envoy,
envoy.cluster.upstream_cx_length_ms.99_9percentile,gauge,,millisecond,,Connection length in milliseconds 99.9-percentile,0,envoy,
envoy.cluster.upstream_cx_length_ms.100percentile,gauge,,millisecond,,Connection length in milliseconds 100-percentile,0,envoy,
envoy.cluster.upstream_rq_time.0percentile,gauge,,millisecond,,Request time in milliseconds 0-percentile,-1,envoy,
Expand Down Expand Up @@ -627,3 +640,4 @@ envoy.cluster.zone.upstream_rq_time.95percentile,gauge,,millisecond,,Zone reques
envoy.cluster.zone.upstream_rq_time.99percentile,gauge,,millisecond,,Zone request time in milliseconds 99-percentile,-1,envoy,
envoy.cluster.zone.upstream_rq_time.99_9percentile,gauge,,millisecond,,Zone request time in milliseconds 99.9-percentile,-1,envoy,
envoy.cluster.zone.upstream_rq_time.100percentile,gauge,,millisecond,,Zone request time in milliseconds 100-percentile,-1,envoy,
envoy.sds.key_rotation_failed,count,,,,[API v3 only] Total number of filesystem key rotations that failed outside of an SDS update,-1,envoy,
1 change: 1 addition & 0 deletions envoy/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

HERE = os.path.dirname(os.path.abspath(__file__))
FIXTURE_DIR = os.path.join(HERE, 'fixtures')
FLAVOR = os.getenv('FLAVOR', 'api_v3')

HOST = get_docker_hostname()
PORT = '8001'
Expand Down
5 changes: 2 additions & 3 deletions envoy/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@

from datadog_checks.dev import docker_run

from .common import INSTANCES
from .common import FLAVOR, INSTANCES

HERE = os.path.dirname(os.path.abspath(__file__))
DOCKER_DIR = os.path.join(HERE, 'docker')


@pytest.fixture(scope='session')
def dd_environment():
flavor = os.getenv('FLAVOR', 'default')
instance = INSTANCES['main']

with docker_run(
os.path.join(DOCKER_DIR, flavor, 'docker-compose.yaml'),
os.path.join(DOCKER_DIR, FLAVOR, 'docker-compose.yaml'),
build=True,
endpoints=instance['stats_url'],
log_patterns=['all dependencies initialized. starting workers'],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
FROM envoyproxy/envoy-alpine:v1.14.1

RUN apk update && apk add python3 bash
RUN apk update && apk add python3 bash py-pip
RUN python3 --version && pip3 --version
RUN pip3 install -q Flask==0.11.1 requests==2.18.4
RUN mkdir /code
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
static_resources:
listeners:
- address:
socket_address:
address: 0.0.0.0
port_value: 80
socket_address: {address: 0.0.0.0, port_value: 80}
filter_chains:
- filters:
- name: envoy.http_connection_manager
config:
codec_type: auto
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
codec_type: AUTO
stat_prefix: ingress_http
route_config:
name: local_route
virtual_hosts:
- name: backend
domains:
- "*"
domains: ["*"]
routes:
- match:
prefix: "/service/1"
Expand All @@ -27,12 +25,11 @@ static_resources:
cluster: service2
http_filters:
- name: envoy.router
config: {}
clusters:
- name: service1
connect_timeout: 0.25s
type: strict_dns
lb_policy: round_robin
type: STRICT_DNS
lb_policy: ROUND_ROBIN
http2_protocol_options: {}
load_assignment:
cluster_name: service1
Expand Down Expand Up @@ -73,15 +70,19 @@ static_resources:
port_value: 8080
dynamic_resources:
cds_config:
resource_api_version: V3
api_config_source:
api_type: GRPC
transport_api_version: V3
grpc_services:
- envoy_grpc:
cluster_name: xds_cluster
set_node_on_first_message_only: true
lds_config:
resource_api_version: V3
api_config_source:
api_type: GRPC
transport_api_version: V3
grpc_services:
- envoy_grpc:
cluster_name: xds_cluster
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,34 +1,29 @@
static_resources:
listeners:
- address:
socket_address:
address: 0.0.0.0
port_value: 80
socket_address: {address: 0.0.0.0, port_value: 80}
filter_chains:
- filters:
- name: envoy.http_connection_manager
config:
codec_type: auto
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
codec_type: AUTO
stat_prefix: ingress_http
route_config:
name: local_route
virtual_hosts:
- name: service
domains:
- "*"
domains: ["*"]
routes:
- match:
prefix: "/service"
route:
cluster: local_service
- match: {prefix: "/service"}
route: {cluster: local_service}
http_filters:
- name: envoy.router
config: {}
clusters:
- name: local_service
connect_timeout: 0.25s
type: strict_dns
lb_policy: round_robin
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: local_service
endpoints:
Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 62b6822

Please sign in to comment.