Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Prometheus remote write forwarding integration test #1940

Merged
merged 1 commit into from
Sep 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 31 additions & 35 deletions scripts/docker-integration-tests/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,9 @@ function retry_with_backoff {
}

function setup_single_m3db_node {
wait_for_db_init
}

function setup_two_m3db_nodes {
local dbnode_id_1=${DBNODE_ID_01:-m3db_local_1}
local dbnode_id_2=${DBNODE_ID_02:-m3db_local_2}
local dbnode_host_1=${DBNODE_HOST_01:-dbnode01}
local dbnode_host_2=${DBNODE_HOST_02:-dbnode02}
local dbnode_host=${DBNODE_HOST:-dbnode01}
local dbnode_port=${DBNODE_PORT:-9000}
local dbnode_host_1_health_port=${DBNODE_HEALTH_PORT_01:-9012}
local dbnode_host_2_health_port=${DBNODE_HEALTH_PORT_02:-9022}
local dbnode_health_port=${DBNODE_HEALTH_PORT:-9002}
local coordinator_port=${COORDINATOR_PORT:-7201}

echo "Wait for API to be available"
Expand All @@ -66,45 +58,39 @@ function setup_two_m3db_nodes {
"type": "cluster",
"namespaceName": "agg",
"retentionTime": "6h",
"num_shards": 2,
"replicationFactor": 2,
"num_shards": 4,
"replicationFactor": 1,
"hosts": [
{
"id": "'"${dbnode_id_1}"'",
"id": "m3db_local",
"isolation_group": "rack-a",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_1}"'",
"port": '"${dbnode_port}"'
},
{
"id": "'"${dbnode_id_2}"'",
"isolation_group": "rack-b",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_2}"'",
"address": "'"${dbnode_host}"'",
"port": '"${dbnode_port}"'
}
]
}'

echo "Wait until placement is init'd"
ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.'"${dbnode_id_1}"'.id)" == \"'"${dbnode_id_1}"'\" ]'
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.m3db_local.id)" == \"m3db_local\" ]'

wait_for_namespaces

echo "Wait until bootstrapped"
ATTEMPTS=100 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_1_health_port}"'/health | jq .bootstrapped)" == true ]'
ATTEMPTS=100 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_2_health_port}"'/health | jq .bootstrapped)" == true ]'
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_health_port}"'/health | jq .bootstrapped)" == true ]'
}

function wait_for_db_init {
local dbnode_host=${DBNODE_HOST:-dbnode01}
function setup_two_m3db_nodes {
local dbnode_id_1=${DBNODE_ID_01:-m3db_local_1}
local dbnode_id_2=${DBNODE_ID_02:-m3db_local_2}
local dbnode_host_1=${DBNODE_HOST_01:-dbnode01}
local dbnode_host_2=${DBNODE_HOST_02:-dbnode02}
local dbnode_port=${DBNODE_PORT:-9000}
local dbnode_health_port=${DBNODE_HEALTH_PORT:-9002}
local dbnode_host_1_health_port=${DBNODE_HEALTH_PORT_01:-9012}
local dbnode_host_2_health_port=${DBNODE_HEALTH_PORT_02:-9022}
local coordinator_port=${COORDINATOR_PORT:-7201}

echo "Wait for API to be available"
Expand All @@ -116,29 +102,39 @@ function wait_for_db_init {
"type": "cluster",
"namespaceName": "agg",
"retentionTime": "6h",
"num_shards": 4,
"replicationFactor": 1,
"num_shards": 2,
"replicationFactor": 2,
"hosts": [
{
"id": "m3db_local",
"id": "'"${dbnode_id_1}"'",
"isolation_group": "rack-a",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host}"'",
"address": "'"${dbnode_host_1}"'",
"port": '"${dbnode_port}"'
},
{
"id": "'"${dbnode_id_2}"'",
"isolation_group": "rack-b",
"zone": "embedded",
"weight": 1024,
"address": "'"${dbnode_host_2}"'",
"port": '"${dbnode_port}"'
}
]
}'

echo "Wait until placement is init'd"
ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.m3db_local.id)" == \"m3db_local\" ]'
'[ "$(curl -sSf 0.0.0.0:'"${coordinator_port}"'/api/v1/placement | jq .placement.instances.'"${dbnode_id_1}"'.id)" == \"'"${dbnode_id_1}"'\" ]'

wait_for_namespaces

echo "Wait until bootstrapped"
ATTEMPTS=100 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_health_port}"'/health | jq .bootstrapped)" == true ]'
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_1_health_port}"'/health | jq .bootstrapped)" == true ]'
ATTEMPTS=100 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \
'[ "$(curl -sSf 0.0.0.0:'"${dbnode_host_2_health_port}"'/health | jq .bootstrapped)" == true ]'
}

function wait_for_namespaces {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Compose topology for the Prometheus replication test: two m3dbnode
# containers and two m3coordinator containers on one shared network.
version: "3.5"
services:
# DB node whose ports are published on the host under the same numbers.
dbnode01:
expose:
- "9000-9004"
- "2379-2380"
ports:
- "0.0.0.0:9000-9004:9000-9004"
- "0.0.0.0:2379-2380:2379-2380"
networks:
- backend
image: "m3dbnode_integration:${REVISION}"
# Coordinator published on host ports 7201 and 7203; its config is mounted
# from m3coordinator01.yml.
coordinator01:
expose:
- "7201"
- "7203"
ports:
- "0.0.0.0:7201:7201"
- "0.0.0.0:7203:7203"
networks:
- backend
image: "m3coordinator_integration:${REVISION}"
volumes:
- "./m3coordinator01.yml:/etc/m3coordinator/m3coordinator.yml"
# Second DB node: container ports 9000-9004 and 2379-2380 are published on
# the host as 19000-19004 and 12379-12380 so they do not clash with dbnode01.
dbnode02:
expose:
- "19000-19004"
- "12379-12380"
ports:
- "0.0.0.0:19000-19004:9000-9004"
- "0.0.0.0:12379-12380:2379-2380"
networks:
- backend
image: "m3dbnode_integration:${REVISION}"
# Second coordinator: container ports 7201 and 7203 are published on the host
# as 17201 and 17203; its config is mounted from m3coordinator02.yml.
coordinator02:
expose:
- "17201"
- "17203"
ports:
- "0.0.0.0:17201:7201"
- "0.0.0.0:17203:7203"
networks:
- backend
image: "m3coordinator_integration:${REVISION}"
volumes:
- "./m3coordinator02.yml:/etc/m3coordinator/m3coordinator.yml"
# Single network shared by all four services.
networks:
backend:
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Coordinator configuration that includes a writeForwarding section and uses
# dbnode01 as its etcd endpoint.
listenAddress:
type: "config"
value: "0.0.0.0:7201"

logging:
level: info

metrics:
scope:
prefix: "coordinator"
prometheus:
handlerPath: /metrics
listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved
sanitization: prometheus
samplingRate: 1.0
extended: none

# Prometheus remote write forwarding target: the remote write endpoint of
# coordinator02, addressed by service name on port 7201.
writeForwarding:
promRemoteWrite:
targets:
- url: http://coordinator02:7201/api/v1/prom/remote/write

# One cluster with an aggregated namespace (agg) and an unaggregated
# namespace (unagg).
clusters:
- namespaces:
- namespace: agg
type: aggregated
retention: 10h
resolution: 15s
- namespace: unagg
type: unaggregated
retention: 10m
client:
config:
service:
env: default_env
zone: embedded
service: m3db
cacheDir: /var/lib/m3kv
etcdClusters:
- zone: embedded
endpoints:
# etcd endpoint is dbnode01 on port 2379.
- dbnode01:2379
writeConsistencyLevel: majority
readConsistencyLevel: unstrict_majority

tagOptions:
idScheme: quoted
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Coordinator configuration that uses dbnode02 as its etcd endpoint and
# defines no writeForwarding section.
listenAddress:
type: "config"
value: "0.0.0.0:7201"

logging:
level: info

metrics:
scope:
prefix: "coordinator"
prometheus:
handlerPath: /metrics
listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved
sanitization: prometheus
samplingRate: 1.0
extended: none

# One cluster with an aggregated namespace (agg) and an unaggregated
# namespace (unagg).
clusters:
- namespaces:
- namespace: agg
type: aggregated
retention: 10h
resolution: 15s
- namespace: unagg
type: unaggregated
retention: 10m
client:
config:
service:
env: default_env
zone: embedded
service: m3db
cacheDir: /var/lib/m3kv
etcdClusters:
- zone: embedded
endpoints:
# etcd endpoint is dbnode02 on port 2379.
- dbnode02:2379
writeConsistencyLevel: majority
readConsistencyLevel: unstrict_majority

tagOptions:
idScheme: quoted
96 changes: 96 additions & 0 deletions scripts/docker-integration-tests/prometheus_replication/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env bash
#
# Integration test for Prometheus remote write forwarding between two
# coordinators. Requires GOPATH to point at a checkout containing
# github.com/m3db/m3, plus docker and docker-compose on the PATH.

set -xe

# Paths derived from GOPATH are quoted so a GOPATH containing whitespace does
# not get word-split.
source "$GOPATH/src/github.com/m3db/m3/scripts/docker-integration-tests/common.sh"
REVISION=$(git rev-parse HEAD)
COMPOSE_FILE="$GOPATH/src/github.com/m3db/m3/scripts/docker-integration-tests/prometheus_replication/docker-compose.yml"
# quay.io/m3db/prometheus_remote_client_golang @ v0.4.3
PROMREMOTECLI_IMAGE=quay.io/m3db/prometheus_remote_client_golang@sha256:fc56df819bff9a5a087484804acf3a584dd4a78c68900c31a28896ed66ca7e7b
JQ_IMAGE=realguess/jq:1.4@sha256:300c5d9fb1d74154248d155ce182e207cf6630acccbaadd0168e18b15bfaa786
# REVISION is exported because the compose file interpolates it into the
# image tags.
export REVISION

echo "Pull containers required for test"
docker pull "$PROMREMOTECLI_IMAGE"
docker pull "$JQ_IMAGE"

echo "Run m3dbnode and m3coordinator containers"
docker-compose -f "${COMPOSE_FILE}" up -d dbnode01
docker-compose -f "${COMPOSE_FILE}" up -d dbnode02
docker-compose -f "${COMPOSE_FILE}" up -d coordinator01
docker-compose -f "${COMPOSE_FILE}" up -d coordinator02

# Stops and removes the containers described by COMPOSE_FILE.
# Globals:  COMPOSE_FILE (read)
# Outputs:  prints "unable to shutdown containers" to stdout if the
#           shutdown command fails
# Returns:  0 in both cases, so a failed shutdown never changes the
#           script's exit status
function defer {
  # COMPOSE_FILE is quoted so a path containing whitespace stays one argument.
  docker-compose -f "${COMPOSE_FILE}" down || echo "unable to shutdown containers" # CI fails to stop all containers sometimes
}
# Run the container teardown on every exit path of the script.
trap defer EXIT

# The first node is reached through the ports the compose file publishes
# unchanged on the host (health 9002, coordinator 7201).
echo "Setup dbnode in first cluster"
DBNODE_HOST=dbnode01 DBNODE_PORT=9000 DBNODE_HEALTH_PORT=9002 COORDINATOR_PORT=7201 \
  setup_single_m3db_node

# The second node is reached through the remapped host ports (health 19002,
# coordinator 17201); DBNODE_PORT stays at the container-side value 9000.
echo "Setup dbnode in second cluster"
DBNODE_HOST=dbnode02 DBNODE_PORT=9000 DBNODE_HEALTH_PORT=19002 COORDINATOR_PORT=17201 \
  setup_single_m3db_node

# Writes a single datapoint to coordinator01 through the promremotecli
# container and checks the reported result.
# Globals:
#   PROMREMOTECLI_IMAGE (read) - image providing the remote write client
#   JQ_IMAGE            (read) - image providing jq
# Arguments:
#   $1 - metric name (sent as the __name__ tag)
#   $2 - datapoint timestamp
#   $3 - datapoint value
#   $4 - expected value of the "success" field in the client output
#   $5 - message printed when "success" differs from $4
#   $6 - expected value of the "statusCode" field in the client output
#   $7 - message printed (with the actual status) when it differs from $6
# Outputs: a confirmation or the relevant mismatch message on stdout
# Returns: 0 when both fields match, 1 otherwise
function prometheus_remote_write {
  local metric_name=$1
  local datapoint_timestamp=$2
  local datapoint_value=$3
  local expect_success=$4
  local expect_success_err=$5
  local expect_status=$6
  local expect_status_err=$7
  # Working variables are local so repeated calls do not leak state into the
  # sourcing script.
  local network out success status

  # Uses the last network docker lists.
  # NOTE(review): presumably this is the compose "backend" network - confirm.
  network=$(docker network ls --format '{{.ID}}' | tail -n 1)

  # A failing client (or grep selecting nothing) must not abort the caller
  # under `set -e`; the result fields are validated below instead.
  out=$(
    docker run -it --rm --network "$network" \
      "$PROMREMOTECLI_IMAGE" \
      -u http://coordinator01:7201/api/v1/prom/remote/write \
      -t "__name__:${metric_name}" \
      -d "${datapoint_timestamp},${datapoint_value}" \
      | grep -v promremotecli_log
  ) || true

  # Log lines were already filtered out above, so the output is piped to jq
  # as-is; quoting keeps it from being glob-expanded.
  success=$(printf '%s\n' "$out" | docker run --rm -i "$JQ_IMAGE" jq .success)
  status=$(printf '%s\n' "$out" | docker run --rm -i "$JQ_IMAGE" jq .statusCode)

  if [[ "$success" != "$expect_success" ]]; then
    echo "$expect_success_err"
    return 1
  fi
  if [[ "$status" != "$expect_status" ]]; then
    echo "${expect_status_err}: actual=${status}"
    return 1
  fi
  echo "Returned success=${success}, status=${status} as expected"
  return 0
}

# Verifies that a datapoint written to the first coordinator (host port 7201)
# becomes readable through the second coordinator (host port 17201).
# Relies on retry_with_backoff and prometheus_remote_write being defined.
# Outputs: progress messages on stdout
# Returns: non-zero if any retried check or the write fails
function test_replication_forwarding {
  local port

  # Make sure both are up (otherwise forwarding could fail).
  echo "Test both clusters responding to queries"
  for port in 7201 17201; do
    ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
      '[[ $(curl -s 0.0.0.0:'"${port}"'/api/v1/query?query=any | jq -r ".data.result | length") -eq 0 ]]'
  done

  # Test writing.
  # The timestamp argument is the literal word `now`, not a shell variable.
  # NOTE(review): presumably the remote write client resolves `now` to the
  # current time itself - confirm against the client's documentation.
  echo "Test write data to first cluster"
  prometheus_remote_write \
    "foo_replicate" now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"

  # Test queries can eventually read back replicated data from second
  # cluster using port 17201 from the second cluster's coordinator
  echo "Test read replicated data"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s 0.0.0.0:17201/api/v1/query?query=foo_replicate | jq -r ".data.result | length") -gt 0 ]]'
}

# Run all tests. With `set -e` active, a non-zero return from the test
# function ends the script with a failing status.
test_replication_forwarding
1 change: 1 addition & 0 deletions scripts/docker-integration-tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ TESTS=(
scripts/docker-integration-tests/simple/test.sh
scripts/docker-integration-tests/cold_writes_simple/test.sh
scripts/docker-integration-tests/prometheus/test.sh
scripts/docker-integration-tests/prometheus_replication/test.sh
scripts/docker-integration-tests/carbon/test.sh
scripts/docker-integration-tests/aggregator/test.sh
scripts/docker-integration-tests/query_fanout/test.sh
Expand Down
Loading