Skip to content

Commit

Permalink
Add e2e test for NPD
Browse files Browse the repository at this point in the history
The first test is a very simple test. It installs NPD on a VM, and then
verifies that NPD reports metric host_uptime in Prometheus format.
  • Loading branch information
Xuewei Zhang committed Aug 13, 2019
1 parent 4a31954 commit 6599e5d
Show file tree
Hide file tree
Showing 13 changed files with 757 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
/*.tar.gz
ci.env
pr.env
junit*.xml
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ ifneq ($(BUILD_TAGS), "")
BUILD_TAGS:=-tags "$(BUILD_TAGS)"
endif


vet:
GO111MODULE=on go list -mod vendor $(BUILD_TAGS) ./... | \
grep -v "./vendor/*" | \
Expand Down Expand Up @@ -107,15 +106,22 @@ Dockerfile: Dockerfile.in
sed -e 's|@BASEIMAGE@|$(BASEIMAGE)|g' $< >$@

test: vet fmt
GO111MODULE=on go test -mod vendor -timeout=1m -v -race $(BUILD_TAGS) ./...
GO111MODULE=on go test -mod vendor -timeout=1m -v -race -short $(BUILD_TAGS) ./...

# e2e-test runs the end-to-end tests under ./test/e2e/metriconly after the
# vet, fmt and build-tar targets. PROJECT, ZONE, VM_IMAGE, IMAGE_PROJECT,
# SSH_USER and SSH_KEY are forwarded to the test binary as flags, together
# with the absolute path of $(TARBALL) in the current directory.
# The go test timeout is 10 minutes.
e2e-test: vet fmt build-tar
GO111MODULE=on go test -mod vendor -timeout=10m -v $(BUILD_TAGS) \
./test/e2e/metriconly/... \
-project=$(PROJECT) -zone=$(ZONE) \
-image=$(VM_IMAGE) -image-project=$(IMAGE_PROJECT) \
-ssh-user=$(SSH_USER) -ssh-key=$(SSH_KEY) -npd-build-tar=`pwd`/$(TARBALL)

build-binaries: ./bin/node-problem-detector ./bin/log-counter

build-container: build-binaries Dockerfile
docker build -t $(IMAGE) .

build-tar: ./bin/node-problem-detector ./bin/log-counter
tar -zcvf $(TARBALL) bin/ config/
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh
sha1sum $(TARBALL)
md5sum $(TARBALL)

Expand Down
15 changes: 15 additions & 0 deletions config/systemd/node-problem-detector-metric-only.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[Unit]
Description=Node problem detector
Wants=local-fs.target
After=local-fs.target

[Service]
Restart=always
RestartSec=10
ExecStart=/home/kubernetes/bin/node-problem-detector --v=2 --logtostderr --enable-k8s-exporter=false \
--config.system-log-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor.json,/home/kubernetes/node-problem-detector/config/docker-monitor.json,/home/kubernetes/node-problem-detector/config/systemd-monitor.json \
--config.custom-plugin-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor-counter.json,/home/kubernetes/node-problem-detector/config/systemd-monitor-counter.json \
--config.system-stats-monitor=/home/kubernetes/node-problem-detector/config/system-stats-monitor.json

[Install]
WantedBy=multi-user.target
11 changes: 0 additions & 11 deletions pkg/util/metrics/fakes.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,6 @@ import (
"reflect"
)

// Int64MetricRepresentation represents a snapshot of a single int64 metric.
// It is used for inspecting fake metrics.
type Int64MetricRepresentation struct {
// Name is the metric name.
Name string
// Labels contains all metric labels as key-value pairs.
Labels map[string]string
// Value is the value of the metric.
Value int64
}

// Int64MetricInterface is used to create test double for Int64Metric.
type Int64MetricInterface interface {
// Record records a measurement for the metric, with provided tags as metric labels.
Expand Down
100 changes: 94 additions & 6 deletions pkg/util/metrics/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ package metrics
import (
"context"
"fmt"
"strings"
"sync"

pcm "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
Expand All @@ -34,12 +37,6 @@ func init() {
tagMapMutex.Unlock()
}

// Int64Metric represents an int64 metric.
type Int64Metric struct {
// name presumably holds the metric name given at construction time;
// the constructor body is not visible here, so confirm against NewInt64Metric.
name string
// measure is the OpenCensus int64 measure associated with this metric.
measure *stats.Int64Measure
}

// Aggregation defines how measurements should be aggregated into data points.
type Aggregation string

Expand All @@ -50,6 +47,23 @@ const (
Sum Aggregation = "Sum"
)

// Int64MetricRepresentation represents a snapshot of a single int64 metric.
// It is used for inspecting metric internals.
type Int64MetricRepresentation struct {
// Name is the metric name.
Name string
// Labels contains all metric labels as key-value pairs.
Labels map[string]string
// Value is the value of the metric.
Value int64
}

// Int64Metric represents an int64 metric.
type Int64Metric struct {
// name presumably holds the metric name given at construction time;
// the constructor body is not visible here, so confirm against NewInt64Metric.
name string
// measure is the OpenCensus int64 measure associated with this metric.
measure *stats.Int64Measure
}

// NewInt64Metric create a Int64Metric metric, returns nil when name is empty.
func NewInt64Metric(name string, description string, unit string, aggregation Aggregation, tagNames []string) (*Int64Metric, error) {
if name == "" {
Expand Down Expand Up @@ -106,6 +120,17 @@ func (metric *Int64Metric) Record(tags map[string]string, measurement int64) err
metric.measure.M(measurement))
}

// Float64MetricRepresentation represents a snapshot of a single float64 metric.
// It is used for inspecting metric internals.
//
// Note: the field order matters to code that builds this struct with an
// unkeyed (positional) composite literal.
type Float64MetricRepresentation struct {
// Name is the metric name.
Name string
// Labels contains all metric labels as key-value pairs.
Labels map[string]string
// Value is the value of the metric.
Value float64
}

// Float64Metric represents an float64 metric.
type Float64Metric struct {
name string
Expand Down Expand Up @@ -187,3 +212,66 @@ func getTagKeysFromNames(tagNames []string) ([]tag.Key, error) {
}
return tagKeys, nil
}

// ParsePrometheusMetrics parses metrics in the Prometheus text exposition format
// into Float64MetricRepresentation values, one per parsed sample.
//
// Only counter- and gauge-typed metric families are supported; a family of any
// other type makes the function return an error. Prometheus's Go library stores
// all counter/gauge-typed metric values as float64, hence the float64 result.
//
// The order of the returned slice follows the iteration order over the parsed
// metric families and should not be relied upon (the parser is documented to
// return the families keyed by name in a map — confirm against expfmt).
//
// On error the returned slice is nil.
func ParsePrometheusMetrics(metricsText string) ([]Float64MetricRepresentation, error) {
	var textParser expfmt.TextParser
	metricFamilies, err := textParser.TextToMetricFamilies(strings.NewReader(metricsText))
	if err != nil {
		return nil, err
	}

	var metrics []Float64MetricRepresentation
	for _, metricFamily := range metricFamilies {
		for _, metric := range metricFamily.GetMetric() {
			// The generated Get* accessors are nil-safe, unlike dereferencing the
			// optional protobuf pointer fields directly.
			labelPairs := metric.GetLabel()
			labels := make(map[string]string, len(labelPairs))
			for _, labelPair := range labelPairs {
				labels[labelPair.GetName()] = labelPair.GetValue()
			}

			var value float64
			switch metricFamily.GetType() {
			case pcm.MetricType_COUNTER:
				value = metric.GetCounter().GetValue()
			case pcm.MetricType_GAUGE:
				value = metric.GetGauge().GetValue()
			default:
				return nil, fmt.Errorf("unexpected MetricType %s for metric %s",
					pcm.MetricType_name[int32(metricFamily.GetType())], metricFamily.GetName())
			}

			metrics = append(metrics, Float64MetricRepresentation{
				Name:   metricFamily.GetName(),
				Labels: labels,
				Value:  value,
			})
		}
	}

	return metrics, nil
}

// GetFloat64Metric returns the first metric in metrics that matches the provided
// name and labels.
//
// When strictLabelMatching is true, the found metric's labels are identical to
// the provided labels; when it is false, the found metric's labels are a
// superset of the provided labels.
//
// A provided label only matches when the metric actually carries that label key
// with the same value; a requested label with an empty value does not match a
// metric that lacks the label altogether.
//
// An error is returned when no metric matches.
func GetFloat64Metric(metrics []Float64MetricRepresentation, name string, labels map[string]string,
	strictLabelMatching bool) (Float64MetricRepresentation, error) {
	for _, metric := range metrics {
		if metric.Name != name {
			continue
		}
		// With equal label counts, "every wanted label is present" implies the
		// two label sets are identical.
		if strictLabelMatching && len(metric.Labels) != len(labels) {
			continue
		}

		matched := true
		for key, want := range labels {
			// Check presence explicitly: indexing a missing key yields "", which
			// would otherwise be indistinguishable from an empty label value.
			if got, ok := metric.Labels[key]; !ok || got != want {
				matched = false
				break
			}
		}
		if matched {
			return metric, nil
		}
	}
	return Float64MetricRepresentation{}, fmt.Errorf("no matching metric found")
}
97 changes: 97 additions & 0 deletions test/e2e-install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env bash

# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is for installing node problem detector (NPD) on a running node
# in metric-only mode, as a setup for NPD e2e tests.

# Fail fast: abort on any failing command, on use of an unset variable, and
# when any command in a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Installation targets on the node: BIN_DIR receives the NPD and log-counter
# binaries, CONFIG_DIR receives the contents of the tarball's config/ directory.
readonly BIN_DIR=/home/kubernetes/bin
readonly CONFIG_DIR=/home/kubernetes/node-problem-detector/config

# Prints the usage text (flags, commands and examples) to stdout.
function print-help() {
  # One printf call emits every argument on its own line; empty arguments
  # produce the blank separator lines.
  printf '%s\n' \
    "Usage: e2e-install.sh [flags] [command]" \
    "" \
    "Available flags:" \
    " -t [TARBALL] Specify the path of the NPD tarball (generated by 'make build-tar')." \
    "" \
    "Available commands:" \
    " help Print this help message" \
    " install Installs NPD to the this machine" \
    "" \
    "Examples:" \
    " e2e-install.sh help" \
    " e2e-install.sh -t /tmp/npd.tar.gz install"
}

# Installs NPD from the tarball named by the global TARBALL (set via -t) and
# (re)starts it as the node-problem-detector systemd service in metric-only mode.
#
# Steps: unpack the tarball into a temporary directory, prepare an executable
# bind mount on BIN_DIR, copy the node-problem-detector and log-counter
# binaries, copy the configuration into CONFIG_DIR, install the metric-only
# unit file as node-problem-detector.service, then reload systemd and restart
# the service.
#
# Exits with status 1 when TARBALL is empty. Any failing step aborts the
# script because errexit is enabled.
function install-npd() {
  if [[ -z "${TARBALL}" ]]; then
    echo "ERROR: tarball flag is missing."
    exit 1
  fi

  # Declare and assign separately: in "readonly var=$(cmd)" the exit status is
  # that of readonly, so a failing mktemp would slip past errexit. Using a
  # local also avoids leaking a global read-only variable out of the function.
  local workdir
  workdir="$(mktemp -d)"
  tar -xf "${TARBALL}" --directory "${workdir}"

  echo "Preparing NPD binary directory."
  mkdir -p "${BIN_DIR}"
  mount --bind "${BIN_DIR}" "${BIN_DIR}"
  # Below remount is to work around COS's noexec mount on /home.
  mount -o remount,exec "${BIN_DIR}"

  echo "Installing NPD binary."
  cp "${workdir}"/bin/node-problem-detector "${BIN_DIR}"

  echo "Installing log-counter binary."
  cp "${workdir}"/bin/log-counter "${BIN_DIR}"

  echo "Installing NPD configurations."
  mkdir -p "${CONFIG_DIR}"
  cp -r "${workdir}"/config/* "${CONFIG_DIR}"

  echo "Installing NPD systemd service."
  cp "${workdir}"/config/systemd/node-problem-detector-metric-only.service /etc/systemd/system/node-problem-detector.service

  rm -rf "${workdir}"

  # Start systemd service. The stop is best-effort: it fails harmlessly when
  # the service is not running yet.
  echo "Starting NPD systemd service."
  systemctl daemon-reload
  systemctl stop node-problem-detector.service || true
  systemctl start node-problem-detector.service
}

# Dispatches on the first positional argument: "install" runs the installer;
# "help", no argument, or anything else prints the usage text.
function main() {
  local command="${1:-}"
  if [[ "${command}" == "install" ]]; then
    install-npd
  else
    print-help
  fi
}

# Path of the NPD tarball; set by the -t flag and read by install-npd.
TARBALL=""

# Parse flags. Only -t (with an argument) is accepted.
while getopts "t:" opt; do
  case "${opt}" in
    t) TARBALL="${OPTARG}";;
    *)
      # getopts has already printed a diagnostic for the unknown flag or the
      # missing argument; show the usage and stop instead of continuing with
      # a half-parsed command line.
      print-help
      exit 1
      ;;
  esac
done
# Drop the parsed flags so that main only sees the command.
shift "$((OPTIND-1))"

main "${@}"
25 changes: 25 additions & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Node Problem Detector End-To-End tests

NPD e2e tests are meant for testing NPD in a VM environment.

Currently the tests only support the Google Compute Engine (GCE) environment. Support for other vendors can be added in the future.

## Prerequisites

1. Set up [Google Application Default Credentials](https://developers.google.com/identity/protocols/application-default-credentials), which is [required for authentication](https://godoc.org/google.golang.org/api/compute/v1#hdr-Creating_a_client) by the Compute Engine API.
2. Set up a [project-wide SSH key](https://cloud.google.com/compute/docs/instances/adding-removing-ssh-keys#project-wide) that can be used to SSH into the GCE VMs.

## Running tests

From the node-problem-detector base directory, run:

```
export GOOGLE_APPLICATION_CREDENTIALS=[YOUR_ADC_PATH:~/.config/gcloud/application_default_credentials.json]
export ZONE=[ANY_GCE_ZONE:us-central1-a]
export PROJECT=[YOUR_PROJECT_ID]
export VM_IMAGE=[TESTED_OS_IMAGE:cos-73-11647-217-0]
export IMAGE_PROJECT=[TESTED_OS_IMAGE_PROJECT:cos-cloud]
export SSH_USER=${USER}
export SSH_KEY=~/.ssh/id_rsa
make e2e-test
```
55 changes: 55 additions & 0 deletions test/e2e/lib/gce/gce.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gce

import (
"net/http"
"time"

"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
compute "google.golang.org/api/compute/v1"
)

// GetComputeClient creates a GCE compute client using the application default
// credentials. Client creation is attempted up to 10 times with a 6 second
// pause between attempts (just under a minute of waiting in total), and the
// error from the last attempt is returned if none succeeds.
func GetComputeClient() (*compute.Service, error) {
	const (
		maxAttempts = 10
		retryDelay  = 6 * time.Second
	)

	// Getting credentials on gce jenkins is flaky, so the whole setup of the
	// client used for provisioning instances is retried a few times.
	var lastErr error
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// Only wait between attempts, never before the first one.
		if attempt != 0 {
			time.Sleep(retryDelay)
		}

		var httpClient *http.Client
		httpClient, lastErr = google.DefaultClient(oauth2.NoContext, compute.ComputeScope)
		if lastErr != nil {
			continue
		}

		var service *compute.Service
		service, lastErr = compute.New(httpClient)
		if lastErr == nil {
			return service, nil
		}
	}
	return nil, lastErr
}
Loading

0 comments on commit 6599e5d

Please sign in to comment.