Skip to content

Commit

Permalink
Refactor and updates (#22)
Browse files Browse the repository at this point in the history
* Updates
- Update README
- Update to go1.18
- Update to k8s v1.25.5
- Reworked vfstats collector
- Implemented endpoint unit tests
- Add netlink support detection
- Add image building to Makefile
- Remove deprecated references
- Add Mellanox driver to drivers DB
- Refactor code to enable testing
- Support for NFD SR-IOV feature label
- Changes to ensure more uniform Makefile
- Implemented initial unit tests
- Implemented vfstats package unit tests

Co-Authored-By: Eoghan1232 <[email protected]>
Co-Authored-By: eoghanlawless <[email protected]>
Co-Authored-By: Ipawlikx <[email protected]>
Co-Authored-By: nhennigan <[email protected]>

* fixing incorrect flag

* fixing typos

* Adding in github action workflow

* Addressed comments

* Fixing vulnerability

* Updating action workflow to run on ubuntu-latest

* fixing go version in action

* Fixing Hadolint scan in action

* testing ginkgo issue in action

* Revert "testing ginkgo issue in action"

This reverts commit 6343cb8.

* Updating Makefile to print coverage per function

---------

Co-authored-by: Eoghan1232 <[email protected]>
Co-authored-by: eoghanlawless <[email protected]>
Co-authored-by: Ipawlikx <[email protected]>
Co-authored-by: nhennigan <[email protected]>
  • Loading branch information
5 people authored May 8, 2023
1 parent 165b761 commit c6489eb
Show file tree
Hide file tree
Showing 26 changed files with 2,615 additions and 1,082 deletions.
106 changes: 106 additions & 0 deletions .github/workflows/build-test-lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
name: build-test-lint
on: [push, pull_request]
jobs:
# Build job: compiles the exporter with `make build` for every matrix combination.
build:
  name: build
  strategy:
    matrix:
      go-version: [1.18.x]
      goarch: [amd64]
      # goos must be a matrix key: the Build step reads matrix.goos, and an
      # undefined matrix key expands to an empty string.
      goos: [linux]
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - name: Set up Go matrix
      uses: actions/setup-go@v3
      with:
        go-version: ${{ matrix.go-version }}

    - name: Check out code into the Go module directory
      uses: actions/checkout@v2

    - name: Build
      env:
        GOARCH: ${{ matrix.goarch }}
        GOOS: ${{ matrix.goos }}
      run: make build

test:
runs-on: ubuntu-latest
needs: build
name: test
steps:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18.x

- name: Check out code into the Go module directory
uses: actions/checkout@v2

- name: Install hwdata
run: sudo apt-get install hwdata -y

- name: Go test
run: make test

test-coverage:
runs-on: ubuntu-latest
needs: build
name: test-coverage
steps:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18.x

- uses: actions/checkout@v2

- name: Install hwdata
run: sudo apt-get install hwdata -y

- name: Go test with coverage
run: make test-coverage

golangci:
name: Golangci-lint
runs-on: ubuntu-latest
steps:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18.x
- uses: actions/checkout@v2
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
with:
# Required: the golangci-lint version to run; pinned to an exact release (including patch version) so lint results are reproducible.
version: v1.46.2

hadolint:
runs-on: ubuntu-latest
name: Hadolint
steps:
- uses: actions/checkout@v2
- uses: brpaz/[email protected]
name: Run Hadolint
with:
dockerfile: ./Dockerfile
ignore: DL3018 # DL3018: GH issue 368

go-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18.x

# if this fails, run go mod tidy
- name: Check if module files are consistent with code
run: go mod tidy && git diff --exit-code

# if this fails, run go mod vendor
- name: Check if vendor directory is consistent with go modules
run: go mod vendor && git diff --exit-code
2 changes: 0 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
FROM golang:alpine as builder

ENV HTTP_PROXY $http_proxy
ENV HTTPS_PROXY $https_proxy
RUN apk add --no-cache --virtual build-dependencies build-base linux-headers git
COPY ./ /usr/src/sriov-network-metrics-exporter
WORKDIR /usr/src/sriov-network-metrics-exporter
Expand Down
45 changes: 41 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,47 @@
IMAGE_REGISTRY?=localhost:5000/
IMAGE_VERSION?=latest

IMAGE_NAME?=$(IMAGE_REGISTRY)sriov-metrics-exporter:$(IMAGE_VERSION)
IMAGE_BUILDER?=docker

DOCKERARGS?=
ifdef HTTP_PROXY
DOCKERARGS += --build-arg http_proxy=$(HTTP_PROXY)
endif
ifdef HTTPS_PROXY
DOCKERARGS += --build-arg https_proxy=$(HTTPS_PROXY)
endif

all: build image-build test

clean:
rm -rf bin
go clean --modcache

go clean -modcache -testcache
build:
GO111MODULE=on go build -ldflags "-s -w" -buildmode=pie -o bin/sriov-exporter cmd/sriov-network-metrics-exporter.go

# Build the container image $(IMAGE_NAME) from ./Dockerfile using $(IMAGE_BUILDER),
# passing any proxy build args collected in $(DOCKERARGS).
image-build:
	@echo "Building container image $(IMAGE_NAME)"
	$(IMAGE_BUILDER) build -f Dockerfile -t $(IMAGE_NAME) $(DOCKERARGS) .

image-push:
$(IMAGE_BUILDER) push $(IMAGE_NAME)

test:
go test ./... -count=1

test-coverage:
go test ./... -coverprofile cover.out
go tool cover -func cover.out

go-lint-install:
go install github.com/golangci/golangci-lint/cmd/[email protected]

go-lint: go-lint-install
go mod tidy
go fmt ./...
golangci-lint run
GO111MODULE=on go build -ldflags "-s -w" -buildmode=pie -o bin/sriov-exporter cmd/sriov-network-metrics-exporter.go
golangci-lint run --color always -v ./...

# Run golangci-lint and write its stdout and stderr to golangci-lint.txt.
# POSIX redirection is used because make runs recipes with /bin/sh by default,
# where the bash-only "&>" is parsed as a background "&" followed by an empty
# redirect, leaving the report file empty.
go-lint-report: go-lint-install
	golangci-lint run --color always -v ./... > golangci-lint.txt 2>&1
49 changes: 37 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ The SR-IOV Network Metrics Exporter is designed with the Kubernetes SR-IOV stack
**This software is a pre-production alpha version and should not be deployed to production servers.**

## Hardware support
The default netlink implementation for Virtual Function telemetry relies on driver support and a kernel version of 4.4 or higher. This version requires i40e driver of 2.11+ for Intel® 700 series NICs. Updated i40e drivers can be fould at the [Intel Download Center](https://downloadcenter.intel.com/download/24411/Intel-Network-Adapter-Driver-for-PCIe-40-Gigabit-Ethernet-Network-Connections-under-Linux-?v=t)
The sysfs collector for Virtual Function telemetry supports NICs with drivers that implement the SR-IOV sysfs management interface e.g. ice, i40e, mlnx_en and mlnx_ofed.

For kernels older than 4.4 a driver specific collector is enabled which is compatible with Intel® 700 series NICs using and i40e driver of 2.11 or above. To check your current driver version run: ``modinfo i40e | grep ^version``
To upgrade visit the [official driver download site](https://downloadcenter.intel.com/download/24411/Intel-Network-Adapter-Driver-for-PCIe-40-Gigabit-Ethernet-Network-Connections-Under-Linux-).
To use this version the flag collector.netlink must be set to "false".
The netlink collector relies on driver support and a kernel version of 4.4 or higher.
To support netlink, we recommend these driver versions: i40e driver 2.11 or higher for Intel® 700 series NICs and ice driver 1.2 or higher for Intel® 800 series NICs.

To check your current driver version run: `modinfo <driver> | grep ^version` where driver is `i40e` or `ice`\
i40e drivers: [Intel Download Center](https://downloadcenter.intel.com/download/18026/), [Source Forge](https://sourceforge.net/projects/e1000/files/i40e%20stable/)\
ice drivers: [Intel Download Center](https://www.intel.com/content/www/us/en/download/19630/), [Source Forge](https://sourceforge.net/projects/e1000/files/ice%20stable/)

## Metrics
This exporter will make the following metrics available:
Expand Down Expand Up @@ -42,17 +45,35 @@ Once available through Prometheus VF metrics can be used by metrics applications

## Installation
### Kubernetes installation

#### Building images
Typical deployment is as a daemonset in a cluster. A daemonset requires the image to be available on each node in the cluster or at a registry accessible from each node.
The following assumes a local Docker registry available at localhost:5000, and assumes Docker is being used to build and manage containers in the cluster.

In order to build the container and load it to a local registry run:

```
docker build . -t localhost:5000/sriov-metrics-exporter && docker push localhost:5000/sriov-metrics-exporter
or
make image-build && make image-push
```

The above assumes a registry available across the cluster at localhost:5000, for example one set up using the [Docker Registry Proxy](https://github.com/kubernetes-sigs/kubespray/blob/master/roles/kubernetes-apps/registry/README.md). If your registry is at a different address the image name will need to be changed to reflect that in the [Kubernetes daemonset](/deployment/daemonset.yaml).

#### Labeling nodes

SR-IOV Network Metrics Exporter will only be deployed on nodes labeled with `"feature.node.kubernetes.io/network-sriov.capable": "true"` label. You can label the nodes automatically using [Node Feature Discovery](https://github.com/kubernetes-sigs/node-feature-discovery), or manually, executing the following `kubectl` command:

```
kubectl label node <nodename> feature.node.kubernetes.io/network-sriov.capable="true"
```

If you prefer to use the `Node Feature Discovery` you can refer to the [Quick-start guide](https://github.com/kubernetes-sigs/node-feature-discovery#quick-start--the-short-short-version) on the project's repository.

#### Deploying SR-IOV Network Metrics Exporter

Create monitoring namespace:
```
kubectl create namespace monitoring
Expand Down Expand Up @@ -98,7 +119,7 @@ In order to expose these metrics to Prometheus we need to configure the database
```
The above should be added to the Prometheus configuration as a new target. For more about configuring Prometheus see the [official guide.](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) Once Prometheus is started with this included in its config sriov-metrics should appear on the "Targets page". Metrics should be available by querying the Prometheus API or in the web interface.

In this mode it will serve stats on an endpoint inside the cluster. Prometheus will detect the label on the service endpoint throught the above configuration.
In this mode it will serve stats on an endpoint inside the cluster. Prometheus will detect the label on the service endpoint through the above configuration.

### Standalone installation to an endpoint on the host.

Expand Down Expand Up @@ -145,21 +166,25 @@ The above should be added to the Prometheus configuration as a new target. For m
### Configuration
A number of configuration flags can be passed to the SR-IOV Network Metrics Exporter in order to change enabled collectors, the paths it reads from and some properties of its web endpoint.

The collector.vfstatspriority flag defines the priority of vf stats collectors, each pf will use the first supported collector in the list.\
Example: using the priority, "sysfs,netlink", with Intel® 700 and 800 series NICs installed and vfs initialized, the sysfs collector will be used for the 700 series NIC, while the 800 series NIC, which doesn't support sysfs collection, falls back to the netlink collector.

| Flag | Type | Description | Default Value |
|----|:----|:----|:----|
| collector.kubepodcpu | boolean | Enables the kubepodcpu collector | false |
| collector.kubepoddevice | boolean | Enables the kubepoddevice collector | false |
| collector.vfstats | boolean |Enables the vfstats collector | true |
| collector.netlink | boolean |Enables using netlink for vfstats collection | true |
| collector.vfstatspriority | string | Sets the priority of vfstats collectors | sysfs,netlink |
| collector.sysfs | boolean | Enables using sr-iov sysfs for vfstats collection | true |
| collector.netlink | boolean | Enables using netlink for vfstats collection | true |
| path.cpucheckpoint | string | Path for location of cpu manager checkpoint file | /var/lib/kubelet/cpu_manager_state |
| path.kubecgroup |string | Path for location of kubernetes cgroups on the host system | /sys/fs/cgroup/cpuset/kubepods/|
| path.kubeletSocket | string | Path to kubelet resources socket | /var/lib/kubelet/pod-resources/kubelet.sock |
| path.kubecgroup |string | Path for location of kubernetes cgroups on the host system | /sys/fs/cgroup/cpuset/kubepods/ |
| path.kubeletsocket | string | Path to kubelet resources socket | /var/lib/kubelet/pod-resources/kubelet.sock |
| path.nodecpuinfo | string | Path for location of system cpu information | /sys/devices/system/node/ |
| path.sysbuspci | string | Path to sys/bus/pci on host | /sys/bus/pci/devices |
| path.sysclassnet | string | Path to sys/class/net on host | /sys/class/net/ |
| web.listen-address | string | Address to listen on for web interface and telemetry. | :9808 |
| web.rate-burst | int | Maximum per second burst rate for requests. | 10 |
| web.rate-limit | int | Limit for requests per second. | 1 |
| web.listen-address | string | Address to listen on for web interface and telemetry | :9808 |
| web.rate-burst | int | Maximum per second burst rate for requests | 10 |
| web.rate-limit | int | Limit for requests per second | 1 |

## Communication and contribution

Expand Down
38 changes: 22 additions & 16 deletions cmd/sriov-network-metrics-exporter.go
Original file line number Diff line number Diff line change
@@ -1,36 +1,37 @@
// The SR-IOV networks exporter makes metrics from SR-IOV Virtual Functions available in a prometheus format.
// Different classes of metrics are implemented as individual collectors.

package main

import (
"flag"
"log"
"net/http"
"sriov-network-metrics-exporter/collectors"

"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/k8snetworkplumbingwg/sriov-network-metrics-exporter/collectors"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/time/rate"
)

// Command-line flags and fixed settings for the exporter's web endpoint.
var (
	// addr is the listen address passed to http.ListenAndServe. The default
	// ":9808" is an address with an empty host, so the help text describes an
	// address rather than a bare port (matching the README flag table).
	addr = flag.String("web.listen-address", ":9808", "Address to listen on for web interface and telemetry.")
	// rateLimit is the allowed number of requests per second.
	rateLimit = flag.Int("web.rate-limit", 1, "Limit for requests per second.")
	// rateBurst is the maximum per-second burst of requests.
	rateBurst = flag.Int("web.rate-burst", 10, "Maximum per second burst rate for requests.")
	// metricsEndpoint is the URL path of the metrics endpoint.
	metricsEndpoint = "/metrics"
)

func main() {
flag.Parse()
verifyFlags()
enabledCollectors := collectors.Enabled()
err := prometheus.Register(enabledCollectors)
parseAndVerifyFlags()

err := prometheus.Register(collectors.Enabled())
if err != nil {
log.Fatalf("collector could not be registered: %v", err)
return
}
//Use the default promhttp handler wrapped with middleware to serve at the metrics endpoint

// Use the default promhttp handler wrapped with middleware to serve at the metrics endpoint
handlerWithMiddleware := limitRequests(
getOnly(
endpointOnly(
Expand All @@ -41,7 +42,12 @@ func main() {
log.Fatalf("ListenAndServe error: %v", http.ListenAndServe(*addr, handlerWithMiddleware))
}

//enpointOnly restricts all responses to 404 where the passed endpoint isn't used. Used to minimize the possible outputs of the server.
// parseAndVerifyFlags parses the command-line flags and then calls verifyFlags.
func parseAndVerifyFlags() {
flag.Parse()
verifyFlags()
}

// endpointOnly restricts all responses to 404 where the passed endpoint isn't used. Used to minimize the possible outputs of the server.
func endpointOnly(next http.Handler, endpoint string) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != endpoint {
Expand All @@ -56,7 +62,7 @@ func endpointOnly(next http.Handler, endpoint string) http.Handler {
})
}

//getOnly restricts the possible verbs used in a http request to GET only
// getOnly restricts the possible verbs used in a http request to GET only
func getOnly(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Expand All @@ -71,7 +77,7 @@ func getOnly(next http.Handler) http.Handler {
})
}

//noBody returns a 400 to any request that contains a body
// noBody returns a 400 to any request that contains a body
func noBody(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Body != http.NoBody {
Expand All @@ -86,20 +92,20 @@ func noBody(next http.Handler) http.Handler {
})
}

// limitRequests sets a rate limit and a burst limit for requests to the endpoint.
//
// A single limiter, created once from rateLimit and burstLimit, is shared by
// every request passing through the returned handler. Requests the limiter
// does not allow are answered with 429 Too Many Requests and never reach next.
func limitRequests(next http.Handler, rateLimit rate.Limit, burstLimit int) http.Handler {
	limiter := rate.NewLimiter(rateLimit, burstLimit)
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if !limiter.Allow() {
			// Write the rejection exactly once; a second http.Error call would
			// attempt to write the response header again.
			http.Error(w, http.StatusText(http.StatusTooManyRequests), http.StatusTooManyRequests)
			return
		}
		next.ServeHTTP(w, r)
	})
}

// verifyFlags asks the collectors package to resolve its configured file
// paths and panics, logging the returned error, if resolution fails.
func verifyFlags() {
	// A single error-returning call replaces the former per-collector
	// resolver calls, whose results were not checked.
	if err := collectors.ResolveFilepaths(); err != nil {
		log.Panicf("failed to resolve paths\n%v", err)
	}
}
Loading

0 comments on commit c6489eb

Please sign in to comment.