Skip to content

Commit

Permalink
Move perf tests to separate connectivity subcommand
Browse files Browse the repository at this point in the history
Previously, the perf test was part of the `connectivity test` subcommand.
Now, the perf test has a separate `connectivity perf` subcommand.
Both host and pod network perf tests can now be run in a single
command.
Host-to-pod traffic can be tested too.
The perf test now respects nodeSelector and can be run on nodes in two
different zones.
Test pods are always force-deployed for the perf test so that nodeSelector is respected.
You can export perf test results in JSON format, which is compatible
with perfdash.

Fixes: #1111
Fixes: #2114

Signed-off-by: Marcel Zieba <[email protected]>
  • Loading branch information
marseel authored and nebril committed Dec 15, 2023
1 parent e9d818c commit 4e51788
Show file tree
Hide file tree
Showing 15 changed files with 717 additions and 576 deletions.
2 changes: 1 addition & 1 deletion .github/in-cluster-test-scripts/aks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ sleep 10s
cilium connectivity test --debug --all-flows --collect-sysdump-on-failure --external-target bing.com

# Run performance test
cilium connectivity test --perf --perf-duration 1s
cilium connectivity perf --duration 1s

# Retrieve Cilium status
cilium status
2 changes: 1 addition & 1 deletion .github/in-cluster-test-scripts/eks-tunnel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ cilium connectivity test --debug --all-flows --collect-sysdump-on-failure --exte
# - cilium-cli default cilium version has been updated to pick up the fix

# Run performance test
cilium connectivity test --perf --perf-duration 1s
cilium connectivity perf --duration 1s
2 changes: 1 addition & 1 deletion .github/in-cluster-test-scripts/eks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ sleep 10s
cilium connectivity test --debug --all-flows --collect-sysdump-on-failure --external-target amazon.com

# Run performance test
cilium connectivity test --perf --perf-duration 1s
cilium connectivity perf --duration 1s
2 changes: 1 addition & 1 deletion .github/in-cluster-test-scripts/external-workloads.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ set -e
cilium connectivity test --debug --all-flows --collect-sysdump-on-failure --external-target google.com

# Run performance test
cilium connectivity test --perf --perf-duration 1s
cilium connectivity perf --duration 1s
2 changes: 1 addition & 1 deletion .github/in-cluster-test-scripts/gke.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ sleep 10s
cilium connectivity test --debug --all-flows --collect-sysdump-on-failure --external-target google.com

# Run performance test
cilium connectivity test --perf --perf-duration 1s
cilium connectivity perf --duration 1s
37 changes: 21 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,22 +234,27 @@ To install Cilium while automatically detected:

#### Network Performance test

cilium connectivity test --perf
🔥 Performance Test Summary
-----------------------------------------------------------------------------------------------------------------------------
📋 Scenario | Test | Num Samples | Duration | Avg value
-----------------------------------------------------------------------------------------------------------------------------
📋 perf-client-5d7cb4d587-cn8sw | TCP_RR | 1 | 10s | 29975.37 (OP/s)
📋 perf-client-5d7cb4d587-cn8sw | TCP_CRR | 1 | 10s | 3926.56 (OP/s)
📋 perf-client-5d7cb4d587-cn8sw | TCP_STREAM | 1 | 10s | 2275.42 (Mb/s)
📋 perf-client-other-node-7867748554-vfvgt | TCP_RR | 1 | 10s | 964.55 (OP/s)
📋 perf-client-other-node-7867748554-vfvgt | TCP_STREAM | 1 | 10s | 4743.39 (Mb/s)
📋 perf-client-other-node-7867748554-vfvgt | UDP_RR | 1 | 10s | 1134.20 (OP/s)
📋 perf-client-other-node-7867748554-vfvgt | UDP_STREAM | 1 | 10s | 1425.74 (Mb/s)
📋 perf-client-5d7cb4d587-cn8sw | UDP_RR | 1 | 10s | 31737.62 (OP/s)
📋 perf-client-5d7cb4d587-cn8sw | UDP_STREAM | 1 | 10s | 865.24 (Mb/s)
📋 perf-client-other-node-7867748554-vfvgt | TCP_CRR | 1 | 10s | 435.80 (OP/s)
-----------------------------------------------------------------------------------------------------------------------------
cilium connectivity perf
🔥 Network Performance Test Summary:
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
📋 Scenario | Node | Test | Duration | Min | Mean | Max | P50 | P90 | P99 | Transaction rate OP/s
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
📋 pod-to-pod | same-node | TCP_RR | 1s | 16µs | 32.39µs | 1.567ms | 20µs | 52µs | 97µs | 30696.13
📋 pod-to-pod | same-node | UDP_RR | 1s | 14µs | 29.86µs | 4.41ms | 17µs | 47µs | 97µs | 33251.51
📋 pod-to-pod | same-node | TCP_CRR | 1s | 290µs | 512.1µs | 13.413ms | 467µs | 626µs | 980µs | 1949.69
📋 pod-to-pod | other-node | TCP_RR | 1s | 350µs | 692.85µs | 3.543ms | 631µs | 1.001ms | 1.483ms | 1438.69
📋 pod-to-pod | other-node | UDP_RR | 1s | 312µs | 865.83µs | 8.731ms | 605µs | 1.444ms | 6ms | 1150.79
📋 pod-to-pod | other-node | TCP_CRR | 1s | 959µs | 2.15805ms | 7.677ms | 1.555ms | 5.425ms | 7.133ms | 461.78
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
📋 Scenario | Node | Test | Duration | Throughput Mb/s
-------------------------------------------------------------------------------------
📋 pod-to-pod | same-node | TCP_STREAM | 1s | 631.58
📋 pod-to-pod | same-node | UDP_STREAM | 1s | 458.66
📋 pod-to-pod | other-node | TCP_STREAM | 1s | 411.43
📋 pod-to-pod | other-node | UDP_STREAM | 1s | 144.44
-------------------------------------------------------------------------------------


### ClusterMesh

Expand Down
7 changes: 4 additions & 3 deletions connectivity/check/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ import (
"sync"
"time"

"github.com/cilium/cilium/api/v1/flow"
"github.com/cilium/cilium/api/v1/observer"
"github.com/cilium/cilium/api/v1/relay"
hubprinter "github.com/cilium/hubble/pkg/printer"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/cilium/cilium/api/v1/flow"
"github.com/cilium/cilium/api/v1/observer"
"github.com/cilium/cilium/api/v1/relay"

"github.com/cilium/cilium-cli/connectivity/filters"
"github.com/cilium/cilium-cli/defaults"
"github.com/cilium/cilium-cli/utils/features"
Expand Down
6 changes: 4 additions & 2 deletions connectivity/check/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,14 @@ type Parameters struct {
Timestamp bool
PauseOnFail bool
SkipIPCacheCheck bool
// Perf is not a user-facing parameter, but it's used to run the perf subcommand
// using the connectivity test suite.
Perf bool
PerfReportDir string
PerfDuration time.Duration
PerfCRR bool
PerfHostNet bool
PerfPodNet bool
PerfSamples int
PerfLatency bool
CurlImage string
PerformanceImage string
JSONMockImage string
Expand Down
101 changes: 52 additions & 49 deletions connectivity/check/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/cilium/cilium/api/v1/observer"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"

"github.com/cilium/cilium-cli/connectivity/perf/common"
"github.com/cilium/cilium-cli/defaults"
"github.com/cilium/cilium-cli/internal/junit"
"github.com/cilium/cilium-cli/k8s"
Expand Down Expand Up @@ -62,9 +63,9 @@ type ConnectivityTest struct {
echoExternalPods map[string]Pod
clientPods map[string]Pod
clientCPPods map[string]Pod
perfClientPods map[string]Pod
perfServerPod map[string]Pod
PerfResults map[PerfTests]PerfResult
perfClientPods []Pod
perfServerPod []Pod
PerfResults []common.PerfSummary
echoServices map[string]Service
ingressService map[string]Service
k8sService Service
Expand All @@ -87,21 +88,6 @@ type ConnectivityTest struct {
helmYAMLValues string
}

type PerfTests struct {
Pod string
Test string
}

type PerfResult struct {
Metric string
Scenario string
Duration time.Duration
Samples int
Values []float64
Avg float64
Latency map[string][]float64
}

func netIPToCIDRs(netIPs []netip.Addr) (netCIDRs []netip.Prefix) {
for _, ip := range netIPs {
found := false
Expand Down Expand Up @@ -220,9 +206,9 @@ func NewConnectivityTest(client *k8s.Client, p Parameters, version string) (*Con
echoExternalPods: make(map[string]Pod),
clientPods: make(map[string]Pod),
clientCPPods: make(map[string]Pod),
perfClientPods: make(map[string]Pod),
perfServerPod: make(map[string]Pod),
PerfResults: make(map[PerfTests]PerfResult),
perfClientPods: []Pod{},
perfServerPod: []Pod{},
PerfResults: []common.PerfSummary{},
echoServices: make(map[string]Service),
ingressService: make(map[string]Service),
externalWorkloads: make(map[string]ExternalWorkload),
Expand Down Expand Up @@ -593,34 +579,51 @@ func (ct *ConnectivityTest) report() error {
}

if ct.params.Perf {
if ct.params.PerfLatency {
// Report Performance results for latency
ct.Header("🔥 Latency Test Summary:")
ct.Logf("%s", strings.Repeat("-", 233))
ct.Logf("📋 %-15s | %-50s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s", "Scenario", "Pod", "Test", "Num Samples", "Duration", "Min", "Mean", "Max", "P50", "P90", "P99")
ct.Logf("%s", strings.Repeat("-", 233))
for p, d := range ct.PerfResults {
ct.Logf("📋 %-15s | %-50s | %-15s | %-15d | %-15s | %-12.2f %s | %-12.2f %s | %-12.2f %s | %-12.2f %s | %-12.2f %s | %-12.2f %s",
d.Scenario, p.Pod, p.Test, d.Samples, d.Duration,
d.Latency["min"][0], d.Metric,
d.Latency["mean"][0], d.Metric,
d.Latency["max"][0], d.Metric,
d.Latency["p50"][0], d.Metric,
d.Latency["p90"][0], d.Metric,
d.Latency["p99"][0], d.Metric)
ct.Header("🔥 Network Performance Test Summary:")
ct.Logf("%s", strings.Repeat("-", 200))
ct.Logf("📋 %-15s | %-10s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s", "Scenario", "Node", "Test", "Duration", "Min", "Mean", "Max", "P50", "P90", "P99", "Transaction rate OP/s")
ct.Logf("%s", strings.Repeat("-", 200))
nodeString := func(sameNode bool) string {
if sameNode {
return "same-node"
}
ct.Logf("%s", strings.Repeat("-", 233))
} else {
// Report Performance results for throughput
ct.Header("🔥 Performance Test Summary:")
ct.Logf("%s", strings.Repeat("-", 145))
ct.Logf("📋 %-15s | %-50s | %-15s | %-15s | %-15s | %-15s", "Scenario", "Pod", "Test", "Num Samples", "Duration", "Avg value")
ct.Logf("%s", strings.Repeat("-", 145))
for p, d := range ct.PerfResults {
ct.Logf("📋 %-15s | %-50s | %-15s | %-15d | %-15s | %.2f (%s)", d.Scenario, p.Pod, p.Test, d.Samples, d.Duration, d.Avg, d.Metric)
ct.Debugf("Individual Values from run : %f", d.Values)
return "other-node"
}
for _, result := range ct.PerfResults {
if result.Result.Latency != nil && result.Result.TransactionRateMetric != nil {
ct.Logf("📋 %-15s | %-10s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-15s | %-12.2f",
result.PerfTest.Scenario,
nodeString(result.PerfTest.SameNode),
result.PerfTest.Test,
result.PerfTest.Duration,
result.Result.Latency.Min,
result.Result.Latency.Avg,
result.Result.Latency.Max,
result.Result.Latency.Perc50,
result.Result.Latency.Perc90,
result.Result.Latency.Perc99,
result.Result.TransactionRateMetric.TransactionRate,
)
}
}
ct.Logf("%s", strings.Repeat("-", 200))
ct.Logf("%s", strings.Repeat("-", 85))
ct.Logf("📋 %-15s | %-10s | %-15s | %-15s | %-15s ", "Scenario", "Node", "Test", "Duration", "Throughput Mb/s")
ct.Logf("%s", strings.Repeat("-", 85))
for _, result := range ct.PerfResults {
if result.Result.ThroughputMetric != nil {
ct.Logf("📋 %-15s | %-10s | %-15s | %-15s | %-12.2f ",
result.PerfTest.Scenario,
nodeString(result.PerfTest.SameNode),
result.PerfTest.Test,
result.PerfTest.Duration,
result.Result.ThroughputMetric.Throughput/1000000,
)
}
ct.Logf("%s", strings.Repeat("-", 145))
}
ct.Logf("%s", strings.Repeat("-", 85))
if ct.Params().PerfReportDir != "" {
common.ExportPerfSummaries(ct.PerfResults, ct.Params().PerfReportDir)
}
}

Expand Down Expand Up @@ -1086,11 +1089,11 @@ func (ct *ConnectivityTest) SecondaryNetworkNodeIPv6() map[string]string {
return ct.secondaryNetworkNodeIPv6
}

func (ct *ConnectivityTest) PerfServerPod() map[string]Pod {
func (ct *ConnectivityTest) PerfServerPod() []Pod {
return ct.perfServerPod
}

func (ct *ConnectivityTest) PerfClientPods() map[string]Pod {
func (ct *ConnectivityTest) PerfClientPods() []Pod {
return ct.perfClientPods
}

Expand Down
Loading

0 comments on commit 4e51788

Please sign in to comment.