From 5b4af012ab64c196b9667bd59737c86c98448bbf Mon Sep 17 00:00:00 2001
From: venkataanil
Date: Wed, 12 Jun 2024 13:19:36 +0530
Subject: [PATCH] Add egressip workload (#28)

This workload uses an EgressIP object with egress IP addresses. Client
pods use these IP addresses to communicate with the external server, and
each client validates that the returned packet data matches the egress
IPs. README.md is updated with more details about the workload.

Signed-off-by: venkataanil
---
 README.md                                   |  19 ++
 cmd/config/egressip/deployment-client.yml   |  63 ++++++
 cmd/config/egressip/egressip-obj.yml        |  16 ++
 cmd/config/egressip/egressip.yml            |  65 ++++++
 cmd/config/egressip/pod_monitor.yml         |  12 ++
 cmd/config/egressip/prometheus_role.yml     |  13 ++
 .../egressip/prometheus_role_binding.yml    |  12 ++
 cmd/config/metrics-egressip.yml             | 185 ++++++++++++++++++
 cmd/ocp.go                                  |   1 +
 egressip.go                                 | 178 +++++++++++++++++
 go.mod                                      |   3 +-
 go.sum                                      |   6 +-
 12 files changed, 570 insertions(+), 3 deletions(-)
 create mode 100644 cmd/config/egressip/deployment-client.yml
 create mode 100644 cmd/config/egressip/egressip-obj.yml
 create mode 100644 cmd/config/egressip/egressip.yml
 create mode 100644 cmd/config/egressip/pod_monitor.yml
 create mode 100644 cmd/config/egressip/prometheus_role.yml
 create mode 100644 cmd/config/egressip/prometheus_role_binding.yml
 create mode 100644 cmd/config/metrics-egressip.yml
 create mode 100644 egressip.go

diff --git a/README.md b/README.md
index 1711dcd3..c5015830 100644
--- a/README.md
+++ b/README.md
@@ -187,6 +187,25 @@ With the help of [networkpolicy](https://kubernetes.io/docs/concepts/services-ne
 - Default deny networkpolicy is applied first
 - Then for each unique label in a namespace we have a networkpolicy with that label as a podSelector which allows traffic from pods which *don't* have some other randomly-selected label. This translates to 10 networkpolicies/namespace
+
+## EgressIP workloads
+
+This workload creates an egress IP for the client pods. The SDN (OVN) uses the egress IP, instead of the default node IP, for traffic from the client pods to the external server.
+
+Each iteration creates the following objects in each of the created namespaces:
+
+- 1 deployment with the configured number of client pod replicas. Each client pod runs the quay.io/cloud-bulldozer/eipvalidator app, which periodically sends HTTP requests to the configured "EXT_SERVER_HOST" server at a "DELAY_BETWEEN_REQ_SEC" interval, with a request timeout of "REQ_TIMEOUT_SEC" seconds. The client pod validates that the body of each response contains the configured "EGRESS_IPS". Once the client pod is running and has received the first successful response containing the configured "EGRESS_IPS", it sets the "eip_startup_latency_total" Prometheus metric. A conceptual sketch of this loop is shown after the note below.
+- 1 EgressIP object. EgressIP objects are cluster scoped. Each EgressIP object holds the number of egress IP addresses that the user specified through the "addresses-per-iteration" CLI option. kube-burner generates these addresses for the EgressIP object from the egress IP list provided by kube-burner-ocp. OVN applies the egress IPs to the pods of the current job iteration through the "namespaceSelector" and "podSelector" fields in the EgressIP object.
+
+Note: The user has to create the external server manually, or use [e2e-benchmarking](https://github.com/cloud-bulldozer/e2e-benchmarking/tree/master/workloads/kube-burner-ocp-wrapper#egressip), which deploys the external server and runs the workload with the required configuration.
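+
+The client's request/validation loop is conceptually similar to the sketch below (illustrative only; the URL construction, logging and metric handling are assumptions, not the actual eipvalidator source):
+
+```go
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+)
+
+func main() {
+	// Mirrors the environment variables set by deployment-client.yml.
+	server := fmt.Sprintf("http://%s:%s", os.Getenv("EXT_SERVER_HOST"), os.Getenv("EXT_SERVER_PORT"))
+	egressIPs := strings.Split(os.Getenv("EGRESS_IPS"), ",")
+	client := &http.Client{Timeout: 3 * time.Second} // REQ_TIMEOUT_SEC
+
+	for {
+		if resp, err := client.Get(server); err == nil {
+			body, _ := io.ReadAll(resp.Body)
+			resp.Body.Close()
+			// The external server is expected to echo the source address of the
+			// request; finding one of the configured egress IPs in the body means
+			// OVN routed the traffic through the egress IP rather than the node IP.
+			for _, eip := range egressIPs {
+				if strings.Contains(string(body), eip) {
+					fmt.Printf("validated egress IP %s\n", eip)
+					// the real app records eip_startup_latency_total at this point
+				}
+			}
+		}
+		time.Sleep(1 * time.Second) // DELAY_BETWEEN_REQ_SEC
+	}
+}
+```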
+
+Running 1 iteration with 1 egress IP address per iteration (or EgressIP object):
+
+```console
+kube-burner-ocp egressip --addresses-per-iteration=1 --iterations=1 --external-server-ip=10.0.34.43
+```
+
+With the command above, each namespace has client pods with a dedicated egress IP, and OVN uses this dedicated egress IP for the HTTP requests from the client pods to 10.0.34.43.
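+
+The per-iteration address assignment (each iteration takes the next contiguous block of addresses from the pool generated by kube-burner-ocp) behaves roughly like the sketch below, an illustrative approximation of the GetIPAddress template helper rather than its actual source:
+
+```go
+package main
+
+import (
+	"fmt"
+	"strings"
+)
+
+// getIPAddress returns the space-separated block of the address pool that
+// belongs to one iteration, assuming the pool holds at least
+// (iteration+1)*addrPerIteration addresses.
+func getIPAddress(pool string, iteration, addrPerIteration int) string {
+	addrs := strings.Fields(pool)
+	start := iteration * addrPerIteration
+	return strings.Join(addrs[start:start+addrPerIteration], " ")
+}
+
+func main() {
+	pool := "10.0.128.1 10.0.128.2 10.0.128.3 10.0.128.4"
+	// Iteration 1 with 2 addresses per iteration gets the second pair.
+	fmt.Println(getIPAddress(pool, 1, 2)) // prints: 10.0.128.3 10.0.128.4
+}
+```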
+
 ## Web-burner workloads
 
 This workload is meant to emulate some telco specific workloads. Before running *web-burner-node-density* or *web-burner-cluster-density* load the environment with *web-burner-init* first (without the garbage collection flag: `--gc=false`).
diff --git a/cmd/config/egressip/deployment-client.yml b/cmd/config/egressip/deployment-client.yml
new file mode 100644
index 00000000..db83e6af
--- /dev/null
+++ b/cmd/config/egressip/deployment-client.yml
@@ -0,0 +1,63 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: client-{{.Replica}}-{{.Iteration}}
+spec:
+  replicas: {{.podReplicas}}
+  selector:
+    matchLabels:
+      name: client-{{.Replica}}-{{.Iteration}}
+  template:
+    metadata:
+      labels:
+        name: client-{{.Replica}}-{{.Iteration}}
+        app: client
+    spec:
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: client
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: node-role.kubernetes.io/worker
+                operator: Exists
+              - key: node-role.kubernetes.io/infra
+                operator: DoesNotExist
+              - key: node-role.kubernetes.io/workload
+                operator: DoesNotExist
+      containers:
+      - name: client-app
+        image: quay.io/cloud-bulldozer/eipvalidator:latest
+        resources:
+          requests:
+            memory: "10Mi"
+            cpu: "10m"
+        ports:
+        - containerPort: 8080
+          name: metrics
+        env:
+        - name: EXT_SERVER_HOST
+          value: "{{.extServerHost}}"
+        - name: EXT_SERVER_PORT
+          value: "{{ add 9002 (mod .Iteration 60) }}"
+        - name: EGRESS_IPS
+          {{- $eips := (splitList " " (GetIPAddress .eipAddresses .Iteration .addrPerIteration) | join ",") }}
+          value: "{{$eips}}"
+        - name: DELAY_BETWEEN_REQ_SEC
+          value: "1"
+        - name: REQ_TIMEOUT_SEC
+          value: "3"
+        imagePullPolicy: Always
+        securityContext:
+          privileged: false
+        volumeMounts:
+      restartPolicy: Always
+  strategy:
+    type: RollingUpdate
diff --git a/cmd/config/egressip/egressip-obj.yml b/cmd/config/egressip/egressip-obj.yml
new file mode 100644
index 00000000..babd6a7a
--- /dev/null
+++ b/cmd/config/egressip/egressip-obj.yml
@@ -0,0 +1,16 @@
+---
+apiVersion: k8s.ovn.org/v1
+kind: EgressIP
+metadata:
+  name: egressip-obj-{{.Iteration}}
+spec:
+  egressIPs:
+  {{range (splitList " " (GetIPAddress .eipAddresses .Iteration .addrPerIteration))}}
+  - {{.}}
+  {{end}}
+  namespaceSelector:
+    matchLabels:
+      kubernetes.io/metadata.name: egressip-{{.Iteration}}
+  podSelector:
+    matchLabels:
+      app: client
diff --git a/cmd/config/egressip/egressip.yml b/cmd/config/egressip/egressip.yml
new file mode 100644
index 00000000..3e21a11a
--- /dev/null
+++ b/cmd/config/egressip/egressip.yml
@@ -0,0 +1,65 @@
+---
+global:
+  gc: {{.GC}}
+  gcMetrics: {{.GC_METRICS}}
+  measurements:
+    - name: podLatency
+      thresholds:
+        - conditionType: Ready
+          metric: P99
+          threshold: 15s
+metricsEndpoints:
+{{ if .ES_SERVER }}
+  - indexer:
+      esServers: ["{{.ES_SERVER}}"]
+      insecureSkipVerify: true
+      defaultIndex: {{.ES_INDEX}}
+      type: opensearch
+{{ end }}
+{{ if eq .LOCAL_INDEXING "true" }}
+  - indexer:
+      type: local
+      metricsDirectory: collected-metrics-{{.UUID}}
+{{ end }}
+
+jobs:
+  - name: egressip
+    namespace: egressip
+    jobIterations: {{.JOB_ITERATIONS}}
+    qps: {{.QPS}}
+    burst: {{.BURST}}
+    namespacedIterations: true
+    podWait: false
+    waitWhenFinished: true
+    preLoadImages: false
+    preLoadPeriod: 15s
+    namespaceLabels:
+      security.openshift.io/scc.podSecurityLabelSync: false
+      pod-security.kubernetes.io/enforce: privileged
+      pod-security.kubernetes.io/audit: privileged
+      pod-security.kubernetes.io/warn: privileged
+      openshift.io/cluster-monitoring: true
+    objects:
+
+      - objectTemplate: prometheus_role.yml
+        replicas: 1
+
+      - objectTemplate: prometheus_role_binding.yml
+        replicas: 1
+
+      - objectTemplate: pod_monitor.yml
+        replicas: 1
+
+      - objectTemplate: egressip-obj.yml
+        replicas: 1
+        inputVars:
+          eipAddresses: {{.EIP_ADDRESSES}}
+          addrPerIteration: {{.ADDRESSES_PER_ITERATION}}
+
+      - objectTemplate: deployment-client.yml
+        replicas: 1
+        inputVars:
+          podReplicas: 2
+          eipAddresses: {{.EIP_ADDRESSES}}
+          addrPerIteration: {{.ADDRESSES_PER_ITERATION}}
+          extServerHost: {{.EXTERNAL_SERVER_IP}}
diff --git a/cmd/config/egressip/pod_monitor.yml b/cmd/config/egressip/pod_monitor.yml
new file mode 100644
index 00000000..a93635d0
--- /dev/null
+++ b/cmd/config/egressip/pod_monitor.yml
@@ -0,0 +1,12 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: pod-monitor-{{.Replica}}
+spec:
+  selector:
+    matchLabels:
+      app: client
+  podMetricsEndpoints:
+  - port: metrics
+    interval: 15s
+    scheme: http
diff --git a/cmd/config/egressip/prometheus_role.yml b/cmd/config/egressip/prometheus_role.yml
new file mode 100644
index 00000000..a7299382
--- /dev/null
+++ b/cmd/config/egressip/prometheus_role.yml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: prometheus-k8s
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
diff --git a/cmd/config/egressip/prometheus_role_binding.yml b/cmd/config/egressip/prometheus_role_binding.yml
new file mode 100644
index 00000000..27787397
--- /dev/null
+++ b/cmd/config/egressip/prometheus_role_binding.yml
@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: prometheus-k8s
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: prometheus-k8s
+subjects:
+  - kind: ServiceAccount
+    name: prometheus-k8s
+    namespace: openshift-monitoring
diff --git a/cmd/config/metrics-egressip.yml b/cmd/config/metrics-egressip.yml
new file mode 100644
index 00000000..4e149292
--- /dev/null
+++ b/cmd/config/metrics-egressip.yml
@@ -0,0 +1,185 @@
+# EgressIP App metrics
+- query: scale_eip_startup_latency_total
+  metricName: eipStartupLatencyTotal
+  instant: true
+
+- query: scale_eip_recovery_latency>0
+  metricName: eipRecoveryLatencyTotal
+  instant: true
+
+- query: scale_startup_non_eip_total{}>0
+  metricName: startupEipNodeIPReqCount
+  instant: true
+
+# API server
+- query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0
+  metricName: schedulingThroughput
+
+- query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0
+  metricName: readOnlyAPICallsLatency
+
+- query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0
+  metricName: mutatingAPICallsLatency
+
+- query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0
+  metricName: APIRequestRate
+
+# Kubeproxy and OVN service sync latency
+
+- query: histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[2m])) by (le)) > 0
+  metricName: serviceSyncLatency
+
+- query: histogram_quantile(0.99, sum(rate(ovnkube_master_network_programming_duration_seconds_bucket{kind="service"}[2m])) by (le))
+  metricName: serviceSyncLatency
+
+# Containers & pod metrics
+
+- query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|sdn|ovn-kubernetes|network-node-identity|multus|.*apiserver|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)|cilium|stackrox|calico.*|tigera.*"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0
+  metricName: containerCPU-Masters
+
+- query: (avg(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus|ingress)|cilium|stackrox|calico.*|tigera.*"}[2m]) * 100 and on (node) kube_node_role{role="worker"}) by (namespace, container)) > 0
+  metricName: containerCPU-AggregatedWorkers
+
+- query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(monitoring|sdn|ovn-kubernetes|multus|ingress)|stackrox"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0
+  metricName: containerCPU-Infra
+
+- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|network-node-identity|sdn|multus|ingress|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)|cilium|stackrox|calico.*|tigera.*"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0
+  metricName: containerMemory-Masters
+
+- query: avg(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus|ingress)|cilium|stackrox|calico.*|tigera.*"} and on (node) kube_node_role{role="worker"}) by (container, namespace)
+  metricName: containerMemory-AggregatedWorkers
+
+- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus|ingress|monitoring|image-registry)|cilium|stackrox|calico.*|tigera.*"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0
+  metricName: containerMemory-Infra
+
+# Node metrics: CPU & Memory
+
+- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0
+  metricName: nodeCPU-Masters
+
+- query: (avg((sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)"))) by (mode)) > 0
+  metricName: nodeCPU-AggregatedWorkers
+
+- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) > 0
+  metricName: nodeCPU-Infra
+
+# We compute memory utilization by subtracting available memory from the total
+#
+- query: avg((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)"))
+  metricName: nodeMemoryUtilization-AggregatedWorkers
+
+- query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")
+  metricName: nodeMemoryUtilization-Masters
+
+- query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")
+  metricName: nodeMemoryUtilization-Infra
+
+# Kubelet & CRI-O runtime metrics
+
+- query: irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"})
+  metricName: kubeletCPU
+
+- query: process_resident_memory_bytes{service="kubelet",job="kubelet"} and on (node) topk(3,max_over_time(irate(process_resident_memory_bytes{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"})
+  metricName: kubeletMemory
+
+- query: irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"})
+  metricName: crioCPU
+
+- query: process_resident_memory_bytes{service="kubelet",job="crio"} and on (node) topk(3,max_over_time(irate(process_resident_memory_bytes{service="kubelet",job="crio"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"})
+  metricName: crioMemory
+
+# Etcd metrics
+
+- query: sum(rate(etcd_server_leader_changes_seen_total[2m]))
+  metricName: etcdLeaderChangesRate
+
+- query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m]))
+  metricName: 99thEtcdDiskBackendCommitDurationSeconds
+
+- query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))
+  metricName: 99thEtcdDiskWalFsyncDurationSeconds
+
+- query: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m]))
+  metricName: 99thEtcdRoundTripTimeSeconds
+
+- query: sum by (cluster_version)(etcd_cluster_version)
+  metricName: etcdVersion
+  instant: true
+
+# Cluster metrics
+
+- query: sum(kube_namespace_status_phase) by (phase) > 0
+  metricName: namespaceCount
+
+- query: sum(kube_pod_status_phase{}) by (phase)
+  metricName: podStatusCount
+
+- query: count(kube_secret_info{})
+  metricName: secretCount
+  instant: true
+
+- query: count(kube_deployment_labels{})
+  metricName: deploymentCount
+  instant: true
+
+- query: count(kube_configmap_info{})
+  metricName: configmapCount
+  instant: true
+
+- query: count(kube_service_info{})
+  metricName: serviceCount
+  instant: true
+
+- query: count(openshift_route_created{})
+  metricName: routeCount
+  instant: true
+
+- query: kube_node_role
+  metricName: nodeRoles
+
+- query: sum(kube_node_status_condition{status="true"}) by (condition)
+  metricName: nodeStatus
+
+- query: count(kube_replicaset_labels{})
+  metricName: replicaSetCount
+  instant: true
+
+- query: count(kube_pod_info{} AND ON (pod) kube_pod_status_phase{phase="Running"}==1) by (node)
+  metricName: podDistribution
+
+# Prometheus metrics
+
+- query: openshift:prometheus_tsdb_head_series:sum{job="prometheus-k8s"}
+  metricName: prometheus-timeseriestotal
+
+- query: openshift:prometheus_tsdb_head_samples_appended_total:sum{job="prometheus-k8s"}
+  metricName: prometheus-ingestionrate
+
+# Retain the raw CPU seconds totals for comparison
+- query: sum( node_cpu_seconds_total and on (instance) label_replace(kube_node_role{role="worker",role!="infra"}, "instance", "$1", "node", "(.+)") ) by (mode)
+  metricName: nodeCPUSeconds-Workers
+  instant: true
+
+- query: sum( node_cpu_seconds_total and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)") ) by (mode)
+  metricName: nodeCPUSeconds-Masters
+  instant: true
+
+- query: sum( node_cpu_seconds_total and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)") ) by (mode)
+  metricName: nodeCPUSeconds-Infra
+  instant: true
+
+- query: sum ( container_cpu_usage_seconds_total { id =~ "/system.slice|/system.slice/kubelet.service|/system.slice/ovs-vswitchd.service|/system.slice/crio.service|/kubepods.slice" } and on (node) kube_node_role{ role = "worker",role != "infra" } ) by ( id )
+  metricName: cgroupCPUSeconds-Workers
+  instant: true
+
+- query: sum ( container_cpu_usage_seconds_total { id =~ "/system.slice|/system.slice/kubelet.service|/system.slice/ovs-vswitchd.service|/system.slice/crio.service|/kubepods.slice" } and on (node) kube_node_role{ role = "master" } ) by ( id )
+  metricName: cgroupCPUSeconds-Masters
+  instant: true
+
+- query: sum ( container_cpu_usage_seconds_total { id =~ "/system.slice|/system.slice/kubelet.service|/system.slice/ovs-vswitchd.service|/system.slice/crio.service|/kubepods.slice" } and on (node) kube_node_role{ role = "infra" } ) by ( id )
+  metricName: cgroupCPUSeconds-Infra
+  instant: true
+
+- query: sum( container_cpu_usage_seconds_total{container!~"POD|",namespace=~"openshift-.*"} ) by (namespace)
+  metricName: cgroupCPUSeconds-namespaces
+  instant: true
diff --git a/cmd/ocp.go b/cmd/ocp.go
index 4cb38ddd..1d094c0d 100644
--- a/cmd/ocp.go
+++ b/cmd/ocp.go
@@ -113,6 +113,7 @@ func openShiftCmd() *cobra.Command {
 		ocp.NewWebBurner(&wh, "web-burner-init"),
 		ocp.NewWebBurner(&wh, "web-burner-node-density"),
 		ocp.NewWebBurner(&wh, "web-burner-cluster-density"),
+		ocp.NewEgressIP(&wh, "egressip"),
 		ocp.ClusterHealth(),
 		ocp.CustomWorkload(&wh),
 	)
diff --git a/egressip.go b/egressip.go
new file mode 100644
index 00000000..110599d2
--- /dev/null
+++ b/egressip.go
@@ -0,0 +1,178 @@
+// Copyright 2022 The Kube-burner Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ocp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/kube-burner/kube-burner/pkg/config"
+	"github.com/kube-burner/kube-burner/pkg/workloads"
+	"github.com/praserx/ipconv"
+	log "github.com/sirupsen/logrus"
+	"github.com/spf13/cobra"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// get the egress IP cidr and the node IPs from the worker node annotations
+func getEgressIPCidrNodeIPs() ([]string, string) {
+	kubeClientProvider := config.NewKubeClientProvider("", "")
+	clientSet, _ := kubeClientProvider.ClientSet(0, 0)
+	workers, err := clientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
+	if err != nil {
+		log.Errorf("Error retrieving workers: %v", err)
+		os.Exit(1)
+	}
+
+	nodeIPs := []string{}
+	var egressIPCidr string
+	for _, worker := range workers.Items {
+		nodeIPs = append(nodeIPs, worker.Status.Addresses[0].Address)
+		// Add the gateway IP to nodeIPs so it gets excluded when building the egress IP list
+		gwconfig, exist := worker.ObjectMeta.Annotations["k8s.ovn.org/l3-gateway-config"]
+		if exist {
+			var item map[string]interface{}
+			json.Unmarshal([]byte(gwconfig), &item)
+			defaultgw := item["default"].(map[string]interface{})
+			nodeIPs = append(nodeIPs, defaultgw["next-hop"].(string))
+		}
+		// For cloud-based OCP deployments, the egress IP cidr is part of the cloud.network.openshift.io/egress-ipconfig annotation.
+		// For bare metal, read the cidr from k8s.ovn.org/node-primary-ifaddr instead.
+		if egressIPCidr == "" {
+			eipconfig, exist := worker.ObjectMeta.Annotations["cloud.network.openshift.io/egress-ipconfig"]
+			if exist {
+				var items []map[string]interface{}
+				json.Unmarshal([]byte(eipconfig), &items)
+				ifaddr := items[0]["ifaddr"].(map[string]interface{})
+				egressIPCidr = ifaddr["ipv4"].(string)
+			} else {
+				nodeAddr, exist := worker.ObjectMeta.Annotations["k8s.ovn.org/node-primary-ifaddr"]
+				if exist {
+					var ifaddr map[string]interface{}
+					json.Unmarshal([]byte(nodeAddr), &ifaddr)
+					egressIPCidr = ifaddr["ipv4"].(string)
+				}
+			}
+		}
+	}
+	return nodeIPs, egressIPCidr
+}
+
+// getFirstUsableAddr returns the first usable address of the cidr's network.
+// For example, if the cidr is 10.0.132.49/19, the network address is
+// 10.0.128.0 and the first usable address is 10.0.128.4 (the first 4
+// addresses are skipped, see below).
+func getFirstUsableAddr(cidr string) uint32 {
+	// Parse the IP address and subnet mask
+	ip, ipNet, err := net.ParseCIDR(cidr)
+	if err != nil {
+		fmt.Println("Error parsing CIDR notation:", err)
+		os.Exit(1)
+	}
+
+	// Get the network address by performing a bitwise AND
+	ipBytes := ip.To4()
+	networkBytes := make([]byte, 4)
+	for i := 0; i < 4; i++ {
+		networkBytes[i] = ipBytes[i] & ipNet.Mask[i]
+	}
+
+	// Calculate the first usable IP address by skipping the first 4 addresses.
+	// For example, OVN didn't assign an eip to the node when the eip was between 10.0.0.0 and 10.0.0.3 for cidr 10.0.0.0/19
+	firstUsableIP := make(net.IP, len(networkBytes))
+	copy(firstUsableIP, networkBytes)
+	firstUsableIP[3] += 4 // Increment the last byte by 4 for the first usable IP address
+
+	// Convert the first usable IP address to an integer
+	baseAddrInt, err := ipconv.IPv4ToInt(firstUsableIP)
+	if err != nil {
+		log.Fatal("Error converting IP to int: ", err)
+		os.Exit(1)
+	}
+	return baseAddrInt
+}
+
+// Egress IPs and node IPs live in the same cidr, so the node IPs have to be
+// excluded from the cidr when generating the list of available egress IPs.
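+// As a worked example (hypothetical numbers, not taken from a real cluster):
+// with base address 10.0.128.4 and node IPs 10.0.128.5 and 10.0.128.6, a
+// request for 4 addresses yields 10.0.128.4, 10.0.128.7, 10.0.128.8 and
+// 10.0.128.9, because the two node IPs are skipped while scanning upwards.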
+func generateEgressIPs(numJobIterations int, addressesPerIteration int, externalServerIP string) {
+
+	nodeIPs, egressIPCidr := getEgressIPCidrNodeIPs()
+	// Add the external server IP to nodeIPs so it gets excluded when building the egress IP list
+	nodeIPs = append(nodeIPs, externalServerIP)
+	baseAddrInt := getFirstUsableAddr(egressIPCidr)
+	// slice to hold the available egress IPs
+	addrSlice := make([]string, 0, (numJobIterations * addressesPerIteration))
+
+	// map to store nodeIPs
+	nodeMap := make(map[uint32]bool)
+	for _, nodeip := range nodeIPs {
+		nodeipuint32, err := ipconv.IPv4ToInt(net.ParseIP(nodeip))
+		if err != nil {
+			log.Fatal("Error: ", err)
+			os.Exit(1)
+		}
+		nodeMap[nodeipuint32] = true
+	}
+
+	// Generate IP addresses from the CIDR, excluding nodeIPs.
+	// Extra loop iterations are needed for each node IP we come across while generating the egress IP list.
+	var newAddr uint32
+	for i := 0; i < ((numJobIterations * addressesPerIteration) + len(nodeIPs)); i++ {
+		newAddr = baseAddrInt + uint32(i)
+		if !nodeMap[newAddr] {
+			addrSlice = append(addrSlice, ipconv.IntToIPv4(newAddr).String())
+		}
+		// break once we have the needed number of egress IPs
+		if len(addrSlice) >= (numJobIterations * addressesPerIteration) {
+			break
+		}
+	}
+
+	// combine all addresses into one string and export it as an environment variable
+	os.Setenv("EIP_ADDRESSES", strings.Join(addrSlice, " "))
+}
+
+// NewEgressIP holds the egressip workload
+func NewEgressIP(wh *workloads.WorkloadHelper, variant string) *cobra.Command {
+	var iterations, addressesPerIteration int
+	var externalServerIP string
+	var podReadyThreshold time.Duration
+	cmd := &cobra.Command{
+		Use:   variant,
+		Short: fmt.Sprintf("Runs %v workload", variant),
+		PreRun: func(cmd *cobra.Command, args []string) {
+			wh.Metadata.Benchmark = cmd.Name()
+			os.Setenv("JOB_ITERATIONS", fmt.Sprint(iterations))
+			os.Setenv("POD_READY_THRESHOLD", fmt.Sprintf("%v", podReadyThreshold))
+			os.Setenv("ADDRESSES_PER_ITERATION", fmt.Sprint(addressesPerIteration))
+			os.Setenv("EXTERNAL_SERVER_IP", externalServerIP)
+			generateEgressIPs(iterations, addressesPerIteration, externalServerIP)
+		},
+		Run: func(cmd *cobra.Command, args []string) {
+			setMetrics(cmd, "metrics-egressip.yml")
+			wh.Run(cmd.Name())
+		},
+	}
+	cmd.Flags().DurationVar(&podReadyThreshold, "pod-ready-threshold", 2*time.Minute, "Pod ready timeout threshold")
+	cmd.Flags().IntVar(&iterations, "iterations", 0, fmt.Sprintf("%v iterations", variant))
+	cmd.Flags().StringVar(&externalServerIP, "external-server-ip", "", "External server IP address")
+	cmd.Flags().IntVar(&addressesPerIteration, "addresses-per-iteration", 1, "Number of egress IP addresses per job iteration")
+	cmd.MarkFlagRequired("iterations")
+	cmd.MarkFlagRequired("external-server-ip")
+	return cmd
+}
diff --git a/go.mod b/go.mod
index 19eebc4c..9607d130 100644
--- a/go.mod
+++ b/go.mod
@@ -7,7 +7,7 @@ toolchain go1.22.3
 require (
 	github.com/cloud-bulldozer/go-commons v1.0.15
 	github.com/google/uuid v1.6.0
-	github.com/kube-burner/kube-burner v1.9.7
+	github.com/kube-burner/kube-burner v1.9.8
 	github.com/openshift/api v0.0.0-20240516090725-a20192e21ba6
 	github.com/openshift/client-go v0.0.0-20240510131258-f646d5f29250
 	github.com/sirupsen/logrus v1.9.3
@@ -50,6 +50,7 @@ require (
 	github.com/opensearch-project/opensearch-go v1.1.0 // indirect
 	github.com/openshift/custom-resource-status v1.1.2 // indirect
 	github.com/pborman/uuid v1.2.0 // indirect
+	github.com/praserx/ipconv v1.2.1 // indirect
 	github.com/prometheus/client_golang v1.19.0 // indirect
 	github.com/prometheus/client_model v0.6.0 // indirect
 	github.com/prometheus/common v0.52.3 // indirect
diff --git a/go.sum b/go.sum
index 67694071..0957765a 100644
--- a/go.sum
+++ b/go.sum
@@ -354,8 +354,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/kube-burner/kube-burner v1.9.7 h1:6VEtblNLTZLd1vXhBPPJh9Up0SZ3pNAv8Bo9TWAneyQ=
-github.com/kube-burner/kube-burner v1.9.7/go.mod h1:44exUdvnBXNbjfNn6hhVbnfFWcPtHiC171giwIL2uXY=
+github.com/kube-burner/kube-burner v1.9.8 h1:sFiVWNXppdVdJtNb2TZbUPoYbAueqSXSxUeI3/k0ySI=
+github.com/kube-burner/kube-burner v1.9.8/go.mod h1:44exUdvnBXNbjfNn6hhVbnfFWcPtHiC171giwIL2uXY=
 github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
 github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
 github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
@@ -446,6 +446,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
 github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
+github.com/praserx/ipconv v1.2.1 h1:MWGfrF+OZ0pqIuTlNlMgvJDDbohC3h751oN1+Ov3x4k=
+github.com/praserx/ipconv v1.2.1/go.mod h1:DSy+AKre/e3w/npsmUDMio+OR/a2rvmMdI7rerOIgqI=
 github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
 github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
 github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=