Skip to content

Commit

Permalink
connectivity: add retry to external targets
Browse files Browse the repository at this point in the history
This commit adds --retry and --retry-delay
flags to connectivity test. They are used in external
pod-to-world and po-to-cidr tests to try to reconnect
on failures thus making flaky tests less likely.

Fixes comments of PodToWorld and PodToCIDR functions.

Signed-off-by: Birol Bilgin <[email protected]>
  • Loading branch information
brlbil committed Apr 28, 2023
1 parent 4b91b5f commit 2592105
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 24 deletions.
3 changes: 3 additions & 0 deletions connectivity/check/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ type Parameters struct {

DeleteCiliumOnNodes []string

Retry uint
RetryDelay time.Duration

ConnectTimeout time.Duration
RequestTimeout time.Duration

Expand Down
22 changes: 11 additions & 11 deletions connectivity/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
tests.ClientToClient(),
tests.PodToService(),
tests.PodToHostPort(),
tests.PodToWorld(),
tests.PodToWorld(tests.WithRetryAll()),
tests.PodToHost(),
tests.PodToExternalWorkload(),
tests.PodToCIDR(),
tests.PodToCIDR(tests.WithRetryAll()),
}
if s, ok := ct.Feature(check.FeatureNodeWithoutCilium); ok && s.Enabled {
noPoliciesScenarios = append(noPoliciesScenarios, tests.FromCIDRToPod())
Expand Down Expand Up @@ -286,7 +286,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
// 2. Egress to world works because there is no egress policy (so egress traffic originating from a pod is allowed),
// then when replies come back, they are considered as "replies" to the outbound connection.
// so they are not subject to ingress policy.
allIngressDenyScenarios := []check.Scenario{tests.PodToPod(), tests.PodToCIDR()}
allIngressDenyScenarios := []check.Scenario{tests.PodToPod(), tests.PodToCIDR(tests.WithRetryAll())}
if s, ok := ct.Feature(check.FeatureNodeWithoutCilium); ok && s.Enabled {
allIngressDenyScenarios = append(allIngressDenyScenarios, tests.FromCIDRToPod())
}
Expand All @@ -309,7 +309,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
// Egress to world works because there is no egress policy (so egress traffic originating from a pod is allowed),
// then when replies come back, they are considered as "replies" to the outbound connection.
// so they are not subject to ingress policy.
tests.PodToCIDR(),
tests.PodToCIDR(tests.WithRetryAll()),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Destination().Address(check.GetIPFamily(ct.Params().ExternalOtherIP)) == ct.Params().ExternalOtherIP ||
Expand Down Expand Up @@ -455,7 +455,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
// This policy allows UDP to kube-dns and port 80 TCP to all 'world' endpoints.
ct.NewTest("to-entities-world").WithCiliumPolicy(clientEgressToEntitiesWorldPolicyYAML).
WithScenarios(
tests.PodToWorld(),
tests.PodToWorld(tests.WithRetryDestPort(80)),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Destination().Port() == 80 {
Expand All @@ -470,7 +470,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
ct.NewTest("to-cidr-external").
WithCiliumPolicy(renderedTemplates["clientEgressToCIDRExternalPolicyYAML"]).
WithScenarios(
tests.PodToCIDR(),
tests.PodToCIDR(tests.WithRetryDestIP(ct.Params().ExternalIP)),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Destination().Address(check.IPFamilyV4) == ct.Params().ExternalOtherIP {
Expand All @@ -485,7 +485,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
ct.NewTest("to-cidr-external-knp").
WithK8SPolicy(renderedTemplates["clientEgressToCIDRExternalPolicyKNPYAML"]).
WithScenarios(
tests.PodToCIDR(),
tests.PodToCIDR(tests.WithRetryDestIP(ct.Params().ExternalIP)),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Destination().Address(check.IPFamilyV4) == ct.Params().ExternalOtherIP {
Expand Down Expand Up @@ -616,7 +616,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
WithCiliumPolicy(allowAllEgressPolicyYAML). // Allow all egress traffic
WithCiliumPolicy(renderedTemplates["clientEgressToCIDRExternalDenyPolicyYAML"]).
WithScenarios(
tests.PodToCIDR(), // Denies all traffic to ExternalOtherIP, but allow ExternalIP
tests.PodToCIDR(tests.WithRetryDestIP(ct.Params().ExternalIP)), // Denies all traffic to ExternalOtherIP, but allow ExternalIP
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Destination().Address(check.GetIPFamily(ct.Params().ExternalOtherIP)) == ct.Params().ExternalOtherIP {
Expand Down Expand Up @@ -733,7 +733,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
WithCiliumPolicy(renderedTemplates["clientEgressL7HTTPPolicyYAML"]). // L7 allow policy with HTTP introspection
WithScenarios(
tests.PodToPod(),
tests.PodToWorld(),
tests.PodToWorld(tests.WithRetryDestPort(80), tests.WithRetryPodLabel("other", "client")),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Source().HasLabel("other", "client") && // Only client2 is allowed to make HTTP calls.
Expand Down Expand Up @@ -761,7 +761,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
WithCiliumPolicy(renderedTemplates["clientEgressL7HTTPNamedPortPolicyYAML"]). // L7 allow policy with HTTP introspection (named port)
WithScenarios(
tests.PodToPod(),
tests.PodToWorld(),
tests.PodToWorld(tests.WithRetryDestPort(80), tests.WithRetryPodLabel("other", "client")),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Source().HasLabel("other", "client") && // Only client2 is allowed to make HTTP calls.
Expand Down Expand Up @@ -897,7 +897,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
ct.NewTest("to-fqdns").WithCiliumPolicy(renderedTemplates["clientEgressToFQDNsCiliumIOPolicyYAML"]).
WithFeatureRequirements(check.RequireFeatureEnabled(check.FeatureL7Proxy)).
WithScenarios(
tests.PodToWorld(),
tests.PodToWorld(tests.WithRetryDestPort(80)),
tests.PodToWorld2(), // resolves cilium.io
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
Expand Down
82 changes: 82 additions & 0 deletions connectivity/tests/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

package tests

import (
"strconv"

"github.com/cilium/cilium-cli/connectivity/check"
)

type labelsContainer interface {
HasLabel(key, value string) bool
}
Expand Down Expand Up @@ -48,3 +54,79 @@ func hasAllLabels(labelsContainer labelsContainer, filters map[string]string) bo
}
return true
}

type retryCondition struct {
podLabels map[string]string
all bool
destPort uint32
destIP string
}

// CurlOptions returns curl retry option or empty slice depending on retry conditions
func (rc *retryCondition) CurlOptions(peer check.TestPeer, ipFam check.IPFamily, pod check.Pod, params check.Parameters) []string {
if params.Retry == 0 {
return []string{}
}
if !rc.all && rc.destIP == "" && rc.destPort == 0 {
return []string{}
}

opts := []string{
"--retry", strconv.FormatInt(int64(params.Retry), 10),
"--retry-all-errors", // add --retry-all-errors to retry on all possible errors
}

if retryDelay := params.RetryDelay.Seconds(); retryDelay > 0.0 {
opts = append(opts, "--retry-delay", strconv.FormatFloat(retryDelay, 'f', -1, 64))
}

if rc.all {
return opts
}
if rc.destIP != "" && peer.Address(ipFam) != rc.destIP {
return []string{}
}
if rc.destPort != 0 && peer.Port() != rc.destPort {
return []string{}
}
for n, v := range rc.podLabels {
if !pod.HasLabel(n, v) {
return []string{}
}
}

return opts
}

type RetryOption func(*retryCondition)

// WithRetryAll sets all condition, returns retry options in every case
func WithRetryAll() RetryOption {
return func(rc *retryCondition) {
rc.all = true
}
}

// WithRetryDestIP sets ip address condition
func WithRetryDestIP(ip string) RetryOption {
return func(rc *retryCondition) {
rc.destIP = ip
}
}

// WithRetryDestPort sets port condition
func WithRetryDestPort(port uint32) RetryOption {
return func(rc *retryCondition) {
rc.destPort = port
}
}

// WithRetryPodLabel sets pod label condition
func WithRetryPodLabel(name, val string) RetryOption {
return func(rc *retryCondition) {
if rc.podLabels == nil {
rc.podLabels = map[string]string{}
}
rc.podLabels[name] = val
}
}
19 changes: 13 additions & 6 deletions connectivity/tests/to-cidr.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,20 @@ import (
"github.com/cilium/cilium-cli/connectivity/check"
)

// PodToCIDR sends an ICMP packet from each client Pod
// to ExternalIP and ExternalOtherIP.
func PodToCIDR() check.Scenario {
return &podToCIDR{}
// PodToCIDR sends an HTTPS request from each client Pod
// to ExternalIP and ExternalOtherIP
func PodToCIDR(opts ...RetryOption) check.Scenario {
cond := &retryCondition{}
for _, op := range opts {
op(cond)
}
return &podToCIDR{rc: cond}
}

// podToCIDR implements a Scenario.
type podToCIDR struct{}
type podToCIDR struct {
rc *retryCondition
}

func (s *podToCIDR) Name() string {
return "pod-to-cidr"
Expand All @@ -35,7 +41,8 @@ func (s *podToCIDR) Run(ctx context.Context, t *check.Test) {
src := src // copy to avoid memory aliasing when using reference

t.NewAction(s, fmt.Sprintf("%s-%d", ep.Name(), i), &src, ep, check.IPFamilyAny).Run(func(a *check.Action) {
a.ExecInPod(ctx, ct.CurlCommand(ep, check.IPFamilyAny))
opts := s.rc.CurlOptions(ep, check.IPFamilyAny, src, ct.Params())
a.ExecInPod(ctx, ct.CurlCommand(ep, check.IPFamilyAny, opts...))

a.ValidateFlows(ctx, src, a.GetEgressRequirements(check.FlowParameters{
RSTAllowed: true,
Expand Down
22 changes: 15 additions & 7 deletions connectivity/tests/world.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,19 @@ import (
"github.com/cilium/cilium-cli/connectivity/check"
)

// PodToWorld sends multiple HTTP(S) requests to one.one.one.one
// from random client Pods.
func PodToWorld() check.Scenario {
return &podToWorld{}
// PodToWorld sends multiple HTTP(S) requests to ExternalTarget
// from each client Pods.
func PodToWorld(opts ...RetryOption) check.Scenario {
cond := &retryCondition{}
for _, op := range opts {
op(cond)
}
return &podToWorld{rc: cond}
}

// podToWorld implements a Scenario.
type podToWorld struct {
rc *retryCondition
}

func (s *podToWorld) Name() string {
Expand All @@ -42,20 +47,23 @@ func (s *podToWorld) Run(ctx context.Context, t *check.Test) {
client := client // copy to avoid memory aliasing when using reference

// With http, over port 80.
httpOpts := s.rc.CurlOptions(http, check.IPFamilyAny, client, ct.Params())
t.NewAction(s, fmt.Sprintf("http-to-%s-%d", extTarget, i), &client, http, check.IPFamilyAny).Run(func(a *check.Action) {
a.ExecInPod(ctx, ct.CurlCommand(http, check.IPFamilyAny))
a.ExecInPod(ctx, ct.CurlCommand(http, check.IPFamilyAny, httpOpts...))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

// With https, over port 443.
httpsOpts := s.rc.CurlOptions(https, check.IPFamilyAny, client, ct.Params())
t.NewAction(s, fmt.Sprintf("https-to-%s-%d", extTarget, i), &client, https, check.IPFamilyAny).Run(func(a *check.Action) {
a.ExecInPod(ctx, ct.CurlCommand(https, check.IPFamilyAny))
a.ExecInPod(ctx, ct.CurlCommand(https, check.IPFamilyAny, httpsOpts...))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

// With https, over port 443, index.html.
httpsindexOpts := s.rc.CurlOptions(httpsindex, check.IPFamilyAny, client, ct.Params())
t.NewAction(s, fmt.Sprintf("https-to-%s-index-%d", extTarget, i), &client, httpsindex, check.IPFamilyAny).Run(func(a *check.Action) {
a.ExecInPod(ctx, ct.CurlCommand(httpsindex, check.IPFamilyAny))
a.ExecInPod(ctx, ct.CurlCommand(httpsindex, check.IPFamilyAny, httpsindexOpts...))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

Expand Down
3 changes: 3 additions & 0 deletions defaults/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ const (

PolicyWaitTimeout = 15 * time.Second

ConnectRetry = 3
ConnectRetryDelay = 3 * time.Second

ConnectTimeout = 2 * time.Second
RequestTimeout = 10 * time.Second

Expand Down
3 changes: 3 additions & 0 deletions internal/cli/cmd/connectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ func newCmdConnectivityTest() *cobra.Command {
cmd.Flags().StringVar(&params.JSONMockImage, "json-mock-image", defaults.ConnectivityCheckJSONMockImage, "Image path to use for json mock")
cmd.Flags().StringVar(&params.DNSTestServerImage, "dns-test-server-image", defaults.ConnectivityDNSTestServerImage, "Image path to use for CoreDNS")

cmd.Flags().UintVar(&params.Retry, "retry", defaults.ConnectRetry, "Number of retries on connection failure to external targets")
cmd.Flags().DurationVar(&params.RetryDelay, "retry-delay", defaults.ConnectRetryDelay, "Delay between retries for external targets")

cmd.Flags().DurationVar(&params.ConnectTimeout, "connect-timeout", defaults.ConnectTimeout, "Maximum time to allow initiation of the connection to take")
cmd.Flags().DurationVar(&params.RequestTimeout, "request-timeout", defaults.RequestTimeout, "Maximum time to allow a request to take")

Expand Down

0 comments on commit 2592105

Please sign in to comment.