From 6d4e16a56ae3e0ace20ce3fceed7e355a83aadd1 Mon Sep 17 00:00:00 2001 From: Nicolas Busseneau Date: Wed, 1 Sep 2021 19:57:41 +0200 Subject: [PATCH] Switch pod to world testing from `cilium.io` to `one.one.one.one` Tentative fix for #367. We have two main hypotheses around why connections to external domains are flaky: - The external domain itself is unreliable. - The external domain works fine, but CoreDNS is unreliable. `cilium.io` is hosted on a single non-HA EC2 instance, which is definitely not the most robust thing out there. We propose switching to `one.one.one.one` as a quick fix to check if that helps with the reliability. If yes, we will evaluate moving to a HA system that we control ourselves (e.g. a DNS zone hosted at a major provider). If not, we will investigate the CoreDNS hypothesis. Signed-off-by: Nicolas Busseneau --- .../manifests/client-egress-l7-http.yaml | 8 ++--- ...ient-egress-to-fqdns-one-one-one-one.yaml} | 4 +-- connectivity/suite.go | 14 ++++----- connectivity/tests/world.go | 31 +++++++------------ 4 files changed, 25 insertions(+), 32 deletions(-) rename connectivity/manifests/{client-egress-to-fqdns-cilium-io.yaml => client-egress-to-fqdns-one-one-one-one.yaml} (88%) diff --git a/connectivity/manifests/client-egress-l7-http.yaml b/connectivity/manifests/client-egress-l7-http.yaml index 4e01f6135f..5deaa567e2 100644 --- a/connectivity/manifests/client-egress-l7-http.yaml +++ b/connectivity/manifests/client-egress-l7-http.yaml @@ -1,5 +1,5 @@ --- -# client2 is allowed to contact cilium.io/ on port 80 and the echo Pod +# client2 is allowed to contact one.one.one.one/ on port 80 and the echo Pod # on port 8080. HTTP introspection is enabled for client2. # The toFQDNs section relies on DNS introspection being performed by # the client-egress-only-dns policy. 
@@ -9,7 +9,7 @@ metadata: namespace: cilium-test name: client-egress-l7-http spec: - description: "Allow GET cilium.io:80/ and GET :8080/ from client2" + description: "Allow GET one.one.one.one:80/ and GET :8080/ from client2" endpointSelector: matchLabels: other: client @@ -26,9 +26,9 @@ spec: http: - method: "GET" path: "/" - # Allow GET / requests, only towards cilium.io. + # Allow GET / requests, only towards one.one.one.one. - toFQDNs: - - matchName: "cilium.io" + - matchName: "one.one.one.one" toPorts: - ports: - port: "80" diff --git a/connectivity/manifests/client-egress-to-fqdns-cilium-io.yaml b/connectivity/manifests/client-egress-to-fqdns-one-one-one-one.yaml similarity index 88% rename from connectivity/manifests/client-egress-to-fqdns-cilium-io.yaml rename to connectivity/manifests/client-egress-to-fqdns-one-one-one-one.yaml index fe0915ab72..a857d3e6aa 100644 --- a/connectivity/manifests/client-egress-to-fqdns-cilium-io.yaml +++ b/connectivity/manifests/client-egress-to-fqdns-one-one-one-one.yaml @@ -2,7 +2,7 @@ apiVersion: cilium.io/v2 kind: CiliumNetworkPolicy metadata: namespace: cilium-test - name: client-egress-to-fqdns-cilium-io + name: client-egress-to-fqdns-one-one-one-one spec: endpointSelector: matchLabels: @@ -17,7 +17,7 @@ spec: - method: "GET" path: "/" toFQDNs: - - matchName: "cilium.io" + - matchName: "one.one.one.one" - toPorts: - ports: - port: "53" diff --git a/connectivity/suite.go b/connectivity/suite.go index 7def6c6354..6cf40cf73d 100644 --- a/connectivity/suite.go +++ b/connectivity/suite.go @@ -24,7 +24,7 @@ var ( //go:embed manifests/client-ingress-from-client2.yaml clientIngressFromClient2PolicyYAML string - //go:embed manifests/client-egress-to-fqdns-cilium-io.yaml + //go:embed manifests/client-egress-to-fqdns-one-one-one-one.yaml clientEgressToFQDNsCiliumIOPolicyYAML string //go:embed manifests/echo-ingress-from-other-client.yaml @@ -74,7 +74,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error { 
ct.NewTest("dns-only").WithPolicy(clientEgressOnlyDNSPolicyYAML). WithScenarios( tests.PodToPod(""), // connects to other Pods directly, no DNS - tests.PodToWorld(""), // resolves cilium.io + tests.PodToWorld(""), // resolves one.one.one.one ). WithExpectations( func(a *check.Action) (egress check.Result, ingress check.Result) { @@ -112,18 +112,18 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error { tests.PodToPod(""), ) - // This policy only allows port 80 to "cilium.io". DNS proxy enabled. + // This policy only allows port 80 to "one.one.one.one". DNS proxy enabled. ct.NewTest("to-fqdns").WithPolicy(clientEgressToFQDNsCiliumIOPolicyYAML). WithScenarios( tests.PodToWorld(""), ). WithExpectations(func(a *check.Action) (egress, ingress check.Result) { - if a.Destination().Port() == 80 && a.Destination().Address() == "cilium.io" { + if a.Destination().Port() == 80 && a.Destination().Address() == "one.one.one.one" { if a.Destination().Path() == "/" || a.Destination().Path() == "" { egress = check.ResultDNSOK egress.HTTP = check.HTTP{ Method: "GET", - URL: "http://cilium.io/", + URL: "http://one.one.one.one/", } return egress, check.ResultNone } @@ -173,8 +173,8 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error { ). WithExpectations(func(a *check.Action) (egress, ingress check.Result) { if a.Source().HasLabel("other", "client") && // Only client2 is allowed to make HTTP calls. - // Outbound HTTP to cilium.io is L7-introspected and allowed. - (a.Destination().Port() == 80 && a.Destination().Address() == "cilium.io" || + // Outbound HTTP to one.one.one.one is L7-introspected and allowed. + (a.Destination().Port() == 80 && a.Destination().Address() == "one.one.one.one" || a.Destination().Port() == 8080) { // 8080 is traffic to echo Pod. 
if a.Destination().Path() == "/" || a.Destination().Path() == "" { egress = check.ResultOK diff --git a/connectivity/tests/world.go b/connectivity/tests/world.go index 593b9cfd5a..15c75048e3 100644 --- a/connectivity/tests/world.go +++ b/connectivity/tests/world.go @@ -10,7 +10,7 @@ import ( "github.com/cilium/cilium-cli/connectivity/check" ) -// PodToWorld sends multiple HTTP(S) requests to cilium.io +// PodToWorld sends multiple HTTP(S) requests to one.one.one.one // from random client Pods. func PodToWorld(name string) check.Scenario { return &podToWorld{ @@ -32,10 +32,9 @@ func (s *podToWorld) Name() string { } func (s *podToWorld) Run(ctx context.Context, t *check.Test) { - chttp := check.HTTPEndpoint("cilium-io-http", "http://cilium.io") - chttps := check.HTTPEndpoint("cilium-io-https", "https://cilium.io") - chttpindex := check.HTTPEndpoint("cilium-io-http-index", "http://cilium.io/index.html") - jhttp := check.HTTPEndpoint("jenkins-cilium-io-http", "http://jenkins.cilium.io") + http := check.HTTPEndpoint("one-one-one-one-http", "http://one.one.one.one") + https := check.HTTPEndpoint("one-one-one-one-https", "https://one.one.one.one") + httpsindex := check.HTTPEndpoint("one-one-one-one-https-index", "https://one.one.one.one/index.html") fp := check.FlowParameters{ DNSRequired: true, @@ -45,27 +44,21 @@ func (s *podToWorld) Run(ctx context.Context, t *check.Test) { var i int for _, client := range t.Context().ClientPods() { - // With https, over port 443. - t.NewAction(s, fmt.Sprintf("https-to-cilium-io-%d", i), &client, chttps).Run(func(a *check.Action) { - a.ExecInPod(ctx, curl(chttps)) - a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp)) - }) - // With http, over port 80. 
- t.NewAction(s, fmt.Sprintf("http-to-cilium-io-%d", i), &client, chttp).Run(func(a *check.Action) { - a.ExecInPod(ctx, curl(chttp)) + t.NewAction(s, fmt.Sprintf("http-to-one-one-one-one-%d", i), &client, http).Run(func(a *check.Action) { + a.ExecInPod(ctx, curl(http)) a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp)) }) - // With http, over port 80, index.html - t.NewAction(s, fmt.Sprintf("http-to-cilium-io-index-%d", i), &client, chttpindex).Run(func(a *check.Action) { - a.ExecInPod(ctx, curl(chttpindex)) + // With https, over port 443. + t.NewAction(s, fmt.Sprintf("https-to-one-one-one-one-%d", i), &client, https).Run(func(a *check.Action) { + a.ExecInPod(ctx, curl(https)) a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp)) }) - // With http to jenkins.cilium.io. - t.NewAction(s, fmt.Sprintf("http-to-jenkins-cilium-%d", i), &client, jhttp).Run(func(a *check.Action) { - a.ExecInPod(ctx, curl(jhttp)) + // With https, over port 443, index.html. + t.NewAction(s, fmt.Sprintf("https-to-one-one-one-one-index-%d", i), &client, httpsindex).Run(func(a *check.Action) { + a.ExecInPod(ctx, curl(httpsindex)) a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp)) })