Skip to content

Commit

Permalink
Switch pod to world testing from cilium.io to one.one.one.one
Browse files Browse the repository at this point in the history
Tentative fix for #367.

We have two main hypotheses as to why connections to external domains
are flaky:

- The external domain itself is unreliable.
- The external domain works fine, but CoreDNS is unreliable.

`cilium.io` is hosted on a single non-HA EC2 instance, which is definitely
not the most robust thing out there. We propose switching to
`one.one.one.one` as a quick fix to check whether that helps with
reliability.

If yes, we will evaluate moving to an HA system that we control ourselves
(e.g. a DNS zone hosted at a major provider).

If not, we will investigate the CoreDNS hypothesis.

Signed-off-by: Nicolas Busseneau <[email protected]>
  • Loading branch information
nbusseneau committed Sep 1, 2021
1 parent 55b3dca commit a705db7
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 29 deletions.
8 changes: 4 additions & 4 deletions connectivity/manifests/client-egress-l7-http.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
# client2 is allowed to contact cilium.io/ on port 80 and the echo Pod
# client2 is allowed to contact one.one.one.one/ on port 80 and the echo Pod
# on port 8080. HTTP introspection is enabled for client2.
# The toFQDNs section relies on DNS introspection being performed by
# the client-egress-only-dns policy.
Expand All @@ -9,7 +9,7 @@ metadata:
namespace: cilium-test
name: client-egress-l7-http
spec:
description: "Allow GET cilium.io:80/ and GET <echo>:8080/ from client2"
description: "Allow GET one.one.one.one:80/ and GET <echo>:8080/ from client2"
endpointSelector:
matchLabels:
other: client
Expand All @@ -26,9 +26,9 @@ spec:
http:
- method: "GET"
path: "/"
# Allow GET / requests, only towards cilium.io.
# Allow GET / requests, only towards one.one.one.one.
- toFQDNs:
- matchName: "cilium.io"
- matchName: "one.one.one.one"
toPorts:
- ports:
- port: "80"
Expand Down
12 changes: 6 additions & 6 deletions connectivity/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
ct.NewTest("dns-only").WithPolicy(clientEgressOnlyDNSPolicyYAML).
WithScenarios(
tests.PodToPod(""), // connects to other Pods directly, no DNS
tests.PodToWorld(""), // resolves cilium.io
tests.PodToWorld(""), // resolves one.one.one.one
).
WithExpectations(
func(a *check.Action) (egress check.Result, ingress check.Result) {
Expand Down Expand Up @@ -112,18 +112,18 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
tests.PodToPod(""),
)

// This policy only allows port 80 to "cilium.io". DNS proxy enabled.
// This policy only allows port 80 to "one.one.one.one". DNS proxy enabled.
ct.NewTest("to-fqdns").WithPolicy(clientEgressToFQDNsCiliumIOPolicyYAML).
WithScenarios(
tests.PodToWorld(""),
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Destination().Port() == 80 && a.Destination().Address() == "cilium.io" {
if a.Destination().Port() == 80 && a.Destination().Address() == "one.one.one.one" {
if a.Destination().Path() == "/" || a.Destination().Path() == "" {
egress = check.ResultDNSOK
egress.HTTP = check.HTTP{
Method: "GET",
URL: "http://cilium.io/",
URL: "http://one.one.one.one/",
}
return egress, check.ResultNone
}
Expand Down Expand Up @@ -173,8 +173,8 @@ func Run(ctx context.Context, ct *check.ConnectivityTest) error {
).
WithExpectations(func(a *check.Action) (egress, ingress check.Result) {
if a.Source().HasLabel("other", "client") && // Only client2 is allowed to make HTTP calls.
// Outbound HTTP to cilium.io is L7-introspected and allowed.
(a.Destination().Port() == 80 && a.Destination().Address() == "cilium.io" ||
// Outbound HTTP to one.one.one.one is L7-introspected and allowed.
(a.Destination().Port() == 80 && a.Destination().Address() == "one.one.one.one" ||
a.Destination().Port() == 8080) { // 8080 is traffic to echo Pod.
if a.Destination().Path() == "/" || a.Destination().Path() == "" {
egress = check.ResultOK
Expand Down
31 changes: 12 additions & 19 deletions connectivity/tests/world.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/cilium/cilium-cli/connectivity/check"
)

// PodToWorld sends multiple HTTP(S) requests to cilium.io
// PodToWorld sends multiple HTTP(S) requests to one.one.one.one
// from random client Pods.
func PodToWorld(name string) check.Scenario {
return &podToWorld{
Expand All @@ -32,10 +32,9 @@ func (s *podToWorld) Name() string {
}

func (s *podToWorld) Run(ctx context.Context, t *check.Test) {
chttp := check.HTTPEndpoint("cilium-io-http", "http://cilium.io")
chttps := check.HTTPEndpoint("cilium-io-https", "https://cilium.io")
chttpindex := check.HTTPEndpoint("cilium-io-http-index", "http://cilium.io/index.html")
jhttp := check.HTTPEndpoint("jenkins-cilium-io-http", "http://jenkins.cilium.io")
http := check.HTTPEndpoint("one-one-one-one-http", "http://one.one.one.one")
https := check.HTTPEndpoint("one-one-one-one-https", "https://one.one.one.one")
httpsindex := check.HTTPEndpoint("one-one-one-one-https-index", "https://one.one.one.one/index.html")

fp := check.FlowParameters{
DNSRequired: true,
Expand All @@ -45,27 +44,21 @@ func (s *podToWorld) Run(ctx context.Context, t *check.Test) {
var i int

for _, client := range t.Context().ClientPods() {
// With https, over port 443.
t.NewAction(s, fmt.Sprintf("https-to-cilium-io-%d", i), &client, chttps).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(chttps))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

// With http, over port 80.
t.NewAction(s, fmt.Sprintf("http-to-cilium-io-%d", i), &client, chttp).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(chttp))
t.NewAction(s, fmt.Sprintf("http-to-one-one-one-one-%d", i), &client, http).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(http))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

// With http, over port 80, index.html
t.NewAction(s, fmt.Sprintf("http-to-cilium-io-index-%d", i), &client, chttpindex).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(chttpindex))
// With https, over port 443.
t.NewAction(s, fmt.Sprintf("https-to-one-one-one-one-%d", i), &client, https).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(https))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

// With http to jenkins.cilium.io.
t.NewAction(s, fmt.Sprintf("http-to-jenkins-cilium-%d", i), &client, jhttp).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(jhttp))
// With https, over port 443, index.html.
t.NewAction(s, fmt.Sprintf("https-to-one-one-one-one-index-%d", i), &client, httpsindex).Run(func(a *check.Action) {
a.ExecInPod(ctx, curl(httpsindex))
a.ValidateFlows(ctx, client, a.GetEgressRequirements(fp))
})

Expand Down

0 comments on commit a705db7

Please sign in to comment.