Refactor default backend handling and add better events #21

Merged · 5 commits · Dec 7, 2016
2 changes: 1 addition & 1 deletion controllers/gce/Makefile
@@ -1,7 +1,7 @@
 all: push

 # 0.0 shouldn't clobber any released builds
-TAG = 0.8.0
+TAG = 0.8.1
 PREFIX = gcr.io/google_containers/glbc

 server:
2 changes: 1 addition & 1 deletion controllers/gce/README.md
@@ -327,7 +327,7 @@ So simply delete the replication controller:
 $ kubectl get rc glbc
 CONTROLLER         CONTAINER(S)           IMAGE(S)                                      SELECTOR                    REPLICAS   AGE
 glbc               default-http-backend   gcr.io/google_containers/defaultbackend:1.0   k8s-app=glbc,version=v0.5   1          2m
-                   l7-lb-controller       gcr.io/google_containers/glbc:0.8.0
+                   l7-lb-controller       gcr.io/google_containers/glbc:0.8.1

 $ kubectl delete rc glbc
 replicationcontroller "glbc" deleted
88 changes: 59 additions & 29 deletions controllers/gce/controller/utils.go
@@ -242,7 +242,7 @@ func (t *GCETranslator) toURLMap(ing *extensions.Ingress) (utils.GCEURLMap, error)
 		// to all other services under the assumption that the user will
 		// modify nodeport.
 		if _, ok := err.(errorNodePortNotFound); ok {
-			glog.Infof("%v", err)
+			t.recorder.Eventf(ing, api.EventTypeWarning, "Service", err.(errorNodePortNotFound).Error())
 			continue
 		}

@@ -269,6 +269,10 @@ func (t *GCETranslator) toURLMap(ing *extensions.Ingress) (utils.GCEURLMap, error)
 	}
 	defaultBackend, _ := t.toGCEBackend(ing.Spec.Backend, ing.Namespace)
 	hostPathBackend.PutDefaultBackend(defaultBackend)
+
+	if defaultBackend != nil && ing.Spec.Backend != nil {
+		t.recorder.Eventf(ing, api.EventTypeNormal, "GCE", fmt.Sprintf("default backend set to %v:%v", ing.Spec.Backend.ServiceName, defaultBackend.Port))
+	}
 	return hostPathBackend, nil
 }
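Note: `t.recorder` here is a standard Kubernetes event recorder, so these messages surface under Events in `kubectl describe ing` instead of only in the controller log. For readers unfamiliar with the pattern, a minimal sketch of how such a recorder is typically wired up with the client library of this era — the exact sink wiring varies by client version, and `newRecorder` is an illustrative name, not this controller's actual code:

```go
package controller

import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/client/record"
	client "k8s.io/kubernetes/pkg/client/unversioned"
)

// newRecorder sketches the usual event-recorder setup: a broadcaster fans
// events out to the API server, and the recorder stamps each event with a
// component name so it shows up on the Ingress object itself.
func newRecorder(kubeClient *client.Client) record.EventRecorder {
	broadcaster := record.NewBroadcaster()
	broadcaster.StartRecordingToSink(kubeClient.Events(""))
	return broadcaster.NewRecorder(api.EventSource{Component: "loadbalancer-controller"})
}
```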

@@ -461,40 +465,66 @@ func (t *GCETranslator) HealthCheck(port int64) (*compute.HttpHealthCheck, error)
 	if err != nil {
 		return nil, err
 	}
+	var ingresses []extensions.Ingress
+	var healthCheck *compute.HttpHealthCheck
 	// Find the label and target port of the one service with the given nodePort
 	for _, s := range sl {
 		for _, p := range s.Spec.Ports {
-			if int32(port) == p.NodePort {
-				rp, err := t.getHTTPProbe(*s, p.TargetPort)
-				if err != nil {
-					return nil, err
-				}
-				if rp == nil {
-					glog.Infof("No pod in service %v with node port %v has declared a matching readiness probe for health checks.", s.Name, port)
-					break
-				}
-				healthPath := rp.Handler.HTTPGet.Path
-				// GCE requires a leading "/" for health check urls.
-				if string(healthPath[0]) != "/" {
-					healthPath = fmt.Sprintf("/%v", healthPath)
-				}
-				host := rp.Handler.HTTPGet.Host
-				glog.Infof("Found custom health check for Service %v nodeport %v: %v%v", s.Name, port, host, healthPath)
-				return &compute.HttpHealthCheck{
-					Port:               port,
-					RequestPath:        healthPath,
-					Host:               host,
-					Description:        "kubernetes L7 health check from readiness probe.",
-					CheckIntervalSec:   int64(rp.PeriodSeconds),
-					TimeoutSec:         int64(rp.TimeoutSeconds),
-					HealthyThreshold:   int64(rp.SuccessThreshold),
-					UnhealthyThreshold: int64(rp.FailureThreshold),
-					// TODO: include headers after updating compute godep.
-				}, nil
+
+			// only one Service can match this nodePort, try and look up
+			// the readiness probe of the pods behind it
+			if int32(port) != p.NodePort {
+				continue
 			}
+			rp, err := t.getHTTPProbe(*s, p.TargetPort)
+			if err != nil {
+				return nil, err
+			}
+			if rp == nil {
+				glog.Infof("No pod in service %v with node port %v has declared a matching readiness probe for health checks.", s.Name, port)
+				break
+			}
+
+			healthPath := rp.Handler.HTTPGet.Path
+			// GCE requires a leading "/" for health check urls.
+			if string(healthPath[0]) != "/" {
+				healthPath = fmt.Sprintf("/%v", healthPath)
+			}
+
+			host := rp.Handler.HTTPGet.Host
+			glog.Infof("Found custom health check for Service %v nodeport %v: %v%v", s.Name, port, host, healthPath)
+			// remember the ingresses that use this Service so we can send
+			// the right events
+			ingresses, err = t.ingLister.GetServiceIngress(s)
+			if err != nil {
+				glog.Warningf("Failed to list ingresses for service %v", s.Name)
+			}
+
+			healthCheck = &compute.HttpHealthCheck{
+				Port:        port,
+				RequestPath: healthPath,
+				Host:        host,
+				Description: "kubernetes L7 health check from readiness probe.",
+				// set a low health threshold and a high failure threshold.
+				// We're just trying to detect if the node networking is
+				// borked, service level outages will get detected sooner
+				// by kube-proxy.
+				CheckIntervalSec:   int64(rp.PeriodSeconds + utils.DefaultHealthCheckInterval),
+				TimeoutSec:         int64(rp.TimeoutSeconds),
+				HealthyThreshold:   utils.DefaultHealthyThreshold,
+				UnhealthyThreshold: utils.DefaultUnhealthyThreshold,
+				// TODO: include headers after updating compute godep.
+			}
+			break
 		}
 	}
-	return utils.DefaultHealthCheckTemplate(port), nil
+	if healthCheck == nil {
+		healthCheck = utils.DefaultHealthCheckTemplate(port)
+	}
+	for _, ing := range ingresses {
+		t.recorder.Eventf(&ing, api.EventTypeNormal, "GCE", fmt.Sprintf("health check using %v:%v%v", healthCheck.Host, healthCheck.Port, healthCheck.RequestPath))
+	}
+	return healthCheck, nil
 }

// PodsByCreationTimestamp sorts a list of Pods by creation timestamp, using their names as a tie breaker.
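Note the timing consequence of the new constants: the GCE check interval is now the pod's readiness-probe period plus `DefaultHealthCheckInterval` (60s), and a backend is only flagged after `DefaultUnhealthyThreshold` (10) consecutive failures — per the in-diff comment, node-level breakage is all this check needs to catch, since kube-proxy notices service-level outages sooner. A quick back-of-the-envelope sketch, where the 10-second probe period is a hypothetical example:

```go
package main

import "fmt"

func main() {
	const (
		probePeriodSeconds         = 10 // hypothetical readiness-probe PeriodSeconds
		defaultHealthCheckInterval = 60 // utils.DefaultHealthCheckInterval
		defaultUnhealthyThreshold  = 10 // utils.DefaultUnhealthyThreshold
	)
	// CheckIntervalSec = PeriodSeconds + DefaultHealthCheckInterval = 70s.
	interval := probePeriodSeconds + defaultHealthCheckInterval
	// Worst case before GCE marks the backend unhealthy:
	// 10 failures x 70s = 700s (vs 10 x 1s = 10s under the old 0.8.0 defaults).
	fmt.Printf("time to unhealthy: ~%d seconds\n", interval*defaultUnhealthyThreshold)
}
```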
7 changes: 5 additions & 2 deletions controllers/gce/instances/instances.go
@@ -101,9 +101,12 @@ func (i *Instances) DeleteInstanceGroup(name string) error {
 		return err
 	}
 	for _, zone := range zones {
-		glog.Infof("Deleting instance group %v in zone %v", name, zone)
 		if err := i.cloud.DeleteInstanceGroup(name, zone); err != nil {
-			errs = append(errs, err)
+			if !utils.IsHTTPErrorCode(err, http.StatusNotFound) {
+				errs = append(errs, err)
+			}
+		} else {
+			glog.Infof("Deleted instance group %v in zone %v", name, zone)
 		}
 	}
 	if len(errs) == 0 {
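The 404 tolerance above makes deletion idempotent: an instance group that is already gone no longer counts as an error on a retried sync. The `utils.IsHTTPErrorCode` helper is presumably a thin wrapper over the GCE client's error type, along these lines (a sketch, not necessarily the repo's exact implementation):

```go
package utils

import "google.golang.org/api/googleapi"

// IsHTTPErrorCode reports whether err is a GCE API error carrying the given
// HTTP status code, e.g. http.StatusNotFound for an already-deleted group.
func IsHTTPErrorCode(err error, code int) bool {
	apiErr, ok := err.(*googleapi.Error)
	return ok && apiErr.Code == code
}
```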
52 changes: 25 additions & 27 deletions controllers/gce/loadbalancers/loadbalancers.go
@@ -88,19 +88,8 @@ func NewLoadBalancerPool(
 }

 func (l *L7s) create(ri *L7RuntimeInfo) (*L7, error) {
-	// Lazily create a default backend so we don't tax users who don't care
-	// about Ingress by consuming 1 of their 3 GCE BackendServices. This
-	// BackendService is deleted when there are no more Ingresses, either
-	// through Sync or Shutdown.
 	if l.glbcDefaultBackend == nil {
-		err := l.defaultBackendPool.Add(l.defaultBackendNodePort)
-		if err != nil {
-			return nil, err
-		}
-		l.glbcDefaultBackend, err = l.defaultBackendPool.Get(l.defaultBackendNodePort)
-		if err != nil {
-			return nil, err
-		}
+		glog.Warningf("Creating l7 without a default backend")
Contributor: I noticed this before when I played with Ingress. Will this incur a lag for the default backend?

Contributor (author): We were always creating it as part of Sync (https://github.com/kubernetes/contrib/blob/master/ingress/controllers/gce/loadbalancers/loadbalancers.go#L180), so this was just a redundant check. But we were also deleting it as part of Sync, when there were no more lbs: https://github.com/kubernetes/contrib/blob/master/ingress/controllers/gce/loadbalancers/loadbalancers.go#L191

That's not the right time to try the delete, because a url-map could still be referencing the backend: the delete would fail with "backend already in use", we'd requeue, come back in, delete the url map, and only then would the backend delete pass.

So I moved the delete to GC(), which happens after url-map deletion:
https://github.com/kubernetes/ingress/pull/21/files#diff-247e0d4a5e00d30d32c0e454986614cdR210
 	}
 	return &L7{
 		runtimeInfo: ri,
@@ -175,24 +164,24 @@ func (l *L7s) Delete(name string) error {
 func (l *L7s) Sync(lbs []*L7RuntimeInfo) error {
 	glog.V(3).Infof("Creating loadbalancers %+v", lbs)

-	// The default backend is completely managed by the l7 pool.
-	// This includes recreating it if it's deleted, or fixing broken links.
-	if err := l.defaultBackendPool.Add(l.defaultBackendNodePort); err != nil {
-		return err
-	}
-	// create new loadbalancers, perform an edge hop for existing
-	for _, ri := range lbs {
-		if err := l.Add(ri); err != nil {
-			return err
-		}
-	}
-	// Tear down the default backend when there are no more loadbalancers
-	// because the cluster could go down anytime and we'd leak it otherwise.
-	if len(lbs) == 0 {
-		if err := l.defaultBackendPool.Delete(l.defaultBackendNodePort); err != nil {
-			return err
-		}
-		l.glbcDefaultBackend = nil
-	}
+	if len(lbs) != 0 {
+		// Lazily create a default backend so we don't tax users who don't care
+		// about Ingress by consuming 1 of their 3 GCE BackendServices. This
+		// BackendService is GC'd when there are no more Ingresses.
+		if err := l.defaultBackendPool.Add(l.defaultBackendNodePort); err != nil {
+			return err
+		}
+		defaultBackend, err := l.defaultBackendPool.Get(l.defaultBackendNodePort)
+		if err != nil {
+			return err
+		}
+		l.glbcDefaultBackend = defaultBackend
+	}
+	// create new loadbalancers, validate existing
+	for _, ri := range lbs {
+		if err := l.Add(ri); err != nil {
+			return err
+		}
+	}
 	return nil
 }
@@ -215,6 +204,15 @@ func (l *L7s) GC(names []string) error {
 			return err
 		}
 	}
+	// Tear down the default backend when there are no more loadbalancers.
+	// This needs to happen after we've deleted all url-maps that might be
+	// using it.
+	if len(names) == 0 {
+		if err := l.defaultBackendPool.Delete(l.defaultBackendNodePort); err != nil {
+			return err
+		}
+		l.glbcDefaultBackend = nil
+	}
 	return nil
 }

@@ -586,7 +584,7 @@ func (l *L7) edgeHop() error {
 		}
 	}
 	if l.runtimeInfo.TLS != nil {
-		glog.V(3).Infof("Edge hopping https for %v", l.Name)
+		glog.V(3).Infof("validating https for %v", l.Name)
 		if err := l.edgeHopHttps(); err != nil {
 			return err
 		}
@@ -696,7 +694,7 @@ func (l *L7) UpdateUrlMap(ingressRules utils.GCEURLMap) error {
 	} else {
 		l.um.DefaultService = l.glbcDefaultBackend.SelfLink
 	}
-	glog.V(3).Infof("Updating url map %+v", ingressRules)
+	glog.Infof("Updating url map:\n%+v", ingressRules)
Contributor: The default level is 0, right?

Contributor (author): V(2) is the default.
 	// Every update replaces the entire urlmap.
 	// TODO: when we have multiple loadbalancers point to a single gce url map
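For context on that exchange: glog only emits a `glog.V(n)` line when the process runs at verbosity `n` or higher. glog's own default is 0, but this controller is typically deployed with `--v=2`, so promoting the url-map dump from `V(3)` to an unconditional `Infof` is what makes it visible at the usual verbosity. A minimal illustration of the gating, assuming the standard github.com/golang/glog package:

```go
package main

import (
	"flag"

	"github.com/golang/glog"
)

func main() {
	flag.Parse() // glog registers its -v flag on the default FlagSet

	glog.Infof("always printed, regardless of -v")      // level 0
	glog.V(2).Infof("printed when run with -v=2+")      // visible at the controller's usual verbosity
	glog.V(3).Infof("printed only when run with -v=3+") // dropped at -v=2
}
```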
2 changes: 1 addition & 1 deletion controllers/gce/main.go
@@ -61,7 +61,7 @@ const (
 	alphaNumericChar = "0"

 	// Current docker image version. Only used in debug logging.
-	imageVersion = "glbc:0.8.0"
+	imageVersion = "glbc:0.8.1"

 	// Key used to persist UIDs to configmaps.
 	uidConfigMapName = "ingress-uid"
8 changes: 4 additions & 4 deletions controllers/gce/rc.yaml
@@ -24,18 +24,18 @@ metadata:
   name: l7-lb-controller
   labels:
     k8s-app: glbc
-    version: v0.6.2
+    version: v0.8.1
 spec:
   # There should never be more than 1 controller alive simultaneously.
   replicas: 1
   selector:
     k8s-app: glbc
-    version: v0.6.2
+    version: v0.8.1
   template:
     metadata:
       labels:
         k8s-app: glbc
-        version: v0.6.2
+        version: v0.8.1
       name: glbc
     spec:
       terminationGracePeriodSeconds: 600
@@ -61,7 +61,7 @@ spec:
           requests:
             cpu: 10m
             memory: 20Mi
-      - image: gcr.io/google_containers/glbc:0.8.0
+      - image: gcr.io/google_containers/glbc:0.8.1
         livenessProbe:
           httpGet:
             path: /healthz
2 changes: 1 addition & 1 deletion controllers/gce/storage/pools.go
@@ -103,7 +103,7 @@ func (c *CloudListingPool) ReplenishPool() {
 	for i := range items {
 		key, err := c.keyGetter(items[i])
 		if err != nil {
-			glog.V(4).Infof("CloudListingPool: %v", err)
+			glog.V(5).Infof("CloudListingPool: %v", err)
 			continue
 		}
 		c.InMemoryPool.Add(key, items[i])
17 changes: 13 additions & 4 deletions controllers/gce/utils/utils.go
@@ -79,6 +79,15 @@ const (
 	// K8sAnnotationPrefix is the prefix used in annotations used to record
 	// debug information in the Ingress annotations.
 	K8sAnnotationPrefix = "ingress.kubernetes.io"
+
+	// DefaultHealthCheckInterval defines how frequently a probe runs
+	DefaultHealthCheckInterval = 60
+	// DefaultHealthyThreshold defines the threshold of success probes that declare a backend "healthy"
+	DefaultHealthyThreshold = 1
+	// DefaultUnhealthyThreshold defines the threshold of failure probes that declare a backend "unhealthy"
+	DefaultUnhealthyThreshold = 10
+	// DefaultTimeoutSeconds defines the timeout of each probe
+	DefaultTimeoutSeconds = 60
 )

// Namer handles centralized naming for the cluster.
@@ -305,12 +314,12 @@ func DefaultHealthCheckTemplate(port int64) *compute.HttpHealthCheck {
 		RequestPath: "",
 		Description: "Default kubernetes L7 Loadbalancing health check.",
 		// How often to health check.
-		CheckIntervalSec: 1,
+		CheckIntervalSec: DefaultHealthCheckInterval,
 		// How long to wait before claiming failure of a health check.
-		TimeoutSec: 1,
+		TimeoutSec: DefaultTimeoutSeconds,
 		// Number of healthchecks to pass for a vm to be deemed healthy.
-		HealthyThreshold: 1,
+		HealthyThreshold: DefaultHealthyThreshold,
 		// Number of healthchecks to fail before the vm is deemed unhealthy.
-		UnhealthyThreshold: 10,
+		UnhealthyThreshold: DefaultUnhealthyThreshold,
 	}
 }