Skip to content

Commit

Permalink
Only the leader updates metrics for SSL certificate expiration
Browse files Browse the repository at this point in the history
  • Loading branch information
aledbf committed Mar 12, 2019
1 parent 870b89c commit f4e4335
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 16 deletions.
6 changes: 5 additions & 1 deletion internal/ingress/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,11 @@ func (n *NGINXController) syncIngress(interface{}) error {
klog.Infof("Backend successfully reloaded.")
n.metricCollector.ConfigSuccess(hash, true)
n.metricCollector.IncReloadCount()
n.metricCollector.SetSSLExpireTime(servers)

if n.isLeader() {
klog.V(2).Infof("Updating ssl expiration metrics.")
n.metricCollector.SetSSLExpireTime(servers)
}
}

isFirstSync := n.runningConfig.Equal(&ingress.Configuration{})
Expand Down
29 changes: 20 additions & 9 deletions internal/ingress/controller/nginx.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"text/template"
"time"
Expand Down Expand Up @@ -255,6 +256,8 @@ type NGINXController struct {
fileSystem filesystem.Filesystem

metricCollector metric.Collector

currentLeader uint32
}

// Start starts a new NGINX master process running in the foreground.
Expand All @@ -278,19 +281,15 @@ func (n *NGINXController) Start() {
go n.syncStatus.Run(stopCh)
}

n.setLeader(true)
n.metricCollector.OnStartedLeading(electionID)
// manually update SSL expiration metrics
// (to not wait for a reload)
n.metricCollector.SetSSLExpireTime(n.runningConfig.Servers)
},
OnStoppedLeading: func() {
n.setLeader(false)
n.metricCollector.OnStoppedLeading(electionID)

// Remove prometheus metrics related to SSL certificates
srvs := sets.NewString()
for _, s := range n.runningConfig.Servers {
if !srvs.Has(s.Hostname) {
srvs.Insert(s.Hostname)
}
}
n.metricCollector.RemoveMetrics(nil, srvs.List())
},
PodName: n.podInfo.Name,
PodNamespace: n.podInfo.Namespace,
Expand Down Expand Up @@ -1129,3 +1128,15 @@ func buildRedirects(servers []*ingress.Server) []*redirect {

return redirectServers
}

// setLeader records whether this controller instance is currently the leader.
// The state is kept in n.currentLeader as 1 (leader) or 0 (not leader) and is
// written with an atomic store; isLeader reads it back with an atomic load.
func (n *NGINXController) setLeader(leader bool) {
	if leader {
		atomic.StoreUint32(&n.currentLeader, 1)
		return
	}
	atomic.StoreUint32(&n.currentLeader, 0)
}

// isLeader reports whether this controller instance is currently marked as
// the leader. It atomically loads n.currentLeader and treats any non-zero
// value as "leader".
func (n *NGINXController) isLeader() bool {
	state := atomic.LoadUint32(&n.currentLeader)
	return state != 0
}
24 changes: 18 additions & 6 deletions internal/ingress/metric/collectors/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ func NewController(pod, namespace, class string) *Controller {
),
leaderElection: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "leader_election_status",
Help: "Gauge reporting status of the leader election, 0 indicates follower, 1 indicates leader. 'name' is the string used to identify the lease",
Namespace: PrometheusNamespace,
Name: "leader_election_status",
Help: "Gauge reporting status of the leader election, 0 indicates follower, 1 indicates leader. 'name' is the string used to identify the lease",
ConstLabels: constLabels,
},
[]string{"name"},
),
Expand All @@ -138,12 +140,12 @@ func (cm *Controller) IncReloadErrorCount() {

// OnStartedLeading indicates the pod was elected as the leader
func (cm *Controller) OnStartedLeading(electionID string) {
cm.leaderElection.WithLabelValues(electionID).Set(0)
cm.leaderElection.WithLabelValues(electionID).Set(1.0)
}

// OnStoppedLeading indicates the pod stopped being the leader
func (cm *Controller) OnStoppedLeading(electionID string) {
cm.leaderElection.WithLabelValues(electionID).Set(1.0)
cm.leaderElection.WithLabelValues(electionID).Set(0)
}

// ConfigSuccess set a boolean flag according to the output of the controller configuration reload
Expand All @@ -169,6 +171,7 @@ func (cm Controller) Describe(ch chan<- *prometheus.Desc) {
cm.reloadOperation.Describe(ch)
cm.reloadOperationErrors.Describe(ch)
cm.sslExpireTime.Describe(ch)
cm.leaderElection.Describe(ch)
}

// Collect implements the prometheus.Collector interface.
Expand All @@ -179,6 +182,7 @@ func (cm Controller) Collect(ch chan<- prometheus.Metric) {
cm.reloadOperation.Collect(ch)
cm.reloadOperationErrors.Collect(ch)
cm.sslExpireTime.Collect(ch)
cm.leaderElection.Collect(ch)
}

// SetSSLExpireTime sets the expiration time of SSL Certificates
Expand All @@ -198,13 +202,21 @@ func (cm *Controller) SetSSLExpireTime(servers []*ingress.Server) {

// RemoveMetrics removes metrics for hostnames not available anymore.
//
// It delegates to removeSSLExpireMetrics with the host filter enabled, so
// only series whose host appears in hosts are considered; registry is the
// gatherer used to look up the currently exported metrics.
func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer) {
	// Restrict the removal to the hosts explicitly passed by the caller.
	const onlyDefinedHosts = true
	cm.removeSSLExpireMetrics(onlyDefinedHosts, hosts, registry)
}

// RemoveAllSSLExpireMetrics removes metrics for expiration of SSL Certificates.
//
// It delegates to removeSSLExpireMetrics with the host filter disabled and an
// empty host list, so no host is excluded by name; registry is the gatherer
// used to look up the currently exported metrics.
func (cm *Controller) RemoveAllSSLExpireMetrics(registry prometheus.Gatherer) {
	// With the filter off the host list is not used for matching.
	noHosts := []string{}
	cm.removeSSLExpireMetrics(false, noHosts, registry)
}

func (cm *Controller) removeSSLExpireMetrics(onlyDefinedHosts bool, hosts []string, registry prometheus.Gatherer) {
mfs, err := registry.Gather()
if err != nil {
klog.Errorf("Error gathering metrics: %v", err)
return
}

klog.V(2).Infof("removing SSL certificate metrics for %v hosts", hosts)
toRemove := sets.NewString(hosts...)

for _, mf := range mfs {
Expand All @@ -227,7 +239,7 @@ func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer
continue
}

if !toRemove.Has(host) {
if onlyDefinedHosts && !toRemove.Has(host) {
continue
}

Expand Down
1 change: 1 addition & 0 deletions internal/ingress/metric/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,5 @@ func (c *collector) OnStartedLeading(electionID string) {
// OnStoppedLeading indicates the pod stopped being the leader.
// It forwards the event for electionID to the ingress controller collector
// and then asks that collector to remove all SSL certificate expiration
// metrics, using c.registry to find them.
func (c *collector) OnStoppedLeading(electionID string) {
c.ingressController.OnStoppedLeading(electionID)
// Only the leader updates SSL expiration metrics (see commit title), so
// they are cleared here once leadership is lost.
c.ingressController.RemoveAllSSLExpireMetrics(c.registry)
}

0 comments on commit f4e4335

Please sign in to comment.