Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor health checks and wait until NGINX process ends #4487

Merged
merged 1 commit into from
Sep 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ container: clean-container .container-$(ARCH)
mkdir -p $(TEMP_DIR)/rootfs
cp bin/$(ARCH)/nginx-ingress-controller $(TEMP_DIR)/rootfs/nginx-ingress-controller
cp bin/$(ARCH)/dbg $(TEMP_DIR)/rootfs/dbg
cp bin/$(ARCH)/wait-shutdown $(TEMP_DIR)/rootfs/wait-shutdown

cp -RP ./* $(TEMP_DIR)
$(SED_I) "s|BASEIMAGE|$(BASEIMAGE)|g" $(DOCKERFILE)
Expand Down
9 changes: 9 additions & 0 deletions build/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,12 @@ go build \
-X ${PKG}/version.COMMIT=${GIT_COMMIT} \
-X ${PKG}/version.REPO=${REPO_INFO}" \
-o "bin/${ARCH}/dbg" "${PKG}/cmd/dbg"


go build \
"${GOBUILD_FLAGS}" \
-ldflags "-s -w \
-X ${PKG}/version.RELEASE=${TAG} \
-X ${PKG}/version.COMMIT=${GIT_COMMIT} \
-X ${PKG}/version.REPO=${REPO_INFO}" \
-o "bin/${ARCH}/wait-shutdown" "${PKG}/cmd/waitshutdown"
12 changes: 10 additions & 2 deletions cmd/nginx/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func main() {
mux := http.NewServeMux()

if conf.EnableProfiling {
registerProfiler(mux)
go registerProfiler()
}

registerHealthz(ngx, mux)
Expand Down Expand Up @@ -265,7 +265,9 @@ func registerMetrics(reg *prometheus.Registry, mux *http.ServeMux) {

}

func registerProfiler(mux *http.ServeMux) {
func registerProfiler() {
mux := http.NewServeMux()

mux.HandleFunc("/debug/pprof/", pprof.Index)
mux.HandleFunc("/debug/pprof/heap", pprof.Index)
mux.HandleFunc("/debug/pprof/mutex", pprof.Index)
Expand All @@ -276,6 +278,12 @@ func registerProfiler(mux *http.ServeMux) {
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)

server := &http.Server{
Addr: fmt.Sprintf(":10255"),
Handler: mux,
}
klog.Fatal(server.ListenAndServe())
}

func startHTTPServer(port int, mux *http.ServeMux) {
Expand Down
43 changes: 43 additions & 0 deletions cmd/waitshutdown/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"os"
"os/exec"
"time"

"k8s.io/ingress-nginx/internal/nginx"
"k8s.io/klog"
)

func main() {
err := exec.Command("bash", "-c", "pkill -SIGTERM -f nginx-ingress-controller").Run()
if err != nil {
klog.Errorf("unexpected error terminating ingress controller: %v", err)
os.Exit(1)
}

// wait for the NGINX process to terminate
timer := time.NewTicker(time.Second * 1)
for range timer.C {
if !nginx.IsRunning() {
timer.Stop()
break
}
}
aledbf marked this conversation as resolved.
Show resolved Hide resolved
}
52 changes: 29 additions & 23 deletions internal/ingress/controller/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (

"github.com/ncabatoff/process-exporter/proc"
"github.com/pkg/errors"
"k8s.io/klog"

"k8s.io/ingress-nginx/internal/nginx"
)
Expand All @@ -37,41 +36,48 @@ func (n NGINXController) Name() string {

// Check returns if the nginx healthz endpoint is returning ok (status code 200)
func (n *NGINXController) Check(_ *http.Request) error {
statusCode, _, err := nginx.NewGetStatusRequest(nginx.HealthPath)
if err != nil {
klog.Errorf("healthcheck error: %v", err)
return err
if n.isShuttingDown {
return fmt.Errorf("the ingress controller is shutting down")
}

if statusCode != 200 {
klog.Errorf("healthcheck error: %v", statusCode)
return fmt.Errorf("ingress controller is not healthy")
// check the nginx master process is running
fs, err := proc.NewFS("/proc", false)
if err != nil {
return errors.Wrap(err, "reading /proc directory")
}

statusCode, _, err = nginx.NewGetStatusRequest("/is-dynamic-lb-initialized")
f, err := ioutil.ReadFile(nginx.PID)
if err != nil {
klog.Errorf("healthcheck error: %v", err)
return err
return errors.Wrapf(err, "reading %v", nginx.PID)
}

if statusCode != 200 {
klog.Errorf("healthcheck error: %v", statusCode)
return fmt.Errorf("dynamic load balancer not started")
pid, err := strconv.Atoi(strings.TrimRight(string(f), "\r\n"))
if err != nil {
return errors.Wrapf(err, "reading NGINX PID from file %v", nginx.PID)
}

// check the nginx master process is running
fs, err := proc.NewFS("/proc", false)
_, err = fs.NewProc(pid)
if err != nil {
return errors.Wrap(err, "unexpected error reading /proc directory")
return errors.Wrapf(err, "checking for NGINX process with PID %v", pid)
}
f, err := ioutil.ReadFile(nginx.PID)

statusCode, _, err := nginx.NewGetStatusRequest(nginx.HealthPath)
if err != nil {
return errors.Wrapf(err, "unexpected error reading %v", nginx.PID)
return errors.Wrapf(err, "checking if NGINX is running")
}
pid, err := strconv.Atoi(strings.TrimRight(string(f), "\r\n"))

if statusCode != 200 {
return fmt.Errorf("ingress controller is not healthy (%v)", statusCode)
}

statusCode, _, err = nginx.NewGetStatusRequest("/is-dynamic-lb-initialized")
if err != nil {
return errors.Wrapf(err, "unexpected error reading the nginx PID from %v", nginx.PID)
return errors.Wrapf(err, "checking if the dynamic load balancer started")
}
_, err = fs.NewProc(pid)
return err

if statusCode != 200 {
return fmt.Errorf("dynamic load balancer not started")
}

return nil
}
5 changes: 2 additions & 3 deletions internal/ingress/controller/checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,11 @@ import (
func TestNginxCheck(t *testing.T) {
mux := http.NewServeMux()

listener, err := net.Listen("unix", nginx.StatusSocket)
listener, err := net.Listen("tcp", fmt.Sprintf(":%v", nginx.StatusPort))
if err != nil {
t.Fatalf("crating unix listener: %s", err)
t.Fatalf("crating tcp listener: %s", err)
}
defer listener.Close()
defer os.Remove(nginx.StatusSocket)

server := &httptest.Server{
Listener: listener,
Expand Down
2 changes: 1 addition & 1 deletion internal/ingress/controller/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -796,8 +796,8 @@ type TemplateConfig struct {
EnableMetrics bool

PID string
StatusSocket string
StatusPath string
StatusPort int
StreamSocket string
}

Expand Down
3 changes: 3 additions & 0 deletions internal/ingress/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"k8s.io/ingress-nginx/internal/ingress/annotations/proxy"
ngx_config "k8s.io/ingress-nginx/internal/ingress/controller/config"
"k8s.io/ingress-nginx/internal/k8s"
"k8s.io/ingress-nginx/internal/nginx"
"k8s.io/klog"
)

Expand Down Expand Up @@ -268,6 +269,8 @@ func (n *NGINXController) getStreamServices(configmapName string, proto apiv1.Pr
n.cfg.ListenPorts.SSLProxy,
n.cfg.ListenPorts.Health,
n.cfg.ListenPorts.Default,
10255, // profiling port
nginx.StatusPort,
}
reserverdPorts := sets.NewInt(rp...)
// svcRef format: <(str)namespace>/<(str)service>:<(intstr)port>[:<("PROXY")decode>:<("PROXY")encode>]
Expand Down
2 changes: 1 addition & 1 deletion internal/ingress/controller/nginx.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,8 +605,8 @@ func (n NGINXController) generateTemplate(cfg ngx_config.Configuration, ingressC

HealthzURI: nginx.HealthPath,
PID: nginx.PID,
StatusSocket: nginx.StatusSocket,
StatusPath: nginx.StatusPath,
StatusPort: nginx.StatusPort,
StreamSocket: nginx.StreamSocket,
}

Expand Down
11 changes: 5 additions & 6 deletions internal/ingress/controller/nginx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package controller

import (
"fmt"
"io"
"io/ioutil"
"net"
Expand Down Expand Up @@ -148,16 +149,15 @@ func TestIsDynamicConfigurationEnough(t *testing.T) {
}

func TestConfigureDynamically(t *testing.T) {
listener, err := net.Listen("unix", nginx.StatusSocket)
listener, err := net.Listen("tcp", fmt.Sprintf(":%v", nginx.StatusPort))
if err != nil {
t.Errorf("crating unix listener: %s", err)
t.Fatalf("crating unix listener: %s", err)
}
defer listener.Close()
defer os.Remove(nginx.StatusSocket)

streamListener, err := net.Listen("unix", nginx.StreamSocket)
if err != nil {
t.Errorf("crating unix listener: %s", err)
t.Fatalf("crating unix listener: %s", err)
}
defer streamListener.Close()
defer os.Remove(nginx.StreamSocket)
Expand Down Expand Up @@ -319,12 +319,11 @@ func TestConfigureDynamically(t *testing.T) {
}

func TestConfigureCertificates(t *testing.T) {
listener, err := net.Listen("unix", nginx.StatusSocket)
listener, err := net.Listen("tcp", fmt.Sprintf(":%v", nginx.StatusPort))
if err != nil {
t.Fatalf("crating unix listener: %s", err)
}
defer listener.Close()
defer os.Remove(nginx.StatusSocket)

streamListener, err := net.Listen("unix", nginx.StreamSocket)
if err != nil {
Expand Down
4 changes: 1 addition & 3 deletions internal/ingress/metric/collectors/nginx_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"net"
"net/http"
"net/http/httptest"
"os"
"testing"
"time"

Expand Down Expand Up @@ -97,7 +96,7 @@ func TestStatusCollector(t *testing.T) {

for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
listener, err := net.Listen("unix", nginx.StatusSocket)
listener, err := net.Listen("tcp", fmt.Sprintf(":%v", nginx.StatusPort))
if err != nil {
t.Fatalf("crating unix listener: %s", err)
}
Expand Down Expand Up @@ -145,7 +144,6 @@ func TestStatusCollector(t *testing.T) {
cm.Stop()

listener.Close()
os.Remove(nginx.StatusSocket)
})
}
}
34 changes: 8 additions & 26 deletions internal/nginx/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"time"

ps "github.com/mitchellh/go-ps"
"github.com/tv42/httpunix"
"k8s.io/klog"
)

Expand All @@ -38,8 +37,8 @@ var TemplatePath = "/etc/nginx/template/nginx.tmpl"
// PID defines the location of the pid file used by NGINX
var PID = "/tmp/nginx.pid"

// StatusSocket defines the location of the unix socket used by NGINX for the status server
var StatusSocket = "/tmp/nginx-status-server.sock"
// StatusPort port used by NGINX for the status server
var StatusPort = 10256
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason you're switching to tcp?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason you're switching to tcp?

If the node is smaller than should be or during spikes of CPU, the request fails (socket). I cannot reproduce this consistently but when I can is only related to CPU utilization.

After this is merged I will add an e2e test so show how this can be reproduced (using stress)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aledbf did this e2e test ever get added? I'm running into a similar situation when CPU spikes and I'm wondering if the switch away from a socket actually resolves it


// HealthPath defines the path used to define the health check location in NGINX
var HealthPath = "/healthz"
Expand All @@ -56,17 +55,12 @@ var StreamSocket = "/tmp/ingress-stream.sock"

var statusLocation = "nginx-status"

var httpClient *http.Client

func init() {
httpClient = buildUnixSocketClient(HealthCheckTimeout)
}

// NewGetStatusRequest creates a new GET request to the internal NGINX status server
func NewGetStatusRequest(path string) (int, []byte, error) {
url := fmt.Sprintf("%v://%v%v", httpunix.Scheme, statusLocation, path)
url := fmt.Sprintf("http://127.0.0.1:%v%v", StatusPort, path)

res, err := httpClient.Get(url)
client := http.Client{}
res, err := client.Get(url)
if err != nil {
return 0, nil, err
}
Expand All @@ -82,14 +76,15 @@ func NewGetStatusRequest(path string) (int, []byte, error) {

// NewPostStatusRequest creates a new POST request to the internal NGINX status server
func NewPostStatusRequest(path, contentType string, data interface{}) (int, []byte, error) {
url := fmt.Sprintf("%v://%v%v", httpunix.Scheme, statusLocation, path)
url := fmt.Sprintf("http://127.0.0.1:%v%v", StatusPort, path)

buf, err := json.Marshal(data)
if err != nil {
return 0, nil, err
}

res, err := httpClient.Post(url, contentType, bytes.NewReader(buf))
client := http.Client{}
res, err := client.Post(url, contentType, bytes.NewReader(buf))
if err != nil {
return 0, nil, err
}
Expand Down Expand Up @@ -142,19 +137,6 @@ func readFileToString(path string) (string, error) {
return string(contents), nil
}

func buildUnixSocketClient(timeout time.Duration) *http.Client {
u := &httpunix.Transport{
DialTimeout: 1 * time.Second,
RequestTimeout: timeout,
ResponseHeaderTimeout: timeout,
}
u.RegisterLocation(statusLocation, StatusSocket)

return &http.Client{
Transport: u,
}
}

// Version return details about NGINX
func Version() string {
flag := "-v"
Expand Down
2 changes: 1 addition & 1 deletion rootfs/etc/nginx/template/nginx.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ http {

# default server, used for NGINX healthcheck and access to nginx stats
server {
listen unix:{{ .StatusSocket }};
listen 127.0.0.1:{{ .StatusPort }};
set $proxy_upstream_name "internal";

keepalive_timeout 0;
Expand Down
9 changes: 9 additions & 0 deletions test/e2e-image/overlay/deployment-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,14 @@ spec:
- name: nginx-ingress-controller
livenessProbe:
timeoutSeconds: 1
initialDelaySeconds: 1
periodSeconds: 2
readinessProbe:
timeoutSeconds: 1
initialDelaySeconds: 1
periodSeconds: 2
lifecycle:
preStop:
exec:
command:
- /wait-shutdown
Loading