Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use fatal library in path_srv, cert_srv and sciond #2208

Merged
merged 1 commit into from
Dec 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions go/cert_srv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/scionproto/scion/go/cert_srv/internal/reiss"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra/infraenv"
"github.com/scionproto/scion/go/lib/infra/messenger"
"github.com/scionproto/scion/go/lib/infra/modules/itopo"
Expand Down Expand Up @@ -63,6 +64,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(csconfig.Sample); !ok {
Expand Down Expand Up @@ -94,16 +96,12 @@ func realMain() int {
})
// Cleanup when the CS exits.
defer stop()
// Create a channel where prometheus can signal fatal errors
fatalC := make(chan error, 1)
config.Metrics.StartPrometheus(fatalC)
config.Metrics.StartPrometheus()
select {
case <-environment.AppShutdownSignal:
// Whenever we receive a SIGINT or SIGTERM we exit without an error.
return 0
case err := <-fatalC:
// Prometheus encountered a fatal error, thus we exit.
log.Crit("Unable to listen and serve", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down
10 changes: 4 additions & 6 deletions go/godispatcher/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

"github.com/scionproto/scion/go/godispatcher/internal/config"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/log"
"github.com/scionproto/scion/go/lib/fatal"
)

type Config struct {
Expand All @@ -41,6 +41,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if returnCode, ok := env.CheckFlags(config.Sample); !ok {
Expand All @@ -53,11 +54,8 @@ func realMain() int {
defer env.CleanupLog()
defer env.LogAppStopped("Dispatcher", cfg.Dispatcher.ID)

fatalC := make(chan error, 1)
cfg.Metrics.StartPrometheus(fatalC)
err := <-fatalC
// Prometheus encountered a fatal error, thus we exit.
log.Crit("Unable to listen and serve", "err", err)
cfg.Metrics.StartPrometheus()
<-fatal.Chan()
return 1
}

Expand Down
6 changes: 4 additions & 2 deletions go/lib/env/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra/modules/itopo"
"github.com/scionproto/scion/go/lib/log"
"github.com/scionproto/scion/go/lib/overlay"
Expand Down Expand Up @@ -204,14 +205,15 @@ type Metrics struct {
Prometheus string
}

func (cfg *Metrics) StartPrometheus(fatalC chan error) {
func (cfg *Metrics) StartPrometheus() {
fatal.Check()
if cfg.Prometheus != "" {
http.Handle("/metrics", promhttp.Handler())
log.Info("Exporting prometheus metrics", "addr", cfg.Prometheus)
go func() {
defer log.LogPanicAndExit()
if err := http.ListenAndServe(cfg.Prometheus, nil); err != nil {
fatalC <- common.NewBasicError("HTTP ListenAndServe error", err)
fatal.Fatal(common.NewBasicError("HTTP ListenAndServe error", err))
}
}()
}
Expand Down
51 changes: 43 additions & 8 deletions go/lib/fatal/fatal.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,60 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// Package fatal deals with delivering fatal error conditions to the main
// goroutine. The goroutine can then perform clean shutdown.
// Package fatal provides a way to handle fatal errors.
// 1. It gives the main goroutine an opportunity to cleanly shut down in case of a fatal error.
// 2. If the main goroutine is unresponsive, it terminates the process.
// 3. To improve debugging, after the first fatal error the other goroutines
// are given a grace period so that we have more logs to investigate.
//
// The main program should call fatal.Init() when it's starting.
//
// Any library producing fatal errors should call fatal.Check() when it starts.
package fatal

import (
"time"

"github.com/scionproto/scion/go/lib/log"
)

var (
fatalC chan error
fatalC chan struct{}
)

// Initialize the package.
func init() {
fatalC = make(chan error)
// Init creates the unbuffered channel on which fatal errors are signalled.
// This MUST be called in the main goroutine when it starts.
func Init() {
fatalC = make(chan struct{})
}

// Check panics if the fatal package has not been initialized, i.e. if the
// package-level channel is still nil.
// This MUST be called when a library producing fatal errors is initialized.
func Check() {
	if fatalC != nil {
		return
	}
	panic("A library producing fatal errors is being used " +
		"but fatal package wasn't initialized.")
}

// Signal that the application should shut down.
// Produce a fatal error. This function never exits.
func Fatal(err error) {
fatalC <- err
log.Crit("Fatal error", "err", err)
// Grace period to gather more logs in case that
// the first fatal error wasn't the most informative one.
time.Sleep(1 * time.Second)
// Ask main goroutine to shut down the application.
select {
case fatalC <- struct{}{}:
// Block until the application shuts down.
select {}
case <-time.After(5 * time.Second):
panic("Main goroutine is not responding to the fatal error." +
"It's probably stuck. Shutting down anyway.")
}
}

// Get access to the underlying channel. This is used by the main goroutine to wait for fatal errors.
func Chan() chan error {
func Chan() <-chan struct{} {
// The receive-only return type lets callers wait on the channel but not send on it.
return fatalC
}
1 change: 1 addition & 0 deletions go/lib/pktdisp/disp.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type DispatchFunc func(*DispPkt)
// N.B. the DispPkt passed to f is reused, so applications should make a copy if
// this is a problem.
func PktDispatcher(c snet.Conn, f DispatchFunc, pktDispStop chan struct{}) {
fatal.Check()
var err error
var n int
dp := &DispPkt{Raw: make(common.RawBytes, common.MaxMTU)}
Expand Down
10 changes: 4 additions & 6 deletions go/path_srv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra"
"github.com/scionproto/scion/go/lib/infra/infraenv"
"github.com/scionproto/scion/go/lib/infra/modules/cleaner"
Expand Down Expand Up @@ -67,6 +68,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(psconfig.Sample); !ok {
Expand Down Expand Up @@ -158,9 +160,7 @@ func realMain() int {
}
}
msger.AddHandler(infra.SegRev, handlers.NewRevocHandler(args))
// Create a channel where prometheus can signal fatal errors
fatalC := make(chan error, 1)
config.Metrics.StartPrometheus(fatalC)
config.Metrics.StartPrometheus()
// Start handling requests/messages
go func() {
defer log.LogPanicAndExit()
Expand All @@ -179,9 +179,7 @@ func realMain() int {
case <-environment.AppShutdownSignal:
// Whenever we receive a SIGINT or SIGTERM we exit without an error.
return 0
case err := <-fatalC:
// Prometheus encountered a fatal error, thus we exit.
log.Crit("Unable to listen and serve", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down
21 changes: 9 additions & 12 deletions go/sciond/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra/infraenv"
"github.com/scionproto/scion/go/lib/infra/modules/cleaner"
"github.com/scionproto/scion/go/lib/infra/modules/itopo"
Expand Down Expand Up @@ -68,6 +69,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(sdconfig.Sample); !ok {
Expand Down Expand Up @@ -137,28 +139,23 @@ func realMain() int {
TrustStore: trustStore,
},
}
// Create a channel where server goroutines can signal fatal errors
fatalC := make(chan error, 3)
cleaner := periodic.StartPeriodicTask(cleaner.New(pathDB),
periodic.NewTicker(300*time.Second), 295*time.Second)
defer cleaner.Stop()
// Start servers
rsockServer, shutdownF := NewServer("rsock", config.SD.Reliable, handlers, log.Root())
defer shutdownF()
StartServer("ReliableSockServer", config.SD.Reliable, rsockServer, fatalC)
StartServer("ReliableSockServer", config.SD.Reliable, rsockServer)
unixpacketServer, shutdownF := NewServer("unixpacket", config.SD.Unix, handlers, log.Root())
defer shutdownF()
StartServer("UnixServer", config.SD.Unix, unixpacketServer, fatalC)
config.Metrics.StartPrometheus(fatalC)
StartServer("UnixServer", config.SD.Unix, unixpacketServer)
config.Metrics.StartPrometheus()
select {
case <-environment.AppShutdownSignal:
// Whenever we receive a SIGINT or SIGTERM we exit without an error.
// Deferred shutdowns for all running servers run now.
return 0
case err := <-fatalC:
// At least one of the servers was unable to run or encountered a
// fatal error while running.
log.Crit("Unable to listen and serve", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down Expand Up @@ -195,16 +192,16 @@ func NewServer(network string, rsockPath string, handlers servers.HandlerMap,
return server, shutdownF
}

func StartServer(name, sockPath string, server *servers.Server, fatalC chan error) {
// StartServer runs server.ListenAndServe in a new goroutine and returns
// immediately. name is only used to label error messages. If
// config.SD.DeleteSocket is set, any existing file at sockPath is removed
// before listening. Errors are reported through fatal.Fatal.
func StartServer(name, sockPath string, server *servers.Server) {
	go func() {
		defer log.LogPanicAndExit()
		if config.SD.DeleteSocket {
			// A missing socket file is fine; any other removal error is fatal.
			if err := os.Remove(sockPath); err != nil && !os.IsNotExist(err) {
				fatal.Fatal(common.NewBasicError(name+" SocketRemoval error", err))
			}
		}
		if err := server.ListenAndServe(); err != nil {
			fatal.Fatal(common.NewBasicError(name+" ListenAndServe error", err))
		}
	}()
}
7 changes: 3 additions & 4 deletions go/sig/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(sigconfig.Sample); !ok {
Expand Down Expand Up @@ -106,13 +107,11 @@ func realMain() int {
reader.NewReader(tunIO).Run()
}()
spawnIngressDispatcher(tunIO)
cfg.Metrics.StartPrometheus(fatal.Chan())
cfg.Metrics.StartPrometheus()
select {
case <-environment.AppShutdownSignal:
return 0
case err := <-fatal.Chan():
// Prometheus or the ingress dispatcher encountered a fatal error, thus we exit.
log.Crit("Fatal error during execution", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down