Skip to content

Commit

Permalink
Use fatal library in path_srv, cert_srv and sciond (#2208)
Browse files Browse the repository at this point in the history
  • Loading branch information
sustrik authored Dec 12, 2018
1 parent 1ab44bf commit 4164178
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 44 deletions.
10 changes: 4 additions & 6 deletions go/cert_srv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/scionproto/scion/go/cert_srv/internal/reiss"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra/infraenv"
"github.com/scionproto/scion/go/lib/infra/messenger"
"github.com/scionproto/scion/go/lib/infra/modules/itopo"
Expand Down Expand Up @@ -63,6 +64,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(csconfig.Sample); !ok {
Expand Down Expand Up @@ -94,16 +96,12 @@ func realMain() int {
})
// Cleanup when the CS exits.
defer stop()
// Create a channel where prometheus can signal fatal errors
fatalC := make(chan error, 1)
config.Metrics.StartPrometheus(fatalC)
config.Metrics.StartPrometheus()
select {
case <-environment.AppShutdownSignal:
// Whenever we receive a SIGINT or SIGTERM we exit without an error.
return 0
case err := <-fatalC:
// Prometheus encountered a fatal error, thus we exit.
log.Crit("Unable to listen and serve", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down
10 changes: 4 additions & 6 deletions go/godispatcher/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

"github.com/scionproto/scion/go/godispatcher/internal/config"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/log"
"github.com/scionproto/scion/go/lib/fatal"
)

type Config struct {
Expand All @@ -41,6 +41,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if returnCode, ok := env.CheckFlags(config.Sample); !ok {
Expand All @@ -53,11 +54,8 @@ func realMain() int {
defer env.CleanupLog()
defer env.LogAppStopped("Dispatcher", cfg.Dispatcher.ID)

fatalC := make(chan error, 1)
cfg.Metrics.StartPrometheus(fatalC)
err := <-fatalC
// Prometheus encountered a fatal error, thus we exit.
log.Crit("Unable to listen and serve", "err", err)
cfg.Metrics.StartPrometheus()
<-fatal.Chan()
return 1
}

Expand Down
6 changes: 4 additions & 2 deletions go/lib/env/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra/modules/itopo"
"github.com/scionproto/scion/go/lib/log"
"github.com/scionproto/scion/go/lib/overlay"
Expand Down Expand Up @@ -204,14 +205,15 @@ type Metrics struct {
Prometheus string
}

func (cfg *Metrics) StartPrometheus(fatalC chan error) {
func (cfg *Metrics) StartPrometheus() {
fatal.Check()
if cfg.Prometheus != "" {
http.Handle("/metrics", promhttp.Handler())
log.Info("Exporting prometheus metrics", "addr", cfg.Prometheus)
go func() {
defer log.LogPanicAndExit()
if err := http.ListenAndServe(cfg.Prometheus, nil); err != nil {
fatalC <- common.NewBasicError("HTTP ListenAndServe error", err)
fatal.Fatal(common.NewBasicError("HTTP ListenAndServe error", err))
}
}()
}
Expand Down
51 changes: 43 additions & 8 deletions go/lib/fatal/fatal.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,60 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// Package fatal deals with delivering fatal error conditions to the main
// goroutine. The goroutine can then perform clean shutdown.
// Package fatal provides a way to handle fatal errors.
// 1. It gives the main goroutine an opportunity to cleanly shut down in case of a fatal error.
// 2. If the main goroutine is non-responsive, it terminates the process.
// 3. To improve debugging, after the first fatal error the other goroutines
// are given a grace period so that we have more logs to investigate.
//
// The main program should call fatal.Init() when it's starting.
//
// Any library producing fatal errors should call fatal.Check() when it starts.
package fatal

import (
"time"

"github.com/scionproto/scion/go/lib/log"
)

var (
fatalC chan error
fatalC chan struct{}
)

// Initialize the package.
func init() {
fatalC = make(chan error)
// This MUST be called in the main goroutine when it starts.
func Init() {
fatalC = make(chan struct{})
}

// Check verifies that the package has been initialized and panics if it has not.
// This MUST be called when a library that can produce fatal errors is initialized.
func Check() {
	if fatalC != nil {
		// The fatal channel exists, so the package was initialized.
		return
	}
	panic("A library producing fatal errors is being used " +
		"but fatal package wasn't initialized.")
}

// Signal that the application should shut down.
// Produce a fatal error. This function never returns.
func Fatal(err error) {
fatalC <- err
log.Crit("Fatal error", "err", err)
// Grace period to gather more logs in case that
// the first fatal error wasn't the most informative one.
time.Sleep(1 * time.Second)
// Ask main goroutine to shut down the application.
select {
case fatalC <- struct{}{}:
// Block until the application shuts down.
select {}
case <-time.After(5 * time.Second):
panic("Main goroutine is not responding to the fatal error." +
"It's probably stuck. Shutting down anyway.")
}
}

// Get access to the underlying channel. This is used by the main goroutine to wait for fatal errors.
func Chan() chan error {
func Chan() <-chan struct{} {
return fatalC
}
1 change: 1 addition & 0 deletions go/lib/pktdisp/disp.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type DispatchFunc func(*DispPkt)
// N.B. the DispPkt passed to f is reused, so applications should make a copy if
// this is a problem.
func PktDispatcher(c snet.Conn, f DispatchFunc, pktDispStop chan struct{}) {
fatal.Check()
var err error
var n int
dp := &DispPkt{Raw: make(common.RawBytes, common.MaxMTU)}
Expand Down
10 changes: 4 additions & 6 deletions go/path_srv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra"
"github.com/scionproto/scion/go/lib/infra/infraenv"
"github.com/scionproto/scion/go/lib/infra/modules/cleaner"
Expand Down Expand Up @@ -67,6 +68,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(psconfig.Sample); !ok {
Expand Down Expand Up @@ -158,9 +160,7 @@ func realMain() int {
}
}
msger.AddHandler(infra.SegRev, handlers.NewRevocHandler(args))
// Create a channel where prometheus can signal fatal errors
fatalC := make(chan error, 1)
config.Metrics.StartPrometheus(fatalC)
config.Metrics.StartPrometheus()
// Start handling requests/messages
go func() {
defer log.LogPanicAndExit()
Expand All @@ -179,9 +179,7 @@ func realMain() int {
case <-environment.AppShutdownSignal:
// Whenever we receive a SIGINT or SIGTERM we exit without an error.
return 0
case err := <-fatalC:
// Prometheus encountered a fatal error, thus we exit.
log.Crit("Unable to listen and serve", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down
21 changes: 9 additions & 12 deletions go/sciond/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/env"
"github.com/scionproto/scion/go/lib/fatal"
"github.com/scionproto/scion/go/lib/infra/infraenv"
"github.com/scionproto/scion/go/lib/infra/modules/cleaner"
"github.com/scionproto/scion/go/lib/infra/modules/itopo"
Expand Down Expand Up @@ -68,6 +69,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(sdconfig.Sample); !ok {
Expand Down Expand Up @@ -137,28 +139,23 @@ func realMain() int {
TrustStore: trustStore,
},
}
// Create a channel where server goroutines can signal fatal errors
fatalC := make(chan error, 3)
cleaner := periodic.StartPeriodicTask(cleaner.New(pathDB),
periodic.NewTicker(300*time.Second), 295*time.Second)
defer cleaner.Stop()
// Start servers
rsockServer, shutdownF := NewServer("rsock", config.SD.Reliable, handlers, log.Root())
defer shutdownF()
StartServer("ReliableSockServer", config.SD.Reliable, rsockServer, fatalC)
StartServer("ReliableSockServer", config.SD.Reliable, rsockServer)
unixpacketServer, shutdownF := NewServer("unixpacket", config.SD.Unix, handlers, log.Root())
defer shutdownF()
StartServer("UnixServer", config.SD.Unix, unixpacketServer, fatalC)
config.Metrics.StartPrometheus(fatalC)
StartServer("UnixServer", config.SD.Unix, unixpacketServer)
config.Metrics.StartPrometheus()
select {
case <-environment.AppShutdownSignal:
// Whenever we receive a SIGINT or SIGTERM we exit without an error.
// Deferred shutdowns for all running servers run now.
return 0
case err := <-fatalC:
// At least one of the servers was unable to run or encountered a
// fatal error while running.
log.Crit("Unable to listen and serve", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down Expand Up @@ -195,16 +192,16 @@ func NewServer(network string, rsockPath string, handlers servers.HandlerMap,
return server, shutdownF
}

func StartServer(name, sockPath string, server *servers.Server, fatalC chan error) {
func StartServer(name, sockPath string, server *servers.Server) {
go func() {
defer log.LogPanicAndExit()
if config.SD.DeleteSocket {
if err := os.Remove(sockPath); err != nil && !os.IsNotExist(err) {
fatalC <- common.NewBasicError(name+" SocketRemoval error", err)
fatal.Fatal(common.NewBasicError(name+" SocketRemoval error", err))
}
}
if err := server.ListenAndServe(); err != nil {
fatalC <- common.NewBasicError(name+" ListenAndServe error", err)
fatal.Fatal(common.NewBasicError(name+" ListenAndServe error", err))
}
}()
}
7 changes: 3 additions & 4 deletions go/sig/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func main() {
}

func realMain() int {
fatal.Init()
env.AddFlags()
flag.Parse()
if v, ok := env.CheckFlags(sigconfig.Sample); !ok {
Expand Down Expand Up @@ -106,13 +107,11 @@ func realMain() int {
reader.NewReader(tunIO).Run()
}()
spawnIngressDispatcher(tunIO)
cfg.Metrics.StartPrometheus(fatal.Chan())
cfg.Metrics.StartPrometheus()
select {
case <-environment.AppShutdownSignal:
return 0
case err := <-fatal.Chan():
// Prometheus or the ingress dispatcher encountered a fatal error, thus we exit.
log.Crit("Fatal error during execution", "err", err)
case <-fatal.Chan():
return 1
}
}
Expand Down

0 comments on commit 4164178

Please sign in to comment.