Merge pull request #26694 from hashicorp/jbardin/plugin-panics
Handle panics in plugins
jbardin authored Oct 26, 2020
2 parents d4716a6 + 5f063ae commit 12c0775
Showing 7 changed files with 299 additions and 40 deletions.
internal/logging/logging.go (28 changes: 20 additions & 8 deletions)
@@ -24,14 +24,22 @@ const (
 	envLogProvider = "TF_LOG_PROVIDER"
 )
 
-// ValidLevels are the log level names that Terraform recognizes.
-var ValidLevels = []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"}
+var (
+	// ValidLevels are the log level names that Terraform recognizes.
+	ValidLevels = []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"}
 
-// logger is the global hclog logger
-var logger hclog.Logger
+	// logger is the global hclog logger
+	logger hclog.Logger
 
-// logWriter is a global writer for logs, to be used with the std log package
-var logWriter io.Writer
+	// logWriter is a global writer for logs, to be used with the std log package
+	logWriter io.Writer
+
+	// initialize our cache of panic output from providers
+	panics = &panicRecorder{
+		panics:   make(map[string][]string),
+		maxLines: 100,
+	}
+)
 
 func init() {
 	logger = newHCLogger("")
@@ -98,13 +106,17 @@ func NewLogger(name string) hclog.Logger {
 	if name == "" {
 		panic("logger name required")
 	}
-	return logger.Named(name)
+	return &logPanicWrapper{
+		Logger: logger.Named(name),
+	}
 }
 
 // NewProviderLogger returns a logger for the provider plugin, possibly with a
 // different log level from the global logger.
 func NewProviderLogger(prefix string) hclog.Logger {
-	l := logger.Named(prefix + "provider")
+	l := &logPanicWrapper{
+		Logger: logger.Named(prefix + "provider"),
+	}
 
 	level := providerLogLevel()
 	logger.Debug("created provider logger", "level", level)
internal/logging/panic.go (111 changes: 111 additions & 0 deletions)
@@ -5,7 +5,10 @@ import (
 	"io"
 	"io/ioutil"
 	"os"
+	"strings"
+	"sync"
 
+	"github.com/hashicorp/go-hclog"
 	"github.com/mitchellh/panicwrap"
 )
 
@@ -69,3 +72,111 @@ func PanicHandler(tmpLogPath string) panicwrap.HandlerFunc {
 		fmt.Printf(panicOutput, f.Name())
 	}
 }
+
+const pluginPanicOutput = `
+Stack trace from the %[1]s plugin:
+
+%s
+
+Error: The %[1]s plugin crashed!
+
+This is always indicative of a bug within the plugin. It would be immensely
+helpful if you could report the crash to the plugin's maintainers so that it
+can be fixed. The output above should help diagnose the issue.
+`
+
+// PluginPanics returns a series of provider panics that were collected during
+// execution, and formatted for output.
+func PluginPanics() []string {
+	return panics.allPanics()
+}
+
+// panicRecorder provides a registry to check for plugin panics that may have
+// happened when a plugin suddenly terminates.
+type panicRecorder struct {
+	sync.Mutex
+
+	// panics maps the plugin name to the panic output lines received from
+	// the logger.
+	panics map[string][]string
+
+	// maxLines is the max number of lines we'll record after seeing a
+	// panic header. Since this is going to be printed in the UI output, we
+	// don't want to destroy the scrollback. In most cases, the first few
+	// lines of the stack trace are all that's required.
+	maxLines int
+}
+
+// registerPlugin returns an accumulator function which will accept lines of
+// a panic stack trace to collect into an error when requested.
+func (p *panicRecorder) registerPlugin(name string) func(string) {
+	p.Lock()
+	defer p.Unlock()
+
+	// In most cases we shouldn't be starting a plugin if it already
+	// panicked, but clear out previous entries just in case.
+	delete(p.panics, name)
+
+	count := 0
+
+	// this callback is used by the logger to store panic output
+	return func(line string) {
+		p.Lock()
+		defer p.Unlock()
+
+		// stop recording if there are too many lines
+		if count > p.maxLines {
+			return
+		}
+		count++
+
+		p.panics[name] = append(p.panics[name], line)
+	}
+}
+
+func (p *panicRecorder) allPanics() []string {
+	p.Lock()
+	defer p.Unlock()
+
+	var res []string
+	for name, lines := range p.panics {
+		if len(lines) == 0 {
+			continue
+		}
+
+		res = append(res, fmt.Sprintf(pluginPanicOutput, name, strings.Join(lines, "\n")))
+	}
+	return res
+}
+
+// logPanicWrapper wraps an hclog.Logger and intercepts and records any output
+// that appears to be a panic.
+type logPanicWrapper struct {
+	hclog.Logger
+	panicRecorder func(string)
+	inPanic       bool
+}
+
+// go-plugin will create a new named logger for each plugin binary.
+func (l *logPanicWrapper) Named(name string) hclog.Logger {
+	return &logPanicWrapper{
+		Logger:        l.Logger.Named(name),
+		panicRecorder: panics.registerPlugin(name),
+	}
+}
+
+// we only need to implement Debug, since that is the default output level used
+// by go-plugin when encountering unstructured output on stderr.
+func (l *logPanicWrapper) Debug(msg string, args ...interface{}) {
+	// We don't have access to the binary itself, so guess based on the stderr
+	// output if this is the start of the traceback. An occasional false
+	// positive shouldn't be a big deal, since this is only retrieved after an
+	// error of some sort.
+	l.inPanic = l.inPanic || strings.HasPrefix(msg, "panic: ") || strings.HasPrefix(msg, "fatal error: ")
+
+	if l.inPanic && l.panicRecorder != nil {
+		l.panicRecorder(msg)
+	}
+
+	l.Logger.Debug(msg, args...)
+}
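
Taken together with the logging.go change above, the flow is: go-plugin calls Named() on the logger Terraform hands it, which registers a panic recorder for that plugin; the plugin's unstructured stderr output then arrives through Debug(), and once a line looks like a panic header, subsequent lines are recorded. A minimal sketch of that flow, written as an example function inside the logging package (the plugin name and log lines are invented; this code is not part of the commit):

	package logging

	import "fmt"

	// ExamplePluginPanics drives the wrapper the same way go-plugin would
	// when a plugin crashes. Sketch only.
	func ExamplePluginPanics() {
		// NewLogger now returns a *logPanicWrapper; Named() registers a panic
		// recorder for the (invented) plugin name "example-provider".
		l := NewLogger("plugin").Named("example-provider")

		// go-plugin forwards unstructured stderr at Debug level, so a crashing
		// plugin's traceback arrives as successive Debug calls.
		l.Debug("panic: runtime error: invalid memory address or nil pointer dereference")
		l.Debug("goroutine 17 [running]:")

		// The recorded lines come back formatted with pluginPanicOutput.
		for _, out := range PluginPanics() {
			fmt.Println(out)
		}
	}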
internal/logging/panic_test.go (51 changes: 51 additions & 0 deletions)
@@ -0,0 +1,51 @@
+package logging
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+)
+
+func TestPanicRecorder(t *testing.T) {
+	rec := panics.registerPlugin("test")
+
+	output := []string{
+		"panic: test",
+		"  stack info",
+	}
+
+	for _, line := range output {
+		rec(line)
+	}
+
+	expected := fmt.Sprintf(pluginPanicOutput, "test", strings.Join(output, "\n"))
+
+	res := PluginPanics()
+	if len(res) == 0 {
+		t.Fatal("no output")
+	}
+
+	if res[0] != expected {
+		t.Fatalf("expected: %q\ngot: %q", expected, res[0])
+	}
+}
+
+func TestPanicLimit(t *testing.T) {
+	rec := panics.registerPlugin("test")
+
+	rec("panic: test")
+
+	for i := 0; i < 200; i++ {
+		rec(fmt.Sprintf("LINE: %d", i))
+	}
+
+	res := PluginPanics()
+	// take the extra content in pluginPanicOutput into account
+	max := strings.Count(pluginPanicOutput, "\n") + panics.maxLines
+	for _, out := range res {
+		found := strings.Count(out, "\n")
+		if found > max {
+			t.Fatalf("expected no more than %d lines, got: %d", max, found)
+		}
+	}
+}
main.go (10 changes: 10 additions & 0 deletions)
@@ -288,6 +288,16 @@ func wrappedMain() int {
 		return 1
 	}
 
+	// if we are exiting with a non-zero code, check if it was caused by any
+	// plugins crashing
+	if exitCode != 0 {
+		for _, panicLog := range logging.PluginPanics() {
+			// we don't write this to Error, or else panicwrap will think this
+			// process panicked
+			Ui.Info(panicLog)
+		}
+	}
+
 	return exitCode
 }

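The Ui.Info choice is deliberate: wrappedMain already runs as a child process under panicwrap, whose parent scans the child's stderr for Go panic output, so writing these reports to Error could make Terraform itself look like it crashed. A rough sketch of that pre-existing wiring, with the configuration details assumed rather than taken from this diff (logTempFile is a placeholder name):

	// Sketch of the assumed panicwrap setup elsewhere in main.go;
	// not code from this commit.
	exitStatus, err := panicwrap.Wrap(&panicwrap.WrapConfig{
		// PanicHandler is the handler defined in internal/logging/panic.go.
		Handler: logging.PanicHandler(logTempFile.Name()),
	})
	if err != nil {
		// could not set up the wrapper; fall back to running unwrapped
	}
	if exitStatus >= 0 {
		// parent process: the wrapped child already ran and exited
		os.Exit(exitStatus)
	}
	// exitStatus < 0 means this is the child: continue into wrappedMain()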
plugin/grpc_error.go (74 changes: 74 additions & 0 deletions)
@@ -0,0 +1,74 @@
+package plugin
+
+import (
+	"fmt"
+	"path"
+	"runtime"
+
+	"github.com/hashicorp/terraform/tfdiags"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+)
+
+// grpcErr extracts some known error types and formats them into better
+// representations for core. This must only be called from plugin methods.
+// Since we don't use RPC status errors for the plugin protocol, these do not
+// contain any useful details, and we can return some text that at least
+// indicates the plugin call and possible error condition.
+func grpcErr(err error) (diags tfdiags.Diagnostics) {
+	if err == nil {
+		return
+	}
+
+	// extract the method name from the caller
+	pc, _, _, ok := runtime.Caller(1)
+	if !ok {
+		logger.Error("unknown grpc call", "error", err)
+		return diags.Append(err)
+	}
+
+	f := runtime.FuncForPC(pc)
+
+	// Function names will contain the full import path. Take the last
+	// segment, which will let users know which method was being called.
+	_, requestName := path.Split(f.Name())
+
+	// Here we can at least correlate the error in the logs to a particular binary.
+	logger.Error(requestName, "error", err)
+
+	// TODO: while this expands the error codes into somewhat better messages,
+	// this still does not easily link the error to an actual user-recognizable
+	// plugin. The grpc plugin does not know its configured name, and the
+	// errors are in a list of diagnostics, making it hard for the caller to
+	// annotate the returned errors.
+	switch status.Code(err) {
+	case codes.Unavailable:
+		// This case is when the plugin has stopped running for some reason,
+		// and is usually the result of a crash.
+		diags = diags.Append(tfdiags.Sourceless(
+			tfdiags.Error,
+			"Plugin did not respond",
+			fmt.Sprintf("The plugin encountered an error, and failed to respond to the %s call. "+
+				"The plugin logs may contain more details.", requestName),
+		))
+	case codes.Canceled:
+		diags = diags.Append(tfdiags.Sourceless(
+			tfdiags.Error,
+			"Request cancelled",
+			fmt.Sprintf("The %s request was cancelled.", requestName),
+		))
+	case codes.Unimplemented:
+		diags = diags.Append(tfdiags.Sourceless(
+			tfdiags.Error,
+			"Unsupported plugin method",
+			fmt.Sprintf("The %s method is not supported by this plugin.", requestName),
+		))
+	default:
+		diags = diags.Append(tfdiags.Sourceless(
+			tfdiags.Error,
+			"Plugin error",
+			fmt.Sprintf("The plugin returned an unexpected error from %s: %v", requestName, err),
+		))
+	}
+	return
+}
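
Because grpcErr reads runtime.Caller(1), it only yields a useful request name when invoked directly from the plugin method that made the failing RPC. The actual call sites live in the gRPC provider wrappers (among the two changed files not shown above); the following hypothetical sketch shows the intended pattern, with the receiver, client field, and request/response plumbing assumed rather than taken from this diff:

	// Hypothetical call-site shape for plugin/grpc_provider.go.
	func (p *GRPCProvider) ReadResource(r providers.ReadResourceRequest) (resp providers.ReadResourceResponse) {
		// ... build protoReq from r (elided) ...
		protoResp, err := p.client.ReadResource(p.ctx, protoReq)
		if err != nil {
			// Calling grpcErr in this frame lets runtime.Caller(1) resolve to
			// "plugin.(*GRPCProvider).ReadResource" in logs and diagnostics.
			resp.Diagnostics = resp.Diagnostics.Append(grpcErr(err))
			return resp
		}
		// ... decode protoResp into resp (elided) ...
		_ = protoResp
		return resp
	}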
(Diff truncated: 2 of the 7 changed files are not shown.)
