Skip to content

Commit

Permalink
Detect kubelet and container runtime frequent crashes
Browse files Browse the repository at this point in the history
  • Loading branch information
wangzhen127 committed Nov 20, 2018
1 parent 967c22e commit b398d5f
Show file tree
Hide file tree
Showing 11 changed files with 108 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ ifeq ($(ENABLE_JOURNALD), 1)
endif

vet:
go list ./... | grep -v "./vendor/*" | xargs go vet
go list ./... | grep -v "./vendor/*" | xargs go vet $(BUILD_TAGS)

fmt:
find . -type f -name "*.go" | grep -v "./vendor/*" | xargs gofmt -s -w -l
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ For example, to test [KernelMonitor](https://github.com/kubernetes/node-problem-

**Note**:
- You can see more rule examples under [test/kernel_log_generator/problems](https://github.com/kubernetes/node-problem-detector/tree/master/test/kernel_log_generator/problems).
- For [KernelMonitor](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json) message injection, all messages should have ```kernel: ``` prefix (also note there is a space after ```:```).
- For [KernelMonitor](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json) message injection, all messages should have the ```kernel: ``` prefix (note the space after ```:```); alternatively, use [generator.sh](https://github.com/kubernetes/node-problem-detector/blob/master/test/kernel_log_generator/generator.sh) to inject them.
- To inject other logs into journald, such as systemd logs, use ```echo 'Some systemd message' | systemd-cat -t systemd```.

# Remedy Systems

Expand Down
2 changes: 1 addition & 1 deletion cmd/logcounter/log_counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func main() {
fedo.AddFlags(pflag.CommandLine)
pflag.Parse()

counter, err := logcounter.NewKmsgLogCounter(fedo)
counter, err := logcounter.NewJournaldLogCounter(fedo)
if err != nil {
fmt.Print(err)
os.Exit(int(types.Unknown))
Expand Down
10 changes: 7 additions & 3 deletions cmd/logcounter/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,17 @@ func NewLogCounterOptions() *LogCounterOptions {
// LogCounterOptions contains frequent event detector command line and application options.
type LogCounterOptions struct {
// command line options. See flag descriptions for the description
Lookback string
Pattern string
Count int
JournaldSource string
LogPath string
Lookback string
Pattern string
Count int
}

// AddFlags adds log counter command line options to pflag.
func (fedo *LogCounterOptions) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&fedo.JournaldSource, "journald-source", "", "The source configuration of journald, e.g., kernel, kubelet, docker, etc")
fs.StringVar(&fedo.LogPath, "log-path", "", "The log path that log watcher looks up")
fs.StringVar(&fedo.Lookback, "lookback", "", "The time log watcher looks up")
fs.StringVar(&fedo.Pattern, "pattern", "",
"The regular expression to match the problem in log. The pattern must match to the end of the line.")
Expand Down
2 changes: 1 addition & 1 deletion config/docker-monitor.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"plugin": "journald",
"pluginConfig": {
"source": "docker"
"source": "dockerd"
},
"logPath": "/var/log/journal",
"lookback": "5m",
Expand Down
2 changes: 2 additions & 0 deletions config/kernel-monitor-counter.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
"reason": "UnregisterNetDevice",
"path": "/home/kubernetes/bin/log-counter",
"args": [
"--journald-source=kernel",
"--log-path=/var/log/journal",
"--lookback=20m",
"--count=3",
"--pattern=unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
Expand Down
71 changes: 71 additions & 0 deletions config/systemd-monitor-counter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"plugin": "custom",
"pluginConfig": {
"invoke_interval": "5m",
"timeout": "1m",
"max_output_length": 80,
"concurrency": 1
},
"source": "systemd-monitor",
"conditions": [
{
"type": "FrequentKubeletRestart",
"reason": "NoFrequentKubeletRestart",
"message": "kubelet is functioning properly"
},
{
"type": "FrequentDockerRestart",
"reason": "NoFrequentDockerRestart",
"message": "docker is functioning properly"
},
{
"type": "FrequentContainerdRestart",
"reason": "NoFrequentContainerdRestart",
"message": "containerd is functioning properly"
}
],
"rules": [
{
"type": "permanent",
"condition": "FrequentKubeletRestart",
"reason": "FrequentKubeletRestart",
"path": "/home/kubernetes/bin/log-counter",
"args": [
"--journald-source=systemd",
"--log-path=/var/log/journal",
"--lookback=20m",
"--count=5",
"--pattern=Started Kubernetes kubelet."
],
"timeout": "1m"
},
{
"type": "permanent",
"condition": "FrequentDockerRestart",
"reason": "FrequentDockerRestart",
"path": "/home/kubernetes/bin/log-counter",
"args": [
"--journald-source=systemd",
"--log-path=/var/log/journal",
"--lookback=20m",
"--count=5",
"--pattern=Starting Docker Application Container Engine..."
],
"timeout": "1m"
},
{
"type": "permanent",
"condition": "FrequentContainerdRestart",
"reason": "FrequentContainerdRestart",
"path": "/home/kubernetes/bin/log-counter",
"args": [
"--journald-source=systemd",
"--log-path=/var/log/journal",
"--lookback=20m",
"--count=5",
"--pattern=Starting containerd container runtime..."
],
"timeout": "1m"
}
]
}
2 changes: 1 addition & 1 deletion deployment/node-problem-detector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ data:
{
"plugin": "journald",
"pluginConfig": {
"source": "docker"
"source": "dockerd"
},
"logPath": "/var/log/journal",
"lookback": "5m",
Expand Down
12 changes: 6 additions & 6 deletions pkg/custompluginmonitor/types/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,26 @@ var (

type pluginGlobalConfig struct {
// InvokeIntervalString is the interval string at which plugins will be invoked.
InvokeIntervalString *string `json:"invoke_interval, omitempty"`
InvokeIntervalString *string `json:"invoke_interval,omitempty"`
// TimeoutString is the global plugin execution timeout string.
TimeoutString *string `json:"timeout, omitempty"`
TimeoutString *string `json:"timeout,omitempty"`
// InvokeInterval is the interval at which plugins will be invoked.
InvokeInterval *time.Duration `json:"-"`
// Timeout is the global plugin execution timeout.
Timeout *time.Duration `json:"-"`
// MaxOutputLength is the maximum plugin output message length.
MaxOutputLength *int `json:"max_output_length, omitempty"`
MaxOutputLength *int `json:"max_output_length,omitempty"`
// Concurrency is the number of concurrent running plugins.
Concurrency *int `json:"concurrency, omitempty"`
Concurrency *int `json:"concurrency,omitempty"`
}

// CustomPluginConfig is the configuration of the custom plugin monitor.
type CustomPluginConfig struct {
// Plugin is the name of plugin which is currently used.
// Currently supported: custom.
Plugin string `json:"plugin, omitempty"`
Plugin string `json:"plugin,omitempty"`
// PluginConfig is global plugin configuration.
PluginGlobalConfig pluginGlobalConfig `json:"pluginConfig, omitempty"`
PluginGlobalConfig pluginGlobalConfig `json:"pluginConfig,omitempty"`
// Source is the source name of the custom plugin monitor
Source string `json:"source"`
// DefaultConditions are the default states of all the conditions custom plugin monitor should handle.
Expand Down
18 changes: 12 additions & 6 deletions pkg/logcounter/log_counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,15 @@ import (
"k8s.io/node-problem-detector/cmd/logcounter/options"
"k8s.io/node-problem-detector/pkg/logcounter/types"
"k8s.io/node-problem-detector/pkg/systemlogmonitor"
"k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers/kmsg"
"k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers/journald"
watchertypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers/types"
systemtypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
)

const (
bufferSize = 1000
timeout = 1 * time.Second
bufferSize = 1000
timeout = 1 * time.Second
journaldSourceKey = "source"
)

type logCounter struct {
Expand All @@ -42,11 +43,16 @@ type logCounter struct {
clock clock.Clock
}

func NewKmsgLogCounter(options *options.LogCounterOptions) (types.LogCounter, error) {
watcher := kmsg.NewKmsgWatcher(watchertypes.WatcherConfig{Lookback: options.Lookback})
func NewJournaldLogCounter(options *options.LogCounterOptions) (types.LogCounter, error) {
watcher := journald.NewJournaldWatcher(watchertypes.WatcherConfig{
Plugin: "journald",
PluginConfig: map[string]string{journaldSourceKey: options.JournaldSource},
LogPath: options.LogPath,
Lookback: options.Lookback,
})
logCh, err := watcher.Watch()
if err != nil {
return nil, fmt.Errorf("error watching kmsg: %v", err)
return nil, fmt.Errorf("error watching journald: %v", err)
}
return &logCounter{
logCh: logCh,
Expand Down
8 changes: 4 additions & 4 deletions pkg/systemlogmonitor/logwatchers/types/log_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ type LogWatcher interface {
type WatcherConfig struct {
// Plugin is the name of plugin which is currently used.
// Currently supported: filelog, journald, kmsg.
Plugin string `json:"plugin, omitempty"`
Plugin string `json:"plugin,omitempty"`
// PluginConfig is a key/value configuration of a plugin. Valid configurations
// are defined in different log watcher plugin.
PluginConfig map[string]string `json:"pluginConfig, omitempty"`
PluginConfig map[string]string `json:"pluginConfig,omitempty"`
// LogPath is the path to the log
LogPath string `json:"logPath, omitempty"`
LogPath string `json:"logPath,omitempty"`
// Lookback is the time log watcher looks up
Lookback string `json:"lookback, omitempty"`
Lookback string `json:"lookback,omitempty"`
}

// WatcherCreateFunc is the create function of a log watcher.
Expand Down

0 comments on commit b398d5f

Please sign in to comment.