-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add driver.docker counter metric for OOM Killer events #4185
Changes from 5 commits
5cacfaf
54e1788
feeee1b
8bc4eb2
e7bd558
caeb596
a5166e9
48c5093
ae0b3d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -17,12 +17,13 @@ import ( | |||||||||||||||||||||||||||||||
"time" | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
"github.com/armon/circbuf" | ||||||||||||||||||||||||||||||||
docker "github.com/fsouza/go-dockerclient" | ||||||||||||||||||||||||||||||||
"github.com/fsouza/go-dockerclient" | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
"github.com/docker/docker/cli/config/configfile" | ||||||||||||||||||||||||||||||||
"github.com/docker/docker/reference" | ||||||||||||||||||||||||||||||||
"github.com/docker/docker/registry" | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
"github.com/armon/go-metrics" | ||||||||||||||||||||||||||||||||
"github.com/hashicorp/go-multierror" | ||||||||||||||||||||||||||||||||
"github.com/hashicorp/go-plugin" | ||||||||||||||||||||||||||||||||
"github.com/hashicorp/nomad/client/allocdir" | ||||||||||||||||||||||||||||||||
|
@@ -478,6 +479,10 @@ type DockerHandle struct { | |||||||||||||||||||||||||||||||
client *docker.Client | ||||||||||||||||||||||||||||||||
waitClient *docker.Client | ||||||||||||||||||||||||||||||||
logger *log.Logger | ||||||||||||||||||||||||||||||||
jobName string | ||||||||||||||||||||||||||||||||
taskGroupName string | ||||||||||||||||||||||||||||||||
taskName string | ||||||||||||||||||||||||||||||||
allocID string | ||||||||||||||||||||||||||||||||
Image string | ||||||||||||||||||||||||||||||||
ImageID string | ||||||||||||||||||||||||||||||||
containerID string | ||||||||||||||||||||||||||||||||
|
@@ -898,6 +903,10 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (*StartRespon | |||||||||||||||||||||||||||||||
executor: exec, | ||||||||||||||||||||||||||||||||
pluginClient: pluginClient, | ||||||||||||||||||||||||||||||||
logger: d.logger, | ||||||||||||||||||||||||||||||||
jobName: d.DriverContext.jobName, | ||||||||||||||||||||||||||||||||
taskGroupName: d.DriverContext.taskGroupName, | ||||||||||||||||||||||||||||||||
taskName: d.DriverContext.taskName, | ||||||||||||||||||||||||||||||||
allocID: d.DriverContext.allocID, | ||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two notes on this:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to add to ID and Open method: https://github.com/hashicorp/nomad/blob/master/client/driver/docker.go#L1769-L1798 |
||||||||||||||||||||||||||||||||
Image: d.driverConfig.ImageName, | ||||||||||||||||||||||||||||||||
ImageID: d.imageID, | ||||||||||||||||||||||||||||||||
containerID: container.ID, | ||||||||||||||||||||||||||||||||
|
@@ -1924,6 +1933,25 @@ func (h *DockerHandle) run() { | |||||||||||||||||||||||||||||||
h.logger.Printf("[ERR] driver.docker: failed to inspect container %s: %v", h.containerID, ierr) | ||||||||||||||||||||||||||||||||
} else if container.State.OOMKilled { | ||||||||||||||||||||||||||||||||
werr = fmt.Errorf("OOM Killed") | ||||||||||||||||||||||||||||||||
labels := []metrics.Label{ | ||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For me the interesting labels would be job, group and task :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, for me too, but I don't know how to access them from here. I'm happy to implement a solution but I'd need some help. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jesusvazquez you could add those fields to the `DockerHandle` (see lines 895 to 909 in fb7e0c1).
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See: #4196 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, really nice @jvrplmlmn. I'll update this PR as soon as I get back from my vacation. |
||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
Name: "JobName", | ||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use consistent names: https://github.com/hashicorp/nomad/blob/master/client/task_runner.go#L274: job, task_group, task |
||||||||||||||||||||||||||||||||
Value: h.jobName, | ||||||||||||||||||||||||||||||||
}, | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
Name: "TaskGroupName", | ||||||||||||||||||||||||||||||||
Value: h.taskGroupName, | ||||||||||||||||||||||||||||||||
}, | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
Name: "TaskName", | ||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jippi this is what you meant, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah - looks great! I personally don't care for the allocation ID below; it's a very high-cardinality field, making influxdb & friends unhappy fairly fast. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand that from the database and query point of view it probably has no value, but from the alerting point of view the user can quickly run [inline command missing from this copy]. What would you say here? Should we keep it or drop it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My personal preference would be to drop it - sounds like an alert in your logging rather than telemetry, if you ask me :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would drop it. As @jippi points out, it will make many telemetry systems unhappy. |
||||||||||||||||||||||||||||||||
Value: h.taskName, | ||||||||||||||||||||||||||||||||
}, | ||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||
Name: "AllocID", | ||||||||||||||||||||||||||||||||
Value: h.allocID, | ||||||||||||||||||||||||||||||||
}, | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
metrics.IncrCounterWithLabels([]string{"driver", "docker", "oom"}, 1, labels) | ||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
close(h.doneCh) | ||||||||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Goimports suggested this change. Please let me know if you want me to roll back this line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's because the imported package in that folder is already called `docker` :)