Skip to content

Commit

Permalink
Refactor diag.Resource and deploy.Resource.Status to contain actionab…
Browse files Browse the repository at this point in the history
…le error proto

proto.ActionableErr

In this PR,
1. For suggesting actionable errors for k8 infra errors, it would make
sense to do it in the diag package.
2. Hence adding ActionableErr to diag.Resource package and propogating to depoyment status in
skaffold.deploy.Resource.Status
  • Loading branch information
tejal29 committed Jun 24, 2020
1 parent 4459eea commit bdea994
Show file tree
Hide file tree
Showing 16 changed files with 507 additions and 424 deletions.
72 changes: 48 additions & 24 deletions docs/content/en/api/skaffold.swagger.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions docs/content/en/docs/references/api/grpc.md
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,8 @@ skip 408 as STATUSCHECK_UNHEALTH code renumbered as 357 |
| DEVINIT_REGISTER_TEST_DEPS | 702 | Failed to configure watcher for test dependencies in dev loop |
| DEVINIT_REGISTER_DEPLOY_DEPS | 703 | Failed to configure watcher for deploy dependencies in dev loop |
| DEVINIT_REGISTER_CONFIG_DEP | 704 | Failed to configure watcher for Skaffold configuration file. |
| STATUSCHECK_CONTEXT_CANCELLED | 800 | User cancelled the skaffold dev run |
| STATUSCHECK_DEADLINE_EXCEEDED | 801 | Deadline for status check exceeded |



Expand Down
47 changes: 25 additions & 22 deletions pkg/diag/validator/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (p *PodValidator) Validate(ctx context.Context, ns string, opts metav1.List
if po.Kind == "" {
po.Kind = podKind
}
rs = append(rs, NewResourceFromObject(&po, Status(ps.phase), ps.err, ps.statusCode, ps.logs))
rs = append(rs, NewResourceFromObject(&po, Status(ps.phase), ps.ae, ps.logs))
}
return rs, nil
}
Expand Down Expand Up @@ -209,34 +209,35 @@ func processPodEvents(e corev1.EventInterface, pod v1.Pod, ps *podStatus) {
}
switch recentEvent.Reason {
case failedScheduling:
ps.statusCode = proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING
ps.err = fmt.Errorf(recentEvent.Message)
ps.ae.ErrCode = proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING
ps.ae.Message = recentEvent.Message
case unhealthy:
ps.statusCode = proto.StatusCode_STATUSCHECK_UNHEALTHY
ps.err = fmt.Errorf(recentEvent.Message)
ps.ae.ErrCode = proto.StatusCode_STATUSCHECK_UNHEALTHY
ps.ae.Message = recentEvent.Message
default:
// TODO: Add unique error codes for reasons
ps.statusCode = proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT
ps.err = fmt.Errorf("%s: %s", recentEvent.Reason, recentEvent.Message)
ps.ae.ErrCode = proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT
ps.ae.Message = fmt.Sprintf("%s: %s", recentEvent.Reason, recentEvent.Message)
}
}

type podStatus struct {
name string
namespace string
phase string
logs []string
err error
statusCode proto.StatusCode
name string
namespace string
phase string
logs []string
ae *proto.ActionableErr
}

func (p *podStatus) isStable() bool {
return p.phase == success || (p.phase == running && p.err == nil)
return p.phase == success || (p.phase == running && p.ae.Message == "")
}

func (p *podStatus) withErrAndLogs(errCode proto.StatusCode, l []string, err error) *podStatus {
p.err = err
p.statusCode = errCode
if err != nil {
p.ae.Message = err.Error()
}
p.ae.ErrCode = errCode
p.logs = l
return p
}
Expand All @@ -246,8 +247,8 @@ func (p *podStatus) String() string {
case p.isStable():
return ""
default:
if p.err != nil {
return fmt.Sprintf("%s", p.err)
if p.ae.Message != "" {
return p.ae.Message
}
}
return fmt.Sprintf(actionableMessage, p.namespace, p.name)
Expand Down Expand Up @@ -275,10 +276,12 @@ func extractErrorMessageFromWaitingContainerStatus(po *v1.Pod, c v1.ContainerSta

func newPodStatus(n string, ns string, p string) *podStatus {
return &podStatus{
name: n,
namespace: ns,
phase: p,
statusCode: proto.StatusCode_STATUSCHECK_SUCCESS,
name: n,
namespace: ns,
phase: p,
ae: &proto.ActionableErr{
ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS,
},
}
}

Expand Down
75 changes: 54 additions & 21 deletions pkg/diag/validator/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("container foo-container is waiting to start: foo-image can't be pulled"),
proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR, nil)},
&proto.ActionableErr{
Message: "container foo-container is waiting to start: foo-image can't be pulled",
ErrCode: proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR,
}, nil)},
},
{
description: "pod is Waiting condition due to ErrImageBackOffPullErr",
Expand Down Expand Up @@ -113,8 +115,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("container foo-container is waiting to start: foo-image can't be pulled"),
proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR, nil)},
&proto.ActionableErr{
Message: "container foo-container is waiting to start: foo-image can't be pulled",
ErrCode: proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR,
}, nil)},
},
{
description: "pod is Waiting due to Image Backoff Pull error",
Expand Down Expand Up @@ -142,8 +146,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("container foo-container is waiting to start: foo-image can't be pulled"),
proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR, nil)},
&proto.ActionableErr{
Message: "container foo-container is waiting to start: foo-image can't be pulled",
ErrCode: proto.StatusCode_STATUSCHECK_IMAGE_PULL_ERR,
}, nil)},
},
{
description: "pod is in Terminated State",
Expand All @@ -158,8 +164,11 @@ func TestRun(t *testing.T) {
Conditions: []v1.PodCondition{{Type: v1.PodScheduled, Status: v1.ConditionTrue}},
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Succeeded", nil,
proto.StatusCode_STATUSCHECK_SUCCESS, nil)},
expected: []Resource{NewResource("test", "Pod", "foo", "Succeeded",
&proto.ActionableErr{
Message: "",
ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS,
}, nil)},
},
{
description: "pod is in Stable State",
Expand All @@ -180,8 +189,11 @@ func TestRun(t *testing.T) {
},
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Running", nil,
proto.StatusCode_STATUSCHECK_SUCCESS, nil)},
expected: []Resource{NewResource("test", "Pod", "foo", "Running",
&proto.ActionableErr{
Message: "",
ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS,
}, nil)},
},
{
description: "pod condition unknown",
Expand All @@ -201,7 +213,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("could not determine"), proto.StatusCode_STATUSCHECK_UNKNOWN, nil)},
&proto.ActionableErr{
Message: "could not determine",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN,
}, nil)},
},
{
description: "pod could not be scheduled",
Expand All @@ -222,8 +237,10 @@ func TestRun(t *testing.T) {
},
}},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("Unschedulable: 0/2 nodes available: 1 node has disk pressure, 1 node is unreachable"),
proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE, nil)},
&proto.ActionableErr{
Message: "Unschedulable: 0/2 nodes available: 1 node has disk pressure, 1 node is unreachable",
ErrCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
}, nil)},
},
{
description: "pod is running but container terminated",
Expand All @@ -248,8 +265,10 @@ func TestRun(t *testing.T) {
output: []byte("main.go:57 \ngo panic"),
},
expected: []Resource{NewResource("test", "Pod", "foo", "Running",
fmt.Errorf("container foo-container terminated with exit code 1"),
proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED, []string{
&proto.ActionableErr{
Message: "container foo-container terminated with exit code 1",
ErrCode: proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED,
}, []string{
"[foo foo-container] main.go:57 ",
"[foo foo-container] go panic"},
)},
Expand All @@ -276,8 +295,10 @@ func TestRun(t *testing.T) {
err: fmt.Errorf("error"),
},
expected: []Resource{NewResource("test", "pod", "foo", "Running",
fmt.Errorf("container foo-container terminated with exit code 1"),
proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED, []string{
&proto.ActionableErr{
Message: "container foo-container terminated with exit code 1",
ErrCode: proto.StatusCode_STATUSCHECK_CONTAINER_TERMINATED,
}, []string{
"Error retrieving logs for pod foo. Try `kubectl logs foo -n test -c foo-container`"},
)},
},
Expand Down Expand Up @@ -306,7 +327,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
&proto.ActionableErr{
Message: "eventCode: dummy event",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
}, nil)},
},
{
description: "pod condition a warning event followed up normal event",
Expand Down Expand Up @@ -338,7 +362,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
&proto.ActionableErr{
Message: "eventCode: dummy event",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
}, nil)},
},
{
description: "pod condition a normal event followed by a warning event",
Expand Down Expand Up @@ -370,7 +397,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("eventCode: dummy event"), proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT, nil)},
&proto.ActionableErr{
Message: "eventCode: dummy event",
ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN_EVENT,
}, nil)},
},
{
description: "pod condition a warning event followed up by warning adds last warning seen",
Expand Down Expand Up @@ -402,7 +432,10 @@ func TestRun(t *testing.T) {
},
},
expected: []Resource{NewResource("test", "Pod", "foo", "Pending",
fmt.Errorf("0/1 nodes are available: 1 node(s) had taint {key: value}, that the pod didn't tolerate"), proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING, nil)},
&proto.ActionableErr{
Message: "0/1 nodes are available: 1 node(s) had taint {key: value}, that the pod didn't tolerate",
ErrCode: proto.StatusCode_STATUSCHECK_FAILED_SCHEDULING,
}, nil)},
},
}

Expand Down
28 changes: 16 additions & 12 deletions pkg/diag/validator/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,35 @@ import (
)

type Resource struct {
namespace string
kind string
name string
logs []string
status Status
err error
StatusCode proto.StatusCode
namespace string
kind string
name string
logs []string
status Status
ae *proto.ActionableErr
}

func (r Resource) Kind() string { return r.kind }
func (r Resource) Name() string { return r.name }
func (r Resource) Namespace() string { return r.namespace }
func (r Resource) Status() Status { return r.status }
func (r Resource) Error() error { return r.err }
func (r Resource) Logs() []string { return r.logs }
func (r Resource) String() string {
if r.namespace == "default" {
return fmt.Sprintf("%s/%s", r.kind, r.name)
}
return fmt.Sprintf("%s:%s/%s", r.namespace, r.kind, r.name)
}
func (r Resource) ActionableError() *proto.ActionableErr {
return r.ae
}
func (r Resource) StatusUpdated(another Resource) bool {
return r.ae.ErrCode != another.ae.ErrCode
}

// NewResource creates new Resource of kind
func NewResource(namespace, kind, name string, status Status, err error, statusCode proto.StatusCode, logs []string) Resource {
return Resource{namespace: namespace, kind: kind, name: name, status: status, err: err, StatusCode: statusCode, logs: logs}
func NewResource(namespace, kind, name string, status Status, ae *proto.ActionableErr, logs []string) Resource {
return Resource{namespace: namespace, kind: kind, name: name, status: status, ae: ae, logs: logs}
}

// objectWithMetadata is any k8s object that has kind and object metadata.
Expand All @@ -60,6 +64,6 @@ type objectWithMetadata interface {
}

// NewResourceFromObject creates new Resource with fields populated from object metadata.
func NewResourceFromObject(object objectWithMetadata, status Status, err error, statusCode proto.StatusCode, logs []string) Resource {
return NewResource(object.GetNamespace(), object.GetObjectKind().GroupVersionKind().Kind, object.GetName(), status, err, statusCode, logs)
func NewResourceFromObject(object objectWithMetadata, status Status, ae *proto.ActionableErr, logs []string) Resource {
return NewResource(object.GetNamespace(), object.GetObjectKind().GroupVersionKind().Kind, object.GetName(), status, ae, logs)
}
6 changes: 3 additions & 3 deletions pkg/diag/validator/resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func TestNewResource(t *testing.T) {
Name: "foo",
},
},
expected: Resource{"default", "pod", "foo", nil, Status(""), nil, 0},
expected: Resource{"default", "pod", "foo", nil, Status(""), nil},
expectedName: "pod/foo",
},
{
Expand All @@ -54,13 +54,13 @@ func TestNewResource(t *testing.T) {
Name: "bar",
},
},
expected: Resource{"test", "pod", "bar", nil, Status(""), nil, 0},
expected: Resource{"test", "pod", "bar", nil, Status(""), nil},
expectedName: "test:pod/bar",
},
}
for _, test := range tests {
testutil.Run(t, test.description, func(t *testutil.T) {
actual := NewResourceFromObject(test.resource, Status(""), nil, 0, nil)
actual := NewResourceFromObject(test.resource, Status(""), nil, nil)
t.CheckDeepEqual(test.expected, actual, cmp.AllowUnexported(Resource{}))
t.CheckDeepEqual(test.expectedName, actual.String(), cmp.AllowUnexported(Resource{}))
})
Expand Down
Loading

0 comments on commit bdea994

Please sign in to comment.