Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teardowns the local network on "local stop" #775

Merged
merged 5 commits into from
May 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion ecs-cli/modules/cli/local/local_app.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,26 @@ func Create(c *cli.Context) {
// If the container is not running, this command creates a new network for all local ECS tasks to join
// and communicate with the Amazon ECS Local Endpoints container.
func Up(c *cli.Context) {
docker, err := client.NewEnvClient() // Temporary client created to test network.Setup()
// TODO move these clients to a separate file leveraging the DOCKER_API_VERSION,
// these clients are created to test the local network for now.
// See https://github.com/awslabs/amazon-ecs-local-container-endpoints/blob/master/local-container-endpoints/clients/docker/client.go#L49
docker, err := client.NewEnvClient()
if err != nil {
logrus.Fatal("Could not connect to docker", err)
}
network.Setup(docker)
}

// Stop stops a running local ECS task.
//
// If the user stops the last running task in the local network then also remove the network.
func Stop(c *cli.Context) {
// TODO move these clients to a separate file leveraging the DOCKER_API_VERSION,
// these clients are created to test the local network for now.
// See https://github.com/awslabs/amazon-ecs-local-container-endpoints/blob/master/local-container-endpoints/clients/docker/client.go#L49
docker, err := client.NewEnvClient()
if err != nil {
logrus.Fatal("Could not connect to docker", err)
}
defer network.Teardown(docker)
}
3 changes: 2 additions & 1 deletion ecs-cli/modules/cli/local/network/generate_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@

package network

//go:generate mockgen.sh github.com/aws/amazon-ecs-cli/ecs-cli/modules/cli/local/network LocalEndpointsStarter mock_network/network.go
//go:generate mockgen.sh github.com/aws/amazon-ecs-cli/ecs-cli/modules/cli/local/network LocalEndpointsStarter mock_network/setup.go
//go:generate mockgen.sh github.com/aws/amazon-ecs-cli/ecs-cli/modules/cli/local/network LocalEndpointsStopper mock_network/teardown.go
107 changes: 107 additions & 0 deletions ecs-cli/modules/cli/local/network/mock_network/teardown.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions ecs-cli/modules/cli/local/network/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ const (
)

// Setup creates a user-defined bridge network with a running Local Container Endpoints container. If the network
// already exists or the container is already running then the operation is a no-op.
// already exists or the container is already running then this function does nothing.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very minor nit: this change and the other one in teardown.go probably shouldn't be in the "generate mocks" commit.

If you plan to squash all changes to one commit prior to merge, then don't worry about it

//
// If there is any unexpected errors, we exit the program with a fatal log.
func Setup(dockerClient LocalEndpointsStarter) {
Expand Down Expand Up @@ -112,6 +112,7 @@ func createLocalNetwork(dockerClient networkCreator) {
ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

logrus.Infof("Creating network: %s...", EcsLocalNetworkName)
resp, err := dockerClient.NetworkCreate(ctx, EcsLocalNetworkName, types.NetworkCreate{
IPAM: &network.IPAM{
Config: []network.IPAMConfig{
Expand Down Expand Up @@ -168,7 +169,7 @@ func createLocalEndpointsContainer(dockerClient containerStarter) string {
)
if err != nil {
if strings.Contains(err.Error(), "Conflict") {
// We already created this container before since there is a name conflict, fetch its ID and return it.
// We already created this container before, fetch its ID and return it.
containerID := localEndpointsContainerID(dockerClient)
logrus.Infof("The %s container already exists with ID %s", localEndpointsContainerName, containerID)
return containerID
Expand Down
30 changes: 20 additions & 10 deletions ecs-cli/modules/cli/local/network/setup_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
// Copyright 2015-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package network

import (
Expand All @@ -13,7 +26,7 @@ import (
"github.com/aws/amazon-ecs-cli/ecs-cli/modules/cli/local/network/mock_network"
)

type CallsConfigurer func(docker *mock_network.MockLocalEndpointsStarter) *mock_network.MockLocalEndpointsStarter
type mockStarterCalls func(docker *mock_network.MockLocalEndpointsStarter) *mock_network.MockLocalEndpointsStarter

type notFoundErr struct{}

Expand All @@ -30,7 +43,7 @@ func TestSetup(t *testing.T) {
// The validation of whether those fields behave as expected should be captured in integration tests.
// See https://github.com/aws/amazon-ecs-cli/issues/772
tests := map[string]struct {
configureCalls CallsConfigurer
configureCalls mockStarterCalls
}{
"new network and new container": {
configureCalls: func(docker *mock_network.MockLocalEndpointsStarter) *mock_network.MockLocalEndpointsStarter {
Expand Down Expand Up @@ -80,16 +93,13 @@ func TestSetup(t *testing.T) {

for name, tc := range tests {
t.Run(name, func(t *testing.T) {
mockDocker := tc.configureCalls(newMockLocalNetworkStarter(t))
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockDocker := mock_network.NewMockLocalEndpointsStarter(ctrl)
mockDocker = tc.configureCalls(mockDocker)

Setup(mockDocker)
})
}
}

func newMockLocalNetworkStarter(t *testing.T) *mock_network.MockLocalEndpointsStarter {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

return mock_network.NewMockLocalEndpointsStarter(ctrl)
}
148 changes: 148 additions & 0 deletions ecs-cli/modules/cli/local/network/teardown.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Copyright 2015-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package network

import (
"strings"
"time"

"github.com/docker/docker/api/types"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
)

// LocalEndpointsStopper groups the Docker NetworkInspect, ContainerStop, ContainerRemove, and NetworkRemove functions.
//
// These functions can be used together to remove a network once unwanted containers are stopped.
type LocalEndpointsStopper interface {
networkInspector
containerStopper
containerRemover
networkRemover
}

type networkInspector interface {
NetworkInspect(ctx context.Context, networkID string, options types.NetworkInspectOptions) (types.NetworkResource, error)
}

type containerStopper interface {
ContainerStop(ctx context.Context, containerID string, timeout *time.Duration) error
}

type containerRemover interface {
ContainerRemove(ctx context.Context, containerID string, options types.ContainerRemoveOptions) error
}

type networkRemover interface {
NetworkRemove(ctx context.Context, networkID string) error
}

// Teardown removes both the Local Endpoints container and the Local network created by Setup.
// If there are other containers running in the network besides the endpoints container, this function does nothing.
//
// If there is any unexpected errors, we exit the program with a fatal log.
func Teardown(dockerClient LocalEndpointsStopper) {
if hasRunningTasksInNetwork(dockerClient) {
return
}
logrus.Infof("The network %s has no more running tasks, stopping the endpoints containers...", EcsLocalNetworkName)

stopEndpointsContainer(dockerClient)
removeEndpointsContainer(dockerClient)
removeLocalNetwork(dockerClient)
}

// hasRunningTasksInNetwork returns true if there are other containers besides the
// endpoints container running in the local network, false otherwise.
func hasRunningTasksInNetwork(d networkInspector) bool {
ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

resp, err := d.NetworkInspect(ctx, EcsLocalNetworkName, types.NetworkInspectOptions{})
if err != nil {
logrus.Fatalf("Failed to inspect network %s due to %v", EcsLocalNetworkName, err)
}

if len(resp.Containers) > 1 {
// Has other containers running in the network
logrus.Infof("%d other task(s) running locally, skipping network removal.", len(resp.Containers)-1)
return true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest adding a log statement here so that users have more visibility in the case where teardown is being skipped because they had launched other containers in the network

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Added "%d other task(s) running locally, skipping network removal."

}

for _, container := range resp.Containers {
if container.Name != localEndpointsContainerName {
// The only container running in the network is a task without the endpoints container.
// This scenario should not happen unless the user themselves stopped the endpoints container.
logrus.Warnf("The %s container is running in the %s network without the %s container, please stop it first",
container.Name, EcsLocalNetworkName, localEndpointsContainerName)
return true
}
}

return false
}

func stopEndpointsContainer(d containerStopper) {
ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

err := d.ContainerStop(ctx, localEndpointsContainerName, nil)
if err != nil {
if strings.Contains(strings.ToLower(err.Error()), "no such container") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great question, looks like the comment below wasn't precise enough :P

This error happens if the user stopped all their task containers with docker stop instead of ecs-cli local stop. In that situation, hasRunningTasksInNetwork() will return false and we will try to stop the endpoints container again. But since it was already stopped this error is returned.

We don't want to log.Fatal while trying to stop an already stopped container, it should be a no-op hence this if-statement.

I updated the comment to // The containers in the network were already stopped by the user using "docker stop", do nothing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative is to not invoke stopEndpointsContainer() or removeEndpointsContainer() if there are 0 containers in the local network.

However, I think that approach pushes complexity upwards instead of downwards (into the function) so I don't prefer it as much. Also if you run ecs-cli local stop on the same task concurrently (two terminals for example) then one of those could trigger this error and fail with fatal instead of continuing cleanly by not doing any work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it... my only concern is that this is yet another place where we are using string matching on errors returned by Docker :/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree but I think Docker locked themselves and can't change their error messages otherwise they'll break everyone's code :P

Here is a similar example from the agent: https://github.com/aws/amazon-ecs-agent/blob/24c71162368b3a9090689532e5c035fe2bccf133/agent/dockerclient/dockerapi/docker_client.go#L693

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is my least favorite thing in Go...

// The containers in the network were already stopped by the user using "docker stop", do nothing.
return
}
logrus.Fatalf("Failed to stop %s container due to %v", localEndpointsContainerName, err)
}
logrus.Infof("Stopped the %s container successfully, removing it...", localEndpointsContainerName)
}

// removeEndpointsContainer removes the endpoints container.
//
// If we do not remove the container, then the user will receive a "network not found" error on using "local up".
// Here is a sample scenario:
// 1) User runs "local up" and creates a new local network with an endpoints container.
// 2) User runs "local down" and stops the endpoints container but does not remove it, however the network is removed.
// 3) User runs "local up" again and creates a new local network but re-starts the old endpoints container.
// The old endpoints container tries to connect to the network created in step 1) and fails.
func removeEndpointsContainer(d containerRemover) {
ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

err := d.ContainerRemove(ctx, localEndpointsContainerName, types.ContainerRemoveOptions{})
if err != nil {
if strings.Contains(strings.ToLower(err.Error()), "no such container") {
// The containers in the network were already removed by the user using "docker rm", do nothing.
return
}
logrus.Fatalf("Failed to remove %s container due to %v", localEndpointsContainerName, err)
}
logrus.Infof("Removed the %s container successfully, removing the %s network...",
localEndpointsContainerName, EcsLocalNetworkName)
}

func removeLocalNetwork(d networkRemover) {
ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

err := d.NetworkRemove(ctx, EcsLocalNetworkName)
if err != nil {
if strings.Contains(strings.ToLower(err.Error()), "no such network") {
// The network was removed, do nothing.
return
}
logrus.Fatalf("Failed to remove %s network due to %v", EcsLocalNetworkName, err)
}
logrus.Infof("Removed the %s network successfully", EcsLocalNetworkName)
}
Loading