Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Uploading docker logs on test failures #3236

Merged
merged 16 commits into from
Mar 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/e2e-compatibility-workflow-call.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,12 @@ jobs:
CHAIN_B_TAG: "${{ matrix.chain-b }}"
CHAIN_BINARY: "${{ matrix.chain-binary }}"
RELAYER_TYPE: "${{ matrix.relayer-type }}"
- name: Upload Diagnostics
uses: actions/upload-artifact@v3
# we only want to upload logs on test failures.
if: ${{ failure() }}
continue-on-error: true
with:
name: "${{ matrix.entrypoint }}-${{ matrix.test }}"
path: e2e/diagnostics
retention-days: 5
14 changes: 14 additions & 0 deletions .github/workflows/e2e-test-workflow-call.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ on:
required: false
type: boolean
default: false
upload-logs:
description: "Specify flag to indicate that logs should be uploaded on failure"
required: false
type: boolean
default: false

env:
REGISTRY: ghcr.io
Expand Down Expand Up @@ -160,6 +165,15 @@ jobs:
with:
go-version: 1.19
- name: Run e2e Test
id: e2e_test
run: |
cd e2e
make e2e-test entrypoint=${{ matrix.entrypoint }} test=${{ matrix.test }}
- name: Upload Diagnostics
uses: actions/upload-artifact@v3
if: ${{ failure() && inputs.upload-logs }}
continue-on-error: true
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this means uploading logs failing will not cause a test failure.

with:
name: "${{ matrix.entrypoint }}-${{ matrix.test }}"
path: e2e/diagnostics
retention-days: 5
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

arbitrary value, can probably increase this without any problems.

1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jobs:
secrets: inherit
with:
build-and-push-docker-image: true
upload-logs: true
chain-image: ghcr.io/cosmos/ibc-go-simd
chain-a-tag: "${{ needs.determine-image-tag.outputs.simd-tag }}"
chain-b-tag: "${{ needs.determine-image-tag.outputs.simd-tag }}"
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mytestnet
coverage.txt
profile.out
sim_log_file
e2e/diagnostics

# Vagrant
.vagrant/
Expand All @@ -57,4 +58,4 @@ dependency-graph.png

# Go
go.work
go.work.sum
go.work.sum
6 changes: 6 additions & 0 deletions e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@ json matrix files under .github/compatibility-test-matrices and is equivalent to

This issue doesn't seem to occur on other operating systems.

### Accessing Logs

- When a test fails in GitHub. The logs of the test will be uploaded (viewable in the summary page of the workflow). Note: There
may be some discrepancy in the logs collected and the output of interchain test. The containers may run for a some
time after the logs are collected, resulting in the displayed logs to differ slightly.

## Importable Workflow

This repository contains an [importable workflow](https://github.com/cosmos/ibc-go/blob/bc963bcfd115a0e06b8196b114496db5ea011247/.github/workflows/e2e-compatibility-workflow-call.yaml) that can be used from any other repository to test chain upgrades. The workflow
Expand Down
72 changes: 72 additions & 0 deletions e2e/dockerutil/dockerutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package dockerutil

import (
"archive/tar"
"context"
"fmt"
"io"
"path"
"testing"

dockertypes "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/filters"
dockerclient "github.com/docker/docker/client"
)

const testLabel = "ibc-test"

// GetTestContainers returns all docker containers that have been created by interchain test.
func GetTestContainers(t *testing.T, ctx context.Context, dc *dockerclient.Client) ([]dockertypes.Container, error) {
t.Helper()

testContainers, err := dc.ContainerList(ctx, dockertypes.ContainerListOptions{
All: true,
Filters: filters.NewArgs(
// see: https://github.com/strangelove-ventures/interchaintest/blob/0bdc194c2aa11aa32479f32b19e1c50304301981/internal/dockerutil/setup.go#L31-L36
// for the label needed to identify test containers.
filters.Arg("label", testLabel+"="+t.Name()),
),
})

if err != nil {
return nil, fmt.Errorf("failed listing containers: %s", err)
}

return testContainers, nil
}

// GetContainerLogs returns the logs of a container as a byte array.
func GetContainerLogs(ctx context.Context, dc *dockerclient.Client, containerName string) ([]byte, error) {
readCloser, err := dc.ContainerLogs(ctx, containerName, dockertypes.ContainerLogsOptions{
ShowStdout: true,
ShowStderr: true,
})
if err != nil {
return nil, fmt.Errorf("failed reading logs in test cleanup: %s", err)
}
return io.ReadAll(readCloser)
}

// GetFileContentsFromContainer reads the contents of a specific file from a container.
func GetFileContentsFromContainer(ctx context.Context, dc *dockerclient.Client, containerID, absolutePath string) ([]byte, error) {
readCloser, _, err := dc.CopyFromContainer(ctx, containerID, absolutePath)
if err != nil {
return nil, fmt.Errorf("copying from container: %w", err)
}

defer readCloser.Close()

fileName := path.Base(absolutePath)
tr := tar.NewReader(readCloser)

hdr, err := tr.Next()
if err != nil {
return nil, err
}
Comment on lines +62 to +65
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function is assuming as single file exists with the name provided, if this is no longer a valid assumption to make we can make this function more flexible.


if hdr.Name != fileName {
return nil, fmt.Errorf("expected to find %s but found %s", fileName, hdr.Name)
}

return io.ReadAll(tr)
}
157 changes: 157 additions & 0 deletions e2e/testsuite/diagnostics/diagnostics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package diagnostics

import (
"context"
"fmt"
"os"
ospath "path"
"strings"
"testing"

dockertypes "github.com/docker/docker/api/types"
dockerclient "github.com/docker/docker/client"
"github.com/strangelove-ventures/interchaintest/v7/ibc"

"github.com/cosmos/ibc-go/e2e/dockerutil"
"github.com/cosmos/ibc-go/e2e/testconfig"
)

const (
e2eDir = "e2e"
)

// Collect can be used in `t.Cleanup` and will copy all the of the container logs and relevant files
// into e2e/<test-suite>/<test-name>.log. These log files will be uploaded to GH upon test failure.
func Collect(t *testing.T, dc *dockerclient.Client, cfg testconfig.ChainOptions) {
t.Helper()

if !t.Failed() {
t.Logf("test passed, not uploading logs")
return
}

t.Logf("writing logs for test: %s", t.Name())

ctx := context.TODO()
e2eDir, err := getE2EDir(t)
if err != nil {
t.Logf("failed finding log directory: %s", err)
return
}

logsDir := fmt.Sprintf("%s/diagnostics", e2eDir)

if err := os.MkdirAll(fmt.Sprintf("%s/%s", logsDir, t.Name()), 0750); err != nil {
t.Logf("failed creating logs directory in test cleanup: %s", err)
return
}

testContainers, err := dockerutil.GetTestContainers(t, ctx, dc)
if err != nil {
t.Logf("failed listing containers test cleanup: %s", err)
return
}

for _, container := range testContainers {
containerName := getContainerName(t, container)
containerDir := fmt.Sprintf("%s/%s/%s", logsDir, t.Name(), containerName)
if err := os.MkdirAll(containerDir, 0750); err != nil {
t.Logf("failed creating logs directory for container %s: %s", containerDir, err)
continue
}

logsBz, err := dockerutil.GetContainerLogs(ctx, dc, container.ID)
if err != nil {
t.Logf("failed reading logs in test cleanup: %s", err)
continue
}

logFile := fmt.Sprintf("%s/%s.log", containerDir, containerName)
if err := os.WriteFile(logFile, logsBz, 0750); err != nil {
t.Logf("failed writing log file for container %s in test cleanup: %s", containerName, err)
continue
}

t.Logf("successfully wrote log file %s", logFile)
diagnosticFiles := chainDiagnosticAbsoluteFilePaths(*cfg.ChainAConfig)
diagnosticFiles = append(diagnosticFiles, chainDiagnosticAbsoluteFilePaths(*cfg.ChainBConfig)...)
Comment on lines +76 to +77
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: this will mean that we are basically trying to get the chain-a files from chain-b and chain-b files from chain-a. They will just be skipped. I chose to make it simpler and just try them all for every container instead of finding only the correct files on a per container basis which would make things a little more complex. I think this doesn't really matter, as this is simply a tear-down function anyway.


for _, absoluteFilePathInContainer := range diagnosticFiles {
localFilePath := ospath.Join(containerDir, ospath.Base(absoluteFilePathInContainer))
if err := fetchAndWriteDiagnosticsFile(ctx, dc, container.ID, localFilePath, absoluteFilePathInContainer); err != nil {
t.Logf("failed to fetch and write file %s for container %s in test cleanup: %s", absoluteFilePathInContainer, containerName, err)
continue
}
t.Logf("successfully wrote diagnostics file %s", absoluteFilePathInContainer)
}
}
}

// getContainerName returns a either the ID of the container or stripped down human-readable
// version of the name if the name is non-empty.
//
// Note: You should still always use the ID when interacting with the docker client.
func getContainerName(t *testing.T, container dockertypes.Container) string {
t.Helper()
// container will always have an id, by may not have a name.
containerName := container.ID
if len(container.Names) > 0 {
containerName = container.Names[0]
// remove the test name from the container as the folder structure will provide this
// information already.
containerName = strings.TrimRight(containerName, "-"+t.Name())
containerName = strings.TrimLeft(containerName, "/")
}
return containerName
}

// fetchAndWriteDiagnosticsFile fetches the contents of a single file from the given container id and writes
// the contents of the file to a local path provided.
func fetchAndWriteDiagnosticsFile(ctx context.Context, dc *dockerclient.Client, containerID, localPath, absoluteFilePathInContainer string) error {
fileBz, err := dockerutil.GetFileContentsFromContainer(ctx, dc, containerID, absoluteFilePathInContainer)
if err != nil {
return err
}

filePath := ospath.Join(localPath)
if err := os.WriteFile(filePath, fileBz, 0750); err != nil {
return err
}

return nil
}

// chainDiagnosticAbsoluteFilePaths returns a slice of absolute file paths (in the containers) which are the files that should be
// copied locally when fetching diagnostics.
func chainDiagnosticAbsoluteFilePaths(cfg ibc.ChainConfig) []string {
return []string{
fmt.Sprintf("/var/cosmos-chain/%s/config/genesis.json", cfg.Name),
fmt.Sprintf("/var/cosmos-chain/%s/config/app.toml", cfg.Name),
fmt.Sprintf("/var/cosmos-chain/%s/config/config.toml", cfg.Name),
fmt.Sprintf("/var/cosmos-chain/%s/config/client.toml", cfg.Name),
}
}

// getE2EDir finds the e2e directory above the test.
func getE2EDir(t *testing.T) (string, error) {
t.Helper()

wd, err := os.Getwd()
if err != nil {
return "", err
}

const maxAttempts = 100
count := 0
for ; !strings.HasSuffix(wd, e2eDir) || count > maxAttempts; wd = ospath.Dir(wd) {
count++
}
Comment on lines +146 to +148
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused by this. Is it possible to run these e2e's when your wd isn't the e2e directory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not strictly necessary, I just wanted to make sure that the test errors out if for some reason this case is ever true instead of hanging forever. It's probably safe to remove this entirely but I just went the extra careful route


// arbitrary value to avoid getting stuck in an infinite loop if this is called
// in a context where the e2e directory does not exist.
if count > maxAttempts {
return "", fmt.Errorf("unable to find e2e directory after %d tries", maxAttempts)
}

return wd, nil
}
16 changes: 13 additions & 3 deletions e2e/testsuite/testsuite.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/cosmos/ibc-go/e2e/relayer"
"github.com/cosmos/ibc-go/e2e/semverutil"
"github.com/cosmos/ibc-go/e2e/testconfig"
"github.com/cosmos/ibc-go/e2e/testsuite/diagnostics"
"github.com/cosmos/ibc-go/e2e/testvalues"
controllertypes "github.com/cosmos/ibc-go/v7/modules/apps/27-interchain-accounts/controller/types"
feetypes "github.com/cosmos/ibc-go/v7/modules/apps/29-fee/types"
Expand Down Expand Up @@ -442,17 +443,26 @@ func (s *E2ETestSuite) AssertPacketRelayed(ctx context.Context, chain *cosmos.Co
// test and can be retrieved with GetChains.
func (s *E2ETestSuite) createCosmosChains(chainOptions testconfig.ChainOptions) (*cosmos.CosmosChain, *cosmos.CosmosChain) {
client, network := interchaintest.DockerSetup(s.T())
t := s.T()

s.logger = zap.NewExample()
s.DockerClient = client
s.network = network

logger := zaptest.NewLogger(s.T())
logger := zaptest.NewLogger(t)

numValidators, numFullNodes := getValidatorsAndFullNodes()

chainA := cosmos.NewCosmosChain(s.T().Name(), *chainOptions.ChainAConfig, numValidators, numFullNodes, logger)
chainB := cosmos.NewCosmosChain(s.T().Name(), *chainOptions.ChainBConfig, numValidators, numFullNodes, logger)
chainA := cosmos.NewCosmosChain(t.Name(), *chainOptions.ChainAConfig, numValidators, numFullNodes, logger)
chainB := cosmos.NewCosmosChain(t.Name(), *chainOptions.ChainBConfig, numValidators, numFullNodes, logger)

// this is intentionally called after the interchaintest.DockerSetup function. The above function registers a
// cleanup task which deletes all containers. By registering a cleanup function afterwards, it is executed first
// this allows us to process the logs before the containers are removed.
colin-axner marked this conversation as resolved.
Show resolved Hide resolved
t.Cleanup(func() {
diagnostics.Collect(t, s.DockerClient, chainOptions)
})
colin-axner marked this conversation as resolved.
Show resolved Hide resolved

return chainA, chainB
}

Expand Down