Skip to content

Commit

Permalink
Merge #30146
Browse files Browse the repository at this point in the history
30146: roachtest: use separate log dirs for separate runs r=andreimatei a=andreimatei

Before this patch, roachtest run --count <n> would overwrite the logs
dirs n times (in fact I think it was technically worse - there's one dir
per node and inside there non-unique file would be overwritten and
otherwise stale files would be left from the run the produced it).
This patch moves to unique dirs per run, and otherwise rationalizes the
code around the log and artifact locations.
Each top level tests gets different dirs for the different runs. If
`--count` is not specified, nothing changes.
After this patch, when running:
roachtest run clock acceptance/event-log acceptance/status-server --count=2

you get the following tree structure.

artifacts
├── acceptance
│   ├── run_1
│   │   ├── event-log
│   │   ├── status-server
│   ├── run_2
│   │   ├── event-log
│   │   ├── status-server
└── clock
    ├── run_1
    │   ├── jump
    │   │   ├── large_backward_disabled
    │   │   ├── large_backward_enabled
    └── run_2
        │   ├── large_backward_disabled
        ├── jump
        │   ├── large_backward_disabled
        │   ├── large_forward_enabled

When --count is not specified, nothing changes.

Release note: None

Co-authored-by: Andrei Matei <[email protected]>
  • Loading branch information
craig[bot] and andreimatei committed Oct 8, 2018
2 parents 22b0534 + 8fdb36e commit de935e3
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 45 deletions.
36 changes: 26 additions & 10 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ import (
)

var (
local bool
local bool
// Path to a local dir where the test logs and artifacts collected from
// cluster will be placed.
artifacts string
cockroach string
encrypt bool
Expand Down Expand Up @@ -336,6 +338,9 @@ type testI interface {
Fatal(args ...interface{})
Fatalf(format string, args ...interface{})
Failed() bool
// Path to a directory where the test is supposed to store its log and other
// artifacts.
ArtifactsDir() string
}

// TODO(tschottdorf): Consider using a more idiomatic approach in which options
Expand Down Expand Up @@ -551,7 +556,8 @@ func newCluster(ctx context.Context, t testI, nodes []nodeSpec) *cluster {
t.Fatalf("TODO(peter): unsupported nodes spec: %v", nodes)
}

l, err := rootLogger(t.Name())
logPath := filepath.Join(t.ArtifactsDir(), "test.log")
l, err := rootLogger(logPath)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -640,7 +646,8 @@ func newCluster(ctx context.Context, t testI, nodes []nodeSpec) *cluster {
// clone creates a new cluster object that refers to the same cluster as the
// receiver, but is associated with the specified test.
func (c *cluster) clone(t *test) *cluster {
l, err := rootLogger(t.Name())
logPath := filepath.Join(t.ArtifactsDir(), "test.log")
l, err := rootLogger(logPath)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -676,18 +683,27 @@ func (c *cluster) Node(i int) nodeListOption {
return c.Range(i, i)
}

func (c *cluster) FetchLogs(ctx context.Context) {
// FetchLogs downloads the logs from the cluster using `roachprod get`.
// The logs will be placed in the test's artifacts dir.
func (c *cluster) FetchLogs(ctx context.Context) error {
if c.nodes == 0 {
// For tests.
return nil
}

c.l.Printf("fetching logs\n")
c.status("fetching logs")

// Don't hang forever if we can't fetch the logs.
execCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
defer cancel()

if c.nodes == 0 {
// For tests.
return
path := filepath.Join(c.t.ArtifactsDir(), "logs")
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
c.status("retrieving logs")
_ = execCmd(execCtx, c.l, roachprod, "get", c.name, "logs",
filepath.Join(artifacts, teamCityNameEscape(c.t.Name()), "logs"))

return execCmd(execCtx, c.l, roachprod, "get", c.name, "logs" /* src */, path /* dest */)
}

func (c *cluster) Destroy(ctx context.Context) {
Expand Down
22 changes: 15 additions & 7 deletions pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,18 @@ func TestClusterNodes(t *testing.T) {
}
}

type testWrapper struct {
*testing.T
}

func (t testWrapper) ArtifactsDir() string {
return ""
}

func TestClusterMonitor(t *testing.T) {
logger := &logger{stdout: os.Stdout, stderr: os.Stderr}
t.Run(`success`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(context.Context) error { return nil })
if err := m.wait(`echo`, `1`); err != nil {
Expand All @@ -69,7 +77,7 @@ func TestClusterMonitor(t *testing.T) {
})

t.Run(`dead`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(ctx context.Context) error {
<-ctx.Done()
Expand All @@ -85,7 +93,7 @@ func TestClusterMonitor(t *testing.T) {
})

t.Run(`worker-fail`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(context.Context) error {
return errors.New(`worker-fail`)
Expand All @@ -103,7 +111,7 @@ func TestClusterMonitor(t *testing.T) {
})

t.Run(`wait-fail`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(ctx context.Context) error {
<-ctx.Done()
Expand All @@ -123,7 +131,7 @@ func TestClusterMonitor(t *testing.T) {
})

t.Run(`wait-ok`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(ctx context.Context) error {
<-ctx.Done()
Expand All @@ -145,7 +153,7 @@ func TestClusterMonitor(t *testing.T) {
// them finish pretty soon (think stress testing). As a matter of fact, `make test` waits
// for these child goroutines to finish (so these tests take seconds).
t.Run(`worker-fd-error`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(ctx context.Context) error {
defer func() {
Expand All @@ -172,7 +180,7 @@ hi
}
})
t.Run(`worker-fd-fatal`, func(t *testing.T) {
c := &cluster{t: t, l: logger}
c := &cluster{t: testWrapper{t}, l: logger}
m := newMonitor(context.Background(), c)
m.Go(func(ctx context.Context) error {
err := execCmd(ctx, logger, "/bin/bash", "-c", "echo foo && sleep 3& wait")
Expand Down
30 changes: 17 additions & 13 deletions pkg/cmd/roachtest/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
)

type loggerConfig struct {
// prefix, if set, is applied to lines written to stderr/stdout. It is not
// applied to lines written to a log file.
prefix string
stdout, stderr io.Writer
}
Expand Down Expand Up @@ -61,14 +63,14 @@ var quietStderr quietStderrOption
// TeamCity build log, if running in CI), while creating a non-interleaved
// record in the build artifacts.
type logger struct {
name string
path string
file *os.File
stdout, stderr io.Writer
}

// newLogger constructs a new logger object. Not intended for direct
// use. Please use logger.ChildLogger instead.
func (cfg *loggerConfig) newLogger(name, filename string) (*logger, error) {
func (cfg *loggerConfig) newLogger(path string) (*logger, error) {
if artifacts == "" {
// Log to stdout/stderr if there is no artifacts directory.
return &logger{
Expand All @@ -77,12 +79,11 @@ func (cfg *loggerConfig) newLogger(name, filename string) (*logger, error) {
}, nil
}

path := filepath.Join(artifacts, teamCityNameEscape(name), filename)
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return nil, err
}

f, err := os.Create(path + ".log")
f, err := os.Create(path)
if err != nil {
return nil, err
}
Expand All @@ -98,22 +99,22 @@ func (cfg *loggerConfig) newLogger(name, filename string) (*logger, error) {
}

return &logger{
name: name,
path: path,
file: f,
stdout: newWriter(cfg.stdout),
stderr: newWriter(cfg.stderr),
}, nil
}

func rootLogger(name string) (*logger, error) {
func rootLogger(path string) (*logger, error) {
var stdout, stderr io.Writer
// Log to stdout/stderr if we're not running tests in parallel.
if parallelism == 1 {
stdout = os.Stdout
stderr = os.Stderr
}
cfg := &loggerConfig{stdout: stdout, stderr: stderr}
return cfg.newLogger(name, "test")
return cfg.newLogger(path)
}

func (l *logger) close() {
Expand All @@ -122,29 +123,32 @@ func (l *logger) close() {
}
}

// ChildLogger constructs a new logger which logs to the specified
// file. Control of the prefix and teeing of stdout/stdout can be controlled by
// logger options.
// ChildLogger constructs a new logger which logs to the specified file. The
// prefix and teeing of stdout/stdout can be controlled by logger options.
// If the parent logger was logging to a file, the new logger will log to a file
// in the same dir called <name>.log.
func (l *logger) ChildLogger(name string, opts ...loggerOption) (*logger, error) {
// If the parent logger is not logging to a file, then the child will not
// either. However, the child will write to stdout/stderr with a prefix.
if l.file == nil {
p := []byte(name + ": ")
return &logger{
name: name,
path: l.path,
stdout: &prefixWriter{out: l.stdout, prefix: p},
stderr: &prefixWriter{out: l.stderr, prefix: p},
}, nil
}

cfg := &loggerConfig{
prefix: name + ": ",
prefix: name + ": ", // might be overridden by opts
stdout: l.stdout,
stderr: l.stderr,
}
for _, opt := range opts {
opt.apply(cfg)
}

return cfg.newLogger(l.name, name)
return cfg.newLogger(filepath.Join(filepath.Dir(l.path), name+".log"))
}

func (l *logger) Printf(f string, args ...interface{}) {
Expand Down
Loading

0 comments on commit de935e3

Please sign in to comment.