Merge #30146

30146: roachtest: use separate log dirs for separate runs r=andreimatei a=andreimatei Before this patch, roachtest run --count <n> would overwrite the logs dirs n times (in fact I think it was technically worse - there's one dir per node and inside there non-unique file would be overwritten and otherwise stale files would be left from the run the produced it). This patch moves to unique dirs per run, and otherwise rationalizes the code around the log and artifact locations. Each top level tests gets different dirs for the different runs. If `--count` is not specified, nothing changes. After this patch, when running: roachtest run clock acceptance/event-log acceptance/status-server --count=2 you get the following tree structure. artifacts ├── acceptance │ ├── run_1 │ │ ├── event-log │ │ ├── status-server │ ├── run_2 │ │ ├── event-log │ │ ├── status-server └── clock ├── run_1 │ ├── jump │ │ ├── large_backward_disabled │ │ ├── large_backward_enabled └── run_2 │ ├── large_backward_disabled ├── jump │ ├── large_backward_disabled │ ├── large_forward_enabled When --count is not specified, nothing changes. Release note: None Co-authored-by: Andrei Matei <[email protected]>
cockroachdb · Oct 8, 2018 · de935e3 · de935e3
2 parents 22b0534 + 8fdb36e
commit de935e3
Show file tree

Hide file tree

Showing 4 changed files with 124 additions and 45 deletions.
diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go
@@ -50,7 +50,9 @@ import (
 )
 
 var (
-	local                 bool
+	local bool
+	// Path to a local dir where the test logs and artifacts collected from
+	// cluster will be placed.
 	artifacts             string
 	cockroach             string
 	encrypt               bool
@@ -336,6 +338,9 @@ type testI interface {
 	Fatal(args ...interface{})
 	Fatalf(format string, args ...interface{})
 	Failed() bool
+	// Path to a directory where the test is supposed to store its log and other
+	// artifacts.
+	ArtifactsDir() string
 }
 
 // TODO(tschottdorf): Consider using a more idiomatic approach in which options
@@ -551,7 +556,8 @@ func newCluster(ctx context.Context, t testI, nodes []nodeSpec) *cluster {
 		t.Fatalf("TODO(peter): unsupported nodes spec: %v", nodes)
 	}
 
-	l, err := rootLogger(t.Name())
+	logPath := filepath.Join(t.ArtifactsDir(), "test.log")
+	l, err := rootLogger(logPath)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -640,7 +646,8 @@ func newCluster(ctx context.Context, t testI, nodes []nodeSpec) *cluster {
 // clone creates a new cluster object that refers to the same cluster as the
 // receiver, but is associated with the specified test.
 func (c *cluster) clone(t *test) *cluster {
-	l, err := rootLogger(t.Name())
+	logPath := filepath.Join(t.ArtifactsDir(), "test.log")
+	l, err := rootLogger(logPath)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -676,18 +683,27 @@ func (c *cluster) Node(i int) nodeListOption {
 	return c.Range(i, i)
 }
 
-func (c *cluster) FetchLogs(ctx context.Context) {
+// FetchLogs downloads the logs from the cluster using `roachprod get`.
+// The logs will be placed in the test's artifacts dir.
+func (c *cluster) FetchLogs(ctx context.Context) error {
+	if c.nodes == 0 {
+		// For tests.
+		return nil
+	}
+
+	c.l.Printf("fetching logs\n")
+	c.status("fetching logs")
+
 	// Don't hang forever if we can't fetch the logs.
 	execCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
 	defer cancel()
 
-	if c.nodes == 0 {
-		// For tests.
-		return
+	path := filepath.Join(c.t.ArtifactsDir(), "logs")
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return err
 	}
-	c.status("retrieving logs")
-	_ = execCmd(execCtx, c.l, roachprod, "get", c.name, "logs",
-		filepath.Join(artifacts, teamCityNameEscape(c.t.Name()), "logs"))
+
+	return execCmd(execCtx, c.l, roachprod, "get", c.name, "logs" /* src */, path /* dest */)
 }
 
 func (c *cluster) Destroy(ctx context.Context) {

diff --git a/pkg/cmd/roachtest/cluster_test.go b/pkg/cmd/roachtest/cluster_test.go
@@ -57,10 +57,18 @@ func TestClusterNodes(t *testing.T) {
 	}
 }
 
+type testWrapper struct {
+	*testing.T
+}
+
+func (t testWrapper) ArtifactsDir() string {
+	return ""
+}
+
 func TestClusterMonitor(t *testing.T) {
 	logger := &logger{stdout: os.Stdout, stderr: os.Stderr}
 	t.Run(`success`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(context.Context) error { return nil })
 		if err := m.wait(`echo`, `1`); err != nil {
@@ -69,7 +77,7 @@ func TestClusterMonitor(t *testing.T) {
 	})
 
 	t.Run(`dead`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(ctx context.Context) error {
 			<-ctx.Done()
@@ -85,7 +93,7 @@ func TestClusterMonitor(t *testing.T) {
 	})
 
 	t.Run(`worker-fail`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(context.Context) error {
 			return errors.New(`worker-fail`)
@@ -103,7 +111,7 @@ func TestClusterMonitor(t *testing.T) {
 	})
 
 	t.Run(`wait-fail`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(ctx context.Context) error {
 			<-ctx.Done()
@@ -123,7 +131,7 @@ func TestClusterMonitor(t *testing.T) {
 	})
 
 	t.Run(`wait-ok`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(ctx context.Context) error {
 			<-ctx.Done()
@@ -145,7 +153,7 @@ func TestClusterMonitor(t *testing.T) {
 	// them finish pretty soon (think stress testing). As a matter of fact, `make test` waits
 	// for these child goroutines to finish (so these tests take seconds).
 	t.Run(`worker-fd-error`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(ctx context.Context) error {
 			defer func() {
@@ -172,7 +180,7 @@ hi
 		}
 	})
 	t.Run(`worker-fd-fatal`, func(t *testing.T) {
-		c := &cluster{t: t, l: logger}
+		c := &cluster{t: testWrapper{t}, l: logger}
 		m := newMonitor(context.Background(), c)
 		m.Go(func(ctx context.Context) error {
 			err := execCmd(ctx, logger, "/bin/bash", "-c", "echo foo && sleep 3& wait")

diff --git a/pkg/cmd/roachtest/log.go b/pkg/cmd/roachtest/log.go
@@ -25,6 +25,8 @@ import (
 )
 
 type loggerConfig struct {
+	// prefix, if set, is applied to lines written to stderr/stdout. It is not
+	// applied to lines written to a log file.
 	prefix         string
 	stdout, stderr io.Writer
 }
@@ -61,14 +63,14 @@ var quietStderr quietStderrOption
 // TeamCity build log, if running in CI), while creating a non-interleaved
 // record in the build artifacts.
 type logger struct {
-	name           string
+	path           string
 	file           *os.File
 	stdout, stderr io.Writer
 }
 
 // newLogger constructs a new logger object. Not intended for direct
 // use. Please use logger.ChildLogger instead.
-func (cfg *loggerConfig) newLogger(name, filename string) (*logger, error) {
+func (cfg *loggerConfig) newLogger(path string) (*logger, error) {
 	if artifacts == "" {
 		// Log to stdout/stderr if there is no artifacts directory.
 		return &logger{
@@ -77,12 +79,11 @@ func (cfg *loggerConfig) newLogger(name, filename string) (*logger, error) {
 		}, nil
 	}
 
-	path := filepath.Join(artifacts, teamCityNameEscape(name), filename)
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return nil, err
 	}
 
-	f, err := os.Create(path + ".log")
+	f, err := os.Create(path)
 	if err != nil {
 		return nil, err
 	}
@@ -98,22 +99,22 @@ func (cfg *loggerConfig) newLogger(name, filename string) (*logger, error) {
 	}
 
 	return &logger{
-		name:   name,
+		path:   path,
 		file:   f,
 		stdout: newWriter(cfg.stdout),
 		stderr: newWriter(cfg.stderr),
 	}, nil
 }
 
-func rootLogger(name string) (*logger, error) {
+func rootLogger(path string) (*logger, error) {
 	var stdout, stderr io.Writer
 	// Log to stdout/stderr if we're not running tests in parallel.
 	if parallelism == 1 {
 		stdout = os.Stdout
 		stderr = os.Stderr
 	}
 	cfg := &loggerConfig{stdout: stdout, stderr: stderr}
-	return cfg.newLogger(name, "test")
+	return cfg.newLogger(path)
 }
 
 func (l *logger) close() {
@@ -122,29 +123,32 @@ func (l *logger) close() {
 	}
 }
 
-// ChildLogger constructs a new logger which logs to the specified
-// file. Control of the prefix and teeing of stdout/stdout can be controlled by
-// logger options.
+// ChildLogger constructs a new logger which logs to the specified file. The
+// prefix and teeing of stdout/stdout can be controlled by logger options.
+// If the parent logger was logging to a file, the new logger will log to a file
+// in the same dir called <name>.log.
 func (l *logger) ChildLogger(name string, opts ...loggerOption) (*logger, error) {
+	// If the parent logger is not logging to a file, then the child will not
+	// either. However, the child will write to stdout/stderr with a prefix.
 	if l.file == nil {
 		p := []byte(name + ": ")
 		return &logger{
-			name:   name,
+			path:   l.path,
 			stdout: &prefixWriter{out: l.stdout, prefix: p},
 			stderr: &prefixWriter{out: l.stderr, prefix: p},
 		}, nil
 	}
 
 	cfg := &loggerConfig{
-		prefix: name + ": ",
+		prefix: name + ": ", // might be overridden by opts
 		stdout: l.stdout,
 		stderr: l.stderr,
 	}
 	for _, opt := range opts {
 		opt.apply(cfg)
 	}
 
-	return cfg.newLogger(l.name, name)
+	return cfg.newLogger(filepath.Join(filepath.Dir(l.path), name+".log"))
 }
 
 func (l *logger) Printf(f string, args ...interface{}) {