diff --git a/pkg/cmd/roachtest/tests/cluster_to_cluster.go b/pkg/cmd/roachtest/tests/cluster_to_cluster.go index 469aef8622aa..113b1478a7e7 100644 --- a/pkg/cmd/roachtest/tests/cluster_to_cluster.go +++ b/pkg/cmd/roachtest/tests/cluster_to_cluster.go @@ -930,7 +930,14 @@ func (rd *replicationDriver) main(ctx context.Context) { latencyMonitor := rd.newMonitor(ctx) latencyMonitor.Go(func(ctx context.Context) error { - return lv.pollLatencyUntilJobSucceeds(ctx, rd.setup.dst.db, ingestionJobID, time.Second, workloadDoneCh) + if err := lv.pollLatencyUntilJobSucceeds(ctx, rd.setup.dst.db, ingestionJobID, time.Second, workloadDoneCh); err != nil { + // The latency poller may have failed because latency got too high. Grab a + // debug zip before the replication jobs spin down. + rd.fetchDebugZip(ctx, rd.setup.src.nodes, "latency_source_debug.zip") + rd.fetchDebugZip(ctx, rd.setup.dst.nodes, "latency_dest_debug.zip") + return err + } + return nil }) defer latencyMonitor.Wait() diff --git a/pkg/cmd/roachtest/tests/latency_verifier.go b/pkg/cmd/roachtest/tests/latency_verifier.go index 0a1758c410ef..000ae6920648 100644 --- a/pkg/cmd/roachtest/tests/latency_verifier.go +++ b/pkg/cmd/roachtest/tests/latency_verifier.go @@ -181,6 +181,10 @@ func (lv *latencyVerifier) pollLatencyUntilJobSucceeds( lv.logger.Printf("unexpected status: %s, error: %s", status, info.GetError()) return errors.Errorf("unexpected status: %s", status) } + if lv.targetSteadyLatency != 0 && lv.maxSeenSteadyLatency > lv.targetSteadyLatency { + return errors.Errorf("max latency was more than allowed: %s vs %s", + lv.maxSeenSteadyLatency, lv.targetSteadyLatency) + } } }