forked from hashicorp/nomad
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Snapshot restore progress (hashicorp#490)
When restoring a snapshot (on startup, installed from the leader, or during recovery) the logs are extremely terse. There are typically bookend messages indicating that a restore is going to happen, and that it is complete, but there's a big dead space in the middle. For small snapshots this is probably fine, but for larger multi-GB snapshots this can stretch out and can be unnerving as an operator to know if it's stuck or still making progress. This PR adjusts the logging to indicate a simple progress log message every 10s about overall completion in bytes-consumed.
- Loading branch information
Showing
6 changed files
with
320 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
package raft | ||
|
||
import ( | ||
"context" | ||
"io" | ||
"sync" | ||
"time" | ||
|
||
hclog "github.com/hashicorp/go-hclog" | ||
) | ||
|
||
const ( | ||
snapshotRestoreMonitorInterval = 10 * time.Second | ||
) | ||
|
||
type snapshotRestoreMonitor struct { | ||
logger hclog.Logger | ||
cr CountingReader | ||
size int64 | ||
networkTransfer bool | ||
|
||
once sync.Once | ||
cancel func() | ||
doneCh chan struct{} | ||
} | ||
|
||
func startSnapshotRestoreMonitor( | ||
logger hclog.Logger, | ||
cr CountingReader, | ||
size int64, | ||
networkTransfer bool, | ||
) *snapshotRestoreMonitor { | ||
ctx, cancel := context.WithCancel(context.Background()) | ||
|
||
m := &snapshotRestoreMonitor{ | ||
logger: logger, | ||
cr: cr, | ||
size: size, | ||
networkTransfer: networkTransfer, | ||
cancel: cancel, | ||
doneCh: make(chan struct{}), | ||
} | ||
go m.run(ctx) | ||
return m | ||
} | ||
|
||
func (m *snapshotRestoreMonitor) run(ctx context.Context) { | ||
defer close(m.doneCh) | ||
|
||
ticker := time.NewTicker(snapshotRestoreMonitorInterval) | ||
defer ticker.Stop() | ||
|
||
ranOnce := false | ||
for { | ||
select { | ||
case <-ctx.Done(): | ||
if !ranOnce { | ||
m.runOnce() | ||
} | ||
return | ||
case <-ticker.C: | ||
m.runOnce() | ||
ranOnce = true | ||
} | ||
} | ||
} | ||
|
||
func (m *snapshotRestoreMonitor) runOnce() { | ||
readBytes := m.cr.Count() | ||
pct := float64(100*readBytes) / float64(m.size) | ||
|
||
message := "snapshot restore progress" | ||
if m.networkTransfer { | ||
message = "snapshot network transfer progress" | ||
} | ||
|
||
m.logger.Info(message, | ||
"read-bytes", readBytes, | ||
"percent-complete", hclog.Fmt("%0.2f%%", pct), | ||
) | ||
} | ||
|
||
func (m *snapshotRestoreMonitor) StopAndWait() { | ||
m.once.Do(func() { | ||
m.cancel() | ||
<-m.doneCh | ||
}) | ||
} | ||
|
||
type CountingReader interface { | ||
io.Reader | ||
Count() int64 | ||
} | ||
|
||
type countingReader struct { | ||
reader io.Reader | ||
|
||
mu sync.Mutex | ||
bytes int64 | ||
} | ||
|
||
func (r *countingReader) Read(p []byte) (n int, err error) { | ||
n, err = r.reader.Read(p) | ||
r.mu.Lock() | ||
r.bytes += int64(n) | ||
r.mu.Unlock() | ||
return n, err | ||
} | ||
|
||
func (r *countingReader) Count() int64 { | ||
r.mu.Lock() | ||
defer r.mu.Unlock() | ||
return r.bytes | ||
} | ||
|
||
func newCountingReader(r io.Reader) *countingReader { | ||
return &countingReader{reader: r} | ||
} | ||
|
||
type countingReadCloser struct { | ||
*countingReader | ||
io.Closer | ||
} | ||
|
||
func newCountingReadCloser(rc io.ReadCloser) *countingReadCloser { | ||
return &countingReadCloser{ | ||
countingReader: newCountingReader(rc), | ||
Closer: rc, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.