From e4ca4518066cfbdcd605638b788f52f9edc20df2 Mon Sep 17 00:00:00 2001 From: Phil Bracikowski <13472206+philjb@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:34:26 -0500 Subject: [PATCH 1/3] fix(tsi1/partition/test): fix data races in test code (#57) * fix(tsi1/partition/test): fix data races in test code This PR is like influxdata/influxdb#24613 but solves it with a setter method for MaxLogFileSize which allows unexporting that value and MaxLogFileAge. There are actually two places locks were needed in test code. The behavior of production code is unchanged. (cherry picked from commit f0235c4daf4b97769db932f7346c1d3aecf57f8f) --- tsdb/index/tsi1/index.go | 4 +- tsdb/index/tsi1/partition.go | 139 ++++++++++++++++-------------- tsdb/index/tsi1/partition_test.go | 6 +- 3 files changed, 80 insertions(+), 69 deletions(-) diff --git a/tsdb/index/tsi1/index.go b/tsdb/index/tsi1/index.go index cff2f4f29fb..44139412026 100644 --- a/tsdb/index/tsi1/index.go +++ b/tsdb/index/tsi1/index.go @@ -269,8 +269,8 @@ func (i *Index) Open() (rErr error) { i.partitions = make([]*Partition, i.PartitionN) for j := 0; j < len(i.partitions); j++ { p := NewPartition(i.sfile, filepath.Join(i.path, fmt.Sprint(j))) - p.MaxLogFileSize = i.maxLogFileSize - p.MaxLogFileAge = i.maxLogFileAge + p.maxLogFileSize = i.maxLogFileSize + p.maxLogFileAge = i.maxLogFileAge p.nosync = i.disableFsync p.logbufferSize = i.logfileBufferSize p.logger = i.logger.With(zap.String("tsi1_partition", fmt.Sprint(j+1))) diff --git a/tsdb/index/tsi1/partition.go b/tsdb/index/tsi1/partition.go index d9a7b3ed705..cb445bf66e5 100644 --- a/tsdb/index/tsi1/partition.go +++ b/tsdb/index/tsi1/partition.go @@ -39,8 +39,7 @@ const ManifestFileName = "MANIFEST" // Partition represents a collection of layered index files and WAL. type Partition struct { - // exported for tests - Mu sync.RWMutex + mu sync.RWMutex opened bool sfile *tsdb.SeriesFile // series lookup file @@ -70,8 +69,9 @@ type Partition struct { id string // id portion of path. // Log file compaction thresholds. - MaxLogFileSize int64 - MaxLogFileAge time.Duration + // Should be read/changed under the lock after a partition is opened. + maxLogFileSize int64 + maxLogFileAge time.Duration nosync bool // when true, flushing and syncing of LogFile will be disabled. logbufferSize int // the LogFile's buffer is set to this value. @@ -98,8 +98,7 @@ func NewPartition(sfile *tsdb.SeriesFile, path string) *Partition { sfile: sfile, seriesIDSet: tsdb.NewSeriesIDSet(), fileSet: &FileSet{}, - MaxLogFileSize: tsdb.DefaultMaxIndexLogFileSize, - MaxLogFileAge: tsdb.DefaultCompactFullWriteColdDuration, + maxLogFileSize: tsdb.DefaultMaxIndexLogFileSize, // compactionEnabled: true, compactionInterrupt: make(chan struct{}), @@ -111,6 +110,17 @@ func NewPartition(sfile *tsdb.SeriesFile, path string) *Partition { return p } +// SetMaxLogFileSize provides a setter for the partition setting of maxLogFileSize +// that is otherwise only available at creation time. Returns the previous value. +// Only for tests! +func (p *Partition) SetMaxLogFileSize(new int64) (old int64) { + p.mu.Lock() + old = p.maxLogFileSize + p.maxLogFileSize = new + p.mu.Unlock() + return old +} + // bytes estimates the memory footprint of this Partition, in bytes. func (p *Partition) bytes() int { var b int @@ -135,8 +145,8 @@ func (p *Partition) bytes() int { b += int(unsafe.Sizeof(p.fieldset)) + p.fieldset.Bytes() b += int(unsafe.Sizeof(p.path)) + len(p.path) b += int(unsafe.Sizeof(p.id)) + len(p.id) - b += int(unsafe.Sizeof(p.MaxLogFileSize)) - b += int(unsafe.Sizeof(p.MaxLogFileAge)) + b += int(unsafe.Sizeof(p.maxLogFileSize)) + b += int(unsafe.Sizeof(p.maxLogFileAge)) b += int(unsafe.Sizeof(p.compactionInterrupt)) b += int(unsafe.Sizeof(p.compactionsDisabled)) b += int(unsafe.Sizeof(p.logger)) @@ -151,8 +161,8 @@ var ErrIncompatibleVersion = errors.New("incompatible tsi1 index MANIFEST") // Open opens the partition. func (p *Partition) Open() (rErr error) { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() p.closing = make(chan struct{}) @@ -214,7 +224,7 @@ func (p *Partition) Open() (rErr error) { // Make first log file active, if within threshold. sz, _ := f.Stat() - if p.activeLogFile == nil && sz < p.MaxLogFileSize { + if p.activeLogFile == nil && sz < p.maxLogFileSize { p.activeLogFile = f } @@ -340,8 +350,8 @@ func (p *Partition) buildSeriesSet() error { // CurrentCompactionN returns the number of compactions currently running. func (p *Partition) CurrentCompactionN() int { - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() return p.currentCompactionN } @@ -368,12 +378,15 @@ func (p *Partition) Close() error { p.Wait() // Lock index and close remaining - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() - var err error + if p.fileSet == nil { + return nil + } // Close log files. + var err error for _, f := range p.fileSet.files { if localErr := f.Close(); localErr != nil { err = localErr @@ -403,8 +416,8 @@ func (p *Partition) SeriesFile() *tsdb.SeriesFile { return p.sfile } // NextSequence returns the next file identifier. func (p *Partition) NextSequence() int { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() return p.nextSequence() } @@ -446,8 +459,8 @@ func (p *Partition) manifest(newFileSet *FileSet) *Manifest { // SetManifestPathForTest is only to force a bad path in testing func (p *Partition) SetManifestPathForTest(path string) { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() p.manifestPathFn = func() string { return path } } @@ -458,16 +471,16 @@ func (p *Partition) WithLogger(logger *zap.Logger) { // SetFieldSet sets a shared field set from the engine. func (p *Partition) SetFieldSet(fs *tsdb.MeasurementFieldSet) { - p.Mu.Lock() + p.mu.Lock() p.fieldset = fs - p.Mu.Unlock() + p.mu.Unlock() } // FieldSet returns the fieldset. func (p *Partition) FieldSet() *tsdb.MeasurementFieldSet { - p.Mu.Lock() + p.mu.Lock() fs := p.fieldset - p.Mu.Unlock() + p.mu.Unlock() return fs } @@ -477,8 +490,8 @@ func (p *Partition) RetainFileSet() (*FileSet, error) { case <-p.closing: return nil, tsdb.ErrIndexClosing default: - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() return p.retainFileSet(), nil } } @@ -491,8 +504,8 @@ func (p *Partition) retainFileSet() *FileSet { // FileN returns the active files in the file set. func (p *Partition) FileN() int { - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() return len(p.fileSet.files) } @@ -672,12 +685,12 @@ func (p *Partition) DropMeasurement(name []byte) error { // Mark measurement as deleted. entries = append(entries, LogEntry{Flag: LogEntryMeasurementTombstoneFlag, Name: name}) - p.Mu.RLock() + p.mu.RLock() if err := p.activeLogFile.Writes(entries); err != nil { - p.Mu.RUnlock() + p.mu.RUnlock() return err } - p.Mu.RUnlock() + p.mu.RUnlock() // Check if the log file needs to be swapped. if err := p.CheckLogFile(); err != nil { @@ -705,14 +718,14 @@ func (p *Partition) createSeriesListIfNotExists(names [][]byte, tagsSlice []mode defer fs.Release() // Ensure fileset cannot change during insert. - p.Mu.RLock() + p.mu.RLock() // Insert series into log file. ids, err := p.activeLogFile.AddSeriesList(p.seriesIDSet, names, tagsSlice, tracker) if err != nil { - p.Mu.RUnlock() + p.mu.RUnlock() return nil, err } - p.Mu.RUnlock() + p.mu.RUnlock() if err := p.CheckLogFile(); err != nil { return nil, err @@ -723,8 +736,8 @@ func (p *Partition) createSeriesListIfNotExists(names [][]byte, tagsSlice []mode func (p *Partition) DropSeries(seriesID uint64) error { // Delete series from index. if err := func() error { - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() return p.activeLogFile.DeleteSeriesID(seriesID) }(); err != nil { return err @@ -743,8 +756,8 @@ func (p *Partition) DropSeriesList(seriesIDs []uint64) error { // Delete series from index. if err := func() error { - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() return p.activeLogFile.DeleteSeriesIDList(seriesIDs) }(); err != nil { return err @@ -911,14 +924,14 @@ func (p *Partition) AssignShard(k string, shardID uint64) {} // Compact requests a compaction of log files. func (p *Partition) Compact() { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() p.compact() } func (p *Partition) DisableCompactions() { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() p.compactionsDisabled++ select { @@ -934,8 +947,8 @@ func (p *Partition) DisableCompactions() { } func (p *Partition) EnableCompactions() { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() // Already enabled? if p.compactionsEnabled() { @@ -953,9 +966,9 @@ func (p *Partition) runPeriodicCompaction() { p.Compact() // Avoid a race when using Reopen in tests - p.Mu.RLock() + p.mu.RLock() closing := p.closing - p.Mu.RUnlock() + p.mu.RUnlock() // check for compactions once an hour (usually not necessary but a nice safety check) t := time.NewTicker(1 * time.Hour) @@ -978,8 +991,8 @@ func (p *Partition) runPeriodicCompaction() { // If checkRunning = true, only count as needing a compaction if there is not a compaction already // in progress for the level that would be compacted func (p *Partition) NeedsCompaction(checkRunning bool) bool { - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() if p.needsLogCompaction() { return true } @@ -1032,10 +1045,10 @@ func (p *Partition) compact() { p.currentCompactionN++ go func() { p.compactLogFile(logFile) - p.Mu.Lock() + p.mu.Lock() p.currentCompactionN-- p.levelCompacting[0] = false - p.Mu.Unlock() + p.mu.Unlock() p.Compact() }() } @@ -1075,10 +1088,10 @@ func (p *Partition) compact() { p.compactToLevel(files, level+1, interrupt) // Ensure compaction lock for the level is released. - p.Mu.Lock() + p.mu.Lock() p.levelCompacting[level] = false p.currentCompactionN-- - p.Mu.Unlock() + p.mu.Unlock() // Check for new compactions p.Compact() @@ -1156,8 +1169,8 @@ func (p *Partition) compactToLevel(files []*IndexFile, level int, interrupt <-ch // Obtain lock to swap in index file and write manifest. if err := func() (rErr error) { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() // Replace previous files with new index file. newFileSet := p.fileSet.MustReplace(IndexFiles(files).Files(), file) @@ -1215,14 +1228,14 @@ func (p *Partition) Rebuild() {} func (p *Partition) needsLogCompaction() bool { size := p.activeLogFile.Size() modTime := p.activeLogFile.ModTime() - return size >= p.MaxLogFileSize || (size > 0 && modTime.Before(time.Now().Add(-p.MaxLogFileAge))) + return size >= p.maxLogFileSize || (size > 0 && modTime.Before(time.Now().Add(-p.maxLogFileAge))) } func (p *Partition) CheckLogFile() error { // Check log file under read lock. needsCompaction := func() bool { - p.Mu.RLock() - defer p.Mu.RUnlock() + p.mu.RLock() + defer p.mu.RUnlock() return p.needsLogCompaction() }() if !needsCompaction { @@ -1230,8 +1243,8 @@ func (p *Partition) CheckLogFile() error { } // If file size exceeded then recheck under write lock and swap files. - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() return p.checkLogFile() } @@ -1261,9 +1274,9 @@ func (p *Partition) compactLogFile(logFile *LogFile) { return } - p.Mu.Lock() + p.mu.Lock() interrupt := p.compactionInterrupt - p.Mu.Unlock() + p.mu.Unlock() start := time.Now() @@ -1313,8 +1326,8 @@ func (p *Partition) compactLogFile(logFile *LogFile) { // Obtain lock to swap in index file and write manifest. if err := func() (rErr error) { - p.Mu.Lock() - defer p.Mu.Unlock() + p.mu.Lock() + defer p.mu.Unlock() // Replace previous log file with index file. newFileSet := p.fileSet.MustReplace([]File{logFile}, file) diff --git a/tsdb/index/tsi1/partition_test.go b/tsdb/index/tsi1/partition_test.go index 07d74be7a86..fefed38768e 100644 --- a/tsdb/index/tsi1/partition_test.go +++ b/tsdb/index/tsi1/partition_test.go @@ -119,7 +119,7 @@ func TestPartition_PrependLogFile_Write_Fail(t *testing.T) { t.Fatalf("error closing partition: %v", err) } }) - p.Partition.MaxLogFileSize = -1 + p.Partition.SetMaxLogFileSize(-1) fileN := p.FileN() p.CheckLogFile() if fileN >= p.FileN() { @@ -149,9 +149,7 @@ func TestPartition_Compact_Write_Fail(t *testing.T) { t.Fatalf("error closing partition: %v", err) } }) - p.Partition.Mu.Lock() - p.Partition.MaxLogFileSize = -1 - p.Partition.Mu.Unlock() + p.Partition.SetMaxLogFileSize(-1) fileN := p.FileN() p.Compact() if (1 + fileN) != p.FileN() { From 05c5af84acdd6ea80617d7cfd17b6f2616b8cecb Mon Sep 17 00:00:00 2001 From: Devan Date: Mon, 16 Sep 2024 17:07:48 -0500 Subject: [PATCH 2/3] feat: modify error handling to be more idiomatic closes https://github.com/influxdata/influxdb/issues/24042 --- tsdb/index/tsi1/partition.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tsdb/index/tsi1/partition.go b/tsdb/index/tsi1/partition.go index cb445bf66e5..df695c6a362 100644 --- a/tsdb/index/tsi1/partition.go +++ b/tsdb/index/tsi1/partition.go @@ -115,8 +115,7 @@ func NewPartition(sfile *tsdb.SeriesFile, path string) *Partition { // Only for tests! func (p *Partition) SetMaxLogFileSize(new int64) (old int64) { p.mu.Lock() - old = p.maxLogFileSize - p.maxLogFileSize = new + old, p.maxLogFileSize = p.maxLogFileSize, new p.mu.Unlock() return old } @@ -389,7 +388,7 @@ func (p *Partition) Close() error { var err error for _, f := range p.fileSet.files { if localErr := f.Close(); localErr != nil { - err = localErr + err = errors.Join(err, localErr) } } p.fileSet.files = nil From 8f8b48857e14b3d0318f08db7f9353fc90cc6a22 Mon Sep 17 00:00:00 2001 From: Devan Date: Mon, 16 Sep 2024 17:38:24 -0500 Subject: [PATCH 3/3] fix: errors.Join() filters nil errors --- tsdb/index/tsi1/partition.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tsdb/index/tsi1/partition.go b/tsdb/index/tsi1/partition.go index df695c6a362..2718609b5bf 100644 --- a/tsdb/index/tsi1/partition.go +++ b/tsdb/index/tsi1/partition.go @@ -385,15 +385,14 @@ func (p *Partition) Close() error { } // Close log files. - var err error + var err []error for _, f := range p.fileSet.files { - if localErr := f.Close(); localErr != nil { - err = errors.Join(err, localErr) - } + localErr := f.Close() + err = append(err, localErr) } p.fileSet.files = nil - return err + return errors.Join(err...) } // closing returns true if the partition is currently closing. It does not require