-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
periodic GC for badger datastore #72
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,8 @@ type Datastore struct { | |
closing chan struct{} | ||
|
||
gcDiscardRatio float64 | ||
maxGcDuration time.Duration | ||
gcInterval time.Duration | ||
} | ||
|
||
// Implements the datastore.Txn interface, enabling transaction support for | ||
|
@@ -42,7 +44,12 @@ type txn struct { | |
|
||
// Options are the badger datastore options, reexported here for convenience. | ||
type Options struct { | ||
gcDiscardRatio float64 | ||
// Please refer to the Badger docs to see what this is for | ||
GcDiscardRatio float64 | ||
// Maximum duration to perform a single GC cycle for | ||
MaxGcDuration time.Duration | ||
// Interval between GC cycles | ||
GcInterval time.Duration | ||
|
||
badger.Options | ||
} | ||
|
@@ -52,7 +59,9 @@ var DefaultOptions Options | |
|
||
func init() { | ||
DefaultOptions = Options{ | ||
gcDiscardRatio: 0.1, | ||
GcDiscardRatio: 0.5, | ||
MaxGcDuration: 1 * time.Minute, | ||
GcInterval: 15 * time.Minute, | ||
hsanjuan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Options: badger.DefaultOptions(""), | ||
} | ||
DefaultOptions.Options.CompactL0OnClose = false | ||
|
@@ -62,6 +71,7 @@ func init() { | |
var _ ds.Datastore = (*Datastore)(nil) | ||
var _ ds.TxnDatastore = (*Datastore)(nil) | ||
var _ ds.TTLDatastore = (*Datastore)(nil) | ||
var _ ds.GCDatastore = (*Datastore)(nil) | ||
|
||
// NewDatastore creates a new badger datastore. | ||
// | ||
|
@@ -70,12 +80,18 @@ func NewDatastore(path string, options *Options) (*Datastore, error) { | |
// Copy the options because we modify them. | ||
var opt badger.Options | ||
var gcDiscardRatio float64 | ||
var maxGcDuration time.Duration | ||
var gcInterval time.Duration | ||
if options == nil { | ||
opt = badger.DefaultOptions("") | ||
gcDiscardRatio = DefaultOptions.gcDiscardRatio | ||
gcDiscardRatio = DefaultOptions.GcDiscardRatio | ||
maxGcDuration = DefaultOptions.MaxGcDuration | ||
gcInterval = DefaultOptions.GcInterval | ||
} else { | ||
opt = options.Options | ||
gcDiscardRatio = options.gcDiscardRatio | ||
gcDiscardRatio = options.GcDiscardRatio | ||
maxGcDuration = options.MaxGcDuration | ||
gcInterval = options.GcInterval | ||
} | ||
|
||
opt.Dir = path | ||
|
@@ -90,11 +106,31 @@ func NewDatastore(path string, options *Options) (*Datastore, error) { | |
return nil, err | ||
} | ||
|
||
return &Datastore{ | ||
ds := &Datastore{ | ||
DB: kv, | ||
closing: make(chan struct{}), | ||
gcDiscardRatio: gcDiscardRatio, | ||
}, nil | ||
maxGcDuration: maxGcDuration, | ||
gcInterval: gcInterval, | ||
} | ||
|
||
// schedule periodic GC till db is closed | ||
go ds.periodicGC() | ||
|
||
return ds, nil | ||
} | ||
|
||
func (d *Datastore) periodicGC() { | ||
for { | ||
select { | ||
case <-time.After(d.gcInterval): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't it more natural to have a (also depends if the interval should be a fixed interval or actually a countdown since the last GC, in which case the user should not expect GCs happening every 15 mins but after 15 mins the previous GC finished (that need to be more clarified in docs)). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @hsanjuan I think waiting for |
||
if err := d.CollectGarbage(); err != nil { | ||
log.Warningf("error during a GC cycle: %s", err) | ||
} | ||
case <-d.closing: | ||
return | ||
} | ||
} | ||
} | ||
|
||
// NewTransaction starts a new transaction. The resulting transaction object | ||
|
@@ -275,17 +311,34 @@ func (d *Datastore) Batch() (ds.Batch, error) { | |
return tx, nil | ||
} | ||
|
||
func (d *Datastore) CollectGarbage() error { | ||
func (d *Datastore) CollectGarbage() (err error) { | ||
d.closeLk.RLock() | ||
defer d.closeLk.RUnlock() | ||
if d.closed { | ||
return ErrClosed | ||
} | ||
|
||
err := d.DB.RunValueLogGC(d.gcDiscardRatio) | ||
gcTimeout := time.NewTimer(d.maxGcDuration) | ||
defer gcTimeout.Stop() | ||
|
||
LOOP: | ||
for { | ||
select { | ||
case <-gcTimeout.C: | ||
break LOOP | ||
default: | ||
if err == nil { | ||
err = d.DB.RunValueLogGC(d.gcDiscardRatio) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do I understand this correctly as by default running RunValueLogGC() repeteadly for 1 minute non-stop? I'm not sure this works. In order to timeout a GC operation this would need support from badger. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @hsanjuan From the Badger GC docs :
The idea is to keep calling |
||
} else { | ||
break LOOP | ||
} | ||
} | ||
} | ||
|
||
if err == badger.ErrNoRewrite { | ||
err = nil | ||
} | ||
|
||
return err | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/dgraph-io/badger/blob/master/db.go#L1069
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will result is GC discarding less when it is run. It is set to a lower ratio so that it at least GCs something.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(in many cases it seems we don't reach a ratio of deletions up to 0.5 for GC to do anything (but there might have been bugs in Badger too).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@hsanjuan I see. Badger recommends a value of 0.5 in the docs with the premise that a lower value will cause more GC's to run. But, I wasn't aware that we don't usually fill up 50 % of a value log file. I'll change it to a lower value.