Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LFU Cache Implementation #7439

Merged
merged 24 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
187f808
executor: do not lookup in cache twice
vmg Jan 15, 2021
4641065
plan: benchmark plan building for DML vs SELECT statements
vmg Jan 15, 2021
f34f884
cache: reduce API surface
vmg Jan 18, 2021
cbbba9e
cache: abstract into a Cache interface
vmg Jan 18, 2021
76bd931
tools: do not cache E2E tests between runs
vmg Jan 19, 2021
652a94c
cache: configure using total memory usage
vmg Jan 20, 2021
1093c69
cache: do not return `nil` stats
vmg Jan 21, 2021
2e3e0c0
cache: fix flaky memory usage test
vmg Jan 27, 2021
d6a13f1
cache: switch to a new implementation based on Ristretto
vmg Jan 19, 2021
1cade52
endtoend: fix test values
vmg Feb 1, 2021
5f2a612
plan builder: use standard V3 planner
vmg Feb 1, 2021
4c35cfa
cache: make unit test more reliable
vmg Feb 1, 2021
70c8038
cache: speed up clearing large caches
vmg Feb 2, 2021
f681e00
cache: make the cache implementation swappable
vmg Feb 2, 2021
7c0c56f
cache: fix DropUpdates test
vmg Feb 3, 2021
f7ad521
cache: use the legacy LRU cache by default
vmg Feb 3, 2021
9fe7cc0
cache: remove more unused features
vmg Feb 3, 2021
a3feeb8
cache: handle default arguments for both cache types
vmg Feb 4, 2021
d9cd613
Merge branch 'master' into lfu-cache
vmg Feb 4, 2021
15dae44
hack: document empty Goassembly file
vmg Feb 5, 2021
a8b242d
cache: allow configuring both entries & size
vmg Feb 5, 2021
37db725
hack: update license header
vmg Feb 5, 2021
915e019
query_engine: add test for cache pollution
vmg Feb 5, 2021
431da53
cache: review feedback
vmg Feb 5, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,13 @@ require (
github.com/hashicorp/go-msgpack v0.5.5
github.com/hashicorp/go-sockaddr v1.0.2 // indirect
github.com/hashicorp/go-uuid v1.0.2 // indirect
github.com/hashicorp/golang-lru v0.5.3 // indirect
github.com/hashicorp/serf v0.9.2 // indirect
github.com/howeyc/gopass v0.0.0-20190910152052-7cb4b85ec19c
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428
github.com/imdario/mergo v0.3.6 // indirect
github.com/klauspost/compress v1.4.1 // indirect
github.com/klauspost/cpuid v1.2.0 // indirect
github.com/klauspost/pgzip v1.2.4
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/krishicks/yaml-patch v0.0.10
github.com/magiconair/properties v1.8.1
github.com/martini-contrib/auth v0.0.0-20150219114609-fa62c19b7ae8
Expand Down
27 changes: 27 additions & 0 deletions go.sum

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions go/cache/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cache

// Cache is a generic interface type for a data structure that keeps recently used
// objects in memory and evicts them when it becomes full.
type Cache interface {
	// Get looks up key and reports, through its second result, whether a
	// value was found for it.
	Get(key string) (interface{}, bool)
	// Set stores val under key. The boolean result reports whether the
	// implementation accepted the write.
	Set(key string, val interface{}) bool
	// ForEach calls callback with cached values, stopping as soon as
	// callback returns false.
	ForEach(callback func(interface{}) bool)

	// Delete removes the entry stored under key.
	Delete(key string)
	// Clear empties the cache.
	Clear()

	// Wait waits for all pending operations on the cache to settle. Since
	// implementations may apply writes asynchronously, a write may not be
	// immediately accessible unless the user manually calls Wait.
	Wait()

	// Len returns the number of entries currently in the cache.
	Len() int
	// Evictions returns the number of evictions.
	Evictions() int64
	// UsedCapacity returns how much of the cache's capacity is in use, in
	// whatever unit the implementation uses to cost its entries.
	UsedCapacity() int64
	// MaxCapacity returns the cache's maximum capacity.
	MaxCapacity() int64
	// SetCapacity changes the cache's maximum capacity.
	SetCapacity(int64)
}

// cachedObject is implemented by values that can report their own size.
// NewDefaultCacheImpl type-asserts cached values to this interface to compute
// their cost when the LFU implementation is selected.
type cachedObject interface {
// CachedSize returns the size of the object as an int64.
// NOTE(review): the meaning of alloc is not visible from this file; the only
// caller here passes true. Confirm against the implementations.
CachedSize(alloc bool) int64
}

// NewDefaultCacheImpl builds the cache implementation that Vitess uses by default,
// as selected by cfg:
//
//   - a nil cfg, or one whose MaxEntries and MaxMemoryUsage are both zero, yields
//     a nullCache;
//   - cfg.LFU yields the cache built by NewRistrettoCache from MaxEntries and
//     MaxMemoryUsage, costing each value by its CachedSize;
//   - anything else yields the cache built by NewLRUCache from MaxEntries, with
//     every value costing 1.
//
// When cfg.LFU is set, every value stored in the returned cache must implement
// cachedObject: the cost callback type-asserts without checking.
func NewDefaultCacheImpl(cfg *Config) Cache {
	if cfg == nil {
		return &nullCache{}
	}
	if cfg.MaxEntries == 0 && cfg.MaxMemoryUsage == 0 {
		return &nullCache{}
	}

	if !cfg.LFU {
		// The LRU cache is bounded by entry count, so each value weighs the same.
		unitCost := func(_ interface{}) int64 {
			return 1
		}
		return NewLRUCache(cfg.MaxEntries, unitCost)
	}

	// The LFU cache weighs each value by the size it reports for itself.
	sizeCost := func(val interface{}) int64 {
		return val.(cachedObject).CachedSize(true)
	}
	return NewRistrettoCache(cfg.MaxEntries, cfg.MaxMemoryUsage, sizeCost)
}

// Config holds the configuration options for a cache instance.
type Config struct {
// MaxEntries is the estimated number of entries that the cache will hold at capacity.
// NewDefaultCacheImpl passes it to both cache implementations; for the LRU one it is
// the only limit.
MaxEntries int64
// MaxMemoryUsage is the maximum amount of memory the cache can handle.
// NewDefaultCacheImpl only passes it to the LFU implementation.
// NOTE(review): presumably expressed in bytes — confirm.
MaxMemoryUsage int64
// LFU toggles whether to use a new cache implementation with a TinyLFU admission policy
// instead of the LRU one.
LFU bool
}

// DefaultConfig is the default configuration for a cache instance in Vitess:
// 5000 entries, a memory limit of 32 * 1024 * 1024 (32 MiB, assuming
// MaxMemoryUsage is in bytes), and LFU disabled, which makes
// NewDefaultCacheImpl pick the LRU implementation.
var DefaultConfig = &Config{
MaxEntries: 5000,
MaxMemoryUsage: 32 * 1024 * 1024,
LFU: false,
}
134 changes: 42 additions & 92 deletions go/cache/lru_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,59 +25,55 @@ package cache

import (
"container/list"
"fmt"
"sync"
"time"
)

var _ Cache = &LRUCache{}

// LRUCache is a typical LRU cache implementation. If the cache
// reaches the capacity, the least recently used item is deleted from
// the cache. Note the capacity is not the number of items, but the
// total sum of the Size() of each item.
// total sum of the CachedSize() of each item.
type LRUCache struct {
mu sync.Mutex

// list & table contain *entry objects.
list *list.List
table map[string]*list.Element
cost func(interface{}) int64

size int64
capacity int64
evictions int64
}

// Value is the interface values that go into LRUCache need to satisfy
type Value interface {
// Size returns how big this value is. If you want to just track
// the cache by number of objects, you may return the size as 1.
Size() int
}

// Item is what is stored in the cache
type Item struct {
Key string
Value Value
Value interface{}
}

type entry struct {
key string
value Value
value interface{}
size int64
timeAccessed time.Time
}

// NewLRUCache creates a new empty cache with the given capacity.
func NewLRUCache(capacity int64) *LRUCache {
func NewLRUCache(capacity int64, cost func(interface{}) int64) *LRUCache {
return &LRUCache{
list: list.New(),
table: make(map[string]*list.Element),
capacity: capacity,
cost: cost,
}
}

// Get returns a value from the cache, and marks the entry as most
// recently used.
func (lru *LRUCache) Get(key string) (v Value, ok bool) {
func (lru *LRUCache) Get(key string) (v interface{}, ok bool) {
lru.mu.Lock()
defer lru.mu.Unlock()

Expand All @@ -89,20 +85,8 @@ func (lru *LRUCache) Get(key string) (v Value, ok bool) {
return element.Value.(*entry).value, true
}

// Peek returns a value from the cache without changing the LRU order.
func (lru *LRUCache) Peek(key string) (v Value, ok bool) {
lru.mu.Lock()
defer lru.mu.Unlock()

element := lru.table[key]
if element == nil {
return nil, false
}
return element.Value.(*entry).value, true
}

// Set sets a value in the cache.
func (lru *LRUCache) Set(key string, value Value) {
func (lru *LRUCache) Set(key string, value interface{}) bool {
lru.mu.Lock()
defer lru.mu.Unlock()

Expand All @@ -111,23 +95,12 @@ func (lru *LRUCache) Set(key string, value Value) {
} else {
lru.addNew(key, value)
}
}

// SetIfAbsent will set the value in the cache if not present. If the
// value exists in the cache, we don't set it.
func (lru *LRUCache) SetIfAbsent(key string, value Value) {
lru.mu.Lock()
defer lru.mu.Unlock()

if element := lru.table[key]; element != nil {
lru.moveToFront(element)
} else {
lru.addNew(key, value)
}
// the LRU cache cannot fail to insert items; it always returns true
return true
}

// Delete removes an entry from the cache, and returns if the entry existed.
func (lru *LRUCache) Delete(key string) bool {
func (lru *LRUCache) delete(key string) bool {
lru.mu.Lock()
defer lru.mu.Unlock()

Expand All @@ -142,6 +115,11 @@ func (lru *LRUCache) Delete(key string) bool {
return true
}

// Delete removes an entry from the cache
func (lru *LRUCache) Delete(key string) {
lru.delete(key)
}

// Clear will clear the entire cache.
func (lru *LRUCache) Clear() {
lru.mu.Lock()
Expand All @@ -152,6 +130,13 @@ func (lru *LRUCache) Clear() {
lru.size = 0
}

// Len returns the size of the cache (in entries)
func (lru *LRUCache) Len() int {
lru.mu.Lock()
defer lru.mu.Unlock()
return lru.list.Len()
}

// SetCapacity will set the capacity of the cache. If the capacity is
// smaller, and the current cache size exceeds that capacity, the cache
// will be shrunk.
Expand All @@ -163,75 +148,40 @@ func (lru *LRUCache) SetCapacity(capacity int64) {
lru.checkCapacity()
}

// Stats returns a few stats on the cache.
func (lru *LRUCache) Stats() (length, size, capacity, evictions int64, oldest time.Time) {
lru.mu.Lock()
defer lru.mu.Unlock()
if lastElem := lru.list.Back(); lastElem != nil {
oldest = lastElem.Value.(*entry).timeAccessed
}
return int64(lru.list.Len()), lru.size, lru.capacity, lru.evictions, oldest
}

// StatsJSON returns stats as a JSON object in a string.
func (lru *LRUCache) StatsJSON() string {
if lru == nil {
return "{}"
}
l, s, c, e, o := lru.Stats()
return fmt.Sprintf("{\"Length\": %v, \"Size\": %v, \"Capacity\": %v, \"Evictions\": %v, \"OldestAccess\": \"%v\"}", l, s, c, e, o)
}

// Length returns how many elements are in the cache
func (lru *LRUCache) Length() int64 {
lru.mu.Lock()
defer lru.mu.Unlock()
return int64(lru.list.Len())
}
// Wait is a no-op in the LRU cache
func (lru *LRUCache) Wait() {}

// Size returns the sum of the objects' Size() method.
func (lru *LRUCache) Size() int64 {
lru.mu.Lock()
defer lru.mu.Unlock()
// UsedCapacity returns the sum of the costs of all the entries currently in
// the cache, as computed by the cost function given to NewLRUCache. The unit
// is therefore whatever that function returns (bytes, entries, ...).
func (lru *LRUCache) UsedCapacity() int64 {
	// lru.size is modified while holding lru.mu (for instance by Set and
	// Clear), so the read must hold the lock too; otherwise it is a data
	// race with those writers.
	lru.mu.Lock()
	defer lru.mu.Unlock()
	return lru.size
}

// Capacity returns the cache maximum capacity.
func (lru *LRUCache) Capacity() int64 {
// MaxCapacity returns the cache maximum capacity.
func (lru *LRUCache) MaxCapacity() int64 {
lru.mu.Lock()
defer lru.mu.Unlock()
return lru.capacity
}

// Evictions returns the eviction count.
// Evictions returns the number of evictions
func (lru *LRUCache) Evictions() int64 {
lru.mu.Lock()
defer lru.mu.Unlock()
return lru.evictions
}

// Oldest returns the insertion time of the oldest element in the cache,
// or a IsZero() time if cache is empty.
func (lru *LRUCache) Oldest() (oldest time.Time) {
lru.mu.Lock()
defer lru.mu.Unlock()
if lastElem := lru.list.Back(); lastElem != nil {
oldest = lastElem.Value.(*entry).timeAccessed
}
return
}

// Keys returns all the keys for the cache, ordered from most recently
// ForEach yields all the values for the cache, ordered from most recently
// used to least recently used.
func (lru *LRUCache) Keys() []string {
func (lru *LRUCache) ForEach(callback func(value interface{}) bool) {
lru.mu.Lock()
defer lru.mu.Unlock()

keys := make([]string, 0, lru.list.Len())
for e := lru.list.Front(); e != nil; e = e.Next() {
keys = append(keys, e.Value.(*entry).key)
v := e.Value.(*entry)
if !callback(v.value) {
break
}
}
return keys
}

// Items returns all the values for the cache, ordered from most recently
Expand All @@ -248,8 +198,8 @@ func (lru *LRUCache) Items() []Item {
return items
}

func (lru *LRUCache) updateInplace(element *list.Element, value Value) {
valueSize := int64(value.Size())
func (lru *LRUCache) updateInplace(element *list.Element, value interface{}) {
valueSize := lru.cost(value)
sizeDiff := valueSize - element.Value.(*entry).size
element.Value.(*entry).value = value
element.Value.(*entry).size = valueSize
Expand All @@ -263,8 +213,8 @@ func (lru *LRUCache) moveToFront(element *list.Element) {
element.Value.(*entry).timeAccessed = time.Now()
}

func (lru *LRUCache) addNew(key string, value Value) {
newEntry := &entry{key, value, int64(value.Size()), time.Now()}
func (lru *LRUCache) addNew(key string, value interface{}) {
newEntry := &entry{key, value, lru.cost(value), time.Now()}
element := lru.list.PushFront(newEntry)
lru.table[key] = element
lru.size += newEntry.size
Expand Down
Loading