Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] - Add SizedLRU Cache #3344

Merged
merged 5 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions pkg/cache/decorator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package cache

// WithMetrics is a decorator that adds metrics collection to any Cache implementation.
// Every operation is forwarded to the wrapped cache and reported to the metrics
// collector under cacheName.
type WithMetrics[T any] struct {
// wrapped is the cache that every operation is delegated to.
wrapped Cache[T]
// metrics receives one Record* call per operation performed through the decorator.
metrics BaseMetricsCollector
// cacheName is the name passed to every metrics call to identify this cache.
cacheName string
}

// NewCacheWithMetrics wraps the provided Cache in a WithMetrics decorator.
// Operations performed through the returned value are forwarded to wrapped and
// reported to metrics; cacheName identifies the cache in the collected metrics.
func NewCacheWithMetrics[T any](wrapped Cache[T], metrics BaseMetricsCollector, cacheName string) *WithMetrics[T] {
	decorated := new(WithMetrics[T])
	decorated.wrapped = wrapped
	decorated.metrics = metrics
	decorated.cacheName = cacheName
	return decorated
}

// Set stores val under key in the wrapped cache.
// A set metric is recorded for this cache before the write is forwarded.
func (c *WithMetrics[T]) Set(key string, val T) {
	name := c.cacheName
	c.metrics.RecordSet(name)
	c.wrapped.Set(key, val)
}

// Get looks up key in the wrapped cache and returns whatever the wrapped cache returned.
// A hit metric is recorded when the key was found, a miss metric otherwise.
func (c *WithMetrics[T]) Get(key string) (T, bool) {
	val, ok := c.wrapped.Get(key)
	if !ok {
		c.metrics.RecordMiss(c.cacheName)
		return val, false
	}
	c.metrics.RecordHit(c.cacheName)
	return val, true
}

// Exists reports whether the wrapped cache contains key.
// A hit metric is recorded when the key is present, a miss metric otherwise.
func (c *WithMetrics[T]) Exists(key string) bool {
	if !c.wrapped.Exists(key) {
		c.metrics.RecordMiss(c.cacheName)
		return false
	}
	c.metrics.RecordHit(c.cacheName)
	return true
}

// Delete removes key from the wrapped cache.
// A delete metric is recorded for this cache after the removal has been forwarded.
func (c *WithMetrics[T]) Delete(key string) {
	inner, recorder := c.wrapped, c.metrics
	inner.Delete(key)
	recorder.RecordDelete(c.cacheName)
}

// Clear empties the wrapped cache.
// A clear metric is recorded for this cache after the wrapped cache has been cleared.
func (c *WithMetrics[T]) Clear() {
	inner, recorder := c.wrapped, c.metrics
	inner.Clear()
	recorder.RecordClear(c.cacheName)
}
107 changes: 107 additions & 0 deletions pkg/cache/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package cache

import (
"sync"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

// BaseMetricsCollector defines the interface for recording cache metrics.
// Each method corresponds to a specific cache-related operation and receives the
// name of the cache the operation was performed on.
type BaseMetricsCollector interface {
// RecordHit records a lookup that found its key.
RecordHit(cacheName string)
// RecordMiss records a lookup that did not find its key.
RecordMiss(cacheName string)
// RecordSet records a value being stored.
RecordSet(cacheName string)
// RecordDelete records a key being removed.
RecordDelete(cacheName string)
// RecordClear records the cache being emptied.
RecordClear(cacheName string)
}

// MetricsCollector encapsulates all Prometheus metrics with labels.
// It holds Prometheus counters for cache operations, which help track
// the performance and usage of the cache.
type MetricsCollector struct {
// Base metrics. Each counter vector is incremented by the Record* method
// of the matching name, using the cache name as the label value.
hits *prometheus.CounterVec
misses *prometheus.CounterVec
sets *prometheus.CounterVec
deletes *prometheus.CounterVec
clears *prometheus.CounterVec
}

var (
collectorOnce sync.Once // Ensures that the collector is initialized only once.
// collector is the package-wide singleton. It is nil until InitializeMetrics has run.
collector *MetricsCollector
)

// InitializeMetrics initializes the singleton MetricsCollector.
// It sets up Prometheus counters for cache operations (hits, misses, sets, deletes, clears).
// Should be called once at the start of the application.
func InitializeMetrics(namespace, subsystem string) {
collectorOnce.Do(func() {
collector = &MetricsCollector{
Comment on lines +37 to +38
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider having this called automatically when the package is imported using init()?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, that makes sense. Good call.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also cleans up some of the test code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option is to just declare them as package vars like we do with most others

hits: promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "hits_total",
Help: "Total number of cache hits.",
}, []string{"cache_name"}),

misses: promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "misses_total",
Help: "Total number of cache misses.",
}, []string{"cache_name"}),

sets: promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sets_total",
Help: "Total number of cache set operations.",
}, []string{"cache_name"}),

deletes: promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "deletes_total",
Help: "Total number of cache delete operations.",
}, []string{"cache_name"}),

clears: promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "clears_total",
Help: "Total number of cache clear operations.",
}, []string{"cache_name"}),
}
})
}

// GetMetricsCollector returns the singleton MetricsCollector instance.
// InitializeMetrics must have been called beforehand: if the singleton has not been
// created yet, this function panics instead of returning a nil collector.
func GetMetricsCollector() *MetricsCollector {
	if collector != nil {
		return collector
	}
	panic("MetricsCollector not initialized. Call InitializeMetrics first.")
}

// Implement BaseMetricsCollector interface methods.

// RecordHit adds one to the hit counter labelled with cacheName,
// tracking how often cache lookups succeed.
func (m *MetricsCollector) RecordHit(cacheName string) {
	counter := m.hits.WithLabelValues(cacheName)
	counter.Inc()
}

// RecordMiss adds one to the miss counter labelled with cacheName,
// tracking how often cache lookups fail.
func (m *MetricsCollector) RecordMiss(cacheName string) {
	counter := m.misses.WithLabelValues(cacheName)
	counter.Inc()
}

// RecordSet adds one to the set counter labelled with cacheName,
// tracking how often items are added/updated.
func (m *MetricsCollector) RecordSet(cacheName string) {
	counter := m.sets.WithLabelValues(cacheName)
	counter.Inc()
}

// RecordDelete adds one to the delete counter labelled with cacheName,
// tracking how often items are removed.
func (m *MetricsCollector) RecordDelete(cacheName string) {
	counter := m.deletes.WithLabelValues(cacheName)
	counter.Inc()
}

// RecordClear adds one to the clear counter labelled with cacheName,
// tracking how often the cache is completely cleared.
func (m *MetricsCollector) RecordClear(cacheName string) {
	counter := m.clears.WithLabelValues(cacheName)
	counter.Inc()
}
41 changes: 41 additions & 0 deletions pkg/cache/sizedlru/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package sizedlru

import (
"github.com/prometheus/client_golang/prometheus"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
)

// Compile-time assertion that *MetricsCollector implements the collector interface.
var _ collector = (*MetricsCollector)(nil)

// MetricsCollector extends the BaseMetricsCollector with Sized LRU specific metrics.
// The embedded base collector supplies the hit/miss/set/delete/clear recording methods;
// this type adds eviction recording on top of them.
type MetricsCollector struct {
// BaseMetricsCollector is embedded to provide the base metrics functionality.
cache.BaseMetricsCollector

// totalEvicts counts evictions; RecordEviction increments it with the cache name as label value.
totalEvicts *prometheus.CounterVec
}

// NewSizedLRUMetricsCollector initializes a new MetricsCollector with the provided namespace and subsystem.
//
// The base metrics are taken from the shared collector returned by cache.GetMetricsCollector.
// The evictions_total counter is created here; namespace and subsystem form its name prefix
// and "cache_name" is its only label.
//
// The eviction counter is registered with the default Prometheus registerer so that it is
// actually exported. This constructor may run more than once per process, so a duplicate
// registration is not an error: the counter vector that is already registered is reused,
// which makes all collectors report into the same exported series. If registration fails
// for any other reason, the new counter is still used (unregistered) so that recording
// evictions never fails.
func NewSizedLRUMetricsCollector(namespace, subsystem string) *MetricsCollector {
	base := cache.GetMetricsCollector()

	totalEvicts := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "evictions_total",
		Help:      "Total number of cache evictions.",
	}, []string{"cache_name"})

	if err := prometheus.Register(totalEvicts); err != nil {
		// Already registered (e.g. by an earlier call): share the existing counter vector
		// instead of keeping a private one that nothing would ever scrape.
		if already, ok := err.(prometheus.AlreadyRegisteredError); ok {
			if existing, ok := already.ExistingCollector.(*prometheus.CounterVec); ok {
				totalEvicts = existing
			}
		}
	}

	return &MetricsCollector{
		BaseMetricsCollector: base,
		totalEvicts:          totalEvicts,
	}
}

// RecordEviction adds one to the eviction counter labelled with cacheName.
func (c *MetricsCollector) RecordEviction(cacheName string) {
	evictions := c.totalEvicts.WithLabelValues(cacheName)
	evictions.Inc()
}
120 changes: 120 additions & 0 deletions pkg/cache/sizedlru/sizedlru.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Package sizedlru provides a generic, size-limited, LRU (Least Recently Used) cache with optional
// metrics collection and reporting. It wraps the golang-lru/v2 caching library, adding support for custom
// metrics tracking cache hits, misses, evictions, and other cache operations.
//
// This package supports configuring key aspects of cache behavior, including maximum cache size,
// and custom metrics collection.
package sizedlru

import (
"fmt"

lru "github.com/hashicorp/golang-lru/v2"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
)

// collector is an interface that extends cache.BaseMetricsCollector
// with a single additional method for recording cache evictions.
type collector interface {
cache.BaseMetricsCollector

// RecordEviction records that an entry was evicted from the named cache.
RecordEviction(cacheName string)
}

// Cache is a generic LRU-sized cache that stores key-value pairs with a maximum size limit.
// It wraps the lru.Cache library and adds support for custom metrics collection.
type Cache[T any] struct {
// cache is the underlying LRU cache, keyed by string.
cache *lru.Cache[string, T]

// cacheName is passed to every metrics call to identify this cache.
cacheName string
// capacity is the value supplied through WithCapacity.
capacity int
// metrics receives a Record* call for each cache operation and eviction.
metrics collector
}

// Option defines a functional option for configuring the Cache.
// An Option receives the Cache being constructed and modifies it in place.
type Option[T any] func(*Cache[T])

// WithMetricsCollector is a functional option that installs a custom metrics collector.
// The returned Option stores collector in the metrics field of the Cache it is applied to.
func WithMetricsCollector[T any](collector collector) Option[T] {
	apply := func(lc *Cache[T]) {
		lc.metrics = collector
	}
	return apply
}

// WithCapacity is a functional option to set the maximum capacity of the cache.
// The capacity is expressed as a number of cached entries, not as a size in bytes.
// If the option is not supplied, NewCache uses its built-in default.
func WithCapacity[T any](capacity int) Option[T] {
	setCapacity := func(lc *Cache[T]) {
		lc.capacity = capacity
	}
	return setCapacity
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you document the units expected here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, great idea.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, I got the cache implementations mixed up. This capacity refers to the number of items, not total size.


// NewCache creates a new Cache with optional configuration parameters.
// It takes a cache name and a variadic list of options.
func NewCache[T any](cacheName string, opts ...Option[T]) (*Cache[T], error) {
// Default values for cache configuration.
const defaultSize = 1 << 29 // 512MB

Copy link
Contributor

@dustin-decker dustin-decker Sep 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a smaller default (like 8MB) would be sensible here.

sizedLRU := &Cache[T]{
metrics: NewSizedLRUMetricsCollector(common.MetricsNamespace, common.MetricsSubsystem),
cacheName: cacheName,
}

for _, opt := range opts {
opt(sizedLRU)
}

// Provide a evict callback function to record evictions.
onEvicted := func(string, T) {
sizedLRU.metrics.RecordEviction(sizedLRU.cacheName)
}

lcache, err := lru.NewWithEvict[string, T](defaultSize, onEvicted)
if err != nil {
return nil, fmt.Errorf("failed to create Ristretto cache: %w", err)
}

sizedLRU.cache = lcache

return sizedLRU, nil
}

// Set stores val under key in the underlying LRU cache and then records a set metric.
func (lc *Cache[T]) Set(key string, val T) {
	// The value returned by Add is not needed here.
	_ = lc.cache.Add(key, val)
	lc.metrics.RecordSet(lc.cacheName)
}

// Get looks up key in the underlying LRU cache.
// On a hit it records a hit metric and returns the cached value and true;
// on a miss it records a miss metric and returns the zero value of T and false.
func (lc *Cache[T]) Get(key string) (T, bool) {
	cached, ok := lc.cache.Get(key)
	if !ok {
		lc.metrics.RecordMiss(lc.cacheName)
		var zero T
		return zero, false
	}
	lc.metrics.RecordHit(lc.cacheName)
	return cached, true
}

// Exists reports whether key is present, determined by a Get on the underlying LRU cache.
// A hit metric is recorded when the key is found, a miss metric otherwise.
func (lc *Cache[T]) Exists(key string) bool {
	if _, ok := lc.cache.Get(key); !ok {
		lc.metrics.RecordMiss(lc.cacheName)
		return false
	}
	lc.metrics.RecordHit(lc.cacheName)
	return true
}

// Delete removes key from the underlying LRU cache and then records a delete metric.
func (lc *Cache[T]) Delete(key string) {
	// The value returned by Remove is not needed here.
	_ = lc.cache.Remove(key)
	lc.metrics.RecordDelete(lc.cacheName)
}

// Clear purges every entry from the underlying LRU cache and then records a clear metric.
func (lc *Cache[T]) Clear() {
	store, recorder := lc.cache, lc.metrics
	store.Purge()
	recorder.RecordClear(lc.cacheName)
}
Loading
Loading