Skip to content

Commit

Permalink
Track evictions in the PostingsForMatchers cache (#824)
Browse files Browse the repository at this point in the history
* Track evictions in the PostingsForMatchers cache

Signed-off-by: Marco Pracucci <[email protected]>

* Explain checks order

Signed-off-by: Marco Pracucci <[email protected]>

* Remove unnecessary if statements

Signed-off-by: Marco Pracucci <[email protected]>

---------

Signed-off-by: Marco Pracucci <[email protected]>
  • Loading branch information
pracucci authored Jan 28, 2025
1 parent 0cc2978 commit 127392e
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 15 deletions.
99 changes: 85 additions & 14 deletions tsdb/postings_for_matchers_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ const (
DefaultPostingsForMatchersCacheForce = false
)

// Reasons why an entry can be evicted from the PostingsForMatchers cache.
// The values are consecutive integers starting at 0 so they can be used
// as indexes into a fixed-size array of per-reason counters.
const (
	// evictionReasonTTL is reported when the evicted entry is at least as old as the TTL.
	evictionReasonTTL = iota
	// evictionReasonMaxBytes is reported when the cache size in bytes is above the limit.
	evictionReasonMaxBytes
	// evictionReasonMaxItems is reported when the number of cached items is above the limit.
	evictionReasonMaxItems
	// evictionReasonUnknown is reported when none of the reasons above applies.
	evictionReasonUnknown
	// evictionReasonsLength is the number of reasons defined above; it must stay last.
	evictionReasonsLength
)

// IndexPostingsReader is a subset of IndexReader methods, the minimum required to evaluate PostingsForMatchers.
type IndexPostingsReader interface {
// LabelValues returns possible label values which may not be sorted.
Expand Down Expand Up @@ -316,7 +324,7 @@ func (c *PostingsForMatchersCache) expire() {
defer c.expireInProgress.Store(false)

c.cachedMtx.RLock()
if !c.shouldEvictHead() {
if !c.shouldEvictHead(c.timeNow()) {
c.cachedMtx.RUnlock()
return
}
Expand All @@ -327,18 +335,32 @@ func (c *PostingsForMatchersCache) expire() {
c.evictHeadBeforeHook()
}

c.cachedMtx.Lock()
defer c.cachedMtx.Unlock()
var evictionReasons [evictionReasonsLength]int

// Evict the head taking an exclusive lock.
{
c.cachedMtx.Lock()

now := c.timeNow()
for c.shouldEvictHead(now) {
reason := c.evictHead(now)
evictionReasons[reason]++
}

for c.shouldEvictHead() {
c.evictHead()
c.cachedMtx.Unlock()
}

// Keep track of the reason why items were evicted.
c.metrics.evictionsBecauseTTL.Add(float64(evictionReasons[evictionReasonTTL]))
c.metrics.evictionsBecauseMaxBytes.Add(float64(evictionReasons[evictionReasonMaxBytes]))
c.metrics.evictionsBecauseMaxItems.Add(float64(evictionReasons[evictionReasonMaxItems]))
c.metrics.evictionsBecauseUnknown.Add(float64(evictionReasons[evictionReasonUnknown]))
}

// shouldEvictHead returns true if the cache head should be evicted, either because it's too old,
// or because the cache holds too many items or too many bytes.
// It should be called while at least a read lock is held on cachedMtx.
func (c *PostingsForMatchersCache) shouldEvictHead() bool {
func (c *PostingsForMatchersCache) shouldEvictHead(now time.Time) bool {
// The cache should be evicted for sure if the max size (either items or bytes) is reached.
if c.cached.Len() > c.maxItems || c.cachedBytes > c.maxBytes {
return true
Expand All @@ -349,15 +371,37 @@ func (c *PostingsForMatchersCache) shouldEvictHead() bool {
return false
}
ts := h.Value.(*postingsForMatchersCachedCall).ts
return c.timeNow().Sub(ts) >= c.ttl
return now.Sub(ts) >= c.ttl
}

func (c *PostingsForMatchersCache) evictHead() {
// evictHead removes the oldest entry (the front of the cached list) from the cache
// and returns the reason why it was evicted, as one of the evictionReason* constants.
//
// The front element is dereferenced without a nil check, so the cache must not be
// empty when this is called. The reason is computed before the entry is removed,
// because removing it changes both the list length and cachedBytes.
func (c *PostingsForMatchersCache) evictHead(now time.Time) (reason int) {
	head := c.cached.Front()
	entry := head.Value.(*postingsForMatchersCachedCall)

	// Snapshot the eviction conditions while the entry is still accounted for.
	expired := now.Sub(entry.ts) >= c.ttl
	tooManyBytes := c.cachedBytes > c.maxBytes
	tooManyItems := c.cached.Len() > c.maxItems

	// Precedence of the reasons:
	//  1. TTL: an expired item is tracked as such even if the cache was also full.
	//  2. Max bytes: "max items" is deprecated and expected to be set to a high value,
	//     because the cache is primarily limited by size in bytes.
	//  3. Max items.
	// If none applies, which should never happen, the eviction is tracked as unknown
	// so that unexpected behaviours can be detected.
	reason = evictionReasonUnknown
	switch {
	case expired:
		reason = evictionReasonTTL
	case tooManyBytes:
		reason = evictionReasonMaxBytes
	case tooManyItems:
		reason = evictionReasonMaxItems
	}

	c.calls.Delete(entry.key)
	c.cached.Remove(head)
	c.cachedBytes -= entry.sizeBytes

	return reason
}

// onPromiseExecutionDone must be called once the execution of PostingsForMatchers promise has done.
Expand Down Expand Up @@ -555,18 +599,25 @@ func (t *contextsTracker) trackedContextsCount() int {
}

// PostingsForMatchersCacheMetrics groups the Prometheus counters used by the
// PostingsForMatchers cache. Skips and evictions are tracked with one counter per reason.
type PostingsForMatchersCacheMetrics struct {
requests prometheus.Counter
hits prometheus.Counter
misses prometheus.Counter
skipsBecauseIneligible prometheus.Counter
skipsBecauseStale prometheus.Counter
skipsBecauseCanceled prometheus.Counter
requests prometheus.Counter
hits prometheus.Counter
misses prometheus.Counter
skipsBecauseIneligible prometheus.Counter
skipsBecauseStale prometheus.Counter
skipsBecauseCanceled prometheus.Counter
// Evictions from the cache, one counter per eviction reason.
evictionsBecauseTTL prometheus.Counter
evictionsBecauseMaxBytes prometheus.Counter
evictionsBecauseMaxItems prometheus.Counter
evictionsBecauseUnknown prometheus.Counter
}

func NewPostingsForMatchersCacheMetrics(reg prometheus.Registerer) *PostingsForMatchersCacheMetrics {
const (
skipsMetric = "postings_for_matchers_cache_skips_total"
skipsHelp = "Total number of requests to the PostingsForMatchers cache that have been skipped the cache. The subsequent result is not cached."

evictionsMetric = "postings_for_matchers_cache_evictions_total"
evictionsHelp = "Total number of evictions from the PostingsForMatchers cache."
)

return &PostingsForMatchersCacheMetrics{
Expand Down Expand Up @@ -597,5 +648,25 @@ func NewPostingsForMatchersCacheMetrics(reg prometheus.Registerer) *PostingsForM
Help: skipsHelp,
ConstLabels: map[string]string{"reason": "canceled-cached-entry"},
}),
evictionsBecauseTTL: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: evictionsMetric,
Help: evictionsHelp,
ConstLabels: map[string]string{"reason": "ttl-expired"},
}),
evictionsBecauseMaxBytes: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: evictionsMetric,
Help: evictionsHelp,
ConstLabels: map[string]string{"reason": "max-bytes-reached"},
}),
evictionsBecauseMaxItems: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: evictionsMetric,
Help: evictionsHelp,
ConstLabels: map[string]string{"reason": "max-items-reached"},
}),
evictionsBecauseUnknown: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: evictionsMetric,
Help: evictionsHelp,
ConstLabels: map[string]string{"reason": "unknown"},
}),
}
}
86 changes: 85 additions & 1 deletion tsdb/postings_for_matchers_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} %d
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`, expectedMisses, expectedDisabled))))
})
}
Expand Down Expand Up @@ -114,6 +121,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -238,6 +252,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 2
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -279,6 +300,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -332,6 +360,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 1
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand All @@ -347,7 +382,7 @@ func TestPostingsForMatchersCache(t *testing.T) {
}

callsPerMatchers := map[string]int{}
c := newPostingsForMatchersCache(DefaultPostingsForMatchersCacheTTL, maxItems, 1000, func(_ context.Context, ix IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, error) {
c := newPostingsForMatchersCache(DefaultPostingsForMatchersCacheTTL, maxItems, 100000, func(_ context.Context, ix IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, error) {
k := matchersKey(ms)
callsPerMatchers[k]++
return index.ErrPostings(fmt.Errorf("result from call %d", callsPerMatchers[k])), nil
Expand Down Expand Up @@ -401,6 +436,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 1
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -497,6 +539,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 1
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -565,6 +614,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -656,6 +712,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})

Expand Down Expand Up @@ -740,6 +803,13 @@ func TestPostingsForMatchersCache(t *testing.T) {
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
})
}
Expand Down Expand Up @@ -834,6 +904,13 @@ func TestPostingsForMatchersCache_ShouldNotReturnStaleEntriesWhileAnotherGorouti
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 0
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 1
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 1
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
}

Expand Down Expand Up @@ -918,6 +995,13 @@ func TestPostingsForMatchersCache_RaceConditionBetweenExecutionContextCancellati
postings_for_matchers_cache_skips_total{reason="canceled-cached-entry"} 1
postings_for_matchers_cache_skips_total{reason="ineligible"} 0
postings_for_matchers_cache_skips_total{reason="stale-cached-entry"} 0
# HELP postings_for_matchers_cache_evictions_total Total number of evictions from the PostingsForMatchers cache.
# TYPE postings_for_matchers_cache_evictions_total counter
postings_for_matchers_cache_evictions_total{reason="max-bytes-reached"} 0
postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
postings_for_matchers_cache_evictions_total{reason="unknown"} 0
`)))
}

Expand Down

0 comments on commit 127392e

Please sign in to comment.