Skip to content

Commit 35585db

Browse files
Merge branch 'main' into sample-count-and-bytes
2 parents 1822b88 + 47f0236 commit 35585db

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+18301
-33
lines changed

clients/cmd/promtail/promtail-local-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ scrape_configs:
1616
labels:
1717
job: varlogs
1818
__path__: /var/log/*log
19+
stream: stdout

cmd/loki-canary/main.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ func main() {
7979
metricTestQueryRange := flag.Duration("metric-test-range", 24*time.Hour, "The range value [24h] used in the metric test instant-query."+
8080
" Note: this value is truncated to the running time of the canary until this value is reached")
8181

82+
cacheTestInterval := flag.Duration("cache-test-interval", 15*time.Minute, "The interval the cache test query should be run")
83+
cacheTestQueryRange := flag.Duration("cache-test-range", 24*time.Hour, "The range value [24h] used in the cache test instant-query.")
84+
cacheTestQueryNow := flag.Duration("cache-test-now", 1*time.Hour, "duration how far back from current time the execution time (--now) should be set for running this query in the cache test instant-query.")
85+
8286
spotCheckInterval := flag.Duration("spot-check-interval", 15*time.Minute, "Interval that a single result will be kept from sent entries and spot-checked against Loki, "+
8387
"e.g. 15min default one entry every 15 min will be saved and then queried again every 15min until spot-check-max is reached")
8488
spotCheckMax := flag.Duration("spot-check-max", 4*time.Hour, "How far back to check a spot check entry before dropping it")
@@ -189,7 +193,7 @@ func main() {
189193
_, _ = fmt.Fprintf(os.Stderr, "Unable to create reader for Loki querier, check config: %s", err)
190194
os.Exit(1)
191195
}
192-
c.comparator = comparator.NewComparator(os.Stderr, *wait, *maxWait, *pruneInterval, *spotCheckInterval, *spotCheckMax, *spotCheckQueryRate, *spotCheckWait, *metricTestInterval, *metricTestQueryRange, *interval, *buckets, sentChan, receivedChan, c.reader, true)
196+
c.comparator = comparator.NewComparator(os.Stderr, *wait, *maxWait, *pruneInterval, *spotCheckInterval, *spotCheckMax, *spotCheckQueryRate, *spotCheckWait, *metricTestInterval, *metricTestQueryRange, *cacheTestInterval, *cacheTestQueryRange, *cacheTestQueryNow, *interval, *buckets, sentChan, receivedChan, c.reader, true)
193197
}
194198

195199
startCanary()

cmd/lokitool/main.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ import (
1212
)
1313

1414
var (
15-
ruleCommand commands.RuleCommand
15+
ruleCommand commands.RuleCommand
16+
auditCommand commands.AuditCommand
1617
)
1718

1819
func main() {
1920
app := kingpin.New("lokitool", "A command-line tool to manage Loki.")
2021
ruleCommand.Register(app)
22+
auditCommand.Register(app)
2123

2224
app.Command("version", "Get the version of the lokitool CLI").Action(func(k *kingpin.ParseContext) error {
2325
fmt.Println(version.Print("loki"))

go.mod

+3-1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ require (
134134
github.com/prometheus/alertmanager v0.27.0
135135
github.com/prometheus/common/sigv4 v0.1.0
136136
github.com/richardartoul/molecule v1.0.0
137+
github.com/schollz/progressbar/v3 v3.14.2
137138
github.com/shirou/gopsutil/v4 v4.24.0-alpha.1
138139
github.com/thanos-io/objstore v0.0.0-20230829152104-1b257a36f9a3
139140
github.com/willf/bloom v2.0.3+incompatible
@@ -153,6 +154,7 @@ require (
153154
github.com/go-ole/go-ole v1.2.6 // indirect
154155
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
155156
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
157+
github.com/rivo/uniseg v0.4.7 // indirect
156158
github.com/shoenig/go-m1cpu v0.1.6 // indirect
157159
github.com/tklauser/go-sysconf v0.3.12 // indirect
158160
github.com/tklauser/numcpus v0.6.1 // indirect
@@ -286,7 +288,7 @@ require (
286288
github.com/leodido/ragel-machinery v0.0.0-20181214104525-299bdde78165 // indirect
287289
github.com/mailru/easyjson v0.7.7 // indirect
288290
github.com/mattn/go-colorable v0.1.13 // indirect
289-
github.com/mattn/go-isatty v0.0.19 // indirect
291+
github.com/mattn/go-isatty v0.0.20 // indirect
290292
github.com/miekg/dns v1.1.58 // indirect
291293
github.com/minio/md5-simd v1.1.2 // indirect
292294
github.com/minio/sha256-simd v1.0.1 // indirect

go.sum

+9-2
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,7 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
12591259
github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U=
12601260
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
12611261
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
1262+
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
12621263
github.com/kardianos/service v1.0.0/go.mod h1:8CzDhVuCuugtsHyZoTvsOBuvonN/UDBvl0kH+BUxvbo=
12631264
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
12641265
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
@@ -1355,8 +1356,8 @@ github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOA
13551356
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
13561357
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
13571358
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
1358-
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
1359-
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
1359+
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
1360+
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
13601361
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
13611362
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
13621363
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
@@ -1625,6 +1626,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qq
16251626
github.com/renier/xmlrpc v0.0.0-20170708154548-ce4a1a486c03/go.mod h1:gRAiPF5C5Nd0eyyRdqIu9qTiFSoZzpTq727b5B8fkkU=
16261627
github.com/richardartoul/molecule v1.0.0 h1:+LFA9cT7fn8KF39zy4dhOnwcOwRoqKiBkPqKqya+8+U=
16271628
github.com/richardartoul/molecule v1.0.0/go.mod h1:uvX/8buq8uVeiZiFht+0lqSLBHF+uGV8BrTv8W/SIwk=
1629+
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
1630+
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
16281631
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
16291632
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
16301633
github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
@@ -1650,6 +1653,8 @@ github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdh
16501653
github.com/satori/go.uuid v1.2.1-0.20181028125025-b2ce2384e17b/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
16511654
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25 h1:/8rfZAdFfafRXOgz+ZpMZZWZ5pYggCY9t7e/BvjaBHM=
16521655
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg=
1656+
github.com/schollz/progressbar/v3 v3.14.2 h1:EducH6uNLIWsr560zSV1KrTeUb/wZGAHqyMFIEa99ks=
1657+
github.com/schollz/progressbar/v3 v3.14.2/go.mod h1:aQAZQnhF4JGFtRJiw/eobaXpsqpVQAftEQ+hLGXaRc4=
16531658
github.com/sean-/conswriter v0.0.0-20180208195008-f5ae3917a627/go.mod h1:7zjs06qF79/FKAJpBvFx3P8Ww4UTIMAe+lpNXDHziac=
16541659
github.com/sean-/pager v0.0.0-20180208200047-666be9bf53b5/go.mod h1:BeybITEsBEg6qbIiqJ6/Bqeq25bCLbL7YFmpaFfJDuM=
16551660
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
@@ -2257,6 +2262,7 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
22572262
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
22582263
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
22592264
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
2265+
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
22602266
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
22612267
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
22622268
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
@@ -2266,6 +2272,7 @@ golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
22662272
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
22672273
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
22682274
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
2275+
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
22692276
golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
22702277
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
22712278
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

pkg/canary/comparator/comparator.go

+90-9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package comparator
33
import (
44
"fmt"
55
"io"
6+
"math"
67
"math/rand"
78
"sync"
89
"time"
@@ -24,6 +25,8 @@ const (
2425
DebugWebsocketMissingEntry = "websocket missing entry: %v\n"
2526
DebugQueryResult = "confirmation query result: %v\n"
2627
DebugEntryFound = "missing websocket entry %v was found %v seconds after it was originally sent\n"
28+
29+
floatDiffTolerance = 1e-6
2730
)
2831

2932
var (
@@ -90,6 +93,16 @@ var (
9093
Help: "how long the spot check test query execution took in seconds.",
9194
Buckets: instrument.DefBuckets,
9295
})
96+
queryResultsDiff = promauto.NewCounter(prometheus.CounterOpts{
97+
Namespace: "loki_canary",
98+
Name: "cache_test_query_results_diff_total",
99+
Help: "counts number of times the query results was different with and without cache ",
100+
})
101+
queryResultsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
102+
Namespace: "loki_canary",
103+
Name: "cache_test_query_results_total",
104+
Help: "counts number of times the query results test requests are done ",
105+
}, []string{"status"}) // status=success/failure
93106
)
94107

95108
type Comparator struct {
@@ -98,6 +111,7 @@ type Comparator struct {
98111
spotEntMtx sync.Mutex // Locks access to []spotCheck
99112
spotMtx sync.Mutex // Locks spotcheckRunning for single threaded but async spotCheck()
100113
metTestMtx sync.Mutex // Locks metricTestRunning for single threaded but async metricTest()
114+
cacheTestMtx sync.Mutex // Locks cacheTestRunning for single threaded but async cacheTest()
101115
pruneMtx sync.Mutex // Locks pruneEntriesRunning for single threaded but async pruneEntries()
102116
w io.Writer
103117
entries []*time.Time
@@ -116,14 +130,19 @@ type Comparator struct {
116130
metricTestInterval time.Duration
117131
metricTestRange time.Duration
118132
metricTestRunning bool
119-
writeInterval time.Duration
120-
confirmAsync bool
121-
startTime time.Time
122-
sent chan time.Time
123-
recv chan time.Time
124-
rdr reader.LokiReader
125-
quit chan struct{}
126-
done chan struct{}
133+
cacheTestInterval time.Duration
134+
cacheTestRange time.Duration
135+
// how far back from current time the execution time (--now) should be set for running this query.
136+
cacheTestNow time.Duration
137+
cacheTestRunning bool
138+
writeInterval time.Duration
139+
confirmAsync bool
140+
startTime time.Time
141+
sent chan time.Time
142+
recv chan time.Time
143+
rdr reader.LokiReader
144+
quit chan struct{}
145+
done chan struct{}
127146
}
128147

129148
func NewComparator(writer io.Writer,
@@ -133,6 +152,9 @@ func NewComparator(writer io.Writer,
133152
spotCheckInterval, spotCheckMax, spotCheckQueryRate, spotCheckWait time.Duration,
134153
metricTestInterval time.Duration,
135154
metricTestRange time.Duration,
155+
cacheTestInterval time.Duration,
156+
cacheTestRange time.Duration,
157+
cacheTestNow time.Duration,
136158
writeInterval time.Duration,
137159
buckets int,
138160
sentChan chan time.Time,
@@ -155,6 +177,10 @@ func NewComparator(writer io.Writer,
155177
metricTestInterval: metricTestInterval,
156178
metricTestRange: metricTestRange,
157179
metricTestRunning: false,
180+
cacheTestInterval: cacheTestInterval,
181+
cacheTestRange: cacheTestRange,
182+
cacheTestNow: cacheTestNow,
183+
cacheTestRunning: false,
158184
writeInterval: writeInterval,
159185
confirmAsync: confirmAsync,
160186
startTime: time.Now(),
@@ -252,10 +278,12 @@ func (c *Comparator) run() {
252278
randomGenerator := rand.New(rand.NewSource(time.Now().UnixNano()))
253279
mt := time.NewTicker(time.Duration(randomGenerator.Int63n(c.metricTestInterval.Nanoseconds())))
254280
sc := time.NewTicker(c.spotCheckQueryRate)
281+
ct := time.NewTicker(c.cacheTestInterval)
255282
defer func() {
256283
t.Stop()
257284
mt.Stop()
258285
sc.Stop()
286+
ct.Stop()
259287
close(c.done)
260288
}()
261289

@@ -294,12 +322,65 @@ func (c *Comparator) run() {
294322
firstMt = false
295323
mt.Reset(c.metricTestInterval)
296324
}
325+
case <-ct.C:
326+
// Only run one instance of cache tests at a time.
327+
c.cacheTestMtx.Lock()
328+
if !c.cacheTestRunning {
329+
c.cacheTestRunning = true
330+
go c.cacheTest(time.Now())
331+
}
332+
c.cacheTestMtx.Unlock()
333+
297334
case <-c.quit:
298335
return
299336
}
300337
}
301338
}
302339

340+
// cacheTest runs the same count-over-time query twice, once with cache and
// once without, and compares the two results.
//
// currTime is the time of the tick that triggered the run; the query is
// issued for the instant currTime minus c.cacheTestNow over a range of
// c.cacheTestRange.
//
// queryResultsTotal is incremented with status "failure" if either query
// errors and with status "success" when both complete. queryResultsDiff is
// incremented when the two results differ by more than floatDiffTolerance.
// All diagnostics are written to c.w.
func (c *Comparator) cacheTest(currTime time.Time) {
341+
// Always clear the running flag on exit so that run() can start the next cache test.
defer func() {
342+
c.cacheTestMtx.Lock()
343+
c.cacheTestRunning = false
344+
c.cacheTestMtx.Unlock()
345+
}()
346+
347+
// cacheTest is currently run using `c.rdr.QueryCountOverTime()` which is an instant query.
348+
// We run the query with and without cache over data that is no longer changing (e.g. --now="1hr ago") rather than over the latest data, which is a moving target.
349+
// NOTE(review): despite its name, queryStartTime is presumably the evaluation instant of the query, not the start of its range -- confirm against the reader.
queryStartTime := currTime.Add(-c.cacheTestNow)
350+
351+
// We cannot query for range before the pod even started.
// NOTE(review): only the query instant is compared with c.startTime; the range (c.cacheTestRange) may still reach back before the start time -- confirm this is intended.
352+
if queryStartTime.Before(c.startTime) {
353+
// Skip this run; the test is attempted again on the next cache test tick.
354+
fmt.Fprintf(c.w, "cacheTest not run. still waiting for query start range(%s) to past the process start time(%s).\n", queryStartTime, c.startTime)
355+
return
356+
}
357+
358+
rangeDuration := c.cacheTestRange
359+
// The range is formatted as a whole number of seconds, e.g. "86400s".
rng := fmt.Sprintf("%.0fs", rangeDuration.Seconds())
360+
361+
// with cache
362+
countCache, err := c.rdr.QueryCountOverTime(rng, queryStartTime, true)
363+
if err != nil {
364+
fmt.Fprintf(c.w, "error running cache query test with cache: %s\n", err.Error())
365+
queryResultsTotal.WithLabelValues("failure").Inc()
366+
return
367+
}
368+
369+
// without cache
370+
countNocache, err := c.rdr.QueryCountOverTime(rng, queryStartTime, false)
371+
if err != nil {
372+
fmt.Fprintf(c.w, "error running cache query test without cache: %s\n", err.Error())
373+
queryResultsTotal.WithLabelValues("failure").Inc()
374+
return
375+
}
376+
377+
// Both queries completed; "success" means the test ran, not that the results matched.
queryResultsTotal.WithLabelValues("success").Inc()
378+
// Compare with a small tolerance instead of exact float equality.
if math.Abs(countNocache-countCache) > floatDiffTolerance {
379+
queryResultsDiff.Inc()
380+
fmt.Fprintf(c.w, "found a diff in instant query results time: %s, result_with_cache: %v, result_without_cache: %v\n", queryStartTime, countCache, countNocache)
381+
}
382+
}
383+
303384
// check that the expected # of log lines have been written to Loki
304385
func (c *Comparator) metricTest(currTime time.Time) {
305386
// Always make sure to set the running state back to false
@@ -317,7 +398,7 @@ func (c *Comparator) metricTest(currTime time.Time) {
317398
adjustedRange = currTime.Sub(c.startTime)
318399
}
319400
begin := time.Now()
320-
actualCount, err := c.rdr.QueryCountOverTime(fmt.Sprintf("%.0fs", adjustedRange.Seconds()))
401+
actualCount, err := c.rdr.QueryCountOverTime(fmt.Sprintf("%.0fs", adjustedRange.Seconds()), begin, true)
321402
metricTestLatency.Observe(time.Since(begin).Seconds())
322403
if err != nil {
323404
fmt.Fprintf(c.w, "error running metric query test: %s\n", err.Error())

0 commit comments

Comments
 (0)