Skip to content

Commit 47f0236

Browse files
authored
feat: Introduce index audit to lokitool (#13008)
Adds a new `index audit` command to the `lokitool` cmd. The new `index audit` validates that all chunks required by a given index are available at the object storage. This is useful to validate if you're missing data after a backfill or when migrating data from one Loki instance to another. See `pkg/tool/audit/README.md` for usage instructions.
1 parent 71507a2 commit 47f0236

40 files changed

+18109
-9
lines changed

cmd/lokitool/main.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ import (
1212
)
1313

1414
var (
15-
ruleCommand commands.RuleCommand
15+
ruleCommand commands.RuleCommand
16+
auditCommand commands.AuditCommand
1617
)
1718

1819
func main() {
1920
app := kingpin.New("lokitool", "A command-line tool to manage Loki.")
2021
ruleCommand.Register(app)
22+
auditCommand.Register(app)
2123

2224
app.Command("version", "Get the version of the lokitool CLI").Action(func(k *kingpin.ParseContext) error {
2325
fmt.Println(version.Print("loki"))

go.mod

+3-1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ require (
134134
github.com/prometheus/alertmanager v0.27.0
135135
github.com/prometheus/common/sigv4 v0.1.0
136136
github.com/richardartoul/molecule v1.0.0
137+
github.com/schollz/progressbar/v3 v3.14.2
137138
github.com/shirou/gopsutil/v4 v4.24.0-alpha.1
138139
github.com/thanos-io/objstore v0.0.0-20230829152104-1b257a36f9a3
139140
github.com/willf/bloom v2.0.3+incompatible
@@ -153,6 +154,7 @@ require (
153154
github.com/go-ole/go-ole v1.2.6 // indirect
154155
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
155156
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
157+
github.com/rivo/uniseg v0.4.7 // indirect
156158
github.com/shoenig/go-m1cpu v0.1.6 // indirect
157159
github.com/tklauser/go-sysconf v0.3.12 // indirect
158160
github.com/tklauser/numcpus v0.6.1 // indirect
@@ -286,7 +288,7 @@ require (
286288
github.com/leodido/ragel-machinery v0.0.0-20181214104525-299bdde78165 // indirect
287289
github.com/mailru/easyjson v0.7.7 // indirect
288290
github.com/mattn/go-colorable v0.1.13 // indirect
289-
github.com/mattn/go-isatty v0.0.19 // indirect
291+
github.com/mattn/go-isatty v0.0.20 // indirect
290292
github.com/miekg/dns v1.1.58 // indirect
291293
github.com/minio/md5-simd v1.1.2 // indirect
292294
github.com/minio/sha256-simd v1.0.1 // indirect

go.sum

+9-2
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,7 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
12591259
github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U=
12601260
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
12611261
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
1262+
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
12621263
github.com/kardianos/service v1.0.0/go.mod h1:8CzDhVuCuugtsHyZoTvsOBuvonN/UDBvl0kH+BUxvbo=
12631264
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
12641265
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
@@ -1355,8 +1356,8 @@ github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOA
13551356
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
13561357
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
13571358
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
1358-
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
1359-
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
1359+
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
1360+
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
13601361
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
13611362
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
13621363
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
@@ -1625,6 +1626,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qq
16251626
github.com/renier/xmlrpc v0.0.0-20170708154548-ce4a1a486c03/go.mod h1:gRAiPF5C5Nd0eyyRdqIu9qTiFSoZzpTq727b5B8fkkU=
16261627
github.com/richardartoul/molecule v1.0.0 h1:+LFA9cT7fn8KF39zy4dhOnwcOwRoqKiBkPqKqya+8+U=
16271628
github.com/richardartoul/molecule v1.0.0/go.mod h1:uvX/8buq8uVeiZiFht+0lqSLBHF+uGV8BrTv8W/SIwk=
1629+
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
1630+
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
16281631
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
16291632
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
16301633
github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
@@ -1650,6 +1653,8 @@ github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdh
16501653
github.com/satori/go.uuid v1.2.1-0.20181028125025-b2ce2384e17b/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
16511654
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25 h1:/8rfZAdFfafRXOgz+ZpMZZWZ5pYggCY9t7e/BvjaBHM=
16521655
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg=
1656+
github.com/schollz/progressbar/v3 v3.14.2 h1:EducH6uNLIWsr560zSV1KrTeUb/wZGAHqyMFIEa99ks=
1657+
github.com/schollz/progressbar/v3 v3.14.2/go.mod h1:aQAZQnhF4JGFtRJiw/eobaXpsqpVQAftEQ+hLGXaRc4=
16531658
github.com/sean-/conswriter v0.0.0-20180208195008-f5ae3917a627/go.mod h1:7zjs06qF79/FKAJpBvFx3P8Ww4UTIMAe+lpNXDHziac=
16541659
github.com/sean-/pager v0.0.0-20180208200047-666be9bf53b5/go.mod h1:BeybITEsBEg6qbIiqJ6/Bqeq25bCLbL7YFmpaFfJDuM=
16551660
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
@@ -2257,6 +2262,7 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
22572262
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
22582263
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
22592264
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
2265+
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
22602266
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
22612267
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
22622268
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
@@ -2266,6 +2272,7 @@ golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
22662272
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
22672273
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
22682274
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
2275+
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
22692276
golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
22702277
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
22712278
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

pkg/tool/audit/README.md

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Loki Index Auditing
2+
3+
## Usage
4+
5+
To audit your index data:
6+
1. Make sure you're authenticated to the cloud provider that hosts your bucket.
7+
In this example I'll be using GCP.
8+
2. Create a new YAML configuration file that defines your storage configuration.
9+
`lokitool` will use it to communicate with your data.
10+
Only TSDB is supported. Make sure you provide all three fields: `schema_config`, `storage_config` and `tenant`. In this example I'm naming my file `configfile.yaml`:
11+
```yaml
12+
schema_config:
13+
configs:
14+
- from: "2023-08-21"
15+
index:
16+
period: 24h
17+
prefix: loki_env_tsdb_index_
18+
object_store: gcs
19+
schema: v13
20+
store: tsdb
21+
22+
storage_config:
23+
gcs:
24+
bucket_name: loki-bucket
25+
26+
tenant: 12345
27+
```
28+
3. Build a new `lokitool` binary:
29+
```bash
30+
go build ./cmd/lokitool
31+
```
32+
4. Finally, invoke the `audit index` command the following way:
33+
```bash
34+
./lokitool audit index --period=19856 --config.file=configfile.yaml --index.file=index/loki_env_tsdb_index_19856/12345/1715707992714992001-compactor-1715199977885-1815707796275-g8003361.tsdb.gz
35+
```
36+
The `--period` is the period of the index being audited. You can find it by checking the 5-digit number appended
37+
as a suffix of the Loki environment name in the index file. Example: For `index/loki_env_tsdb_index_19856/12345/...`,
38+
the period is 19856.
39+
The `--config.file` is the YAML configuration file described in the second step.
40+
The `--index.file` is the path to the index file you want to audit. Take a look at your bucket to find its exact path and substitute it accordingly.

pkg/tool/audit/audit.go

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package audit
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"path"
8+
"strings"
9+
"time"
10+
11+
"github.com/go-kit/log"
12+
"github.com/go-kit/log/level"
13+
progressbar "github.com/schollz/progressbar/v3"
14+
"go.uber.org/atomic"
15+
"golang.org/x/sync/errgroup"
16+
17+
"github.com/grafana/loki/v3/pkg/compactor"
18+
"github.com/grafana/loki/v3/pkg/compactor/retention"
19+
"github.com/grafana/loki/v3/pkg/storage"
20+
loki_storage "github.com/grafana/loki/v3/pkg/storage"
21+
"github.com/grafana/loki/v3/pkg/storage/chunk/client"
22+
indexshipper_storage "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage"
23+
shipperutil "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage"
24+
"github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb"
25+
util_log "github.com/grafana/loki/v3/pkg/util/log"
26+
)
27+
28+
const (
29+
TsFormat = time.RFC3339Nano
30+
)
31+
32+
func Run(ctx context.Context, cloudIndexPath, table string, cfg Config, logger log.Logger) (int, int, error) {
33+
level.Info(logger).Log("msg", "auditing index", "index", cloudIndexPath, "table", table, "tenant", cfg.Tenant, "working_dir", cfg.WorkingDir)
34+
35+
objClient, err := GetObjectClient(cfg)
36+
if err != nil {
37+
return 0, 0, err
38+
}
39+
40+
localFile, err := DownloadIndexFile(ctx, cfg, cloudIndexPath, objClient, logger)
41+
if err != nil {
42+
return 0, 0, err
43+
}
44+
45+
compactedIdx, err := ParseCompactexIndex(ctx, localFile, table, cfg)
46+
if err != nil {
47+
return 0, 0, err
48+
}
49+
defer compactedIdx.Cleanup()
50+
51+
return ValidateCompactedIndex(ctx, objClient, compactedIdx, cfg.Concurrency, logger)
52+
}
53+
54+
func GetObjectClient(cfg Config) (client.ObjectClient, error) {
55+
periodCfg := cfg.SchemaConfig.Configs[len(cfg.SchemaConfig.Configs)-1] // only check the last period.
56+
57+
objClient, err := loki_storage.NewObjectClient(periodCfg.ObjectType, cfg.StorageConfig, storage.NewClientMetrics())
58+
if err != nil {
59+
return nil, fmt.Errorf("couldn't create object client: %w", err)
60+
}
61+
62+
return objClient, nil
63+
}
64+
65+
func DownloadIndexFile(ctx context.Context, cfg Config, cloudIndexPath string, objClient client.ObjectClient, logger log.Logger) (string, error) {
66+
splitPath := strings.Split(cloudIndexPath, "/")
67+
localFileName := splitPath[len(splitPath)-1]
68+
decompress := indexshipper_storage.IsCompressedFile(cloudIndexPath)
69+
if decompress {
70+
// get rid of the last extension, which is .gz
71+
localFileName = strings.TrimSuffix(localFileName, path.Ext(localFileName))
72+
}
73+
localFilePath := path.Join(cfg.WorkingDir, localFileName)
74+
if err := shipperutil.DownloadFileFromStorage(localFilePath, decompress, false, logger, func() (io.ReadCloser, error) {
75+
r, _, err := objClient.GetObject(ctx, cloudIndexPath)
76+
return r, err
77+
}); err != nil {
78+
return "", fmt.Errorf("couldn't download file %q from storage: %w", cloudIndexPath, err)
79+
}
80+
81+
level.Info(logger).Log("msg", "file successfully downloaded from storage", "path", cloudIndexPath)
82+
return localFileName, nil
83+
}
84+
85+
func ParseCompactexIndex(ctx context.Context, localFilePath, table string, cfg Config) (compactor.CompactedIndex, error) {
86+
periodCfg := cfg.SchemaConfig.Configs[len(cfg.SchemaConfig.Configs)-1] // only check the last period.
87+
idxCompactor := tsdb.NewIndexCompactor()
88+
compactedIdx, err := idxCompactor.OpenCompactedIndexFile(ctx, localFilePath, table, cfg.Tenant, cfg.WorkingDir, periodCfg, util_log.Logger)
89+
if err != nil {
90+
return nil, fmt.Errorf("couldn't open compacted index file %q: %w", localFilePath, err)
91+
}
92+
return compactedIdx, nil
93+
}
94+
95+
func ValidateCompactedIndex(ctx context.Context, objClient client.ObjectClient, compactedIdx compactor.CompactedIndex, parallelism int, logger log.Logger) (int, int, error) {
96+
var missingChunks, foundChunks atomic.Int32
97+
foundChunks.Store(0)
98+
missingChunks.Store(0)
99+
bar := progressbar.NewOptions(-1,
100+
progressbar.OptionShowCount(),
101+
progressbar.OptionSetDescription("Chunks validated"),
102+
)
103+
104+
g, ctx := errgroup.WithContext(ctx)
105+
g.SetLimit(parallelism)
106+
compactedIdx.ForEachChunk(ctx, func(ce retention.ChunkEntry) (deleteChunk bool, err error) { //nolint:errcheck
107+
bar.Add(1) // nolint:errcheck
108+
g.Go(func() error {
109+
exists, err := CheckChunkExistance(string(ce.ChunkID), objClient)
110+
if err != nil || !exists {
111+
missingChunks.Add(1)
112+
logger.Log("msg", "chunk is missing", "err", err, "chunk_id", string(ce.ChunkID))
113+
return nil
114+
}
115+
foundChunks.Add(1)
116+
return nil
117+
})
118+
119+
return false, nil
120+
})
121+
g.Wait() // nolint:errcheck
122+
123+
return int(foundChunks.Load()), int(missingChunks.Load()), nil
124+
}
125+
126+
func CheckChunkExistance(key string, objClient client.ObjectClient) (bool, error) {
127+
exists, err := objClient.ObjectExists(context.Background(), key)
128+
if err != nil {
129+
return false, err
130+
}
131+
132+
return exists, nil
133+
}

pkg/tool/audit/audit_test.go

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package audit
2+
3+
import (
4+
"context"
5+
"strings"
6+
"testing"
7+
8+
"github.com/go-kit/log"
9+
"github.com/stretchr/testify/require"
10+
11+
"github.com/grafana/loki/v3/pkg/compactor"
12+
"github.com/grafana/loki/v3/pkg/compactor/retention"
13+
"github.com/grafana/loki/v3/pkg/storage/chunk/client"
14+
)
15+
16+
type testObjClient struct {
17+
client.ObjectClient
18+
}
19+
20+
func (t testObjClient) ObjectExists(_ context.Context, object string) (bool, error) {
21+
if strings.Contains(object, "missing") {
22+
return false, nil
23+
}
24+
return true, nil
25+
}
26+
27+
type testCompactedIdx struct {
28+
compactor.CompactedIndex
29+
30+
chunks []retention.ChunkEntry
31+
}
32+
33+
func (t testCompactedIdx) ForEachChunk(_ context.Context, f retention.ChunkEntryCallback) error {
34+
for _, chunk := range t.chunks {
35+
if _, err := f(chunk); err != nil {
36+
return err
37+
}
38+
}
39+
return nil
40+
}
41+
42+
func TestAuditIndex(t *testing.T) {
43+
ctx := context.Background()
44+
objClient := testObjClient{}
45+
compactedIdx := testCompactedIdx{
46+
chunks: []retention.ChunkEntry{
47+
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-1")}},
48+
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-2")}},
49+
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-3")}},
50+
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-4")}},
51+
{ChunkRef: retention.ChunkRef{ChunkID: []byte("missing-1")}},
52+
},
53+
}
54+
logger := log.NewNopLogger()
55+
found, missing, err := ValidateCompactedIndex(ctx, objClient, compactedIdx, 1, logger)
56+
require.NoError(t, err)
57+
require.Equal(t, 4, found)
58+
require.Equal(t, 1, missing)
59+
}

0 commit comments

Comments
 (0)