Skip to content

Commit fc0d609

Browse files
chaudum authored and stkfujii committed
fix(blooms): Handle not found metas gracefully (grafana#12853)
There is a time window between listing metas and fetching them from object storage, which can lead to a race condition in which a meta is not found in object storage because it was deleted and superseded by a newer meta. This can happen when querying recent bloom data that is still subject to updates, and results in an error like this:

```
rpc error: code = Unknown desc = failed to get meta file bloom/tsdb_index_19843/XXXX/metas/18fbdc8500000000-1921d15dffffffff-270affee.json: storage: object doesn't exist (Trace ID: 4fe28d32cfa3e3df9495c3a5d4a683fb)
```

Signed-off-by: Christian Haudum <[email protected]>
1 parent 6926433 commit fc0d609

File tree

2 files changed

+27
-11
lines changed

2 files changed

+27
-11
lines changed

pkg/storage/stores/shipper/bloomshipper/client.go

+13-6
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"time"
1313

1414
"github.com/go-kit/log"
15+
"github.com/go-kit/log/level"
1516
"github.com/grafana/dskit/concurrency"
1617
"github.com/pkg/errors"
1718
"github.com/prometheus/common/model"
@@ -388,28 +389,34 @@ func (b *BloomClient) GetMetas(ctx context.Context, refs []MetaRef) ([]Meta, err
388389
err := concurrency.ForEachJob(ctx, len(refs), b.concurrency, func(ctx context.Context, idx int) error {
389390
meta, err := b.GetMeta(ctx, refs[idx])
390391
if err != nil {
391-
return err
392+
key := b.KeyResolver.Meta(refs[idx]).Addr()
393+
if !b.IsObjectNotFoundErr(err) {
394+
return fmt.Errorf("failed to get meta file %s: %w", key, err)
395+
}
396+
level.Error(b.logger).Log("msg", "failed to get meta file", "ref", key, "err", err)
392397
}
393398
results[idx] = meta
394399
return nil
395400
})
396401
return results, err
397402
}
398403

404+
// GetMeta fetches the meta file for given MetaRef from object storage and
405+
// decodes the JSON data into a Meta.
406+
// If the meta file is not found in storage or decoding fails, a Meta initialized
407+
// with the given MetaRef (not a fully decoded one) is returned along with the error.
399408
func (b *BloomClient) GetMeta(ctx context.Context, ref MetaRef) (Meta, error) {
400-
meta := Meta{
401-
MetaRef: ref,
402-
}
409+
meta := Meta{MetaRef: ref}
403410
key := b.KeyResolver.Meta(ref).Addr()
404411
reader, _, err := b.client.GetObject(ctx, key)
405412
if err != nil {
406-
return Meta{}, fmt.Errorf("failed to get meta file%s: %w", key, err)
413+
return meta, err
407414
}
408415
defer reader.Close()
409416

410417
err = json.NewDecoder(reader).Decode(&meta)
411418
if err != nil {
412-
return Meta{}, fmt.Errorf("failed to decode meta file %s: %w", key, err)
419+
return meta, errors.Wrap(err, "failed to decode JSON")
413420
}
414421
return meta, nil
415422
}

pkg/storage/stores/shipper/bloomshipper/client_test.go

+14-5
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,20 @@ func TestBloomClient_GetMetas(t *testing.T) {
107107
require.Equal(t, metas, []Meta{m1, m2})
108108
})
109109

110-
t.Run("does not exist", func(t *testing.T) {
111-
metas, err := c.GetMetas(ctx, []MetaRef{{}})
112-
require.Error(t, err)
113-
require.True(t, c.client.IsObjectNotFoundErr(err))
114-
require.Equal(t, metas, []Meta{{}})
110+
t.Run("does not exist - yields empty meta", func(t *testing.T) {
111+
ref := MetaRef{
112+
Ref: Ref{
113+
TenantID: "tenant",
114+
TableName: "table",
115+
Bounds: v1.FingerprintBounds{},
116+
StartTimestamp: 1000,
117+
EndTimestamp: 2000,
118+
Checksum: 1234,
119+
},
120+
}
121+
metas, err := c.GetMetas(ctx, []MetaRef{ref})
122+
require.NoError(t, err)
123+
require.Equal(t, metas, []Meta{{MetaRef: ref}})
115124
})
116125
}
117126

0 commit comments

Comments
 (0)