Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache: handle crash after snapshot commit #2564

Merged
merged 1 commit into from
Jan 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 36 additions & 15 deletions cache/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,25 +410,32 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt

if mutableID := md.getEqualMutable(); mutableID != "" {
mutable, err := cm.getRecord(ctx, mutableID)
if err != nil {
// check loading mutable deleted record from disk
if IsNotFound(err) {
if err == nil {
rec := &cacheRecord{
mu: &sync.Mutex{},
cm: cm,
refs: make(map[ref]struct{}),
parentRefs: parents,
cacheMetadata: md,
equalMutable: &mutableRef{cacheRecord: mutable},
}
mutable.equalImmutable = &immutableRef{cacheRecord: rec}
cm.records[id] = rec
return rec, nil
} else if IsNotFound(err) {
// The equal mutable for this ref is not found, check to see if our snapshot exists
if _, statErr := cm.Snapshotter.Stat(ctx, md.getSnapshotID()); statErr != nil {
// this ref's snapshot also doesn't exist, just remove this record
cm.MetadataStore.Clear(id)
return nil, errors.Wrap(errNotFound, id)
}
// Our snapshot exists, so there may have been a crash while finalizing this ref.
// Clear the equal mutable field and continue using this ref.
md.clearEqualMutable()
md.commitMetadata()
} else {
return nil, err
}

rec := &cacheRecord{
mu: &sync.Mutex{},
cm: cm,
refs: make(map[ref]struct{}),
parentRefs: parents,
cacheMetadata: md,
equalMutable: &mutableRef{cacheRecord: mutable},
}
mutable.equalImmutable = &immutableRef{cacheRecord: rec}
cm.records[id] = rec
return rec, nil
}

rec := &cacheRecord{
Expand All @@ -448,6 +455,20 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt
return nil, errors.Wrapf(errNotFound, "failed to get deleted record %s", id)
}

if rec.mutable {
// If the record is mutable, then the snapshot must exist
if _, err := cm.Snapshotter.Stat(ctx, rec.ID()); err != nil {
if !errdefs.IsNotFound(err) {
return nil, errors.Wrap(err, "failed to check mutable ref snapshot")
}
// the snapshot doesn't exist, clear this record
if err := rec.remove(ctx, true); err != nil {
return nil, errors.Wrap(err, "failed to remove mutable rec with missing snapshot")
}
return nil, errors.Wrap(errNotFound, rec.ID())
}
}

if err := initializeMetadata(rec.cacheMetadata, rec.parentRefs, opts...); err != nil {
return nil, err
}
Expand Down
81 changes: 81 additions & 0 deletions cache/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1684,6 +1684,87 @@ func TestMergeOp(t *testing.T) {
checkDiskUsage(ctx, t, cm, 0, 0)
}

func TestLoadHalfFinalizedRef(t *testing.T) {
// This test simulates the situation where a ref w/ an equalMutable has its
// snapshot committed but there is a crash before the metadata is updated to
// clear the equalMutable field. It's expected that the mutable will be
// removed and the immutable ref will continue to be usable.
t.Parallel()

ctx := namespaces.WithNamespace(context.Background(), "buildkit-test")

tmpdir, err := ioutil.TempDir("", "cachemanager")
require.NoError(t, err)
defer os.RemoveAll(tmpdir)

snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots"))
require.NoError(t, err)

co, cleanup, err := newCacheManager(ctx, cmOpt{
tmpdir: tmpdir,
snapshotter: snapshotter,
snapshotterName: "native",
})
require.NoError(t, err)
defer cleanup()
cm := co.manager.(*cacheManager)

mref, err := cm.New(ctx, nil, nil, CachePolicyRetain)
require.NoError(t, err)
mutRef := mref.(*mutableRef)

iref, err := mutRef.Commit(ctx)
require.NoError(t, err)
immutRef := iref.(*immutableRef)

require.NoError(t, mref.Release(ctx))

_, err = co.lm.Create(ctx, func(l *leases.Lease) error {
l.ID = immutRef.ID()
l.Labels = map[string]string{
"containerd.io/gc.flat": time.Now().UTC().Format(time.RFC3339Nano),
}
return nil
})
require.NoError(t, err)
err = co.lm.AddResource(ctx, leases.Lease{ID: immutRef.ID()}, leases.Resource{
ID: immutRef.getSnapshotID(),
Type: "snapshots/" + cm.Snapshotter.Name(),
})
require.NoError(t, err)

err = cm.Snapshotter.Commit(ctx, immutRef.getSnapshotID(), mutRef.getSnapshotID())
require.NoError(t, err)

_, err = cm.Snapshotter.Stat(ctx, mutRef.getSnapshotID())
require.Error(t, err)

require.NoError(t, iref.Release(ctx))

require.NoError(t, cm.Close())
require.NoError(t, cleanup())

co, cleanup, err = newCacheManager(ctx, cmOpt{
tmpdir: tmpdir,
snapshotter: snapshotter,
snapshotterName: "native",
})
require.NoError(t, err)
defer cleanup()
cm = co.manager.(*cacheManager)

_, err = cm.GetMutable(ctx, mutRef.ID())
require.ErrorIs(t, err, errNotFound)

iref, err = cm.Get(ctx, immutRef.ID())
require.NoError(t, err)
require.NoError(t, iref.Finalize(ctx))
immutRef = iref.(*immutableRef)

_, err = cm.Snapshotter.Stat(ctx, immutRef.getSnapshotID())
require.NoError(t, err)
}

func TestMountReadOnly(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
Expand Down