Skip to content

Commit

Permalink
GC: add support for GC policies
Browse files Browse the repository at this point in the history
For our dist-spec implementation [1][2],
we have a use case where we use umoci's GC code to clean up orphaned
blobs. The issue is that, per the dist-spec, layer uploads and manifest
updates are two separate API calls, typically made in that order, which
means layers begin life as orphans.

Adding the proposed patch allows us to:
a) mitigate the above issue
b) have a generic policy framework for future expansion

Note that GC() now uses Paths() instead of Reachable()

[1] https://github.com/opencontainers/distribution-spec
[2] https://github.com/anuvu/zot

Signed-off-by: Ramkumar Chinchani <[email protected]>
  • Loading branch information
rchincha committed Jun 30, 2020
1 parent cd9e9b1 commit d373e90
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 5 deletions.
28 changes: 23 additions & 5 deletions oci/casext/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ import (
"golang.org/x/net/context"
)

// GCPolicy is a policy function that reports whether a blob may be garbage
// collected. GC consults every supplied policy for each blob that was not
// marked as reachable: returning false keeps the blob (it is skipped rather
// than deleted), and returning a non-nil error aborts the collection.
//
// NOTE(review): GC looks the descriptor up in its reachable set, so for the
// unreachable blobs that policies are asked about, desc appears to be the
// zero-valued descriptor -- confirm before relying on its fields.
type GCPolicy func(desc ispec.Descriptor) (bool, error)

// GC will perform a mark-and-sweep garbage collection of the OCI image
// referenced by the given CAS engine. The root set is taken to be the set of
// references stored in the image, and all blobs not reachable by following a
Expand All @@ -35,7 +38,7 @@ import (
// functions. In other words, it assumes it is the only user of the image that
// is making modifications. Things will not go well if this assumption is
// challenged.
func (e Engine) GC(ctx context.Context) error {
func (e Engine) GC(ctx context.Context, policies ...GCPolicy) error {
// Generate the root set of descriptors.
var root []ispec.Descriptor

Expand All @@ -52,18 +55,18 @@ func (e Engine) GC(ctx context.Context) error {
}

// Mark from the root sets.
black := map[digest.Digest]struct{}{}
black := map[digest.Digest]ispec.Descriptor{}
for idx, descriptor := range root {
log.WithFields(log.Fields{
"digest": descriptor.Digest,
}).Debugf("GC: marking from root")

reachables, err := e.Reachable(ctx, descriptor)
reachables, err := e.Paths(ctx, descriptor)
if err != nil {
return errors.Wrapf(err, "getting reachables from root %d", idx)
}
for _, reachable := range reachables {
black[reachable] = struct{}{}
black[reachable.Descriptor().Digest] = reachable.Descriptor()
}
}

Expand All @@ -74,11 +77,26 @@ func (e Engine) GC(ctx context.Context) error {
}

n := 0
sweep:
for _, digest := range blobs {
if _, ok := black[digest]; ok {
desc, ok := black[digest]
if ok {
// Digest is in the black set.
continue
}

// Iterate over all policy funcs and if any one of them indicates that GC
// must not be performed, skip that blob
for _, policy := range policies {
ok, err := policy(desc)
if err != nil {
return errors.Wrap(err, "invoking policy failed")
}
if !ok {
continue sweep
}
}

log.Infof("garbage collecting blob: %s", digest)

if err := e.DeleteBlob(ctx, digest); err != nil {
Expand Down
142 changes: 142 additions & 0 deletions oci/casext/gc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package casext
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
Expand Down Expand Up @@ -175,3 +176,144 @@ func TestGCWithNonEmptyIndex(t *testing.T) {
t.Fatalf("expected single-entry blob list after GC")
}
}

// gcOKFunc is a GC policy stub that always allows the blob to be collected.
// The descriptor is ignored.
func gcOKFunc(_ ispec.Descriptor) (bool, error) {
	return true, nil
}

// gcSkipFunc is a GC policy stub that always vetoes collection, so every
// blob it is asked about is kept. The descriptor is ignored.
func gcSkipFunc(_ ispec.Descriptor) (bool, error) {
	return false, nil
}

// errFunc is a GC policy stub that always fails, used to check that a policy
// error is propagated out of GC. The descriptor is ignored.
func errFunc(_ ispec.Descriptor) (bool, error) {
	policyErr := fmt.Errorf("err policy")
	return false, policyErr
}

// TestGCWithPolicy checks that GC honours the policy functions passed to it.
// It populates an image with orphan blobs plus one blob/manifest/index chain
// that is referenced from the index, then runs GC three times:
//
//  1. with a policy that returns an error -- GC must fail;
//  2. with a policy that vetoes collection -- GC must succeed and must not
//     remove any blob;
//  3. with a policy that allows collection -- GC must succeed and leave a
//     single blob behind.
func TestGCWithPolicy(t *testing.T) {
	ctx := context.Background()

	root, err := ioutil.TempDir("", "umoci-TestEngineReference")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	image := filepath.Join(root, "image")
	if err := dir.Create(image); err != nil {
		t.Fatalf("unexpected error creating image: %+v", err)
	}

	engine, err := dir.Open(image)
	if err != nil {
		t.Fatalf("unexpected error opening image: %+v", err)
	}
	engineExt := NewEngine(engine)
	defer engine.Close()

	// creates an empty index.json and several orphan blobs which should be pruned
	descMap, err := fakeSetupEngine(t, engineExt)
	if err != nil {
		t.Fatalf("unexpected error doing fakeSetupEngine: %+v", err)
	}
	if descMap == nil {
		t.Fatalf("empty descMap")
	}

	b, err := engine.ListBlobs(ctx)
	if err != nil {
		t.Fatalf("unable to list blobs: %+v", err)
	}
	if len(b) == 0 {
		t.Fatalf("expected non-empty blob list before GC")
	}

	// build a blob, manifest, index that will survive GC
	content := "this is a test blob"
	br := strings.NewReader(content)
	blobDigest, blobSize, err := engine.PutBlob(ctx, br)
	if err != nil {
		t.Fatalf("error writing blob: %+v", err)
	}
	if blobSize != int64(len(content)) {
		t.Fatalf("partially written blob")
	}

	m := ispec.Manifest{
		Versioned: imeta.Versioned{
			SchemaVersion: 2,
		},
		Config: ispec.Descriptor{
			MediaType: ispec.MediaTypeImageIndex,
			Digest:    blobDigest,
			Size:      blobSize,
		},
		Layers: []ispec.Descriptor{
			{
				MediaType: ispec.MediaTypeImageIndex,
				Digest:    blobDigest,
				Size:      blobSize,
			},
		},
	}
	data, err := json.Marshal(&m)
	if err != nil {
		t.Fatalf("error marshaling json: %+v", err)
	}
	mr := bytes.NewReader(data)
	manifestDigest, manifestSize, err := engine.PutBlob(ctx, mr)
	if err != nil {
		t.Fatalf("error writing blob: %+v", err)
	}
	if manifestSize != int64(len(data)) {
		t.Fatalf("partially written blob")
	}

	idx := ispec.Index{
		Versioned: imeta.Versioned{
			SchemaVersion: 2,
		},
		Manifests: []ispec.Descriptor{
			{
				MediaType: ispec.MediaTypeImageIndex,
				Digest:    manifestDigest,
				Size:      manifestSize,
			},
		},
	}
	if err := engine.PutIndex(ctx, idx); err != nil {
		t.Fatalf("error writing index: %+v", err)
	}

	// Record how many blobs exist before any GC run so the "skip" case can
	// assert that nothing at all was removed.
	b, err = engine.ListBlobs(ctx)
	if err != nil {
		t.Fatalf("unable to list blobs: %+v", err)
	}
	blobsBeforeGC := len(b)

	err = engineExt.GC(ctx, errFunc)
	// expect this to fail
	if err == nil {
		t.Fatalf("expected GC to fail when a policy returns an error")
	}

	err = engineExt.GC(ctx, gcSkipFunc)
	// expect this to succeed but not perform GC
	if err != nil {
		t.Fatalf("GC failed: %+v", err)
	}
	b, err = engine.ListBlobs(ctx)
	if err != nil {
		t.Fatalf("unable to list blobs: %+v", err)
	}
	if len(b) != blobsBeforeGC {
		t.Fatalf("expected skip GC policy to keep all %d blobs, got %d", blobsBeforeGC, len(b))
	}

	err = engineExt.GC(ctx, gcOKFunc)
	// expect this to succeed
	if err != nil {
		t.Fatalf("GC failed: %+v", err)
	}

	b, err = engine.ListBlobs(ctx)
	if err != nil {
		t.Fatalf("unable to list blobs: %+v", err)
	}
	if len(b) != 1 {
		t.Fatalf("expected single-entry blob list after GC")
	}
}

0 comments on commit d373e90

Please sign in to comment.