From ff87ad884c15ac64d1d3c34972ff6b0c040e59a7 Mon Sep 17 00:00:00 2001 From: Jesse Haka Date: Tue, 6 Jun 2017 11:02:47 +0300 Subject: [PATCH] add possibility to clean untagged manifests add tests add possibility to clean untagged manifests Signed-off-by: Jesse Haka add dry tests Signed-off-by: Jesse Haka remove underscores Signed-off-by: Jesse Haka fixes Signed-off-by: Jesse Haka opts struct+use camelcase Signed-off-by: Jesse Haka doublecheck manifest in paths.go Signed-off-by: Jesse Haka add gofmt Signed-off-by: Jesse Haka fix lint Signed-off-by: Jesse Haka add log print Signed-off-by: Jesse Haka move log to dryrun as well Signed-off-by: Jesse Haka remove counter Signed-off-by: Jesse Haka remove manifest tag references Signed-off-by: Jesse Haka add tag to tests Signed-off-by: Jesse Haka manifestsWithoutTags -> removeUntagged Signed-off-by: Jesse Haka remove RemoveManifestTagReferences and use removemanifests Signed-off-by: Jesse Haka remove comment Signed-off-by: Jesse Haka remove pathfor Signed-off-by: Jesse Haka move removemanifest out of manifestenumerator, it does not work correctly if we delete stuff in it Signed-off-by: Jesse Haka add comment Signed-off-by: Jesse Haka fix context -> dcontext Signed-off-by: Jesse Haka fix gofmt --- registry/root.go | 7 +- registry/storage/garbagecollect.go | 50 ++++++++- registry/storage/garbagecollect_test.go | 133 +++++++++++++++++++++++- registry/storage/vacuum.go | 34 ++++++ 4 files changed, 214 insertions(+), 10 deletions(-) diff --git a/registry/root.go b/registry/root.go index 538139f1..94d22f3c 100644 --- a/registry/root.go +++ b/registry/root.go @@ -18,6 +18,7 @@ func init() { RootCmd.AddCommand(ServeCmd) RootCmd.AddCommand(GCCmd) GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything except remove the blobs") + GCCmd.Flags().BoolVarP(&removeUntagged, "delete-untagged", "m", false, "delete manifests that are not currently referenced via tag") RootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "show the version and exit") } @@ -36,6 +37,7 @@ var RootCmd = &cobra.Command{ } var dryRun bool +var removeUntagged bool // GCCmd is the cobra command that corresponds to the garbage-collect subcommand var GCCmd = &cobra.Command{ @@ -75,7 +77,10 @@ var GCCmd = &cobra.Command{ os.Exit(1) } - err = storage.MarkAndSweep(ctx, driver, registry, dryRun) + err = storage.MarkAndSweep(ctx, driver, registry, storage.GCOpts{ + DryRun: dryRun, + RemoveUntagged: removeUntagged, + }) if err != nil { fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err) os.Exit(1) diff --git a/registry/storage/garbagecollect.go b/registry/storage/garbagecollect.go index ada48654..48d428a8 100644 --- a/registry/storage/garbagecollect.go +++ b/registry/storage/garbagecollect.go @@ -14,8 +14,21 @@ func emit(format string, a ...interface{}) { fmt.Printf(format+"\n", a...) } +// GCOpts contains options for garbage collector +type GCOpts struct { + DryRun bool + RemoveUntagged bool +} + +// ManifestDel contains manifest structure which will be deleted +type ManifestDel struct { + Name string + Digest digest.Digest + Tags []string +} + // MarkAndSweep performs a mark and sweep of registry data -func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, dryRun bool) error { +func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, opts GCOpts) error { repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator) if !ok { return fmt.Errorf("unable to convert Namespace to RepositoryEnumerator") @@ -23,6 +36,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis // mark markSet := make(map[digest.Digest]struct{}) + manifestArr := make([]ManifestDel, 0) err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error { emit(repoName) @@ -47,6 +61,25 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis } err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { + if opts.RemoveUntagged { + // fetch all tags where this manifest is the latest one + tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst}) + if err != nil { + return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err) + } + if len(tags) == 0 { + emit("manifest eligible for deletion: %s", dgst) + // fetch all tags from repository + // all of these tags could contain manifest in history + // which means that we need check (and delete) those references when deleting manifest + allTags, err := repository.Tags(ctx).All(ctx) + if err != nil { + return fmt.Errorf("failed to retrieve tags %v", err) + } + manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags}) + return nil + } + } // Mark the manifest's blob emit("%s: marking manifest %s ", repoName, dgst) markSet[dgst] = struct{}{} @@ -84,6 +117,15 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis } // sweep + vacuum := NewVacuum(ctx, storageDriver) + if !opts.DryRun { + for _, obj := range manifestArr { + err = vacuum.RemoveManifest(obj.Name, obj.Digest, obj.Tags) + if err != nil { + return fmt.Errorf("failed to delete manifest %s: %v", obj.Digest, err) + } + } + } blobService := registry.Blobs() deleteSet := make(map[digest.Digest]struct{}) err = blobService.Enumerate(ctx, func(dgst digest.Digest) error { @@ -96,12 +138,10 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis if err != nil { return fmt.Errorf("error enumerating blobs: %v", err) } - emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), len(deleteSet)) - // Construct vacuum - vacuum := NewVacuum(ctx, storageDriver) + emit("\n%d blobs marked, %d blobs and %d manifests eligible for deletion", len(markSet), len(deleteSet), len(manifestArr)) for dgst := range deleteSet { emit("blob eligible for deletion: %s", dgst) - if dryRun { + if opts.DryRun { continue } err = vacuum.RemoveBlob(string(dgst)) diff --git a/registry/storage/garbagecollect_test.go b/registry/storage/garbagecollect_test.go index 2e36fddb..1694f891 100644 --- a/registry/storage/garbagecollect_test.go +++ b/registry/storage/garbagecollect_test.go @@ -61,6 +61,23 @@ func makeManifestService(t *testing.T, repository distribution.Repository) distr return manifestService } +func allManifests(t *testing.T, manifestService distribution.ManifestService) map[digest.Digest]struct{} { + ctx := context.Background() + allManMap := make(map[digest.Digest]struct{}) + manifestEnumerator, ok := manifestService.(distribution.ManifestEnumerator) + if !ok { + t.Fatalf("unable to convert ManifestService into ManifestEnumerator") + } + err := manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { + allManMap[dgst] = struct{}{} + return nil + }) + if err != nil { + t.Fatalf("Error getting all manifests: %v", err) + } + return allManMap +} + func allBlobs(t *testing.T, registry distribution.Namespace) map[digest.Digest]struct{} { ctx := context.Background() blobService := registry.Blobs() @@ -169,7 +186,10 @@ func TestNoDeletionNoEffect(t *testing.T) { before := allBlobs(t, registry) // Run GC - err = MarkAndSweep(context.Background(), inmemoryDriver, registry, false) + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: false, + }) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -180,6 +200,102 @@ func TestNoDeletionNoEffect(t *testing.T) { } } +func TestDeleteManifestIfTagNotFound(t *testing.T) { + ctx := context.Background() + inmemoryDriver := inmemory.New() + + registry := createRegistry(t, inmemoryDriver) + repo := makeRepository(t, registry, "deletemanifests") + manifestService, err := repo.Manifests(ctx) + + // Create random layers + randomLayers1, err := testutil.CreateRandomLayers(3) + if err != nil { + t.Fatalf("failed to make layers: %v", err) + } + + randomLayers2, err := testutil.CreateRandomLayers(3) + if err != nil { + t.Fatalf("failed to make layers: %v", err) + } + + // Upload all layers + err = testutil.UploadBlobs(repo, randomLayers1) + if err != nil { + t.Fatalf("failed to upload layers: %v", err) + } + + err = testutil.UploadBlobs(repo, randomLayers2) + if err != nil { + t.Fatalf("failed to upload layers: %v", err) + } + + // Construct manifests + manifest1, err := testutil.MakeSchema1Manifest(getKeys(randomLayers1)) + if err != nil { + t.Fatalf("failed to make manifest: %v", err) + } + + manifest2, err := testutil.MakeSchema1Manifest(getKeys(randomLayers2)) + if err != nil { + t.Fatalf("failed to make manifest: %v", err) + } + + _, err = manifestService.Put(ctx, manifest1) + if err != nil { + t.Fatalf("manifest upload failed: %v", err) + } + + _, err = manifestService.Put(ctx, manifest2) + if err != nil { + t.Fatalf("manifest upload failed: %v", err) + } + + manifestEnumerator, _ := manifestService.(distribution.ManifestEnumerator) + err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { + repo.Tags(ctx).Tag(ctx, "test", distribution.Descriptor{Digest: dgst}) + return nil + }) + + before1 := allBlobs(t, registry) + before2 := allManifests(t, manifestService) + + // run GC with dry-run (should not remove anything) + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: true, + RemoveUntagged: true, + }) + if err != nil { + t.Fatalf("Failed mark and sweep: %v", err) + } + afterDry1 := allBlobs(t, registry) + afterDry2 := allManifests(t, manifestService) + if len(before1) != len(afterDry1) { + t.Fatalf("Garbage collection affected blobs storage: %d != %d", len(before1), len(afterDry1)) + } + if len(before2) != len(afterDry2) { + t.Fatalf("Garbage collection affected manifest storage: %d != %d", len(before2), len(afterDry2)) + } + + // Run GC (removes everything because no manifests with tags exist) + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: true, + }) + if err != nil { + t.Fatalf("Failed mark and sweep: %v", err) + } + + after1 := allBlobs(t, registry) + after2 := allManifests(t, manifestService) + if len(before1) == len(after1) { + t.Fatalf("Garbage collection affected blobs storage: %d == %d", len(before1), len(after1)) + } + if len(before2) == len(after2) { + t.Fatalf("Garbage collection affected manifest storage: %d == %d", len(before2), len(after2)) + } +} + func TestGCWithMissingManifests(t *testing.T) { ctx := context.Background() d := inmemory.New() @@ -200,7 +316,10 @@ func TestGCWithMissingManifests(t *testing.T) { t.Fatal(err) } - err = MarkAndSweep(context.Background(), d, registry, false) + err = MarkAndSweep(context.Background(), d, registry, GCOpts{ + DryRun: false, + RemoveUntagged: false, + }) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -227,7 +346,10 @@ func TestDeletionHasEffect(t *testing.T) { manifests.Delete(ctx, image3.manifestDigest) // Run GC - err = MarkAndSweep(context.Background(), inmemoryDriver, registry, false) + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: false, + }) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -361,7 +483,10 @@ func TestOrphanBlobDeleted(t *testing.T) { uploadRandomSchema2Image(t, repo) // Run GC - err = MarkAndSweep(context.Background(), inmemoryDriver, registry, false) + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: false, + }) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } diff --git a/registry/storage/vacuum.go b/registry/storage/vacuum.go index d2ebc539..a43db17a 100644 --- a/registry/storage/vacuum.go +++ b/registry/storage/vacuum.go @@ -50,6 +50,40 @@ func (v Vacuum) RemoveBlob(dgst string) error { return nil } +// RemoveManifest removes a manifest from the filesystem +func (v Vacuum) RemoveManifest(name string, dgst digest.Digest, tags []string) error { + // remove a tag manifest reference, in case of not found continue to next one + for _, tag := range tags { + + tagsPath, err := pathFor(manifestTagIndexEntryPathSpec{name: name, revision: dgst, tag: tag}) + if err != nil { + return err + } + + _, err = v.driver.Stat(v.ctx, tagsPath) + if err != nil { + switch err := err.(type) { + case driver.PathNotFoundError: + continue + default: + return err + } + } + dcontext.GetLogger(v.ctx).Infof("deleting manifest tag reference: %s", tagsPath) + err = v.driver.Delete(v.ctx, tagsPath) + if err != nil { + return err + } + } + + manifestPath, err := pathFor(manifestRevisionPathSpec{name: name, revision: dgst}) + if err != nil { + return err + } + dcontext.GetLogger(v.ctx).Infof("deleting manifest: %s", manifestPath) + return v.driver.Delete(v.ctx, manifestPath) +} + // RemoveRepository removes a repository directory from the // filesystem func (v Vacuum) RemoveRepository(repoName string) error {