Merge pull request #2302 from zetaab/oma

add possibility to clean untagged manifests
This commit is contained in:
Derek McGowan 2018-01-05 15:27:52 -08:00 committed by GitHub
commit 277ed486c9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 214 additions and 10 deletions

View file

@ -18,6 +18,7 @@ func init() {
RootCmd.AddCommand(ServeCmd) RootCmd.AddCommand(ServeCmd)
RootCmd.AddCommand(GCCmd) RootCmd.AddCommand(GCCmd)
GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything except remove the blobs") GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything except remove the blobs")
GCCmd.Flags().BoolVarP(&removeUntagged, "delete-untagged", "m", false, "delete manifests that are not currently referenced via tag")
RootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "show the version and exit") RootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "show the version and exit")
} }
@ -36,6 +37,7 @@ var RootCmd = &cobra.Command{
} }
var dryRun bool var dryRun bool
var removeUntagged bool
// GCCmd is the cobra command that corresponds to the garbage-collect subcommand // GCCmd is the cobra command that corresponds to the garbage-collect subcommand
var GCCmd = &cobra.Command{ var GCCmd = &cobra.Command{
@ -75,7 +77,10 @@ var GCCmd = &cobra.Command{
os.Exit(1) os.Exit(1)
} }
err = storage.MarkAndSweep(ctx, driver, registry, dryRun) err = storage.MarkAndSweep(ctx, driver, registry, storage.GCOpts{
DryRun: dryRun,
RemoveUntagged: removeUntagged,
})
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err) fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err)
os.Exit(1) os.Exit(1)

View file

@ -14,8 +14,21 @@ func emit(format string, a ...interface{}) {
fmt.Printf(format+"\n", a...) fmt.Printf(format+"\n", a...)
} }
// GCOpts contains options for garbage collector
type GCOpts struct {
DryRun bool
RemoveUntagged bool
}
// ManifestDel contains manifest structure which will be deleted
type ManifestDel struct {
Name string
Digest digest.Digest
Tags []string
}
// MarkAndSweep performs a mark and sweep of registry data // MarkAndSweep performs a mark and sweep of registry data
func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, dryRun bool) error { func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, opts GCOpts) error {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator) repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
if !ok { if !ok {
return fmt.Errorf("unable to convert Namespace to RepositoryEnumerator") return fmt.Errorf("unable to convert Namespace to RepositoryEnumerator")
@ -23,6 +36,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
// mark // mark
markSet := make(map[digest.Digest]struct{}) markSet := make(map[digest.Digest]struct{})
manifestArr := make([]ManifestDel, 0)
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error { err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName) emit(repoName)
@ -47,6 +61,25 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
} }
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if opts.RemoveUntagged {
// fetch all tags where this manifest is the latest one
tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst})
if err != nil {
return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err)
}
if len(tags) == 0 {
emit("manifest eligible for deletion: %s", dgst)
// fetch all tags from repository
// all of these tags could contain manifest in history
// which means that we need check (and delete) those references when deleting manifest
allTags, err := repository.Tags(ctx).All(ctx)
if err != nil {
return fmt.Errorf("failed to retrieve tags %v", err)
}
manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags})
return nil
}
}
// Mark the manifest's blob // Mark the manifest's blob
emit("%s: marking manifest %s ", repoName, dgst) emit("%s: marking manifest %s ", repoName, dgst)
markSet[dgst] = struct{}{} markSet[dgst] = struct{}{}
@ -84,6 +117,15 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
} }
// sweep // sweep
vacuum := NewVacuum(ctx, storageDriver)
if !opts.DryRun {
for _, obj := range manifestArr {
err = vacuum.RemoveManifest(obj.Name, obj.Digest, obj.Tags)
if err != nil {
return fmt.Errorf("failed to delete manifest %s: %v", obj.Digest, err)
}
}
}
blobService := registry.Blobs() blobService := registry.Blobs()
deleteSet := make(map[digest.Digest]struct{}) deleteSet := make(map[digest.Digest]struct{})
err = blobService.Enumerate(ctx, func(dgst digest.Digest) error { err = blobService.Enumerate(ctx, func(dgst digest.Digest) error {
@ -96,12 +138,10 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
if err != nil { if err != nil {
return fmt.Errorf("error enumerating blobs: %v", err) return fmt.Errorf("error enumerating blobs: %v", err)
} }
emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), len(deleteSet)) emit("\n%d blobs marked, %d blobs and %d manifests eligible for deletion", len(markSet), len(deleteSet), len(manifestArr))
// Construct vacuum
vacuum := NewVacuum(ctx, storageDriver)
for dgst := range deleteSet { for dgst := range deleteSet {
emit("blob eligible for deletion: %s", dgst) emit("blob eligible for deletion: %s", dgst)
if dryRun { if opts.DryRun {
continue continue
} }
err = vacuum.RemoveBlob(string(dgst)) err = vacuum.RemoveBlob(string(dgst))

View file

@ -61,6 +61,23 @@ func makeManifestService(t *testing.T, repository distribution.Repository) distr
return manifestService return manifestService
} }
func allManifests(t *testing.T, manifestService distribution.ManifestService) map[digest.Digest]struct{} {
ctx := context.Background()
allManMap := make(map[digest.Digest]struct{})
manifestEnumerator, ok := manifestService.(distribution.ManifestEnumerator)
if !ok {
t.Fatalf("unable to convert ManifestService into ManifestEnumerator")
}
err := manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
allManMap[dgst] = struct{}{}
return nil
})
if err != nil {
t.Fatalf("Error getting all manifests: %v", err)
}
return allManMap
}
func allBlobs(t *testing.T, registry distribution.Namespace) map[digest.Digest]struct{} { func allBlobs(t *testing.T, registry distribution.Namespace) map[digest.Digest]struct{} {
ctx := context.Background() ctx := context.Background()
blobService := registry.Blobs() blobService := registry.Blobs()
@ -169,7 +186,10 @@ func TestNoDeletionNoEffect(t *testing.T) {
before := allBlobs(t, registry) before := allBlobs(t, registry)
// Run GC // Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, false) err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: false,
})
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }
@ -180,6 +200,102 @@ func TestNoDeletionNoEffect(t *testing.T) {
} }
} }
func TestDeleteManifestIfTagNotFound(t *testing.T) {
ctx := context.Background()
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "deletemanifests")
manifestService, err := repo.Manifests(ctx)
// Create random layers
randomLayers1, err := testutil.CreateRandomLayers(3)
if err != nil {
t.Fatalf("failed to make layers: %v", err)
}
randomLayers2, err := testutil.CreateRandomLayers(3)
if err != nil {
t.Fatalf("failed to make layers: %v", err)
}
// Upload all layers
err = testutil.UploadBlobs(repo, randomLayers1)
if err != nil {
t.Fatalf("failed to upload layers: %v", err)
}
err = testutil.UploadBlobs(repo, randomLayers2)
if err != nil {
t.Fatalf("failed to upload layers: %v", err)
}
// Construct manifests
manifest1, err := testutil.MakeSchema1Manifest(getKeys(randomLayers1))
if err != nil {
t.Fatalf("failed to make manifest: %v", err)
}
manifest2, err := testutil.MakeSchema1Manifest(getKeys(randomLayers2))
if err != nil {
t.Fatalf("failed to make manifest: %v", err)
}
_, err = manifestService.Put(ctx, manifest1)
if err != nil {
t.Fatalf("manifest upload failed: %v", err)
}
_, err = manifestService.Put(ctx, manifest2)
if err != nil {
t.Fatalf("manifest upload failed: %v", err)
}
manifestEnumerator, _ := manifestService.(distribution.ManifestEnumerator)
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
repo.Tags(ctx).Tag(ctx, "test", distribution.Descriptor{Digest: dgst})
return nil
})
before1 := allBlobs(t, registry)
before2 := allManifests(t, manifestService)
// run GC with dry-run (should not remove anything)
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: true,
RemoveUntagged: true,
})
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
afterDry1 := allBlobs(t, registry)
afterDry2 := allManifests(t, manifestService)
if len(before1) != len(afterDry1) {
t.Fatalf("Garbage collection affected blobs storage: %d != %d", len(before1), len(afterDry1))
}
if len(before2) != len(afterDry2) {
t.Fatalf("Garbage collection affected manifest storage: %d != %d", len(before2), len(afterDry2))
}
// Run GC (removes everything because no manifests with tags exist)
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: true,
})
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
after1 := allBlobs(t, registry)
after2 := allManifests(t, manifestService)
if len(before1) == len(after1) {
t.Fatalf("Garbage collection affected blobs storage: %d == %d", len(before1), len(after1))
}
if len(before2) == len(after2) {
t.Fatalf("Garbage collection affected manifest storage: %d == %d", len(before2), len(after2))
}
}
func TestGCWithMissingManifests(t *testing.T) { func TestGCWithMissingManifests(t *testing.T) {
ctx := context.Background() ctx := context.Background()
d := inmemory.New() d := inmemory.New()
@ -200,7 +316,10 @@ func TestGCWithMissingManifests(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
err = MarkAndSweep(context.Background(), d, registry, false) err = MarkAndSweep(context.Background(), d, registry, GCOpts{
DryRun: false,
RemoveUntagged: false,
})
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }
@ -227,7 +346,10 @@ func TestDeletionHasEffect(t *testing.T) {
manifests.Delete(ctx, image3.manifestDigest) manifests.Delete(ctx, image3.manifestDigest)
// Run GC // Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, false) err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: false,
})
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }
@ -361,7 +483,10 @@ func TestOrphanBlobDeleted(t *testing.T) {
uploadRandomSchema2Image(t, repo) uploadRandomSchema2Image(t, repo)
// Run GC // Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, false) err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: false,
})
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }

View file

@ -50,6 +50,40 @@ func (v Vacuum) RemoveBlob(dgst string) error {
return nil return nil
} }
// RemoveManifest removes a manifest from the filesystem
func (v Vacuum) RemoveManifest(name string, dgst digest.Digest, tags []string) error {
// remove a tag manifest reference, in case of not found continue to next one
for _, tag := range tags {
tagsPath, err := pathFor(manifestTagIndexEntryPathSpec{name: name, revision: dgst, tag: tag})
if err != nil {
return err
}
_, err = v.driver.Stat(v.ctx, tagsPath)
if err != nil {
switch err := err.(type) {
case driver.PathNotFoundError:
continue
default:
return err
}
}
dcontext.GetLogger(v.ctx).Infof("deleting manifest tag reference: %s", tagsPath)
err = v.driver.Delete(v.ctx, tagsPath)
if err != nil {
return err
}
}
manifestPath, err := pathFor(manifestRevisionPathSpec{name: name, revision: dgst})
if err != nil {
return err
}
dcontext.GetLogger(v.ctx).Infof("deleting manifest: %s", manifestPath)
return v.driver.Delete(v.ctx, manifestPath)
}
// RemoveRepository removes a repository directory from the // RemoveRepository removes a repository directory from the
// filesystem // filesystem
func (v Vacuum) RemoveRepository(repoName string) error { func (v Vacuum) RemoveRepository(repoName string) error {