diff --git a/docs/gc.md b/docs/gc.md index 34565369..fef0304e 100644 --- a/docs/gc.md +++ b/docs/gc.md @@ -8,21 +8,34 @@ keywords = ["registry, garbage, images, tags, repository, distribution"] # What Garbage Collection Does -Garbage collection is a process that delete blobs to which no manifests refer. -It runs in two phases. First, in the 'mark' phase, the process scans all the -manifests in the registry. From these manifests, it constructs a set of content -address digests. This set is the 'mark set' and denotes the set of blobs to *not* -delete. Secondly, in the 'sweep' phase, the process scans all the blobs and if -a blob's content address digest is not in the mark set, the process will delete -it. +Garbage collection deletes blobs which no manifests reference. Manifests and +blobs which are deleted by their digest through the Registry API will become +eligible for garbage collection, but the actual blobs will not be removed from +storage until garbage collection is run. +# How Garbage Collection Works + +Garbage collection runs in two phases. First, in the 'mark' phase, the process +scans all the manifests in the registry. From these manifests, it constructs a +set of content address digests. This set is the 'mark set' and denotes the set +of blobs to *not* delete. Secondly, in the 'sweep' phase, the process scans all +the blobs and if a blob's content address digest is not in the mark set, the +process will delete it. + +> **NOTE** You should ensure that the registry is in read-only mode or not running at +> all. If you were to upload an image while garbage collection is running, there is the +> risk that the image's layers will be mistakenly deleted, leading to a corrupted image. + +This type of garbage collection is known as stop-the-world garbage collection. In +future registry versions the intention is that garbage collection will be an +automated background action and this manual process will no longer apply. 
# How to Run You can run garbage collection by running - docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml +`docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml` + +Additionally, garbage collection can be run in `dry-run` mode, which will print +the progress of the mark and sweep phases without removing any data. -NOTE: You should ensure that the registry itself is in read-only mode or not running at -all. If you were to upload an image while garbage collection is running, there is the -risk that the image's layers will be mistakenly deleted, leading to a corrupted image. diff --git a/registry/garbagecollect.go b/registry/garbagecollect.go index ecb64c98..1be4546d 100644 --- a/registry/garbagecollect.go +++ b/registry/garbagecollect.go @@ -13,16 +13,17 @@ import ( "github.com/docker/distribution/registry/storage" "github.com/docker/distribution/registry/storage/driver" "github.com/docker/distribution/registry/storage/driver/factory" - + "github.com/docker/libtrust" "github.com/spf13/cobra" ) -func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error { - // Construct a registry - registry, err := storage.NewRegistry(ctx, storageDriver) - if err != nil { - return fmt.Errorf("failed to construct registry: %v", err) +func emit(format string, a ...interface{}) { + if dryRun { + fmt.Printf(format+"\n", a...) 
} +} + +func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace) error { repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator) if !ok { @@ -31,7 +32,9 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error // mark markSet := make(map[digest.Digest]struct{}) - err = repositoryEnumerator.Enumerate(ctx, func(repoName string) error { + err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error { + emit(repoName) + var err error named, err := reference.ParseNamed(repoName) if err != nil { @@ -54,6 +57,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { // Mark the manifest's blob + emit("%s: marking manifest %s ", repoName, dgst) markSet[dgst] = struct{}{} manifest, err := manifestService.Get(ctx, dgst) @@ -64,6 +68,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error descriptors := manifest.References() for _, descriptor := range descriptors { markSet[descriptor.Digest] = struct{}{} + emit("%s: marking blob %s", repoName, descriptor.Digest) } switch manifest.(type) { @@ -77,11 +82,13 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return fmt.Errorf("failed to get signatures for signed manifest: %v", err) } for _, signatureDigest := range signatures { + emit("%s: marking signature %s", repoName, signatureDigest) markSet[signatureDigest] = struct{}{} } break case *schema2.DeserializedManifest: config := manifest.(*schema2.DeserializedManifest).Config + emit("%s: marking configuration %s", repoName, config.Digest) markSet[config.Digest] = struct{}{} break } @@ -110,9 +117,14 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return fmt.Errorf("error enumerating blobs: %v", err) } + emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), 
len(deleteSet)) // Construct vacuum vacuum := storage.NewVacuum(ctx, storageDriver) for dgst := range deleteSet { + emit("blob eligible for deletion: %s", dgst) + if dryRun { + continue + } err = vacuum.RemoveBlob(string(dgst)) if err != nil { return fmt.Errorf("failed to delete blob %s: %v\n", dgst, err) @@ -122,13 +134,18 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return err } +func init() { + GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything expect remove the blobs") +} + +var dryRun bool + // GCCmd is the cobra command that corresponds to the garbage-collect subcommand var GCCmd = &cobra.Command{ Use: "garbage-collect ", - Short: "`garbage-collects` deletes layers not referenced by any manifests", - Long: "`garbage-collects` deletes layers not referenced by any manifests", + Short: "`garbage-collect` deletes layers not referenced by any manifests", + Long: "`garbage-collect` deletes layers not referenced by any manifests", Run: func(cmd *cobra.Command, args []string) { - config, err := resolveConfiguration(args) if err != nil { fmt.Fprintf(os.Stderr, "configuration error: %v\n", err) @@ -149,7 +166,19 @@ var GCCmd = &cobra.Command{ os.Exit(1) } - err = markAndSweep(ctx, driver) + k, err := libtrust.GenerateECP256PrivateKey() + if err != nil { + fmt.Fprint(os.Stderr, err) + os.Exit(1) + } + + registry, err := storage.NewRegistry(ctx, driver, storage.DisableSchema1Signatures, storage.Schema1SigningKey(k)) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to construct registry: %v", err) + os.Exit(1) + } + + err = markAndSweep(ctx, driver, registry) if err != nil { fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err) os.Exit(1) diff --git a/registry/garbagecollect_test.go b/registry/garbagecollect_test.go index 6096e758..dd5fadd5 100644 --- a/registry/garbagecollect_test.go +++ b/registry/garbagecollect_test.go @@ -161,7 +161,7 @@ func TestNoDeletionNoEffect(t *testing.T) { } // Run GC - err = 
markAndSweep(context.Background(), inmemoryDriver) + err = markAndSweep(context.Background(), inmemoryDriver, registry) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -193,7 +193,7 @@ func TestDeletionHasEffect(t *testing.T) { manifests.Delete(ctx, image3.manifestDigest) // Run GC - err = markAndSweep(context.Background(), inmemoryDriver) + err = markAndSweep(context.Background(), inmemoryDriver, registry) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -327,7 +327,7 @@ func TestOrphanBlobDeleted(t *testing.T) { uploadRandomSchema2Image(t, repo) // Run GC - err = markAndSweep(context.Background(), inmemoryDriver) + err = markAndSweep(context.Background(), inmemoryDriver, registry) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } diff --git a/registry/storage/manifeststore.go b/registry/storage/manifeststore.go index f3660c98..5a9165f9 100644 --- a/registry/storage/manifeststore.go +++ b/registry/storage/manifeststore.go @@ -12,6 +12,7 @@ import ( "github.com/docker/distribution/manifest/manifestlist" "github.com/docker/distribution/manifest/schema1" "github.com/docker/distribution/manifest/schema2" + "github.com/docker/distribution/registry/storage/driver" ) // A ManifestHandler gets and puts manifests of a particular type. 
@@ -161,16 +162,22 @@ func (ms *manifestStore) GetSignatures(ctx context.Context, manifestDigest diges return nil, err } - signaturesPath = path.Join(signaturesPath, "sha256") + var digests []digest.Digest + alg := string(digest.SHA256) + signaturePaths, err := ms.blobStore.driver.List(ctx, path.Join(signaturesPath, alg)) - signaturePaths, err := ms.blobStore.driver.List(ctx, signaturesPath) - if err != nil { + switch err.(type) { + case nil: + break + case driver.PathNotFoundError: + // Manifest may have been pushed with signature store disabled + return digests, nil + default: return nil, err } - var digests []digest.Digest for _, sigPath := range signaturePaths { - sigdigest, err := digest.ParseDigest("sha256:" + path.Base(sigPath)) + sigdigest, err := digest.ParseDigest(alg + ":" + path.Base(sigPath)) if err != nil { // merely found not a digest continue