diff --git a/docs/gc.md b/docs/gc.md index 34565369..fef0304e 100644 --- a/docs/gc.md +++ b/docs/gc.md @@ -8,21 +8,34 @@ keywords = ["registry, garbage, images, tags, repository, distribution"] # What Garbage Collection Does -Garbage collection is a process that delete blobs to which no manifests refer. -It runs in two phases. First, in the 'mark' phase, the process scans all the -manifests in the registry. From these manifests, it constructs a set of content -address digests. This set is the 'mark set' and denotes the set of blobs to *not* -delete. Secondly, in the 'sweep' phase, the process scans all the blobs and if -a blob's content address digest is not in the mark set, the process will delete -it. +Garbage collection deletes blobs which no manifests reference. Manifests and +blobs which are deleted by their digest through the Registry API will become +eligible for garbage collection, but the actual blobs will not be removed from +storage until garbage collection is run. +# How Garbage Collection Works + +Garbage collection runs in two phases. First, in the 'mark' phase, the process +scans all the manifests in the registry. From these manifests, it constructs a +set of content address digests. This set is the 'mark set' and denotes the set +of blobs to *not* delete. Secondly, in the 'sweep' phase, the process scans all +the blobs and if a blob's content address digest is not in the mark set, the +process will delete it. + +> **NOTE** You should ensure that the registry is in read-only mode or not running at +> all. If you were to upload an image while garbage collection is running, there is the +> risk that the image's layers will be mistakenly deleted, leading to a corrupted image. + +This type of garbage collection is known as stop-the-world garbage collection. In +future registry versions the intention is that garbage collection will be an +automated background action and this manual process will no longer apply. 
# How to Run You can run garbage collection by running - docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml +`docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml` + +Additionally, garbage collection can be run in `dry-run` mode, which will print +the progress of the mark and sweep phases without removing any data. -NOTE: You should ensure that the registry itself is in read-only mode or not running at -all. If you were to upload an image while garbage collection is running, there is the -risk that the image's layers will be mistakenly deleted, leading to a corrupted image. diff --git a/registry/garbagecollect.go b/registry/garbagecollect.go index 8df956b9..1be4546d 100644 --- a/registry/garbagecollect.go +++ b/registry/garbagecollect.go @@ -19,8 +19,7 @@ import ( func emit(format string, a ...interface{}) { if dryRun { - fmt.Printf(format, a...) - fmt.Println("") + fmt.Printf(format+"\n", a...) } } @@ -122,8 +121,8 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis // Construct vacuum vacuum := storage.NewVacuum(ctx, storageDriver) for dgst := range deleteSet { + emit("blob eligible for deletion: %s", dgst) if dryRun { - emit("deleting %s", dgst) continue } err = vacuum.RemoveBlob(string(dgst)) @@ -169,7 +168,7 @@ var GCCmd = &cobra.Command{ k, err := libtrust.GenerateECP256PrivateKey() if err != nil { - fmt.Fprintf(os.Stderr, "%s", err) + fmt.Fprint(os.Stderr, err) os.Exit(1) }