dist: expand functionality of the dist tool

With this change, we add the following commands to the dist tool:

- `ingest`: verify and accept content into storage
- `active`: display active ingest processes
- `list`: list content in storage
- `path`: provide a path to a blob by digest
- `delete`: remove a piece of content from storage

We demonstrate the utility with the following shell pipeline:

```
$ ./dist fetch docker.io/library/redis latest mediatype:application/vnd.docker.distribution.manifest.v2+json | \
    jq -r '.layers[] | "./dist fetch docker.io/library/redis "+.digest + "| ./dist ingest --expected-digest "+.digest+" --expected-size "+(.size | tostring) +" docker.io/library/redis@"+.digest' | xargs -I{} -P10 -n1 sh -c "{}"
```

The above fetches a manifest, pipes it to jq, which assembles a shell
pipeline to ingest each layer into the content store. Because the
transactions are keyed by their digest, concurrent downloads and
downloads of repeated content are ignored. Each process is then executed
parallel using xargs.

Put shortly, this is a parallel layer download.

In a separate shell session, could monitor the active downloads with the
following:

```
$ watch -n0.2 ./dist active
```

For now, the content is downloaded into `.content` in the current
working directory. To watch the contents of this directory, you can use
the following:

```
$ watch -n0.2 tree .content
```

This will help to understand what is going on internally.

To get access to the layers, you can use the path command:

```
$./dist path sha256:010c454d55e53059beaba4044116ea4636f8dd8181e975d893931c7e7204fffa
sha256:010c454d55e53059beaba4044116ea4636f8dd8181e975d893931c7e7204fffa /home/sjd/go/src/github.com/docker/containerd/.content/blobs/sha256/010c454d55e53059beaba4044116ea4636f8dd8181e975d893931c7e7204fffa
```

When you are done, you can clear out the content with the classic xargs
pipeline:

```
$ ./dist list -q | xargs ./dist delete
```

Note that this is mostly a POC. Things like failed downloads and
abandoned download cleanup aren't quite handled. We'll probably make
adjustments around how content store transactions are handled to address
this.

From here, we'll build out full image pull and create tooling to get
runtime bundles from the fetched content.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
This commit is contained in:
Stephen J Day 2017-01-26 14:08:56 -08:00
parent 3c44ec5dbc
commit f9cd9be61a
No known key found for this signature in database
GPG key ID: 67B3DED84EDC823F
10 changed files with 532 additions and 36 deletions

View file

@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"sync"
"time"
"github.com/docker/containerd/log"
"github.com/nightlyone/lockfile"
@ -43,11 +44,58 @@ func Open(root string) (*Store, error) {
}
type Status struct {
Ref string
Size int64
Meta interface{}
Ref string
Size int64
ModTime time.Time
Meta interface{}
}
func (cs *Store) Exists(dgst digest.Digest) (bool, error) {
if _, err := os.Stat(cs.blobPath(dgst)); err != nil {
if !os.IsNotExist(err) {
return false, err
}
return false, nil
}
return true, nil
}
func (cs *Store) GetPath(dgst digest.Digest) (string, error) {
p := cs.blobPath(dgst)
if _, err := os.Stat(p); err != nil {
if os.IsNotExist(err) {
return "", ErrBlobNotFound
}
return "", err
}
return p, nil
}
// Delete removes a blob by its digest.
//
// While this is safe to do concurrently, safe exist-removal logic must hold
// some global lock on the store.
func (cs *Store) Delete(dgst digest.Digest) error {
if err := os.RemoveAll(cs.blobPath(dgst)); err != nil {
if !os.IsNotExist(err) {
return err
}
return nil
}
return nil
}
func (cs *Store) blobPath(dgst digest.Digest) string {
return filepath.Join(cs.root, "blobs", dgst.Algorithm().String(), dgst.Hex())
}
// Stat returns the current status of a blob by the ingest ref.
func (cs *Store) Stat(ref string) (Status, error) {
dp := filepath.Join(cs.ingestRoot(ref), "data")
return cs.stat(dp)
@ -67,10 +115,10 @@ func (cs *Store) stat(ingestPath string) (Status, error) {
}
return Status{
Ref: ref,
Size: dfi.Size(),
Ref: ref,
Size: dfi.Size(),
ModTime: dfi.ModTime(),
}, nil
}
func (cs *Store) Active() ([]Status, error) {
@ -114,7 +162,14 @@ func (cs *Store) Active() ([]Status, error) {
// TODO(stevvooe): Allow querying the set of blobs in the blob store.
func (cs *Store) Walk(fn func(path string, dgst digest.Digest) error) error {
// WalkFunc defines the callback for a blob walk.
//
// TODO(stevvooe): Remove the file info. Just need size and modtime. Perhaps,
// not a huge deal, considering we have a path, but let's not just let this one
// go without scrunity.
type WalkFunc func(path string, fi os.FileInfo, dgst digest.Digest) error
func (cs *Store) Walk(fn WalkFunc) error {
root := filepath.Join(cs.root, "blobs")
var alg digest.Algorithm
return filepath.Walk(root, func(path string, fi os.FileInfo, err error) error {
@ -148,23 +203,10 @@ func (cs *Store) Walk(fn func(path string, dgst digest.Digest) error) error {
// store or extra paths not expected previously.
}
return fn(path, dgst)
return fn(path, fi, dgst)
})
}
func (cs *Store) GetPath(dgst digest.Digest) (string, error) {
p := filepath.Join(cs.root, "blobs", dgst.Algorithm().String(), dgst.Hex())
if _, err := os.Stat(p); err != nil {
if os.IsNotExist(err) {
return "", ErrBlobNotFound
}
return "", err
}
return p, nil
}
// Begin starts a new write transaction against the blob store.
//
// The argument `ref` is used to identify the transaction. It must be a valid
@ -267,6 +309,20 @@ func (cs *Store) Resume(ref string) (*Writer, error) {
}, nil
}
// Remove an active transaction keyed by ref.
func (cs *Store) Remove(ref string) error {
root := cs.ingestRoot(ref)
if err := os.RemoveAll(root); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
return nil
}
func (cs *Store) ingestRoot(ref string) string {
dgst := digest.FromString(ref)
return filepath.Join(cs.root, "ingest", dgst.Hex())