containerd/cmd/dist/fetch.go


cmd/dist: POC implementation of dist fetch

With this changeset we introduce several new things. The first is the
top-level dist command. This is a toolkit that implements various
distribution primitives, such as fetching, unpacking and ingesting.

The first component to this is a simple `fetch` command. It is a low-level
command that takes a "remote", identified by a `locator`, and an object
identifier. Keyed by the locator, this tool can identify a remote
implementation to fetch the content and write it back to standard out. By
allowing this to be the unit of pluggability in fetching content, we can have
quite a bit of flexibility in how we retrieve images.

The current `fetch` implementation provides anonymous access to docker hub
images, through the namespace `docker.io`. As an example, one can fetch the
manifest for `redis` with the following command:

```
$ ./dist fetch docker.io/library/redis latest mediatype:application/vnd.docker.distribution.manifest.v2+json
```

Note that we have provided a mediatype "hint", nudging the fetch
implementation to grab the correct endpoint. We can hash the output of that
to fetch the same content by digest:

```
$ ./dist fetch docker.io/library/redis sha256:$(./dist fetch docker.io/library/redis latest mediatype:application/vnd.docker.distribution.manifest.v2+json | shasum -a256)
```

Note that the hint is now elided, since we have affixed the content to a
particular hash.

If you are not yet entertained, let's bring `jq` and `xargs` into the mix for
maximum fun. The following incantation fetches the same manifest and
downloads all layers into the convenience of `/dev/null`:

```
$ ./dist fetch docker.io/library/redis sha256:a027a470aa2b9b41cc2539847a97b8a14794ebd0a4c7c5d64e390df6bde56c73 | jq -r '.layers[] | .digest' | xargs -n1 -P10 ./dist fetch docker.io/library/redis > /dev/null
```

This is just the beginning. We should be able to centralize configuration
around fetch to implement a number of distribution methodologies that have
been challenging or impossible up to this point. The `locator`, mentioned
earlier, is a schemaless URL that provides a host and path that can be used
to resolve the remote. By dispatching on this common identifier, we should be
able to support almost any protocol and discovery mechanism imaginable.

When this is more solidified, we can roll these up into higher-level
operations that can be orchestrated through the `dist` tool or via GRPC.

What a time to be alive!

Signed-off-by: Stephen J Day <stephen.day@docker.com>
package main

import (
	contextpkg "context"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"

	"github.com/Sirupsen/logrus"
	"github.com/docker/containerd/log"
	"github.com/docker/containerd/remotes"
	digest "github.com/opencontainers/go-digest"
	"github.com/pkg/errors"
	"github.com/urfave/cli"
	"golang.org/x/net/context/ctxhttp"
)

// TODO(stevvooe): Create "multi-fetch" mode that just takes a remote
// then receives object/hint lines on stdin, returning content as
// needed. (A rough sketch of that mode follows fetchCommand below.)

var fetchCommand = cli.Command{
	Name:        "fetch",
	Usage:       "retrieve objects from a remote",
	ArgsUsage:   "[flags] <remote> <object> [<hint>, ...]",
	Description: `Fetch objects by identifier from a remote.`,
	Flags: []cli.Flag{
		cli.DurationFlag{
			Name:   "timeout",
			Usage:  "total timeout for fetch",
			EnvVar: "CONTAINERD_FETCH_TIMEOUT",
		},
	},
	Action: func(context *cli.Context) error {
		var (
			ctx     = contextpkg.Background()
			timeout = context.Duration("timeout")
			locator = context.Args().First()
			args    = context.Args().Tail()
		)

		if timeout > 0 {
			var cancel func()
			ctx, cancel = contextpkg.WithTimeout(ctx, timeout)
			defer cancel()
		}

		if locator == "" {
			return errors.New("containerd: remote required")
		}

		if len(args) < 1 {
			return errors.New("containerd: object required")
		}

		object := args[0]
		hints := args[1:]

		resolver, err := getResolver(ctx)
		if err != nil {
			return err
		}

		remote, err := resolver.Resolve(ctx, locator)
		if err != nil {
			return err
		}

		ctx = log.WithLogger(ctx, log.G(ctx).WithFields(
			logrus.Fields{
				"remote": locator,
				"object": object,
			}))

		log.G(ctx).Infof("fetching")

		rc, err := remote.Fetch(ctx, object, hints...)
		if err != nil {
			return err
		}
		defer rc.Close()

		if _, err := io.Copy(os.Stdout, rc); err != nil {
			return err
		}

		return nil
	},
}
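
// fetchAll is only a minimal sketch of the "multi-fetch" mode described in
// the TODO at the top of this file: it resolves a single remote, then reads
// object/hint lines from r and streams the fetched content to w. It is not
// wired into the CLI; the function name and the whitespace-separated
// "<object> [<hint> ...]" line format are assumptions, not part of the
// original tool.
func fetchAll(ctx contextpkg.Context, locator string, r io.Reader, w io.Writer) error {
	resolver, err := getResolver(ctx)
	if err != nil {
		return err
	}

	remote, err := resolver.Resolve(ctx, locator)
	if err != nil {
		return err
	}

	p, err := ioutil.ReadAll(r)
	if err != nil {
		return err
	}

	for _, line := range strings.Split(string(p), "\n") {
		fields := strings.Fields(line)
		if len(fields) == 0 {
			continue // skip blank lines
		}

		rc, err := remote.Fetch(ctx, fields[0], fields[1:]...)
		if err != nil {
			return err
		}

		_, err = io.Copy(w, rc)
		rc.Close()
		if err != nil {
			return err
		}
	}

	return nil
}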

// NOTE(stevvooe): Most of the code below this point is prototype code to
// demonstrate a very simplified docker.io fetcher. We have a lot of
// hard-coded values, but we leave many of the details to the fetcher,
// creating a lot of room for ways to fetch content.

// getResolver prepares the resolver from the environment and options.
func getResolver(ctx contextpkg.Context) (remotes.Resolver, error) {
	return remotes.ResolverFunc(func(ctx contextpkg.Context, locator string) (remotes.Remote, error) {
		if !strings.HasPrefix(locator, "docker.io") {
			return nil, errors.Errorf("unsupported locator: %q", locator)
		}

		var (
			base = url.URL{
				Scheme: "https",
				Host:   "registry-1.docker.io",
			}
			prefix = strings.TrimPrefix(locator, "docker.io/")
		)

		token, err := getToken(ctx, "repository:"+prefix+":pull")
		if err != nil {
			return nil, err
		}

		return remotes.RemoteFunc(func(ctx contextpkg.Context, object string, hints ...string) (io.ReadCloser, error) {
			ctx = log.WithLogger(ctx, log.G(ctx).WithFields(
				logrus.Fields{
					"prefix": prefix, // or repo?
					"base":   base.String(),
					"hints":  hints,
				},
			))

			paths, err := getV2URLPaths(prefix, object, hints...)
			if err != nil {
				return nil, err
			}

			for _, path := range paths {
				base.Path = path
				url := base.String()
				log.G(ctx).WithField("url", url).Debug("fetch content")

				req, err := http.NewRequest(http.MethodGet, url, nil)
				if err != nil {
					return nil, err
				}

				req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))

				for _, mediatype := range remotes.HintValues("mediatype", hints...) {
					req.Header.Set("Accept", mediatype)
				}

				resp, err := ctxhttp.Do(ctx, http.DefaultClient, req)
				if err != nil {
					return nil, err
				}
				if resp.StatusCode > 299 {
					if resp.StatusCode == http.StatusNotFound {
						resp.Body.Close() // release the 404 response before trying the next candidate url.
						continue          // try one of the other urls.
					}

					resp.Body.Close()
					return nil, errors.Errorf("unexpected status code %v: %v", url, resp.Status)
				}
				return resp.Body, nil
			}

			return nil, errors.New("not found")
		}), nil
	}), nil
}
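
// exampleFileResolver is a minimal sketch, not used by the fetch command,
// showing how another remote implementation could be plugged in through the
// same remotes.ResolverFunc/RemoteFunc adapters used above. It resolves a
// hypothetical "file/<root>" locator to a remote that serves objects straight
// from the local filesystem, ignoring hints. The "file" locator scheme and
// the function name are assumptions for illustration only.
func exampleFileResolver() remotes.Resolver {
	return remotes.ResolverFunc(func(ctx contextpkg.Context, locator string) (remotes.Remote, error) {
		if !strings.HasPrefix(locator, "file/") {
			return nil, errors.Errorf("unsupported locator: %q", locator)
		}

		root := strings.TrimPrefix(locator, "file/")

		return remotes.RemoteFunc(func(ctx contextpkg.Context, object string, hints ...string) (io.ReadCloser, error) {
			// Objects are looked up directly by name under root.
			return os.Open(path.Join(root, object))
		}), nil
	})
}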

func getToken(ctx contextpkg.Context, scopes ...string) (string, error) {
	var (
		u = url.URL{
			Scheme: "https",
			Host:   "auth.docker.io",
			Path:   "/token",
		}

		q = url.Values{
			"scope":   scopes,
			"service": []string{"registry.docker.io"}, // usually comes from auth challenge
		}
	)
	u.RawQuery = q.Encode()

	log.G(ctx).WithField("token.url", u.String()).Debug("requesting token")
	resp, err := ctxhttp.Get(ctx, http.DefaultClient, u.String())
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode > 299 {
		return "", errors.Errorf("unexpected status code: %v %v", resp.StatusCode, resp.Status)
	}

	p, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	var tokenResponse struct {
		Token string `json:"token"`
	}

	if err := json.Unmarshal(p, &tokenResponse); err != nil {
		return "", err
	}

	return tokenResponse.Token, nil
}
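
// exampleGetToken is a minimal sketch, not called by the command, showing how
// the helper above is used for an anonymous pull of library/redis. The
// request amounts to a GET against
// https://auth.docker.io/token?scope=repository%3Alibrary%2Fredis%3Apull&service=registry.docker.io
// and the "token" field of the JSON response is then presented as a Bearer
// token. The function name is an assumption for illustration only.
func exampleGetToken(ctx contextpkg.Context) (string, error) {
	return getToken(ctx, "repository:library/redis:pull")
}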

// getV2URLPaths generates the candidate URL paths for the object based on the
// set of hints and the provided object id. URLs are returned in the order of
// most to least likely to succeed.
func getV2URLPaths(prefix, object string, hints ...string) ([]string, error) {
	var urls []string

	// TODO(stevvooe): We can probably define a higher-level "type" hint to
	// avoid having to do extra round trips to resolve content, as well as
	// avoid the tedium of providing media types.

	if remotes.HintExists("mediatype", "application/vnd.docker.distribution.manifest.v2+json", hints...) { // TODO(stevvooe): make this handle oci types, as well.
		// Fast path out if we know we are getting a manifest. Arguably, we
		// should fall back to blobs, just in case.
		urls = append(urls, path.Join("/v2", prefix, "manifests", object))
	}

	if _, err := digest.Parse(object); err == nil {
		// We have a digest: use the blob or manifest path, depending on
		// hints; we may need to try both.
		urls = append(urls, path.Join("/v2", prefix, "blobs", object))
	}

	// Probably a tag, so we go through the manifests endpoint.
	urls = append(urls, path.Join("/v2", prefix, "manifests", object))

	var (
		noduplicates []string
		seen         = map[string]struct{}{}
	)
	for _, u := range urls {
		if _, ok := seen[u]; !ok {
			seen[u] = struct{}{}
			noduplicates = append(noduplicates, u)
		}
	}

	return noduplicates, nil
}
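
// exampleV2URLPaths is a minimal sketch, not exercised by the command,
// illustrating the candidate paths produced above. The function name is an
// assumption for illustration only.
func exampleV2URLPaths() {
	// A tag plus a manifest mediatype hint resolves straight to the manifests
	// endpoint: ["/v2/library/redis/manifests/latest"].
	tagPaths, _ := getV2URLPaths("library/redis", "latest",
		"mediatype:application/vnd.docker.distribution.manifest.v2+json")

	// A digest tries the blobs endpoint first, then falls back to manifests:
	// ["/v2/library/redis/blobs/sha256:a027...", "/v2/library/redis/manifests/sha256:a027..."].
	digestPaths, _ := getV2URLPaths("library/redis",
		"sha256:a027a470aa2b9b41cc2539847a97b8a14794ebd0a4c7c5d64e390df6bde56c73")

	fmt.Println(tagPaths, digestPaths)
}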