diff --git a/reference/helpers.go b/reference/helpers.go index 978df7ea..d10c7ef8 100644 --- a/reference/helpers.go +++ b/reference/helpers.go @@ -32,7 +32,7 @@ func FamiliarString(ref Reference) string { } // FamiliarMatch reports whether ref matches the specified pattern. -// See https://godoc.org/path#Match for supported patterns. +// See [path.Match] for supported patterns. func FamiliarMatch(pattern string, ref Reference) (bool, error) { matched, err := path.Match(pattern, FamiliarString(ref)) if namedRef, isNamed := ref.(Named); isNamed && !matched { diff --git a/reference/normalize.go b/reference/normalize.go index 47de5850..f2ce13ad 100644 --- a/reference/normalize.go +++ b/reference/normalize.go @@ -8,10 +8,35 @@ import ( ) const ( + // legacyDefaultDomain is the legacy domain for Docker Hub (which was + // originally named "the Docker Index"). This domain is still used for + // authentication and image search, which were part of the "v1" Docker + // registry specification. + // + // This domain will continue to be supported, but there are plans to consolidate + // legacy domains to new "canonical" domains. Once those domains are decided + // on, we must update the normalization functions, but preserve compatibility + // with existing installs, clients, and user configuration. legacyDefaultDomain = "index.docker.io" - defaultDomain = "docker.io" - officialRepoPrefix = "library/" - defaultTag = "latest" + + // defaultDomain is the default domain used for images on Docker Hub. + // It is used to normalize "familiar" names to canonical names, for example, + // to convert "ubuntu" to "docker.io/library/ubuntu:latest". + // + // Note that actual domain of Docker Hub's registry is registry-1.docker.io. + // This domain will continue to be supported, but there are plans to consolidate + // legacy domains to new "canonical" domains. Once those domains are decided + // on, we must update the normalization functions, but preserve compatibility + // with existing installs, clients, and user configuration. + defaultDomain = "docker.io" + + // officialRepoPrefix is the namespace used for official images on Docker Hub. + // It is used to normalize "familiar" names to canonical names, for example, + // to convert "ubuntu" to "docker.io/library/ubuntu:latest". + officialRepoPrefix = "library/" + + // defaultTag is the default tag if no tag is provided. + defaultTag = "latest" ) // normalizedNamed represents a name which has been @@ -33,14 +58,14 @@ func ParseNormalizedNamed(s string) (Named, error) { return nil, fmt.Errorf("invalid repository name (%s), cannot specify 64-byte hexadecimal strings", s) } domain, remainder := splitDockerDomain(s) - var remoteName string + var remote string if tagSep := strings.IndexRune(remainder, ':'); tagSep > -1 { - remoteName = remainder[:tagSep] + remote = remainder[:tagSep] } else { - remoteName = remainder + remote = remainder } - if strings.ToLower(remoteName) != remoteName { - return nil, fmt.Errorf("invalid reference format: repository name (%s) must be lowercase", remoteName) + if strings.ToLower(remote) != remote { + return nil, fmt.Errorf("invalid reference format: repository name (%s) must be lowercase", remote) } ref, err := Parse(domain + "/" + remainder) @@ -54,41 +79,53 @@ func ParseNormalizedNamed(s string) (Named, error) { return named, nil } -// ParseDockerRef normalizes the image reference following the docker convention. This is added -// mainly for backward compatibility. -// The reference returned can only be either tagged or digested. For reference contains both tag -// and digest, the function returns digested reference, e.g. docker.io/library/busybox:latest@ -// sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa will be returned as -// docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa. +// namedTaggedDigested is a reference that has both a tag and a digest. +type namedTaggedDigested interface { + NamedTagged + Digested +} + +// ParseDockerRef normalizes the image reference following the docker convention, +// which allows for references to contain both a tag and a digest. It returns a +// reference that is either tagged or digested. For references containing both +// a tag and a digest, it returns a digested reference. For example, the following +// reference: +// +// docker.io/library/busybox:latest@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa +// +// Is returned as a digested reference (with the ":latest" tag removed): +// +// docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa +// +// References that are already "tagged" or "digested" are returned unmodified: +// +// // Already a digested reference +// docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa +// +// // Already a named reference +// docker.io/library/busybox:latest func ParseDockerRef(ref string) (Named, error) { named, err := ParseNormalizedNamed(ref) if err != nil { return nil, err } - if _, ok := named.(NamedTagged); ok { - if canonical, ok := named.(Canonical); ok { - // The reference is both tagged and digested, only - // return digested. - newNamed, err := WithName(canonical.Name()) - if err != nil { - return nil, err - } - newCanonical, err := WithDigest(newNamed, canonical.Digest()) - if err != nil { - return nil, err - } - return newCanonical, nil + if canonical, ok := named.(namedTaggedDigested); ok { + // The reference is both tagged and digested; only return digested. + newNamed, err := WithName(canonical.Name()) + if err != nil { + return nil, err } + return WithDigest(newNamed, canonical.Digest()) } return TagNameOnly(named), nil } -// splitDockerDomain splits a repository name to domain and remotename string. +// splitDockerDomain splits a repository name to domain and remote-name. // If no valid domain is found, the default domain is used. Repository name // needs to be already validated before. func splitDockerDomain(name string) (domain, remainder string) { i := strings.IndexRune(name, '/') - if i == -1 || (!strings.ContainsAny(name[:i], ".:") && name[:i] != "localhost" && strings.ToLower(name[:i]) == name[:i]) { + if i == -1 || (!strings.ContainsAny(name[:i], ".:") && name[:i] != localhost && strings.ToLower(name[:i]) == name[:i]) { domain, remainder = defaultDomain, name } else { domain, remainder = name[:i], name[i+1:] @@ -96,11 +133,6 @@ func splitDockerDomain(name string) (domain, remainder string) { if domain == legacyDefaultDomain { domain = defaultDomain } - // TODO(thaJeztah): this check may be too strict, as it assumes the - // "library/" namespace does not have nested namespaces. While this - // is true (currently), technically it would be possible for Docker - // Hub to use those (e.g. "library/distros/ubuntu:latest"). - // See https://github.com/distribution/distribution/pull/3769#issuecomment-1302031785. if domain == defaultDomain && !strings.ContainsRune(remainder, '/') { remainder = officialRepoPrefix + remainder } diff --git a/reference/reference.go b/reference/reference.go index 3499fbf8..e98c44da 100644 --- a/reference/reference.go +++ b/reference/reference.go @@ -4,13 +4,14 @@ // Grammar // // reference := name [ ":" tag ] [ "@" digest ] -// name := [domain '/'] path-component ['/' path-component]* +// name := [domain '/'] remote-name // domain := host [':' port-number] // host := domain-name | IPv4address | \[ IPv6address \] ; rfc3986 appendix-A // domain-name := domain-component ['.' domain-component]* // domain-component := /([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])/ // port-number := /[0-9]+/ // path-component := alpha-numeric [separator alpha-numeric]* +// path (or "remote-name") := path-component ['/' path-component]* // alpha-numeric := /[a-z0-9]+/ // separator := /[_.]|__|[-]*/ // @@ -23,7 +24,6 @@ // digest-hex := /[0-9a-fA-F]{32,}/ ; At least 128 bit digest value // // identifier := /[a-f0-9]{64}/ -// short-identifier := /[a-f0-9]{6,64}/ package reference import ( @@ -147,7 +147,7 @@ type namedRepository interface { Path() string } -// Domain returns the domain part of the Named reference +// Domain returns the domain part of the [Named] reference. func Domain(named Named) string { if r, ok := named.(namedRepository); ok { return r.Domain() @@ -156,7 +156,7 @@ func Domain(named Named) string { return domain } -// Path returns the name without the domain part of the Named reference +// Path returns the name without the domain part of the [Named] reference. func Path(named Named) (name string) { if r, ok := named.(namedRepository); ok { return r.Path() @@ -188,7 +188,6 @@ func SplitHostname(named Named) (string, string) { // Parse parses s and returns a syntactically valid Reference. // If an error was encountered it is returned, along with a nil Reference. -// NOTE: Parse will not handle short digests. func Parse(s string) (Reference, error) { matches := ReferenceRegexp.FindStringSubmatch(s) if matches == nil { @@ -240,7 +239,6 @@ func Parse(s string) (Reference, error) { // the Named interface. The reference must have a name and be in the canonical // form, otherwise an error is returned. // If an error was encountered it is returned, along with a nil Reference. -// NOTE: ParseNamed will not handle short digests. func ParseNamed(s string) (Named, error) { named, err := ParseNormalizedNamed(s) if err != nil { diff --git a/reference/regexp.go b/reference/regexp.go index 42f86b8b..2af73840 100644 --- a/reference/regexp.go +++ b/reference/regexp.go @@ -1,14 +1,50 @@ package reference -import "regexp" +import ( + "regexp" + "strings" +) -var ( - // alphaNumeric defines the alpha numeric atom, typically a +// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:"). +var DigestRegexp = regexp.MustCompile(digestPat) + +// DomainRegexp matches hostname or IP-addresses, optionally including a port +// number. It defines the structure of potential domain components that may be +// part of image names. This is purposely a subset of what is allowed by DNS to +// ensure backwards compatibility with Docker image names. It may be a subset of +// DNS domain name, an IPv4 address in decimal format, or an IPv6 address between +// square brackets (excluding zone identifiers as defined by [RFC 6874] or special +// addresses such as IPv4-Mapped). +// +// [RFC 6874]: https://www.rfc-editor.org/rfc/rfc6874. +var DomainRegexp = regexp.MustCompile(domainAndPort) + +// IdentifierRegexp is the format for string identifier used as a +// content addressable identifier using sha256. These identifiers +// are like digests without the algorithm, since sha256 is used. +var IdentifierRegexp = regexp.MustCompile(identifier) + +// NameRegexp is the format for the name component of references, including +// an optional domain and port, but without tag or digest suffix. +var NameRegexp = regexp.MustCompile(namePat) + +// ReferenceRegexp is the full supported format of a reference. The regexp +// is anchored and has capturing groups for name, tag, and digest +// components. +var ReferenceRegexp = regexp.MustCompile(referencePat) + +// TagRegexp matches valid tag names. From [docker/docker:graph/tags.go]. +// +// [docker/docker:graph/tags.go]: https://github.com/moby/moby/blob/v1.6.0/graph/tags.go#L26-L28 +var TagRegexp = regexp.MustCompile(tag) + +const ( + // alphanumeric defines the alphanumeric atom, typically a // component of names. This only allows lower case characters and digits. - alphaNumeric = `[a-z0-9]+` + alphanumeric = `[a-z0-9]+` // separator defines the separators allowed to be embedded in name - // components. This allow one period, one or two underscore and multiple + // components. This allows one period, one or two underscore and multiple // dashes. Repeated dashes and underscores are intentionally treated // differently. In order to support valid hostnames as name components, // supporting repeated dash was added. Additionally double underscore is @@ -16,33 +52,51 @@ var ( // supported names. separator = `(?:[._]|__|[-]*)` - // nameComponent restricts registry path component names to start - // with at least one letter or number, with following parts able to be - // separated by one period, one or two underscore and multiple dashes. - nameComponent = expression( - alphaNumeric, - optional(repeated(separator, alphaNumeric))) + // localhost is treated as a special value for domain-name. Any other + // domain-name without a "." or a ":port" are considered a path component. + localhost = `localhost` // domainNameComponent restricts the registry domain component of a // repository name to start with a component as defined by DomainRegexp. domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])` + // optionalPort matches an optional port-number including the port separator + // (e.g. ":80"). + optionalPort = `(?::[0-9]+)?` + + // tag matches valid tag names. From docker/docker:graph/tags.go. + tag = `[\w][\w.-]{0,127}` + + // digestPat matches well-formed digests, including algorithm (e.g. "sha256:"). + // + // TODO(thaJeztah): this should follow the same rules as https://pkg.go.dev/github.com/opencontainers/go-digest@v1.0.0#DigestRegexp + // so that go-digest defines the canonical format. Note that the go-digest is + // more relaxed: + // - it allows multiple algorithms (e.g. "sha256+b64:") to allow + // future expansion of supported algorithms. + // - it allows the "" value to use urlsafe base64 encoding as defined + // in [rfc4648, section 5]. + // + // [rfc4648, section 5]: https://www.rfc-editor.org/rfc/rfc4648#section-5. + digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` + + // identifier is the format for a content addressable identifier using sha256. + // These identifiers are like digests without the algorithm, since sha256 is used. + identifier = `([a-f0-9]{64})` + // ipv6address are enclosed between square brackets and may be represented // in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format // are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as // IPv4-Mapped are deliberately excluded. - ipv6address = expression( - literal(`[`), `(?:[a-fA-F0-9:]+)`, literal(`]`), - ) + ipv6address = `\[(?:[a-fA-F0-9:]+)\]` +) +var ( // domainName defines the structure of potential domain components // that may be part of image names. This is purposely a subset of what is // allowed by DNS to ensure backwards compatibility with Docker image // names. This includes IPv4 addresses on decimal format. - domainName = expression( - domainNameComponent, - optional(repeated(literal(`.`), domainNameComponent)), - ) + domainName = domainNameComponent + anyTimes(`\.`+domainNameComponent) // host defines the structure of potential domains based on the URI // Host subcomponent on rfc3986. It may be a subset of DNS domain name, @@ -53,117 +107,57 @@ var ( // allowed by the URI Host subcomponent on rfc3986 to ensure backwards // compatibility with Docker image names. - domain = expression( - host, - optional(literal(`:`), `[0-9]+`)) + domainAndPort = host + optionalPort - // DomainRegexp defines the structure of potential domain components - // that may be part of image names. This is purposely a subset of what is - // allowed by DNS to ensure backwards compatibility with Docker image - // names. - DomainRegexp = regexp.MustCompile(domain) - - tag = `[\w][\w.-]{0,127}` - // TagRegexp matches valid tag names. From docker/docker:graph/tags.go. - TagRegexp = regexp.MustCompile(tag) - - anchoredTag = anchored(tag) // anchoredTagRegexp matches valid tag names, anchored at the start and // end of the matched string. - anchoredTagRegexp = regexp.MustCompile(anchoredTag) + anchoredTagRegexp = regexp.MustCompile(anchored(tag)) - digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` - // DigestRegexp matches valid digests. - DigestRegexp = regexp.MustCompile(digestPat) - - anchoredDigest = anchored(digestPat) // anchoredDigestRegexp matches valid digests, anchored at the start and // end of the matched string. - anchoredDigestRegexp = regexp.MustCompile(anchoredDigest) + anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat)) - namePat = expression( - optional(domain, literal(`/`)), - nameComponent, - optional(repeated(literal(`/`), nameComponent))) - // NameRegexp is the format for the name component of references. The - // regexp has capturing groups for the domain and name part omitting - // the separating forward slash from either. - NameRegexp = regexp.MustCompile(namePat) + // pathComponent restricts path-components to start with an alphanumeric + // character, with following parts able to be separated by a separator + // (one period, one or two underscore and multiple dashes). + pathComponent = alphanumeric + anyTimes(separator+alphanumeric) + + // remoteName matches the remote-name of a repository. It consists of one + // or more forward slash (/) delimited path-components: + // + // pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu" + remoteName = pathComponent + anyTimes(`/`+pathComponent) + namePat = optional(domainAndPort+`/`) + remoteName - anchoredName = anchored( - optional(capture(domain), literal(`/`)), - capture(nameComponent, - optional(repeated(literal(`/`), nameComponent)))) // anchoredNameRegexp is used to parse a name value, capturing the // domain and trailing components. - anchoredNameRegexp = regexp.MustCompile(anchoredName) + anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName))) - referencePat = anchored(capture(namePat), - optional(literal(":"), capture(tag)), - optional(literal("@"), capture(digestPat))) - // ReferenceRegexp is the full supported format of a reference. The regexp - // is anchored and has capturing groups for name, tag, and digest - // components. - ReferenceRegexp = regexp.MustCompile(referencePat) + referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat))) - identifier = `([a-f0-9]{64})` - // IdentifierRegexp is the format for string identifier used as a - // content addressable identifier using sha256. These identifiers - // are like digests without the algorithm, since sha256 is used. - IdentifierRegexp = regexp.MustCompile(identifier) - - anchoredIdentifier = anchored(identifier) // anchoredIdentifierRegexp is used to check or match an // identifier value, anchored at start and end of string. - anchoredIdentifierRegexp = regexp.MustCompile(anchoredIdentifier) + anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier)) ) -// literal compiles s into a literal regular expression, escaping any regexp -// reserved characters. -func literal(s string) string { - re := regexp.MustCompile(regexp.QuoteMeta(s)) - - if _, complete := re.LiteralPrefix(); !complete { - panic("must be a literal") - } - - return re.String() -} - -// expression defines a full expression, where each regular expression must -// follow the previous. -func expression(res ...string) string { - var s string - for _, re := range res { - s += re - } - - return s -} - // optional wraps the expression in a non-capturing group and makes the // production optional. func optional(res ...string) string { - return group(expression(res...)) + `?` + return `(?:` + strings.Join(res, "") + `)?` } -// repeated wraps the regexp in a non-capturing group to get one or more -// matches. -func repeated(res ...string) string { - return group(expression(res...)) + `+` -} - -// group wraps the regexp in a non-capturing group. -func group(res ...string) string { - return `(?:` + expression(res...) + `)` +// anyTimes wraps the expression in a non-capturing group that can occur +// any number of times. +func anyTimes(res ...string) string { + return `(?:` + strings.Join(res, "") + `)*` } // capture wraps the expression in a capturing group. func capture(res ...string) string { - return `(` + expression(res...) + `)` + return `(` + strings.Join(res, "") + `)` } // anchored anchors the regular expression by adding start and end delimiters. func anchored(res ...string) string { - return `^` + expression(res...) + `$` + return `^` + strings.Join(res, "") + `$` } diff --git a/reference/sort.go b/reference/sort.go index 2049c71a..416c37b0 100644 --- a/reference/sort.go +++ b/reference/sort.go @@ -20,14 +20,16 @@ import ( "sort" ) -// Sort sorts string references preferring higher information references +// Sort sorts string references preferring higher information references. +// // The precedence is as follows: -// 1. Name + Tag + Digest -// 2. Name + Tag -// 3. Name + Digest -// 4. Name -// 5. Digest -// 6. Parse error +// +// 1. [Named] + [Tagged] + [Digested] (e.g., "docker.io/library/busybox:latest@sha256:") +// 2. [Named] + [Tagged] (e.g., "docker.io/library/busybox:latest") +// 3. [Named] + [Digested] (e.g., "docker.io/library/busybo@sha256:") +// 4. [Named] (e.g., "docker.io/library/busybox") +// 5. [Digested] (e.g., "docker.io@sha256:") +// 6. Parse error func Sort(references []string) []string { var prefs []Reference var bad []string