diff --git a/reference/reference.go b/reference/reference.go index 7b2cc2e9..c3e77c6e 100644 --- a/reference/reference.go +++ b/reference/reference.go @@ -4,22 +4,16 @@ // Grammar // // reference := repository [ ":" tag ] [ "@" digest ] +// name := [hostname '/'] component ['/' component]* +// hostname := hostcomponent ['.' hostcomponent]* [':' port-number] +// hostcomponent := /([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])/ +// port-number := /[0-9]+/ +// component := alpha-numeric [separator alpha-numeric]* +// alpha-numeric := /[a-z0-9]+/ +// separator := /[_.]|__|[-]*/ // -// // repository.go -// repository := hostname ['/' component]+ -// hostname := hostcomponent [':' port-number] -// component := subcomponent [separator subcomponent]* -// subcomponent := alpha-numeric ['-'* alpha-numeric]* -// hostcomponent := [hostpart '.']* hostpart -// alpha-numeric := /[a-z0-9]+/ -// separator := /([_.]|__)/ -// port-number := /[0-9]+/ -// hostpart := /([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])/ -// -// // tag.go // tag := /[\w][\w.-]{0,127}/ // -// // from the digest package // digest := digest-algorithm ":" digest-hex // digest-algorithm := digest-algorithm-component [ digest-algorithm-separator digest-algorithm-component ] // digest-algorithm-separator := /[+.-_]/ diff --git a/reference/regexp.go b/reference/regexp.go index 06ca8db3..a4ffe5b6 100644 --- a/reference/regexp.go +++ b/reference/regexp.go @@ -3,47 +3,122 @@ package reference import "regexp" var ( - // nameSubComponentRegexp defines the part of the name which must be - // begin and end with an alphanumeric character. These characters can - // be separated by any number of dashes. - nameSubComponentRegexp = regexp.MustCompile(`[a-z0-9]+(?:[-]+[a-z0-9]+)*`) + // alphaNumericRegexp defines the alpha numeric atom, typically a + // component of names. This only allows lower case characters and digits. + alphaNumericRegexp = match(`[a-z0-9]+`) - // nameComponentRegexp restricts registry path component names to - // start with at least one letter or number, with following parts able to - // be separated by one period, underscore or double underscore. - nameComponentRegexp = regexp.MustCompile(nameSubComponentRegexp.String() + `(?:(?:[._]|__)` + nameSubComponentRegexp.String() + `)*`) + // separatorRegexp defines the separators allowed to be embedded in name + // components. This allow one period, one or two underscore and multiple + // dashes. + separatorRegexp = match(`(?:[._]|__|[-]*)`) - nameRegexp = regexp.MustCompile(`(?:` + nameComponentRegexp.String() + `/)*` + nameComponentRegexp.String()) + // nameComponentRegexp restricts registry path component names to start + // with at least one letter or number, with following parts able to be + // separated by one period, one or two underscore and multiple dashes. + nameComponentRegexp = expression( + alphaNumericRegexp, + optional(repeated(separatorRegexp, alphaNumericRegexp))) - hostnameComponentRegexp = regexp.MustCompile(`(?:[a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])`) + // hostnameComponentRegexp restricts the registry hostname component of a + // repository name to start with a component as defined by hostnameRegexp + // and followed by an optional port. + hostnameComponentRegexp = match(`(?:[a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])`) - // hostnameComponentRegexp restricts the registry hostname component of a repository name to - // start with a component as defined by hostnameRegexp and followed by an optional port. - hostnameRegexp = regexp.MustCompile(`(?:` + hostnameComponentRegexp.String() + `\.)*` + hostnameComponentRegexp.String() + `(?::[0-9]+)?`) + // hostnameRegexp defines the structure of potential hostname components + // that may be part of image names. This is purposely a subset of what is + // allowed by DNS to ensure backwards compatibility with Docker image + // names. + hostnameRegexp = expression( + hostnameComponentRegexp, + optional(repeated(literal(`.`), hostnameComponentRegexp)), + optional(literal(`:`), match(`[0-9]+`))) // TagRegexp matches valid tag names. From docker/docker:graph/tags.go. - TagRegexp = regexp.MustCompile(`[\w][\w.-]{0,127}`) + TagRegexp = match(`[\w][\w.-]{0,127}`) // anchoredTagRegexp matches valid tag names, anchored at the start and // end of the matched string. - anchoredTagRegexp = regexp.MustCompile(`^` + TagRegexp.String() + `$`) + anchoredTagRegexp = anchored(TagRegexp) // DigestRegexp matches valid digests. - DigestRegexp = regexp.MustCompile(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`) + DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`) // anchoredDigestRegexp matches valid digests, anchored at the start and // end of the matched string. - anchoredDigestRegexp = regexp.MustCompile(`^` + DigestRegexp.String() + `$`) + anchoredDigestRegexp = anchored(DigestRegexp) // NameRegexp is the format for the name component of references. The // regexp has capturing groups for the hostname and name part omitting // the seperating forward slash from either. - NameRegexp = regexp.MustCompile(`(?:` + hostnameRegexp.String() + `/)?` + nameRegexp.String()) + NameRegexp = expression( + optional(hostnameRegexp, literal(`/`)), + nameComponentRegexp, + optional(repeated(literal(`/`), nameComponentRegexp))) - // ReferenceRegexp is the full supported format of a reference. The - // regexp has capturing groups for name, tag, and digest components. - ReferenceRegexp = regexp.MustCompile(`^((?:` + hostnameRegexp.String() + `/)?` + nameRegexp.String() + `)(?:[:](` + TagRegexp.String() + `))?(?:[@](` + DigestRegexp.String() + `))?$`) + // anchoredNameRegexp is used to parse a name value, capturing the + // hostname and trailing components. + anchoredNameRegexp = anchored( + optional(capture(hostnameRegexp), literal(`/`)), + capture(nameComponentRegexp, + optional(repeated(literal(`/`), nameComponentRegexp)))) - // anchoredNameRegexp is used to parse a name value, capturing hostname - anchoredNameRegexp = regexp.MustCompile(`^(?:(` + hostnameRegexp.String() + `)/)?(` + nameRegexp.String() + `)$`) + // ReferenceRegexp is the full supported format of a reference. The regexp + // is anchored and has capturing groups for name, tag, and digest + // components. + ReferenceRegexp = anchored(capture(NameRegexp), + optional(literal(":"), capture(TagRegexp)), + optional(literal("@"), capture(DigestRegexp))) ) + +// match compiles the string to a regular expression. +var match = regexp.MustCompile + +// literal compiles s into a literal regular expression, escaping any regexp +// reserved characters. +func literal(s string) *regexp.Regexp { + re := match(regexp.QuoteMeta(s)) + + if _, complete := re.LiteralPrefix(); !complete { + panic("must be a literal") + } + + return re +} + +// expression defines a full expression, where each regular expression must +// follow the previous. +func expression(res ...*regexp.Regexp) *regexp.Regexp { + var s string + for _, re := range res { + s += re.String() + } + + return match(s) +} + +// optional wraps the expression in a non-capturing group and makes the +// production optional. +func optional(res ...*regexp.Regexp) *regexp.Regexp { + return match(group(expression(res...)).String() + `?`) +} + +// repeated wraps the regexp in a non-capturing group to get one or more +// matches. +func repeated(res ...*regexp.Regexp) *regexp.Regexp { + return match(group(expression(res...)).String() + `+`) +} + +// group wraps the regexp in a non-capturing group. +func group(res ...*regexp.Regexp) *regexp.Regexp { + return match(`(?:` + expression(res...).String() + `)`) +} + +// capture wraps the expression in a capturing group. +func capture(res ...*regexp.Regexp) *regexp.Regexp { + return match(`(` + expression(res...).String() + `)`) +} + +// anchored anchors the regular expression by adding start and end delimiters. +func anchored(res ...*regexp.Regexp) *regexp.Regexp { + return match(`^` + expression(res...).String() + `$`) +} diff --git a/reference/regexp_test.go b/reference/regexp_test.go index 530a6eb6..33944918 100644 --- a/reference/regexp_test.go +++ b/reference/regexp_test.go @@ -119,6 +119,11 @@ func TestHostRegexp(t *testing.T) { } func TestFullNameRegexp(t *testing.T) { + if anchoredNameRegexp.NumSubexp() != 2 { + t.Fatalf("anchored name regexp should have two submatches: %v, %v != 2", + anchoredNameRegexp, anchoredNameRegexp.NumSubexp()) + } + testcases := []regexpMatch{ { input: "", @@ -401,6 +406,11 @@ func TestFullNameRegexp(t *testing.T) { } func TestReferenceRegexp(t *testing.T) { + if ReferenceRegexp.NumSubexp() != 3 { + t.Fatalf("anchored name regexp should have three submatches: %v, %v != 3", + ReferenceRegexp, ReferenceRegexp.NumSubexp()) + } + testcases := []regexpMatch{ { input: "registry.com:8080/myapp:tag",