reference: refactor grammar and regular expressions
To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com>
This commit is contained in:
parent
568bf038af
commit
3150937b9f
3 changed files with 115 additions and 36 deletions
|
@ -4,22 +4,16 @@
|
|||
// Grammar
|
||||
//
|
||||
// reference := name [ ":" tag ] [ "@" digest ]
|
||||
// name := [hostname '/'] component ['/' component]*
|
||||
// hostname := hostcomponent ['.' hostcomponent]* [':' port-number]
|
||||
// hostcomponent := /([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])/
|
||||
// port-number := /[0-9]+/
|
||||
// component := alpha-numeric [separator alpha-numeric]*
|
||||
// alpha-numeric := /[a-z0-9]+/
|
||||
// separator := /[_.]|__|[-]*/
|
||||
//
|
||||
// // repository.go
|
||||
// repository := hostname ['/' component]+
|
||||
// hostname := hostcomponent [':' port-number]
|
||||
// component := subcomponent [separator subcomponent]*
|
||||
// subcomponent := alpha-numeric ['-'* alpha-numeric]*
|
||||
// hostcomponent := [hostpart '.']* hostpart
|
||||
// alpha-numeric := /[a-z0-9]+/
|
||||
// separator := /([_.]|__)/
|
||||
// port-number := /[0-9]+/
|
||||
// hostpart := /([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])/
|
||||
//
|
||||
// // tag.go
|
||||
// tag := /[\w][\w.-]{0,127}/
|
||||
//
|
||||
// // from the digest package
|
||||
// digest := digest-algorithm ":" digest-hex
|
||||
// digest-algorithm := digest-algorithm-component [ digest-algorithm-separator digest-algorithm-component ]*
|
||||
// digest-algorithm-separator := /[+._-]/
|
||||
|
|
|
@ -3,47 +3,122 @@ package reference
|
|||
import "regexp"
|
||||
|
||||
var (
|
||||
// nameSubComponentRegexp defines the part of the name which must
|
||||
// begin and end with an alphanumeric character. These characters can
|
||||
// be separated by any number of dashes.
|
||||
nameSubComponentRegexp = regexp.MustCompile(`[a-z0-9]+(?:[-]+[a-z0-9]+)*`)
|
||||
// alphaNumericRegexp defines the alpha numeric atom, typically a
|
||||
// component of names. This only allows lower case characters and digits.
|
||||
alphaNumericRegexp = match(`[a-z0-9]+`)
|
||||
|
||||
// nameComponentRegexp restricts registry path component names to
|
||||
// start with at least one letter or number, with following parts able to
|
||||
// be separated by one period, underscore or double underscore.
|
||||
nameComponentRegexp = regexp.MustCompile(nameSubComponentRegexp.String() + `(?:(?:[._]|__)` + nameSubComponentRegexp.String() + `)*`)
|
||||
// separatorRegexp defines the separators allowed to be embedded in name
|
||||
// components. This allows one period, one or two underscores and multiple
|
||||
// dashes.
|
||||
separatorRegexp = match(`(?:[._]|__|[-]*)`)
|
||||
|
||||
nameRegexp = regexp.MustCompile(`(?:` + nameComponentRegexp.String() + `/)*` + nameComponentRegexp.String())
|
||||
// nameComponentRegexp restricts registry path component names to start
|
||||
// with at least one letter or number, with following parts able to be
|
||||
// separated by one period, one or two underscores and multiple dashes.
|
||||
nameComponentRegexp = expression(
|
||||
alphaNumericRegexp,
|
||||
optional(repeated(separatorRegexp, alphaNumericRegexp)))
|
||||
|
||||
hostnameComponentRegexp = regexp.MustCompile(`(?:[a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])`)
|
||||
// hostnameComponentRegexp restricts a single dot-separated component of
|
||||
// the registry hostname to lowercase letters, digits and dashes, where the
|
||||
// first and last characters must not be dashes.
|
||||
hostnameComponentRegexp = match(`(?:[a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])`)
|
||||
|
||||
// hostnameComponentRegexp restricts the registry hostname component of a repository name to
|
||||
// start with a component as defined by hostnameRegexp and followed by an optional port.
|
||||
hostnameRegexp = regexp.MustCompile(`(?:` + hostnameComponentRegexp.String() + `\.)*` + hostnameComponentRegexp.String() + `(?::[0-9]+)?`)
|
||||
// hostnameRegexp defines the structure of potential hostname components
|
||||
// that may be part of image names. This is purposely a subset of what is
|
||||
// allowed by DNS to ensure backwards compatibility with Docker image
|
||||
// names.
|
||||
hostnameRegexp = expression(
|
||||
hostnameComponentRegexp,
|
||||
optional(repeated(literal(`.`), hostnameComponentRegexp)),
|
||||
optional(literal(`:`), match(`[0-9]+`)))
|
||||
|
||||
// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
|
||||
TagRegexp = regexp.MustCompile(`[\w][\w.-]{0,127}`)
|
||||
TagRegexp = match(`[\w][\w.-]{0,127}`)
|
||||
|
||||
// anchoredTagRegexp matches valid tag names, anchored at the start and
|
||||
// end of the matched string.
|
||||
anchoredTagRegexp = regexp.MustCompile(`^` + TagRegexp.String() + `$`)
|
||||
anchoredTagRegexp = anchored(TagRegexp)
|
||||
|
||||
// DigestRegexp matches valid digests.
|
||||
DigestRegexp = regexp.MustCompile(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`)
|
||||
DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`)
|
||||
|
||||
// anchoredDigestRegexp matches valid digests, anchored at the start and
|
||||
// end of the matched string.
|
||||
anchoredDigestRegexp = regexp.MustCompile(`^` + DigestRegexp.String() + `$`)
|
||||
anchoredDigestRegexp = anchored(DigestRegexp)
|
||||
|
||||
// NameRegexp is the format for the name component of references. The
|
||||
// regexp is unanchored and has no capturing groups; anchoredNameRegexp
|
||||
// captures the hostname and the remaining name components.
|
||||
NameRegexp = regexp.MustCompile(`(?:` + hostnameRegexp.String() + `/)?` + nameRegexp.String())
|
||||
NameRegexp = expression(
|
||||
optional(hostnameRegexp, literal(`/`)),
|
||||
nameComponentRegexp,
|
||||
optional(repeated(literal(`/`), nameComponentRegexp)))
|
||||
|
||||
// ReferenceRegexp is the full supported format of a reference. The
|
||||
// regexp has capturing groups for name, tag, and digest components.
|
||||
ReferenceRegexp = regexp.MustCompile(`^((?:` + hostnameRegexp.String() + `/)?` + nameRegexp.String() + `)(?:[:](` + TagRegexp.String() + `))?(?:[@](` + DigestRegexp.String() + `))?$`)
|
||||
// anchoredNameRegexp is used to parse a name value, capturing the
|
||||
// hostname and trailing components.
|
||||
anchoredNameRegexp = anchored(
|
||||
optional(capture(hostnameRegexp), literal(`/`)),
|
||||
capture(nameComponentRegexp,
|
||||
optional(repeated(literal(`/`), nameComponentRegexp))))
|
||||
|
||||
// anchoredNameRegexp is used to parse a name value, capturing hostname
|
||||
anchoredNameRegexp = regexp.MustCompile(`^(?:(` + hostnameRegexp.String() + `)/)?(` + nameRegexp.String() + `)$`)
|
||||
// ReferenceRegexp is the full supported format of a reference. The regexp
|
||||
// is anchored and has capturing groups for name, tag, and digest
|
||||
// components.
|
||||
ReferenceRegexp = anchored(capture(NameRegexp),
|
||||
optional(literal(":"), capture(TagRegexp)),
|
||||
optional(literal("@"), capture(DigestRegexp)))
|
||||
)
|
||||
|
||||
// match compiles the pattern string into a regular expression. It is an
// alias for regexp.MustCompile, so it panics if the pattern cannot be
// parsed.
|
||||
var match = regexp.MustCompile
|
||||
|
||||
// literal compiles s into a literal regular expression, escaping any regexp
|
||||
// reserved characters.
|
||||
func literal(s string) *regexp.Regexp {
|
||||
re := match(regexp.QuoteMeta(s))
|
||||
|
||||
if _, complete := re.LiteralPrefix(); !complete {
|
||||
panic("must be a literal")
|
||||
}
|
||||
|
||||
return re
|
||||
}
|
||||
|
||||
// expression defines a full expression, where each regular expression must
|
||||
// follow the previous.
|
||||
func expression(res ...*regexp.Regexp) *regexp.Regexp {
|
||||
var s string
|
||||
for _, re := range res {
|
||||
s += re.String()
|
||||
}
|
||||
|
||||
return match(s)
|
||||
}
|
||||
|
||||
// optional wraps the expression in a non-capturing group and makes the
|
||||
// production optional.
|
||||
func optional(res ...*regexp.Regexp) *regexp.Regexp {
|
||||
return match(group(expression(res...)).String() + `?`)
|
||||
}
|
||||
|
||||
// repeated wraps the regexp in a non-capturing group to get one or more
|
||||
// matches.
|
||||
func repeated(res ...*regexp.Regexp) *regexp.Regexp {
|
||||
return match(group(expression(res...)).String() + `+`)
|
||||
}
|
||||
|
||||
// group wraps the regexp in a non-capturing group.
|
||||
func group(res ...*regexp.Regexp) *regexp.Regexp {
|
||||
return match(`(?:` + expression(res...).String() + `)`)
|
||||
}
|
||||
|
||||
// capture wraps the expression in a capturing group.
|
||||
func capture(res ...*regexp.Regexp) *regexp.Regexp {
|
||||
return match(`(` + expression(res...).String() + `)`)
|
||||
}
|
||||
|
||||
// anchored anchors the regular expression by adding start and end delimiters.
|
||||
func anchored(res ...*regexp.Regexp) *regexp.Regexp {
|
||||
return match(`^` + expression(res...).String() + `$`)
|
||||
}
|
||||
|
|
|
@ -119,6 +119,11 @@ func TestHostRegexp(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestFullNameRegexp(t *testing.T) {
|
||||
if anchoredNameRegexp.NumSubexp() != 2 {
|
||||
t.Fatalf("anchored name regexp should have two submatches: %v, %v != 2",
|
||||
anchoredNameRegexp, anchoredNameRegexp.NumSubexp())
|
||||
}
|
||||
|
||||
testcases := []regexpMatch{
|
||||
{
|
||||
input: "",
|
||||
|
@ -401,6 +406,11 @@ func TestFullNameRegexp(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestReferenceRegexp(t *testing.T) {
|
||||
if ReferenceRegexp.NumSubexp() != 3 {
|
||||
t.Fatalf("anchored name regexp should have three submatches: %v, %v != 3",
|
||||
ReferenceRegexp, ReferenceRegexp.NumSubexp())
|
||||
}
|
||||
|
||||
testcases := []regexpMatch{
|
||||
{
|
||||
input: "registry.com:8080/myapp:tag",
|
||||
|
|
Loading…
Reference in a new issue