srvdav/vendor/golang.org/x/net/lex/httplex/httplex.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package httplex contains rules around lexical matters of various
// HTTP-related specifications.
//
// This package is shared by the standard library (which vendors it)
// and x/net/http2. It comes with no API stability promise.
package httplex

import (
	"net"
	"strings"
	"unicode/utf8"

	"golang.org/x/net/idna"
)

// isTokenTable marks the ASCII characters that are valid "tchar"
// characters of an HTTP token (RFC 7230 section 3.2.6). It is indexed
// by character value; DEL (127) and everything above it fall outside
// the table and are therefore never tokens.
var isTokenTable = [127]bool{
	'!':  true,
	'#':  true,
	'$':  true,
	'%':  true,
	'&':  true,
	'\'': true,
	'*':  true,
	'+':  true,
	'-':  true,
	'.':  true,
	'0':  true,
	'1':  true,
	'2':  true,
	'3':  true,
	'4':  true,
	'5':  true,
	'6':  true,
	'7':  true,
	'8':  true,
	'9':  true,
	'A':  true,
	'B':  true,
	'C':  true,
	'D':  true,
	'E':  true,
	'F':  true,
	'G':  true,
	'H':  true,
	'I':  true,
	'J':  true,
	'K':  true,
	'L':  true,
	'M':  true,
	'N':  true,
	'O':  true,
	'P':  true,
	'Q':  true,
	'R':  true,
	'S':  true,
	'T':  true,
	'U':  true,
	'V':  true,
	'W':  true,
	'X':  true,
	'Y':  true,
	'Z':  true,
	'^':  true,
	'_':  true,
	'`':  true,
	'a':  true,
	'b':  true,
	'c':  true,
	'd':  true,
	'e':  true,
	'f':  true,
	'g':  true,
	'h':  true,
	'i':  true,
	'j':  true,
	'k':  true,
	'l':  true,
	'm':  true,
	'n':  true,
	'o':  true,
	'p':  true,
	'q':  true,
	'r':  true,
	's':  true,
	't':  true,
	'u':  true,
	'v':  true,
	'w':  true,
	'x':  true,
	'y':  true,
	'z':  true,
	'|':  true,
	'~':  true,
}

// IsTokenRune reports whether r is a character that may appear in an
// HTTP token, i.e. whether it is set in isTokenTable.
//
// Any rune outside the table's index range yields false. That includes
// negative runes (for example an EOF sentinel of -1), which previously
// caused an index-out-of-range panic.
func IsTokenRune(r rune) bool {
	// Both bounds must be checked before indexing: rune is a signed
	// type, so an upper-bound check alone does not protect the lookup.
	return 0 <= r && int(r) < len(isTokenTable) && isTokenTable[r]
}

// isNotToken is the logical negation of IsTokenRune: it reports whether
// r is NOT permitted in an HTTP token.
func isNotToken(r rune) bool {
	isToken := IsTokenRune(r)
	return !isToken
}

// HeaderValuesContainsToken reports whether token occurs in at least one
// of the header values, comparing ASCII case-insensitively. Each value
// is examined with headerValueContainsToken; an empty or nil values
// slice yields false.
func HeaderValuesContainsToken(values []string, token string) bool {
	for i := range values {
		if !headerValueContainsToken(values[i], token) {
			continue
		}
		return true
	}
	return false
}

// isOWS reports whether b is one of the two optional-whitespace bytes
// (space or horizontal tab) defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}

// trimOWS returns x with all optional whitespace (as classified by
// isOWS) removed from the beginning and the end. Whitespace in the
// interior of x is left alone.
func trimOWS(x string) string {
	// TODO: consider strings.Trim(x, " \t") if and when it is fast
	// enough (see issue 10292). This ASCII-only scan will probably
	// always beat UTF-8 aware code, though.
	start, end := 0, len(x)
	for start < end && isOWS(x[start]) {
		start++
	}
	for end > start && isOWS(x[end-1]) {
		end--
	}
	return x[start:end]
}

// headerValueContainsToken reports whether token is one of the
// comma-separated elements of v, comparing ASCII case-insensitively.
// v is assumed to be a 0#element list in the ABNF extension described
// in RFC 7230 section 7. Optional whitespace around each element is
// ignored.
func headerValueContainsToken(v string, token string) bool {
	rest := v
	for {
		rest = trimOWS(rest)
		comma := strings.IndexByte(rest, ',')
		if comma == -1 {
			// Last (or only) element of the list.
			return tokenEqual(rest, token)
		}
		if tokenEqual(trimOWS(rest[:comma]), token) {
			return true
		}
		// Move on to whatever follows the comma.
		rest = rest[comma+1:]
	}
}

// lowerASCII maps the ASCII uppercase letters 'A'..'Z' to their
// lowercase counterparts and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	const caseOffset = 'a' - 'A'
	return b + caseOffset
}

// tokenEqual reports whether t1 and t2 are equal under ASCII
// case-insensitive comparison. Strings of different length are never
// equal, and any non-ASCII byte in t1 makes the result false.
func tokenEqual(t1, t2 string) bool {
	if len(t1) != len(t2) {
		return false
	}
	for i := 0; i < len(t1); i++ {
		c := t1[i]
		// No UTF-8 or non-ASCII allowed in tokens.
		if c >= utf8.RuneSelf || lowerASCII(c) != lowerASCII(t2[i]) {
			return false
		}
	}
	return true
}

// isLWS reports whether b is a linear-white-space byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS            = [CRLF] 1*( SP | HT )
//
// Only the SP and HT bytes are recognized here.
func isLWS(b byte) bool {
	const (
		sp = ' '
		ht = '\t'
	)
	return b == ht || b == sp
}

// isCTL reports whether b is a control byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL            = <any US-ASCII control character
//	                 (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		lastC0 = 0x1f // highest of octets 0 - 31
		del    = 0x7f // DEL is a CTL too
	)
	if b == del {
		return true
	}
	return b <= lastC0
}

// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header
// field name: a non-empty string made up solely of token runes.
// HTTP/2 additionally forbids uppercase ASCII letters; that extra
// restriction is not checked here.
//
// RFC 7230 says:
//
//	header-field   = field-name ":" OWS field-value OWS
//	field-name     = token
//	token          = 1*tchar
//	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
//	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
func ValidHeaderFieldName(v string) bool {
	if v == "" {
		// token requires at least one tchar.
		return false
	}
	for _, c := range v {
		if IsTokenRune(c) {
			continue
		}
		return false
	}
	return true
}

// ValidHostHeader reports whether every byte of h is accepted by the
// validHostByte table. The empty string is accepted.
func ValidHostHeader(h string) bool {
	// The latest spec is actually this:
	//
	// http://tools.ietf.org/html/rfc7230#section-5.4
	//     Host = uri-host [ ":" port ]
	//
	// Where uri-host is:
	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
	//
	// Rather than parsing that grammar, this check is deliberately
	// lenient for now: it only rejects bytes that cannot appear in
	// any of those expressions.
	for _, b := range []byte(h) {
		if validHostByte[b] {
			continue
		}
		return false
	}
	return true
}

// validHostByte marks the bytes ValidHostHeader accepts; see the
// comment inside ValidHostHeader for the grammar it approximates.
// Every byte not listed here, including all bytes >= 0x80, is invalid.
var validHostByte = [256]bool{
	// unreserved: DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// unreserved: ALPHA (upper case)
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// unreserved: ALPHA (lower case)
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// unreserved: punctuation
	'-': true, '.': true, '_': true, '~': true,

	// sub-delims
	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
	'*': true, '+': true, ',': true, ';': true, '=': true,

	'%': true, // pct-encoded (and used in IPv6 zones)
	':': true, // IPv6address + Host expression's optional port
	'[': true, // opening bracket of an IP literal
	']': true, // closing bracket of an IP literal
}

// ValidHeaderFieldValue reports whether v is a valid "field-value".
// In practice it rejects v if it contains any control byte (per isCTL)
// other than the linear white space bytes SP and HTAB (per isLWS), and
// accepts everything else, including bytes >= 0x80.
//
// The rule comes from
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
//
//	message-header = field-name ":" [ field-value ]
//	field-value    = *( field-content | LWS )
//	field-content  = <the OCTETs making up the field-value
//	                 and consisting of either *TEXT or combinations
//	                 of token, separators, and quoted-string>
//
// and http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
//
//	TEXT           = <any OCTET except CTLs,
//	                  but including LWS>
//	LWS            = [CRLF] 1*( SP | HT )
//	CTL            = <any US-ASCII control character
//	                 (octets 0 - 31) and DEL (127)>
//
// RFC 7230 says:
//
//	field-value    = *( field-content / obs-fold )
//	obs-fold       =  N/A to http2, and deprecated
//	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
//	field-vchar    = VCHAR / obs-text
//	obs-text       = %x80-FF
//	VCHAR          = "any visible [USASCII] character"
//
// http2 further says: "Similarly, HTTP/2 allows header field values
// that are not valid. While most of the values that can be encoded
// will not alter header field parsing, carriage return (CR, ASCII
// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
// 0x0) might be exploited by an attacker if they are translated
// verbatim. Any request or response that contains a character not
// permitted in a header field value MUST be treated as malformed
// (Section 8.1.2.6). Valid characters are defined by the
// field-content ABNF rule in Section 3.2 of [RFC7230]."
//
// This function does not (yet?) properly handle the rejection of
// strings that begin or end with SP or HTAB.
func ValidHeaderFieldValue(v string) bool {
	for _, b := range []byte(v) {
		if !isCTL(b) || isLWS(b) {
			// Ordinary byte, or the one kind of CTL (HTAB) that is allowed.
			continue
		}
		return false
	}
	return true
}

// isASCII reports whether s consists solely of bytes below
// utf8.RuneSelf (0x80). The empty string is considered ASCII.
func isASCII(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}

// PunycodeHostPort returns the IDNA Punycode version of the provided
// "host" or "host:port" string.
//
// A pure-ASCII v is returned unchanged. Otherwise the host part is
// converted with idna.ToASCII and, if a port was present, re-joined
// with it. A conversion failure is returned as the error together
// with an empty string.
func PunycodeHostPort(v string) (string, error) {
	if isASCII(v) {
		return v, nil
	}

	// Assume v is a bare host unless it splits cleanly into host and
	// port. A split failure only means there was no port, so that
	// error is intentionally not reported to the caller.
	host, port := v, ""
	if h, p, splitErr := net.SplitHostPort(v); splitErr == nil {
		host, port = h, p
	}

	asciiHost, err := idna.ToASCII(host)
	if err != nil {
		// Non-UTF-8? Not representable in Punycode, in any case.
		return "", err
	}
	if port != "" {
		return net.JoinHostPort(asciiHost, port), nil
	}
	return asciiHost, nil
}
vendor: dep init Signed-off-by: Vincent Batts <vbatts@hashbangbash.com> 2018-04-04 00:33:38 +00:00			`// Copyright 2016 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Package httplex contains rules around lexical matters of various`
			`// HTTP-related specifications.`
			`//`
			`// This package is shared by the standard library (which vendors it)`
			`// and x/net/http2. It comes with no API stability promise.`
			`package httplex`

			`import (`
			`"net"`
			`"strings"`
			`"unicode/utf8"`

			`"golang.org/x/net/idna"`
			`)`

			`var isTokenTable = [127]bool{`
			`'!': true,`
			`'#': true,`
			`'$': true,`
			`'%': true,`
			`'&': true,`
			`'\'': true,`
			`'*': true,`
			`'+': true,`
			`'-': true,`
			`'.': true,`
			`'0': true,`
			`'1': true,`
			`'2': true,`
			`'3': true,`
			`'4': true,`
			`'5': true,`
			`'6': true,`
			`'7': true,`
			`'8': true,`
			`'9': true,`
			`'A': true,`
			`'B': true,`
			`'C': true,`
			`'D': true,`
			`'E': true,`
			`'F': true,`
			`'G': true,`
			`'H': true,`
			`'I': true,`
			`'J': true,`
			`'K': true,`
			`'L': true,`
			`'M': true,`
			`'N': true,`
			`'O': true,`
			`'P': true,`
			`'Q': true,`
			`'R': true,`
			`'S': true,`
			`'T': true,`
			`'U': true,`
			`'W': true,`
			`'V': true,`
			`'X': true,`
			`'Y': true,`
			`'Z': true,`
			`'^': true,`
			`'_': true,`
			'`': true,
			`'a': true,`
			`'b': true,`
			`'c': true,`
			`'d': true,`
			`'e': true,`
			`'f': true,`
			`'g': true,`
			`'h': true,`
			`'i': true,`
			`'j': true,`
			`'k': true,`
			`'l': true,`
			`'m': true,`
			`'n': true,`
			`'o': true,`
			`'p': true,`
			`'q': true,`
			`'r': true,`
			`'s': true,`
			`'t': true,`
			`'u': true,`
			`'v': true,`
			`'w': true,`
			`'x': true,`
			`'y': true,`
			`'z': true,`
			`'\|': true,`
			`'~': true,`
			`}`

			`func IsTokenRune(r rune) bool {`
			`i := int(r)`
			`return i < len(isTokenTable) && isTokenTable[i]`
			`}`

			`func isNotToken(r rune) bool {`
			`return !IsTokenRune(r)`
			`}`

			`// HeaderValuesContainsToken reports whether any string in values`
			`// contains the provided token, ASCII case-insensitively.`
			`func HeaderValuesContainsToken(values []string, token string) bool {`
			`for _, v := range values {`
			`if headerValueContainsToken(v, token) {`
			`return true`
			`}`
			`}`
			`return false`
			`}`

			`// isOWS reports whether b is an optional whitespace byte, as defined`
			`// by RFC 7230 section 3.2.3.`
			`func isOWS(b byte) bool { return b == ' ' \|\| b == '\t' }`

			`// trimOWS returns x with all optional whitespace removes from the`
			`// beginning and end.`
			`func trimOWS(x string) string {`
			`// TODO: consider using strings.Trim(x, " \t") instead,`
			`// if and when it's fast enough. See issue 10292.`
			`// But this ASCII-only code will probably always beat UTF-8`
			`// aware code.`
			`for len(x) > 0 && isOWS(x[0]) {`
			`x = x[1:]`
			`}`
			`for len(x) > 0 && isOWS(x[len(x)-1]) {`
			`x = x[:len(x)-1]`
			`}`
			`return x`
			`}`

			`// headerValueContainsToken reports whether v (assumed to be a`
			`// 0#element, in the ABNF extension described in RFC 7230 section 7)`
			`// contains token amongst its comma-separated tokens, ASCII`
			`// case-insensitively.`
			`func headerValueContainsToken(v string, token string) bool {`
			`v = trimOWS(v)`
			`if comma := strings.IndexByte(v, ','); comma != -1 {`
			`return tokenEqual(trimOWS(v[:comma]), token) \|\| headerValueContainsToken(v[comma+1:], token)`
			`}`
			`return tokenEqual(v, token)`
			`}`

			`// lowerASCII returns the ASCII lowercase version of b.`
			`func lowerASCII(b byte) byte {`
			`if 'A' <= b && b <= 'Z' {`
			`return b + ('a' - 'A')`
			`}`
			`return b`
			`}`

			`// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.`
			`func tokenEqual(t1, t2 string) bool {`
			`if len(t1) != len(t2) {`
			`return false`
			`}`
			`for i, b := range t1 {`
			`if b >= utf8.RuneSelf {`
			`// No UTF-8 or non-ASCII allowed in tokens.`
			`return false`
			`}`
			`if lowerASCII(byte(b)) != lowerASCII(t2[i]) {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`// isLWS reports whether b is linear white space, according`
			`// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2`
			`// LWS = [CRLF] 1*( SP \| HT )`
			`func isLWS(b byte) bool { return b == ' ' \|\| b == '\t' }`

			`// isCTL reports whether b is a control byte, according`
			`// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2`
			`// CTL = <any US-ASCII control character`
			`// (octets 0 - 31) and DEL (127)>`
			`func isCTL(b byte) bool {`
			`const del = 0x7f // a CTL`
			`return b < ' ' \|\| b == del`
			`}`

			`// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.`
			`// HTTP/2 imposes the additional restriction that uppercase ASCII`
			`// letters are not allowed.`
			`//`
			`// RFC 7230 says:`
			`// header-field = field-name ":" OWS field-value OWS`
			`// field-name = token`
			`// token = 1*tchar`
			`// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /`
			// "^" / "_" / "`" / "\|" / "~" / DIGIT / ALPHA
			`func ValidHeaderFieldName(v string) bool {`
			`if len(v) == 0 {`
			`return false`
			`}`
			`for _, r := range v {`
			`if !IsTokenRune(r) {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`// ValidHostHeader reports whether h is a valid host header.`
			`func ValidHostHeader(h string) bool {`
			`// The latest spec is actually this:`
			`//`
			`// http://tools.ietf.org/html/rfc7230#section-5.4`
			`// Host = uri-host [ ":" port ]`
			`//`
			`// Where uri-host is:`
			`// http://tools.ietf.org/html/rfc3986#section-3.2.2`
			`//`
			`// But we're going to be much more lenient for now and just`
			`// search for any byte that's not a valid byte in any of those`
			`// expressions.`
			`for i := 0; i < len(h); i++ {`
			`if !validHostByte[h[i]] {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`// See the validHostHeader comment.`
			`var validHostByte = [256]bool{`
			`'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,`
			`'8': true, '9': true,`

			`'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,`
			`'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,`
			`'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,`
			`'y': true, 'z': true,`

			`'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,`
			`'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,`
			`'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,`
			`'Y': true, 'Z': true,`

			`'!': true, // sub-delims`
			`'$': true, // sub-delims`
			`'%': true, // pct-encoded (and used in IPv6 zones)`
			`'&': true, // sub-delims`
			`'(': true, // sub-delims`
			`')': true, // sub-delims`
			`'*': true, // sub-delims`
			`'+': true, // sub-delims`
			`',': true, // sub-delims`
			`'-': true, // unreserved`
			`'.': true, // unreserved`
			`':': true, // IPv6address + Host expression's optional port`
			`';': true, // sub-delims`
			`'=': true, // sub-delims`
			`'[': true,`
			`'\'': true, // sub-delims`
			`']': true,`
			`'_': true, // unreserved`
			`'~': true, // unreserved`
			`}`

			`// ValidHeaderFieldValue reports whether v is a valid "field-value" according to`
			`// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :`
			`//`
			`// message-header = field-name ":" [ field-value ]`
			`// field-value = *( field-content \| LWS )`
			`// field-content = <the OCTETs making up the field-value`
			`// and consisting of either *TEXT or combinations`
			`// of token, separators, and quoted-string>`
			`//`
			`// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :`
			`//`
			`// TEXT = <any OCTET except CTLs,`
			`// but including LWS>`
			`// LWS = [CRLF] 1*( SP \| HT )`
			`// CTL = <any US-ASCII control character`
			`// (octets 0 - 31) and DEL (127)>`
			`//`
			`// RFC 7230 says:`
			`// field-value = *( field-content / obs-fold )`
			`// obj-fold = N/A to http2, and deprecated`
			`// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]`
			`// field-vchar = VCHAR / obs-text`
			`// obs-text = %x80-FF`
			`// VCHAR = "any visible [USASCII] character"`
			`//`
			`// http2 further says: "Similarly, HTTP/2 allows header field values`
			`// that are not valid. While most of the values that can be encoded`
			`// will not alter header field parsing, carriage return (CR, ASCII`
			`// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII`
			`// 0x0) might be exploited by an attacker if they are translated`
			`// verbatim. Any request or response that contains a character not`
			`// permitted in a header field value MUST be treated as malformed`
			`// (Section 8.1.2.6). Valid characters are defined by the`
			`// field-content ABNF rule in Section 3.2 of [RFC7230]."`
			`//`
			`// This function does not (yet?) properly handle the rejection of`
			`// strings that begin or end with SP or HTAB.`
			`func ValidHeaderFieldValue(v string) bool {`
			`for i := 0; i < len(v); i++ {`
			`b := v[i]`
			`if isCTL(b) && !isLWS(b) {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`func isASCII(s string) bool {`
			`for i := 0; i < len(s); i++ {`
			`if s[i] >= utf8.RuneSelf {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`// PunycodeHostPort returns the IDNA Punycode version`
			`// of the provided "host" or "host:port" string.`
			`func PunycodeHostPort(v string) (string, error) {`
			`if isASCII(v) {`
			`return v, nil`
			`}`

			`host, port, err := net.SplitHostPort(v)`
			`if err != nil {`
			`// The input 'v' argument was just a "host" argument,`
			`// without a port. This error should not be returned`
			`// to the caller.`
			`host = v`
			`port = ""`
			`}`
			`host, err = idna.ToASCII(host)`
			`if err != nil {`
			`// Non-UTF-8? Not representable in Punycode, in any`
			`// case.`
			`return "", err`
			`}`
			`if port == "" {`
			`return host, nil`
			`}`
			`return net.JoinHostPort(host, port), nil`
			`}`