// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package httplex contains rules around lexical matters of various // HTTP-related specifications. // // This package is shared by the standard library (which vendors it) // and x/net/http2. It comes with no API stability promise. package httplex import ( "net" "strings" "unicode/utf8" "golang.org/x/net/idna" ) var isTokenTable = [127]bool{ '!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, '*': true, '+': true, '-': true, '.': true, '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true, '8': true, '9': true, 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'W': true, 'V': true, 'X': true, 'Y': true, 'Z': true, '^': true, '_': true, '`': true, 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 'y': true, 'z': true, '|': true, '~': true, } func IsTokenRune(r rune) bool { i := int(r) return i < len(isTokenTable) && isTokenTable[i] } func isNotToken(r rune) bool { return !IsTokenRune(r) } // HeaderValuesContainsToken reports whether any string in values // contains the provided token, ASCII case-insensitively. func HeaderValuesContainsToken(values []string, token string) bool { for _, v := range values { if headerValueContainsToken(v, token) { return true } } return false } // isOWS reports whether b is an optional whitespace byte, as defined // by RFC 7230 section 3.2.3. func isOWS(b byte) bool { return b == ' ' || b == '\t' } // trimOWS returns x with all optional whitespace removes from the // beginning and end. func trimOWS(x string) string { // TODO: consider using strings.Trim(x, " \t") instead, // if and when it's fast enough. See issue 10292. // But this ASCII-only code will probably always beat UTF-8 // aware code. for len(x) > 0 && isOWS(x[0]) { x = x[1:] } for len(x) > 0 && isOWS(x[len(x)-1]) { x = x[:len(x)-1] } return x } // headerValueContainsToken reports whether v (assumed to be a // 0#element, in the ABNF extension described in RFC 7230 section 7) // contains token amongst its comma-separated tokens, ASCII // case-insensitively. func headerValueContainsToken(v string, token string) bool { v = trimOWS(v) if comma := strings.IndexByte(v, ','); comma != -1 { return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) } return tokenEqual(v, token) } // lowerASCII returns the ASCII lowercase version of b. func lowerASCII(b byte) byte { if 'A' <= b && b <= 'Z' { return b + ('a' - 'A') } return b } // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. func tokenEqual(t1, t2 string) bool { if len(t1) != len(t2) { return false } for i, b := range t1 { if b >= utf8.RuneSelf { // No UTF-8 or non-ASCII allowed in tokens. return false } if lowerASCII(byte(b)) != lowerASCII(t2[i]) { return false } } return true } // isLWS reports whether b is linear white space, according // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 // LWS = [CRLF] 1*( SP | HT ) func isLWS(b byte) bool { return b == ' ' || b == '\t' } // isCTL reports whether b is a control byte, according // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 // CTL = <any US-ASCII control character // (octets 0 - 31) and DEL (127)> func isCTL(b byte) bool { const del = 0x7f // a CTL return b < ' ' || b == del } // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. // HTTP/2 imposes the additional restriction that uppercase ASCII // letters are not allowed. // // RFC 7230 says: // header-field = field-name ":" OWS field-value OWS // field-name = token // token = 1*tchar // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA func ValidHeaderFieldName(v string) bool { if len(v) == 0 { return false } for _, r := range v { if !IsTokenRune(r) { return false } } return true } // ValidHostHeader reports whether h is a valid host header. func ValidHostHeader(h string) bool { // The latest spec is actually this: // // http://tools.ietf.org/html/rfc7230#section-5.4 // Host = uri-host [ ":" port ] // // Where uri-host is: // http://tools.ietf.org/html/rfc3986#section-3.2.2 // // But we're going to be much more lenient for now and just // search for any byte that's not a valid byte in any of those // expressions. for i := 0; i < len(h); i++ { if !validHostByte[h[i]] { return false } } return true } // See the validHostHeader comment. var validHostByte = [256]bool{ '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true, '8': true, '9': true, 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 'y': true, 'z': true, 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true, '!': true, // sub-delims '$': true, // sub-delims '%': true, // pct-encoded (and used in IPv6 zones) '&': true, // sub-delims '(': true, // sub-delims ')': true, // sub-delims '*': true, // sub-delims '+': true, // sub-delims ',': true, // sub-delims '-': true, // unreserved '.': true, // unreserved ':': true, // IPv6address + Host expression's optional port ';': true, // sub-delims '=': true, // sub-delims '[': true, '\'': true, // sub-delims ']': true, '_': true, // unreserved '~': true, // unreserved } // ValidHeaderFieldValue reports whether v is a valid "field-value" according to // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : // // message-header = field-name ":" [ field-value ] // field-value = *( field-content | LWS ) // field-content = <the OCTETs making up the field-value // and consisting of either *TEXT or combinations // of token, separators, and quoted-string> // // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : // // TEXT = <any OCTET except CTLs, // but including LWS> // LWS = [CRLF] 1*( SP | HT ) // CTL = <any US-ASCII control character // (octets 0 - 31) and DEL (127)> // // RFC 7230 says: // field-value = *( field-content / obs-fold ) // obj-fold = N/A to http2, and deprecated // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] // field-vchar = VCHAR / obs-text // obs-text = %x80-FF // VCHAR = "any visible [USASCII] character" // // http2 further says: "Similarly, HTTP/2 allows header field values // that are not valid. While most of the values that can be encoded // will not alter header field parsing, carriage return (CR, ASCII // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII // 0x0) might be exploited by an attacker if they are translated // verbatim. Any request or response that contains a character not // permitted in a header field value MUST be treated as malformed // (Section 8.1.2.6). Valid characters are defined by the // field-content ABNF rule in Section 3.2 of [RFC7230]." // // This function does not (yet?) properly handle the rejection of // strings that begin or end with SP or HTAB. func ValidHeaderFieldValue(v string) bool { for i := 0; i < len(v); i++ { b := v[i] if isCTL(b) && !isLWS(b) { return false } } return true } func isASCII(s string) bool { for i := 0; i < len(s); i++ { if s[i] >= utf8.RuneSelf { return false } } return true } // PunycodeHostPort returns the IDNA Punycode version // of the provided "host" or "host:port" string. func PunycodeHostPort(v string) (string, error) { if isASCII(v) { return v, nil } host, port, err := net.SplitHostPort(v) if err != nil { // The input 'v' argument was just a "host" argument, // without a port. This error should not be returned // to the caller. host = v port = "" } host, err = idna.ToASCII(host) if err != nil { // Non-UTF-8? Not representable in Punycode, in any // case. return "", err } if port == "" { return host, nil } return net.JoinHostPort(host, port), nil }