add s3 region filters for cloudfront
Signed-off-by: tifayuki <tifayuki@gmail.com>
This commit is contained in:
parent
3800056b88
commit
e8ecc6dc55
11 changed files with 716 additions and 1548 deletions
68
vendor/golang.org/x/net/idna/idna.go
generated
vendored
68
vendor/golang.org/x/net/idna/idna.go
generated
vendored
|
@ -1,68 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package idna implements IDNA2008 (Internationalized Domain Names for
|
||||
// Applications), defined in RFC 5890, RFC 5891, RFC 5892, RFC 5893 and
|
||||
// RFC 5894.
|
||||
package idna // import "golang.org/x/net/idna"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TODO(nigeltao): specify when errors occur. For example, is ToASCII(".") or
|
||||
// ToASCII("foo\x00") an error? See also http://www.unicode.org/faq/idn.html#11
|
||||
|
||||
// acePrefix is the ASCII Compatible Encoding prefix.
|
||||
const acePrefix = "xn--"
|
||||
|
||||
// ToASCII converts a domain or domain label to its ASCII form. For example,
|
||||
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
|
||||
// ToASCII("golang") is "golang".
|
||||
func ToASCII(s string) (string, error) {
|
||||
if ascii(s) {
|
||||
return s, nil
|
||||
}
|
||||
labels := strings.Split(s, ".")
|
||||
for i, label := range labels {
|
||||
if !ascii(label) {
|
||||
a, err := encode(acePrefix, label)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
labels[i] = a
|
||||
}
|
||||
}
|
||||
return strings.Join(labels, "."), nil
|
||||
}
|
||||
|
||||
// ToUnicode converts a domain or domain label to its Unicode form. For example,
|
||||
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
|
||||
// ToUnicode("golang") is "golang".
|
||||
func ToUnicode(s string) (string, error) {
|
||||
if !strings.Contains(s, acePrefix) {
|
||||
return s, nil
|
||||
}
|
||||
labels := strings.Split(s, ".")
|
||||
for i, label := range labels {
|
||||
if strings.HasPrefix(label, acePrefix) {
|
||||
u, err := decode(label[len(acePrefix):])
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
labels[i] = u
|
||||
}
|
||||
}
|
||||
return strings.Join(labels, "."), nil
|
||||
}
|
||||
|
||||
func ascii(s string) bool {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] >= utf8.RuneSelf {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
200
vendor/golang.org/x/net/idna/punycode.go
generated
vendored
200
vendor/golang.org/x/net/idna/punycode.go
generated
vendored
|
@ -1,200 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package idna
|
||||
|
||||
// This file implements the Punycode algorithm from RFC 3492.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// These parameter values are specified in section 5.
|
||||
//
|
||||
// All computation is done with int32s, so that overflow behavior is identical
|
||||
// regardless of whether int is 32-bit or 64-bit.
|
||||
const (
|
||||
base int32 = 36
|
||||
damp int32 = 700
|
||||
initialBias int32 = 72
|
||||
initialN int32 = 128
|
||||
skew int32 = 38
|
||||
tmax int32 = 26
|
||||
tmin int32 = 1
|
||||
)
|
||||
|
||||
// decode decodes a string as specified in section 6.2.
|
||||
func decode(encoded string) (string, error) {
|
||||
if encoded == "" {
|
||||
return "", nil
|
||||
}
|
||||
pos := 1 + strings.LastIndex(encoded, "-")
|
||||
if pos == 1 {
|
||||
return "", fmt.Errorf("idna: invalid label %q", encoded)
|
||||
}
|
||||
if pos == len(encoded) {
|
||||
return encoded[:len(encoded)-1], nil
|
||||
}
|
||||
output := make([]rune, 0, len(encoded))
|
||||
if pos != 0 {
|
||||
for _, r := range encoded[:pos-1] {
|
||||
output = append(output, r)
|
||||
}
|
||||
}
|
||||
i, n, bias := int32(0), initialN, initialBias
|
||||
for pos < len(encoded) {
|
||||
oldI, w := i, int32(1)
|
||||
for k := base; ; k += base {
|
||||
if pos == len(encoded) {
|
||||
return "", fmt.Errorf("idna: invalid label %q", encoded)
|
||||
}
|
||||
digit, ok := decodeDigit(encoded[pos])
|
||||
if !ok {
|
||||
return "", fmt.Errorf("idna: invalid label %q", encoded)
|
||||
}
|
||||
pos++
|
||||
i += digit * w
|
||||
if i < 0 {
|
||||
return "", fmt.Errorf("idna: invalid label %q", encoded)
|
||||
}
|
||||
t := k - bias
|
||||
if t < tmin {
|
||||
t = tmin
|
||||
} else if t > tmax {
|
||||
t = tmax
|
||||
}
|
||||
if digit < t {
|
||||
break
|
||||
}
|
||||
w *= base - t
|
||||
if w >= math.MaxInt32/base {
|
||||
return "", fmt.Errorf("idna: invalid label %q", encoded)
|
||||
}
|
||||
}
|
||||
x := int32(len(output) + 1)
|
||||
bias = adapt(i-oldI, x, oldI == 0)
|
||||
n += i / x
|
||||
i %= x
|
||||
if n > utf8.MaxRune || len(output) >= 1024 {
|
||||
return "", fmt.Errorf("idna: invalid label %q", encoded)
|
||||
}
|
||||
output = append(output, 0)
|
||||
copy(output[i+1:], output[i:])
|
||||
output[i] = n
|
||||
i++
|
||||
}
|
||||
return string(output), nil
|
||||
}
|
||||
|
||||
// encode encodes a string as specified in section 6.3 and prepends prefix to
|
||||
// the result.
|
||||
//
|
||||
// The "while h < length(input)" line in the specification becomes "for
|
||||
// remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
|
||||
func encode(prefix, s string) (string, error) {
|
||||
output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
|
||||
copy(output, prefix)
|
||||
delta, n, bias := int32(0), initialN, initialBias
|
||||
b, remaining := int32(0), int32(0)
|
||||
for _, r := range s {
|
||||
if r < 0x80 {
|
||||
b++
|
||||
output = append(output, byte(r))
|
||||
} else {
|
||||
remaining++
|
||||
}
|
||||
}
|
||||
h := b
|
||||
if b > 0 {
|
||||
output = append(output, '-')
|
||||
}
|
||||
for remaining != 0 {
|
||||
m := int32(0x7fffffff)
|
||||
for _, r := range s {
|
||||
if m > r && r >= n {
|
||||
m = r
|
||||
}
|
||||
}
|
||||
delta += (m - n) * (h + 1)
|
||||
if delta < 0 {
|
||||
return "", fmt.Errorf("idna: invalid label %q", s)
|
||||
}
|
||||
n = m
|
||||
for _, r := range s {
|
||||
if r < n {
|
||||
delta++
|
||||
if delta < 0 {
|
||||
return "", fmt.Errorf("idna: invalid label %q", s)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if r > n {
|
||||
continue
|
||||
}
|
||||
q := delta
|
||||
for k := base; ; k += base {
|
||||
t := k - bias
|
||||
if t < tmin {
|
||||
t = tmin
|
||||
} else if t > tmax {
|
||||
t = tmax
|
||||
}
|
||||
if q < t {
|
||||
break
|
||||
}
|
||||
output = append(output, encodeDigit(t+(q-t)%(base-t)))
|
||||
q = (q - t) / (base - t)
|
||||
}
|
||||
output = append(output, encodeDigit(q))
|
||||
bias = adapt(delta, h+1, h == b)
|
||||
delta = 0
|
||||
h++
|
||||
remaining--
|
||||
}
|
||||
delta++
|
||||
n++
|
||||
}
|
||||
return string(output), nil
|
||||
}
|
||||
|
||||
func decodeDigit(x byte) (digit int32, ok bool) {
|
||||
switch {
|
||||
case '0' <= x && x <= '9':
|
||||
return int32(x - ('0' - 26)), true
|
||||
case 'A' <= x && x <= 'Z':
|
||||
return int32(x - 'A'), true
|
||||
case 'a' <= x && x <= 'z':
|
||||
return int32(x - 'a'), true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func encodeDigit(digit int32) byte {
|
||||
switch {
|
||||
case 0 <= digit && digit < 26:
|
||||
return byte(digit + 'a')
|
||||
case 26 <= digit && digit < 36:
|
||||
return byte(digit + ('0' - 26))
|
||||
}
|
||||
panic("idna: internal error in punycode encoding")
|
||||
}
|
||||
|
||||
// adapt is the bias adaptation function specified in section 6.1.
|
||||
func adapt(delta, numPoints int32, firstTime bool) int32 {
|
||||
if firstTime {
|
||||
delta /= damp
|
||||
} else {
|
||||
delta /= 2
|
||||
}
|
||||
delta += delta / numPoints
|
||||
k := int32(0)
|
||||
for delta > ((base-tmin)*tmax)/2 {
|
||||
delta /= base - tmin
|
||||
k += base
|
||||
}
|
||||
return k + (base-tmin+1)*delta/(delta+skew)
|
||||
}
|
663
vendor/golang.org/x/net/publicsuffix/gen.go
generated
vendored
663
vendor/golang.org/x/net/publicsuffix/gen.go
generated
vendored
|
@ -1,663 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates table.go and table_test.go.
|
||||
// Invoke as:
|
||||
//
|
||||
// go run gen.go -version "xxx" >table.go
|
||||
// go run gen.go -version "xxx" -test >table_test.go
|
||||
//
|
||||
// Pass -v to print verbose progress information.
|
||||
//
|
||||
// The version is derived from information found at
|
||||
// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat
|
||||
//
|
||||
// To fetch a particular git revision, such as 5c70ccd250, pass
|
||||
// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat"
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/format"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
)
|
||||
|
||||
const (
|
||||
// These sum of these four values must be no greater than 32.
|
||||
nodesBitsChildren = 9
|
||||
nodesBitsICANN = 1
|
||||
nodesBitsTextOffset = 15
|
||||
nodesBitsTextLength = 6
|
||||
|
||||
// These sum of these four values must be no greater than 32.
|
||||
childrenBitsWildcard = 1
|
||||
childrenBitsNodeType = 2
|
||||
childrenBitsHi = 14
|
||||
childrenBitsLo = 14
|
||||
)
|
||||
|
||||
var (
|
||||
maxChildren int
|
||||
maxTextOffset int
|
||||
maxTextLength int
|
||||
maxHi uint32
|
||||
maxLo uint32
|
||||
)
|
||||
|
||||
func max(a, b int) int {
|
||||
if a < b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func u32max(a, b uint32) uint32 {
|
||||
if a < b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
const (
|
||||
nodeTypeNormal = 0
|
||||
nodeTypeException = 1
|
||||
nodeTypeParentOnly = 2
|
||||
numNodeType = 3
|
||||
)
|
||||
|
||||
func nodeTypeStr(n int) string {
|
||||
switch n {
|
||||
case nodeTypeNormal:
|
||||
return "+"
|
||||
case nodeTypeException:
|
||||
return "!"
|
||||
case nodeTypeParentOnly:
|
||||
return "o"
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
var (
|
||||
labelEncoding = map[string]uint32{}
|
||||
labelsList = []string{}
|
||||
labelsMap = map[string]bool{}
|
||||
rules = []string{}
|
||||
|
||||
// validSuffix is used to check that the entries in the public suffix list
|
||||
// are in canonical form (after Punycode encoding). Specifically, capital
|
||||
// letters are not allowed.
|
||||
validSuffix = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`)
|
||||
|
||||
subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
|
||||
url = flag.String("url",
|
||||
"https://publicsuffix.org/list/effective_tld_names.dat",
|
||||
"URL of the publicsuffix.org list. If empty, stdin is read instead")
|
||||
v = flag.Bool("v", false, "verbose output (to stderr)")
|
||||
version = flag.String("version", "", "the effective_tld_names.dat version")
|
||||
test = flag.Bool("test", false, "generate table_test.go")
|
||||
)
|
||||
|
||||
func main() {
|
||||
if err := main1(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func main1() error {
|
||||
flag.Parse()
|
||||
if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
|
||||
return fmt.Errorf("not enough bits to encode the nodes table")
|
||||
}
|
||||
if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
|
||||
return fmt.Errorf("not enough bits to encode the children table")
|
||||
}
|
||||
if *version == "" {
|
||||
return fmt.Errorf("-version was not specified")
|
||||
}
|
||||
var r io.Reader = os.Stdin
|
||||
if *url != "" {
|
||||
res, err := http.Get(*url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("bad GET status for %s: %d", *url, res.Status)
|
||||
}
|
||||
r = res.Body
|
||||
defer res.Body.Close()
|
||||
}
|
||||
|
||||
var root node
|
||||
icann := false
|
||||
buf := new(bytes.Buffer)
|
||||
br := bufio.NewReader(r)
|
||||
for {
|
||||
s, err := br.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
s = strings.TrimSpace(s)
|
||||
if strings.Contains(s, "BEGIN ICANN DOMAINS") {
|
||||
icann = true
|
||||
continue
|
||||
}
|
||||
if strings.Contains(s, "END ICANN DOMAINS") {
|
||||
icann = false
|
||||
continue
|
||||
}
|
||||
if s == "" || strings.HasPrefix(s, "//") {
|
||||
continue
|
||||
}
|
||||
s, err = idna.ToASCII(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !validSuffix.MatchString(s) {
|
||||
return fmt.Errorf("bad publicsuffix.org list data: %q", s)
|
||||
}
|
||||
|
||||
if *subset {
|
||||
switch {
|
||||
case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
|
||||
case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
|
||||
case s == "ao" || strings.HasSuffix(s, ".ao"):
|
||||
case s == "ar" || strings.HasSuffix(s, ".ar"):
|
||||
case s == "arpa" || strings.HasSuffix(s, ".arpa"):
|
||||
case s == "cy" || strings.HasSuffix(s, ".cy"):
|
||||
case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
|
||||
case s == "jp":
|
||||
case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
|
||||
case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
|
||||
case s == "om" || strings.HasSuffix(s, ".om"):
|
||||
case s == "uk" || strings.HasSuffix(s, ".uk"):
|
||||
case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
|
||||
case s == "tw" || strings.HasSuffix(s, ".tw"):
|
||||
case s == "zw" || strings.HasSuffix(s, ".zw"):
|
||||
case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
|
||||
// xn--p1ai is Russian-Cyrillic "рф".
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
rules = append(rules, s)
|
||||
|
||||
nt, wildcard := nodeTypeNormal, false
|
||||
switch {
|
||||
case strings.HasPrefix(s, "*."):
|
||||
s, nt = s[2:], nodeTypeParentOnly
|
||||
wildcard = true
|
||||
case strings.HasPrefix(s, "!"):
|
||||
s, nt = s[1:], nodeTypeException
|
||||
}
|
||||
labels := strings.Split(s, ".")
|
||||
for n, i := &root, len(labels)-1; i >= 0; i-- {
|
||||
label := labels[i]
|
||||
n = n.child(label)
|
||||
if i == 0 {
|
||||
if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
|
||||
n.nodeType = nt
|
||||
}
|
||||
n.icann = n.icann && icann
|
||||
n.wildcard = n.wildcard || wildcard
|
||||
}
|
||||
labelsMap[label] = true
|
||||
}
|
||||
}
|
||||
labelsList = make([]string, 0, len(labelsMap))
|
||||
for label := range labelsMap {
|
||||
labelsList = append(labelsList, label)
|
||||
}
|
||||
sort.Strings(labelsList)
|
||||
|
||||
p := printReal
|
||||
if *test {
|
||||
p = printTest
|
||||
}
|
||||
if err := p(buf, &root); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
b, err := format.Source(buf.Bytes())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = os.Stdout.Write(b)
|
||||
return err
|
||||
}
|
||||
|
||||
func printTest(w io.Writer, n *node) error {
|
||||
fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
|
||||
fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")
|
||||
for _, rule := range rules {
|
||||
fmt.Fprintf(w, "%q,\n", rule)
|
||||
}
|
||||
fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
|
||||
if err := n.walk(w, printNodeLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(w, "}\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
func printReal(w io.Writer, n *node) error {
|
||||
const header = `// generated by go run gen.go; DO NOT EDIT
|
||||
|
||||
package publicsuffix
|
||||
|
||||
const version = %q
|
||||
|
||||
const (
|
||||
nodesBitsChildren = %d
|
||||
nodesBitsICANN = %d
|
||||
nodesBitsTextOffset = %d
|
||||
nodesBitsTextLength = %d
|
||||
|
||||
childrenBitsWildcard = %d
|
||||
childrenBitsNodeType = %d
|
||||
childrenBitsHi = %d
|
||||
childrenBitsLo = %d
|
||||
)
|
||||
|
||||
const (
|
||||
nodeTypeNormal = %d
|
||||
nodeTypeException = %d
|
||||
nodeTypeParentOnly = %d
|
||||
)
|
||||
|
||||
// numTLD is the number of top level domains.
|
||||
const numTLD = %d
|
||||
|
||||
`
|
||||
fmt.Fprintf(w, header, *version,
|
||||
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
|
||||
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
|
||||
nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
|
||||
|
||||
text := combineText(labelsList)
|
||||
if text == "" {
|
||||
return fmt.Errorf("internal error: makeText returned no text")
|
||||
}
|
||||
for _, label := range labelsList {
|
||||
offset, length := strings.Index(text, label), len(label)
|
||||
if offset < 0 {
|
||||
return fmt.Errorf("internal error: could not find %q in text %q", label, text)
|
||||
}
|
||||
maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length)
|
||||
if offset >= 1<<nodesBitsTextOffset {
|
||||
return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset)
|
||||
}
|
||||
if length >= 1<<nodesBitsTextLength {
|
||||
return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
|
||||
}
|
||||
labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
|
||||
}
|
||||
fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
|
||||
for len(text) > 0 {
|
||||
n, plus := len(text), ""
|
||||
if n > 64 {
|
||||
n, plus = 64, " +"
|
||||
}
|
||||
fmt.Fprintf(w, "%q%s\n", text[:n], plus)
|
||||
text = text[n:]
|
||||
}
|
||||
|
||||
if err := n.walk(w, assignIndexes); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, `
|
||||
|
||||
// nodes is the list of nodes. Each node is represented as a uint32, which
|
||||
// encodes the node's children, wildcard bit and node type (as an index into
|
||||
// the children array), ICANN bit and text.
|
||||
//
|
||||
// In the //-comment after each node's data, the nodes indexes of the children
|
||||
// are formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
|
||||
// nodeType is printed as + for normal, ! for exception, and o for parent-only
|
||||
// nodes that have children but don't match a domain label in their own right.
|
||||
// An I denotes an ICANN domain.
|
||||
//
|
||||
// The layout within the uint32, from MSB to LSB, is:
|
||||
// [%2d bits] unused
|
||||
// [%2d bits] children index
|
||||
// [%2d bits] ICANN bit
|
||||
// [%2d bits] text index
|
||||
// [%2d bits] text length
|
||||
var nodes = [...]uint32{
|
||||
`,
|
||||
32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
|
||||
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
|
||||
if err := n.walk(w, printNode); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(w, `}
|
||||
|
||||
// children is the list of nodes' children, the parent's wildcard bit and the
|
||||
// parent's node type. If a node has no children then their children index
|
||||
// will be in the range [0, 6), depending on the wildcard bit and node type.
|
||||
//
|
||||
// The layout within the uint32, from MSB to LSB, is:
|
||||
// [%2d bits] unused
|
||||
// [%2d bits] wildcard bit
|
||||
// [%2d bits] node type
|
||||
// [%2d bits] high nodes index (exclusive) of children
|
||||
// [%2d bits] low nodes index (inclusive) of children
|
||||
var children=[...]uint32{
|
||||
`,
|
||||
32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
|
||||
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
|
||||
for i, c := range childrenEncoding {
|
||||
s := "---------------"
|
||||
lo := c & (1<<childrenBitsLo - 1)
|
||||
hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
|
||||
if lo != hi {
|
||||
s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
|
||||
}
|
||||
nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
|
||||
wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
|
||||
fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
|
||||
c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
|
||||
}
|
||||
fmt.Fprintf(w, "}\n\n")
|
||||
fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1)
|
||||
fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1)
|
||||
fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1)
|
||||
fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1)
|
||||
fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1)
|
||||
return nil
|
||||
}
|
||||
|
||||
type node struct {
|
||||
label string
|
||||
nodeType int
|
||||
icann bool
|
||||
wildcard bool
|
||||
// nodesIndex and childrenIndex are the index of this node in the nodes
|
||||
// and the index of its children offset/length in the children arrays.
|
||||
nodesIndex, childrenIndex int
|
||||
// firstChild is the index of this node's first child, or zero if this
|
||||
// node has no children.
|
||||
firstChild int
|
||||
// children are the node's children, in strictly increasing node label order.
|
||||
children []*node
|
||||
}
|
||||
|
||||
func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
|
||||
if err := f(w, n); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, c := range n.children {
|
||||
if err := c.walk(w, f); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// child returns the child of n with the given label. The child is created if
|
||||
// it did not exist beforehand.
|
||||
func (n *node) child(label string) *node {
|
||||
for _, c := range n.children {
|
||||
if c.label == label {
|
||||
return c
|
||||
}
|
||||
}
|
||||
c := &node{
|
||||
label: label,
|
||||
nodeType: nodeTypeParentOnly,
|
||||
icann: true,
|
||||
}
|
||||
n.children = append(n.children, c)
|
||||
sort.Sort(byLabel(n.children))
|
||||
return c
|
||||
}
|
||||
|
||||
type byLabel []*node
|
||||
|
||||
func (b byLabel) Len() int { return len(b) }
|
||||
func (b byLabel) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
|
||||
|
||||
var nextNodesIndex int
|
||||
|
||||
// childrenEncoding are the encoded entries in the generated children array.
|
||||
// All these pre-defined entries have no children.
|
||||
var childrenEncoding = []uint32{
|
||||
0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
|
||||
1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
|
||||
2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
|
||||
4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
|
||||
5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
|
||||
6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
|
||||
}
|
||||
|
||||
var firstCallToAssignIndexes = true
|
||||
|
||||
func assignIndexes(w io.Writer, n *node) error {
|
||||
if len(n.children) != 0 {
|
||||
// Assign nodesIndex.
|
||||
n.firstChild = nextNodesIndex
|
||||
for _, c := range n.children {
|
||||
c.nodesIndex = nextNodesIndex
|
||||
nextNodesIndex++
|
||||
}
|
||||
|
||||
// The root node's children is implicit.
|
||||
if firstCallToAssignIndexes {
|
||||
firstCallToAssignIndexes = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// Assign childrenIndex.
|
||||
maxChildren = max(maxChildren, len(childrenEncoding))
|
||||
if len(childrenEncoding) >= 1<<nodesBitsChildren {
|
||||
return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding))
|
||||
}
|
||||
n.childrenIndex = len(childrenEncoding)
|
||||
lo := uint32(n.firstChild)
|
||||
hi := lo + uint32(len(n.children))
|
||||
maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
|
||||
if lo >= 1<<childrenBitsLo {
|
||||
return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
|
||||
}
|
||||
if hi >= 1<<childrenBitsHi {
|
||||
return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi)
|
||||
}
|
||||
enc := hi<<childrenBitsLo | lo
|
||||
enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
|
||||
if n.wildcard {
|
||||
enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
|
||||
}
|
||||
childrenEncoding = append(childrenEncoding, enc)
|
||||
} else {
|
||||
n.childrenIndex = n.nodeType
|
||||
if n.wildcard {
|
||||
n.childrenIndex += numNodeType
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func printNode(w io.Writer, n *node) error {
|
||||
for _, c := range n.children {
|
||||
s := "---------------"
|
||||
if len(c.children) != 0 {
|
||||
s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
|
||||
}
|
||||
encoding := labelEncoding[c.label]
|
||||
if c.icann {
|
||||
encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
|
||||
}
|
||||
encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
|
||||
fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
|
||||
encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
|
||||
nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func printNodeLabel(w io.Writer, n *node) error {
|
||||
for _, c := range n.children {
|
||||
fmt.Fprintf(w, "%q,\n", c.label)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func icannStr(icann bool) string {
|
||||
if icann {
|
||||
return "I"
|
||||
}
|
||||
return " "
|
||||
}
|
||||
|
||||
func wildcardStr(wildcard bool) string {
|
||||
if wildcard {
|
||||
return "*"
|
||||
}
|
||||
return " "
|
||||
}
|
||||
|
||||
// combineText combines all the strings in labelsList to form one giant string.
|
||||
// Overlapping strings will be merged: "arpa" and "parliament" could yield
|
||||
// "arparliament".
|
||||
func combineText(labelsList []string) string {
|
||||
beforeLength := 0
|
||||
for _, s := range labelsList {
|
||||
beforeLength += len(s)
|
||||
}
|
||||
|
||||
text := crush(removeSubstrings(labelsList))
|
||||
if *v {
|
||||
fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
|
||||
}
|
||||
return text
|
||||
}
|
||||
|
||||
type byLength []string
|
||||
|
||||
func (s byLength) Len() int { return len(s) }
|
||||
func (s byLength) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) }
|
||||
|
||||
// removeSubstrings returns a copy of its input with any strings removed
|
||||
// that are substrings of other provided strings.
|
||||
func removeSubstrings(input []string) []string {
|
||||
// Make a copy of input.
|
||||
ss := append(make([]string, 0, len(input)), input...)
|
||||
sort.Sort(byLength(ss))
|
||||
|
||||
for i, shortString := range ss {
|
||||
// For each string, only consider strings higher than it in sort order, i.e.
|
||||
// of equal length or greater.
|
||||
for _, longString := range ss[i+1:] {
|
||||
if strings.Contains(longString, shortString) {
|
||||
ss[i] = ""
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the empty strings.
|
||||
sort.Strings(ss)
|
||||
for len(ss) > 0 && ss[0] == "" {
|
||||
ss = ss[1:]
|
||||
}
|
||||
return ss
|
||||
}
|
||||
|
||||
// crush combines a list of strings, taking advantage of overlaps. It returns a
|
||||
// single string that contains each input string as a substring.
|
||||
func crush(ss []string) string {
|
||||
maxLabelLen := 0
|
||||
for _, s := range ss {
|
||||
if maxLabelLen < len(s) {
|
||||
maxLabelLen = len(s)
|
||||
}
|
||||
}
|
||||
|
||||
for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- {
|
||||
prefixes := makePrefixMap(ss, prefixLen)
|
||||
for i, s := range ss {
|
||||
if len(s) <= prefixLen {
|
||||
continue
|
||||
}
|
||||
mergeLabel(ss, i, prefixLen, prefixes)
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(ss, "")
|
||||
}
|
||||
|
||||
// mergeLabel merges the label at ss[i] with the first available matching label
|
||||
// in prefixMap, where the last "prefixLen" characters in ss[i] match the first
|
||||
// "prefixLen" characters in the matching label.
|
||||
// It will merge ss[i] repeatedly until no more matches are available.
|
||||
// All matching labels merged into ss[i] are replaced by "".
|
||||
func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) {
|
||||
s := ss[i]
|
||||
suffix := s[len(s)-prefixLen:]
|
||||
for _, j := range prefixes[suffix] {
|
||||
// Empty strings mean "already used." Also avoid merging with self.
|
||||
if ss[j] == "" || i == j {
|
||||
continue
|
||||
}
|
||||
if *v {
|
||||
fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n",
|
||||
prefixLen, i, j, ss[i], ss[j], suffix)
|
||||
}
|
||||
ss[i] += ss[j][prefixLen:]
|
||||
ss[j] = ""
|
||||
// ss[i] has a new suffix, so merge again if possible.
|
||||
// Note: we only have to merge again at the same prefix length. Shorter
|
||||
// prefix lengths will be handled in the next iteration of crush's for loop.
|
||||
// Can there be matches for longer prefix lengths, introduced by the merge?
|
||||
// I believe that any such matches would by necessity have been eliminated
|
||||
// during substring removal or merged at a higher prefix length. For
|
||||
// instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde"
|
||||
// would yield "abcde", which could be merged with "bcdef." However, in
|
||||
// practice "cde" would already have been elimintated by removeSubstrings.
|
||||
mergeLabel(ss, i, prefixLen, prefixes)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// prefixMap maps from a prefix to a list of strings containing that prefix. The
|
||||
// list of strings is represented as indexes into a slice of strings stored
|
||||
// elsewhere.
|
||||
type prefixMap map[string][]int
|
||||
|
||||
// makePrefixMap constructs a prefixMap from a slice of strings.
|
||||
func makePrefixMap(ss []string, prefixLen int) prefixMap {
|
||||
prefixes := make(prefixMap)
|
||||
for i, s := range ss {
|
||||
// We use < rather than <= because if a label matches on a prefix equal to
|
||||
// its full length, that's actually a substring match handled by
|
||||
// removeSubstrings.
|
||||
if prefixLen < len(s) {
|
||||
prefix := s[:prefixLen]
|
||||
prefixes[prefix] = append(prefixes[prefix], i)
|
||||
}
|
||||
}
|
||||
|
||||
return prefixes
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue