wip

2025-10-04 13:41:03 +00:00 · 2021-04-03 18:01:13 +02:00 · 2021-04-03 18:01:13 +02:00 · 568c711625
commit 568c711625
parent e329b6d9ff
138 changed files with 22876 additions and 90497 deletions
--- a/vendor/github.com/antonmedv/expr/parser/lexer/lexer.go
+++ b/vendor/github.com/antonmedv/expr/parser/lexer/lexer.go
@ -0,0 +1,205 @@
+package lexer
+
+import (
+	"fmt"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/antonmedv/expr/file"
+)
+
+// Lex tokenizes the content of source and returns the resulting tokens.
+//
+// Scanning starts in the root state and continues until a state function
+// returns nil. If an error was recorded while scanning, it is bound to source
+// and returned, and no tokens are returned.
+func Lex(source *file.Source) ([]Token, error) {
+	l := &lexer{
+		input:  source.Content(),
+		tokens: make([]Token, 0),
+	}
+
+	// Keyed fields: an unkeyed literal of an imported struct silently changes
+	// meaning if the struct's fields are ever reordered or extended.
+	l.loc = file.Location{Line: 1, Column: 0}
+	l.prev = l.loc
+	l.startLoc = l.loc
+
+	for state := root; state != nil; {
+		state = state(l)
+	}
+
+	if l.err != nil {
+		return nil, l.err.Bind(source)
+	}
+
+	return l.tokens, nil
+}
+
+// lexer holds the scanning state shared by the state functions.
+type lexer struct {
+	input      string
+	state      stateFn
+	tokens     []Token
+	start, end int           // byte offsets into input: start of the pending token, current read position
+	width      int           // byte width of the last rune read by next (0 at end of input)
+	startLoc   file.Location // location of the start of the pending token
+	prev, loc  file.Location // location before the last rune read, location of the read position
+	err        *file.Error   // first error recorded via error, nil if none
+}
+
+// eof is the sentinel rune returned by next when the input is exhausted.
+const eof rune = -1
+
+// next decodes the rune at the read position, advances past it and returns
+// it. At the end of input it returns eof, sets width to 0 and leaves the read
+// position and the locations untouched.
+func (l *lexer) next() rune {
+	if l.end >= len(l.input) {
+		l.width = 0
+		return eof
+	}
+
+	ch, size := utf8.DecodeRuneInString(l.input[l.end:])
+	l.width = size
+	l.end += size
+
+	// Remember the location before this rune so that backup can restore it.
+	l.prev = l.loc
+	switch ch {
+	case '\n':
+		l.loc.Line++
+		l.loc.Column = 0
+	default:
+		l.loc.Column++
+	}
+
+	return ch
+}
+
+// peek returns the next rune without consuming it: it reads the rune with
+// next and immediately undoes the read with backup.
+func (l *lexer) peek() rune {
+	defer l.backup()
+	return l.next()
+}
+
+// backup undoes the most recent call to next: it moves the read position
+// back by the width of the last rune and restores loc from prev. Only a
+// single step can be undone, because only one previous location is kept.
+func (l *lexer) backup() {
+	l.end -= l.width
+	l.loc = l.prev
+}
+
+// emit appends a token of kind t whose value is the pending input text
+// (see word).
+func (l *lexer) emit(t Kind) {
+	l.emitValue(t, l.word())
+}
+
+// emitValue appends a token of kind t with the given value, located at the
+// start of the pending text, and then starts a new pending token at the
+// current read position.
+func (l *lexer) emitValue(t Kind, value string) {
+	tok := Token{
+		Location: l.startLoc,
+		Kind:     t,
+		Value:    value,
+	}
+	l.tokens = append(l.tokens, tok)
+
+	// The next token begins where this one ended.
+	l.start, l.startLoc = l.end, l.loc
+}
+
+// emitEOF appends the EOF token (which carries no value) and resets the
+// pending token to the current read position.
+func (l *lexer) emitEOF() {
+	// Point to previous position for better error messages.
+	eofTok := Token{Location: l.prev, Kind: EOF}
+	l.tokens = append(l.tokens, eofTok)
+
+	l.start, l.startLoc = l.end, l.loc
+}
+
+// word returns the pending input text: everything read since the last
+// emitted or ignored token.
+func (l *lexer) word() string {
+	return l.input[l.start:l.end]
+}
+
+// ignore discards the pending input text by moving the token start (offset
+// and location) up to the current read position.
+func (l *lexer) ignore() {
+	l.start, l.startLoc = l.end, l.loc
+}
+
+// accept consumes the next rune if it is one of the runes in valid and
+// reports whether it did so. A non-matching rune is put back.
+func (l *lexer) accept(valid string) bool {
+	matched := strings.ContainsRune(valid, l.next())
+	if !matched {
+		l.backup()
+	}
+	return matched
+}
+
+// acceptRun consumes runes for as long as they are contained in valid. The
+// first rune that does not match is put back.
+func (l *lexer) acceptRun(valid string) {
+	for {
+		if !strings.ContainsRune(valid, l.next()) {
+			break
+		}
+	}
+	l.backup()
+}
+
+// acceptWord consumes word if the input continues with exactly those runes
+// and reports whether it did. On a mismatch the read position and both
+// locations are restored to their values on entry.
+func (l *lexer) acceptWord(word string) bool {
+	savedEnd, savedLoc, savedPrev := l.end, l.loc, l.prev
+	for _, want := range word {
+		if got := l.next(); got != want {
+			l.end, l.loc, l.prev = savedEnd, savedLoc, savedPrev
+			return false
+		}
+	}
+	return true
+}
+
+// error records a formatted error at the current location unless an error
+// has already been recorded (only the first one is kept). It always returns
+// nil so that state functions can write `return l.error(...)` to stop the
+// state machine.
+func (l *lexer) error(format string, args ...interface{}) stateFn {
+	if l.err != nil {
+		return nil
+	}
+	l.err = &file.Error{
+		Location: l.loc,
+		Message:  fmt.Sprintf(format, args...),
+	}
+	return nil
+}
+
+// digitVal returns the numeric value of ch interpreted as a digit: 0-9 for
+// decimal digits and 10-15 for the letters a-f in either case. For any other
+// rune it returns 16, which is larger than any legal digit value.
+func digitVal(ch rune) int {
+	if '0' <= ch && ch <= '9' {
+		return int(ch - '0')
+	}
+	if lc := lower(ch); 'a' <= lc && lc <= 'f' {
+		return int(lc - 'a' + 10)
+	}
+	return 16 // larger than any legal digit val
+}
+
+// lower sets the bit that distinguishes ASCII lower case from upper case
+// ('a' - 'A'); the result is only meaningful when ch is an ASCII letter.
+func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
+
+// scanDigits consumes up to n digits of the given base, starting with ch
+// (which has already been read), and returns the rune read after the last
+// accepted digit. If fewer than n digits are found it records an
+// "invalid char escape" error.
+func (l *lexer) scanDigits(ch rune, base, n int) rune {
+	for ; n > 0 && digitVal(ch) < base; n-- {
+		ch = l.next()
+	}
+	if n > 0 {
+		l.error("invalid char escape")
+	}
+	return ch
+}
+
+// scanEscape scans an escape sequence inside a string literal delimited by
+// quote. The backslash has already been read. It returns the rune read after
+// the escape sequence, or, for an unknown escape, the offending rune itself
+// after recording an "invalid char escape" error.
+func (l *lexer) scanEscape(quote rune) rune {
+	ch := l.next() // read the character after the backslash
+	switch ch {
+	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
+		// Single-character escape: just move on to the next character.
+		ch = l.next()
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		// Octal escape: three digits, the first of which is ch.
+		ch = l.scanDigits(ch, 8, 3)
+	case 'x':
+		// Hex escape: two digits.
+		ch = l.scanDigits(l.next(), 16, 2)
+	case 'u':
+		// Unicode escape: four hex digits.
+		ch = l.scanDigits(l.next(), 16, 4)
+	case 'U':
+		// Unicode escape: eight hex digits.
+		ch = l.scanDigits(l.next(), 16, 8)
+	default:
+		l.error("invalid char escape")
+	}
+	return ch
+}
+
+// scanString consumes a string literal up to and including the closing
+// quote; the opening quote has already been read. It returns n, the number
+// of characters in the literal, counting each escape sequence as one.
+//
+// A newline or the end of input before the closing quote records a
+// "literal not terminated" error and stops the scan, returning the count
+// reached so far.
+func (l *lexer) scanString(quote rune) (n int) {
+	ch := l.next() // read character after quote
+	for ch != quote {
+		if ch == '\n' || ch == eof {
+			l.error("literal not terminated")
+			return
+		}
+		if ch == '\\' {
+			// scanEscape returns the rune that follows the escape sequence.
+			ch = l.scanEscape(quote)
+		} else {
+			ch = l.next()
+		}
+		n++
+	}
+	return
+}
--- a/vendor/github.com/antonmedv/expr/parser/lexer/state.go
+++ b/vendor/github.com/antonmedv/expr/parser/lexer/state.go
@ -0,0 +1,134 @@
+package lexer
+
+import (
+	"strings"
+)
+
+// stateFn is one state of the lexer: it consumes some input and returns the
+// next state, or nil to stop the state machine.
+type stateFn func(*lexer) stateFn
+
+// root is the initial state. It reads one rune and dispatches on it: it
+// either emits a token directly and stays in root, hands over to a more
+// specific state (number, dot, identifier), or stops the state machine at
+// the end of input or on an unrecognized character.
+func root(l *lexer) stateFn {
+	switch r := l.next(); {
+	case r == eof:
+		l.emitEOF()
+		return nil
+	case IsSpace(r):
+		l.ignore()
+		return root
+	case r == '\'' || r == '"':
+		// Scan through the closing quote, then decode the whole quoted text.
+		// The token is emitted even when decoding failed; the failure is
+		// recorded through l.error.
+		l.scanString(r)
+		str, err := unescape(l.word())
+		if err != nil {
+			l.error("%v", err)
+		}
+		l.emitValue(String, str)
+	case '0' <= r && r <= '9':
+		// Put the digit back so that number sees the whole literal.
+		l.backup()
+		return number
+	case strings.ContainsRune("([{", r):
+		l.emit(Bracket)
+	case strings.ContainsRune(")]}", r):
+		l.emit(Bracket)
+	case strings.ContainsRune("#,?:%+-/", r): // single rune operator
+		l.emit(Operator)
+	case strings.ContainsRune("&|!=*<>", r): // possible double rune operator
+		// Optionally take one more rune to form operators such as &&, ||, ==, !=, <=, >=, **.
+		l.accept("&|=*")
+		l.emit(Operator)
+	case r == '.':
+		l.backup()
+		return dot
+	case IsAlphaNumeric(r):
+		l.backup()
+		return identifier
+	default:
+		return l.error("unrecognized character: %#U", r)
+	}
+	// Every case that emitted a token falls through to here.
+	return root
+}
+
+// number scans a numeric literal and emits it as a Number token. If the
+// literal is malformed it records a "bad number syntax" error and stops the
+// state machine.
+func number(l *lexer) stateFn {
+	if l.scanNumber() {
+		l.emit(Number)
+		return root
+	}
+	return l.error("bad number syntax: %q", l.word())
+}
+
+// scanNumber consumes a numeric literal: an optional 0x/0o/0b prefix, digits
+// (underscores allowed), an optional fractional part and an optional
+// exponent. It reports false if the literal is immediately followed by an
+// alphanumeric rune, in which case that rune is consumed as well so that it
+// shows up in the pending text.
+func (l *lexer) scanNumber() bool {
+	digits := "0123456789_"
+	// Is it hex?
+	if l.accept("0") {
+		// Note: Leading 0 does not mean octal in floats.
+		if l.accept("xX") {
+			digits = "0123456789abcdefABCDEF_"
+		} else if l.accept("oO") {
+			digits = "01234567_"
+		} else if l.accept("bB") {
+			digits = "01_"
+		}
+	}
+	l.acceptRun(digits)
+	// Snapshot the position before a possible '.', see below.
+	loc, prev, end := l.loc, l.prev, l.end
+	if l.accept(".") {
+		// Look ahead for the .. operator: if the dot is followed by another
+		// dot (1..2), it may be a range operator rather than a fraction.
+		if l.peek() == '.' {
+			// We can't backup() here, as it would require two backups,
+			// and backup() func supports only one for now. So, save and
+			// restore it here.
+			l.loc, l.prev, l.end = loc, prev, end
+			return true
+		}
+		l.acceptRun(digits)
+	}
+	if l.accept("eE") {
+		l.accept("+-")
+		l.acceptRun(digits)
+	}
+	// Next thing mustn't be alphanumeric.
+	if IsAlphaNumeric(l.peek()) {
+		l.next()
+		return false
+	}
+	return true
+}
+
+// dot handles input starting with '.'. If a digit follows, the digit is put
+// back and scanning continues in number with the dot still part of the
+// pending text (so ".5" becomes one number). Otherwise it emits an Operator
+// token consisting of one dot, or two when a second dot follows.
+func dot(l *lexer) stateFn {
+	l.next() // consume the '.'
+	if l.accept("0123456789") {
+		l.backup()
+		return number
+	}
+	l.accept(".")
+	l.emit(Operator)
+	return root
+}
+
+// identifier consumes a run of alphanumeric runes and classifies it: the
+// word "not" hands over to the not state without emitting anything, the
+// word operators are emitted as Operator, and everything else as Identifier.
+func identifier(l *lexer) stateFn {
+	// Absorb runes until the first one that cannot be part of the word,
+	// then put that one back.
+	for IsAlphaNumeric(l.next()) {
+	}
+	l.backup()
+
+	switch l.word() {
+	case "not":
+		return not
+	case "in", "or", "and", "matches", "contains", "startsWith", "endsWith":
+		l.emit(Operator)
+	default:
+		l.emit(Identifier)
+	}
+	return root
+}
+
+// not is entered after identifier has read the word "not", which is still
+// the pending, un-emitted text. It emits a single Operator token whose value
+// is "not in" when the input continues with the whole word " in", and "not"
+// otherwise.
+//
+// " in" must end at a word boundary: in `not index` the characters " in" are
+// only the beginning of the identifier "index", so the lexer rewinds and
+// emits plain "not", leaving " index" for the following states.
+func not(l *lexer) stateFn {
+	end, loc, prev := l.end, l.loc, l.prev
+	if l.acceptWord(" in") && IsAlphaNumeric(l.peek()) {
+		// "in" was merely a prefix of a longer word; undo the match.
+		l.end, l.loc, l.prev = end, loc, prev
+	}
+	l.emit(Operator)
+	return root
+}
--- a/vendor/github.com/antonmedv/expr/parser/lexer/token.go
+++ b/vendor/github.com/antonmedv/expr/parser/lexer/token.go
@ -0,0 +1,47 @@
+package lexer
+
+import (
+	"fmt"
+
+	"github.com/antonmedv/expr/file"
+)
+
+// Kind identifies the category of a Token.
+type Kind string
+
+// Token kinds produced by the lexer.
+//
+// Each constant names the Kind type explicitly: a constant that has its own
+// initializer does not inherit the type of the preceding line, so leaving the
+// type out would declare untyped string constants instead of Kinds.
+const (
+	Identifier Kind = "Identifier"
+	Number     Kind = "Number"
+	String     Kind = "String"
+	Operator   Kind = "Operator"
+	Bracket    Kind = "Bracket"
+	EOF        Kind = "EOF"
+)
+
+// Token is a lexical token: its kind, its text value and, through the
+// embedded file.Location, a position in the source.
+type Token struct {
+	file.Location
+	Kind  Kind
+	Value string
+}
+
+// String renders the token as Kind(value) with the value in Go syntax, or as
+// just the kind when the token has no value.
+func (t Token) String() string {
+	if t.Value != "" {
+		return fmt.Sprintf("%s(%#v)", t.Kind, t.Value)
+	}
+	return string(t.Kind)
+}
+
+// Is reports whether the token has the given kind. When values are supplied,
+// the token's value must additionally equal one of them.
+func (t Token) Is(kind Kind, values ...string) bool {
+	if len(values) == 0 {
+		return kind == t.Kind
+	}
+
+	for _, candidate := range values {
+		if candidate == t.Value {
+			return kind == t.Kind
+		}
+	}
+	return false
+}
--- a/vendor/github.com/antonmedv/expr/parser/lexer/utils.go
+++ b/vendor/github.com/antonmedv/expr/parser/lexer/utils.go
@ -0,0 +1,194 @@
+package lexer
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// IsSpace reports whether r is a white space character as defined by
+// unicode.IsSpace.
+func IsSpace(r rune) bool {
+	return unicode.IsSpace(r)
+}
+
+// IsAlphaNumeric reports whether r is alphabetic in the sense of
+// IsAlphabetic or a Unicode digit.
+func IsAlphaNumeric(r rune) bool {
+	return IsAlphabetic(r) || unicode.IsDigit(r)
+}
+
+// IsAlphabetic reports whether r is an underscore, a dollar sign or a
+// Unicode letter.
+func IsAlphabetic(r rune) bool {
+	return r == '_' || r == '$' || unicode.IsLetter(r)
+}
+
+var (
+	// newlineNormalizer rewrites "\r\n" and lone "\r" line endings to "\n".
+	newlineNormalizer = strings.NewReplacer("\r\n", "\n", "\r", "\n")
+)
+
+// unescape takes a quoted string, strips the surrounding quotes and decodes
+// the escape sequences inside it.
+//
+// The input must be at least two bytes long and must start and end with the
+// same quote character (' or "); otherwise the (newline-normalized) input is
+// returned together with an error. If an escape sequence cannot be decoded,
+// an empty string and the error are returned.
+func unescape(value string) (string, error) {
+	// All strings normalize newlines to the \n representation.
+	value = newlineNormalizer.Replace(value)
+	size := len(value)
+
+	// Too short to even hold the two quotes.
+	if size < 2 {
+		return value, fmt.Errorf("unable to unescape string")
+	}
+
+	// Quoted string of some form, must have same first and last char.
+	first, last := value[0], value[size-1]
+	if first != last || (first != '"' && first != '\'') {
+		return value, fmt.Errorf("unable to unescape string")
+	}
+
+	body := value[1 : size-1]
+
+	// The decoding loop is adapted from `strconv/quote.go`.
+	var scratch [utf8.UTFMax]byte
+	out := make([]byte, 0, 3*size/2)
+	for len(body) > 0 {
+		r, multibyte, rest, err := unescapeChar(body)
+		if err != nil {
+			return "", err
+		}
+		body = rest
+
+		if multibyte && r >= utf8.RuneSelf {
+			// Needs a real UTF-8 encoding.
+			w := utf8.EncodeRune(scratch[:], r)
+			out = append(out, scratch[:w]...)
+			continue
+		}
+		out = append(out, byte(r))
+	}
+	return string(out), nil
+}
+
+// unescapeChar decodes the character or escape sequence at the front of s
+// and returns the following info:
+//
+//   value - the decoded unicode rune at the front of the string.
+//   multibyte - whether the rune value might require multiple bytes to represent.
+//   tail - the remainder of the input string.
+//   err - error value, if the character could not be unescaped.
+//
+// When multibyte is true the return value may still fit within a single byte,
+// but a multibyte conversion is attempted which is more expensive than when the
+// value is known to fit within one byte.
+//
+// An empty s is reported as an error instead of panicking, and \x, \u and \U
+// escapes whose value lies outside [0, utf8.MaxRune] are rejected.
+func unescapeChar(s string) (value rune, multibyte bool, tail string, err error) {
+	// 0. Nothing to decode.
+	if len(s) == 0 {
+		err = fmt.Errorf("unable to unescape string")
+		return
+	}
+
+	// 1. Character is not an escape sequence.
+	switch c := s[0]; {
+	case c >= utf8.RuneSelf:
+		r, size := utf8.DecodeRuneInString(s)
+		return r, true, s[size:], nil
+	case c != '\\':
+		return rune(s[0]), false, s[1:], nil
+	}
+
+	// 2. Last character is the start of an escape sequence.
+	if len(s) <= 1 {
+		err = fmt.Errorf("unable to unescape string, found '\\' as last character")
+		return
+	}
+
+	c := s[1]
+	s = s[2:]
+	// 3. Common escape sequences shared with Google SQL
+	switch c {
+	case 'a':
+		value = '\a'
+	case 'b':
+		value = '\b'
+	case 'f':
+		value = '\f'
+	case 'n':
+		value = '\n'
+	case 'r':
+		value = '\r'
+	case 't':
+		value = '\t'
+	case 'v':
+		value = '\v'
+	case '\\':
+		value = '\\'
+	case '\'':
+		value = '\''
+	case '"':
+		value = '"'
+	case '`':
+		value = '`'
+	case '?':
+		value = '?'
+
+	// 4. Unicode escape sequences, reproduced from `strconv/quote.go`
+	case 'x', 'X', 'u', 'U':
+		n := 0
+		switch c {
+		case 'x', 'X':
+			n = 2
+		case 'u':
+			n = 4
+		case 'U':
+			n = 8
+		}
+		var v rune
+		if len(s) < n {
+			err = fmt.Errorf("unable to unescape string")
+			return
+		}
+		for j := 0; j < n; j++ {
+			x, ok := unhex(s[j])
+			if !ok {
+				err = fmt.Errorf("unable to unescape string")
+				return
+			}
+			v = v<<4 | x
+		}
+		s = s[n:]
+		// An 8-digit \U escape can set the sign bit of the 32-bit rune, so
+		// negative values have to be rejected along with values above
+		// MaxRune; a negative rune is not a code point and would otherwise
+		// be truncated to an arbitrary single byte by the caller.
+		if v < 0 || v > utf8.MaxRune {
+			err = fmt.Errorf("unable to unescape string")
+			return
+		}
+		value = v
+		multibyte = true
+
+	// 5. Octal escape sequences, must be three digits \[0-3][0-7][0-7]
+	case '0', '1', '2', '3':
+		if len(s) < 2 {
+			err = fmt.Errorf("unable to unescape octal sequence in string")
+			return
+		}
+		v := rune(c - '0')
+		for j := 0; j < 2; j++ {
+			x := s[j]
+			if x < '0' || x > '7' {
+				err = fmt.Errorf("unable to unescape octal sequence in string")
+				return
+			}
+			v = v*8 + rune(x-'0')
+		}
+		// No range check needed: three octal digits with a leading 0-3
+		// encode at most 0377 (255).
+		value = v
+		s = s[2:]
+		multibyte = true
+
+	// Unknown escape sequence.
+	default:
+		err = fmt.Errorf("unable to unescape string")
+	}
+
+	tail = s
+	return
+}
+
+// unhex returns the value of the hexadecimal digit b and true, or 0 and
+// false if b is not a hexadecimal digit.
+func unhex(b byte) (rune, bool) {
+	c := rune(b)
+	switch {
+	case '0' <= c && c <= '9':
+		return c - '0', true
+	case 'a' <= c && c <= 'f':
+		return c - 'a' + 10, true
+	case 'A' <= c && c <= 'F':
+		return c - 'A' + 10, true
+	}
+	return 0, false
+}