This commit is contained in:
Adnan Hajdarevic 2021-04-03 18:01:13 +02:00
parent e329b6d9ff
commit 568c711625
138 changed files with 22876 additions and 90497 deletions

205
vendor/github.com/antonmedv/expr/parser/lexer/lexer.go generated vendored Normal file
View file

@ -0,0 +1,205 @@
package lexer
import (
"fmt"
"strings"
"unicode/utf8"
"github.com/antonmedv/expr/file"
)
func Lex(source *file.Source) ([]Token, error) {
l := &lexer{
input: source.Content(),
tokens: make([]Token, 0),
}
l.loc = file.Location{1, 0}
l.prev = l.loc
l.startLoc = l.loc
for state := root; state != nil; {
state = state(l)
}
if l.err != nil {
return nil, l.err.Bind(source)
}
return l.tokens, nil
}
type lexer struct {
input string
state stateFn
tokens []Token
start, end int // current position in input
width int // last rune with
startLoc file.Location // start location
prev, loc file.Location // prev location of end location, end location
err *file.Error
}
const eof rune = -1
func (l *lexer) next() rune {
if l.end >= len(l.input) {
l.width = 0
return eof
}
r, w := utf8.DecodeRuneInString(l.input[l.end:])
l.width = w
l.end += w
l.prev = l.loc
if r == '\n' {
l.loc.Line++
l.loc.Column = 0
} else {
l.loc.Column++
}
return r
}
func (l *lexer) peek() rune {
r := l.next()
l.backup()
return r
}
func (l *lexer) backup() {
l.end -= l.width
l.loc = l.prev
}
func (l *lexer) emit(t Kind) {
l.emitValue(t, l.word())
}
func (l *lexer) emitValue(t Kind, value string) {
l.tokens = append(l.tokens, Token{
Location: l.startLoc,
Kind: t,
Value: value,
})
l.start = l.end
l.startLoc = l.loc
}
func (l *lexer) emitEOF() {
l.tokens = append(l.tokens, Token{
Location: l.prev, // Point to previous position for better error messages.
Kind: EOF,
})
l.start = l.end
l.startLoc = l.loc
}
func (l *lexer) word() string {
return l.input[l.start:l.end]
}
func (l *lexer) ignore() {
l.start = l.end
l.startLoc = l.loc
}
func (l *lexer) accept(valid string) bool {
if strings.ContainsRune(valid, l.next()) {
return true
}
l.backup()
return false
}
func (l *lexer) acceptRun(valid string) {
for strings.ContainsRune(valid, l.next()) {
}
l.backup()
}
func (l *lexer) acceptWord(word string) bool {
pos := l.end
loc := l.loc
prev := l.prev
for _, ch := range word {
if l.next() != ch {
l.end = pos
l.loc = loc
l.prev = prev
return false
}
}
return true
}
func (l *lexer) error(format string, args ...interface{}) stateFn {
if l.err == nil { // show first error
l.err = &file.Error{
Location: l.loc,
Message: fmt.Sprintf(format, args...),
}
}
return nil
}
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= lower(ch) && lower(ch) <= 'f':
return int(lower(ch) - 'a' + 10)
}
return 16 // larger than any legal digit val
}
func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
func (l *lexer) scanDigits(ch rune, base, n int) rune {
for n > 0 && digitVal(ch) < base {
ch = l.next()
n--
}
if n > 0 {
l.error("invalid char escape")
}
return ch
}
func (l *lexer) scanEscape(quote rune) rune {
ch := l.next() // read character after '/'
switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
// nothing to do
ch = l.next()
case '0', '1', '2', '3', '4', '5', '6', '7':
ch = l.scanDigits(ch, 8, 3)
case 'x':
ch = l.scanDigits(l.next(), 16, 2)
case 'u':
ch = l.scanDigits(l.next(), 16, 4)
case 'U':
ch = l.scanDigits(l.next(), 16, 8)
default:
l.error("invalid char escape")
}
return ch
}
func (l *lexer) scanString(quote rune) (n int) {
ch := l.next() // read character after quote
for ch != quote {
if ch == '\n' || ch == eof {
l.error("literal not terminated")
return
}
if ch == '\\' {
ch = l.scanEscape(quote)
} else {
ch = l.next()
}
n++
}
return
}

134
vendor/github.com/antonmedv/expr/parser/lexer/state.go generated vendored Normal file
View file

@ -0,0 +1,134 @@
package lexer
import (
"strings"
)
type stateFn func(*lexer) stateFn
func root(l *lexer) stateFn {
switch r := l.next(); {
case r == eof:
l.emitEOF()
return nil
case IsSpace(r):
l.ignore()
return root
case r == '\'' || r == '"':
l.scanString(r)
str, err := unescape(l.word())
if err != nil {
l.error("%v", err)
}
l.emitValue(String, str)
case '0' <= r && r <= '9':
l.backup()
return number
case strings.ContainsRune("([{", r):
l.emit(Bracket)
case strings.ContainsRune(")]}", r):
l.emit(Bracket)
case strings.ContainsRune("#,?:%+-/", r): // single rune operator
l.emit(Operator)
case strings.ContainsRune("&|!=*<>", r): // possible double rune operator
l.accept("&|=*")
l.emit(Operator)
case r == '.':
l.backup()
return dot
case IsAlphaNumeric(r):
l.backup()
return identifier
default:
return l.error("unrecognized character: %#U", r)
}
return root
}
func number(l *lexer) stateFn {
if !l.scanNumber() {
return l.error("bad number syntax: %q", l.word())
}
l.emit(Number)
return root
}
func (l *lexer) scanNumber() bool {
digits := "0123456789_"
// Is it hex?
if l.accept("0") {
// Note: Leading 0 does not mean octal in floats.
if l.accept("xX") {
digits = "0123456789abcdefABCDEF_"
} else if l.accept("oO") {
digits = "01234567_"
} else if l.accept("bB") {
digits = "01_"
}
}
l.acceptRun(digits)
loc, prev, end := l.loc, l.prev, l.end
if l.accept(".") {
// Lookup for .. operator: if after dot there is another dot (1..2), it maybe a range operator.
if l.peek() == '.' {
// We can't backup() here, as it would require two backups,
// and backup() func supports only one for now. So, save and
// restore it here.
l.loc, l.prev, l.end = loc, prev, end
return true
}
l.acceptRun(digits)
}
if l.accept("eE") {
l.accept("+-")
l.acceptRun(digits)
}
// Next thing mustn't be alphanumeric.
if IsAlphaNumeric(l.peek()) {
l.next()
return false
}
return true
}
func dot(l *lexer) stateFn {
l.next()
if l.accept("0123456789") {
l.backup()
return number
}
l.accept(".")
l.emit(Operator)
return root
}
func identifier(l *lexer) stateFn {
loop:
for {
switch r := l.next(); {
case IsAlphaNumeric(r):
// absorb
default:
l.backup()
switch l.word() {
case "not":
return not
case "in", "or", "and", "matches", "contains", "startsWith", "endsWith":
l.emit(Operator)
default:
l.emit(Identifier)
}
break loop
}
}
return root
}
func not(l *lexer) stateFn {
if l.acceptWord(" in") {
l.emit(Operator)
} else {
l.emit(Operator)
}
return root
}

47
vendor/github.com/antonmedv/expr/parser/lexer/token.go generated vendored Normal file
View file

@ -0,0 +1,47 @@
package lexer
import (
"fmt"
"github.com/antonmedv/expr/file"
)
type Kind string
const (
Identifier Kind = "Identifier"
Number = "Number"
String = "String"
Operator = "Operator"
Bracket = "Bracket"
EOF = "EOF"
)
type Token struct {
file.Location
Kind Kind
Value string
}
func (t Token) String() string {
if t.Value == "" {
return string(t.Kind)
}
return fmt.Sprintf("%s(%#v)", t.Kind, t.Value)
}
func (t Token) Is(kind Kind, values ...string) bool {
if len(values) == 0 {
return kind == t.Kind
}
for _, v := range values {
if v == t.Value {
goto found
}
}
return false
found:
return kind == t.Kind
}

194
vendor/github.com/antonmedv/expr/parser/lexer/utils.go generated vendored Normal file
View file

@ -0,0 +1,194 @@
package lexer
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
func IsSpace(r rune) bool {
return unicode.IsSpace(r)
}
func IsAlphaNumeric(r rune) bool {
return IsAlphabetic(r) || unicode.IsDigit(r)
}
func IsAlphabetic(r rune) bool {
return r == '_' || r == '$' || unicode.IsLetter(r)
}
var (
newlineNormalizer = strings.NewReplacer("\r\n", "\n", "\r", "\n")
)
// Unescape takes a quoted string, unquotes, and unescapes it.
func unescape(value string) (string, error) {
// All strings normalize newlines to the \n representation.
value = newlineNormalizer.Replace(value)
n := len(value)
// Nothing to unescape / decode.
if n < 2 {
return value, fmt.Errorf("unable to unescape string")
}
// Quoted string of some form, must have same first and last char.
if value[0] != value[n-1] || (value[0] != '"' && value[0] != '\'') {
return value, fmt.Errorf("unable to unescape string")
}
value = value[1 : n-1]
// The string contains escape characters.
// The following logic is adapted from `strconv/quote.go`
var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*n/2)
for len(value) > 0 {
c, multibyte, rest, err := unescapeChar(value)
if err != nil {
return "", err
}
value = rest
if c < utf8.RuneSelf || !multibyte {
buf = append(buf, byte(c))
} else {
n := utf8.EncodeRune(runeTmp[:], c)
buf = append(buf, runeTmp[:n]...)
}
}
return string(buf), nil
}
// unescapeChar takes a string input and returns the following info:
//
// value - the escaped unicode rune at the front of the string.
// multibyte - whether the rune value might require multiple bytes to represent.
// tail - the remainder of the input string.
// err - error value, if the character could not be unescaped.
//
// When multibyte is true the return value may still fit within a single byte,
// but a multibyte conversion is attempted which is more expensive than when the
// value is known to fit within one byte.
func unescapeChar(s string) (value rune, multibyte bool, tail string, err error) {
// 1. Character is not an escape sequence.
switch c := s[0]; {
case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s)
return r, true, s[size:], nil
case c != '\\':
return rune(s[0]), false, s[1:], nil
}
// 2. Last character is the start of an escape sequence.
if len(s) <= 1 {
err = fmt.Errorf("unable to unescape string, found '\\' as last character")
return
}
c := s[1]
s = s[2:]
// 3. Common escape sequences shared with Google SQL
switch c {
case 'a':
value = '\a'
case 'b':
value = '\b'
case 'f':
value = '\f'
case 'n':
value = '\n'
case 'r':
value = '\r'
case 't':
value = '\t'
case 'v':
value = '\v'
case '\\':
value = '\\'
case '\'':
value = '\''
case '"':
value = '"'
case '`':
value = '`'
case '?':
value = '?'
// 4. Unicode escape sequences, reproduced from `strconv/quote.go`
case 'x', 'X', 'u', 'U':
n := 0
switch c {
case 'x', 'X':
n = 2
case 'u':
n = 4
case 'U':
n = 8
}
var v rune
if len(s) < n {
err = fmt.Errorf("unable to unescape string")
return
}
for j := 0; j < n; j++ {
x, ok := unhex(s[j])
if !ok {
err = fmt.Errorf("unable to unescape string")
return
}
v = v<<4 | x
}
s = s[n:]
if v > utf8.MaxRune {
err = fmt.Errorf("unable to unescape string")
return
}
value = v
multibyte = true
// 5. Octal escape sequences, must be three digits \[0-3][0-7][0-7]
case '0', '1', '2', '3':
if len(s) < 2 {
err = fmt.Errorf("unable to unescape octal sequence in string")
return
}
v := rune(c - '0')
for j := 0; j < 2; j++ {
x := s[j]
if x < '0' || x > '7' {
err = fmt.Errorf("unable to unescape octal sequence in string")
return
}
v = v*8 + rune(x-'0')
}
if v > utf8.MaxRune {
err = fmt.Errorf("unable to unescape string")
return
}
value = v
s = s[2:]
multibyte = true
// Unknown escape sequence.
default:
err = fmt.Errorf("unable to unescape string")
}
tail = s
return
}
func unhex(b byte) (rune, bool) {
c := rune(b)
switch {
case '0' <= c && c <= '9':
return c - '0', true
case 'a' <= c && c <= 'f':
return c - 'a' + 10, true
case 'A' <= c && c <= 'F':
return c - 'A' + 10, true
}
return 0, false
}