1
0
Fork 0
mirror of https://github.com/vbatts/go-mtree.git synced 2025-10-04 04:31:00 +00:00

unvis: switch to methods for parser

Passing the parser as an argument is very C-like and is not really as
idiomatic as just using methods (in my defence, I was still pretty green
when I wrote this code and I was trying to port some logic from C).

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
Aleksa Sarai 2025-09-22 02:07:21 +10:00
parent bcdb71fb56
commit 9bd1dffd9b
No known key found for this signature in database
GPG key ID: 2897FAD2B7E9446F

View file

@ -36,7 +36,13 @@ var (
type unvisParser struct { type unvisParser struct {
tokens []rune tokens []rune
idx int idx int
flag VisFlag flags VisFlag
}
// Input resets the parser with a new input string.
func (p *unvisParser) Input(input string) {
p.tokens = []rune(input)
p.idx = 0
} }
// Next moves the index to the next character. // Next moves the index to the next character.
@ -57,11 +63,11 @@ func (p *unvisParser) End() bool {
return p.idx >= len(p.tokens) return p.idx >= len(p.tokens)
} }
func newParser(input string, flag VisFlag) *unvisParser { func newParser(flags VisFlag) *unvisParser {
return &unvisParser{ return &unvisParser{
tokens: []rune(input), tokens: nil,
idx: 0, idx: 0,
flag: flag, flags: flags,
} }
} }
@ -69,8 +75,8 @@ func newParser(input string, flag VisFlag) *unvisParser {
// codes, this is IMO much easier to read than the ugly 80s coroutine code used // codes, this is IMO much easier to read than the ugly 80s coroutine code used
// by the original unvis(3) parser. Here's the EBNF for an unvis sequence: // by the original unvis(3) parser. Here's the EBNF for an unvis sequence:
// //
// <input> ::= (<rune>)* // <input> ::= (<element>)*
// <rune> ::= ("\" <escape-sequence>) | ("%" <escape-hex>) | <plain-rune> // <element> ::= ("\" <escape-sequence>) | ("%" <escape-hex>) | <plain-rune>
// <plain-rune> ::= any rune // <plain-rune> ::= any rune
// <escape-sequence> ::= ("x" <escape-hex>) | ("M" <escape-meta>) | ("^" <escape-ctrl>) | <escape-cstyle> | <escape-octal> // <escape-sequence> ::= ("x" <escape-hex>) | ("M" <escape-meta>) | ("^" <escape-ctrl>) | <escape-cstyle> | <escape-octal>
// <escape-meta> ::= ("-" <escape-meta1>) | ("^" <escape-ctrl>) // <escape-meta> ::= ("-" <escape-meta1>) | ("^" <escape-ctrl>)
@ -80,7 +86,7 @@ func newParser(input string, flag VisFlag) *unvisParser {
// <escape-hex> ::= [0-9a-f] [0-9a-f] // <escape-hex> ::= [0-9a-f] [0-9a-f]
// <escape-octal> ::= [0-7] ([0-7] ([0-7])?)? // <escape-octal> ::= [0-7] ([0-7] ([0-7])?)?
func unvisPlainRune(p *unvisParser) ([]byte, error) { func (p *unvisParser) plainRune() ([]byte, error) {
ch, err := p.Peek() ch, err := p.Peek()
if err != nil { if err != nil {
return nil, fmt.Errorf("plain rune: %w", err) return nil, fmt.Errorf("plain rune: %w", err)
@ -89,7 +95,7 @@ func unvisPlainRune(p *unvisParser) ([]byte, error) {
return []byte(string(ch)), nil return []byte(string(ch)), nil
} }
func unvisEscapeCStyle(p *unvisParser) ([]byte, error) { func (p *unvisParser) escapeCStyle() ([]byte, error) {
ch, err := p.Peek() ch, err := p.Peek()
if err != nil { if err != nil {
return nil, fmt.Errorf("escape cstyle: %w", err) return nil, fmt.Errorf("escape cstyle: %w", err)
@ -128,7 +134,7 @@ func unvisEscapeCStyle(p *unvisParser) ([]byte, error) {
return []byte(output), nil return []byte(output), nil
} }
func unvisEscapeDigits(p *unvisParser, base int, force bool) ([]byte, error) { func (p *unvisParser) escapeDigits(base int, force bool) ([]byte, error) {
var code int var code int
for i := int(0xFF); i > 0; i /= base { for i := int(0xFF); i > 0; i /= base {
@ -160,7 +166,7 @@ func unvisEscapeDigits(p *unvisParser, base int, force bool) ([]byte, error) {
return []byte{char}, nil return []byte{char}, nil
} }
func unvisEscapeCtrl(p *unvisParser, mask byte) ([]byte, error) { func (p *unvisParser) escapeCtrl(mask byte) ([]byte, error) {
ch, err := p.Peek() ch, err := p.Peek()
if err != nil { if err != nil {
return nil, fmt.Errorf("escape ctrl: %w", err) return nil, fmt.Errorf("escape ctrl: %w", err)
@ -178,7 +184,7 @@ func unvisEscapeCtrl(p *unvisParser, mask byte) ([]byte, error) {
return []byte{mask | char}, nil return []byte{mask | char}, nil
} }
func unvisEscapeMeta(p *unvisParser) ([]byte, error) { func (p *unvisParser) escapeMeta() ([]byte, error) {
ch, err := p.Peek() ch, err := p.Peek()
if err != nil { if err != nil {
return nil, fmt.Errorf("escape meta: %w", err) return nil, fmt.Errorf("escape meta: %w", err)
@ -190,7 +196,7 @@ func unvisEscapeMeta(p *unvisParser) ([]byte, error) {
case '^': case '^':
// The same as "\^..." except we apply a mask. // The same as "\^..." except we apply a mask.
p.Next() p.Next()
return unvisEscapeCtrl(p, mask) return p.escapeCtrl(mask)
case '-': case '-':
p.Next() p.Next()
@ -211,7 +217,7 @@ func unvisEscapeMeta(p *unvisParser) ([]byte, error) {
return nil, fmt.Errorf("escape meta: %w %q", errUnknownEscapeChar, ch) return nil, fmt.Errorf("escape meta: %w %q", errUnknownEscapeChar, ch)
} }
func unvisEscapeSequence(p *unvisParser) ([]byte, error) { func (p *unvisParser) escapeSequence() ([]byte, error) {
ch, err := p.Peek() ch, err := p.Peek()
if err != nil { if err != nil {
return nil, fmt.Errorf("escape sequence: %w", err) return nil, fmt.Errorf("escape sequence: %w", err)
@ -223,26 +229,26 @@ func unvisEscapeSequence(p *unvisParser) ([]byte, error) {
return []byte("\\"), nil return []byte("\\"), nil
case '0', '1', '2', '3', '4', '5', '6', '7': case '0', '1', '2', '3', '4', '5', '6', '7':
return unvisEscapeDigits(p, 8, false) return p.escapeDigits(8, false)
case 'x': case 'x':
p.Next() p.Next()
return unvisEscapeDigits(p, 16, true) return p.escapeDigits(16, true)
case '^': case '^':
p.Next() p.Next()
return unvisEscapeCtrl(p, 0x00) return p.escapeCtrl(0x00)
case 'M': case 'M':
p.Next() p.Next()
return unvisEscapeMeta(p) return p.escapeMeta()
default: default:
return unvisEscapeCStyle(p) return p.escapeCStyle()
} }
} }
func unvisRune(p *unvisParser) ([]byte, error) { func (p *unvisParser) element() ([]byte, error) {
ch, err := p.Peek() ch, err := p.Peek()
if err != nil { if err != nil {
return nil, err return nil, err
@ -251,22 +257,23 @@ func unvisRune(p *unvisParser) ([]byte, error) {
switch ch { switch ch {
case '\\': case '\\':
p.Next() p.Next()
return unvisEscapeSequence(p) return p.escapeSequence()
case '%': case '%':
// % HEX HEX only applies to HTTPStyle encodings. // % HEX HEX only applies to HTTPStyle encodings.
if p.flag&VisHTTPStyle == VisHTTPStyle { if p.flags&VisHTTPStyle == VisHTTPStyle {
p.Next() p.Next()
return unvisEscapeDigits(p, 16, true) return p.escapeDigits(16, true)
} }
} }
return unvisPlainRune(p) return p.plainRune()
} }
func unvis(p *unvisParser) (string, error) { func (p *unvisParser) unvis(input string) (string, error) {
p.Input(input)
var output []byte var output []byte
for !p.End() { for !p.End() {
ch, err := unvisRune(p) ch, err := p.element()
if err != nil { if err != nil {
return "", err return "", err
} }
@ -283,8 +290,8 @@ func Unvis(input string, flags VisFlag) (string, error) {
if unknown := flags &^ visMask; unknown != 0 { if unknown := flags &^ visMask; unknown != 0 {
return "", unknownVisFlagsError{flags: flags} return "", unknownVisFlagsError{flags: flags}
} }
p := newParser(input, flags) p := newParser(flags)
output, err := unvis(p) output, err := p.unvis(input)
if err != nil { if err != nil {
return "", fmt.Errorf("unvis '%s': %w", input, err) return "", fmt.Errorf("unvis '%s': %w", input, err)
} }