mirror of
https://github.com/vbatts/go-mtree.git
synced 2025-10-03 20:21:01 +00:00
unvis: improve performance by reducing allocations
By using a buffer, we can avoid a bunch of small allocations that the
previous implementation did. Based on a few small benchmarks, the
performance improvement is very stark (~3x faster for strings that don't
require any escaping, and ~20% faster for multi-byte utf8 strings):

  goos: linux
  goarch: amd64
  pkg: github.com/vbatts/go-mtree/pkg/govis
  cpu: AMD Ryzen 7 7840U w/ Radeon 780M Graphics
                      │   before    │               after                │
                      │   sec/op    │   sec/op     vs base               │
  Unvis/NoChange-16     1501.0n ± 0%   497.7n ± 1%  -66.84% (p=0.000 n=10)
  Unvis/Binary-16       1317.5n ± 3%   934.9n ± 9%  -29.04% (p=0.000 n=10)
  Unvis/ASCII-16        1325.5n ± 1%   616.8n ± 1%  -53.47% (p=0.000 n=10)
  Unvis/German-16       1884.5n ± 1%   986.9n ± 2%  -47.63% (p=0.000 n=10)
  Unvis/Russian-16       4.636µ ± 1%   3.796µ ± 1%  -18.11% (p=0.000 n=10)
  Unvis/Japanese-16      3.453µ ± 1%   2.867µ ± 1%  -16.99% (p=0.000 n=10)
  geomean                2.072µ        1.206µ       -41.77%

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
parent
01d93a93e2
commit
70d3b19776
2 changed files with 107 additions and 55 deletions
|
@ -22,6 +22,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -34,6 +35,7 @@ var (
|
||||||
|
|
||||||
// unvisParser stores the current state of the token parser.
|
// unvisParser stores the current state of the token parser.
|
||||||
type unvisParser struct {
|
type unvisParser struct {
|
||||||
|
output *strings.Builder
|
||||||
tokens []rune
|
tokens []rune
|
||||||
idx int
|
idx int
|
||||||
flags VisFlag
|
flags VisFlag
|
||||||
|
@ -41,10 +43,18 @@ type unvisParser struct {
|
||||||
|
|
||||||
// Input resets the parser with a new input string.
|
// Input resets the parser with a new input string.
|
||||||
func (p *unvisParser) Input(input string) {
|
func (p *unvisParser) Input(input string) {
|
||||||
|
p.output = new(strings.Builder)
|
||||||
|
p.output.Grow(len(input)) // the output will be at most input-sized
|
||||||
|
|
||||||
p.tokens = []rune(input)
|
p.tokens = []rune(input)
|
||||||
p.idx = 0
|
p.idx = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Output returns the internal [strings.Builder].
|
||||||
|
func (p *unvisParser) Output() *strings.Builder {
|
||||||
|
return p.output
|
||||||
|
}
|
||||||
|
|
||||||
// Step moves the index to the next character.
|
// Step moves the index to the next character.
|
||||||
func (p *unvisParser) Step() {
|
func (p *unvisParser) Step() {
|
||||||
p.idx++
|
p.idx++
|
||||||
|
@ -74,6 +84,7 @@ func (p *unvisParser) End() bool {
|
||||||
|
|
||||||
func newParser(flags VisFlag) *unvisParser {
|
func newParser(flags VisFlag) *unvisParser {
|
||||||
return &unvisParser{
|
return &unvisParser{
|
||||||
|
output: nil,
|
||||||
tokens: nil,
|
tokens: nil,
|
||||||
idx: 0,
|
idx: 0,
|
||||||
flags: flags,
|
flags: flags,
|
||||||
|
@ -95,62 +106,57 @@ func newParser(flags VisFlag) *unvisParser {
|
||||||
// <escape-hex> ::= [0-9a-f] [0-9a-f]
|
// <escape-hex> ::= [0-9a-f] [0-9a-f]
|
||||||
// <escape-octal> ::= [0-7] ([0-7] ([0-7])?)?
|
// <escape-octal> ::= [0-7] ([0-7] ([0-7])?)?
|
||||||
|
|
||||||
func (p *unvisParser) plainRune() ([]byte, error) {
|
func (p *unvisParser) plainRune() error {
|
||||||
ch, err := p.Next()
|
ch, err := p.Next()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("plain rune: %w", err)
|
return fmt.Errorf("plain rune: %w", err)
|
||||||
}
|
}
|
||||||
return []byte(string(ch)), nil
|
_, err = p.output.WriteRune(ch)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unvisParser) escapeCStyle() ([]byte, error) {
|
func (p *unvisParser) escapeCStyle() error {
|
||||||
ch, err := p.Next()
|
ch, err := p.Next()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("escape cstyle: %w", err)
|
return fmt.Errorf("escape cstyle: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
output := ""
|
|
||||||
switch ch {
|
switch ch {
|
||||||
case 'n':
|
case 'n':
|
||||||
output = "\n"
|
return p.output.WriteByte('\n')
|
||||||
case 'r':
|
case 'r':
|
||||||
output = "\r"
|
return p.output.WriteByte('\r')
|
||||||
case 'b':
|
case 'b':
|
||||||
output = "\b"
|
return p.output.WriteByte('\b')
|
||||||
case 'a':
|
case 'a':
|
||||||
output = "\x07"
|
return p.output.WriteByte('\x07')
|
||||||
case 'v':
|
case 'v':
|
||||||
output = "\v"
|
return p.output.WriteByte('\v')
|
||||||
case 't':
|
case 't':
|
||||||
output = "\t"
|
return p.output.WriteByte('\t')
|
||||||
case 'f':
|
case 'f':
|
||||||
output = "\f"
|
return p.output.WriteByte('\f')
|
||||||
case 's':
|
case 's':
|
||||||
output = " "
|
return p.output.WriteByte(' ')
|
||||||
case 'E':
|
case 'E':
|
||||||
output = "\x1b"
|
return p.output.WriteByte('\x1b')
|
||||||
case '\n':
|
case '\n', '$':
|
||||||
// Hidden newline.
|
// Hidden newline or marker.
|
||||||
case '$':
|
return nil
|
||||||
// Hidden marker.
|
|
||||||
default:
|
|
||||||
// XXX: We should probably allow falling through and return "\" here...
|
|
||||||
return nil, fmt.Errorf("escape cstyle: %w %q", errUnknownEscapeChar, ch)
|
|
||||||
}
|
}
|
||||||
|
// XXX: We should probably allow falling through and return "\" here...
|
||||||
return []byte(output), nil
|
return fmt.Errorf("escape cstyle: %w %q", errUnknownEscapeChar, ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unvisParser) escapeDigits(base int, force bool) ([]byte, error) {
|
func (p *unvisParser) escapeDigits(base int, force bool) error {
|
||||||
var code int
|
var code int
|
||||||
|
|
||||||
for i := int(0xFF); i > 0; i /= base {
|
for i := int(0xFF); i > 0; i /= base {
|
||||||
ch, err := p.Peek()
|
ch, err := p.Peek()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !force && i != 0xFF {
|
if !force && i != 0xFF {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("escape base %d: %w", base, err)
|
return fmt.Errorf("escape base %d: %w", base, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
digit, err := strconv.ParseInt(string(ch), base, 8)
|
digit, err := strconv.ParseInt(string(ch), base, 8)
|
||||||
|
@ -158,45 +164,40 @@ func (p *unvisParser) escapeDigits(base int, force bool) ([]byte, error) {
|
||||||
if !force && i != 0xFF {
|
if !force && i != 0xFF {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("escape base %d: %w %q: %w", base, errParseDigit, ch, err)
|
return fmt.Errorf("escape base %d: %w %q: %w", base, errParseDigit, ch, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
code = (code * base) + int(digit)
|
code = (code * base) + int(digit)
|
||||||
p.Step() // only consume token if we use it (length is variable)
|
p.Step() // only consume token if we use it (length is variable)
|
||||||
}
|
}
|
||||||
|
|
||||||
if code > unicode.MaxLatin1 {
|
if code > unicode.MaxLatin1 {
|
||||||
return nil, fmt.Errorf("escape base %d: code %+.2x %w", base, code, errOutsideLatin1)
|
return fmt.Errorf("escape base %d: code %+.2x %w", base, code, errOutsideLatin1)
|
||||||
}
|
}
|
||||||
|
return p.output.WriteByte(byte(code))
|
||||||
char := byte(code & 0xFF)
|
|
||||||
return []byte{char}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unvisParser) escapeCtrl(mask byte) ([]byte, error) {
|
func (p *unvisParser) escapeCtrl(mask byte) error {
|
||||||
ch, err := p.Next()
|
ch, err := p.Next()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("escape ctrl: %w", err)
|
return fmt.Errorf("escape ctrl: %w", err)
|
||||||
}
|
}
|
||||||
if ch > unicode.MaxLatin1 {
|
if ch > unicode.MaxLatin1 {
|
||||||
return nil, fmt.Errorf("escape ctrl: code %q %w", ch, errOutsideLatin1)
|
return fmt.Errorf("escape ctrl: code %q %w", ch, errOutsideLatin1)
|
||||||
}
|
}
|
||||||
|
|
||||||
char := byte(ch) & 0x1f
|
char := byte(ch) & 0x1f
|
||||||
if ch == '?' {
|
if ch == '?' {
|
||||||
char = 0x7f
|
char = 0x7f
|
||||||
}
|
}
|
||||||
return []byte{mask | char}, nil
|
return p.output.WriteByte(mask | char)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unvisParser) escapeMeta() ([]byte, error) {
|
func (p *unvisParser) escapeMeta() error {
|
||||||
ch, err := p.Next()
|
ch, err := p.Next()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("escape meta: %w", err)
|
return fmt.Errorf("escape meta: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
mask := byte(0x80)
|
mask := byte(0x80)
|
||||||
|
|
||||||
switch ch {
|
switch ch {
|
||||||
case '^':
|
case '^':
|
||||||
// The same as "\^..." except we apply a mask.
|
// The same as "\^..." except we apply a mask.
|
||||||
|
@ -205,28 +206,28 @@ func (p *unvisParser) escapeMeta() ([]byte, error) {
|
||||||
case '-':
|
case '-':
|
||||||
ch, err := p.Next()
|
ch, err := p.Next()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("escape meta1: %w", err)
|
return fmt.Errorf("escape meta1: %w", err)
|
||||||
}
|
}
|
||||||
if ch > unicode.MaxLatin1 {
|
if ch > unicode.MaxLatin1 {
|
||||||
return nil, fmt.Errorf("escape meta1: code %q %w", ch, errOutsideLatin1)
|
return fmt.Errorf("escape meta1: code %q %w", ch, errOutsideLatin1)
|
||||||
}
|
}
|
||||||
// Add mask to character.
|
// Add mask to character.
|
||||||
return []byte{mask | byte(ch)}, nil
|
return p.output.WriteByte(mask | byte(ch))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("escape meta: %w %q", errUnknownEscapeChar, ch)
|
return fmt.Errorf("escape meta: %w %q", errUnknownEscapeChar, ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unvisParser) escapeSequence() ([]byte, error) {
|
func (p *unvisParser) escapeSequence() error {
|
||||||
ch, err := p.Peek()
|
ch, err := p.Peek()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("escape sequence: %w", err)
|
return fmt.Errorf("escape sequence: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ch {
|
switch ch {
|
||||||
case '\\':
|
case '\\':
|
||||||
p.Step()
|
p.Step()
|
||||||
return []byte("\\"), nil
|
return p.output.WriteByte('\\')
|
||||||
|
|
||||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||||
return p.escapeDigits(8, false)
|
return p.escapeDigits(8, false)
|
||||||
|
@ -248,10 +249,10 @@ func (p *unvisParser) escapeSequence() ([]byte, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unvisParser) element() ([]byte, error) {
|
func (p *unvisParser) element() error {
|
||||||
ch, err := p.Peek()
|
ch, err := p.Peek()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ch {
|
switch ch {
|
||||||
|
@ -271,15 +272,12 @@ func (p *unvisParser) element() ([]byte, error) {
|
||||||
|
|
||||||
func (p *unvisParser) unvis(input string) (string, error) {
|
func (p *unvisParser) unvis(input string) (string, error) {
|
||||||
p.Input(input)
|
p.Input(input)
|
||||||
var output []byte
|
|
||||||
for !p.End() {
|
for !p.End() {
|
||||||
ch, err := p.element()
|
if err := p.element(); err != nil {
|
||||||
if err != nil {
|
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
output = append(output, ch...)
|
|
||||||
}
|
}
|
||||||
return string(output), nil
|
return p.Output().String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unvis takes a string formatted with the given Vis flags (though only the
|
// Unvis takes a string formatted with the given Vis flags (though only the
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package govis
|
package govis
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/rand"
|
||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
@ -172,3 +173,56 @@ func TestUnvisUnicode(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkUnvis(b *testing.B) {
|
||||||
|
doBench := func(b *testing.B, text string) {
|
||||||
|
encoded, err := Vis(text, DefaultVisFlags)
|
||||||
|
require.NoErrorf(b, err, "vis(%q)", text)
|
||||||
|
|
||||||
|
decoded, err := Unvis(encoded, DefaultVisFlags)
|
||||||
|
require.NoErrorf(b, err, "unvis(vis(%q) = %q)", text, encoded)
|
||||||
|
require.Equalf(b, text, decoded, "unvis(vis(%q) = %q)", text, encoded)
|
||||||
|
|
||||||
|
for b.Loop() {
|
||||||
|
_, _ = Unvis(encoded, DefaultVisFlags)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
b.Run("NoChange", func(b *testing.B) {
|
||||||
|
text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("Binary", func(b *testing.B) {
|
||||||
|
var data [32]byte
|
||||||
|
n, err := rand.Read(data[:])
|
||||||
|
require.NoError(b, err, "rand.Read")
|
||||||
|
require.Equal(b, len(data), n, "rand.Read len return")
|
||||||
|
|
||||||
|
text := string(data[:])
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
// The rest of these test strings come from a set of test strings collated
|
||||||
|
// in <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
|
||||||
|
|
||||||
|
b.Run("ASCII", func(b *testing.B) {
|
||||||
|
text := "The quick brown fox jumps over the lazy dog."
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("German", func(b *testing.B) {
|
||||||
|
text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("Russian", func(b *testing.B) {
|
||||||
|
text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("Japanese", func(b *testing.B) {
|
||||||
|
text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue