1
0
Fork 0
mirror of https://github.com/vbatts/go-mtree.git synced 2025-10-03 20:21:01 +00:00

vis: improve performance by reducing allocations

By avoiding lots of small string allocations and reallocations when
appending to the output buffer, we can get a pretty decent performance
improvement (~6x for strings that do not require escaping, and ~2x for
most other multi-byte utf8 strings).

    goos: linux
    goarch: amd64
    pkg: github.com/vbatts/go-mtree/pkg/govis
    cpu: AMD Ryzen 7 7840U w/ Radeon  780M Graphics
                    │    before    │                after                │
                    │    sec/op    │   sec/op     vs base                │
    Vis/NoChange-16   2372.5n ± 2%   379.1n ± 1%  -84.02% (p=0.000 n=10)
    Vis/Binary-16      2.104µ ± 8%   1.319µ ± 8%  -37.35% (p=0.000 n=10)
    Vis/ASCII-16      2070.0n ± 1%   737.3n ± 0%  -64.38% (p=0.000 n=10)
    Vis/German-16      3.380µ ± 1%   1.181µ ± 2%  -65.04% (p=0.000 n=10)
    Vis/Russian-16    10.927µ ± 2%   5.293µ ± 2%  -51.56% (p=0.000 n=10)
    Vis/Japanese-16    7.489µ ± 1%   3.990µ ± 0%  -46.72% (p=0.000 n=10)
    geomean            3.767µ        1.447µ       -61.58%

In theory we could get more performance if we switched away from fmt.Sprintf,
but the %.N handling would be a little annoying to implement and so we
can punt on that for now.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
Aleksa Sarai 2025-09-22 02:51:50 +10:00
parent 70d3b19776
commit 47086b0654
No known key found for this signature in database
GPG key ID: 2897FAD2B7E9446F
2 changed files with 88 additions and 28 deletions

View file

@ -20,6 +20,7 @@ package govis
import (
"fmt"
"strings"
"unicode"
)
@ -59,14 +60,15 @@ func isgraph(ch rune) bool {
// the plus side this is actually a benefit on the encoding side (it will
// always work with the simple unvis(3) implementation). It also means that we
// don't have to worry about different multi-byte encodings.
func vis(b byte, flag VisFlag) string {
func vis(output *strings.Builder, b byte, flag VisFlag) {
// Treat the single-byte character as a rune.
ch := rune(b)
// XXX: This is quite a horrible thing to support.
if flag&VisHTTPStyle == VisHTTPStyle {
if !ishttp(ch) {
return "%" + fmt.Sprintf("%.2X", ch)
_, _ = fmt.Fprintf(output, "%%%.2X", ch)
return
}
}
@ -86,35 +88,44 @@ func vis(b byte, flag VisFlag) string {
(flag&VisNewline != VisNewline && ch == '\n') ||
(flag&VisSafe != 0 && isunsafe(ch)) {
encoded := string(ch)
if ch == '\\' && flag&VisNoSlash == 0 {
encoded += "\\"
_ = output.WriteByte('\\')
}
return encoded
_ = output.WriteByte(b)
return
}
// Try to use C-style escapes first.
if flag&VisCStyle == VisCStyle {
switch ch {
case ' ':
return "\\s"
_, _ = output.WriteString("\\s")
return
case '\n':
return "\\n"
_, _ = output.WriteString("\\n")
return
case '\r':
return "\\r"
_, _ = output.WriteString("\\r")
return
case '\b':
return "\\b"
_, _ = output.WriteString("\\b")
return
case '\a':
return "\\a"
_, _ = output.WriteString("\\a")
return
case '\v':
return "\\v"
_, _ = output.WriteString("\\v")
return
case '\t':
return "\\t"
_, _ = output.WriteString("\\t")
return
case '\f':
return "\\f"
_, _ = output.WriteString("\\f")
return
case '\x00':
// Output octal just to be safe.
return "\\000"
_, _ = output.WriteString("\\000")
return
}
}
@ -123,7 +134,8 @@ func vis(b byte, flag VisFlag) string {
// encoded as octal.
if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' {
// Always output three-character octal just to be safe.
return fmt.Sprintf("\\%.3o", ch)
_, _ = fmt.Fprintf(output, "\\%.3o", ch)
return
}
// Now we have to output meta or ctrl escapes. As far as I can tell, this
@ -131,30 +143,28 @@ func vis(b byte, flag VisFlag) string {
// copied from the original vis(3) implementation. Hopefully nobody
// actually relies on this (octal and hex are better).
encoded := ""
if flag&VisNoSlash == 0 {
encoded += "\\"
_ = output.WriteByte('\\')
}
// Meta characters have 0x80 set, but are otherwise identical to control
// characters.
if b&0x80 != 0 {
b &= 0x7f
encoded += "M"
_ = output.WriteByte('M')
}
if unicode.IsControl(rune(b)) {
encoded += "^"
_ = output.WriteByte('^')
if b == 0x7f {
encoded += "?"
_ = output.WriteByte('?')
} else {
encoded += fmt.Sprintf("%c", b+'@')
_ = output.WriteByte(b + '@')
}
} else {
encoded += fmt.Sprintf("-%c", b)
_ = output.WriteByte('-')
_ = output.WriteByte(b)
}
return encoded
}
// Vis encodes the provided string to a BSD-compatible encoding using BSD's
@ -164,10 +174,10 @@ func Vis(src string, flags VisFlag) (string, error) {
if unknown := flags &^ visMask; unknown != 0 {
return "", unknownVisFlagsError{flags: flags}
}
output := ""
var output strings.Builder
output.Grow(len(src)) // vis() will always take up at least len(src) bytes
for _, ch := range []byte(src) {
output += vis(ch, flags)
vis(&output, ch, flags)
}
return output, nil
return output.String(), nil
}

View file

@ -19,6 +19,7 @@
package govis
import (
"crypto/rand"
"fmt"
"testing"
@ -125,3 +126,52 @@ func TestVisChanged(t *testing.T) {
})
}
}
func BenchmarkVis(b *testing.B) {
doBench := func(b *testing.B, text string) {
_, err := Vis(text, DefaultVisFlags)
require.NoErrorf(b, err, "vis(%q)", text)
for b.Loop() {
_, _ = Vis(text, DefaultVisFlags)
}
}
b.Run("NoChange", func(b *testing.B) {
text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
doBench(b, text)
})
b.Run("Binary", func(b *testing.B) {
var data [32]byte
n, err := rand.Read(data[:])
require.NoError(b, err, "rand.Read")
require.Equal(b, len(data), n, "rand.Read len return")
text := string(data[:])
doBench(b, text)
})
// The rest of these test strings come from a set of test strings collated
// in <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
b.Run("ASCII", func(b *testing.B) {
text := "The quick brown fox jumps over the lazy dog."
doBench(b, text)
})
b.Run("German", func(b *testing.B) {
text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"
doBench(b, text)
})
b.Run("Russian", func(b *testing.B) {
text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"
doBench(b, text)
})
b.Run("Japanese", func(b *testing.B) {
text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"
doBench(b, text)
})
}