From 47086b06548d97e3feb9db1f3802b16639e274de Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 22 Sep 2025 02:51:50 +1000 Subject: [PATCH] vis: improve performance by reducing allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By avoiding lots of small string allocations and reallocations when appending to the output buffer, we can get a pretty decent performance improvement (~6x for strings that do not require escaping, and ~2x for most other multi-byte UTF-8 strings). goos: linux goarch: amd64 pkg: github.com/vbatts/go-mtree/pkg/govis cpu: AMD Ryzen 7 7840U w/ Radeon 780M Graphics │ before │ after │ │ sec/op │ sec/op vs base │ Vis/NoChange-16 2372.5n ± 2% 379.1n ± 1% -84.02% (p=0.000 n=10) Vis/Binary-16 2.104µ ± 8% 1.319µ ± 8% -37.35% (p=0.000 n=10) Vis/ASCII-16 2070.0n ± 1% 737.3n ± 0% -64.38% (p=0.000 n=10) Vis/German-16 3.380µ ± 1% 1.181µ ± 2% -65.04% (p=0.000 n=10) Vis/Russian-16 10.927µ ± 2% 5.293µ ± 2% -51.56% (p=0.000 n=10) Vis/Japanese-16 7.489µ ± 1% 3.990µ ± 0% -46.72% (p=0.000 n=10) geomean 3.767µ 1.447µ -61.58% In theory we could get more performance if we switch away from fmt.Sprintf, but the %.N handling would be a little annoying to implement and so we can punt on that for now. Signed-off-by: Aleksa Sarai --- pkg/govis/vis.go | 66 +++++++++++++++++++++++++------------------ pkg/govis/vis_test.go | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 28 deletions(-) diff --git a/pkg/govis/vis.go b/pkg/govis/vis.go index 620b49a..99bb092 100644 --- a/pkg/govis/vis.go +++ b/pkg/govis/vis.go @@ -20,6 +20,7 @@ package govis import ( "fmt" + "strings" "unicode" ) @@ -59,14 +60,15 @@ func isgraph(ch rune) bool { // the plus side this is actually a benefit on the encoding side (it will // always work with the simple unvis(3) implementation). It also means that we // don't have to worry about different multi-byte encodings. 
-func vis(b byte, flag VisFlag) string { +func vis(output *strings.Builder, b byte, flag VisFlag) { // Treat the single-byte character as a rune. ch := rune(b) // XXX: This is quite a horrible thing to support. if flag&VisHTTPStyle == VisHTTPStyle { if !ishttp(ch) { - return "%" + fmt.Sprintf("%.2X", ch) + _, _ = fmt.Fprintf(output, "%%%.2X", ch) + return } } @@ -86,35 +88,44 @@ func vis(b byte, flag VisFlag) string { (flag&VisNewline != VisNewline && ch == '\n') || (flag&VisSafe != 0 && isunsafe(ch)) { - encoded := string(ch) if ch == '\\' && flag&VisNoSlash == 0 { - encoded += "\\" + _ = output.WriteByte('\\') } - return encoded + _ = output.WriteByte(b) + return } // Try to use C-style escapes first. if flag&VisCStyle == VisCStyle { switch ch { case ' ': - return "\\s" + _, _ = output.WriteString("\\s") + return case '\n': - return "\\n" + _, _ = output.WriteString("\\n") + return case '\r': - return "\\r" + _, _ = output.WriteString("\\r") + return case '\b': - return "\\b" + _, _ = output.WriteString("\\b") + return case '\a': - return "\\a" + _, _ = output.WriteString("\\a") + return case '\v': - return "\\v" + _, _ = output.WriteString("\\v") + return case '\t': - return "\\t" + _, _ = output.WriteString("\\t") + return case '\f': - return "\\f" + _, _ = output.WriteString("\\f") + return case '\x00': // Output octal just to be safe. - return "\\000" + _, _ = output.WriteString("\\000") + return } } @@ -123,7 +134,8 @@ func vis(b byte, flag VisFlag) string { // encoded as octal. if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' { // Always output three-character octal just to be safe. - return fmt.Sprintf("\\%.3o", ch) + _, _ = fmt.Fprintf(output, "\\%.3o", ch) + return } // Now we have to output meta or ctrl escapes. As far as I can tell, this @@ -131,30 +143,28 @@ func vis(b byte, flag VisFlag) string { // copied from the original vis(3) implementation. Hopefully nobody // actually relies on this (octal and hex are better). 
- encoded := "" if flag&VisNoSlash == 0 { - encoded += "\\" + _ = output.WriteByte('\\') } // Meta characters have 0x80 set, but are otherwise identical to control // characters. if b&0x80 != 0 { b &= 0x7f - encoded += "M" + _ = output.WriteByte('M') } if unicode.IsControl(rune(b)) { - encoded += "^" + _ = output.WriteByte('^') if b == 0x7f { - encoded += "?" + _ = output.WriteByte('?') } else { - encoded += fmt.Sprintf("%c", b+'@') + _ = output.WriteByte(b + '@') } } else { - encoded += fmt.Sprintf("-%c", b) + _ = output.WriteByte('-') + _ = output.WriteByte(b) } - - return encoded } // Vis encodes the provided string to a BSD-compatible encoding using BSD's @@ -164,10 +174,10 @@ func Vis(src string, flags VisFlag) (string, error) { if unknown := flags &^ visMask; unknown != 0 { return "", unknownVisFlagsError{flags: flags} } - - output := "" + var output strings.Builder + output.Grow(len(src)) // vis() will always take up at least len(src) bytes for _, ch := range []byte(src) { - output += vis(ch, flags) + vis(&output, ch, flags) } - return output, nil + return output.String(), nil } diff --git a/pkg/govis/vis_test.go b/pkg/govis/vis_test.go index 96c3672..f7b7a32 100644 --- a/pkg/govis/vis_test.go +++ b/pkg/govis/vis_test.go @@ -19,6 +19,7 @@ package govis import ( + "crypto/rand" "fmt" "testing" @@ -125,3 +126,52 @@ func TestVisChanged(t *testing.T) { }) } } + +func BenchmarkVis(b *testing.B) { + doBench := func(b *testing.B, text string) { + _, err := Vis(text, DefaultVisFlags) + require.NoErrorf(b, err, "vis(%q)", text) + + for b.Loop() { + _, _ = Vis(text, DefaultVisFlags) + } + } + + b.Run("NoChange", func(b *testing.B) { + text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + doBench(b, text) + }) + + b.Run("Binary", func(b *testing.B) { + var data [32]byte + n, err := rand.Read(data[:]) + require.NoError(b, err, "rand.Read") + require.Equal(b, len(data), n, "rand.Read len return") + + text := string(data[:]) + doBench(b, text) + }) + 
+ // The rest of these test strings come from a set of test strings collated + // in . + + b.Run("ASCII", func(b *testing.B) { + text := "The quick brown fox jumps over the lazy dog." + doBench(b, text) + }) + + b.Run("German", func(b *testing.B) { + text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg" + doBench(b, text) + }) + + b.Run("Russian", func(b *testing.B) { + text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!" + doBench(b, text) + }) + + b.Run("Japanese", func(b *testing.B) { + text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ" + doBench(b, text) + }) +}