mirror of
https://github.com/vbatts/go-mtree.git
synced 2025-10-03 20:21:01 +00:00
vis: improve performance by reducing allocations
By avoiding lots of small string allocations and reallocations when appending to the output buffer, we can get a pretty decent performance improvement (~6x for strings that do not require escaping, and ~2x for most other multi-byte utf8 strings).

    goos: linux
    goarch: amd64
    pkg: github.com/vbatts/go-mtree/pkg/govis
    cpu: AMD Ryzen 7 7840U w/ Radeon 780M Graphics
                    │    before    │                after                │
                    │    sec/op    │    sec/op     vs base               │
    Vis/NoChange-16   2372.5n ± 2%    379.1n ± 1%  -84.02% (p=0.000 n=10)
    Vis/Binary-16      2.104µ ± 8%    1.319µ ± 8%  -37.35% (p=0.000 n=10)
    Vis/ASCII-16      2070.0n ± 1%    737.3n ± 0%  -64.38% (p=0.000 n=10)
    Vis/German-16      3.380µ ± 1%    1.181µ ± 2%  -65.04% (p=0.000 n=10)
    Vis/Russian-16    10.927µ ± 2%    5.293µ ± 2%  -51.56% (p=0.000 n=10)
    Vis/Japanese-16    7.489µ ± 1%    3.990µ ± 0%  -46.72% (p=0.000 n=10)
    geomean            3.767µ         1.447µ       -61.58%

In theory we could get more performance if we switched away from fmt.Sprintf, but the %.N handling would be a little annoying to implement, so we can punt on that for now.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
parent
70d3b19776
commit
47086b0654
2 changed files with 88 additions and 28 deletions
|
@ -20,6 +20,7 @@ package govis
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -59,14 +60,15 @@ func isgraph(ch rune) bool {
|
||||||
// the plus side this is actually a benefit on the encoding side (it will
|
// the plus side this is actually a benefit on the encoding side (it will
|
||||||
// always work with the simple unvis(3) implementation). It also means that we
|
// always work with the simple unvis(3) implementation). It also means that we
|
||||||
// don't have to worry about different multi-byte encodings.
|
// don't have to worry about different multi-byte encodings.
|
||||||
func vis(b byte, flag VisFlag) string {
|
func vis(output *strings.Builder, b byte, flag VisFlag) {
|
||||||
// Treat the single-byte character as a rune.
|
// Treat the single-byte character as a rune.
|
||||||
ch := rune(b)
|
ch := rune(b)
|
||||||
|
|
||||||
// XXX: This is quite a horrible thing to support.
|
// XXX: This is quite a horrible thing to support.
|
||||||
if flag&VisHTTPStyle == VisHTTPStyle {
|
if flag&VisHTTPStyle == VisHTTPStyle {
|
||||||
if !ishttp(ch) {
|
if !ishttp(ch) {
|
||||||
return "%" + fmt.Sprintf("%.2X", ch)
|
_, _ = fmt.Fprintf(output, "%%%.2X", ch)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,35 +88,44 @@ func vis(b byte, flag VisFlag) string {
|
||||||
(flag&VisNewline != VisNewline && ch == '\n') ||
|
(flag&VisNewline != VisNewline && ch == '\n') ||
|
||||||
(flag&VisSafe != 0 && isunsafe(ch)) {
|
(flag&VisSafe != 0 && isunsafe(ch)) {
|
||||||
|
|
||||||
encoded := string(ch)
|
|
||||||
if ch == '\\' && flag&VisNoSlash == 0 {
|
if ch == '\\' && flag&VisNoSlash == 0 {
|
||||||
encoded += "\\"
|
_ = output.WriteByte('\\')
|
||||||
}
|
}
|
||||||
return encoded
|
_ = output.WriteByte(b)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to use C-style escapes first.
|
// Try to use C-style escapes first.
|
||||||
if flag&VisCStyle == VisCStyle {
|
if flag&VisCStyle == VisCStyle {
|
||||||
switch ch {
|
switch ch {
|
||||||
case ' ':
|
case ' ':
|
||||||
return "\\s"
|
_, _ = output.WriteString("\\s")
|
||||||
|
return
|
||||||
case '\n':
|
case '\n':
|
||||||
return "\\n"
|
_, _ = output.WriteString("\\n")
|
||||||
|
return
|
||||||
case '\r':
|
case '\r':
|
||||||
return "\\r"
|
_, _ = output.WriteString("\\r")
|
||||||
|
return
|
||||||
case '\b':
|
case '\b':
|
||||||
return "\\b"
|
_, _ = output.WriteString("\\b")
|
||||||
|
return
|
||||||
case '\a':
|
case '\a':
|
||||||
return "\\a"
|
_, _ = output.WriteString("\\a")
|
||||||
|
return
|
||||||
case '\v':
|
case '\v':
|
||||||
return "\\v"
|
_, _ = output.WriteString("\\v")
|
||||||
|
return
|
||||||
case '\t':
|
case '\t':
|
||||||
return "\\t"
|
_, _ = output.WriteString("\\t")
|
||||||
|
return
|
||||||
case '\f':
|
case '\f':
|
||||||
return "\\f"
|
_, _ = output.WriteString("\\f")
|
||||||
|
return
|
||||||
case '\x00':
|
case '\x00':
|
||||||
// Output octal just to be safe.
|
// Output octal just to be safe.
|
||||||
return "\\000"
|
_, _ = output.WriteString("\\000")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,7 +134,8 @@ func vis(b byte, flag VisFlag) string {
|
||||||
// encoded as octal.
|
// encoded as octal.
|
||||||
if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' {
|
if flag&VisOctal == VisOctal || isgraph(ch) || ch&0x7f == ' ' {
|
||||||
// Always output three-character octal just to be safe.
|
// Always output three-character octal just to be safe.
|
||||||
return fmt.Sprintf("\\%.3o", ch)
|
_, _ = fmt.Fprintf(output, "\\%.3o", ch)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now we have to output meta or ctrl escapes. As far as I can tell, this
|
// Now we have to output meta or ctrl escapes. As far as I can tell, this
|
||||||
|
@ -131,30 +143,28 @@ func vis(b byte, flag VisFlag) string {
|
||||||
// copied from the original vis(3) implementation. Hopefully nobody
|
// copied from the original vis(3) implementation. Hopefully nobody
|
||||||
// actually relies on this (octal and hex are better).
|
// actually relies on this (octal and hex are better).
|
||||||
|
|
||||||
encoded := ""
|
|
||||||
if flag&VisNoSlash == 0 {
|
if flag&VisNoSlash == 0 {
|
||||||
encoded += "\\"
|
_ = output.WriteByte('\\')
|
||||||
}
|
}
|
||||||
|
|
||||||
// Meta characters have 0x80 set, but are otherwise identical to control
|
// Meta characters have 0x80 set, but are otherwise identical to control
|
||||||
// characters.
|
// characters.
|
||||||
if b&0x80 != 0 {
|
if b&0x80 != 0 {
|
||||||
b &= 0x7f
|
b &= 0x7f
|
||||||
encoded += "M"
|
_ = output.WriteByte('M')
|
||||||
}
|
}
|
||||||
|
|
||||||
if unicode.IsControl(rune(b)) {
|
if unicode.IsControl(rune(b)) {
|
||||||
encoded += "^"
|
_ = output.WriteByte('^')
|
||||||
if b == 0x7f {
|
if b == 0x7f {
|
||||||
encoded += "?"
|
_ = output.WriteByte('?')
|
||||||
} else {
|
} else {
|
||||||
encoded += fmt.Sprintf("%c", b+'@')
|
_ = output.WriteByte(b + '@')
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
encoded += fmt.Sprintf("-%c", b)
|
_ = output.WriteByte('-')
|
||||||
|
_ = output.WriteByte(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
return encoded
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vis encodes the provided string to a BSD-compatible encoding using BSD's
|
// Vis encodes the provided string to a BSD-compatible encoding using BSD's
|
||||||
|
@ -164,10 +174,10 @@ func Vis(src string, flags VisFlag) (string, error) {
|
||||||
if unknown := flags &^ visMask; unknown != 0 {
|
if unknown := flags &^ visMask; unknown != 0 {
|
||||||
return "", unknownVisFlagsError{flags: flags}
|
return "", unknownVisFlagsError{flags: flags}
|
||||||
}
|
}
|
||||||
|
var output strings.Builder
|
||||||
output := ""
|
output.Grow(len(src)) // vis() will always take up at least len(src) bytes
|
||||||
for _, ch := range []byte(src) {
|
for _, ch := range []byte(src) {
|
||||||
output += vis(ch, flags)
|
vis(&output, ch, flags)
|
||||||
}
|
}
|
||||||
return output, nil
|
return output.String(), nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package govis
|
package govis
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/rand"
|
||||||
"fmt"
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
@ -125,3 +126,52 @@ func TestVisChanged(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkVis(b *testing.B) {
|
||||||
|
doBench := func(b *testing.B, text string) {
|
||||||
|
_, err := Vis(text, DefaultVisFlags)
|
||||||
|
require.NoErrorf(b, err, "vis(%q)", text)
|
||||||
|
|
||||||
|
for b.Loop() {
|
||||||
|
_, _ = Vis(text, DefaultVisFlags)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
b.Run("NoChange", func(b *testing.B) {
|
||||||
|
text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("Binary", func(b *testing.B) {
|
||||||
|
var data [32]byte
|
||||||
|
n, err := rand.Read(data[:])
|
||||||
|
require.NoError(b, err, "rand.Read")
|
||||||
|
require.Equal(b, len(data), n, "rand.Read len return")
|
||||||
|
|
||||||
|
text := string(data[:])
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
// The rest of these test strings come from a set of test strings collated
|
||||||
|
// in <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
|
||||||
|
|
||||||
|
b.Run("ASCII", func(b *testing.B) {
|
||||||
|
text := "The quick brown fox jumps over the lazy dog."
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("German", func(b *testing.B) {
|
||||||
|
text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("Russian", func(b *testing.B) {
|
||||||
|
text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("Japanese", func(b *testing.B) {
|
||||||
|
text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"
|
||||||
|
doBench(b, text)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue