1
0
Fork 0
mirror of https://github.com/vbatts/go-mtree.git synced 2025-10-04 12:31:00 +00:00
go-mtree/pkg/govis/vis_test.go
Aleksa Sarai 47086b0654
vis: improve performance by reducing allocations
By avoiding lots of small string allocations and reallocations when
appending to the output buffer, we can get a pretty decent performance
improvement (~6x for strings that do not require escaping, and ~2x for
most other multi-byte utf8 strings).

    goos: linux
    goarch: amd64
    pkg: github.com/vbatts/go-mtree/pkg/govis
    cpu: AMD Ryzen 7 7840U w/ Radeon  780M Graphics
                    │    before    │                after                │
                    │    sec/op    │   sec/op     vs base                │
    Vis/NoChange-16   2372.5n ± 2%   379.1n ± 1%  -84.02% (p=0.000 n=10)
    Vis/Binary-16      2.104µ ± 8%   1.319µ ± 8%  -37.35% (p=0.000 n=10)
    Vis/ASCII-16      2070.0n ± 1%   737.3n ± 0%  -64.38% (p=0.000 n=10)
    Vis/German-16      3.380µ ± 1%   1.181µ ± 2%  -65.04% (p=0.000 n=10)
    Vis/Russian-16    10.927µ ± 2%   5.293µ ± 2%  -51.56% (p=0.000 n=10)
    Vis/Japanese-16    7.489µ ± 1%   3.990µ ± 0%  -46.72% (p=0.000 n=10)
    geomean            3.767µ        1.447µ       -61.58%

In theory we could get more performance if we switched away from
fmt.Sprintf, but the %.N (precision) handling would be a little annoying
to implement ourselves, so we can punt on that for now.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
2025-09-23 04:40:25 +10:00

177 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// SPDX-License-Identifier: Apache-2.0
/*
* govis: unicode aware vis(3) encoding implementation
* Copyright (C) 2017-2025 SUSE LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package govis
import (
"crypto/rand"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestVisUnchanged verifies that Vis hands back its input verbatim for inputs
// that need no escaping under the flags used by each case.
func TestVisUnchanged(t *testing.T) {
	type unchangedCase struct {
		name  string
		input string
		flag  VisFlag
	}

	cases := []unchangedCase{
		{"Empty", "", DefaultVisFlags},
		{"Plain1", "helloworld", DefaultVisFlags},
		{"Plain2", "THIS_IS_A_TEST1234", DefaultVisFlags},
		{"Plain3", "SomeEncodingsAreCool", DefaultVisFlags},
		// These two clear a single flag from the defaults, so the matching
		// whitespace is expected to pass through untouched.
		{"Spaces", "spaces are totally safe", DefaultVisFlags &^ VisSpace},
		{"Tabs", "tabs\tare\talso\tsafe!!", DefaultVisFlags &^ VisTab},
		// With VisSafe added, these control characters are expected to be
		// left alone.
		{"BasicCtrlChars", "just\a\atrustme\r\b\b!!", DefaultVisFlags | VisSafe},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := Vis(tc.input, tc.flag)
			require.NoErrorf(t, err, "vis(%q, %s)", tc.input, tc.flag)
			assert.Equalf(t, tc.input, got, "encoding of vis(%q, %s) should be unchanged", tc.input, tc.flag)
		})
	}
}
// TestVisFlags is a table-driven check of Vis: every row supplies an input, the
// exact encoding expected for it, and the VisFlag set to encode with. Rows are
// grouped by the flag they primarily exercise and run as sub-tests named
// Test00, Test01, ... after their index in the table.
//
// Runs of consecutive spaces in inputs are spelled as \x20 escapes so that
// the number of spaces is explicit and cannot be silently altered by
// whitespace normalisation; the expected output contains one escape per
// input space, so the counts must match exactly.
func TestVisFlags(t *testing.T) {
	for idx, test := range []struct {
		input  string
		output string
		flag   VisFlag
	}{
		// Default
		{"AC_Ra\u00edz_Certic\u00e1mara_S.A..pem", "AC_Ra\\M-C\\M--z_Certic\\M-C\\M-!mara_S.A..pem", 0},
		{"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, 0},
		{"@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", "@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", 0},
		{"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-<b`, 0},
		{"\u9003\"9v1)T798|o;fly jnKX\u0489Be=", `\M-i\M^@\M^C"9v1)T798|o;fly jnKX\M-R\M^IBe=`, 0},
		// VisOctal
		{"", "", VisOctal},
		{"\022", "\\022", VisOctal},
		{"\n \t", "\\012\\040\t", VisNewline | VisSpace | VisOctal},
		{"\x12\f\a\n\v\b \U00012312", "\\022\\014\\007\n\\013\\010 \\360\\222\\214\\222", VisOctal},
		{"AC_Ra\u00edz_Certic\u00e1mara_S.A..pem", "AC_Ra\\303\\255z_Certic\\303\\241mara_S.A..pem", VisOctal},
		{"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\303\223\302\212nqgh5/t\303\245<86>\302\262kzla\\e^lv\303\237\302\223nv\303\237\302\256a|3}\303\230\302\210\303\226\302\204`, VisOctal},
		{"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\303\206\303\2062\302\256\302\267m\303\233\303\203r^\302\277p\303\206u'q\303\273c2\303\260u\302\270\303\235\303\250v\303\277\302\260\303\234\303\202\303\2653\303\233-k\303\262sd4\\p\303\232\302\246\303\223\303\256a<\303\246s{\302\240p\303\260\303\277j\303\240\303\250\302\270\302\270\302\274\303\274b`, VisOctal},
		{"\u9003\"9v1)T798|o;fly jnKX\u0489Be=", `\351\200\203"9v1)T798|o;fly jnKX\322\211Be=`, VisOctal},
		// VisCStyle
		{"\x00 \f \a \n\v\b \r \t\r", "\\000 \\f \\a \n\\v\\b \\r \t\\r", VisCStyle},
		{"\t \n\v\b", "\\t \n\\v\\b", VisTab | VisCStyle},
		// Three trailing spaces, one \s expected for each.
		{"\n\v\t\x20\x20\x20", "\n\\v\t\\s\\s\\s", VisSpace | VisCStyle},
		{"\n \n ", "\\n \\n ", VisNewline | VisCStyle},
		{"z^i3i$\u00d3\u008anqgh5/t\u00e5<86>\u00b2kzla\\e^lv\u00df\u0093nv\u00df\u00aea|3}\u00d8\u0088\u00d6\u0084", `z^i3i$\M-C\M^S\M-B\M^Jnqgh5/t\M-C\M-%<86>\M-B\M-2kzla\\e^lv\M-C\M^_\M-B\M^Snv\M-C\M^_\M-B\M-.a|3}\M-C\M^X\M-B\M^H\M-C\M^V\M-B\M^D`, VisCStyle},
		{"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-<b`, VisCStyle},
		{"\u9003\"9v1)T798|o;fly jnKX\u0489Be=", `\M-i\M^@\M^C"9v1)T798|o;fly\sjnKX\M-R\M^IBe=`, VisCStyle | VisSpace},
		// VisSpace
		// Two spaces, one \040 expected for each.
		{"\x20\x20", `\040\040`, VisSpace},
		{"\t \t", "\t\\040\t", VisSpace},
		// Three spaces after the literal "\040" text and three at the end.
		{"\\040\x20\x20\x20plenty of characters here\x20\x20\x20", `\\040\040\040\040plenty\040of\040characters\040here\040\040\040`, VisSpace},
		{"Js9L\u00cd\u00b2o?4824y'$|P}FIr%mW /KL9$]~", `Js9L\M-C\M^M\M-B\M-2o?4824y'$|P}FIr%mW\040/KL9$]~`, VisWhite},
		{"1\u00c6\u00abTcz+Vda?)k1%\\\"P;`po`h", `1\M-C\M^F\M-B\M-+Tcz+Vda?)k1%\\"P;` + "`po`" + `h`, VisWhite},
		{"\u9003\"9v1)T798|o;fly jnKX\u0489Be=", `\M-i\M^@\M^C"9v1)T798|o;fly\040jnKX\M-R\M^IBe=`, VisSpace},
		// VisTab
		{"\t \v", "\\^I \\^K", VisTab},
		{"\t \v", "\\011 \\013", VisTab | VisOctal},
		// VisNewline
		{"\t\n \v\r\n", "\t\\^J \\^K\\^M\\^J", VisNewline},
		{"\t\n \v\r\n", "\t\\012 \\013\\015\\012", VisNewline | VisOctal},
		// VisSafe (no rows in this table exercise it)
		// VisHTTPStyle
		// Two spaces before the final rune, one %20 expected for each.
		{"\x12\f\a\n\v\b\x20\x20\U00012312", `%12%0C%07%0A%0B%08%20%20%F0%92%8C%92`, VisHTTPStyle},
		{"1\u00c6\u00abTcz+Vda?)k1%\\\"P;`po`h", `1%C3%86%C2%ABTcz+Vda%3F)k1%25%5C%22P%3B%60po%60h`, VisHTTPStyle},
		{"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_%C3%86%C3%862%C2%AE%C2%B7m%C3%9B%C3%83r%5E%C2%BFp%C3%86u'q%C3%BBc2%C3%B0u%C2%B8%C3%9D%C3%A8v%C3%BF%C2%B0%C3%9C%C3%82%C3%B53%C3%9B-k%C3%B2sd4%5Cp%C3%9A%C2%A6%C3%93%C3%AEa%3C%C3%A6s%7B%C2%A0p%C3%B0%C3%BFj%C3%A0%C3%A8%C2%B8%C2%B8%C2%BC%C3%BCb`, VisHTTPStyle},
		{"'3Ze\u050e|\u02del\u069du-Rpct4+Z5b={@_{b", `'3Ze%D4%8E%7C%CB%9El%DA%9Du-Rpct4+Z5b%3D%7B%40_%7Bb`, VisHTTPStyle},
		// VisGlob
		{"cat /proc/**/status | grep '[pid]' ;; # cool code here", `cat /proc/\052\052/status | grep '\133pid]' ;; \043 cool code here`, VisGlob},
		{"@?e1xs+.R_Kjo]7s8pgRP:*nXCE4{!c", `@\077e1xs+.R_Kjo]7s8pgRP:\052nXCE4{!c`, VisGlob},
		{"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\M-C\M^F\M-C\M^F2\M-B\M-.\M-B\M-7m\M-C\M^[\M-C\M^Cr^\M-B\M-?p\M-C\M^Fu'q\M-C\M-;c2\M-C\M-0u\M-B\M-8\M-C\M^]\M-C\M-(v\M-C\M-?\M-B\M-0\M-C\M^\\M-C\M^B\M-C\M-53\M-C\M^[-k\M-C\M-2sd4\\p\M-C\M^Z\M-B\M-&\M-C\M^S\M-C\M-.a<\M-C\M-&s{\M-B\240p\M-C\M-0\M-C\M-?j\M-C\240\M-C\M-(\M-B\M-8\M-B\M-8\M-B\M-<\M-C\M-<b`, VisGlob},
		{"62_\u00c6\u00c62\u00ae\u00b7m\u00db\u00c3r^\u00bfp\u00c6u'q\u00fbc2\u00f0u\u00b8\u00dd\u00e8v\u00ff\u00b0\u00dc\u00c2\u00f53\u00db-k\u00f2sd4\\p\u00da\u00a6\u00d3\u00eea<\u00e6s{\u00a0p\u00f0\u00ffj\u00e0\u00e8\u00b8\u00b8\u00bc\u00fcb", `62_\303\206\303\2062\302\256\302\267m\303\233\303\203r^\302\277p\303\206u'q\303\273c2\303\260u\302\270\303\235\303\250v\303\277\302\260\303\234\303\202\303\2653\303\233-k\303\262sd4\\p\303\232\302\246\303\223\303\256a<\303\246s{\302\240p\303\260\303\277j\303\240\303\250\302\270\302\270\302\274\303\274b`, VisGlob | VisOctal},
		{"'3Ze\u050e|\u02del\u069du-Rpct4+Z5b={@_{b", `'3Ze\M-T\M^N|\M-K\M^^l\M-Z\M^]u-Rpct4+Z5b={@_{b`, VisGlob},
		{"'3Ze\u050e|\u02del\u069du-Rpct4+Z5b={@_{b", `'3Ze\324\216|\313\236l\332\235u-Rpct4+Z5b={@_{b`, VisGlob | VisOctal},
	} {
		t.Run(fmt.Sprintf("Test%.2d", idx), func(t *testing.T) {
			enc, err := Vis(test.input, test.flag)
			require.NoErrorf(t, err, "vis(%q, %s)", test.input, test.flag)
			assert.Equalf(t, test.output, enc, "vis(%q, %s)", test.input, test.flag)
		})
	}
}
// TestVisChanged verifies that, for each listed input, encoding with
// DefaultVisFlags yields a string that differs from the input. Each input
// doubles as its sub-test name.
func TestVisChanged(t *testing.T) {
	inputs := []string{
		"hello world",
		"THIS\\IS_A_TEST1234",
		"AC_Ra\u00edz_Certic\u00e1mara_S.A..pem",
	}

	for _, input := range inputs {
		t.Run(input, func(t *testing.T) {
			encoded, err := Vis(input, DefaultVisFlags)
			require.NoErrorf(t, err, "vis(%q)", input)
			assert.NotEqualf(t, input, encoded, "encoding of %q should be different to original", input)
		})
	}
}
func BenchmarkVis(b *testing.B) {
doBench := func(b *testing.B, text string) {
_, err := Vis(text, DefaultVisFlags)
require.NoErrorf(b, err, "vis(%q)", text)
for b.Loop() {
_, _ = Vis(text, DefaultVisFlags)
}
}
b.Run("NoChange", func(b *testing.B) {
text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
doBench(b, text)
})
b.Run("Binary", func(b *testing.B) {
var data [32]byte
n, err := rand.Read(data[:])
require.NoError(b, err, "rand.Read")
require.Equal(b, len(data), n, "rand.Read len return")
text := string(data[:])
doBench(b, text)
})
// The rest of these test strings come from a set of test strings collated
// in <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
b.Run("ASCII", func(b *testing.B) {
text := "The quick brown fox jumps over the lazy dog."
doBench(b, text)
})
b.Run("German", func(b *testing.B) {
text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"
doBench(b, text)
})
b.Run("Russian", func(b *testing.B) {
text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"
doBench(b, text)
})
b.Run("Japanese", func(b *testing.B) {
text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"
doBench(b, text)
})
}