1
0
Fork 0
mirror of https://github.com/vbatts/go-mtree.git synced 2025-10-03 20:21:01 +00:00
go-mtree/pkg/govis/unvis_test.go
Aleksa Sarai 70d3b19776
unvis: improve performance by reducing allocations
By using a buffer, we can avoid a bunch of small allocations that the
previous implementation did. Based on a few small benchmarks, the
performance improvement is very stark (~3x faster for strings that don't
require any escaping, and ~20% faster for multi-byte utf8 strings):

  goos: linux
  goarch: amd64
  pkg: github.com/vbatts/go-mtree/pkg/govis
  cpu: AMD Ryzen 7 7840U w/ Radeon  780M Graphics
                    │    before    │                after                │
                    │    sec/op    │   sec/op     vs base                │
  Unvis/NoChange-16   1501.0n ± 0%   497.7n ± 1%  -66.84% (p=0.000 n=10)
  Unvis/Binary-16     1317.5n ± 3%   934.9n ± 9%  -29.04% (p=0.000 n=10)
  Unvis/ASCII-16      1325.5n ± 1%   616.8n ± 1%  -53.47% (p=0.000 n=10)
  Unvis/German-16     1884.5n ± 1%   986.9n ± 2%  -47.63% (p=0.000 n=10)
  Unvis/Russian-16     4.636µ ± 1%   3.796µ ± 1%  -18.11% (p=0.000 n=10)
  Unvis/Japanese-16    3.453µ ± 1%   2.867µ ± 1%  -16.99% (p=0.000 n=10)
  geomean              2.072µ        1.206µ       -41.77%

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
2025-09-23 04:40:23 +10:00

228 lines
7.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// SPDX-License-Identifier: Apache-2.0
/*
* govis: unicode aware vis(3) encoding implementation
* Copyright (C) 2017-2025 SUSE LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package govis
import (
"crypto/rand"
"strconv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestUnvisError feeds malformed escape sequences to Unvis and checks that
// each one is rejected with the expected error (matched via assert.ErrorIs).
func TestUnvisError(t *testing.T) {
	type errorCase struct {
		encoded string
		wantErr error
	}

	cases := []errorCase{
		// Octal escape codes allow you to specify invalid ASCII values.
		{"\\777", errOutsideLatin1},
		{"\\420\\322\\455", errOutsideLatin1},
		{"\\652\\233", errOutsideLatin1},
		// Escapes that end abruptly.
		{"\\", errEndOfString},
		{"\\J", errUnknownEscapeChar},
		{"a bad slash: \\", errEndOfString},
		{"testing -- \\x", errEndOfString},
		{"\\xG0 test", strconv.ErrSyntax},
		{" abc \\Mx", errUnknownEscapeChar},
		{"\\Mx", errUnknownEscapeChar},
		{"\\M-", errEndOfString},
		{"\\M-\u5000", errOutsideLatin1},
		{"\\M^", errEndOfString},
		{"\\^", errEndOfString},
		{"\\^\u5000", errOutsideLatin1},
		{"\\M", errEndOfString},
	}

	for _, tc := range cases {
		// Each input doubles as the sub-test name.
		t.Run(tc.encoded, func(t *testing.T) {
			_, err := Unvis(tc.encoded, DefaultVisFlags)
			// Stop early if no error came back; the ErrorIs check below
			// would be meaningless on a nil error.
			require.Errorf(t, err, "invalid escape string should give an error")
			assert.ErrorIs(t, err, tc.wantErr, "unexpected error from invalid escape string")
		})
	}
}
// TestUnvisCStyleEscape checks that Unvis turns C-style backslash escapes
// (such as \n, \t, \s and \E) into the expected decoded text.
func TestUnvisCStyleEscape(t *testing.T) {
	cases := []struct {
		encoded string
		want    string
	}{
		{"", ""},
		{"\\n\\v\\t\\s", "\n\v\t "},
		{"\\\\n\\tt", "\\n\tt"},
		{"\\b", "\b"},
		{"\\r\\b\\n", "\r\b\n"},
		{"\\a\\a\\b", "\x07\x07\b"},
		{"\\f\\s\\E", "\f \x1b"},
		// Hidden markers. They actually aren't generated by vis(3) but for
		// some reason, they're supported...
		{"test\\\ning", "testing"},
		{"test\\$\\$ing", "testing"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.encoded, func(t *testing.T) {
			decoded, err := Unvis(tc.encoded, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.encoded)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.encoded)
		})
	}
}
// TestUnvisMetaEscape checks that Unvis decodes the \M-, \M^ and \^ escape
// forms into the expected raw bytes.
func TestUnvisMetaEscape(t *testing.T) {
	type metaCase struct {
		encoded string
		want    string
	}

	cases := []metaCase{
		{"", ""},
		{"\\M^ ?\\^ ", "\x80?\x00"},
		{"\\M- ?\\^?", "\xa0?\x7f"},
		{"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"},
		{"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"},
		// TODO: Add some more of these tests, but I need to have some
		// secondary source to verify these outputs properly.
	}

	for _, tc := range cases {
		t.Run(tc.encoded, func(t *testing.T) {
			decoded, err := Unvis(tc.encoded, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.encoded)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.encoded)
		})
	}
}
// TestUnvisOctalEscape checks that Unvis decodes one- to three-digit octal
// escapes, including sequences that spell out multi-byte UTF-8 characters one
// byte at a time and sequences that produce bytes which are not valid UTF-8.
func TestUnvisOctalEscape(t *testing.T) {
	// verify decodes encoded and compares the result with want.
	verify := func(t *testing.T, encoded, want string) {
		decoded, err := Unvis(encoded, DefaultVisFlags)
		require.NoErrorf(t, err, "unvis(%q)", encoded)
		assert.Equal(t, want, decoded, "unvis(%q)", encoded)
	}

	cases := []struct {
		encoded string
		want    string
	}{
		{"", ""},
		{"\\1", "\001"},
		{"\\01\\02\\3", "\001\002\003"},
		{"\\001\\023\\32", "\001\023\032"},
		{"this is a test\\0k1\\133", "this is a test\000k1\133"},
		{"\\170YET\\01another test\\1\\\\82", "\170YET\001another test\001\\82"},
		{"\\177MORE tests\\09a", "\177MORE tests\x009a"},
		{"\\\\710more\\1215testing", "\\710more\1215testing"},
		// Make sure that decoding unicode works properly, when it's been encoded as single bytes.
		{"\\360\\237\\225\\264", "\U0001f574"},
		{"T\\303\\234B\\304\\260TAK_UEKAE_K\\303\\266k_Sertifika_Hizmet_Sa\\304\\237lay\\304\\261c\\304\\261s\\304\\261_-_S\\303\\274r\\303\\274m_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"},
		// Some invalid characters...
		{"\\377\\2\\225\\264", "\xff\x02\x95\xb4"},
	}

	for _, tc := range cases {
		t.Run(tc.encoded, func(t *testing.T) {
			verify(t, tc.encoded, tc.want)
		})
	}
}
// TestUnvisHexEscape checks that Unvis decodes \xNN hexadecimal escapes,
// including sequences that spell out multi-byte UTF-8 characters one byte at a
// time and sequences that produce bytes which are not valid UTF-8.
func TestUnvisHexEscape(t *testing.T) {
	type hexCase struct {
		encoded string
		want    string
	}

	cases := []hexCase{
		{"", ""},
		{"\\x01", "\x01"},
		{"\\x01\\x02\\x7a", "\x01\x02\x7a"},
		{"this is a test\\x13\\x52\\x6f", "this is a test\x13\x52\x6f"},
		{"\\x170YET\\x01a\\x22nother test\\x11", "\x170YET\x01a\x22nother test\x11"},
		{"\\\\x007more\\\\x215testing", "\\x007more\\x215testing"},
		// Make sure that decoding unicode works properly, when it's been encoded as single bytes.
		{"\\xf0\\x9f\\x95\\xb4", "\U0001f574"},
		{"T\\xc3\\x9cB\\xc4\\xb0TAK_UEKAE_K\\xc3\\xb6k_Sertifika_Hizmet_Sa\\xc4\\x9flay\\xc4\\xb1c\\xc4\\xb1s\\xc4\\xb1_-_S\\xc3\\xbcr\\xc3\\xbcm_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"},
		// Some invalid characters...
		{"\\xff\\x02\\x95\\xb4", "\xff\x02\x95\xb4"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.encoded, func(t *testing.T) {
			decoded, err := Unvis(tc.encoded, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.encoded)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.encoded)
		})
	}
}
// TestUnvisUnicode ensures that strings containing no escape sequences,
// including ones with non-ASCII characters, come out of Unvis unchanged.
func TestUnvisUnicode(t *testing.T) {
	plainInputs := []string{
		"",
		"this.is.a.normal_string",
		"AC_Raíz_Certicámara_S.A..pem",
		"NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem",
		"TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem",
	}

	for _, plain := range plainInputs {
		t.Run(plain, func(t *testing.T) {
			decoded, err := Unvis(plain, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", plain)
			assert.Equalf(t, plain, decoded, "decoding of %q should be the same as original", plain)
		})
	}
}
func BenchmarkUnvis(b *testing.B) {
doBench := func(b *testing.B, text string) {
encoded, err := Vis(text, DefaultVisFlags)
require.NoErrorf(b, err, "vis(%q)", text)
decoded, err := Unvis(encoded, DefaultVisFlags)
require.NoErrorf(b, err, "unvis(vis(%q) = %q)", text, encoded)
require.Equalf(b, text, decoded, "unvis(vis(%q) = %q)", text, encoded)
for b.Loop() {
_, _ = Unvis(encoded, DefaultVisFlags)
}
}
b.Run("NoChange", func(b *testing.B) {
text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
doBench(b, text)
})
b.Run("Binary", func(b *testing.B) {
var data [32]byte
n, err := rand.Read(data[:])
require.NoError(b, err, "rand.Read")
require.Equal(b, len(data), n, "rand.Read len return")
text := string(data[:])
doBench(b, text)
})
// The rest of these test strings come from a set of test strings collated
// in <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
b.Run("ASCII", func(b *testing.B) {
text := "The quick brown fox jumps over the lazy dog."
doBench(b, text)
})
b.Run("German", func(b *testing.B) {
text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"
doBench(b, text)
})
b.Run("Russian", func(b *testing.B) {
text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"
doBench(b, text)
})
b.Run("Japanese", func(b *testing.B) {
text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"
doBench(b, text)
})
}