mirror of
https://github.com/vbatts/go-mtree.git
synced 2025-10-03 20:21:01 +00:00
This is supported by both OpenBSD and FreeBSD so it seems possible that we will run into \" sequences at some point. The handling is basically identical to \\ sequences. Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
229 lines
7.3 KiB
Go
229 lines
7.3 KiB
Go
// SPDX-License-Identifier: Apache-2.0
|
||
/*
|
||
* govis: unicode aware vis(3) encoding implementation
|
||
* Copyright (C) 2017-2025 SUSE LLC.
|
||
*
|
||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
* you may not use this file except in compliance with the License.
|
||
* You may obtain a copy of the License at
|
||
*
|
||
* http://www.apache.org/licenses/LICENSE-2.0
|
||
*
|
||
* Unless required by applicable law or agreed to in writing, software
|
||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
* See the License for the specific language governing permissions and
|
||
* limitations under the License.
|
||
*/
|
||
|
||
package govis
|
||
|
||
import (
|
||
"crypto/rand"
|
||
"strconv"
|
||
"testing"
|
||
|
||
"github.com/stretchr/testify/assert"
|
||
"github.com/stretchr/testify/require"
|
||
)
|
||
|
||
func TestUnvisError(t *testing.T) {
|
||
for _, test := range []struct {
|
||
input string
|
||
err error
|
||
}{
|
||
// Octal escape codes allow you to specify invalid ASCII values.
|
||
{"\\777", errOutsideLatin1},
|
||
{"\\420\\322\\455", errOutsideLatin1},
|
||
{"\\652\\233", errOutsideLatin1},
|
||
// Escapes that end abruptly.
|
||
{"\\", errEndOfString},
|
||
{"\\J", errUnknownEscapeChar},
|
||
{"a bad slash: \\", errEndOfString},
|
||
{"testing -- \\x", errEndOfString},
|
||
{"\\xG0 test", strconv.ErrSyntax},
|
||
{" abc \\Mx", errUnknownEscapeChar},
|
||
{"\\Mx", errUnknownEscapeChar},
|
||
{"\\M-", errEndOfString},
|
||
{"\\M-\u5000", errOutsideLatin1},
|
||
{"\\M^", errEndOfString},
|
||
{"\\^", errEndOfString},
|
||
{"\\^\u5000", errOutsideLatin1},
|
||
{"\\M", errEndOfString},
|
||
} {
|
||
t.Run(test.input, func(t *testing.T) {
|
||
_, err := Unvis(test.input, DefaultVisFlags)
|
||
require.Errorf(t, err, "invalid escape string should give an error")
|
||
assert.ErrorIs(t, err, test.err, "unexpected error from invalid escape string")
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestUnvisCStyleEscape(t *testing.T) {
|
||
for _, test := range []struct {
|
||
input string
|
||
expected string
|
||
}{
|
||
{"", ""},
|
||
{`\n\v\t\s`, "\n\v\t "},
|
||
{`\\n\tt`, "\\n\tt"},
|
||
{`\b`, "\b"},
|
||
{`\r\b\n`, "\r\b\n"},
|
||
{`\a\a\b`, "\x07\x07\b"},
|
||
{`\f\s\E`, "\f \x1b"},
|
||
{`\"foo\"\\"bar`, `"foo"\"bar`},
|
||
// Hidden markers. They actually aren't generated by vis(3) but for
|
||
// some reason, they're supported...
|
||
{"test\\\ning", "testing"},
|
||
{"test\\$\\$ing", "testing"},
|
||
} {
|
||
t.Run(test.input, func(t *testing.T) {
|
||
got, err := Unvis(test.input, DefaultVisFlags)
|
||
require.NoErrorf(t, err, "unvis(%q)", test.input)
|
||
assert.Equal(t, test.expected, got, "unvis(%q)", test.input)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestUnvisMetaEscape(t *testing.T) {
|
||
for _, test := range []struct {
|
||
input string
|
||
expected string
|
||
}{
|
||
{"", ""},
|
||
{"\\M^ ?\\^ ", "\x80?\x00"},
|
||
{"\\M- ?\\^?", "\xa0?\x7f"},
|
||
{"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"},
|
||
{"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"},
|
||
// TODO: Add some more of these tests, but I need to have some
|
||
// secondary source to verify these outputs properly.
|
||
} {
|
||
t.Run(test.input, func(t *testing.T) {
|
||
got, err := Unvis(test.input, DefaultVisFlags)
|
||
require.NoErrorf(t, err, "unvis(%q)", test.input)
|
||
assert.Equal(t, test.expected, got, "unvis(%q)", test.input)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestUnvisOctalEscape(t *testing.T) {
|
||
for _, test := range []struct {
|
||
input string
|
||
expected string
|
||
}{
|
||
{"", ""},
|
||
{"\\1", "\001"},
|
||
{"\\01\\02\\3", "\001\002\003"},
|
||
{"\\001\\023\\32", "\001\023\032"},
|
||
{"this is a test\\0k1\\133", "this is a test\000k1\133"},
|
||
{"\\170YET\\01another test\\1\\\\82", "\170YET\001another test\001\\82"},
|
||
{"\\177MORE tests\\09a", "\177MORE tests\x009a"},
|
||
{"\\\\710more\\1215testing", "\\710more\1215testing"},
|
||
// Make sure that decoding unicode works properly, when it's been encoded as single bytes.
|
||
{"\\360\\237\\225\\264", "\U0001f574"},
|
||
{"T\\303\\234B\\304\\260TAK_UEKAE_K\\303\\266k_Sertifika_Hizmet_Sa\\304\\237lay\\304\\261c\\304\\261s\\304\\261_-_S\\303\\274r\\303\\274m_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"},
|
||
// Some invalid characters...
|
||
{"\\377\\2\\225\\264", "\xff\x02\x95\xb4"},
|
||
} {
|
||
t.Run(test.input, func(t *testing.T) {
|
||
got, err := Unvis(test.input, DefaultVisFlags)
|
||
require.NoErrorf(t, err, "unvis(%q)", test.input)
|
||
assert.Equal(t, test.expected, got, "unvis(%q)", test.input)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestUnvisHexEscape(t *testing.T) {
|
||
for _, test := range []struct {
|
||
input string
|
||
expected string
|
||
}{
|
||
{"", ""},
|
||
{"\\x01", "\x01"},
|
||
{"\\x01\\x02\\x7a", "\x01\x02\x7a"},
|
||
{"this is a test\\x13\\x52\\x6f", "this is a test\x13\x52\x6f"},
|
||
{"\\x170YET\\x01a\\x22nother test\\x11", "\x170YET\x01a\x22nother test\x11"},
|
||
{"\\\\x007more\\\\x215testing", "\\x007more\\x215testing"},
|
||
// Make sure that decoding unicode works properly, when it's been encoded as single bytes.
|
||
{"\\xf0\\x9f\\x95\\xb4", "\U0001f574"},
|
||
{"T\\xc3\\x9cB\\xc4\\xb0TAK_UEKAE_K\\xc3\\xb6k_Sertifika_Hizmet_Sa\\xc4\\x9flay\\xc4\\xb1c\\xc4\\xb1s\\xc4\\xb1_-_S\\xc3\\xbcr\\xc3\\xbcm_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"},
|
||
// Some invalid characters...
|
||
{"\\xff\\x02\\x95\\xb4", "\xff\x02\x95\xb4"},
|
||
} {
|
||
t.Run(test.input, func(t *testing.T) {
|
||
got, err := Unvis(test.input, DefaultVisFlags)
|
||
require.NoErrorf(t, err, "unvis(%q)", test.input)
|
||
assert.Equal(t, test.expected, got, "unvis(%q)", test.input)
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestUnvisUnicode(t *testing.T) {
|
||
// Ensure that unicode strings are not messed up by Unvis.
|
||
for _, test := range []string{
|
||
"",
|
||
"this.is.a.normal_string",
|
||
"AC_Raíz_Certicámara_S.A..pem",
|
||
"NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem",
|
||
"TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem",
|
||
} {
|
||
t.Run(test, func(t *testing.T) {
|
||
enc, err := Unvis(test, DefaultVisFlags)
|
||
require.NoErrorf(t, err, "unvis(%q)", test)
|
||
assert.Equalf(t, test, enc, "decoding of %q should be the same as original", test)
|
||
})
|
||
}
|
||
}
|
||
|
||
func BenchmarkUnvis(b *testing.B) {
|
||
doBench := func(b *testing.B, text string) {
|
||
encoded, err := Vis(text, DefaultVisFlags)
|
||
require.NoErrorf(b, err, "vis(%q)", text)
|
||
|
||
decoded, err := Unvis(encoded, DefaultVisFlags)
|
||
require.NoErrorf(b, err, "unvis(vis(%q) = %q)", text, encoded)
|
||
require.Equalf(b, text, decoded, "unvis(vis(%q) = %q)", text, encoded)
|
||
|
||
for b.Loop() {
|
||
_, _ = Unvis(encoded, DefaultVisFlags)
|
||
}
|
||
}
|
||
|
||
b.Run("NoChange", func(b *testing.B) {
|
||
text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||
doBench(b, text)
|
||
})
|
||
|
||
b.Run("Binary", func(b *testing.B) {
|
||
var data [32]byte
|
||
n, err := rand.Read(data[:])
|
||
require.NoError(b, err, "rand.Read")
|
||
require.Equal(b, len(data), n, "rand.Read len return")
|
||
|
||
text := string(data[:])
|
||
doBench(b, text)
|
||
})
|
||
|
||
// The rest of these test strings come from a set of test strings collated
|
||
// in <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
|
||
|
||
b.Run("ASCII", func(b *testing.B) {
|
||
text := "The quick brown fox jumps over the lazy dog."
|
||
doBench(b, text)
|
||
})
|
||
|
||
b.Run("German", func(b *testing.B) {
|
||
text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"
|
||
doBench(b, text)
|
||
})
|
||
|
||
b.Run("Russian", func(b *testing.B) {
|
||
text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"
|
||
doBench(b, text)
|
||
})
|
||
|
||
b.Run("Japanese", func(b *testing.B) {
|
||
text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"
|
||
doBench(b, text)
|
||
})
|
||
}
|