// SPDX-License-Identifier: Apache-2.0 /* * govis: unicode aware vis(3) encoding implementation * Copyright (C) 2017-2025 SUSE LLC. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package govis import ( "crypto/rand" "strconv" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestUnvisError(t *testing.T) { for _, test := range []struct { input string err error }{ // Octal escape codes allow you to specify invalid ASCII values. {"\\777", errOutsideLatin1}, {"\\420\\322\\455", errOutsideLatin1}, {"\\652\\233", errOutsideLatin1}, // Escapes that end abruptly. {"\\", errEndOfString}, {"\\J", errUnknownEscapeChar}, {"a bad slash: \\", errEndOfString}, {"testing -- \\x", errEndOfString}, {"\\xG0 test", strconv.ErrSyntax}, {" abc \\Mx", errUnknownEscapeChar}, {"\\Mx", errUnknownEscapeChar}, {"\\M-", errEndOfString}, {"\\M-\u5000", errOutsideLatin1}, {"\\M^", errEndOfString}, {"\\^", errEndOfString}, {"\\^\u5000", errOutsideLatin1}, {"\\M", errEndOfString}, } { t.Run(test.input, func(t *testing.T) { _, err := Unvis(test.input, DefaultVisFlags) require.Errorf(t, err, "invalid escape string should give an error") assert.ErrorIs(t, err, test.err, "unexpected error from invalid escape string") }) } } func TestUnvisCStyleEscape(t *testing.T) { for _, test := range []struct { input string expected string }{ {"", ""}, {"\\n\\v\\t\\s", "\n\v\t "}, {"\\\\n\\tt", "\\n\tt"}, {"\\b", "\b"}, {"\\r\\b\\n", "\r\b\n"}, {"\\a\\a\\b", "\x07\x07\b"}, {"\\f\\s\\E", "\f \x1b"}, // Hidden markers. They actually aren't generated by vis(3) but for // some reason, they're supported... {"test\\\ning", "testing"}, {"test\\$\\$ing", "testing"}, } { t.Run(test.input, func(t *testing.T) { got, err := Unvis(test.input, DefaultVisFlags) require.NoErrorf(t, err, "unvis(%q)", test.input) assert.Equal(t, test.expected, got, "unvis(%q)", test.input) }) } } func TestUnvisMetaEscape(t *testing.T) { for _, test := range []struct { input string expected string }{ {"", ""}, {"\\M^ ?\\^ ", "\x80?\x00"}, {"\\M- ?\\^?", "\xa0?\x7f"}, {"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"}, {"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"}, // TODO: Add some more of these tests, but I need to have some // secondary source to verify these outputs properly. } { t.Run(test.input, func(t *testing.T) { got, err := Unvis(test.input, DefaultVisFlags) require.NoErrorf(t, err, "unvis(%q)", test.input) assert.Equal(t, test.expected, got, "unvis(%q)", test.input) }) } } func TestUnvisOctalEscape(t *testing.T) { for _, test := range []struct { input string expected string }{ {"", ""}, {"\\1", "\001"}, {"\\01\\02\\3", "\001\002\003"}, {"\\001\\023\\32", "\001\023\032"}, {"this is a test\\0k1\\133", "this is a test\000k1\133"}, {"\\170YET\\01another test\\1\\\\82", "\170YET\001another test\001\\82"}, {"\\177MORE tests\\09a", "\177MORE tests\x009a"}, {"\\\\710more\\1215testing", "\\710more\1215testing"}, // Make sure that decoding unicode works properly, when it's been encoded as single bytes. {"\\360\\237\\225\\264", "\U0001f574"}, {"T\\303\\234B\\304\\260TAK_UEKAE_K\\303\\266k_Sertifika_Hizmet_Sa\\304\\237lay\\304\\261c\\304\\261s\\304\\261_-_S\\303\\274r\\303\\274m_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"}, // Some invalid characters... {"\\377\\2\\225\\264", "\xff\x02\x95\xb4"}, } { t.Run(test.input, func(t *testing.T) { got, err := Unvis(test.input, DefaultVisFlags) require.NoErrorf(t, err, "unvis(%q)", test.input) assert.Equal(t, test.expected, got, "unvis(%q)", test.input) }) } } func TestUnvisHexEscape(t *testing.T) { for _, test := range []struct { input string expected string }{ {"", ""}, {"\\x01", "\x01"}, {"\\x01\\x02\\x7a", "\x01\x02\x7a"}, {"this is a test\\x13\\x52\\x6f", "this is a test\x13\x52\x6f"}, {"\\x170YET\\x01a\\x22nother test\\x11", "\x170YET\x01a\x22nother test\x11"}, {"\\\\x007more\\\\x215testing", "\\x007more\\x215testing"}, // Make sure that decoding unicode works properly, when it's been encoded as single bytes. {"\\xf0\\x9f\\x95\\xb4", "\U0001f574"}, {"T\\xc3\\x9cB\\xc4\\xb0TAK_UEKAE_K\\xc3\\xb6k_Sertifika_Hizmet_Sa\\xc4\\x9flay\\xc4\\xb1c\\xc4\\xb1s\\xc4\\xb1_-_S\\xc3\\xbcr\\xc3\\xbcm_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"}, // Some invalid characters... {"\\xff\\x02\\x95\\xb4", "\xff\x02\x95\xb4"}, } { t.Run(test.input, func(t *testing.T) { got, err := Unvis(test.input, DefaultVisFlags) require.NoErrorf(t, err, "unvis(%q)", test.input) assert.Equal(t, test.expected, got, "unvis(%q)", test.input) }) } } func TestUnvisUnicode(t *testing.T) { // Ensure that unicode strings are not messed up by Unvis. for _, test := range []string{ "", "this.is.a.normal_string", "AC_Raíz_Certicámara_S.A..pem", "NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem", } { t.Run(test, func(t *testing.T) { enc, err := Unvis(test, DefaultVisFlags) require.NoErrorf(t, err, "unvis(%q)", test) assert.Equalf(t, test, enc, "decoding of %q should be the same as original", test) }) } } func BenchmarkUnvis(b *testing.B) { doBench := func(b *testing.B, text string) { encoded, err := Vis(text, DefaultVisFlags) require.NoErrorf(b, err, "vis(%q)", text) decoded, err := Unvis(encoded, DefaultVisFlags) require.NoErrorf(b, err, "unvis(vis(%q) = %q)", text, encoded) require.Equalf(b, text, decoded, "unvis(vis(%q) = %q)", text, encoded) for b.Loop() { _, _ = Unvis(encoded, DefaultVisFlags) } } b.Run("NoChange", func(b *testing.B) { text := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" doBench(b, text) }) b.Run("Binary", func(b *testing.B) { var data [32]byte n, err := rand.Read(data[:]) require.NoError(b, err, "rand.Read") require.Equal(b, len(data), n, "rand.Read len return") text := string(data[:]) doBench(b, text) }) // The rest of these test strings come from a set of test strings collated // in . b.Run("ASCII", func(b *testing.B) { text := "The quick brown fox jumps over the lazy dog." doBench(b, text) }) b.Run("German", func(b *testing.B) { text := "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg" doBench(b, text) }) b.Run("Russian", func(b *testing.B) { text := "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!" doBench(b, text) }) b.Run("Japanese", func(b *testing.B) { text := "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ" doBench(b, text) }) }