go-mtree/pkg/govis/unvis_test.go

// SPDX-License-Identifier: Apache-2.0
/*
 * govis: unicode aware vis(3) encoding implementation
 * Copyright (C) 2017-2025 SUSE LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package govis

import (
	"crypto/rand"
	"strconv"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestUnvisError feeds malformed escape sequences to Unvis and checks that
// each one is rejected with an error matching the expected one.
func TestUnvisError(t *testing.T) {
	type errorCase struct {
		input string
		err   error
	}

	cases := []errorCase{
		// Octal escapes whose numeric value is too large to be accepted.
		{"\\777", errOutsideLatin1},
		{"\\420\\322\\455", errOutsideLatin1},
		{"\\652\\233", errOutsideLatin1},
		// Escapes that are cut short, use an unrecognised escape character,
		// or carry an operand that cannot be decoded.
		{"\\", errEndOfString},
		{"\\J", errUnknownEscapeChar},
		{"a bad slash: \\", errEndOfString},
		{"testing -- \\x", errEndOfString},
		{"\\xG0 test", strconv.ErrSyntax},
		{"  abc \\Mx", errUnknownEscapeChar},
		{"\\Mx", errUnknownEscapeChar},
		{"\\M-", errEndOfString},
		{"\\M-\u5000", errOutsideLatin1},
		{"\\M^", errEndOfString},
		{"\\^", errEndOfString},
		{"\\^\u5000", errOutsideLatin1},
		{"\\M", errEndOfString},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			_, gotErr := Unvis(tc.input, DefaultVisFlags)
			// Stop early if nothing failed; ErrorIs on nil would only add noise.
			require.Errorf(t, gotErr, "invalid escape string should give an error")
			assert.ErrorIs(t, gotErr, tc.err, "unexpected error from invalid escape string")
		})
	}
}

// TestUnvisCStyleEscape checks that Unvis decodes C-style backslash escapes
// (such as \n, \t and \s) into the expected output.
func TestUnvisCStyleEscape(t *testing.T) {
	type escapeCase struct {
		input string
		want  string
	}

	cases := []escapeCase{
		{"", ""},
		{"\\n\\v\\t\\s", "\n\v\t "},
		{"\\\\n\\tt", "\\n\tt"},
		{"\\b", "\b"},
		{"\\r\\b\\n", "\r\b\n"},
		{"\\a\\a\\b", "\x07\x07\b"},
		{"\\f\\s\\E", "\f \x1b"},
		// Hidden markers (backslash-newline and \$) decode to nothing. They
		// aren't actually generated by vis(3), but they are supported
		// regardless.
		{"test\\\ning", "testing"},
		{"test\\$\\$ing", "testing"},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			decoded, err := Unvis(tc.input, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.input)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.input)
		})
	}
}

// TestUnvisMetaEscape checks that Unvis decodes the \M-, \M^ and \^ escape
// forms into the expected bytes.
func TestUnvisMetaEscape(t *testing.T) {
	type metaCase struct {
		input string
		want  string
	}

	cases := []metaCase{
		{"", ""},
		{"\\M^ ?\\^ ", "\x80?\x00"},
		{"\\M- ?\\^?", "\xa0?\x7f"},
		{"\\M-x butterfly\\M^?", "\xf8 butterfly\xff"},
		{"\\M^X steady-hand \\^& needle", "\x98 steady-hand \x06 needle"},
		// TODO: Add more cases here once there is a secondary source
		//       against which the expected outputs can be verified.
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			decoded, err := Unvis(tc.input, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.input)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.input)
		})
	}
}

// TestUnvisOctalEscape checks that Unvis decodes octal escapes of one to
// three digits, including runs of escapes that together form UTF-8 text.
func TestUnvisOctalEscape(t *testing.T) {
	type octalCase struct {
		input string
		want  string
	}

	cases := []octalCase{
		{"", ""},
		{"\\1", "\001"},
		{"\\01\\02\\3", "\001\002\003"},
		{"\\001\\023\\32", "\001\023\032"},
		{"this is a test\\0k1\\133", "this is a test\000k1\133"},
		{"\\170YET\\01another test\\1\\\\82", "\170YET\001another test\001\\82"},
		{"\\177MORE tests\\09a", "\177MORE tests\x009a"},
		{"\\\\710more\\1215testing", "\\710more\1215testing"},
		// Unicode text that was encoded one byte per escape must decode back
		// to the original string.
		{"\\360\\237\\225\\264", "\U0001f574"},
		{"T\\303\\234B\\304\\260TAK_UEKAE_K\\303\\266k_Sertifika_Hizmet_Sa\\304\\237lay\\304\\261c\\304\\261s\\304\\261_-_S\\303\\274r\\303\\274m_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"},
		// Byte sequences that are not valid UTF-8 are passed through as-is.
		{"\\377\\2\\225\\264", "\xff\x02\x95\xb4"},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			decoded, err := Unvis(tc.input, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.input)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.input)
		})
	}
}

// TestUnvisHexEscape checks that Unvis decodes \xNN hexadecimal escapes,
// including runs of escapes that together form UTF-8 text.
func TestUnvisHexEscape(t *testing.T) {
	type hexCase struct {
		input string
		want  string
	}

	cases := []hexCase{
		{"", ""},
		{"\\x01", "\x01"},
		{"\\x01\\x02\\x7a", "\x01\x02\x7a"},
		{"this is a test\\x13\\x52\\x6f", "this is a test\x13\x52\x6f"},
		{"\\x170YET\\x01a\\x22nother test\\x11", "\x170YET\x01a\x22nother test\x11"},
		{"\\\\x007more\\\\x215testing", "\\x007more\\x215testing"},
		// Unicode text that was encoded one byte per escape must decode back
		// to the original string.
		{"\\xf0\\x9f\\x95\\xb4", "\U0001f574"},
		{"T\\xc3\\x9cB\\xc4\\xb0TAK_UEKAE_K\\xc3\\xb6k_Sertifika_Hizmet_Sa\\xc4\\x9flay\\xc4\\xb1c\\xc4\\xb1s\\xc4\\xb1_-_S\\xc3\\xbcr\\xc3\\xbcm_3.pem", "TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem"},
		// Byte sequences that are not valid UTF-8 are passed through as-is.
		{"\\xff\\x02\\x95\\xb4", "\xff\x02\x95\xb4"},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			decoded, err := Unvis(tc.input, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", tc.input)
			assert.Equal(t, tc.want, decoded, "unvis(%q)", tc.input)
		})
	}
}

// TestUnvisUnicode checks that strings containing no escape sequences,
// including non-ASCII text, come out of Unvis unchanged.
func TestUnvisUnicode(t *testing.T) {
	inputs := []string{
		"",
		"this.is.a.normal_string",
		"AC_Raíz_Certicámara_S.A..pem",
		"NetLock_Arany_=Class_Gold=_Főtanúsítvány.pem",
		"TÜBİTAK_UEKAE_Kök_Sertifika_Hizmet_Sağlayıcısı_-_Sürüm_3.pem",
	}

	for _, plain := range inputs {
		t.Run(plain, func(t *testing.T) {
			decoded, err := Unvis(plain, DefaultVisFlags)
			require.NoErrorf(t, err, "unvis(%q)", plain)
			assert.Equalf(t, plain, decoded, "decoding of %q should be the same as original", plain)
		})
	}
}

// BenchmarkUnvis times Unvis on the Vis-encoded form of several sample
// texts, one sub-benchmark per sample.
func BenchmarkUnvis(b *testing.B) {
	// roundTrip encodes plain with Vis, verifies that Unvis restores the
	// original text, and then repeatedly decodes the encoded form in the
	// benchmark loop.
	roundTrip := func(b *testing.B, plain string) {
		visText, err := Vis(plain, DefaultVisFlags)
		require.NoErrorf(b, err, "vis(%q)", plain)

		restored, err := Unvis(visText, DefaultVisFlags)
		require.NoErrorf(b, err, "unvis(vis(%q) = %q)", plain, visText)
		require.Equalf(b, plain, restored, "unvis(vis(%q) = %q)", plain, visText)

		// The result is discarded on purpose: this exact call was already
		// validated above, so only its cost matters here.
		for b.Loop() {
			_, _ = Unvis(visText, DefaultVisFlags)
		}
	}

	b.Run("NoChange", func(b *testing.B) {
		roundTrip(b, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
	})

	b.Run("Binary", func(b *testing.B) {
		// 32 random bytes, regenerated on every invocation of this function.
		raw := make([]byte, 32)
		n, err := rand.Read(raw)
		require.NoError(b, err, "rand.Read")
		require.Equal(b, len(raw), n, "rand.Read len return")

		roundTrip(b, string(raw))
	})

	// The remaining sample strings are taken from the collection at
	// <https://www.w3.org/2001/06/utf-8-test/quickbrown.html>.
	samples := []struct {
		name string
		text string
	}{
		{"ASCII", "The quick brown fox jumps over the lazy dog."},
		{"German", "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg"},
		{"Russian", "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!"},
		{"Japanese", "いろはにほへとちりぬるをイロハニホヘトチリヌルヲ"},
	}
	for _, sample := range samples {
		b.Run(sample.name, func(b *testing.B) {
			roundTrip(b, sample.text)
		})
	}
}