// Reconstructed from a git diff that adds tar/common/utf8.go and
// tar/common/utf8_test.go; both files declare package common.

import (
	"testing"
	"unicode/utf8"
)

// IsValidUtf8String reports whether s contains no Unicode replacement
// character (U+FFFD).
//
// Ranging over a string yields U+FFFD for every byte that is not part of a
// well-formed UTF-8 sequence, so malformed input returns false. A correctly
// encoded U+FFFD that is already present in s also returns false. The empty
// string returns true.
func IsValidUtf8String(s string) bool {
	for _, r := range s {
		if r == utf8.RuneError {
			return false
		}
	}
	return true
}

// IsValidUtf8Btyes is the []byte counterpart of IsValidUtf8String: it reports
// whether b decodes without producing the replacement character (U+FFFD).
//
// The bytes are decoded in place rather than via string(b), so the input is
// not copied. A nil or empty slice returns true. The spelling of the name is
// kept as-is so existing callers keep compiling.
func IsValidUtf8Btyes(b []byte) bool {
	for len(b) > 0 {
		// DecodeRune returns (RuneError, 1) for malformed input and
		// (RuneError, 3) for an encoded U+FFFD; both are rejected.
		r, size := utf8.DecodeRune(b)
		if r == utf8.RuneError {
			return false
		}
		b = b[size:]
	}
	return true
}

// TestStringValidation checks IsValidUtf8String against a string containing
// U+FFFD and a string without it.
func TestStringValidation(t *testing.T) {
	cases := []struct {
		value  string
		result bool
	}{
		{"aä\uFFFD本☺", false},
		{"aä本☺", true},
	}

	for _, c := range cases {
		if got := IsValidUtf8String(c.value); got != c.result {
			t.Errorf("string %q - expected %v, got %v", c.value, c.result, got)
		}
	}
}

// TestBytesValidation checks IsValidUtf8Btyes against a lone 0xE4 byte (not a
// complete UTF-8 sequence) and against well-formed multi-byte text.
func TestBytesValidation(t *testing.T) {
	cases := []struct {
		value  []byte
		result bool
	}{
		{[]byte{0xE4}, false},
		{[]byte("aä本☺"), true},
	}

	for _, c := range cases {
		if got := IsValidUtf8Btyes(c.value); got != c.result {
			t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got)
		}
	}
}