forked from mirrors/tar-split
common: remove in favor of stdlib unicode/utf8
This commit is contained in:
parent
7ef16e6f67
commit
7e38cefd4b
5 changed files with 9 additions and 75 deletions
|
@ -10,9 +10,9 @@ import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/vbatts/tar-split/archive/tar"
|
"github.com/vbatts/tar-split/archive/tar"
|
||||||
"github.com/vbatts/tar-split/tar/common"
|
|
||||||
"github.com/vbatts/tar-split/tar/storage"
|
"github.com/vbatts/tar-split/tar/storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ func TestISO8859(t *testing.T) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
fmt.Println(hdr.Name)
|
fmt.Println(hdr.Name)
|
||||||
if !common.IsValidUtf8String(hdr.Name) {
|
if !utf8.ValidString(hdr.Name) {
|
||||||
fmt.Println([]byte(hdr.Name))
|
fmt.Println([]byte(hdr.Name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +0,0 @@
|
||||||
package common
|
|
||||||
|
|
||||||
// IsValidUtf8String checks for in valid UTF-8 characters
|
|
||||||
func IsValidUtf8String(s string) bool {
|
|
||||||
return InvalidUtf8Index([]byte(s)) == -1
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsValidUtf8Btyes checks for in valid UTF-8 characters
|
|
||||||
func IsValidUtf8Btyes(b []byte) bool {
|
|
||||||
return InvalidUtf8Index(b) == -1
|
|
||||||
}
|
|
||||||
|
|
||||||
// InvalidUtf8Index returns the offset of the first invalid UTF-8 character.
|
|
||||||
// Default is to return -1 for a wholly valid sequence.
|
|
||||||
func InvalidUtf8Index(b []byte) int {
|
|
||||||
for i, r := range string(b) {
|
|
||||||
if int(r) == 0xfffd {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
|
@ -1,43 +0,0 @@
|
||||||
package common
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
|
|
||||||
func TestStringValidation(t *testing.T) {
|
|
||||||
cases := []struct {
|
|
||||||
value string
|
|
||||||
result bool
|
|
||||||
offset int
|
|
||||||
}{
|
|
||||||
{"aä\uFFFD本☺", false, 3},
|
|
||||||
{"aä本☺", true, -1},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range cases {
|
|
||||||
if i := InvalidUtf8Index([]byte(c.value)); i != c.offset {
|
|
||||||
t.Errorf("string %q - offset expected %d, got %d", c.value, c.offset, i)
|
|
||||||
}
|
|
||||||
if got := IsValidUtf8String(c.value); got != c.result {
|
|
||||||
t.Errorf("string %q - expected %v, got %v", c.value, c.result, got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestBytesValidation(t *testing.T) {
|
|
||||||
cases := []struct {
|
|
||||||
value []byte
|
|
||||||
result bool
|
|
||||||
offset int
|
|
||||||
}{
|
|
||||||
{[]byte{0xE4}, false, 0},
|
|
||||||
{[]byte("aä本☺"), true, -1},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range cases {
|
|
||||||
if i := InvalidUtf8Index(c.value); i != c.offset {
|
|
||||||
t.Errorf("bytes %q - offset expected %d, got %d", c.value, c.offset, i)
|
|
||||||
}
|
|
||||||
if got := IsValidUtf8Btyes(c.value); got != c.result {
|
|
||||||
t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,6 +1,6 @@
|
||||||
package storage
|
package storage
|
||||||
|
|
||||||
import "github.com/vbatts/tar-split/tar/common"
|
import "unicode/utf8"
|
||||||
|
|
||||||
// Entries is for sorting by Position
|
// Entries is for sorting by Position
|
||||||
type Entries []Entry
|
type Entries []Entry
|
||||||
|
@ -44,7 +44,7 @@ type Entry struct {
|
||||||
// SetName will check name for valid UTF-8 string, and set the appropriate
|
// SetName will check name for valid UTF-8 string, and set the appropriate
|
||||||
// field. See https://github.com/vbatts/tar-split/issues/17
|
// field. See https://github.com/vbatts/tar-split/issues/17
|
||||||
func (e *Entry) SetName(name string) {
|
func (e *Entry) SetName(name string) {
|
||||||
if common.IsValidUtf8String(name) {
|
if utf8.ValidString(name) {
|
||||||
e.Name = name
|
e.Name = name
|
||||||
} else {
|
} else {
|
||||||
e.NameRaw = []byte(name)
|
e.NameRaw = []byte(name)
|
||||||
|
@ -54,10 +54,10 @@ func (e *Entry) SetName(name string) {
|
||||||
// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
|
// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
|
||||||
// field
|
// field
|
||||||
func (e *Entry) SetNameBytes(name []byte) {
|
func (e *Entry) SetNameBytes(name []byte) {
|
||||||
if !common.IsValidUtf8Btyes(name) {
|
if utf8.Valid(name) {
|
||||||
e.NameRaw = name
|
|
||||||
} else {
|
|
||||||
e.Name = string(name)
|
e.Name = string(name)
|
||||||
|
} else {
|
||||||
|
e.NameRaw = name
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,8 +6,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"unicode/utf8"
|
||||||
"github.com/vbatts/tar-split/tar/common"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
||||||
|
@ -97,7 +96,7 @@ type seenNames map[string]struct{}
|
||||||
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
||||||
// if Name is not valid utf8, switch it to raw first.
|
// if Name is not valid utf8, switch it to raw first.
|
||||||
if e.Name != "" {
|
if e.Name != "" {
|
||||||
if !common.IsValidUtf8String(e.Name) {
|
if !utf8.ValidString(e.Name) {
|
||||||
e.NameRaw = []byte(e.Name)
|
e.NameRaw = []byte(e.Name)
|
||||||
e.Name = ""
|
e.Name = ""
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue