forked from mirrors/tar-split
tar/storage: work with raw (invalid utf8) names
When the entry name is not UTF-8, for example ISO-8859-1, then store the raw bytes. To accommodate this, we will have getters and setters for the entry's name now. Since this most heavily affects the json marshalling, we'll double check the sanity of the name before storing it in the JSONPacker.
This commit is contained in:
parent
39d06b9dc4
commit
032efafc29
3 changed files with 87 additions and 5 deletions
|
@ -1,5 +1,11 @@
|
||||||
package storage
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/vbatts/tar-split/tar/common"
|
||||||
|
)
|
||||||
|
|
||||||
// Entries is for sorting by Position
|
// Entries is for sorting by Position
|
||||||
type Entries []Entry
|
type Entries []Entry
|
||||||
|
|
||||||
|
@ -33,7 +39,44 @@ const (
|
||||||
type Entry struct {
|
type Entry struct {
|
||||||
Type Type `json:"type"`
|
Type Type `json:"type"`
|
||||||
Name string `json:"name,omitempty"`
|
Name string `json:"name,omitempty"`
|
||||||
|
NameRaw []byte `json:"name_raw,omitempty"`
|
||||||
Size int64 `json:"size,omitempty"`
|
Size int64 `json:"size,omitempty"`
|
||||||
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
|
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
|
||||||
Position int `json:"position"`
|
Position int `json:"position"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetName will check name for valid UTF-8 string, and set the appropriate
|
||||||
|
// field. See https://github.com/vbatts/tar-split/issues/17
|
||||||
|
func (e *Entry) SetName(name string) {
|
||||||
|
if common.IsValidUtf8String(name) {
|
||||||
|
e.Name = name
|
||||||
|
} else {
|
||||||
|
e.NameRaw = []byte(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
|
||||||
|
// field
|
||||||
|
func (e *Entry) SetNameBytes(name []byte) {
|
||||||
|
if !common.IsValidUtf8Btyes(name) {
|
||||||
|
e.NameRaw = name
|
||||||
|
} else {
|
||||||
|
e.Name = string(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetName returns the string for the entry's name, regardless of the field stored in
|
||||||
|
func (e *Entry) GetName() string {
|
||||||
|
if len(e.NameRaw) > 0 {
|
||||||
|
return fmt.Sprintf("%s", e.NameRaw)
|
||||||
|
}
|
||||||
|
return e.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in
|
||||||
|
func (e *Entry) GetNameBytes() []byte {
|
||||||
|
if len(e.NameRaw) > 0 {
|
||||||
|
return e.NameRaw
|
||||||
|
}
|
||||||
|
return []byte(e.Name)
|
||||||
|
}
|
||||||
|
|
|
@ -39,10 +39,10 @@ func TestEntries(t *testing.T) {
|
||||||
func TestFile(t *testing.T) {
|
func TestFile(t *testing.T) {
|
||||||
f := Entry{
|
f := Entry{
|
||||||
Type: FileType,
|
Type: FileType,
|
||||||
Name: "./hello.txt",
|
|
||||||
Size: 100,
|
Size: 100,
|
||||||
Position: 2,
|
Position: 2,
|
||||||
}
|
}
|
||||||
|
f.SetName("./hello.txt")
|
||||||
|
|
||||||
buf, err := json.Marshal(f)
|
buf, err := json.Marshal(f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -54,8 +54,37 @@ func TestFile(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.Name != f1.Name {
|
if f.GetName() != f1.GetName() {
|
||||||
t.Errorf("expected Name %q, got %q", f.Name, f1.Name)
|
t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
|
||||||
|
}
|
||||||
|
if f.Size != f1.Size {
|
||||||
|
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
||||||
|
}
|
||||||
|
if f.Position != f1.Position {
|
||||||
|
t.Errorf("expected Position %q, got %q", f.Position, f1.Position)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFileRaw(t *testing.T) {
|
||||||
|
f := Entry{
|
||||||
|
Type: FileType,
|
||||||
|
Size: 100,
|
||||||
|
Position: 2,
|
||||||
|
}
|
||||||
|
f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74})
|
||||||
|
|
||||||
|
buf, err := json.Marshal(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f1 := Entry{}
|
||||||
|
if err = json.Unmarshal(buf, &f1); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if f.GetName() != f1.GetName() {
|
||||||
|
t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
|
||||||
}
|
}
|
||||||
if f.Size != f1.Size {
|
if f.Size != f1.Size {
|
||||||
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
||||||
|
|
|
@ -6,6 +6,8 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/vbatts/tar-split/tar/common"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
||||||
|
@ -61,7 +63,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
|
||||||
|
|
||||||
// check for dup name
|
// check for dup name
|
||||||
if e.Type == FileType {
|
if e.Type == FileType {
|
||||||
cName := filepath.Clean(e.Name)
|
cName := filepath.Clean(e.GetName())
|
||||||
if _, ok := jup.seen[cName]; ok {
|
if _, ok := jup.seen[cName]; ok {
|
||||||
return nil, ErrDuplicatePath
|
return nil, ErrDuplicatePath
|
||||||
}
|
}
|
||||||
|
@ -93,9 +95,17 @@ type jsonPacker struct {
|
||||||
type seenNames map[string]struct{}
|
type seenNames map[string]struct{}
|
||||||
|
|
||||||
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
||||||
|
// if Name is not valid utf8, switch it to raw first.
|
||||||
|
if e.Name != "" {
|
||||||
|
if !common.IsValidUtf8String(e.Name) {
|
||||||
|
e.NameRaw = []byte(e.Name)
|
||||||
|
e.Name = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// check early for dup name
|
// check early for dup name
|
||||||
if e.Type == FileType {
|
if e.Type == FileType {
|
||||||
cName := filepath.Clean(e.Name)
|
cName := filepath.Clean(e.GetName())
|
||||||
if _, ok := jp.seen[cName]; ok {
|
if _, ok := jp.seen[cName]; ok {
|
||||||
return -1, ErrDuplicatePath
|
return -1, ErrDuplicatePath
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue