diff --git a/tar/storage/entry.go b/tar/storage/entry.go
index 38fe7ba..a152ac2 100644
--- a/tar/storage/entry.go
+++ b/tar/storage/entry.go
@@ -1,5 +1,7 @@
 package storage
 
+import "github.com/vbatts/tar-split/tar/common"
+
 // Entries is for sorting by Position
 type Entries []Entry
 
@@ -33,7 +35,56 @@ const (
 type Entry struct {
 	Type     Type   `json:"type"`
 	Name     string `json:"name,omitempty"`
+	NameRaw  []byte `json:"name_raw,omitempty"`
 	Size     int64  `json:"size,omitempty"`
 	Payload  []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
 	Position int    `json:"position"`
 }
+
+// SetName stores name on the entry. A name that is valid UTF-8 goes in Name;
+// any other name is kept byte-for-byte in NameRaw, because encoding/json
+// replaces invalid UTF-8 in string values when marshalling. The field that is
+// not used is reset, so GetName never reports a stale value from an earlier
+// call. See https://github.com/vbatts/tar-split/issues/17
+func (e *Entry) SetName(name string) {
+	if common.IsValidUtf8String(name) {
+		e.Name = name
+		e.NameRaw = nil
+		return
+	}
+	e.Name = ""
+	e.NameRaw = []byte(name)
+}
+
+// SetNameBytes is the []byte counterpart of SetName: valid UTF-8 is stored as
+// a string in Name, anything else is stored in NameRaw, and the unused field
+// is reset. In the NameRaw case the slice is retained, not copied, so the
+// caller must not modify it afterwards.
+func (e *Entry) SetNameBytes(name []byte) {
+	if common.IsValidUtf8Btyes(name) {
+		e.Name = string(name)
+		e.NameRaw = nil
+		return
+	}
+	e.Name = ""
+	e.NameRaw = name
+}
+
+// GetName returns the entry's name as a string, whichever field holds it.
+// NameRaw takes precedence when it is non-empty.
+func (e *Entry) GetName() string {
+	if len(e.NameRaw) > 0 {
+		return string(e.NameRaw)
+	}
+	return e.Name
+}
+
+// GetNameBytes returns the entry's name as bytes, whichever field holds it.
+// NameRaw takes precedence when it is non-empty and is returned without
+// copying.
+func (e *Entry) GetNameBytes() []byte {
+	if len(e.NameRaw) > 0 {
+		return e.NameRaw
+	}
+	return []byte(e.Name)
+}
diff --git a/tar/storage/entry_test.go b/tar/storage/entry_test.go
index c797bca..90d103e 100644
--- a/tar/storage/entry_test.go
+++ b/tar/storage/entry_test.go
@@ -39,10 +39,10 @@ func TestEntries(t *testing.T) {
 func TestFile(t *testing.T) {
 	f := Entry{
 		Type:     FileType,
-		Name:     "./hello.txt",
 		Size:     100,
 		Position: 2,
 	}
+	f.SetName("./hello.txt")
 
 	buf, err := json.Marshal(f)
 	if err != nil {
@@ -54,8 +54,37 @@ func TestFile(t 
*testing.T) { t.Fatal(err) } - if f.Name != f1.Name { - t.Errorf("expected Name %q, got %q", f.Name, f1.Name) + if f.GetName() != f1.GetName() { + t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) + } + if f.Size != f1.Size { + t.Errorf("expected Size %q, got %q", f.Size, f1.Size) + } + if f.Position != f1.Position { + t.Errorf("expected Position %q, got %q", f.Position, f1.Position) + } +} + +func TestFileRaw(t *testing.T) { + f := Entry{ + Type: FileType, + Size: 100, + Position: 2, + } + f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74}) + + buf, err := json.Marshal(f) + if err != nil { + t.Fatal(err) + } + + f1 := Entry{} + if err = json.Unmarshal(buf, &f1); err != nil { + t.Fatal(err) + } + + if f.GetName() != f1.GetName() { + t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index a02a19a..1ea8208 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -6,6 +6,8 @@ import ( "errors" "io" "path/filepath" + + "github.com/vbatts/tar-split/tar/common" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the @@ -61,7 +63,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // check for dup name if e.Type == FileType { - cName := filepath.Clean(e.Name) + cName := filepath.Clean(e.GetName()) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } @@ -93,9 +95,17 @@ type jsonPacker struct { type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { + // if Name is not valid utf8, switch it to raw first. 
+ if e.Name != "" { + if !common.IsValidUtf8String(e.Name) { + e.NameRaw = []byte(e.Name) + e.Name = "" + } + } + // check early for dup name if e.Type == FileType { - cName := filepath.Clean(e.Name) + cName := filepath.Clean(e.GetName()) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath }