mirror of
https://github.com/vbatts/tar-split.git
synced 2024-11-29 02:55:39 +00:00
commit
7f56c08c48
9 changed files with 215 additions and 17 deletions
|
@ -39,7 +39,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose
|
||||||
if entry.Size == 0 {
|
if entry.Size == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
fh, err := fg.Get(entry.Name)
|
fh, err := fg.Get(entry.GetName())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pw.CloseWithError(err)
|
pw.CloseWithError(err)
|
||||||
return
|
return
|
||||||
|
@ -56,7 +56,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose
|
||||||
// but since it's coming through the PipeReader, the context of
|
// but since it's coming through the PipeReader, the context of
|
||||||
// _which_ file would be lost...
|
// _which_ file would be lost...
|
||||||
fh.Close()
|
fh.Close()
|
||||||
pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name))
|
pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName()))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
fh.Close()
|
fh.Close()
|
||||||
|
|
|
@ -11,9 +11,38 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/vbatts/tar-split/archive/tar"
|
||||||
|
"github.com/vbatts/tar-split/tar/common"
|
||||||
"github.com/vbatts/tar-split/tar/storage"
|
"github.com/vbatts/tar-split/tar/storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestISO8859(t *testing.T) {
|
||||||
|
fh, err := os.Open("./testdata/iso-8859.tar.gz")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer fh.Close()
|
||||||
|
gzRdr, err := gzip.NewReader(fh)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer gzRdr.Close()
|
||||||
|
tr := tar.NewReader(gzRdr)
|
||||||
|
for {
|
||||||
|
hdr, err := tr.Next()
|
||||||
|
if err != nil {
|
||||||
|
if err != io.EOF {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Println(hdr.Name)
|
||||||
|
if !common.IsValidUtf8String(hdr.Name) {
|
||||||
|
fmt.Println([]byte(hdr.Name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var entries = []struct {
|
var entries = []struct {
|
||||||
Entry storage.Entry
|
Entry storage.Entry
|
||||||
Body []byte
|
Body []byte
|
||||||
|
@ -36,6 +65,15 @@ var entries = []struct {
|
||||||
},
|
},
|
||||||
Body: []byte("café con leche, por favor"),
|
Body: []byte("café con leche, por favor"),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Entry: storage.Entry{
|
||||||
|
Type: storage.FileType,
|
||||||
|
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip.
|
||||||
|
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187},
|
||||||
|
Size: 26,
|
||||||
|
},
|
||||||
|
Body: []byte("café con leche, por favor"),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
var entriesMangled = []struct {
|
var entriesMangled = []struct {
|
||||||
Entry storage.Entry
|
Entry storage.Entry
|
||||||
|
@ -61,6 +99,15 @@ var entriesMangled = []struct {
|
||||||
// san not con
|
// san not con
|
||||||
Body: []byte("café sans leche, por favor"),
|
Body: []byte("café sans leche, por favor"),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Entry: storage.Entry{
|
||||||
|
Type: storage.FileType,
|
||||||
|
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4},
|
||||||
|
Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187},
|
||||||
|
Size: 26,
|
||||||
|
},
|
||||||
|
Body: []byte("café con leche, por favor"),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTarStreamMangledGetterPutter(t *testing.T) {
|
func TestTarStreamMangledGetterPutter(t *testing.T) {
|
||||||
|
@ -69,19 +116,19 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
|
||||||
// first lets prep a GetPutter and Packer
|
// first lets prep a GetPutter and Packer
|
||||||
for i := range entries {
|
for i := range entries {
|
||||||
if entries[i].Entry.Type == storage.FileType {
|
if entries[i].Entry.Type == storage.FileType {
|
||||||
j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body))
|
j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
if j != entries[i].Entry.Size {
|
if j != entries[i].Entry.Size {
|
||||||
t.Errorf("size %q: expected %d; got %d",
|
t.Errorf("size %q: expected %d; got %d",
|
||||||
entries[i].Entry.Name,
|
entries[i].Entry.GetName(),
|
||||||
entries[i].Entry.Size,
|
entries[i].Entry.Size,
|
||||||
j)
|
j)
|
||||||
}
|
}
|
||||||
if !bytes.Equal(csum, entries[i].Entry.Payload) {
|
if !bytes.Equal(csum, entries[i].Entry.Payload) {
|
||||||
t.Errorf("checksum %q: expected %v; got %v",
|
t.Errorf("checksum %q: expected %v; got %v",
|
||||||
entries[i].Entry.Name,
|
entries[i].Entry.GetName(),
|
||||||
entries[i].Entry.Payload,
|
entries[i].Entry.Payload,
|
||||||
csum)
|
csum)
|
||||||
}
|
}
|
||||||
|
@ -90,7 +137,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
|
||||||
|
|
||||||
for _, e := range entriesMangled {
|
for _, e := range entriesMangled {
|
||||||
if e.Entry.Type == storage.FileType {
|
if e.Entry.Type == storage.FileType {
|
||||||
rdr, err := fgp.Get(e.Entry.Name)
|
rdr, err := fgp.Get(e.Entry.GetName())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
|
@ -105,7 +152,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
|
||||||
if bytes.Equal(csum, e.Entry.Payload) {
|
if bytes.Equal(csum, e.Entry.Payload) {
|
||||||
t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v",
|
t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v",
|
||||||
i,
|
i,
|
||||||
e.Entry.Name,
|
e.Entry.GetName(),
|
||||||
csum)
|
csum)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -121,6 +168,7 @@ func TestTarStream(t *testing.T) {
|
||||||
{"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240},
|
{"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240},
|
||||||
{"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480},
|
{"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480},
|
||||||
{"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880},
|
{"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880},
|
||||||
|
{"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
|
@ -163,7 +211,7 @@ func TestTarStream(t *testing.T) {
|
||||||
t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil))
|
t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil))
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Logf("%s", w.String()) // if we fail, then show the packed info
|
//t.Logf("%s", w.String()) // if we fail, then show the packed info
|
||||||
|
|
||||||
// If we've made it this far, then we'll turn it around and create a tar
|
// If we've made it this far, then we'll turn it around and create a tar
|
||||||
// stream from the packed metadata and buffered file contents.
|
// stream from the packed metadata and buffered file contents.
|
||||||
|
|
|
@ -92,13 +92,16 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// File entries added, regardless of size
|
entry := storage.Entry{
|
||||||
_, err = p.AddEntry(storage.Entry{
|
|
||||||
Type: storage.FileType,
|
Type: storage.FileType,
|
||||||
Name: hdr.Name,
|
|
||||||
Size: hdr.Size,
|
Size: hdr.Size,
|
||||||
Payload: csum,
|
Payload: csum,
|
||||||
})
|
}
|
||||||
|
// For proper marshalling of non-utf8 characters
|
||||||
|
entry.SetName(hdr.Name)
|
||||||
|
|
||||||
|
// File entries added, regardless of size
|
||||||
|
_, err = p.AddEntry(entry)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pW.CloseWithError(err)
|
pW.CloseWithError(err)
|
||||||
return
|
return
|
||||||
|
|
BIN
tar/asm/testdata/iso-8859.tar.gz
vendored
Normal file
BIN
tar/asm/testdata/iso-8859.tar.gz
vendored
Normal file
Binary file not shown.
22
tar/common/utf8.go
Normal file
22
tar/common/utf8.go
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
package common
|
||||||
|
|
||||||
|
// IsValidUtf8String checks for in valid UTF-8 characters
|
||||||
|
func IsValidUtf8String(s string) bool {
|
||||||
|
return InvalidUtf8Index([]byte(s)) == -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsValidUtf8Btyes checks for in valid UTF-8 characters
|
||||||
|
func IsValidUtf8Btyes(b []byte) bool {
|
||||||
|
return InvalidUtf8Index(b) == -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// InvalidUtf8Index returns the offset of the first invalid UTF-8 character.
|
||||||
|
// Default is to return -1 for a wholly valid sequence.
|
||||||
|
func InvalidUtf8Index(b []byte) int {
|
||||||
|
for i, r := range string(b) {
|
||||||
|
if int(r) == 0xfffd {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
43
tar/common/utf8_test.go
Normal file
43
tar/common/utf8_test.go
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
package common
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestStringValidation(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
value string
|
||||||
|
result bool
|
||||||
|
offset int
|
||||||
|
}{
|
||||||
|
{"aä\uFFFD本☺", false, 3},
|
||||||
|
{"aä本☺", true, -1},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
if i := InvalidUtf8Index([]byte(c.value)); i != c.offset {
|
||||||
|
t.Errorf("string %q - offset expected %d, got %d", c.value, c.offset, i)
|
||||||
|
}
|
||||||
|
if got := IsValidUtf8String(c.value); got != c.result {
|
||||||
|
t.Errorf("string %q - expected %v, got %v", c.value, c.result, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBytesValidation(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
value []byte
|
||||||
|
result bool
|
||||||
|
offset int
|
||||||
|
}{
|
||||||
|
{[]byte{0xE4}, false, 0},
|
||||||
|
{[]byte("aä本☺"), true, -1},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
if i := InvalidUtf8Index(c.value); i != c.offset {
|
||||||
|
t.Errorf("bytes %q - offset expected %d, got %d", c.value, c.offset, i)
|
||||||
|
}
|
||||||
|
if got := IsValidUtf8Btyes(c.value); got != c.result {
|
||||||
|
t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,5 +1,11 @@
|
||||||
package storage
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/vbatts/tar-split/tar/common"
|
||||||
|
)
|
||||||
|
|
||||||
// Entries is for sorting by Position
|
// Entries is for sorting by Position
|
||||||
type Entries []Entry
|
type Entries []Entry
|
||||||
|
|
||||||
|
@ -33,7 +39,44 @@ const (
|
||||||
type Entry struct {
|
type Entry struct {
|
||||||
Type Type `json:"type"`
|
Type Type `json:"type"`
|
||||||
Name string `json:"name,omitempty"`
|
Name string `json:"name,omitempty"`
|
||||||
|
NameRaw []byte `json:"name_raw,omitempty"`
|
||||||
Size int64 `json:"size,omitempty"`
|
Size int64 `json:"size,omitempty"`
|
||||||
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
|
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
|
||||||
Position int `json:"position"`
|
Position int `json:"position"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetName will check name for valid UTF-8 string, and set the appropriate
|
||||||
|
// field. See https://github.com/vbatts/tar-split/issues/17
|
||||||
|
func (e *Entry) SetName(name string) {
|
||||||
|
if common.IsValidUtf8String(name) {
|
||||||
|
e.Name = name
|
||||||
|
} else {
|
||||||
|
e.NameRaw = []byte(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
|
||||||
|
// field
|
||||||
|
func (e *Entry) SetNameBytes(name []byte) {
|
||||||
|
if !common.IsValidUtf8Btyes(name) {
|
||||||
|
e.NameRaw = name
|
||||||
|
} else {
|
||||||
|
e.Name = string(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetName returns the string for the entry's name, regardless of the field stored in
|
||||||
|
func (e *Entry) GetName() string {
|
||||||
|
if len(e.NameRaw) > 0 {
|
||||||
|
return fmt.Sprintf("%s", e.NameRaw)
|
||||||
|
}
|
||||||
|
return e.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in
|
||||||
|
func (e *Entry) GetNameBytes() []byte {
|
||||||
|
if len(e.NameRaw) > 0 {
|
||||||
|
return e.NameRaw
|
||||||
|
}
|
||||||
|
return []byte(e.Name)
|
||||||
|
}
|
||||||
|
|
|
@ -39,10 +39,10 @@ func TestEntries(t *testing.T) {
|
||||||
func TestFile(t *testing.T) {
|
func TestFile(t *testing.T) {
|
||||||
f := Entry{
|
f := Entry{
|
||||||
Type: FileType,
|
Type: FileType,
|
||||||
Name: "./hello.txt",
|
|
||||||
Size: 100,
|
Size: 100,
|
||||||
Position: 2,
|
Position: 2,
|
||||||
}
|
}
|
||||||
|
f.SetName("./hello.txt")
|
||||||
|
|
||||||
buf, err := json.Marshal(f)
|
buf, err := json.Marshal(f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -54,8 +54,37 @@ func TestFile(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.Name != f1.Name {
|
if f.GetName() != f1.GetName() {
|
||||||
t.Errorf("expected Name %q, got %q", f.Name, f1.Name)
|
t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
|
||||||
|
}
|
||||||
|
if f.Size != f1.Size {
|
||||||
|
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
||||||
|
}
|
||||||
|
if f.Position != f1.Position {
|
||||||
|
t.Errorf("expected Position %q, got %q", f.Position, f1.Position)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFileRaw(t *testing.T) {
|
||||||
|
f := Entry{
|
||||||
|
Type: FileType,
|
||||||
|
Size: 100,
|
||||||
|
Position: 2,
|
||||||
|
}
|
||||||
|
f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74})
|
||||||
|
|
||||||
|
buf, err := json.Marshal(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f1 := Entry{}
|
||||||
|
if err = json.Unmarshal(buf, &f1); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if f.GetName() != f1.GetName() {
|
||||||
|
t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName())
|
||||||
}
|
}
|
||||||
if f.Size != f1.Size {
|
if f.Size != f1.Size {
|
||||||
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
t.Errorf("expected Size %q, got %q", f.Size, f1.Size)
|
||||||
|
|
|
@ -6,6 +6,8 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/vbatts/tar-split/tar/common"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
||||||
|
@ -61,7 +63,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
|
||||||
|
|
||||||
// check for dup name
|
// check for dup name
|
||||||
if e.Type == FileType {
|
if e.Type == FileType {
|
||||||
cName := filepath.Clean(e.Name)
|
cName := filepath.Clean(e.GetName())
|
||||||
if _, ok := jup.seen[cName]; ok {
|
if _, ok := jup.seen[cName]; ok {
|
||||||
return nil, ErrDuplicatePath
|
return nil, ErrDuplicatePath
|
||||||
}
|
}
|
||||||
|
@ -93,9 +95,17 @@ type jsonPacker struct {
|
||||||
type seenNames map[string]struct{}
|
type seenNames map[string]struct{}
|
||||||
|
|
||||||
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
||||||
|
// if Name is not valid utf8, switch it to raw first.
|
||||||
|
if e.Name != "" {
|
||||||
|
if !common.IsValidUtf8String(e.Name) {
|
||||||
|
e.NameRaw = []byte(e.Name)
|
||||||
|
e.Name = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// check early for dup name
|
// check early for dup name
|
||||||
if e.Type == FileType {
|
if e.Type == FileType {
|
||||||
cName := filepath.Clean(e.Name)
|
cName := filepath.Clean(e.GetName())
|
||||||
if _, ok := jp.seen[cName]; ok {
|
if _, ok := jp.seen[cName]; ok {
|
||||||
return -1, ErrDuplicatePath
|
return -1, ErrDuplicatePath
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue