From cde639172fb276d8fbc3e0bbee73791315e30f04 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:24:15 -0400 Subject: [PATCH] tar/asm: work with non-utf8 entry names --- tar/asm/assemble.go | 4 +-- tar/asm/assemble_test.go | 60 +++++++++++++++++++++++++++---- tar/asm/disassemble.go | 11 +++--- tar/asm/testdata/iso-8859.tar.gz | Bin 0 -> 187 bytes 4 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 tar/asm/testdata/iso-8859.tar.gz diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 74317cb..83d6426 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -39,7 +39,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose if entry.Size == 0 { continue } - fh, err := fg.Get(entry.Name) + fh, err := fg.Get(entry.GetName()) if err != nil { pw.CloseWithError(err) return @@ -56,7 +56,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // but since it's coming through the PipeReader, the context of // _which_ file would be lost... fh.Close() - pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) + pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) return } fh.Close() diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index da515f2..e7609c0 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -11,9 +11,38 @@ import ( "os" "testing" + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/common" "github.com/vbatts/tar-split/tar/storage" ) +func TestISO8859(t *testing.T) { + fh, err := os.Open("./testdata/iso-8859.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + tr := tar.NewReader(gzRdr) + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + t.Error(err) + } + break + } + fmt.Println(hdr.Name) + if !common.IsValidUtf8String(hdr.Name) { + fmt.Println([]byte(hdr.Name)) + } + } +} + var entries = []struct { Entry storage.Entry Body []byte @@ -36,6 +65,15 @@ var entries = []struct { }, Body: []byte("café con leche, por favor"), }, + { + Entry: storage.Entry{ + Type: storage.FileType, + NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip. + Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, } var entriesMangled = []struct { Entry storage.Entry @@ -61,6 +99,15 @@ var entriesMangled = []struct { // san not con Body: []byte("café sans leche, por favor"), }, + { + Entry: storage.Entry{ + Type: storage.FileType, + NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, + Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, } func TestTarStreamMangledGetterPutter(t *testing.T) { @@ -69,19 +116,19 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { // first lets prep a GetPutter and Packer for i := range entries { if entries[i].Entry.Type == storage.FileType { - j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body)) + j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body)) if err != nil { t.Error(err) } if j != entries[i].Entry.Size { t.Errorf("size %q: expected %d; got %d", - entries[i].Entry.Name, + entries[i].Entry.GetName(), entries[i].Entry.Size, j) } if !bytes.Equal(csum, entries[i].Entry.Payload) { t.Errorf("checksum %q: expected %v; got %v", - entries[i].Entry.Name, + entries[i].Entry.GetName(), entries[i].Entry.Payload, csum) } @@ -90,7 +137,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { for _, e := range entriesMangled { if e.Entry.Type == storage.FileType { - rdr, err := fgp.Get(e.Entry.Name) + rdr, err := fgp.Get(e.Entry.GetName()) if err != nil { t.Error(err) } @@ -105,7 +152,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { if bytes.Equal(csum, e.Entry.Payload) { t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v", i, - e.Entry.Name, + e.Entry.GetName(), csum) } } @@ -121,6 +168,7 @@ func TestTarStream(t *testing.T) { {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, + {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, } for _, tc := range testCases { @@ -163,7 +211,7 @@ func TestTarStream(t *testing.T) { t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) } - t.Logf("%s", w.String()) // if we fail, then show the packed info + //t.Logf("%s", w.String()) // if we fail, then show the packed info // If we've made it this far, then we'll turn it around and create a tar // stream from the packed metadata and buffered file contents. diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 7986890..54ef23a 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -92,13 +92,16 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } } - // File entries added, regardless of size - _, err = p.AddEntry(storage.Entry{ + entry := storage.Entry{ Type: storage.FileType, - Name: hdr.Name, Size: hdr.Size, Payload: csum, - }) + } + // For proper marshalling of non-utf8 characters + entry.SetName(hdr.Name) + + // File entries added, regardless of size + _, err = p.AddEntry(entry) if err != nil { pW.CloseWithError(err) return diff --git a/tar/asm/testdata/iso-8859.tar.gz b/tar/asm/testdata/iso-8859.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e87f30a45f5dbf742a51c5c8252688452aeb2d0 GIT binary patch literal 187 zcmb2|=HU3ek133aIkPxl*TTZoQm-Vjh~e!eN3KH#0uC3~*t+To%(z?)O8{fJKT(T_prp5-2ZE8zMWtG k&#}fJ?8SbI-rs7jz2?@jhmJ8IgNS~1mB~df7&I6d0D{m~djJ3c literal 0 HcmV?d00001