From 51b0481d4aecf1c051b1dfc942ab46986e776bef Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 3 Aug 2015 17:13:31 -0400 Subject: [PATCH 1/3] tar/asm: adding a failing test due to GNU LongLink --- tar/asm/assemble_test.go | 80 ++++++++++++++++++++++++++++--- tar/asm/testdata/longlink.tar.gz | Bin 0 -> 438 bytes 2 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 tar/asm/testdata/longlink.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 7cf44dc..e37d7f3 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -114,8 +114,8 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { func TestTarStream(t *testing.T) { var ( - expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" - expectedSize int64 = 10240 + expectedSHA1Sum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" + expectedSize int64 = 10240 ) fh, err := os.Open("./testdata/t.tar.gz") @@ -153,8 +153,8 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSum, h0.Sum(nil)) + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) } t.Logf("%s", w.String()) // if we fail, then show the packed info @@ -175,7 +175,75 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSum, h1.Sum(nil)) + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) + } +} + +func TestTarGNUTar(t *testing.T) { + var ( + expectedSHA1Sum = "d9f6babe107b7247953dff6b5b5ae31a3a880add" + expectedSize int64 = 20480 + ) + + fh, err := 
os.Open("./testdata/longlink.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } + + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) + } + + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. 
+ + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) } } diff --git a/tar/asm/testdata/longlink.tar.gz b/tar/asm/testdata/longlink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb21db5f382892fd9c2e529e7baeff37efee7dbf GIT binary patch literal 438 zcmV;n0ZINJiwFokufJ6Q18i?@XKZP1Yc6zQaschxYihzk6hPrD#T6tolX?8@0^CGe zv6O;>NlTaCq*bU`jW=d)k)97EwFcCl>%@VURnx9lRgshFr4)vvO~LGs_Ure1g>@mM zHRk*+171 z$otoI^7Pkm(m%uHK;GZi*3w_Y@BQap3FQ5QbCLcUF7%&m2;}`^zXH%-!&v`0hCtpw znP}**;i~>O4#9-}!|C)_aDD&jLm=+&OtS3%RS5e}8UlHL8y)+96;t|4hd|!n1<(Fp z#pM2iA&~d4ePsWy;lS22J8(;*P`4}JgIn8^CCqO8j5VY%HrFJCsVnV#d&_{`Bbb36;f|9^!3 z``p61hzuRuROXrO}ey1{h{+o}d)q418?Dp61|G(4dKYXZT^n&%jztdm;jm96~ gpRfM_0000000000000000O0<70{(f!Y5-6G0O>IHkpKVy literal 0 HcmV?d00001 From df8572a1eb56cd5f77ec10482756113cdf42a915 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 11 Aug 2015 15:51:19 -0400 Subject: [PATCH 2/3] tar/asm: check length before adding an entry --- tar/asm/disassemble.go | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 4a8ed94..7986890 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -55,13 +55,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. 
- _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }) - if err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } break // not return. We need the end of the reader. } @@ -69,12 +71,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io break // not return. We need the end of the reader. } - if _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }); err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } var csum []byte From e46a815cbcaa5270acfb2893b66791150f4d2a87 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 11 Aug 2015 15:51:52 -0400 Subject: [PATCH 3/3] archive/tar: fix carry-over of bytes for GNU types Archives produced with GNU tar can have types of TypeGNULongName and TypeGNULongLink. These fields effectively appear like two file entries in the tar archive. While golang's `archive/tar` transparently provides the file name and headers and file payload, the access to the raw bytes is still needed. This fixes the access to the longlink header, its payload (of the long file path name), and the following file header and actual file payload. 
--- archive/tar/reader.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index a89957e..f817956 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -154,44 +154,60 @@ func (tr *Reader) Next() (*Header, error) { } return hdr, nil case TypeGNULongName: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long name header. Its contents are the real file name. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } }