diff --git a/archive/tar/reader.go b/archive/tar/reader.go index a89957e..f817956 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -154,44 +154,60 @@ func (tr *Reader) Next() (*Header, error) { } return hdr, nil case TypeGNULongName: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long name header. Its contents are the real file name. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 7cf44dc..e37d7f3 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -114,8 +114,8 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { func TestTarStream(t *testing.T) { var ( - expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" - expectedSize int64 = 10240 + expectedSHA1Sum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" + expectedSize int64 = 10240 ) fh, err := os.Open("./testdata/t.tar.gz") @@ -153,8 +153,8 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSum, h0.Sum(nil)) + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) } t.Logf("%s", w.String()) // if we fail, then show the packed info @@ -175,7 +175,75 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSum, h1.Sum(nil)) + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) + } +} + +func TestTarGNUTar(t *testing.T) { + var ( + expectedSHA1Sum = "d9f6babe107b7247953dff6b5b5ae31a3a880add" + expectedSize int64 = 20480 + ) + + fh, err := os.Open("./testdata/longlink.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } + + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) + } + + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. + + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) } } diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 4a8ed94..7986890 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -55,13 +55,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. - _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }) - if err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } break // not return. We need the end of the reader. } @@ -69,12 +71,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io break // not return. We need the end of the reader. } - if _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }); err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } var csum []byte diff --git a/tar/asm/testdata/longlink.tar.gz b/tar/asm/testdata/longlink.tar.gz new file mode 100644 index 0000000..cb21db5 Binary files /dev/null and b/tar/asm/testdata/longlink.tar.gz differ