From 9a95e026024d3929928021244d72e59f758616b3 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin
Date: Wed, 5 Sep 2018 13:37:46 -0700
Subject: [PATCH] archive/tar: port RawHeader() changes

This is a port of commits adding RawHeader() to go-1.11 archive/tar.

In addition:

* simplify the rawBytes.Write() code in readHeader()
* ignore errors from rawBytes.Write(), as (at least for go-1.11) it
  never returns an error, only panics (if the buffer grows too large)

Also, remove the internal/testenv import from tar_test.go to enable
`go test`. As working symlink detection is non-trivial on Windows,
just skip the test on that platform.

In addition to `go test`, I did some minimal manual testing, and it
seems this code creates a tar-data.json.gz that is identical to the
one made by the old version.

Signed-off-by: Kir Kolyshkin
---
 archive/tar/reader.go   | 79 +++++++++++++++++++++++++++++++++++++----
 archive/tar/tar_test.go |  8 +++--
 2 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/archive/tar/reader.go b/archive/tar/reader.go
index 3943718..ea64a38 100644
--- a/archive/tar/reader.go
+++ b/archive/tar/reader.go
@@ -26,6 +26,9 @@ type Reader struct {
 	// It is only the responsibility of every exported method of Reader to
 	// ensure that this error is sticky.
 	err error
+
+	RawAccounting bool          // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
+	rawBytes      *bytes.Buffer // last raw bits
 }
 
 type fileReader interface {
@@ -35,6 +38,25 @@ type fileReader interface {
 	WriteTo(io.Writer) (int64, error)
 }
 
+// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
+// This includes the header and padding.
+//
+// This call resets the current rawbytes buffer
+//
+// Only when RawAccounting is enabled, otherwise this returns nil
+func (tr *Reader) RawBytes() []byte {
+	if !tr.RawAccounting {
+		return nil
+	}
+	if tr.rawBytes == nil {
+		tr.rawBytes = bytes.NewBuffer(nil)
+	}
+	defer tr.rawBytes.Reset() // if we've read them, then flush them.
+
+	return tr.rawBytes.Bytes()
+
+}
+
 // NewReader creates a new Reader reading from r.
 func NewReader(r io.Reader) *Reader {
 	return &Reader{r: r, curr: &regFileReader{r, 0}}
@@ -58,6 +80,14 @@ func (tr *Reader) next() (*Header, error) {
 	var paxHdrs map[string]string
 	var gnuLongName, gnuLongLink string
 
+	if tr.RawAccounting {
+		if tr.rawBytes == nil {
+			tr.rawBytes = bytes.NewBuffer(nil)
+		} else {
+			tr.rawBytes.Reset()
+		}
+	}
+
 	// Externally, Next iterates through the tar archive as if it is a series of
 	// files. Internally, the tar format often uses fake "files" to add meta
 	// data that describes the next file. These meta data "files" should not
 	// be visible to the outside. As such, this loop iterates through one or
 	// more "header files" until it finds a "normal file".
 	format := FormatUSTAR | FormatPAX | FormatGNU
 	for {
 		// Discard the remainder of the file and any padding.
-		if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
+		if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil {
 			return nil, err
 		}
-		if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
+		n, err := tryReadFull(tr.r, tr.blk[:tr.pad])
+		if err != nil {
 			return nil, err
 		}
+		if tr.RawAccounting {
+			tr.rawBytes.Write(tr.blk[:n])
+		}
 		tr.pad = 0
 
 		hdr, rawHdr, err := tr.readHeader()
@@ -109,6 +143,10 @@ func (tr *Reader) next() (*Header, error) {
 			return nil, err
 		}
 
+		if tr.RawAccounting {
+			tr.rawBytes.Write(realname)
+		}
+
 		var p parser
 		switch hdr.Typeflag {
 		case TypeGNULongName:
@@ -298,6 +336,12 @@ func parsePAX(r io.Reader) (map[string]string, error) {
 	if err != nil {
 		return nil, err
 	}
+	// leaving this function for io.Reader makes it more testable
+	if tr, ok := r.(*Reader); ok && tr.RawAccounting {
+		if _, err = tr.rawBytes.Write(buf); err != nil {
+			return nil, err
+		}
+	}
 	sbuf := string(buf)
 
 	// For GNU PAX sparse format 0.0 support.
@@ -342,11 +386,20 @@ func parsePAX(r io.Reader) (map[string]string, error) {
 //	* At least 2 blocks of zeros are read.
 func (tr *Reader) readHeader() (*Header, *block, error) {
 	// Two blocks of zero bytes marks the end of the archive.
-	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
+	n, err := io.ReadFull(tr.r, tr.blk[:])
+	if tr.RawAccounting && (err == nil || err == io.EOF) {
+		tr.rawBytes.Write(tr.blk[:n])
+	}
+	if err != nil {
 		return nil, nil, err // EOF is okay here; exactly 0 bytes read
 	}
+
 	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
-		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
+		n, err = io.ReadFull(tr.r, tr.blk[:])
+		if tr.RawAccounting && (err == nil || err == io.EOF) {
+			tr.rawBytes.Write(tr.blk[:n])
+		}
+		if err != nil {
 			return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
 		}
 		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
@@ -497,6 +550,9 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err
 			if _, err := mustReadFull(tr.r, blk[:]); err != nil {
 				return nil, err
 			}
+			if tr.RawAccounting {
+				tr.rawBytes.Write(blk[:])
+			}
 			s = blk.Sparse()
 			continue
 		}
@@ -828,12 +884,20 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) {
 }
 
 // discard skips n bytes in r, reporting an error if unable to do so.
-func discard(r io.Reader, n int64) error {
+func discard(tr *Reader, n int64) error {
+	var seekSkipped, copySkipped int64
+	var err error
+	r := tr.r
+	if tr.RawAccounting {
+
+		copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n)
+		goto out
+	}
+
 	// If possible, Seek to the last byte before the end of the data section.
 	// Do this because Seek is often lazy about reporting errors; this will mask
 	// the fact that the stream may be truncated. We can rely on the
 	// io.CopyN done shortly afterwards to trigger any IO errors.
-	var seekSkipped int64 // Number of bytes skipped via Seek
 	if sr, ok := r.(io.Seeker); ok && n > 1 {
 		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
 		// io.Seeker, but calling Seek always returns an error and performs
@@ -850,7 +914,8 @@ func discard(r io.Reader, n int64) error {
 		}
 	}
 
-	copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped)
+	copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped)
+out:
 	if err == io.EOF && seekSkipped+copySkipped < n {
 		err = io.ErrUnexpectedEOF
 	}
diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go
index 2676853..6227e24 100644
--- a/archive/tar/tar_test.go
+++ b/archive/tar/tar_test.go
@@ -8,7 +8,6 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"internal/testenv"
 	"io"
 	"io/ioutil"
 	"math"
@@ -16,6 +15,7 @@
 	"path"
 	"path/filepath"
 	"reflect"
+	"runtime"
 	"strings"
 	"testing"
 	"time"
@@ -260,8 +260,10 @@ func TestFileInfoHeaderDir(t *testing.T) {
 }
 
 func TestFileInfoHeaderSymlink(t *testing.T) {
-	testenv.MustHaveSymlink(t)
-
+	switch runtime.GOOS {
+	case "android", "nacl", "plan9", "windows":
+		t.Skip("symlinks not supported")
+	}
 	tmpdir, err := ioutil.TempDir("", "TestFileInfoHeaderSymlink")
 	if err != nil {
 		t.Fatal(err)
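
For reviewers, here is a rough sketch of how the RawAccounting/RawBytes API touched by this
patch is meant to be consumed, based on my reading of the new code. It is illustrative only
and not part of the change: the import path, input file name, and printed bookkeeping are
placeholders/assumptions.

package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"

	// Assumed import path for the patched fork; adjust to wherever this code lives.
	"github.com/vbatts/tar-split/archive/tar"
)

func main() {
	f, err := os.Open("testdata/example.tar") // placeholder input
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	tr := tar.NewReader(f)
	// Keep the raw non-payload bytes (header blocks, PAX/GNU meta entries,
	// padding) so the archive could later be reassembled bit-for-bit.
	tr.RawAccounting = true

	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			// On a well-formed archive this should be the last entry's
			// padding plus the two terminating zero blocks.
			fmt.Printf("footer: %d raw bytes\n", len(tr.RawBytes()))
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		// RawBytes returns what the preceding Next() consumed that was not
		// file payload, and resets the internal buffer.
		fmt.Printf("%s: %d raw header bytes\n", hdr.Name, len(tr.RawBytes()))

		// The payload is still read through tr as usual; if it were left
		// unread, discard() would instead add the skipped bytes to the raw
		// buffer on the following Next().
		if _, err := io.Copy(ioutil.Discard, tr); err != nil {
			log.Fatal(err)
		}
	}
}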