diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 3943718..ea64a38 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -26,6 +26,9 @@ type Reader struct { // It is only the responsibility of every exported method of Reader to // ensure that this error is sticky. err error + + RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. + rawBytes *bytes.Buffer // last raw bits } type fileReader interface { @@ -35,6 +38,25 @@ type fileReader interface { WriteTo(io.Writer) (int64, error) } +// RawBytes accesses the raw bytes of the archive, apart from the file payload itself. +// This includes the header and padding. +// +// This call resets the current rawbytes buffer +// +// Only when RawAccounting is enabled, otherwise this returns nil +func (tr *Reader) RawBytes() []byte { + if !tr.RawAccounting { + return nil + } + if tr.rawBytes == nil { + tr.rawBytes = bytes.NewBuffer(nil) + } + defer tr.rawBytes.Reset() // if we've read them, then flush them. + + return tr.rawBytes.Bytes() + +} + // NewReader creates a new Reader reading from r. func NewReader(r io.Reader) *Reader { return &Reader{r: r, curr: &regFileReader{r, 0}} @@ -58,6 +80,14 @@ func (tr *Reader) next() (*Header, error) { var paxHdrs map[string]string var gnuLongName, gnuLongLink string + if tr.RawAccounting { + if tr.rawBytes == nil { + tr.rawBytes = bytes.NewBuffer(nil) + } else { + tr.rawBytes.Reset() + } + } + // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not @@ -66,12 +96,16 @@ func (tr *Reader) next() (*Header, error) { format := FormatUSTAR | FormatPAX | FormatGNU for { // Discard the remainder of the file and any padding. 
- if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil { + if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil { return nil, err } - if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + n, err := tryReadFull(tr.r, tr.blk[:tr.pad]) + if err != nil { return nil, err } + if tr.RawAccounting { + tr.rawBytes.Write(tr.blk[:n]) + } tr.pad = 0 hdr, rawHdr, err := tr.readHeader() @@ -109,6 +143,10 @@ func (tr *Reader) next() (*Header, error) { return nil, err } + if tr.RawAccounting { + tr.rawBytes.Write(realname) + } + var p parser switch hdr.Typeflag { case TypeGNULongName: @@ -298,6 +336,12 @@ func parsePAX(r io.Reader) (map[string]string, error) { if err != nil { return nil, err } + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err = tr.rawBytes.Write(buf); err != nil { + return nil, err + } + } sbuf := string(buf) // For GNU PAX sparse format 0.0 support. @@ -342,11 +386,20 @@ func parsePAX(r io.Reader) (map[string]string, error) { // * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive. 
- if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + n, err := io.ReadFull(tr.r, tr.blk[:]) + if tr.RawAccounting && (err == nil || err == io.EOF) { + tr.rawBytes.Write(tr.blk[:n]) + } + if err != nil { return nil, nil, err // EOF is okay here; exactly 0 bytes read } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { - if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + n, err = io.ReadFull(tr.r, tr.blk[:]) + if tr.RawAccounting && (err == nil || err == io.EOF) { + tr.rawBytes.Write(tr.blk[:n]) + } + if err != nil { return nil, nil, err // EOF is okay here; exactly 1 block of zeros read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { @@ -497,6 +550,9 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err if _, err := mustReadFull(tr.r, blk[:]); err != nil { return nil, err } + if tr.RawAccounting { + tr.rawBytes.Write(blk[:]) + } s = blk.Sparse() continue } @@ -828,12 +884,20 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) { } // discard skips n bytes in r, reporting an error if unable to do so. -func discard(r io.Reader, n int64) error { +func discard(tr *Reader, n int64) error { + var seekSkipped, copySkipped int64 + var err error + r := tr.r + if tr.RawAccounting { + + copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n) + goto out + } + // If possible, Seek to the last byte before the end of the data section. // Do this because Seek is often lazy about reporting errors; this will mask // the fact that the stream may be truncated. We can rely on the // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek if sr, ok := r.(io.Seeker); ok && n > 1 { // Not all io.Seeker can actually Seek. 
For example, os.Stdin implements // io.Seeker, but calling Seek always returns an error and performs @@ -850,7 +914,8 @@ func discard(r io.Reader, n int64) error { } } - copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped) + copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped) +out: if err == io.EOF && seekSkipped+copySkipped < n { err = io.ErrUnexpectedEOF } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 2676853..6227e24 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -8,7 +8,6 @@ import ( "bytes" "errors" "fmt" - "internal/testenv" "io" "io/ioutil" "math" @@ -16,6 +15,7 @@ import ( "path" "path/filepath" "reflect" + "runtime" "strings" "testing" "time" @@ -260,8 +260,10 @@ func TestFileInfoHeaderDir(t *testing.T) { } func TestFileInfoHeaderSymlink(t *testing.T) { - testenv.MustHaveSymlink(t) - + switch runtime.GOOS { + case "android", "nacl", "plan9", "windows": + t.Skip("symlinks not supported") + } tmpdir, err := ioutil.TempDir("", "TestFileInfoHeaderSymlink") if err != nil { t.Fatal(err)