mirror of
https://github.com/vbatts/tar-split.git
synced 2024-12-19 03:46:30 +00:00
archive/tar: port RawHeader() changes
This is a port of commits adding RawHeader() to go-1.11 archive/tar. In addition: * simplify the rawBytes.Write() code in readHeader() * ignore errors from rawBytes.Write(), as (at least for go-1.11) it never returns an error, only panics (if the buffer grew too large) Also, remove the internal/testenv from tar_tar.go to enable go test. As working symlink detection is non-trivial on Windows, just skip the test on that platform. In addition to `go test`, I did some minimal manual testing, and it seems this code creates tar-data.json.gz which is identical to the one made by the old version. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
parent
73fdb78c36
commit
9a95e02602
2 changed files with 77 additions and 10 deletions
|
@ -26,6 +26,9 @@ type Reader struct {
|
|||
// It is only the responsibility of every exported method of Reader to
|
||||
// ensure that this error is sticky.
|
||||
err error
|
||||
|
||||
RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
|
||||
rawBytes *bytes.Buffer // last raw bits
|
||||
}
|
||||
|
||||
type fileReader interface {
|
||||
|
@ -35,6 +38,25 @@ type fileReader interface {
|
|||
WriteTo(io.Writer) (int64, error)
|
||||
}
|
||||
|
||||
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
|
||||
// This includes the header and padding.
|
||||
//
|
||||
// This call resets the current rawbytes buffer
|
||||
//
|
||||
// Only when RawAccounting is enabled, otherwise this returns nil
|
||||
func (tr *Reader) RawBytes() []byte {
|
||||
if !tr.RawAccounting {
|
||||
return nil
|
||||
}
|
||||
if tr.rawBytes == nil {
|
||||
tr.rawBytes = bytes.NewBuffer(nil)
|
||||
}
|
||||
defer tr.rawBytes.Reset() // if we've read them, then flush them.
|
||||
|
||||
return tr.rawBytes.Bytes()
|
||||
|
||||
}
|
||||
|
||||
// NewReader creates a new Reader reading from r.
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return &Reader{r: r, curr: ®FileReader{r, 0}}
|
||||
|
@ -58,6 +80,14 @@ func (tr *Reader) next() (*Header, error) {
|
|||
var paxHdrs map[string]string
|
||||
var gnuLongName, gnuLongLink string
|
||||
|
||||
if tr.RawAccounting {
|
||||
if tr.rawBytes == nil {
|
||||
tr.rawBytes = bytes.NewBuffer(nil)
|
||||
} else {
|
||||
tr.rawBytes.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
// Externally, Next iterates through the tar archive as if it is a series of
|
||||
// files. Internally, the tar format often uses fake "files" to add meta
|
||||
// data that describes the next file. These meta data "files" should not
|
||||
|
@ -66,12 +96,16 @@ func (tr *Reader) next() (*Header, error) {
|
|||
format := FormatUSTAR | FormatPAX | FormatGNU
|
||||
for {
|
||||
// Discard the remainder of the file and any padding.
|
||||
if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
|
||||
if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
|
||||
n, err := tryReadFull(tr.r, tr.blk[:tr.pad])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
tr.rawBytes.Write(tr.blk[:n])
|
||||
}
|
||||
tr.pad = 0
|
||||
|
||||
hdr, rawHdr, err := tr.readHeader()
|
||||
|
@ -109,6 +143,10 @@ func (tr *Reader) next() (*Header, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
if tr.RawAccounting {
|
||||
tr.rawBytes.Write(realname)
|
||||
}
|
||||
|
||||
var p parser
|
||||
switch hdr.Typeflag {
|
||||
case TypeGNULongName:
|
||||
|
@ -298,6 +336,12 @@ func parsePAX(r io.Reader) (map[string]string, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// leaving this function for io.Reader makes it more testable
|
||||
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
|
||||
if _, err = tr.rawBytes.Write(buf); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
sbuf := string(buf)
|
||||
|
||||
// For GNU PAX sparse format 0.0 support.
|
||||
|
@ -342,11 +386,20 @@ func parsePAX(r io.Reader) (map[string]string, error) {
|
|||
// * At least 2 blocks of zeros are read.
|
||||
func (tr *Reader) readHeader() (*Header, *block, error) {
|
||||
// Two blocks of zero bytes marks the end of the archive.
|
||||
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
||||
n, err := io.ReadFull(tr.r, tr.blk[:])
|
||||
if tr.RawAccounting && (err == nil || err == io.EOF) {
|
||||
tr.rawBytes.Write(tr.blk[:n])
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, err // EOF is okay here; exactly 0 bytes read
|
||||
}
|
||||
|
||||
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
||||
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
||||
n, err = io.ReadFull(tr.r, tr.blk[:])
|
||||
if tr.RawAccounting && (err == nil || err == io.EOF) {
|
||||
tr.rawBytes.Write(tr.blk[:n])
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
|
||||
}
|
||||
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
||||
|
@ -497,6 +550,9 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err
|
|||
if _, err := mustReadFull(tr.r, blk[:]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
tr.rawBytes.Write(blk[:])
|
||||
}
|
||||
s = blk.Sparse()
|
||||
continue
|
||||
}
|
||||
|
@ -828,12 +884,20 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) {
|
|||
}
|
||||
|
||||
// discard skips n bytes in r, reporting an error if unable to do so.
|
||||
func discard(r io.Reader, n int64) error {
|
||||
func discard(tr *Reader, n int64) error {
|
||||
var seekSkipped, copySkipped int64
|
||||
var err error
|
||||
r := tr.r
|
||||
if tr.RawAccounting {
|
||||
|
||||
copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n)
|
||||
goto out
|
||||
}
|
||||
|
||||
// If possible, Seek to the last byte before the end of the data section.
|
||||
// Do this because Seek is often lazy about reporting errors; this will mask
|
||||
// the fact that the stream may be truncated. We can rely on the
|
||||
// io.CopyN done shortly afterwards to trigger any IO errors.
|
||||
var seekSkipped int64 // Number of bytes skipped via Seek
|
||||
if sr, ok := r.(io.Seeker); ok && n > 1 {
|
||||
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
|
||||
// io.Seeker, but calling Seek always returns an error and performs
|
||||
|
@ -850,7 +914,8 @@ func discard(r io.Reader, n int64) error {
|
|||
}
|
||||
}
|
||||
|
||||
copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped)
|
||||
copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped)
|
||||
out:
|
||||
if err == io.EOF && seekSkipped+copySkipped < n {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"internal/testenv"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
|
@ -16,6 +15,7 @@ import (
|
|||
"path"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -260,8 +260,10 @@ func TestFileInfoHeaderDir(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestFileInfoHeaderSymlink(t *testing.T) {
|
||||
testenv.MustHaveSymlink(t)
|
||||
|
||||
switch runtime.GOOS {
|
||||
case "android", "nacl", "plan9", "windows":
|
||||
t.Skip("symlinks not supported")
|
||||
}
|
||||
tmpdir, err := ioutil.TempDir("", "TestFileInfoHeaderSymlink")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
|
Loading…
Reference in a new issue