mirror of
https://github.com/vbatts/tar-split.git
archive/tar: port RawHeader() changes
This is a port of commits adding RawHeader() to go-1.11 archive/tar. In addition:

 * simplify the rawBytes.Write() code in readHeader()
 * ignore errors from rawBytes.Write(), as (at least for go-1.11) it never
   returns an error, only panics (if the buffer grew too large)

Also, remove the internal/testenv import from tar_tar.go to enable `go test`.
As working symlink detection is non-trivial on Windows, just skip the test on
that platform.

In addition to `go test`, I did some minimal manual testing, and it seems this
code creates a tar-data.json.gz identical to the one made by the old version.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
parent 73fdb78c36
commit 9a95e02602

2 changed files with 77 additions and 10 deletions
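For orientation (not part of the commit), here is a minimal sketch of how a caller might use the ported accounting: enable RawAccounting before iterating, then drain RawBytes() after each Next(). The import path and input file name are assumptions for illustration.

    package main

    import (
        "fmt"
        "io"
        "io/ioutil"
        "log"
        "os"

        tar "github.com/vbatts/tar-split/archive/tar" // assumed import path for this copy of archive/tar
    )

    func main() {
        f, err := os.Open("example.tar") // hypothetical input archive
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()

        tr := tar.NewReader(f)
        tr.RawAccounting = true // enable raw accounting before iterating

        for {
            hdr, err := tr.Next()
            if err == io.EOF {
                break
            }
            if err != nil {
                log.Fatal(err)
            }
            // Raw header (and any preceding padding) bytes accumulated by Next();
            // RawBytes also resets the internal buffer.
            raw := tr.RawBytes()
            fmt.Printf("%s: %d raw bytes before payload\n", hdr.Name, len(raw))

            // Read the payload normally; bytes read here are file content,
            // not raw accounting data.
            if _, err := io.Copy(ioutil.Discard, tr); err != nil {
                log.Fatal(err)
            }
        }
        // The terminating zero blocks are captured as well.
        fmt.Printf("trailer: %d bytes\n", len(tr.RawBytes()))
    }

The diff below is the reader side of that behavior.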
@@ -26,6 +26,9 @@ type Reader struct {
 	// It is only the responsibility of every exported method of Reader to
 	// ensure that this error is sticky.
 	err error
+
+	RawAccounting bool          // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
+	rawBytes      *bytes.Buffer // last raw bits
 }
 
 type fileReader interface {
@@ -35,6 +38,25 @@ type fileReader interface {
 	WriteTo(io.Writer) (int64, error)
 }
 
+// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
+// This includes the header and padding.
+//
+// This call resets the current rawbytes buffer
+//
+// Only when RawAccounting is enabled, otherwise this returns nil
+func (tr *Reader) RawBytes() []byte {
+	if !tr.RawAccounting {
+		return nil
+	}
+	if tr.rawBytes == nil {
+		tr.rawBytes = bytes.NewBuffer(nil)
+	}
+	defer tr.rawBytes.Reset() // if we've read them, then flush them.
+
+	return tr.rawBytes.Bytes()
+
+}
+
 // NewReader creates a new Reader reading from r.
 func NewReader(r io.Reader) *Reader {
 	return &Reader{r: r, curr: &regFileReader{r, 0}}
@@ -58,6 +80,14 @@ func (tr *Reader) next() (*Header, error) {
 	var paxHdrs map[string]string
 	var gnuLongName, gnuLongLink string
 
+	if tr.RawAccounting {
+		if tr.rawBytes == nil {
+			tr.rawBytes = bytes.NewBuffer(nil)
+		} else {
+			tr.rawBytes.Reset()
+		}
+	}
+
 	// Externally, Next iterates through the tar archive as if it is a series of
 	// files. Internally, the tar format often uses fake "files" to add meta
 	// data that describes the next file. These meta data "files" should not
@@ -66,12 +96,16 @@ func (tr *Reader) next() (*Header, error) {
 	format := FormatUSTAR | FormatPAX | FormatGNU
 	for {
 		// Discard the remainder of the file and any padding.
-		if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
+		if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil {
 			return nil, err
 		}
-		if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
+		n, err := tryReadFull(tr.r, tr.blk[:tr.pad])
+		if err != nil {
 			return nil, err
 		}
+		if tr.RawAccounting {
+			tr.rawBytes.Write(tr.blk[:n])
+		}
 		tr.pad = 0
 
 		hdr, rawHdr, err := tr.readHeader()
@@ -109,6 +143,10 @@ func (tr *Reader) next() (*Header, error) {
 				return nil, err
 			}
 
+			if tr.RawAccounting {
+				tr.rawBytes.Write(realname)
+			}
+
 			var p parser
 			switch hdr.Typeflag {
 			case TypeGNULongName:
@@ -298,6 +336,12 @@ func parsePAX(r io.Reader) (map[string]string, error) {
 	if err != nil {
 		return nil, err
 	}
+	// leaving this function for io.Reader makes it more testable
+	if tr, ok := r.(*Reader); ok && tr.RawAccounting {
+		if _, err = tr.rawBytes.Write(buf); err != nil {
+			return nil, err
+		}
+	}
 	sbuf := string(buf)
 
 	// For GNU PAX sparse format 0.0 support.
@@ -342,11 +386,20 @@ func parsePAX(r io.Reader) (map[string]string, error) {
 // * At least 2 blocks of zeros are read.
 func (tr *Reader) readHeader() (*Header, *block, error) {
 	// Two blocks of zero bytes marks the end of the archive.
-	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
+	n, err := io.ReadFull(tr.r, tr.blk[:])
+	if tr.RawAccounting && (err == nil || err == io.EOF) {
+		tr.rawBytes.Write(tr.blk[:n])
+	}
+	if err != nil {
 		return nil, nil, err // EOF is okay here; exactly 0 bytes read
 	}
 
 	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
-		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
+		n, err = io.ReadFull(tr.r, tr.blk[:])
+		if tr.RawAccounting && (err == nil || err == io.EOF) {
+			tr.rawBytes.Write(tr.blk[:n])
+		}
+		if err != nil {
 			return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
 		}
 		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
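As the commit message notes, the readHeader() hunk above drops error checking on rawBytes.Write(): bytes.Buffer.Write is documented to always return a nil error, and to panic with bytes.ErrTooLarge if the buffer cannot grow. A tiny standalone illustration of that contract:

    package main

    import (
        "bytes"
        "fmt"
    )

    func main() {
        var buf bytes.Buffer
        n, err := buf.Write([]byte("512-byte header block would go here"))
        // Per the bytes.Buffer documentation, err is always nil; if the buffer
        // cannot grow, Write panics with bytes.ErrTooLarge instead.
        fmt.Println(n, err) // 35 <nil>
    }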
@@ -497,6 +550,9 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err
 			if _, err := mustReadFull(tr.r, blk[:]); err != nil {
 				return nil, err
 			}
+			if tr.RawAccounting {
+				tr.rawBytes.Write(blk[:])
+			}
 			s = blk.Sparse()
 			continue
 		}
@@ -828,12 +884,20 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) {
 }
 
 // discard skips n bytes in r, reporting an error if unable to do so.
-func discard(r io.Reader, n int64) error {
+func discard(tr *Reader, n int64) error {
+	var seekSkipped, copySkipped int64
+	var err error
+	r := tr.r
+	if tr.RawAccounting {
+
+		copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n)
+		goto out
+	}
+
 	// If possible, Seek to the last byte before the end of the data section.
 	// Do this because Seek is often lazy about reporting errors; this will mask
 	// the fact that the stream may be truncated. We can rely on the
 	// io.CopyN done shortly afterwards to trigger any IO errors.
-	var seekSkipped int64 // Number of bytes skipped via Seek
 	if sr, ok := r.(io.Seeker); ok && n > 1 {
 		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
 		// io.Seeker, but calling Seek always returns an error and performs
@@ -850,7 +914,8 @@ func discard(r io.Reader, n int64) error {
 		}
 	}
 
-	copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped)
+	copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped)
+out:
 	if err == io.EOF && seekSkipped+copySkipped < n {
 		err = io.ErrUnexpectedEOF
 	}
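That concludes the reader changes; the remaining hunks adjust the test file (dropping internal/testenv and skipping the symlink test where unsupported). For clarity, the rewritten discard() above behaves roughly like the following standalone sketch, with hypothetical names that are not part of the package: when raw accounting is active the skipped payload is copied into the accounting buffer instead of being thrown away; otherwise it seeks ahead when possible and discards the remainder.

    package main

    import (
        "bytes"
        "fmt"
        "io"
        "io/ioutil"
    )

    // skipBytes is a hypothetical, simplified analogue of discard(): when capture
    // is non-nil the n skipped bytes are preserved in it; otherwise the function
    // seeks past most of the span (when the reader supports it) and copies the
    // remainder to ioutil.Discard so truncation is still detected.
    func skipBytes(r io.Reader, n int64, capture *bytes.Buffer) error {
        if capture != nil {
            copied, err := io.CopyN(capture, r, n)
            if err == io.EOF && copied < n {
                return io.ErrUnexpectedEOF
            }
            return err
        }

        var seekSkipped int64
        if sr, ok := r.(io.Seeker); ok && n > 1 {
            // Seek to one byte before the end of the span; the final CopyN below
            // still performs a real read, which surfaces truncated streams.
            if pos, err := sr.Seek(0, io.SeekCurrent); err == nil {
                if _, err := sr.Seek(pos+n-1, io.SeekStart); err == nil {
                    seekSkipped = n - 1
                }
            }
        }

        copied, err := io.CopyN(ioutil.Discard, r, n-seekSkipped)
        if err == io.EOF && seekSkipped+copied < n {
            return io.ErrUnexpectedEOF
        }
        return err
    }

    func main() {
        data := bytes.NewReader(make([]byte, 1024))
        var captured bytes.Buffer
        if err := skipBytes(data, 512, &captured); err != nil {
            panic(err)
        }
        fmt.Println(captured.Len()) // 512
    }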
@@ -8,7 +8,6 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"internal/testenv"
 	"io"
 	"io/ioutil"
 	"math"
@@ -16,6 +15,7 @@ import (
 	"path"
 	"path/filepath"
 	"reflect"
+	"runtime"
 	"strings"
 	"testing"
 	"time"
@@ -260,8 +260,10 @@ func TestFileInfoHeaderDir(t *testing.T) {
 }
 
 func TestFileInfoHeaderSymlink(t *testing.T) {
-	testenv.MustHaveSymlink(t)
+	switch runtime.GOOS {
+	case "android", "nacl", "plan9", "windows":
+		t.Skip("symlinks not supported")
+	}
 	tmpdir, err := ioutil.TempDir("", "TestFileInfoHeaderSymlink")
 	if err != nil {
 		t.Fatal(err)