mirror of
https://github.com/vbatts/tar-split.git
synced 2024-11-15 12:58:38 +00:00
archive/tar: convert Reader.Next to be loop based
Motivation for change:
* Recursive logic is hard to follow, since it tends to apply things in reverse. On the other hand, the tar formats tend to describe meta headers as affecting the next entry.
* Recursion also applies changes in the wrong order. Two test files are attached that use multiple headers. The previous Go behavior differs from what GNU and BSD tar do.

Change-Id: Ic1557256fc1363c5cb26570e5d0b9f65a9e57341
Reviewed-on: https://go-review.googlesource.com/14624
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
64935a5f0f
commit
be9ac88117
4 changed files with 90 additions and 94 deletions
|
@ -138,8 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
|
||||||
//
|
//
|
||||||
// io.EOF is returned at the end of the input.
|
// io.EOF is returned at the end of the input.
|
||||||
func (tr *Reader) Next() (*Header, error) {
|
func (tr *Reader) Next() (*Header, error) {
|
||||||
var p parser
|
|
||||||
var hdr *Header
|
|
||||||
if tr.RawAccounting {
|
if tr.RawAccounting {
|
||||||
if tr.rawBytes == nil {
|
if tr.rawBytes == nil {
|
||||||
tr.rawBytes = bytes.NewBuffer(nil)
|
tr.rawBytes = bytes.NewBuffer(nil)
|
||||||
|
@ -147,114 +145,88 @@ func (tr *Reader) Next() (*Header, error) {
|
||||||
tr.rawBytes.Reset()
|
tr.rawBytes.Reset()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if tr.err == nil {
|
|
||||||
tr.skipUnread()
|
|
||||||
}
|
|
||||||
if tr.err != nil {
|
if tr.err != nil {
|
||||||
return hdr, tr.err
|
return nil, tr.err
|
||||||
}
|
}
|
||||||
hdr = tr.readHeader()
|
|
||||||
if hdr == nil {
|
var hdr *Header
|
||||||
return hdr, tr.err
|
var extHdrs map[string]string
|
||||||
}
|
|
||||||
// Check for PAX/GNU header.
|
// Externally, Next iterates through the tar archive as if it is a series of
|
||||||
switch hdr.Typeflag {
|
// files. Internally, the tar format often uses fake "files" to add meta
|
||||||
case TypeXHeader:
|
// data that describes the next file. These meta data "files" should not
|
||||||
// PAX extended header
|
// normally be visible to the outside. As such, this loop iterates through
|
||||||
headers, err := parsePAX(tr)
|
// one or more "header files" until it finds a "normal file".
|
||||||
if err != nil {
|
loop:
|
||||||
return nil, err
|
for {
|
||||||
}
|
tr.err = tr.skipUnread()
|
||||||
// We actually read the whole file,
|
|
||||||
// but this skips alignment padding
|
|
||||||
tr.skipUnread()
|
|
||||||
if tr.err != nil {
|
if tr.err != nil {
|
||||||
return nil, tr.err
|
return nil, tr.err
|
||||||
}
|
}
|
||||||
|
|
||||||
hdr = tr.readHeader()
|
hdr = tr.readHeader()
|
||||||
if hdr == nil {
|
if tr.err != nil {
|
||||||
return nil, tr.err
|
return nil, tr.err
|
||||||
}
|
}
|
||||||
mergePAX(hdr, headers)
|
// Check for PAX/GNU special headers and files.
|
||||||
|
switch hdr.Typeflag {
|
||||||
// Check for a PAX format sparse file
|
case TypeXHeader:
|
||||||
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
|
extHdrs, tr.err = parsePAX(tr)
|
||||||
if err != nil {
|
|
||||||
tr.err = err
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if sp != nil {
|
|
||||||
// Sparse files do not make sense when applied to the special header
|
|
||||||
// types that never have a data section.
|
|
||||||
if isHeaderOnlyType(hdr.Typeflag) {
|
|
||||||
tr.err = ErrHeader
|
|
||||||
return nil, tr.err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Current file is a PAX format GNU sparse file.
|
|
||||||
// Set the current file reader to a sparse file reader.
|
|
||||||
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
|
|
||||||
if tr.err != nil {
|
if tr.err != nil {
|
||||||
return nil, tr.err
|
return nil, tr.err
|
||||||
}
|
}
|
||||||
}
|
continue loop // This is a meta header affecting the next header
|
||||||
return hdr, nil
|
case TypeGNULongName, TypeGNULongLink:
|
||||||
case TypeGNULongName:
|
var realname []byte
|
||||||
// We have a GNU long name header. Its contents are the real file name.
|
realname, tr.err = ioutil.ReadAll(tr)
|
||||||
realname, err := ioutil.ReadAll(tr)
|
if tr.err != nil {
|
||||||
if err != nil {
|
return nil, tr.err
|
||||||
return nil, err
|
}
|
||||||
}
|
|
||||||
var buf []byte
|
if tr.RawAccounting {
|
||||||
if tr.RawAccounting {
|
if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
|
||||||
if _, err = tr.rawBytes.Write(realname); err != nil {
|
return nil, tr.err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert GNU extensions to use PAX headers.
|
||||||
|
if extHdrs == nil {
|
||||||
|
extHdrs = make(map[string]string)
|
||||||
|
}
|
||||||
|
var p parser
|
||||||
|
switch hdr.Typeflag {
|
||||||
|
case TypeGNULongName:
|
||||||
|
extHdrs[paxPath] = p.parseString(realname)
|
||||||
|
case TypeGNULongLink:
|
||||||
|
extHdrs[paxLinkpath] = p.parseString(realname)
|
||||||
|
}
|
||||||
|
if p.err != nil {
|
||||||
|
tr.err = p.err
|
||||||
|
return nil, tr.err
|
||||||
|
}
|
||||||
|
continue loop // This is a meta header affecting the next header
|
||||||
|
default:
|
||||||
|
mergePAX(hdr, extHdrs)
|
||||||
|
|
||||||
|
// Check for a PAX format sparse file
|
||||||
|
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
|
||||||
|
if err != nil {
|
||||||
|
tr.err = err
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
buf = make([]byte, tr.rawBytes.Len())
|
if sp != nil {
|
||||||
copy(buf[:], tr.RawBytes())
|
// Current file is a PAX format GNU sparse file.
|
||||||
}
|
// Set the current file reader to a sparse file reader.
|
||||||
hdr, err := tr.Next()
|
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
|
||||||
// since the above call to Next() resets the buffer, we need to throw the bytes over
|
if tr.err != nil {
|
||||||
if tr.RawAccounting {
|
return nil, tr.err
|
||||||
buf = append(buf, tr.RawBytes()...)
|
}
|
||||||
if _, err = tr.rawBytes.Write(buf); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
break loop // This is a file, so stop
|
||||||
}
|
}
|
||||||
hdr.Name = p.parseString(realname)
|
|
||||||
if p.err != nil {
|
|
||||||
return nil, p.err
|
|
||||||
}
|
|
||||||
return hdr, nil
|
|
||||||
case TypeGNULongLink:
|
|
||||||
// We have a GNU long link header.
|
|
||||||
realname, err := ioutil.ReadAll(tr)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
var buf []byte
|
|
||||||
if tr.RawAccounting {
|
|
||||||
if _, err = tr.rawBytes.Write(realname); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
buf = make([]byte, tr.rawBytes.Len())
|
|
||||||
copy(buf[:], tr.RawBytes())
|
|
||||||
}
|
|
||||||
hdr, err := tr.Next()
|
|
||||||
// since the above call to Next() resets the buffer, we need to throw the bytes over
|
|
||||||
if tr.RawAccounting {
|
|
||||||
buf = append(buf, tr.RawBytes()...)
|
|
||||||
if _, err = tr.rawBytes.Write(buf); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
hdr.Name = p.parseString(realname)
|
|
||||||
if p.err != nil {
|
|
||||||
return nil, p.err
|
|
||||||
}
|
|
||||||
return hdr, nil
|
|
||||||
}
|
}
|
||||||
return hdr, tr.err
|
return hdr, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
|
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
|
||||||
|
|
|
@ -288,6 +288,30 @@ var untarTests = []*untarTest{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
// Matches the behavior of GNU, BSD, and STAR tar utilities.
|
||||||
|
file: "testdata/gnu-multi-hdrs.tar",
|
||||||
|
headers: []*Header{
|
||||||
|
{
|
||||||
|
Name: "GNU2/GNU2/long-path-name",
|
||||||
|
Linkname: "GNU4/GNU4/long-linkpath-name",
|
||||||
|
ModTime: time.Unix(0, 0),
|
||||||
|
Typeflag: '2',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Matches the behavior of GNU and BSD tar utilities.
|
||||||
|
file: "testdata/pax-multi-hdrs.tar",
|
||||||
|
headers: []*Header{
|
||||||
|
{
|
||||||
|
Name: "bar",
|
||||||
|
Linkname: "PAX4/PAX4/long-linkpath-name",
|
||||||
|
ModTime: time.Unix(0, 0),
|
||||||
|
Typeflag: '2',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
file: "testdata/neg-size.tar",
|
file: "testdata/neg-size.tar",
|
||||||
err: ErrHeader,
|
err: ErrHeader,
|
||||||
|
|
BIN
archive/tar/testdata/gnu-multi-hdrs.tar
vendored
Normal file
BIN
archive/tar/testdata/gnu-multi-hdrs.tar
vendored
Normal file
Binary file not shown.
BIN
archive/tar/testdata/pax-multi-hdrs.tar
vendored
Normal file
BIN
archive/tar/testdata/pax-multi-hdrs.tar
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue