2015-02-11 13:08:03 +00:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package tar
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// Reader provides sequential access to the contents of a tar archive.
|
|
|
|
// Reader.Next advances to the next file in the archive (including the first),
|
|
|
|
// and then Reader can be treated as an io.Reader to access the file's data.
|
2015-02-11 13:08:03 +00:00
|
|
|
type Reader struct {
|
2018-09-05 21:04:10 +00:00
|
|
|
r io.Reader
|
|
|
|
pad int64 // Amount of padding (ignored) after current file entry
|
|
|
|
curr fileReader // Reader for current file entry
|
|
|
|
blk block // Buffer to use as temporary local storage
|
|
|
|
|
|
|
|
// err is a persistent error.
|
|
|
|
// It is only the responsibility of every exported method of Reader to
|
|
|
|
// ensure that this error is sticky.
|
|
|
|
err error
|
2015-10-01 08:35:15 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
type fileReader interface {
|
2015-02-11 13:08:03 +00:00
|
|
|
io.Reader
|
2018-09-05 21:04:10 +00:00
|
|
|
fileState
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
WriteTo(io.Writer) (int64, error)
|
2015-09-28 23:38:16 +00:00
|
|
|
}
|
|
|
|
|
2015-02-11 13:08:03 +00:00
|
|
|
// NewReader creates a new Reader reading from r.
|
2018-09-05 21:04:10 +00:00
|
|
|
func NewReader(r io.Reader) *Reader {
|
|
|
|
return &Reader{r: r, curr: ®FileReader{r, 0}}
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
|
|
|
// Next advances to the next entry in the tar archive.
|
2018-09-05 21:04:10 +00:00
|
|
|
// The Header.Size determines how many bytes can be read for the next file.
|
|
|
|
// Any remaining data in the current file is automatically discarded.
|
2015-02-11 13:08:03 +00:00
|
|
|
//
|
|
|
|
// io.EOF is returned at the end of the input.
|
|
|
|
func (tr *Reader) Next() (*Header, error) {
|
|
|
|
if tr.err != nil {
|
2015-09-16 07:58:56 +00:00
|
|
|
return nil, tr.err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr, err := tr.next()
|
|
|
|
tr.err = err
|
|
|
|
return hdr, err
|
|
|
|
}
|
2015-09-16 07:58:56 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
func (tr *Reader) next() (*Header, error) {
|
|
|
|
var paxHdrs map[string]string
|
|
|
|
var gnuLongName, gnuLongLink string
|
2015-09-16 07:58:56 +00:00
|
|
|
|
|
|
|
// Externally, Next iterates through the tar archive as if it is a series of
|
|
|
|
// files. Internally, the tar format often uses fake "files" to add meta
|
|
|
|
// data that describes the next file. These meta data "files" should not
|
|
|
|
// normally be visible to the outside. As such, this loop iterates through
|
|
|
|
// one or more "header files" until it finds a "normal file".
|
2018-09-05 21:04:10 +00:00
|
|
|
format := FormatUSTAR | FormatPAX | FormatGNU
|
2015-09-16 07:58:56 +00:00
|
|
|
for {
|
2018-09-05 21:04:10 +00:00
|
|
|
// Discard the remainder of the file and any padding.
|
|
|
|
if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
|
|
|
|
return nil, err
|
2015-06-13 08:53:06 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
tr.pad = 0
|
2015-09-16 07:58:56 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr, rawHdr, err := tr.readHeader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-06-13 08:53:06 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
if err := tr.handleRegularFile(hdr); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
format.mayOnlyBe(hdr.Format)
|
|
|
|
|
2015-09-16 07:58:56 +00:00
|
|
|
// Check for PAX/GNU special headers and files.
|
|
|
|
switch hdr.Typeflag {
|
2018-09-05 21:04:10 +00:00
|
|
|
case TypeXHeader, TypeXGlobalHeader:
|
|
|
|
format.mayOnlyBe(FormatPAX)
|
|
|
|
paxHdrs, err = parsePAX(tr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-08-27 18:52:06 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
if hdr.Typeflag == TypeXGlobalHeader {
|
|
|
|
mergePAX(hdr, paxHdrs)
|
|
|
|
return &Header{
|
|
|
|
Name: hdr.Name,
|
|
|
|
Typeflag: hdr.Typeflag,
|
|
|
|
Xattrs: hdr.Xattrs,
|
|
|
|
PAXRecords: hdr.PAXRecords,
|
|
|
|
Format: format,
|
|
|
|
}, nil
|
2015-09-28 23:38:16 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
continue // This is a meta header affecting the next header
|
|
|
|
case TypeGNULongName, TypeGNULongLink:
|
|
|
|
format.mayOnlyBe(FormatGNU)
|
|
|
|
realname, err := ioutil.ReadAll(tr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2015-02-19 21:49:06 +00:00
|
|
|
}
|
2015-09-16 07:58:56 +00:00
|
|
|
|
|
|
|
var p parser
|
|
|
|
switch hdr.Typeflag {
|
|
|
|
case TypeGNULongName:
|
2018-09-05 21:04:10 +00:00
|
|
|
gnuLongName = p.parseString(realname)
|
2015-09-16 07:58:56 +00:00
|
|
|
case TypeGNULongLink:
|
2018-09-05 21:04:10 +00:00
|
|
|
gnuLongLink = p.parseString(realname)
|
2015-09-16 07:58:56 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
continue // This is a meta header affecting the next header
|
2015-09-16 07:58:56 +00:00
|
|
|
default:
|
2018-09-05 21:04:10 +00:00
|
|
|
// The old GNU sparse format is handled here since it is technically
|
|
|
|
// just a regular file with additional attributes.
|
2015-09-16 07:58:56 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
if err := mergePAX(hdr, paxHdrs); err != nil {
|
2015-02-19 21:49:06 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
if gnuLongName != "" {
|
|
|
|
hdr.Name = gnuLongName
|
|
|
|
}
|
|
|
|
if gnuLongLink != "" {
|
|
|
|
hdr.Linkname = gnuLongLink
|
|
|
|
}
|
|
|
|
if hdr.Typeflag == TypeRegA {
|
|
|
|
if strings.HasSuffix(hdr.Name, "/") {
|
|
|
|
hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
|
|
|
|
} else {
|
|
|
|
hdr.Typeflag = TypeReg
|
2015-09-16 07:58:56 +00:00
|
|
|
}
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
|
|
|
|
// The extended headers may have updated the size.
|
|
|
|
// Thus, setup the regFileReader again after merging PAX headers.
|
|
|
|
if err := tr.handleRegularFile(hdr); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sparse formats rely on being able to read from the logical data
|
|
|
|
// section; there must be a preceding call to handleRegularFile.
|
|
|
|
if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the final guess at the format.
|
|
|
|
if format.has(FormatUSTAR) && format.has(FormatPAX) {
|
|
|
|
format.mayOnlyBe(FormatUSTAR)
|
|
|
|
}
|
|
|
|
hdr.Format = format
|
|
|
|
return hdr, nil // This is a file, so stop
|
2015-02-19 21:49:06 +00:00
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// handleRegularFile sets up the current file reader and padding such that it
|
|
|
|
// can only read the following logical data section. It will properly handle
|
|
|
|
// special headers that contain no data section.
|
|
|
|
func (tr *Reader) handleRegularFile(hdr *Header) error {
|
|
|
|
nb := hdr.Size
|
|
|
|
if isHeaderOnlyType(hdr.Typeflag) {
|
|
|
|
nb = 0
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
if nb < 0 {
|
|
|
|
return ErrHeader
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
tr.pad = blockPadding(nb)
|
|
|
|
tr.curr = ®FileReader{r: tr.r, nb: nb}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleSparseFile checks if the current file is a sparse format of any type
|
|
|
|
// and sets the curr reader appropriately.
|
|
|
|
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
|
|
|
|
var spd sparseDatas
|
|
|
|
var err error
|
|
|
|
if hdr.Typeflag == TypeGNUSparse {
|
|
|
|
spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
|
|
|
|
} else {
|
|
|
|
spd, err = tr.readGNUSparsePAXHeaders(hdr)
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
|
|
|
|
// If sp is non-nil, then this is a sparse file.
|
|
|
|
// Note that it is possible for len(sp) == 0.
|
|
|
|
if err == nil && spd != nil {
|
|
|
|
if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
|
|
|
|
return ErrHeader
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
sph := invertSparseEntries(spd, hdr.Size)
|
|
|
|
tr.curr = &sparseFileReader{tr.curr, sph, 0}
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
|
|
|
|
// If they are found, then this function reads the sparse map and returns it.
|
|
|
|
// This assumes that 0.0 headers have already been converted to 0.1 headers
|
|
|
|
// by the PAX header parsing logic.
|
|
|
|
func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
|
|
|
|
// Identify the version of GNU headers.
|
|
|
|
var is1x0 bool
|
|
|
|
major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
|
|
|
|
switch {
|
|
|
|
case major == "0" && (minor == "0" || minor == "1"):
|
|
|
|
is1x0 = false
|
|
|
|
case major == "1" && minor == "0":
|
|
|
|
is1x0 = true
|
|
|
|
case major != "" || minor != "":
|
|
|
|
return nil, nil // Unknown GNU sparse PAX version
|
|
|
|
case hdr.PAXRecords[paxGNUSparseMap] != "":
|
|
|
|
is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
|
|
|
|
default:
|
|
|
|
return nil, nil // Not a PAX format GNU sparse file.
|
|
|
|
}
|
|
|
|
hdr.Format.mayOnlyBe(FormatPAX)
|
|
|
|
|
|
|
|
// Update hdr from GNU sparse PAX headers.
|
|
|
|
if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
|
|
|
|
hdr.Name = name
|
|
|
|
}
|
|
|
|
size := hdr.PAXRecords[paxGNUSparseSize]
|
|
|
|
if size == "" {
|
|
|
|
size = hdr.PAXRecords[paxGNUSparseRealSize]
|
|
|
|
}
|
|
|
|
if size != "" {
|
|
|
|
n, err := strconv.ParseInt(size, 10, 64)
|
2015-02-11 13:08:03 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.Size = n
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// Read the sparse map according to the appropriate format.
|
|
|
|
if is1x0 {
|
|
|
|
return readGNUSparseMap1x0(tr.curr)
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return readGNUSparseMap0x1(hdr.PAXRecords)
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// mergePAX merges paxHdrs into hdr for all relevant fields of Header.
|
|
|
|
func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
|
|
|
|
for k, v := range paxHdrs {
|
|
|
|
if v == "" {
|
|
|
|
continue // Keep the original USTAR value
|
|
|
|
}
|
|
|
|
var id64 int64
|
2015-02-11 13:08:03 +00:00
|
|
|
switch k {
|
|
|
|
case paxPath:
|
|
|
|
hdr.Name = v
|
|
|
|
case paxLinkpath:
|
|
|
|
hdr.Linkname = v
|
|
|
|
case paxUname:
|
|
|
|
hdr.Uname = v
|
2018-09-05 21:04:10 +00:00
|
|
|
case paxGname:
|
|
|
|
hdr.Gname = v
|
2015-02-11 13:08:03 +00:00
|
|
|
case paxUid:
|
2018-09-05 21:04:10 +00:00
|
|
|
id64, err = strconv.ParseInt(v, 10, 64)
|
|
|
|
hdr.Uid = int(id64) // Integer overflow possible
|
2015-02-11 13:08:03 +00:00
|
|
|
case paxGid:
|
2018-09-05 21:04:10 +00:00
|
|
|
id64, err = strconv.ParseInt(v, 10, 64)
|
|
|
|
hdr.Gid = int(id64) // Integer overflow possible
|
2015-02-11 13:08:03 +00:00
|
|
|
case paxAtime:
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.AccessTime, err = parsePAXTime(v)
|
2015-02-11 13:08:03 +00:00
|
|
|
case paxMtime:
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.ModTime, err = parsePAXTime(v)
|
2015-02-11 13:08:03 +00:00
|
|
|
case paxCtime:
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.ChangeTime, err = parsePAXTime(v)
|
2015-02-11 13:08:03 +00:00
|
|
|
case paxSize:
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.Size, err = strconv.ParseInt(v, 10, 64)
|
2015-02-11 13:08:03 +00:00
|
|
|
default:
|
2018-09-05 21:04:10 +00:00
|
|
|
if strings.HasPrefix(k, paxSchilyXattr) {
|
2015-02-11 13:08:03 +00:00
|
|
|
if hdr.Xattrs == nil {
|
|
|
|
hdr.Xattrs = make(map[string]string)
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.Xattrs[k[len(paxSchilyXattr):]] = v
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if err != nil {
|
2018-09-05 21:04:10 +00:00
|
|
|
return ErrHeader
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.PAXRecords = paxHdrs
|
|
|
|
return nil
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// parsePAX parses PAX headers.
|
|
|
|
// If an extended header (type 'x') is invalid, ErrHeader is returned
|
|
|
|
func parsePAX(r io.Reader) (map[string]string, error) {
|
|
|
|
buf, err := ioutil.ReadAll(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-10-01 08:35:15 +00:00
|
|
|
sbuf := string(buf)
|
2015-02-11 13:08:03 +00:00
|
|
|
|
|
|
|
// For GNU PAX sparse format 0.0 support.
|
2018-09-05 21:04:10 +00:00
|
|
|
// This function transforms the sparse format 0.0 headers into format 0.1
|
|
|
|
// headers since 0.0 headers were not PAX compliant.
|
|
|
|
var sparseMap []string
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
paxHdrs := make(map[string]string)
|
2015-10-01 08:35:15 +00:00
|
|
|
for len(sbuf) > 0 {
|
|
|
|
key, value, residual, err := parsePAXRecord(sbuf)
|
|
|
|
if err != nil {
|
2015-02-11 13:08:03 +00:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2015-10-01 08:35:15 +00:00
|
|
|
sbuf = residual
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
switch key {
|
|
|
|
case paxGNUSparseOffset, paxGNUSparseNumBytes:
|
|
|
|
// Validate sparse header order and value.
|
|
|
|
if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
|
|
|
|
(len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
|
|
|
|
strings.Contains(value, ",") {
|
|
|
|
return nil, ErrHeader
|
2015-12-02 23:41:44 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
sparseMap = append(sparseMap, value)
|
|
|
|
default:
|
|
|
|
paxHdrs[key] = value
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
if len(sparseMap) > 0 {
|
|
|
|
paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
|
2015-10-01 08:04:24 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return paxHdrs, nil
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2015-10-01 08:04:24 +00:00
|
|
|
// readHeader reads the next block header and assumes that the underlying reader
|
2018-09-05 21:04:10 +00:00
|
|
|
// is already aligned to a block boundary. It returns the raw block of the
|
|
|
|
// header in case further processing is required.
|
2015-10-01 08:04:24 +00:00
|
|
|
//
|
|
|
|
// The err will be set to io.EOF only when one of the following occurs:
|
|
|
|
// * Exactly 0 bytes are read and EOF is hit.
|
|
|
|
// * Exactly 1 block of zeros is read and EOF is hit.
|
|
|
|
// * At least 2 blocks of zeros are read.
|
2018-09-05 21:04:10 +00:00
|
|
|
func (tr *Reader) readHeader() (*Header, *block, error) {
|
2015-02-11 13:08:03 +00:00
|
|
|
// Two blocks of zero bytes marks the end of the archive.
|
2018-09-05 21:04:10 +00:00
|
|
|
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
|
|
|
return nil, nil, err // EOF is okay here; exactly 0 bytes read
|
|
|
|
}
|
|
|
|
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
|
|
|
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
|
|
|
|
return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
|
2015-02-19 21:49:06 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
|
|
|
return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return nil, nil, ErrHeader // Zero block and then non-zero block
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// Verify the header matches a known format.
|
|
|
|
format := tr.blk.GetFormat()
|
|
|
|
if format == FormatUnknown {
|
|
|
|
return nil, nil, ErrHeader
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2015-10-01 08:35:15 +00:00
|
|
|
var p parser
|
2015-02-11 13:08:03 +00:00
|
|
|
hdr := new(Header)
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// Unpack the V7 header.
|
|
|
|
v7 := tr.blk.V7()
|
|
|
|
hdr.Typeflag = v7.TypeFlag()[0]
|
|
|
|
hdr.Name = p.parseString(v7.Name())
|
|
|
|
hdr.Linkname = p.parseString(v7.LinkName())
|
|
|
|
hdr.Size = p.parseNumeric(v7.Size())
|
|
|
|
hdr.Mode = p.parseNumeric(v7.Mode())
|
|
|
|
hdr.Uid = int(p.parseNumeric(v7.UID()))
|
|
|
|
hdr.Gid = int(p.parseNumeric(v7.GID()))
|
|
|
|
hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
|
|
|
|
|
|
|
|
// Unpack format specific fields.
|
|
|
|
if format > formatV7 {
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
hdr.Uname = p.parseString(ustar.UserName())
|
|
|
|
hdr.Gname = p.parseString(ustar.GroupName())
|
|
|
|
hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
|
|
|
|
hdr.Devminor = p.parseNumeric(ustar.DevMinor())
|
|
|
|
|
2015-02-11 13:08:03 +00:00
|
|
|
var prefix string
|
2018-09-05 21:04:10 +00:00
|
|
|
switch {
|
|
|
|
case format.has(FormatUSTAR | FormatPAX):
|
|
|
|
hdr.Format = format
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
prefix = p.parseString(ustar.Prefix())
|
|
|
|
|
|
|
|
// For Format detection, check if block is properly formatted since
|
|
|
|
// the parser is more liberal than what USTAR actually permits.
|
|
|
|
notASCII := func(r rune) bool { return r >= 0x80 }
|
|
|
|
if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
|
|
|
|
hdr.Format = FormatUnknown // Non-ASCII characters in block.
|
|
|
|
}
|
|
|
|
nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
|
|
|
|
if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
|
|
|
|
nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
|
|
|
|
hdr.Format = FormatUnknown // Numeric fields must end in NUL
|
|
|
|
}
|
|
|
|
case format.has(formatSTAR):
|
|
|
|
star := tr.blk.STAR()
|
|
|
|
prefix = p.parseString(star.Prefix())
|
|
|
|
hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
|
|
|
|
hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
|
|
|
|
case format.has(FormatGNU):
|
|
|
|
hdr.Format = format
|
|
|
|
var p2 parser
|
|
|
|
gnu := tr.blk.GNU()
|
|
|
|
if b := gnu.AccessTime(); b[0] != 0 {
|
|
|
|
hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
|
|
|
|
}
|
|
|
|
if b := gnu.ChangeTime(); b[0] != 0 {
|
|
|
|
hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Prior to Go1.8, the Writer had a bug where it would output
|
|
|
|
// an invalid tar file in certain rare situations because the logic
|
|
|
|
// incorrectly believed that the old GNU format had a prefix field.
|
|
|
|
// This is wrong and leads to an output file that mangles the
|
|
|
|
// atime and ctime fields, which are often left unused.
|
|
|
|
//
|
|
|
|
// In order to continue reading tar files created by former, buggy
|
|
|
|
// versions of Go, we skeptically parse the atime and ctime fields.
|
|
|
|
// If we are unable to parse them and the prefix field looks like
|
|
|
|
// an ASCII string, then we fallback on the pre-Go1.8 behavior
|
|
|
|
// of treating these fields as the USTAR prefix field.
|
|
|
|
//
|
|
|
|
// Note that this will not use the fallback logic for all possible
|
|
|
|
// files generated by a pre-Go1.8 toolchain. If the generated file
|
|
|
|
// happened to have a prefix field that parses as valid
|
|
|
|
// atime and ctime fields (e.g., when they are valid octal strings),
|
|
|
|
// then it is impossible to distinguish between an valid GNU file
|
|
|
|
// and an invalid pre-Go1.8 file.
|
|
|
|
//
|
|
|
|
// See https://golang.org/issues/12594
|
|
|
|
// See https://golang.org/issues/21005
|
|
|
|
if p2.err != nil {
|
|
|
|
hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
|
|
|
|
ustar := tr.blk.USTAR()
|
|
|
|
if s := p.parseString(ustar.Prefix()); isASCII(s) {
|
|
|
|
prefix = s
|
|
|
|
}
|
|
|
|
hdr.Format = FormatUnknown // Buggy file is not GNU
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
if len(prefix) > 0 {
|
|
|
|
hdr.Name = prefix + "/" + hdr.Name
|
|
|
|
}
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return hdr, &tr.blk, p.err
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
|
|
|
|
// The sparse map is stored in the tar header if it's small enough.
|
|
|
|
// If it's larger than four entries, then one or more extension headers are used
|
|
|
|
// to store the rest of the sparse map.
|
|
|
|
//
|
|
|
|
// The Header.Size does not reflect the size of any extended headers used.
|
|
|
|
// Thus, this function will read from the raw io.Reader to fetch extra headers.
|
|
|
|
// This method mutates blk in the process.
|
|
|
|
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
|
|
|
|
// Make sure that the input format is GNU.
|
|
|
|
// Unfortunately, the STAR format also has a sparse header format that uses
|
|
|
|
// the same type flag but has a completely different layout.
|
|
|
|
if blk.GetFormat() != FormatGNU {
|
|
|
|
return nil, ErrHeader
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.Format.mayOnlyBe(FormatGNU)
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2015-10-01 08:35:15 +00:00
|
|
|
var p parser
|
2018-09-05 21:04:10 +00:00
|
|
|
hdr.Size = p.parseNumeric(blk.GNU().RealSize())
|
|
|
|
if p.err != nil {
|
|
|
|
return nil, p.err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
s := blk.GNU().Sparse()
|
|
|
|
spd := make(sparseDatas, 0, s.MaxEntries())
|
|
|
|
for {
|
|
|
|
for i := 0; i < s.MaxEntries(); i++ {
|
|
|
|
// This termination condition is identical to GNU and BSD tar.
|
|
|
|
if s.Entry(i).Offset()[0] == 0x00 {
|
|
|
|
break // Don't return, need to process extended headers (even if empty)
|
2015-02-19 23:07:22 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
offset := p.parseNumeric(s.Entry(i).Offset())
|
|
|
|
length := p.parseNumeric(s.Entry(i).Length())
|
2015-10-01 08:35:15 +00:00
|
|
|
if p.err != nil {
|
2018-09-05 21:04:10 +00:00
|
|
|
return nil, p.err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
spd = append(spd, sparseEntry{Offset: offset, Length: length})
|
|
|
|
}
|
|
|
|
|
|
|
|
if s.IsExtended()[0] > 0 {
|
|
|
|
// There are more entries. Read an extension header and parse its entries.
|
|
|
|
if _, err := mustReadFull(tr.r, blk[:]); err != nil {
|
|
|
|
return nil, err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
s = blk.Sparse()
|
|
|
|
continue
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return spd, nil // Done
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
|
|
|
|
// version 1.0. The format of the sparse map consists of a series of
|
|
|
|
// newline-terminated numeric fields. The first field is the number of entries
|
|
|
|
// and is always present. Following this are the entries, consisting of two
|
2018-09-05 21:04:10 +00:00
|
|
|
// fields (offset, length). This function must stop reading at the end
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
// boundary of the block containing the last newline.
|
|
|
|
//
|
|
|
|
// Note that the GNU manual says that numeric values should be encoded in octal
|
|
|
|
// format. However, the GNU tar utility itself outputs these values in decimal.
|
|
|
|
// As such, this library treats values as being encoded in decimal.
|
2018-09-05 21:04:10 +00:00
|
|
|
func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
|
|
|
|
var (
|
|
|
|
cntNewline int64
|
|
|
|
buf bytes.Buffer
|
|
|
|
blk block
|
|
|
|
)
|
|
|
|
|
|
|
|
// feedTokens copies data in blocks from r into buf until there are
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
// at least cnt newlines in buf. It will not read more blocks than needed.
|
2018-09-05 21:04:10 +00:00
|
|
|
feedTokens := func(n int64) error {
|
|
|
|
for cntNewline < n {
|
|
|
|
if _, err := mustReadFull(r, blk[:]); err != nil {
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
return err
|
2015-02-19 21:49:06 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
buf.Write(blk[:])
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
for _, c := range blk {
|
|
|
|
if c == '\n' {
|
|
|
|
cntNewline++
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
// nextToken gets the next token delimited by a newline. This assumes that
|
|
|
|
// at least one newline exists in the buffer.
|
2018-09-05 21:04:10 +00:00
|
|
|
nextToken := func() string {
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
cntNewline--
|
|
|
|
tok, _ := buf.ReadString('\n')
|
2018-09-05 21:04:10 +00:00
|
|
|
return strings.TrimRight(tok, "\n")
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
// Parse for the number of entries.
|
|
|
|
// Use integer overflow resistant math to check this.
|
|
|
|
if err := feedTokens(1); err != nil {
|
2015-02-11 13:08:03 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
|
|
|
|
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
|
|
|
return nil, ErrHeader
|
2015-02-19 21:49:06 +00:00
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
// Parse for all member entries.
|
|
|
|
// numEntries is trusted after this since a potential attacker must have
|
|
|
|
// committed resources proportional to what this library used.
|
|
|
|
if err := feedTokens(2 * numEntries); err != nil {
|
2015-02-11 13:08:03 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
spd := make(sparseDatas, 0, numEntries)
|
2015-02-11 13:08:03 +00:00
|
|
|
for i := int64(0); i < numEntries; i++ {
|
2018-09-05 21:04:10 +00:00
|
|
|
offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
|
|
|
|
length, err2 := strconv.ParseInt(nextToken(), 10, 64)
|
|
|
|
if err1 != nil || err2 != nil {
|
archive/tar: properly handle header-only "files" in Reader
Certain special type-flags, specifically 1, 2, 3, 4, 5, 6,
do not have a data section. Thus, regardless of what the size field
says, we should not attempt to read any data for these special types.
The relevant PAX and USTAR specification says:
<<<
If the typeflag field is set to specify a file to be of type 1 (a link)
or 2 (a symbolic link), the size field shall be specified as zero.
If the typeflag field is set to specify a file of type 5 (directory),
the size field shall be interpreted as described under the definition
of that record type. No data logical records are stored for types 1, 2, or 5.
If the typeflag field is set to 3 (character special file),
4 (block special file), or 6 (FIFO), the meaning of the size field is
unspecified by this volume of POSIX.1-2008, and no data logical records shall
be stored on the medium.
Additionally, for type 6, the size field shall be ignored when reading.
If the typeflag field is set to any other value, the number of logical
records written following the header shall be (size+511)/512, ignoring
any fraction in the result of the division.
>>>
Contrary to the specification, we do not assert that the size field
is zero for type 1 and 2 since we liberally accept non-conforming formats.
Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed
Reviewed-on: https://go-review.googlesource.com/16614
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-11-04 02:12:31 +00:00
|
|
|
return nil, ErrHeader
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
spd = append(spd, sparseEntry{Offset: offset, Length: length})
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return spd, nil
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2015-10-06 08:04:18 +00:00
|
|
|
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
|
|
|
|
// version 0.1. The sparse map is stored in the PAX headers.
|
2018-09-05 21:04:10 +00:00
|
|
|
func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
|
2015-10-06 08:04:18 +00:00
|
|
|
// Get number of entries.
|
|
|
|
// Use integer overflow resistant math to check this.
|
2018-09-05 21:04:10 +00:00
|
|
|
numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
|
2015-10-06 08:04:18 +00:00
|
|
|
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
|
|
|
|
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
|
2015-02-11 13:08:03 +00:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
|
2015-10-06 08:04:18 +00:00
|
|
|
// There should be two numbers in sparseMap for each entry.
|
2018-09-05 21:04:10 +00:00
|
|
|
sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
|
|
|
|
if len(sparseMap) == 1 && sparseMap[0] == "" {
|
|
|
|
sparseMap = sparseMap[:0]
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
if int64(len(sparseMap)) != 2*numEntries {
|
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
|
|
|
|
2015-10-06 08:04:18 +00:00
|
|
|
// Loop through the entries in the sparse map.
|
|
|
|
// numEntries is trusted now.
|
2018-09-05 21:04:10 +00:00
|
|
|
spd := make(sparseDatas, 0, numEntries)
|
|
|
|
for len(sparseMap) >= 2 {
|
|
|
|
offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
|
|
|
|
length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
|
|
|
|
if err1 != nil || err2 != nil {
|
2015-02-11 13:08:03 +00:00
|
|
|
return nil, ErrHeader
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
spd = append(spd, sparseEntry{Offset: offset, Length: length})
|
|
|
|
sparseMap = sparseMap[2:]
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return spd, nil
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// Read reads from the current file in the tar archive.
|
|
|
|
// It returns (0, io.EOF) when it reaches the end of that file,
|
|
|
|
// until Next is called to advance to the next file.
|
|
|
|
//
|
|
|
|
// If the current file is sparse, then the regions marked as a hole
|
|
|
|
// are read back as NUL-bytes.
|
2015-12-17 07:10:14 +00:00
|
|
|
//
|
2018-09-05 21:04:10 +00:00
|
|
|
// Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
|
|
|
|
// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
|
2015-12-17 07:10:14 +00:00
|
|
|
// the Header.Size claims.
|
2018-09-05 21:04:10 +00:00
|
|
|
func (tr *Reader) Read(b []byte) (int, error) {
|
2015-10-01 10:08:18 +00:00
|
|
|
if tr.err != nil {
|
|
|
|
return 0, tr.err
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
n, err := tr.curr.Read(b)
|
2015-02-11 13:08:03 +00:00
|
|
|
if err != nil && err != io.EOF {
|
|
|
|
tr.err = err
|
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return n, err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// writeTo writes the content of the current file to w.
|
|
|
|
// The bytes written matches the number of remaining bytes in the current file.
|
|
|
|
//
|
|
|
|
// If the current file is sparse and w is an io.WriteSeeker,
|
|
|
|
// then writeTo uses Seek to skip past holes defined in Header.SparseHoles,
|
|
|
|
// assuming that skipped regions are filled with NULs.
|
|
|
|
// This always writes the last byte to ensure w is the right size.
|
|
|
|
//
|
|
|
|
// TODO(dsnet): Re-export this when adding sparse file support.
|
|
|
|
// See https://golang.org/issue/22735
|
|
|
|
func (tr *Reader) writeTo(w io.Writer) (int64, error) {
|
|
|
|
if tr.err != nil {
|
|
|
|
return 0, tr.err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
n, err := tr.curr.WriteTo(w)
|
|
|
|
if err != nil {
|
|
|
|
tr.err = err
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return n, err
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// regFileReader reads the data of a regular file entry, exposing at most
// nb further bytes of the underlying reader.
type regFileReader struct {
	r  io.Reader // Underlying Reader
	nb int64     // Number of remaining bytes to read
}

// Read reads up to len(b) bytes, never going past the end of the entry.
// It returns io.EOF once the entry is exhausted, and io.ErrUnexpectedEOF
// if the underlying reader ends while bytes are still owed.
func (fr *regFileReader) Read(b []byte) (n int, err error) {
	// Never hand the underlying reader more room than the entry has left.
	if limit := fr.nb; limit < int64(len(b)) {
		b = b[:limit]
	}
	if len(b) != 0 {
		n, err = fr.r.Read(b)
		fr.nb -= int64(n)
	}

	if err == io.EOF && fr.nb > 0 {
		return n, io.ErrUnexpectedEOF
	}
	if err == nil && fr.nb == 0 {
		return n, io.EOF
	}
	return n, err
}

// WriteTo copies the rest of the entry into w.
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
	// Wrapping fr hides its WriteTo method so that io.Copy goes through
	// Read instead of calling back into this method.
	src := struct{ io.Reader }{fr}
	return io.Copy(w, src)
}

// LogicalRemaining reports the number of bytes left to read in the entry.
func (fr regFileReader) LogicalRemaining() int64 {
	return fr.nb
}

// PhysicalRemaining reports the number of bytes left to read in the entry;
// for a regular file this is the same value as LogicalRemaining.
func (fr regFileReader) PhysicalRemaining() int64 {
	return fr.nb
}
|
|
|
|
|
|
|
|
// sparseFileReader is a fileReader for reading data from a sparse file entry.
|
|
|
|
type sparseFileReader struct {
|
|
|
|
fr fileReader // Underlying fileReader
|
|
|
|
sp sparseHoles // Normalized list of sparse holes
|
|
|
|
pos int64 // Current position in sparse file
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
|
|
|
|
finished := int64(len(b)) >= sr.LogicalRemaining()
|
|
|
|
if finished {
|
|
|
|
b = b[:sr.LogicalRemaining()]
|
2015-09-28 23:38:16 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
b0 := b
|
|
|
|
endPos := sr.pos + int64(len(b))
|
|
|
|
for endPos > sr.pos && err == nil {
|
|
|
|
var nf int // Bytes read in fragment
|
|
|
|
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
|
|
|
|
if sr.pos < holeStart { // In a data fragment
|
|
|
|
bf := b[:min(int64(len(b)), holeStart-sr.pos)]
|
|
|
|
nf, err = tryReadFull(sr.fr, bf)
|
|
|
|
} else { // In a hole fragment
|
|
|
|
bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
|
|
|
|
nf, err = tryReadFull(zeroReader{}, bf)
|
|
|
|
}
|
|
|
|
b = b[nf:]
|
|
|
|
sr.pos += int64(nf)
|
|
|
|
if sr.pos >= holeEnd && len(sr.sp) > 1 {
|
|
|
|
sr.sp = sr.sp[1:] // Ensure last fragment always remains
|
2015-09-28 23:38:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
n = len(b0) - len(b)
|
|
|
|
switch {
|
|
|
|
case err == io.EOF:
|
|
|
|
return n, errMissData // Less data in dense file than sparse file
|
|
|
|
case err != nil:
|
|
|
|
return n, err
|
|
|
|
case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
|
|
|
|
return n, errUnrefData // More data in dense file than sparse file
|
|
|
|
case finished:
|
|
|
|
return n, io.EOF
|
|
|
|
default:
|
|
|
|
return n, nil
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
|
|
|
|
ws, ok := w.(io.WriteSeeker)
|
|
|
|
if ok {
|
|
|
|
if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
|
|
|
|
ok = false // Not all io.Seeker can really seek
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !ok {
|
|
|
|
return io.Copy(w, struct{ io.Reader }{sr})
|
|
|
|
}
|
|
|
|
|
|
|
|
var writeLastByte bool
|
|
|
|
pos0 := sr.pos
|
|
|
|
for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
|
|
|
|
var nf int64 // Size of fragment
|
|
|
|
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
|
|
|
|
if sr.pos < holeStart { // In a data fragment
|
|
|
|
nf = holeStart - sr.pos
|
|
|
|
nf, err = io.CopyN(ws, sr.fr, nf)
|
|
|
|
} else { // In a hole fragment
|
|
|
|
nf = holeEnd - sr.pos
|
|
|
|
if sr.PhysicalRemaining() == 0 {
|
|
|
|
writeLastByte = true
|
|
|
|
nf--
|
|
|
|
}
|
|
|
|
_, err = ws.Seek(nf, io.SeekCurrent)
|
|
|
|
}
|
|
|
|
sr.pos += nf
|
|
|
|
if sr.pos >= holeEnd && len(sr.sp) > 1 {
|
|
|
|
sr.sp = sr.sp[1:] // Ensure last fragment always remains
|
|
|
|
}
|
2015-09-28 23:38:16 +00:00
|
|
|
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// If the last fragment is a hole, then seek to 1-byte before EOF, and
|
|
|
|
// write a single byte to ensure the file is the right size.
|
|
|
|
if writeLastByte && err == nil {
|
|
|
|
_, err = ws.Write([]byte{0})
|
|
|
|
sr.pos++
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2015-09-28 23:38:16 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
n = sr.pos - pos0
|
|
|
|
switch {
|
|
|
|
case err == io.EOF:
|
|
|
|
return n, errMissData // Less data in dense file than sparse file
|
|
|
|
case err != nil:
|
|
|
|
return n, err
|
|
|
|
case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
|
|
|
|
return n, errUnrefData // More data in dense file than sparse file
|
|
|
|
default:
|
|
|
|
return n, nil
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (sr sparseFileReader) LogicalRemaining() int64 {
|
|
|
|
return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
|
|
|
|
}
|
|
|
|
func (sr sparseFileReader) PhysicalRemaining() int64 {
|
|
|
|
return sr.fr.PhysicalRemaining()
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// zeroReader is an io.Reader that yields an endless stream of NUL bytes.
type zeroReader struct{}

// Read overwrites every byte of b with zero and reports len(b) bytes read.
// It never returns an error.
func (zeroReader) Read(b []byte) (int, error) {
	n := len(b)
	for i := 0; i < n; i++ {
		b[i] = 0
	}
	return n, nil
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// mustReadFull is like io.ReadFull except it returns
|
|
|
|
// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.
|
|
|
|
func mustReadFull(r io.Reader, b []byte) (int, error) {
|
|
|
|
n, err := tryReadFull(r, b)
|
2015-09-28 23:38:16 +00:00
|
|
|
if err == io.EOF {
|
2018-09-05 21:04:10 +00:00
|
|
|
err = io.ErrUnexpectedEOF
|
2015-02-11 13:08:03 +00:00
|
|
|
}
|
2018-09-05 21:04:10 +00:00
|
|
|
return n, err
|
|
|
|
}
|
2015-02-11 13:08:03 +00:00
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// tryReadFull behaves like io.ReadFull, with the difference that hitting
// io.EOF before len(b) bytes have been read is reported as io.EOF.
// An io.EOF that arrives together with the final byte is suppressed.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) && err == nil {
		var got int
		got, err = r.Read(b[n:])
		n += got
	}
	if err == io.EOF && n == len(b) {
		return n, nil
	}
	return n, err
}
|
|
|
|
|
2018-09-05 21:04:10 +00:00
|
|
|
// discard skips over the next n bytes of r and reports an error if fewer
// than n bytes could be skipped.
func discard(r io.Reader, n int64) error {
	// When r can seek, jump to the last byte before the end of the region
	// rather than past it. Seek is often lazy about reporting errors, which
	// would hide a truncated stream; leaving one byte for the io.CopyN below
	// forces any IO error to surface.
	var viaSeek int64 // Number of bytes skipped via Seek
	if seeker, canSeek := r.(io.Seeker); canSeek && n > 1 {
		// Not all io.Seeker can actually Seek. For example, os.Stdin
		// implements io.Seeker, but calling Seek always returns an error and
		// performs no action. A harmless seek to the current position tells
		// us whether seeking is really supported.
		if before, err := seeker.Seek(0, io.SeekCurrent); err == nil && before >= 0 {
			// Seek seems supported, so perform the real Seek.
			after, err := seeker.Seek(n-1, io.SeekCurrent)
			if err != nil || after < 0 {
				return err
			}
			viaSeek = after - before
		}
	}

	viaCopy, err := io.CopyN(ioutil.Discard, r, n-viaSeek)
	if err != io.EOF || viaSeek+viaCopy >= n {
		return err
	}
	return io.ErrUnexpectedEOF
}
|