1
0
Fork 1
mirror of https://github.com/vbatts/tar-split.git synced 2024-11-26 01:35:39 +00:00

Merge pull request #31 from vbatts/tar-go1.6

Tar go1.6
This commit is contained in:
Vincent Batts 2016-02-15 09:41:56 -05:00
commit 862ccd05bc
13 changed files with 1407 additions and 755 deletions

View file

@ -327,3 +327,14 @@ func toASCII(s string) string {
} }
return buf.String() return buf.String()
} }
// isHeaderOnlyType checks if the given type flag is of the type that has no
// data section even if a size is specified.
func isHeaderOnlyType(flag byte) bool {
switch flag {
case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
return true
default:
return false
}
}

View file

@ -26,7 +26,7 @@ func Example() {
}{ }{
{"readme.txt", "This archive contains some text files."}, {"readme.txt", "This archive contains some text files."},
{"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"},
{"todo.txt", "Get animal handling licence."}, {"todo.txt", "Get animal handling license."},
} }
for _, file := range files { for _, file := range files {
hdr := &tar.Header{ hdr := &tar.Header{
@ -76,5 +76,5 @@ func Example() {
// Geoffrey // Geoffrey
// Gonzo // Gonzo
// Contents of todo.txt: // Contents of todo.txt:
// Get animal handling licence. // Get animal handling license.
} }

View file

@ -12,6 +12,7 @@ import (
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@ -39,6 +40,10 @@ type Reader struct {
rawBytes *bytes.Buffer // last raw bits rawBytes *bytes.Buffer // last raw bits
} }
type parser struct {
err error // Last error seen
}
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
// This includes the header and padding. // This includes the header and padding.
// //
@ -70,12 +75,36 @@ type regFileReader struct {
nb int64 // number of unread bytes for current file entry nb int64 // number of unread bytes for current file entry
} }
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. // A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct { type sparseFileReader struct {
rfr *regFileReader // reads the sparse-encoded file data rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // the sparse map for the file sp []sparseEntry // The sparse map for the file
pos int64 // keeps track of file position pos int64 // Keeps track of file position
tot int64 // total size of the file total int64 // Total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
} }
// Keywords for GNU sparse files in a PAX extended header // Keywords for GNU sparse files in a PAX extended header
@ -109,7 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
// //
// io.EOF is returned at the end of the input. // io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) { func (tr *Reader) Next() (*Header, error) {
var hdr *Header
if tr.RawAccounting { if tr.RawAccounting {
if tr.rawBytes == nil { if tr.rawBytes == nil {
tr.rawBytes = bytes.NewBuffer(nil) tr.rawBytes = bytes.NewBuffer(nil)
@ -117,38 +145,72 @@ func (tr *Reader) Next() (*Header, error) {
tr.rawBytes.Reset() tr.rawBytes.Reset()
} }
} }
if tr.err == nil {
tr.skipUnread()
}
if tr.err != nil { if tr.err != nil {
return hdr, tr.err return nil, tr.err
} }
var hdr *Header
var extHdrs map[string]string
// Externally, Next iterates through the tar archive as if it is a series of
// files. Internally, the tar format often uses fake "files" to add meta
// data that describes the next file. These meta data "files" should not
// normally be visible to the outside. As such, this loop iterates through
// one or more "header files" until it finds a "normal file".
loop:
for {
tr.err = tr.skipUnread()
if tr.err != nil {
return nil, tr.err
}
hdr = tr.readHeader() hdr = tr.readHeader()
if hdr == nil { if tr.err != nil {
return hdr, tr.err return nil, tr.err
} }
// Check for PAX/GNU header. // Check for PAX/GNU special headers and files.
switch hdr.Typeflag { switch hdr.Typeflag {
case TypeXHeader: case TypeXHeader:
// PAX extended header extHdrs, tr.err = parsePAX(tr)
headers, err := parsePAX(tr)
if err != nil {
return nil, err
}
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
if tr.err != nil { if tr.err != nil {
return nil, tr.err return nil, tr.err
} }
hdr = tr.readHeader() continue loop // This is a meta header affecting the next header
if hdr == nil { case TypeGNULongName, TypeGNULongLink:
var realname []byte
realname, tr.err = ioutil.ReadAll(tr)
if tr.err != nil {
return nil, tr.err return nil, tr.err
} }
mergePAX(hdr, headers)
if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
return nil, tr.err
}
}
// Convert GNU extensions to use PAX headers.
if extHdrs == nil {
extHdrs = make(map[string]string)
}
var p parser
switch hdr.Typeflag {
case TypeGNULongName:
extHdrs[paxPath] = p.parseString(realname)
case TypeGNULongLink:
extHdrs[paxLinkpath] = p.parseString(realname)
}
if p.err != nil {
tr.err = p.err
return nil, tr.err
}
continue loop // This is a meta header affecting the next header
default:
mergePAX(hdr, extHdrs)
// Check for a PAX format sparse file // Check for a PAX format sparse file
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
if err != nil { if err != nil {
tr.err = err tr.err = err
return nil, err return nil, err
@ -156,59 +218,15 @@ func (tr *Reader) Next() (*Header, error) {
if sp != nil { if sp != nil {
// Current file is a PAX format GNU sparse file. // Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader. // Set the current file reader to a sparse file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
}
break loop // This is a file, so stop
}
} }
return hdr, nil return hdr, nil
case TypeGNULongName:
// We have a GNU long name header. Its contents are the real file name.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
var buf []byte
if tr.RawAccounting {
if _, err = tr.rawBytes.Write(realname); err != nil {
return nil, err
}
buf = make([]byte, tr.rawBytes.Len())
copy(buf[:], tr.RawBytes())
}
hdr, err := tr.Next()
// since the above call to Next() resets the buffer, we need to throw the bytes over
if tr.RawAccounting {
buf = append(buf, tr.RawBytes()...)
if _, err = tr.rawBytes.Write(buf); err != nil {
return nil, err
}
}
hdr.Name = cString(realname)
return hdr, err
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
var buf []byte
if tr.RawAccounting {
if _, err = tr.rawBytes.Write(realname); err != nil {
return nil, err
}
buf = make([]byte, tr.rawBytes.Len())
copy(buf[:], tr.RawBytes())
}
hdr, err := tr.Next()
// since the above call to Next() resets the buffer, we need to throw the bytes over
if tr.RawAccounting {
buf = append(buf, tr.RawBytes()...)
if _, err = tr.rawBytes.Write(buf); err != nil {
return nil, err
}
}
hdr.Linkname = cString(realname)
return hdr, err
}
return hdr, tr.err
} }
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
@ -385,6 +403,7 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return nil, err return nil, err
} }
} }
sbuf := string(buf)
// For GNU PAX sparse format 0.0 support. // For GNU PAX sparse format 0.0 support.
// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
@ -393,35 +412,17 @@ func parsePAX(r io.Reader) (map[string]string, error) {
headers := make(map[string]string) headers := make(map[string]string)
// Each record is constructed as // Each record is constructed as
// "%d %s=%s\n", length, keyword, value // "%d %s=%s\n", length, keyword, value
for len(buf) > 0 { for len(sbuf) > 0 {
// or the header was empty to start with. key, value, residual, err := parsePAXRecord(sbuf)
var sp int if err != nil {
// The size field ends at the first space.
sp = bytes.IndexByte(buf, ' ')
if sp == -1 {
return nil, ErrHeader return nil, ErrHeader
} }
// Parse the first token as a decimal integer. sbuf = residual
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
if err != nil || n < 5 || int64(len(buf)) < n {
return nil, ErrHeader
}
// Extract everything between the decimal and the n -1 on the
// beginning to eat the ' ', -1 on the end to skip the newline.
var record []byte
record, buf = buf[sp+1:n-1], buf[n:]
// The first equals is guaranteed to mark the end of the key.
// Everything else is value.
eq := bytes.IndexByte(record, '=')
if eq == -1 {
return nil, ErrHeader
}
key, value := record[:eq], record[eq+1:]
keyStr := string(key) keyStr := string(key)
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
sparseMap.Write(value) sparseMap.WriteString(value)
sparseMap.Write([]byte{','}) sparseMap.Write([]byte{','})
} else { } else {
// Normal key. Set the value in the headers map. // Normal key. Set the value in the headers map.
@ -436,9 +437,42 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return headers, nil return headers, nil
} }
// cString parses bytes as a NUL-terminated C-style string. // parsePAXRecord parses the input PAX record string into a key-value pair.
// If parsing is successful, it will slice off the currently read record and
// return the remainder as r.
//
// A PAX record is of the following form:
// "%d %s=%s\n" % (size, key, value)
func parsePAXRecord(s string) (k, v, r string, err error) {
// The size field ends at the first space.
sp := strings.IndexByte(s, ' ')
if sp == -1 {
return "", "", s, ErrHeader
}
// Parse the first token as a decimal integer.
n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
if perr != nil || n < 5 || int64(len(s)) < n {
return "", "", s, ErrHeader
}
// Extract everything between the space and the final newline.
rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
if nl != "\n" {
return "", "", s, ErrHeader
}
// The first equals separates the key from the value.
eq := strings.IndexByte(rec, '=')
if eq == -1 {
return "", "", s, ErrHeader
}
return rec[:eq], rec[eq+1:], rem, nil
}
// parseString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string. // If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string { func (*parser) parseString(b []byte) string {
n := 0 n := 0
for n < len(b) && b[n] != 0 { for n < len(b) && b[n] != 0 {
n++ n++
@ -446,19 +480,51 @@ func cString(b []byte) string {
return string(b[0:n]) return string(b[0:n])
} }
func (tr *Reader) octal(b []byte) int64 { // parseNumeric parses the input as being encoded in either base-256 or octal.
// Check for binary format first. // This function may return negative numbers.
// If parsing fails or an integer overflow occurs, err will be set.
func (p *parser) parseNumeric(b []byte) int64 {
// Check for base-256 (binary) format first.
// If the first bit is set, then all following bits constitute a two's
// complement encoded number in big-endian byte order.
if len(b) > 0 && b[0]&0x80 != 0 { if len(b) > 0 && b[0]&0x80 != 0 {
var x int64 // Handling negative numbers relies on the following identity:
for i, c := range b { // -a-1 == ^a
if i == 0 { //
c &= 0x7f // ignore signal bit in first byte // If the number is negative, we use an inversion mask to invert the
} // data bytes and treat the value as an unsigned number.
x = x<<8 | int64(c) var inv byte // 0x00 if positive or zero, 0xff if negative
} if b[0]&0x40 != 0 {
return x inv = 0xff
} }
var x uint64
for i, c := range b {
c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
if i == 0 {
c &= 0x7f // Ignore signal bit in first byte
}
if (x >> 56) > 0 {
p.err = ErrHeader // Integer overflow
return 0
}
x = x<<8 | uint64(c)
}
if (x >> 63) > 0 {
p.err = ErrHeader // Integer overflow
return 0
}
if inv == 0xff {
return ^int64(x)
}
return int64(x)
}
// Normal case is base-8 (octal) format.
return p.parseOctal(b)
}
func (p *parser) parseOctal(b []byte) int64 {
// Because unused fields are filled with NULs, we need // Because unused fields are filled with NULs, we need
// to skip leading NULs. Fields may also be padded with // to skip leading NULs. Fields may also be padded with
// spaces or NULs. // spaces or NULs.
@ -469,27 +535,55 @@ func (tr *Reader) octal(b []byte) int64 {
if len(b) == 0 { if len(b) == 0 {
return 0 return 0
} }
x, err := strconv.ParseUint(cString(b), 8, 64) x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
if err != nil { if perr != nil {
tr.err = err p.err = ErrHeader
} }
return int64(x) return int64(x)
} }
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. // skipUnread skips any unread bytes in the existing file entry, as well as any
func (tr *Reader) skipUnread() { // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
nr := tr.numBytes() + tr.pad // number of bytes to skip // encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0 tr.curr, tr.pad = nil, 0
if tr.RawAccounting { if tr.RawAccounting {
_, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
return return tr.err
} }
if sr, ok := tr.r.(io.Seeker); ok { // If possible, Seek to the last byte before the end of the data section.
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { // Do this because Seek is often lazy about reporting errors; this will mask
return // the fact that the tar stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, os.SEEK_CUR)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
if err != nil {
tr.err = err
return tr.err
}
seekSkipped = pos2 - pos1
} }
} }
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
var copySkipped int64 // Number of bytes skipped via CopyN
copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
tr.err = io.ErrUnexpectedEOF
}
return tr.err
} }
func (tr *Reader) verifyChecksum(header []byte) bool { func (tr *Reader) verifyChecksum(header []byte) bool {
@ -497,11 +591,19 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
return false return false
} }
given := tr.octal(header[148:156]) var p parser
given := p.parseOctal(header[148:156])
unsigned, signed := checksum(header) unsigned, signed := checksum(header)
return given == unsigned || given == signed return p.err == nil && (given == unsigned || given == signed)
} }
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header { func (tr *Reader) readHeader() *Header {
header := tr.hdrBuff[:] header := tr.hdrBuff[:]
copy(header, zeroBlock) copy(header, zeroBlock)
@ -513,7 +615,7 @@ func (tr *Reader) readHeader() *Header {
return nil return nil
} }
} }
return nil return nil // io.EOF is okay here
} }
if tr.RawAccounting { if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -530,7 +632,7 @@ func (tr *Reader) readHeader() *Header {
return nil return nil
} }
} }
return nil return nil // io.EOF is okay here
} }
if tr.RawAccounting { if tr.RawAccounting {
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
@ -551,22 +653,19 @@ func (tr *Reader) readHeader() *Header {
} }
// Unpack // Unpack
var p parser
hdr := new(Header) hdr := new(Header)
s := slicer(header) s := slicer(header)
hdr.Name = cString(s.next(100)) hdr.Name = p.parseString(s.next(100))
hdr.Mode = tr.octal(s.next(8)) hdr.Mode = p.parseNumeric(s.next(8))
hdr.Uid = int(tr.octal(s.next(8))) hdr.Uid = int(p.parseNumeric(s.next(8)))
hdr.Gid = int(tr.octal(s.next(8))) hdr.Gid = int(p.parseNumeric(s.next(8)))
hdr.Size = tr.octal(s.next(12)) hdr.Size = p.parseNumeric(s.next(12))
if hdr.Size < 0 { hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
tr.err = ErrHeader
return nil
}
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
s.next(8) // chksum s.next(8) // chksum
hdr.Typeflag = s.next(1)[0] hdr.Typeflag = s.next(1)[0]
hdr.Linkname = cString(s.next(100)) hdr.Linkname = p.parseString(s.next(100))
// The remainder of the header depends on the value of magic. // The remainder of the header depends on the value of magic.
// The original (v7) version of tar had no explicit magic field, // The original (v7) version of tar had no explicit magic field,
@ -586,70 +685,76 @@ func (tr *Reader) readHeader() *Header {
switch format { switch format {
case "posix", "gnu", "star": case "posix", "gnu", "star":
hdr.Uname = cString(s.next(32)) hdr.Uname = p.parseString(s.next(32))
hdr.Gname = cString(s.next(32)) hdr.Gname = p.parseString(s.next(32))
devmajor := s.next(8) devmajor := s.next(8)
devminor := s.next(8) devminor := s.next(8)
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
hdr.Devmajor = tr.octal(devmajor) hdr.Devmajor = p.parseNumeric(devmajor)
hdr.Devminor = tr.octal(devminor) hdr.Devminor = p.parseNumeric(devminor)
} }
var prefix string var prefix string
switch format { switch format {
case "posix", "gnu": case "posix", "gnu":
prefix = cString(s.next(155)) prefix = p.parseString(s.next(155))
case "star": case "star":
prefix = cString(s.next(131)) prefix = p.parseString(s.next(131))
hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
} }
if len(prefix) > 0 { if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name hdr.Name = prefix + "/" + hdr.Name
} }
} }
if tr.err != nil { if p.err != nil {
tr.err = p.err
return nil
}
nb := hdr.Size
if isHeaderOnlyType(hdr.Typeflag) {
nb = 0
}
if nb < 0 {
tr.err = ErrHeader tr.err = ErrHeader
return nil return nil
} }
// Maximum value of hdr.Size is 64 GB (12 octal digits),
// so there's no risk of int64 overflowing.
nb := int64(hdr.Size)
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
// Set the current file reader. // Set the current file reader.
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
tr.curr = &regFileReader{r: tr.r, nb: nb} tr.curr = &regFileReader{r: tr.r, nb: nb}
// Check for old GNU sparse format entry. // Check for old GNU sparse format entry.
if hdr.Typeflag == TypeGNUSparse { if hdr.Typeflag == TypeGNUSparse {
// Get the real size of the file. // Get the real size of the file.
hdr.Size = tr.octal(header[483:495]) hdr.Size = p.parseNumeric(header[483:495])
if p.err != nil {
tr.err = p.err
return nil
}
// Read the sparse map. // Read the sparse map.
sp := tr.readOldGNUSparseMap(header) sp := tr.readOldGNUSparseMap(header)
if tr.err != nil { if tr.err != nil {
return nil return nil
} }
// Current file is a GNU sparse file. Update the current file reader. // Current file is a GNU sparse file. Update the current file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil
}
} }
return hdr return hdr
} }
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map. // then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
var p parser
isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
spCap := oldGNUSparseMainHeaderNumEntries spCap := oldGNUSparseMainHeaderNumEntries
if isExtended { if isExtended {
@ -660,10 +765,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
// Read the four entries from the main tar header // Read the four entries from the main tar header
for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
offset := tr.octal(s.next(oldGNUSparseOffsetSize)) offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
if tr.err != nil { if p.err != nil {
tr.err = ErrHeader tr.err = p.err
return nil return nil
} }
if offset == 0 && numBytes == 0 { if offset == 0 && numBytes == 0 {
@ -687,10 +792,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
s = slicer(sparseHeader) s = slicer(sparseHeader)
for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
offset := tr.octal(s.next(oldGNUSparseOffsetSize)) offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
if tr.err != nil { if p.err != nil {
tr.err = ErrHeader tr.err = p.err
return nil return nil
} }
if offset == 0 && numBytes == 0 { if offset == 0 && numBytes == 0 {
@ -702,134 +807,111 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
return sp return sp
} }
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// The sparse map is stored just before the file data and padded out to the nearest block boundary. // version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
buf := make([]byte, 2*blockSize) var cntNewline int64
sparseHeader := buf[:blockSize] var buf bytes.Buffer
var blk = make([]byte, blockSize)
// readDecimal is a helper function to read a decimal integer from the sparse map // feedTokens copies data in numBlock chunks from r into buf until there are
// while making sure to read from the file in blocks of size blockSize // at least cnt newlines in buf. It will not read more blocks than needed.
readDecimal := func() (int64, error) { var feedTokens = func(cnt int64) error {
// Look for newline for cntNewline < cnt {
nl := bytes.IndexByte(sparseHeader, '\n') if _, err := io.ReadFull(r, blk); err != nil {
if nl == -1 { if err == io.EOF {
if len(sparseHeader) >= blockSize { err = io.ErrUnexpectedEOF
// This is an error
return 0, ErrHeader
} }
oldLen := len(sparseHeader) return err
newLen := oldLen + blockSize }
if cap(sparseHeader) < newLen { buf.Write(blk)
// There's more header, but we need to make room for the next block for _, c := range blk {
copy(buf, sparseHeader) if c == '\n' {
sparseHeader = buf[:newLen] cntNewline++
} else { }
// There's more header, and we can just reslice }
sparseHeader = sparseHeader[:newLen] }
return nil
} }
// Now that sparseHeader is large enough, read next block // nextToken gets the next token delimited by a newline. This assumes that
if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { // at least one newline exists in the buffer.
return 0, err var nextToken = func() string {
} cntNewline--
// leaving this function for io.Reader makes it more testable tok, _ := buf.ReadString('\n')
if tr, ok := r.(*Reader); ok && tr.RawAccounting { return tok[:len(tok)-1] // Cut off newline
if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil {
return 0, err
}
} }
// Look for a newline in the new data // Parse for the number of entries.
nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') // Use integer overflow resistant math to check this.
if nl == -1 { if err := feedTokens(1); err != nil {
// This is an error
return 0, ErrHeader
}
nl += oldLen // We want the position from the beginning
}
// Now that we've found a newline, read a number
n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
if err != nil {
return 0, ErrHeader
}
// Update sparseHeader to consume this number
sparseHeader = sparseHeader[nl+1:]
return n, nil
}
// Read the first block
if _, err := io.ReadFull(r, sparseHeader); err != nil {
return nil, err
}
// leaving this function for io.Reader makes it more testable
if tr, ok := r.(*Reader); ok && tr.RawAccounting {
if _, err := tr.rawBytes.Write(sparseHeader); err != nil {
return nil, err return nil, err
} }
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
return nil, ErrHeader
} }
// The first line contains the number of entries // Parse for all member entries.
numEntries, err := readDecimal() // numEntries is trusted after this since a potential attacker must have
if err != nil { // committed resources proportional to what this library used.
if err := feedTokens(2 * numEntries); err != nil {
return nil, err return nil, err
} }
// Read all the entries
sp := make([]sparseEntry, 0, numEntries) sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for i := int64(0); i < numEntries; i++ {
// Read the offset offset, err := strconv.ParseInt(nextToken(), 10, 64)
offset, err := readDecimal()
if err != nil { if err != nil {
return nil, err return nil, ErrHeader
} }
// Read numBytes numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
numBytes, err := readDecimal()
if err != nil { if err != nil {
return nil, err return nil, ErrHeader
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
} }
return sp, nil return sp, nil
} }
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// The sparse map is stored in the PAX headers. // version 0.1. The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
// Get number of entries // Get number of entries.
numEntriesStr, ok := headers[paxGNUSparseNumBlocks] // Use integer overflow resistant math to check this.
if !ok { numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
return nil, ErrHeader numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
} if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
sparseMap := strings.Split(headers[paxGNUSparseMap], ",") // There should be two numbers in sparseMap for each entry.
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
// There should be two numbers in sparseMap for each entry
if int64(len(sparseMap)) != 2*numEntries { if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader return nil, ErrHeader
} }
// Loop through the entries in the sparse map // Loop through the entries in the sparse map.
// numEntries is trusted now.
sp := make([]sparseEntry, 0, numEntries) sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
if err != nil { if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
if err != nil { if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
} }
return sp, nil return sp, nil
} }
@ -846,10 +928,18 @@ func (tr *Reader) numBytes() int64 {
// Read reads from the current entry in the tar archive. // Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry, // It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry. // until Next is called to advance to the next entry.
//
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (n int, err error) { func (tr *Reader) Read(b []byte) (n int, err error) {
if tr.err != nil {
return 0, tr.err
}
if tr.curr == nil { if tr.curr == nil {
return 0, io.EOF return 0, io.EOF
} }
n, err = tr.curr.Read(b) n, err = tr.curr.Read(b)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
tr.err = err tr.err = err
@ -879,9 +969,33 @@ func (rfr *regFileReader) numBytes() int64 {
return rfr.nb return rfr.nb
} }
// readHole reads a sparse file hole ending at offset toOffset // newSparseFileReader creates a new sparseFileReader, but validates all of the
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // sparse entries before doing so.
n64 := toOffset - sfr.pos func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
}
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}
// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) { if n64 > int64(len(b)) {
n64 = int64(len(b)) n64 = int64(len(b))
} }
@ -895,49 +1009,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
// Read reads the sparse file data in expanded form. // Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
if len(sfr.sp) == 0 { // Skip past all empty fragments.
// No more data fragments to read from. for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
if sfr.pos < sfr.tot { sfr.sp = sfr.sp[1:]
// We're in the last hole
n = sfr.readHole(b, sfr.tot)
return
}
// Otherwise, we're at the end of the file
return 0, io.EOF
}
if sfr.tot < sfr.sp[0].offset {
return 0, io.ErrUnexpectedEOF
}
if sfr.pos < sfr.sp[0].offset {
// We're in a hole
n = sfr.readHole(b, sfr.sp[0].offset)
return
} }
// We're not in a hole, so we'll read from the next data fragment // If there are no more fragments, then it is possible that there
posInFragment := sfr.pos - sfr.sp[0].offset // is one last sparse hole.
bytesLeft := sfr.sp[0].numBytes - posInFragment if len(sfr.sp) == 0 {
// This behavior matches the BSD tar utility.
// However, GNU tar stops returning data even if sfr.total is unmet.
if sfr.pos < sfr.total {
return sfr.readHole(b, sfr.total), nil
}
return 0, io.EOF
}
// In front of a data fragment, so read a hole.
if sfr.pos < sfr.sp[0].offset {
return sfr.readHole(b, sfr.sp[0].offset), nil
}
// In a data fragment, so read from it.
// This math is overflow free since we verify that offset and numBytes can
// be safely added when creating the sparseFileReader.
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft { if int64(len(b)) > bytesLeft {
b = b[0:bytesLeft] b = b[:bytesLeft]
} }
n, err = sfr.rfr.Read(b) n, err = sfr.rfr.Read(b)
sfr.pos += int64(n) sfr.pos += int64(n)
if err == io.EOF {
if int64(n) == bytesLeft { if sfr.pos < endPos {
// We're done with this fragment err = io.ErrUnexpectedEOF // There was supposed to be more data
sfr.sp = sfr.sp[1:] } else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end
}
} }
if err == io.EOF && sfr.pos < sfr.tot { if sfr.pos == endPos {
// We reached the end of the last fragment's data, but there's a final hole sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
err = nil
} }
return return n, err
} }
// numBytes returns the number of bytes left to read in the sparse file's // numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive. // sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 { func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.nb return sfr.rfr.numBytes()
} }

File diff suppressed because it is too large Load diff

BIN
archive/tar/testdata/gnu-multi-hdrs.tar vendored Normal file

Binary file not shown.

BIN
archive/tar/testdata/hdr-only.tar vendored Normal file

Binary file not shown.

BIN
archive/tar/testdata/issue12435.tar vendored Normal file

Binary file not shown.

Binary file not shown.

BIN
archive/tar/testdata/pax-multi-hdrs.tar vendored Normal file

Binary file not shown.

BIN
archive/tar/testdata/pax-path-hdr.tar vendored Normal file

Binary file not shown.

BIN
archive/tar/testdata/ustar-file-reg.tar vendored Normal file

Binary file not shown.

View file

@ -12,8 +12,8 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"os"
"path" "path"
"sort"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -23,7 +23,6 @@ var (
ErrWriteTooLong = errors.New("archive/tar: write too long") ErrWriteTooLong = errors.New("archive/tar: write too long")
ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrFieldTooLong = errors.New("archive/tar: header field too long")
ErrWriteAfterClose = errors.New("archive/tar: write after close") ErrWriteAfterClose = errors.New("archive/tar: write after close")
errNameTooLong = errors.New("archive/tar: name too long")
errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
) )
@ -43,6 +42,10 @@ type Writer struct {
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
} }
type formatter struct {
err error // Last error seen
}
// NewWriter creates a new Writer writing to w. // NewWriter creates a new Writer writing to w.
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
@ -69,17 +72,9 @@ func (tw *Writer) Flush() error {
} }
// Write s into b, terminating it with a NUL if there is room. // Write s into b, terminating it with a NUL if there is room.
// If the value is too long for the field and allowPax is true add a paxheader record instead func (f *formatter) formatString(b []byte, s string) {
func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
if len(s) > len(b) { if len(s) > len(b) {
if tw.err == nil { f.err = ErrFieldTooLong
tw.err = ErrFieldTooLong
}
return return
} }
ascii := toASCII(s) ascii := toASCII(s)
@ -90,40 +85,40 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string,
} }
// Encode x as an octal ASCII string and write it into b with leading zeros. // Encode x as an octal ASCII string and write it into b with leading zeros.
func (tw *Writer) octal(b []byte, x int64) { func (f *formatter) formatOctal(b []byte, x int64) {
s := strconv.FormatInt(x, 8) s := strconv.FormatInt(x, 8)
// leading zeros, but leave room for a NUL. // leading zeros, but leave room for a NUL.
for len(s)+1 < len(b) { for len(s)+1 < len(b) {
s = "0" + s s = "0" + s
} }
tw.cString(b, s, false, paxNone, nil) f.formatString(b, s)
} }
// Write x into b, either as octal or as binary (GNUtar/star extension). // fitsInBase256 reports whether x can be encoded into n bytes using base-256
// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead // encoding. Unlike octal encoding, base-256 encoding does not require that the
func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { // string ends with a NUL character. Thus, all n bytes are available for output.
// Try octal first. //
s := strconv.FormatInt(x, 8) // If operating in binary mode, this assumes strict GNU binary mode; which means
if len(s) < len(b) { // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
tw.octal(b, x) // equivalent to the sign bit in two's complement form.
return func fitsInBase256(n int, x int64) bool {
} var binBits = uint(n-1) * 8
return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
}
// If it is too long for octal, and pax is preferred, use a pax header // Write x into b, as binary (GNUtar/star extension).
if allowPax && tw.preferPax { func (f *formatter) formatNumeric(b []byte, x int64) {
tw.octal(b, 0) if fitsInBase256(len(b), x) {
s := strconv.FormatInt(x, 10) for i := len(b) - 1; i >= 0; i-- {
paxHeaders[paxKeyword] = s
return
}
// Too big: use binary (big-endian).
tw.usedBinary = true
for i := len(b) - 1; x > 0 && i >= 0; i-- {
b[i] = byte(x) b[i] = byte(x)
x >>= 8 x >>= 8
} }
b[0] |= 0x80 // highest bit indicates binary format b[0] |= 0x80 // Highest bit indicates binary format
return
}
f.formatOctal(b, 0) // Last resort, just write zero
f.err = ErrFieldTooLong
} }
var ( var (
@ -162,6 +157,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
// subsecond time resolution, but for now let's just capture // subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters // too long fields or non ascii characters
var f formatter
var header []byte var header []byte
// We need to select which scratch buffer to use carefully, // We need to select which scratch buffer to use carefully,
@ -176,10 +172,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
copy(header, zeroBlock) copy(header, zeroBlock)
s := slicer(header) s := slicer(header)
// Wrappers around formatter that automatically sets paxHeaders if the
// argument extends beyond the capacity of the input byte slice.
var formatString = func(b []byte, s string, paxKeyword string) {
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
f.formatString(b, s)
}
var formatNumeric = func(b []byte, x int64, paxKeyword string) {
// Try octal first.
s := strconv.FormatInt(x, 8)
if len(s) < len(b) {
f.formatOctal(b, x)
return
}
// If it is too long for octal, and PAX is preferred, use a PAX header.
if paxKeyword != paxNone && tw.preferPax {
f.formatOctal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
tw.usedBinary = true
f.formatNumeric(b, x)
}
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
pathHeaderBytes := s.next(fileNameSize) pathHeaderBytes := s.next(fileNameSize)
tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) formatString(pathHeaderBytes, hdr.Name, paxPath)
// Handle out of range ModTime carefully. // Handle out of range ModTime carefully.
var modTime int64 var modTime int64
@ -187,25 +213,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
modTime = hdr.ModTime.Unix() modTime = hdr.ModTime.Unix()
} }
tw.octal(s.next(8), hdr.Mode) // 100:108 f.formatOctal(s.next(8), hdr.Mode) // 100:108
tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156) s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157 s.next(1)[0] = hdr.Typeflag // 156:157
tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) formatString(s.next(100), hdr.Linkname, paxLinkpath)
copy(s.next(8), []byte("ustar\x0000")) // 257:265 copy(s.next(8), []byte("ustar\x0000")) // 257:265
tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 formatString(s.next(32), hdr.Uname, paxUname) // 265:297
tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 formatString(s.next(32), hdr.Gname, paxGname) // 297:329
tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
prefixHeaderBytes := s.next(155) prefixHeaderBytes := s.next(155)
tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
// Use the GNU magic instead of POSIX magic if we used any GNU extensions. // Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary { if tw.usedBinary {
@ -215,37 +241,26 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
_, paxPathUsed := paxHeaders[paxPath] _, paxPathUsed := paxHeaders[paxPath]
// try to use a ustar header when only the name is too long // try to use a ustar header when only the name is too long
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
suffix := hdr.Name prefix, suffix, ok := splitUSTARPath(hdr.Name)
prefix := "" if ok {
if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { // Since we can encode in USTAR format, disable PAX header.
var err error
prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
if err == nil {
// ok we can use a ustar long name instead of pax, now correct the fields
// remove the path field from the pax header. this will suppress the pax header
delete(paxHeaders, paxPath) delete(paxHeaders, paxPath)
// update the path fields // Update the path fields
tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) formatString(pathHeaderBytes, suffix, paxNone)
tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) formatString(prefixHeaderBytes, prefix, paxNone)
// Use the ustar magic if we used ustar long names.
if len(prefix) > 0 && !tw.usedBinary {
copy(header[257:265], []byte("ustar\x00"))
}
}
} }
} }
// The chksum field is terminated by a NUL and a space. // The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields. // This is different from the other octal fields.
chksum, _ := checksum(header) chksum, _ := checksum(header)
tw.octal(header[148:155], chksum) f.formatOctal(header[148:155], chksum) // Never fails
header[155] = ' ' header[155] = ' '
if tw.err != nil { // Check if there were any formatting errors.
// problem with header; probably integer too big for a field. if f.err != nil {
tw.err = f.err
return tw.err return tw.err
} }
@ -270,28 +285,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
return tw.err return tw.err
} }
// writeUSTARLongName splits a USTAR long name hdr.Name. // splitUSTARPath splits a path according to USTAR prefix and suffix rules.
// name must be < 256 characters. errNameTooLong is returned // If the path is not splittable, then it will return ("", "", false).
// if hdr.Name can't be split. The splitting heuristic func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
// is compatible with gnu tar.
func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
length := len(name) length := len(name)
if length > fileNamePrefixSize+1 { if length <= fileNameSize || !isASCII(name) {
return "", "", false
} else if length > fileNamePrefixSize+1 {
length = fileNamePrefixSize + 1 length = fileNamePrefixSize + 1
} else if name[length-1] == '/' { } else if name[length-1] == '/' {
length-- length--
} }
i := strings.LastIndex(name[:length], "/") i := strings.LastIndex(name[:length], "/")
// nlen contains the resulting length in the name field. nlen := len(name) - i - 1 // nlen is length of suffix
// plen contains the resulting length in the prefix field. plen := i // plen is length of prefix
nlen := len(name) - i - 1
plen := i
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
err = errNameTooLong return "", "", false
return
} }
prefix, suffix = name[:i], name[i+1:] return name[:i], name[i+1:], true
return
} }
// writePaxHeader writes an extended pax header to the // writePaxHeader writes an extended pax header to the
@ -304,11 +316,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
// succeed, and seems harmless enough. // succeed, and seems harmless enough.
ext.ModTime = hdr.ModTime ext.ModTime = hdr.ModTime
// The spec asks that we namespace our pseudo files // The spec asks that we namespace our pseudo files
// with the current pid. // with the current pid. However, this results in differing outputs
pid := os.Getpid() // for identical inputs. As such, the constant 0 is now used instead.
// golang.org/issue/12358
dir, file := path.Split(hdr.Name) dir, file := path.Split(hdr.Name)
fullName := path.Join(dir, fullName := path.Join(dir, "PaxHeaders.0", file)
fmt.Sprintf("PaxHeaders.%d", pid), file)
ascii := toASCII(fullName) ascii := toASCII(fullName)
if len(ascii) > 100 { if len(ascii) > 100 {
@ -318,8 +330,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
// Construct the body // Construct the body
var buf bytes.Buffer var buf bytes.Buffer
for k, v := range paxHeaders { // Keys are sorted before writing to body to allow deterministic output.
fmt.Fprint(&buf, paxHeader(k+"="+v)) var keys []string
for k := range paxHeaders {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
} }
ext.Size = int64(len(buf.Bytes())) ext.Size = int64(len(buf.Bytes()))
@ -335,17 +354,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
return nil return nil
} }
// paxHeader formats a single pax record, prefixing it with the appropriate length // formatPAXRecord formats a single PAX record, prefixing it with the
func paxHeader(msg string) string { // appropriate length.
const padding = 2 // Extra padding for space and newline func formatPAXRecord(k, v string) string {
size := len(msg) + padding const padding = 3 // Extra padding for ' ', '=', and '\n'
size := len(k) + len(v) + padding
size += len(strconv.Itoa(size)) size += len(strconv.Itoa(size))
record := fmt.Sprintf("%d %s\n", size, msg) record := fmt.Sprintf("%d %s=%s\n", size, k, v)
// Final adjustment if adding size field increased the record size.
if len(record) != size { if len(record) != size {
// Final adjustment if adding size increased
// the number of digits in size
size = len(record) size = len(record)
record = fmt.Sprintf("%d %s\n", size, msg) record = fmt.Sprintf("%d %s=%s\n", size, k, v)
} }
return record return record
} }

View file

@ -9,8 +9,10 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"reflect" "reflect"
"sort"
"strings" "strings"
"testing" "testing"
"testing/iotest" "testing/iotest"
@ -291,7 +293,7 @@ func TestPax(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Simple test to make sure PAX extensions are in effect // Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
t.Fatal("Expected at least one PAX header to be written.") t.Fatal("Expected at least one PAX header to be written.")
} }
// Test that we can get a long name back out of the archive. // Test that we can get a long name back out of the archive.
@ -330,7 +332,7 @@ func TestPaxSymlink(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Simple test to make sure PAX extensions are in effect // Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
t.Fatal("Expected at least one PAX header to be written.") t.Fatal("Expected at least one PAX header to be written.")
} }
// Test that we can get a long name back out of the archive. // Test that we can get a long name back out of the archive.
@ -380,7 +382,7 @@ func TestPaxNonAscii(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Simple test to make sure PAX extensions are in effect // Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
t.Fatal("Expected at least one PAX header to be written.") t.Fatal("Expected at least one PAX header to be written.")
} }
// Test that we can get a long name back out of the archive. // Test that we can get a long name back out of the archive.
@ -439,21 +441,49 @@ func TestPaxXattrs(t *testing.T) {
} }
} }
func TestPAXHeader(t *testing.T) { func TestPaxHeadersSorted(t *testing.T) {
medName := strings.Repeat("CD", 50) fileinfo, err := os.Stat("testdata/small.txt")
longName := strings.Repeat("AB", 100) if err != nil {
paxTests := [][2]string{ t.Fatal(err)
{paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"},
{"a=b", "6 a=b\n"}, // Single digit length
{"a=names", "11 a=names\n"}, // Test case involving carries
{paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)},
{paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}}
for _, test := range paxTests {
key, expected := test[0], test[1]
if result := paxHeader(key); result != expected {
t.Fatalf("paxHeader: got %s, expected %s", result, expected)
} }
hdr, err := FileInfoHeader(fileinfo, "")
if err != nil {
t.Fatalf("os.Stat: %v", err)
}
contents := strings.Repeat(" ", int(hdr.Size))
hdr.Xattrs = map[string]string{
"foo": "foo",
"bar": "bar",
"baz": "baz",
"qux": "qux",
}
var buf bytes.Buffer
writer := NewWriter(&buf)
if err := writer.WriteHeader(hdr); err != nil {
t.Fatal(err)
}
if _, err = writer.Write([]byte(contents)); err != nil {
t.Fatal(err)
}
if err := writer.Close(); err != nil {
t.Fatal(err)
}
// Simple test to make sure PAX extensions are in effect
if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) {
t.Fatal("Expected at least one PAX header to be written.")
}
// xattr bar should always appear before others
indices := []int{
bytes.Index(buf.Bytes(), []byte("bar=bar")),
bytes.Index(buf.Bytes(), []byte("baz=baz")),
bytes.Index(buf.Bytes(), []byte("foo=foo")),
bytes.Index(buf.Bytes(), []byte("qux=qux")),
}
if !sort.IntsAreSorted(indices) {
t.Fatal("PAX headers are not sorted")
} }
} }
@ -544,3 +574,149 @@ func TestWriteAfterClose(t *testing.T) {
t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) t.Fatalf("Write: got %v; want ErrWriteAfterClose", err)
} }
} }
func TestSplitUSTARPath(t *testing.T) {
var sr = strings.Repeat
var vectors = []struct {
input string // Input path
prefix string // Expected output prefix
suffix string // Expected output suffix
ok bool // Split success?
}{
{"", "", "", false},
{"abc", "", "", false},
{"用戶名", "", "", false},
{sr("a", fileNameSize), "", "", false},
{sr("a", fileNameSize) + "/", "", "", false},
{sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true},
{sr("a", fileNamePrefixSize) + "/", "", "", false},
{sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true},
{sr("a", fileNameSize+1), "", "", false},
{sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true},
{sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize),
sr("a", fileNamePrefixSize), sr("b", fileNameSize), true},
{sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false},
{sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true},
}
for _, v := range vectors {
prefix, suffix, ok := splitUSTARPath(v.input)
if prefix != v.prefix || suffix != v.suffix || ok != v.ok {
t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)",
v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok)
}
}
}
func TestFormatPAXRecord(t *testing.T) {
var medName = strings.Repeat("CD", 50)
var longName = strings.Repeat("AB", 100)
var vectors = []struct {
inputKey string
inputVal string
output string
}{
{"k", "v", "6 k=v\n"},
{"path", "/etc/hosts", "19 path=/etc/hosts\n"},
{"path", longName, "210 path=" + longName + "\n"},
{"path", medName, "110 path=" + medName + "\n"},
{"foo", "ba", "9 foo=ba\n"},
{"foo", "bar", "11 foo=bar\n"},
{"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"},
{"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"},
{"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"},
{"\x00hello", "\x00world", "17 \x00hello=\x00world\n"},
}
for _, v := range vectors {
output := formatPAXRecord(v.inputKey, v.inputVal)
if output != v.output {
t.Errorf("formatPAXRecord(%q, %q): got %q, want %q",
v.inputKey, v.inputVal, output, v.output)
}
}
}
func TestFitsInBase256(t *testing.T) {
var vectors = []struct {
input int64
width int
ok bool
}{
{+1, 8, true},
{0, 8, true},
{-1, 8, true},
{1 << 56, 8, false},
{(1 << 56) - 1, 8, true},
{-1 << 56, 8, true},
{(-1 << 56) - 1, 8, false},
{121654, 8, true},
{-9849849, 8, true},
{math.MaxInt64, 9, true},
{0, 9, true},
{math.MinInt64, 9, true},
{math.MaxInt64, 12, true},
{0, 12, true},
{math.MinInt64, 12, true},
}
for _, v := range vectors {
ok := fitsInBase256(v.width, v.input)
if ok != v.ok {
t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok)
}
}
}
func TestFormatNumeric(t *testing.T) {
var vectors = []struct {
input int64
output string
ok bool
}{
// Test base-256 (binary) encoded values.
{-1, "\xff", true},
{-1, "\xff\xff", true},
{-1, "\xff\xff\xff", true},
{(1 << 0), "0", false},
{(1 << 8) - 1, "\x80\xff", true},
{(1 << 8), "0\x00", false},
{(1 << 16) - 1, "\x80\xff\xff", true},
{(1 << 16), "00\x00", false},
{-1 * (1 << 0), "\xff", true},
{-1*(1<<0) - 1, "0", false},
{-1 * (1 << 8), "\xff\x00", true},
{-1*(1<<8) - 1, "0\x00", false},
{-1 * (1 << 16), "\xff\x00\x00", true},
{-1*(1<<16) - 1, "00\x00", false},
{537795476381659745, "0000000\x00", false},
{537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true},
{-615126028225187231, "0000000\x00", false},
{-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true},
{math.MaxInt64, "0000000\x00", false},
{math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true},
{math.MinInt64, "0000000\x00", false},
{math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
{math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true},
{math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true},
}
for _, v := range vectors {
var f formatter
output := make([]byte, len(v.output))
f.formatNumeric(output, v.input)
ok := (f.err == nil)
if ok != v.ok {
if v.ok {
t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input)
} else {
t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input)
}
}
if string(output) != v.output {
t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output)
}
}
}