commit f6f6a575de
Vincent Batts, 2016-09-23 14:51:45 +00:00 (committed by GitHub)
5 changed files with 331 additions and 227 deletions

archive/tar/common.go

@@ -21,10 +21,8 @@ import (
 	"time"
 )
 
+// Header type flags.
 const (
-	blockSize = 512
-
-	// Types
 	TypeReg  = '0'    // regular file
 	TypeRegA = '\x00' // regular file
 	TypeLink = '1'    // hard link
@@ -61,12 +59,6 @@ type Header struct {
 	Xattrs map[string]string
 }
 
-// File name constants from the tar spec.
-const (
-	fileNameSize       = 100 // Maximum number of bytes in a standard tar name.
-	fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
-)
-
 // FileInfo returns an os.FileInfo for the Header.
 func (h *Header) FileInfo() os.FileInfo {
 	return headerFileInfo{h}
@@ -279,33 +271,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
 	return h, nil
 }
 
-var zeroBlock = make([]byte, blockSize)
-
-// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
-// We compute and return both.
-func checksum(header []byte) (unsigned int64, signed int64) {
-	for i := 0; i < len(header); i++ {
-		if i == 148 {
-			// The chksum field (header[148:156]) is special: it should be treated as space bytes.
-			unsigned += ' ' * 8
-			signed += ' ' * 8
-			i += 7
-			continue
-		}
-		unsigned += int64(header[i])
-		signed += int64(int8(header[i]))
-	}
-	return
-}
-
-type slicer []byte
-
-func (sp *slicer) next(n int) (b []byte) {
-	s := *sp
-	b, *sp = s[0:n], s[n:]
-	return
-}
-
 func isASCII(s string) bool {
 	for _, c := range s {
 		if c >= 0x80 {

archive/tar/format.go (new file, 197 lines)

@@ -0,0 +1,197 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
// Constants to identify various tar formats.
const (
// The format is unknown.
formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc...
// The format of the original Unix V7 tar tool prior to standardization.
formatV7
// The old and new GNU formats, which are incompatible with USTAR.
// This does cover the old GNU sparse extension.
// This does not cover the GNU sparse extensions using PAX headers,
// versions 0.0, 0.1, and 1.0; these fall under the PAX format.
formatGNU
// Schily's tar format, which is incompatible with USTAR.
// This does not cover STAR extensions to the PAX format; these fall under
// the PAX format.
formatSTAR
// USTAR is the former standardization of tar defined in POSIX.1-1988.
// This is incompatible with the GNU and STAR formats.
formatUSTAR
// PAX is the latest standardization of tar defined in POSIX.1-2001.
// This is an extension of USTAR and is "backwards compatible" with it.
//
// Some newer formats add their own extensions to PAX, such as GNU sparse
// files and SCHILY extended attributes. Since they are backwards compatible
// with PAX, they will be labelled as "PAX".
formatPAX
)
// Magics used to identify various formats.
const (
magicGNU, versionGNU = "ustar ", " \x00"
magicUSTAR, versionUSTAR = "ustar\x00", "00"
trailerSTAR = "tar\x00"
)
// Size constants from various tar specifications.
const (
blockSize = 512 // Size of each block in a tar stream
nameSize = 100 // Max length of the name field in USTAR format
prefixSize = 155 // Max length of the prefix field in USTAR format
)
var zeroBlock block
type block [blockSize]byte
// Convert block to any number of formats.
func (b *block) V7() *headerV7 { return (*headerV7)(b) }
func (b *block) GNU() *headerGNU { return (*headerGNU)(b) }
func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) }
func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
// GetFormat checks that the block is a valid tar header based on the checksum.
// It then attempts to guess the specific format based on magic values.
// If the checksum fails, then formatUnknown is returned.
func (b *block) GetFormat() (format int) {
// Verify checksum.
var p parser
value := p.parseOctal(b.V7().Chksum())
chksum1, chksum2 := b.ComputeChecksum()
if p.err != nil || (value != chksum1 && value != chksum2) {
return formatUnknown
}
// Guess the magic values.
magic := string(b.USTAR().Magic())
version := string(b.USTAR().Version())
trailer := string(b.STAR().Trailer())
switch {
case magic == magicUSTAR && trailer == trailerSTAR:
return formatSTAR
case magic == magicUSTAR:
return formatUSTAR
case magic == magicGNU && version == versionGNU:
return formatGNU
default:
return formatV7
}
}
// SetFormat writes the magic values necessary for specified format
// and then updates the checksum accordingly.
func (b *block) SetFormat(format int) {
// Set the magic values.
switch format {
case formatV7:
// Do nothing.
case formatGNU:
copy(b.GNU().Magic(), magicGNU)
copy(b.GNU().Version(), versionGNU)
case formatSTAR:
copy(b.STAR().Magic(), magicUSTAR)
copy(b.STAR().Version(), versionUSTAR)
copy(b.STAR().Trailer(), trailerSTAR)
case formatUSTAR, formatPAX:
copy(b.USTAR().Magic(), magicUSTAR)
copy(b.USTAR().Version(), versionUSTAR)
default:
panic("invalid format")
}
// Update checksum.
// This field is special in that it is terminated by a NULL then space.
var f formatter
field := b.V7().Chksum()
chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
field[7] = ' '
}
// ComputeChecksum computes the checksum for the header block.
// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
// signed byte values.
// We compute and return both.
func (b *block) ComputeChecksum() (unsigned, signed int64) {
for i, c := range b {
if 148 <= i && i < 156 {
c = ' ' // Treat the checksum field itself as all spaces.
}
unsigned += int64(uint8(c))
signed += int64(int8(c))
}
return unsigned, signed
}
type headerV7 [blockSize]byte
func (h *headerV7) Name() []byte { return h[000:][:100] }
func (h *headerV7) Mode() []byte { return h[100:][:8] }
func (h *headerV7) UID() []byte { return h[108:][:8] }
func (h *headerV7) GID() []byte { return h[116:][:8] }
func (h *headerV7) Size() []byte { return h[124:][:12] }
func (h *headerV7) ModTime() []byte { return h[136:][:12] }
func (h *headerV7) Chksum() []byte { return h[148:][:8] }
func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
func (h *headerV7) LinkName() []byte { return h[157:][:100] }
type headerGNU [blockSize]byte
func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) }
func (h *headerGNU) Magic() []byte { return h[257:][:6] }
func (h *headerGNU) Version() []byte { return h[263:][:2] }
func (h *headerGNU) UserName() []byte { return h[265:][:32] }
func (h *headerGNU) GroupName() []byte { return h[297:][:32] }
func (h *headerGNU) DevMajor() []byte { return h[329:][:8] }
func (h *headerGNU) DevMinor() []byte { return h[337:][:8] }
func (h *headerGNU) AccessTime() []byte { return h[345:][:12] }
func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] }
func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
func (h *headerGNU) RealSize() []byte { return h[483:][:12] }
type headerSTAR [blockSize]byte
func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) }
func (h *headerSTAR) Magic() []byte { return h[257:][:6] }
func (h *headerSTAR) Version() []byte { return h[263:][:2] }
func (h *headerSTAR) UserName() []byte { return h[265:][:32] }
func (h *headerSTAR) GroupName() []byte { return h[297:][:32] }
func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] }
func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] }
func (h *headerSTAR) Prefix() []byte { return h[345:][:131] }
func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
func (h *headerSTAR) Trailer() []byte { return h[508:][:4] }
type headerUSTAR [blockSize]byte
func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) }
func (h *headerUSTAR) Magic() []byte { return h[257:][:6] }
func (h *headerUSTAR) Version() []byte { return h[263:][:2] }
func (h *headerUSTAR) UserName() []byte { return h[265:][:32] }
func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] }
func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] }
func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
type sparseArray []byte
func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) }
func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
func (s sparseArray) MaxEntries() int { return len(s) / 24 }
type sparseNode []byte
func (s sparseNode) Offset() []byte { return s[00:][:12] }
func (s sparseNode) NumBytes() []byte { return s[12:][:12] }
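As an aside, a minimal standalone sketch (not part of this commit; the `checksums` helper name is illustrative) of the rule that ComputeChecksum above implements: sum every byte of the 512-byte block, but treat the chksum field itself (bytes 148 through 155) as ASCII spaces. POSIX specifies unsigned byte values, old Sun tar used signed ones, so both sums are produced.

```go
package main

import "fmt"

// checksums mirrors block.ComputeChecksum for a raw 512-byte header.
func checksums(header [512]byte) (unsigned, signed int64) {
	for i, c := range header {
		if 148 <= i && i < 156 {
			c = ' ' // the chksum field counts as eight spaces
		}
		unsigned += int64(uint8(c))
		signed += int64(int8(c))
	}
	return unsigned, signed
}

func main() {
	var blk [512]byte // an all-zero block
	u, s := checksums(blk)
	fmt.Println(u, s) // 256 256: only the eight implied spaces contribute (8 * 0x20)
}
```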

archive/tar/reader.go

@@ -13,7 +13,6 @@ import (
 	"io"
 	"io/ioutil"
 	"math"
-	"os"
 	"strconv"
 	"strings"
 	"time"
@@ -34,7 +33,7 @@ type Reader struct {
 	err     error
 	pad     int64          // amount of padding (ignored) after current file entry
 	curr    numBytesReader // reader for current file entry
-	hdrBuff [blockSize]byte // buffer to use in readHeader
+	blk     block          // buffer to use as temporary local storage
 
 	RawAccounting bool          // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
 	rawBytes      *bytes.Buffer // last raw bits
@@ -120,17 +119,6 @@ const (
 	paxGNUSparseRealSize = "GNU.sparse.realsize"
 )
 
-// Keywords for old GNU sparse headers
-const (
-	oldGNUSparseMainHeaderOffset               = 386
-	oldGNUSparseMainHeaderIsExtendedOffset     = 482
-	oldGNUSparseMainHeaderNumEntries           = 4
-	oldGNUSparseExtendedHeaderIsExtendedOffset = 504
-	oldGNUSparseExtendedHeaderNumEntries       = 21
-	oldGNUSparseOffsetSize                     = 12
-	oldGNUSparseNumBytesSize                   = 12
-)
-
 // NewReader creates a new Reader reading from r.
 func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
@@ -341,7 +329,7 @@ func mergePAX(hdr *Header, headers map[string]string) error {
 			if err != nil {
 				return err
 			}
-			hdr.Size = int64(size)
+			hdr.Size = size
 		default:
 			if strings.HasPrefix(k, paxXattr) {
 				if hdr.Xattrs == nil {
@@ -371,17 +359,17 @@ func parsePAXTime(t string) (time.Time, error) {
 		if err != nil {
 			return time.Time{}, err
 		}
-		nano_buf := string(buf[pos+1:])
+		nanoBuf := string(buf[pos+1:])
 		// Pad as needed before converting to a decimal.
 		// For example .030 -> .030000000 -> 30000000 nanoseconds
-		if len(nano_buf) < maxNanoSecondIntSize {
+		if len(nanoBuf) < maxNanoSecondIntSize {
 			// Right pad
-			nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
-		} else if len(nano_buf) > maxNanoSecondIntSize {
+			nanoBuf += strings.Repeat("0", maxNanoSecondIntSize-len(nanoBuf))
+		} else if len(nanoBuf) > maxNanoSecondIntSize {
 			// Right truncate
-			nano_buf = nano_buf[:maxNanoSecondIntSize]
+			nanoBuf = nanoBuf[:maxNanoSecondIntSize]
 		}
-		nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
+		nanoseconds, err = strconv.ParseInt(nanoBuf, 10, 0)
 		if err != nil {
 			return time.Time{}, err
 		}
@@ -419,14 +407,14 @@ func parsePAX(r io.Reader) (map[string]string, error) {
 		}
 		sbuf = residual
 
-		keyStr := string(key)
+		keyStr := key
 		if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
 			// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
 			sparseMap.WriteString(value)
 			sparseMap.Write([]byte{','})
 		} else {
 			// Normal key. Set the value in the headers map.
-			headers[keyStr] = string(value)
+			headers[keyStr] = value
 		}
 	}
 	if sparseMap.Len() != 0 {
@@ -566,10 +554,10 @@ func (tr *Reader) skipUnread() error {
 		// io.Seeker, but calling Seek always returns an error and performs
 		// no action. Thus, we try an innocent seek to the current position
 		// to see if Seek is really supported.
-		pos1, err := sr.Seek(0, os.SEEK_CUR)
+		pos1, err := sr.Seek(0, io.SeekCurrent)
 		if err == nil {
 			// Seek seems supported, so perform the real Seek.
-			pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
+			pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
 			if err != nil {
 				tr.err = err
 				return tr.err
@@ -586,17 +574,6 @@ func (tr *Reader) skipUnread() error {
 	return tr.err
 }
 
-func (tr *Reader) verifyChecksum(header []byte) bool {
-	if tr.err != nil {
-		return false
-	}
-
-	var p parser
-	given := p.parseOctal(header[148:156])
-	unsigned, signed := checksum(header)
-	return p.err == nil && (given == unsigned || given == signed)
-}
-
 // readHeader reads the next block header and assumes that the underlying reader
 // is already aligned to a block boundary.
 //
@@ -605,13 +582,10 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
 // * Exactly 1 block of zeros is read and EOF is hit.
 // * At least 2 blocks of zeros are read.
 func (tr *Reader) readHeader() *Header {
-	header := tr.hdrBuff[:]
-	copy(header, zeroBlock)
-
-	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
+	if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil {
 		// because it could read some of the block, but reach EOF first
 		if tr.err == io.EOF && tr.RawAccounting {
-			if _, err := tr.rawBytes.Write(header); err != nil {
+			if _, err := tr.rawBytes.Write(tr.blk[:]); err != nil {
 				tr.err = err
 				return nil
 			}
@@ -619,28 +593,28 @@ func (tr *Reader) readHeader() *Header {
 		return nil // io.EOF is okay here
 	}
 	if tr.RawAccounting {
-		if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
+		if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil {
 			return nil
 		}
 	}
 
 	// Two blocks of zero bytes marks the end of the archive.
-	if bytes.Equal(header, zeroBlock[0:blockSize]) {
-		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
+	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
+		if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil {
 			// because it could read some of the block, but reach EOF first
 			if tr.err == io.EOF && tr.RawAccounting {
-				if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
+				if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil {
 					return nil
 				}
 			}
 			return nil // io.EOF is okay here
 		}
 		if tr.RawAccounting {
-			if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
+			if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil {
 				return nil
 			}
 		}
-		if bytes.Equal(header, zeroBlock[0:blockSize]) {
+		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
 			tr.err = io.EOF
 		} else {
 			tr.err = ErrHeader // zero block and then non-zero block
@@ -648,71 +622,55 @@ func (tr *Reader) readHeader() *Header {
 		return nil
 	}
 
-	if !tr.verifyChecksum(header) {
+	// Verify the header matches a known format.
+	format := tr.blk.GetFormat()
+	if format == formatUnknown {
 		tr.err = ErrHeader
 		return nil
 	}
 
-	// Unpack
 	var p parser
 	hdr := new(Header)
-	s := slicer(header)
 
-	hdr.Name = p.parseString(s.next(100))
-	hdr.Mode = p.parseNumeric(s.next(8))
-	hdr.Uid = int(p.parseNumeric(s.next(8)))
-	hdr.Gid = int(p.parseNumeric(s.next(8)))
-	hdr.Size = p.parseNumeric(s.next(12))
-	hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
-	s.next(8) // chksum
-	hdr.Typeflag = s.next(1)[0]
-	hdr.Linkname = p.parseString(s.next(100))
+	// Unpack the V7 header.
+	v7 := tr.blk.V7()
+	hdr.Name = p.parseString(v7.Name())
+	hdr.Mode = p.parseNumeric(v7.Mode())
+	hdr.Uid = int(p.parseNumeric(v7.UID()))
+	hdr.Gid = int(p.parseNumeric(v7.GID()))
+	hdr.Size = p.parseNumeric(v7.Size())
+	hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
+	hdr.Typeflag = v7.TypeFlag()[0]
+	hdr.Linkname = p.parseString(v7.LinkName())
 
-	// The remainder of the header depends on the value of magic.
-	// The original (v7) version of tar had no explicit magic field,
-	// so its magic bytes, like the rest of the block, are NULs.
-	magic := string(s.next(8)) // contains version field as well.
-	var format string
-	switch {
-	case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
-		if string(header[508:512]) == "tar\x00" {
-			format = "star"
-		} else {
-			format = "posix"
-		}
-	case magic == "ustar  \x00": // old GNU tar
-		format = "gnu"
-	}
-
-	switch format {
-	case "posix", "gnu", "star":
-		hdr.Uname = p.parseString(s.next(32))
-		hdr.Gname = p.parseString(s.next(32))
-		devmajor := s.next(8)
-		devminor := s.next(8)
+	// Unpack format specific fields.
+	if format > formatV7 {
+		ustar := tr.blk.USTAR()
+		hdr.Uname = p.parseString(ustar.UserName())
+		hdr.Gname = p.parseString(ustar.GroupName())
 		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
-			hdr.Devmajor = p.parseNumeric(devmajor)
-			hdr.Devminor = p.parseNumeric(devminor)
+			hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
+			hdr.Devminor = p.parseNumeric(ustar.DevMinor())
 		}
+
 		var prefix string
 		switch format {
-		case "posix", "gnu":
-			prefix = p.parseString(s.next(155))
-		case "star":
-			prefix = p.parseString(s.next(131))
-			hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
-			hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
+		case formatUSTAR, formatGNU:
+			// TODO(dsnet): Do not use the prefix field for the GNU format!
+			// See golang.org/issues/12594
+			ustar := tr.blk.USTAR()
+			prefix = p.parseString(ustar.Prefix())
+		case formatSTAR:
+			star := tr.blk.STAR()
+			prefix = p.parseString(star.Prefix())
+			hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
+			hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
 		}
 		if len(prefix) > 0 {
 			hdr.Name = prefix + "/" + hdr.Name
 		}
 	}
 
-	if p.err != nil {
-		tr.err = p.err
-		return nil
-	}
-
 	nb := hdr.Size
 	if isHeaderOnlyType(hdr.Typeflag) {
 		nb = 0
@@ -729,14 +687,14 @@ func (tr *Reader) readHeader() *Header {
 	// Check for old GNU sparse format entry.
 	if hdr.Typeflag == TypeGNUSparse {
 		// Get the real size of the file.
-		hdr.Size = p.parseNumeric(header[483:495])
+		hdr.Size = p.parseNumeric(tr.blk.GNU().RealSize())
 		if p.err != nil {
 			tr.err = p.err
 			return nil
 		}
 
 		// Read the sparse map.
-		sp := tr.readOldGNUSparseMap(header)
+		sp := tr.readOldGNUSparseMap(&tr.blk)
 		if tr.err != nil {
 			return nil
 		}
@@ -748,26 +706,24 @@ func (tr *Reader) readHeader() *Header {
 		}
 	}
 
+	if p.err != nil {
+		tr.err = p.err
+		return nil
+	}
+
 	return hdr
 }
 
 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
 // then one or more extension headers are used to store the rest of the sparse map.
-func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
+func (tr *Reader) readOldGNUSparseMap(blk *block) []sparseEntry {
 	var p parser
-	isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
-	spCap := oldGNUSparseMainHeaderNumEntries
-	if isExtended {
-		spCap += oldGNUSparseExtendedHeaderNumEntries
-	}
-	sp := make([]sparseEntry, 0, spCap)
-	s := slicer(header[oldGNUSparseMainHeaderOffset:])
-
-	// Read the four entries from the main tar header
-	for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
-		offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
-		numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
+	var s sparseArray = blk.GNU().Sparse()
+	var sp = make([]sparseEntry, 0, s.MaxEntries())
+	for i := 0; i < s.MaxEntries(); i++ {
+		offset := p.parseOctal(s.Entry(i).Offset())
+		numBytes := p.parseOctal(s.Entry(i).NumBytes())
 		if p.err != nil {
 			tr.err = p.err
 			return nil
@@ -778,23 +734,23 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
 	}
 
-	for isExtended {
+	for s.IsExtended()[0] > 0 {
 		// There are more entries. Read an extension header and parse its entries.
-		sparseHeader := make([]byte, blockSize)
-		if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
+		var blk block
+		if _, tr.err = io.ReadFull(tr.r, blk[:]); tr.err != nil {
 			return nil
 		}
 		if tr.RawAccounting {
-			if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
+			if _, tr.err = tr.rawBytes.Write(blk[:]); tr.err != nil {
 				return nil
 			}
 		}
-		isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
-		s = slicer(sparseHeader)
-		for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
-			offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
-			numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
+		s = blk.Sparse()
+		for i := 0; i < s.MaxEntries(); i++ {
+			offset := p.parseOctal(s.Entry(i).Offset())
+			numBytes := p.parseOctal(s.Entry(i).NumBytes())
 			if p.err != nil {
 				tr.err = p.err
 				return nil
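For orientation, a small self-contained sketch (not from the commit; the type and helper names here are illustrative stand-ins for the sparseArray accessors and parser in the diff) of the old GNU sparse layout that readOldGNUSparseMap now walks: 24-byte entries holding a 12-byte octal offset and a 12-byte octal length, followed by a single "is extended" flag byte.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// sparseRegion mimics the sparseArray accessors added in format.go.
type sparseRegion []byte

func (s sparseRegion) Entry(i int) []byte { return s[i*24:][:24] }
func (s sparseRegion) MaxEntries() int    { return len(s) / 24 }
func (s sparseRegion) IsExtended() bool   { return s[24*s.MaxEntries()] != 0 }

// parseOctal is a simplified stand-in for the package's parser.parseOctal.
func parseOctal(b []byte) int64 {
	v, _ := strconv.ParseInt(strings.Trim(string(b), " \x00"), 8, 64)
	return v
}

func main() {
	// The GNU header carries 4 entries plus a flag byte (24*4+1 = 97 bytes at offset 386).
	region := make(sparseRegion, 24*4+1)
	copy(region.Entry(0)[:12], "00000001000 ") // offset 0o1000 = 512
	copy(region.Entry(0)[12:], "00000002000 ") // length 0o2000 = 1024

	for i := 0; i < region.MaxEntries(); i++ {
		offset := parseOctal(region.Entry(i)[:12])
		numBytes := parseOctal(region.Entry(i)[12:])
		if offset == 0 && numBytes == 0 {
			continue // unused slot
		}
		fmt.Printf("data fragment at offset %d, length %d\n", offset, numBytes)
	}
	fmt.Println("extension block follows:", region.IsExtended())
}
```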

archive/tar/writer.go

@@ -37,9 +37,9 @@ type Writer struct {
 	pad        int64 // amount of padding to write after current file entry
 	closed     bool
 	usedBinary bool // whether the binary numeric field extension was used
-	preferPax  bool // use pax header instead of binary numeric header
-	hdrBuff    [blockSize]byte // buffer to use in writeHeader when writing a regular header
-	paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
+	preferPax  bool  // use PAX header instead of binary numeric header
+	hdrBuff    block // buffer to use in writeHeader when writing a regular header
+	paxHdrBuff block // buffer to use in writeHeader when writing a PAX header
 }
 
 type formatter struct {
@@ -153,27 +153,24 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
 	// a map to hold pax header records, if any are needed
 	paxHeaders := make(map[string]string)
 
-	// TODO(shanemhansen): we might want to use PAX headers for
+	// TODO(dsnet): we might want to use PAX headers for
 	// subsecond time resolution, but for now let's just capture
 	// too long fields or non ascii characters
 
-	var f formatter
-	var header []byte
-
 	// We need to select which scratch buffer to use carefully,
 	// since this method is called recursively to write PAX headers.
 	// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
 	// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
 	// already being used by the non-recursive call, so we must use paxHdrBuff.
-	header = tw.hdrBuff[:]
+	header := &tw.hdrBuff
 	if !allowPax {
-		header = tw.paxHdrBuff[:]
+		header = &tw.paxHdrBuff
 	}
-	copy(header, zeroBlock)
-	s := slicer(header)
+	copy(header[:], zeroBlock[:])
 
 	// Wrappers around formatter that automatically sets paxHeaders if the
 	// argument extends beyond the capacity of the input byte slice.
+	var f formatter
 	var formatString = func(b []byte, s string, paxKeyword string) {
 		needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
 		if needsPaxHeader {
@@ -202,44 +199,33 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
 		f.formatNumeric(b, x)
 	}
 
-	// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
-	pathHeaderBytes := s.next(fileNameSize)
-
-	formatString(pathHeaderBytes, hdr.Name, paxPath)
-
 	// Handle out of range ModTime carefully.
 	var modTime int64
 	if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
 		modTime = hdr.ModTime.Unix()
 	}
 
-	f.formatOctal(s.next(8), hdr.Mode)               // 100:108
-	formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
-	formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
-	formatNumeric(s.next(12), hdr.Size, paxSize)     // 124:136
-	formatNumeric(s.next(12), modTime, paxNone)      // 136:148 --- consider using pax for finer granularity
-	s.next(8)                                        // chksum (148:156)
-	s.next(1)[0] = hdr.Typeflag                      // 156:157
+	v7 := header.V7()
+	formatString(v7.Name(), hdr.Name, paxPath)
+	// TODO(dsnet): The GNU format permits the mode field to be encoded in
+	// base-256 format. Thus, we can use formatNumeric instead of formatOctal.
+	f.formatOctal(v7.Mode(), hdr.Mode)
+	formatNumeric(v7.UID(), int64(hdr.Uid), paxUid)
+	formatNumeric(v7.GID(), int64(hdr.Gid), paxGid)
+	formatNumeric(v7.Size(), hdr.Size, paxSize)
+	// TODO(dsnet): Consider using PAX for finer time granularity.
+	formatNumeric(v7.ModTime(), modTime, paxNone)
+	v7.TypeFlag()[0] = hdr.Typeflag
+	formatString(v7.LinkName(), hdr.Linkname, paxLinkpath)
 
-	formatString(s.next(100), hdr.Linkname, paxLinkpath)
-
-	copy(s.next(8), []byte("ustar\x0000"))          // 257:265
-	formatString(s.next(32), hdr.Uname, paxUname)   // 265:297
-	formatString(s.next(32), hdr.Gname, paxGname)   // 297:329
-	formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
-	formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
+	ustar := header.USTAR()
+	formatString(ustar.UserName(), hdr.Uname, paxUname)
+	formatString(ustar.GroupName(), hdr.Gname, paxGname)
+	formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone)
+	formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone)
 
-	// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
-	prefixHeaderBytes := s.next(155)
-	formatString(prefixHeaderBytes, "", paxNone) // 345:500  prefix
-
-	// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
-	if tw.usedBinary {
-		copy(header[257:265], []byte("ustar  \x00"))
-	}
-
-	_, paxPathUsed := paxHeaders[paxPath]
 	// try to use a ustar header when only the name is too long
+	_, paxPathUsed := paxHeaders[paxPath]
 	if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
 		prefix, suffix, ok := splitUSTARPath(hdr.Name)
 		if ok {
@@ -247,16 +233,16 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
 			delete(paxHeaders, paxPath)
 
 			// Update the path fields
-			formatString(pathHeaderBytes, suffix, paxNone)
-			formatString(prefixHeaderBytes, prefix, paxNone)
+			formatString(v7.Name(), suffix, paxNone)
+			formatString(ustar.Prefix(), prefix, paxNone)
 		}
 	}
 
-	// The chksum field is terminated by a NUL and a space.
-	// This is different from the other octal fields.
-	chksum, _ := checksum(header)
-	f.formatOctal(header[148:155], chksum) // Never fails
-	header[155] = ' '
+	if tw.usedBinary {
+		header.SetFormat(formatGNU)
+	} else {
+		header.SetFormat(formatUSTAR)
+	}
 
 	// Check if there were any formatting errors.
 	if f.err != nil {
@@ -278,10 +264,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
 			return err
 		}
 	}
 
-	tw.nb = int64(hdr.Size)
+	tw.nb = hdr.Size
 	tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
 
-	_, tw.err = tw.w.Write(header)
+	_, tw.err = tw.w.Write(header[:])
 	return tw.err
 }
@@ -289,10 +275,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
 // If the path is not splittable, then it will return ("", "", false).
 func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
 	length := len(name)
-	if length <= fileNameSize || !isASCII(name) {
+	if length <= nameSize || !isASCII(name) {
 		return "", "", false
-	} else if length > fileNamePrefixSize+1 {
-		length = fileNamePrefixSize + 1
+	} else if length > prefixSize+1 {
+		length = prefixSize + 1
 	} else if name[length-1] == '/' {
 		length--
 	}
@@ -300,7 +286,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
 	i := strings.LastIndex(name[:length], "/")
 	nlen := len(name) - i - 1 // nlen is length of suffix
 	plen := i                 // plen is length of prefix
-	if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
+	if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
 		return "", "", false
 	}
 	return name[:i], name[i+1:], true
@@ -323,8 +309,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
 	fullName := path.Join(dir, "PaxHeaders.0", file)
 
 	ascii := toASCII(fullName)
-	if len(ascii) > 100 {
-		ascii = ascii[:100]
+	if len(ascii) > nameSize {
+		ascii = ascii[:nameSize]
 	}
 	ext.Name = ascii
 	// Construct the body
@@ -407,7 +393,7 @@ func (tw *Writer) Close() error {
 	// trailer: two zero blocks
 	for i := 0; i < 2; i++ {
-		_, tw.err = tw.w.Write(zeroBlock)
+		_, tw.err = tw.w.Write(zeroBlock[:])
 		if tw.err != nil {
 			break
 		}
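Before the test vectors below, a standalone sketch (not part of the commit; it copies the splitting rule from the diff but omits the isASCII check for brevity) of the USTAR split that splitUSTARPath applies: a long path is representable only if it can be cut at a '/' so the suffix fits the 100-byte name field and the prefix fits the 155-byte prefix field.

```go
package main

import (
	"fmt"
	"strings"
)

const (
	nameSize   = 100 // max bytes in the USTAR name field
	prefixSize = 155 // max bytes in the USTAR prefix field
)

// splitUSTARPath mirrors the function in the diff, minus the non-ASCII rejection.
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
	length := len(name)
	if length <= nameSize {
		return "", "", false // already fits in the name field; nothing to split
	} else if length > prefixSize+1 {
		length = prefixSize + 1
	} else if name[length-1] == '/' {
		length--
	}
	i := strings.LastIndex(name[:length], "/")
	nlen := len(name) - i - 1 // length of suffix
	plen := i                 // length of prefix
	if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
		return "", "", false
	}
	return name[:i], name[i+1:], true
}

func main() {
	long := strings.Repeat("a", 80) + "/" + strings.Repeat("b", 90)
	prefix, suffix, ok := splitUSTARPath(long)
	fmt.Println(len(prefix), len(suffix), ok) // 80 90 true
}
```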

archive/tar/writer_test.go

@@ -587,17 +587,17 @@ func TestSplitUSTARPath(t *testing.T) {
 		{"", "", "", false},
 		{"abc", "", "", false},
 		{"用戶名", "", "", false},
-		{sr("a", fileNameSize), "", "", false},
-		{sr("a", fileNameSize) + "/", "", "", false},
-		{sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true},
-		{sr("a", fileNamePrefixSize) + "/", "", "", false},
-		{sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true},
-		{sr("a", fileNameSize+1), "", "", false},
-		{sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true},
-		{sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize),
-			sr("a", fileNamePrefixSize), sr("b", fileNameSize), true},
-		{sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false},
-		{sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true},
+		{sr("a", nameSize), "", "", false},
+		{sr("a", nameSize) + "/", "", "", false},
+		{sr("a", nameSize) + "/a", sr("a", nameSize), "a", true},
+		{sr("a", prefixSize) + "/", "", "", false},
+		{sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true},
+		{sr("a", nameSize+1), "", "", false},
+		{sr("/", nameSize+1), sr("/", nameSize-1), "/", true},
+		{sr("a", prefixSize) + "/" + sr("b", nameSize),
+			sr("a", prefixSize), sr("b", nameSize), true},
+		{sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false},
+		{sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true},
 	}
 	for _, v := range vectors {