mirror of
https://github.com/vbatts/tar-split.git
synced 2025-08-02 20:30:28 +00:00
Merge 2c3c708698
into ae8540dc47
This commit is contained in:
commit
f6f6a575de
5 changed files with 331 additions and 227 deletions
|
@ -21,10 +21,8 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
// Header type flags.
|
||||
const (
|
||||
blockSize = 512
|
||||
|
||||
// Types
|
||||
TypeReg = '0' // regular file
|
||||
TypeRegA = '\x00' // regular file
|
||||
TypeLink = '1' // hard link
|
||||
|
@ -61,12 +59,6 @@ type Header struct {
|
|||
Xattrs map[string]string
|
||||
}
|
||||
|
||||
// Field widths, in bytes, taken from the tar specification.
const (
	// fileNameSize is the most bytes a standard tar name may occupy.
	fileNameSize = 100

	// fileNamePrefixSize is the most bytes the ustar extension may occupy.
	fileNamePrefixSize = 155
)
|
||||
|
||||
// FileInfo returns an os.FileInfo for the Header.
|
||||
func (h *Header) FileInfo() os.FileInfo {
|
||||
return headerFileInfo{h}
|
||||
|
@ -279,33 +271,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
|
|||
return h, nil
|
||||
}
|
||||
|
||||
var zeroBlock = make([]byte, blockSize)
|
||||
|
||||
// checksum sums the bytes of header twice: once reading every byte as an
// unsigned value (the POSIX rule) and once as a signed value (the Sun tar
// rule). Both sums are returned so a caller can accept either.
//
// The chksum field itself, header[148:156], never contributes its stored
// bytes; reaching index 148 adds the value of eight spaces instead and the
// remaining bytes of the field are skipped.
func checksum(header []byte) (unsigned int64, signed int64) {
	const (
		chksumStart = 148 // first byte of the chksum field
		chksumEnd   = 156 // first byte after the chksum field
	)
	for i, c := range header {
		switch {
		case i == chksumStart:
			// Account for the whole field at once, as eight space bytes.
			unsigned += ' ' * 8
			signed += ' ' * 8
		case i > chksumStart && i < chksumEnd:
			// Already covered by the chksumStart case above.
		default:
			unsigned += int64(c)
			signed += int64(int8(c))
		}
	}
	return unsigned, signed
}
|
||||
|
||||
// slicer is a byte slice that is consumed from the front in pieces.
type slicer []byte

// next returns the first n bytes of the slicer and advances the slicer past
// them. The returned slice shares storage with the slicer.
func (sp *slicer) next(n int) []byte {
	head := (*sp)[:n]
	*sp = (*sp)[n:]
	return head
}
|
||||
|
||||
func isASCII(s string) bool {
|
||||
for _, c := range s {
|
||||
if c >= 0x80 {
|
||||
|
|
197
archive/tar/format.go
Normal file
197
archive/tar/format.go
Normal file
|
@ -0,0 +1,197 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package tar
|
||||
|
||||
// Identifiers for the tar formats this package distinguishes.
// formatUnknown is 0; every later constant is the next power of two
// (1, 2, 4, 8, 16).
const (
	// formatUnknown means the format could not be determined.
	formatUnknown = 0

	// formatV7 is the format of the original Unix V7 tar tool, from before
	// tar was standardized.
	formatV7 = 1 << (iota - 1)

	// formatGNU covers the old and new GNU formats, both incompatible with
	// USTAR. The old GNU sparse extension belongs here. The GNU sparse
	// extensions carried in PAX headers (versions 0.0, 0.1, and 1.0) do not;
	// they count as PAX.
	formatGNU

	// formatSTAR is Schily's tar format, which is incompatible with USTAR.
	// STAR extensions to the PAX format are not included; they count as PAX.
	formatSTAR

	// formatUSTAR is the earlier tar standard, defined in POSIX.1-1988.
	// It is incompatible with the GNU and STAR formats.
	formatUSTAR

	// formatPAX is the latest tar standard, defined in POSIX.1-2001. It
	// extends USTAR and is "backwards compatible" with it.
	//
	// Newer formats that layer their own extensions on PAX, such as GNU
	// sparse files and SCHILY extended attributes, are backwards compatible
	// with PAX and are therefore labelled as "PAX" too.
	formatPAX
)
|
||||
|
||||
// Byte sequences found in a header block that identify its format.
const (
	// GNU: the magic ends in a space and the version is a space then a NUL.
	magicGNU   = "ustar "
	versionGNU = " \x00"

	// USTAR: the magic is NUL-terminated and the version is "00".
	magicUSTAR   = "ustar\x00"
	versionUSTAR = "00"

	// STAR: the trailer value written alongside the USTAR magic.
	trailerSTAR = "tar\x00"
)
|
||||
|
||||
// Sizes, in bytes, taken from the various tar specifications.
const (
	// blockSize is the size of each block in a tar stream.
	blockSize = 512

	// nameSize is the maximum length of the name field in USTAR format.
	nameSize = 100

	// prefixSize is the maximum length of the prefix field in USTAR format.
	prefixSize = 155
)
|
||||
|
||||
var zeroBlock block
|
||||
|
||||
type block [blockSize]byte
|
||||
|
||||
// Convert block to any number of formats.
|
||||
func (b *block) V7() *headerV7 { return (*headerV7)(b) }
|
||||
func (b *block) GNU() *headerGNU { return (*headerGNU)(b) }
|
||||
func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) }
|
||||
func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
|
||||
func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
|
||||
|
||||
// GetFormat checks that the block is a valid tar header based on the checksum.
|
||||
// It then attempts to guess the specific format based on magic values.
|
||||
// If the checksum fails, then formatUnknown is returned.
|
||||
func (b *block) GetFormat() (format int) {
|
||||
// Verify checksum.
|
||||
var p parser
|
||||
value := p.parseOctal(b.V7().Chksum())
|
||||
chksum1, chksum2 := b.ComputeChecksum()
|
||||
if p.err != nil || (value != chksum1 && value != chksum2) {
|
||||
return formatUnknown
|
||||
}
|
||||
|
||||
// Guess the magic values.
|
||||
magic := string(b.USTAR().Magic())
|
||||
version := string(b.USTAR().Version())
|
||||
trailer := string(b.STAR().Trailer())
|
||||
switch {
|
||||
case magic == magicUSTAR && trailer == trailerSTAR:
|
||||
return formatSTAR
|
||||
case magic == magicUSTAR:
|
||||
return formatUSTAR
|
||||
case magic == magicGNU && version == versionGNU:
|
||||
return formatGNU
|
||||
default:
|
||||
return formatV7
|
||||
}
|
||||
}
|
||||
|
||||
// SetFormat writes the magic values necessary for specified format
|
||||
// and then updates the checksum accordingly.
|
||||
func (b *block) SetFormat(format int) {
|
||||
// Set the magic values.
|
||||
switch format {
|
||||
case formatV7:
|
||||
// Do nothing.
|
||||
case formatGNU:
|
||||
copy(b.GNU().Magic(), magicGNU)
|
||||
copy(b.GNU().Version(), versionGNU)
|
||||
case formatSTAR:
|
||||
copy(b.STAR().Magic(), magicUSTAR)
|
||||
copy(b.STAR().Version(), versionUSTAR)
|
||||
copy(b.STAR().Trailer(), trailerSTAR)
|
||||
case formatUSTAR, formatPAX:
|
||||
copy(b.USTAR().Magic(), magicUSTAR)
|
||||
copy(b.USTAR().Version(), versionUSTAR)
|
||||
default:
|
||||
panic("invalid format")
|
||||
}
|
||||
|
||||
// Update checksum.
|
||||
// This field is special in that it is terminated by a NULL then space.
|
||||
var f formatter
|
||||
field := b.V7().Chksum()
|
||||
chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
|
||||
f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
|
||||
field[7] = ' '
|
||||
}
|
||||
|
||||
// ComputeChecksum computes the checksum for the header block.
|
||||
// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
|
||||
// signed byte values.
|
||||
// We compute and return both.
|
||||
func (b *block) ComputeChecksum() (unsigned, signed int64) {
|
||||
for i, c := range b {
|
||||
if 148 <= i && i < 156 {
|
||||
c = ' ' // Treat the checksum field itself as all spaces.
|
||||
}
|
||||
unsigned += int64(uint8(c))
|
||||
signed += int64(int8(c))
|
||||
}
|
||||
return unsigned, signed
|
||||
}
|
||||
|
||||
type headerV7 [blockSize]byte
|
||||
|
||||
func (h *headerV7) Name() []byte { return h[000:][:100] }
|
||||
func (h *headerV7) Mode() []byte { return h[100:][:8] }
|
||||
func (h *headerV7) UID() []byte { return h[108:][:8] }
|
||||
func (h *headerV7) GID() []byte { return h[116:][:8] }
|
||||
func (h *headerV7) Size() []byte { return h[124:][:12] }
|
||||
func (h *headerV7) ModTime() []byte { return h[136:][:12] }
|
||||
func (h *headerV7) Chksum() []byte { return h[148:][:8] }
|
||||
func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
|
||||
func (h *headerV7) LinkName() []byte { return h[157:][:100] }
|
||||
|
||||
type headerGNU [blockSize]byte
|
||||
|
||||
func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) }
|
||||
func (h *headerGNU) Magic() []byte { return h[257:][:6] }
|
||||
func (h *headerGNU) Version() []byte { return h[263:][:2] }
|
||||
func (h *headerGNU) UserName() []byte { return h[265:][:32] }
|
||||
func (h *headerGNU) GroupName() []byte { return h[297:][:32] }
|
||||
func (h *headerGNU) DevMajor() []byte { return h[329:][:8] }
|
||||
func (h *headerGNU) DevMinor() []byte { return h[337:][:8] }
|
||||
func (h *headerGNU) AccessTime() []byte { return h[345:][:12] }
|
||||
func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] }
|
||||
func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
|
||||
func (h *headerGNU) RealSize() []byte { return h[483:][:12] }
|
||||
|
||||
type headerSTAR [blockSize]byte
|
||||
|
||||
func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) }
|
||||
func (h *headerSTAR) Magic() []byte { return h[257:][:6] }
|
||||
func (h *headerSTAR) Version() []byte { return h[263:][:2] }
|
||||
func (h *headerSTAR) UserName() []byte { return h[265:][:32] }
|
||||
func (h *headerSTAR) GroupName() []byte { return h[297:][:32] }
|
||||
func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] }
|
||||
func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] }
|
||||
func (h *headerSTAR) Prefix() []byte { return h[345:][:131] }
|
||||
func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
|
||||
func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
|
||||
func (h *headerSTAR) Trailer() []byte { return h[508:][:4] }
|
||||
|
||||
type headerUSTAR [blockSize]byte
|
||||
|
||||
func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) }
|
||||
func (h *headerUSTAR) Magic() []byte { return h[257:][:6] }
|
||||
func (h *headerUSTAR) Version() []byte { return h[263:][:2] }
|
||||
func (h *headerUSTAR) UserName() []byte { return h[265:][:32] }
|
||||
func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
|
||||
func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] }
|
||||
func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] }
|
||||
func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
|
||||
|
||||
type sparseArray []byte
|
||||
|
||||
func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) }
|
||||
func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
|
||||
func (s sparseArray) MaxEntries() int { return len(s) / 24 }
|
||||
|
||||
// sparseNode is a byte region that begins with one sparse entry: a 12-byte
// offset field followed by a 12-byte length field. Both accessors return
// sub-slices that share storage with the node.
type sparseNode []byte

// Offset returns the first 12 bytes of the node.
func (s sparseNode) Offset() []byte {
	return s[:12]
}

// NumBytes returns the 12 bytes that follow the offset field.
func (s sparseNode) NumBytes() []byte {
	rest := s[12:]
	return rest[:12]
}
|
|
@ -13,7 +13,6 @@ import (
|
|||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -34,7 +33,7 @@ type Reader struct {
|
|||
err error
|
||||
pad int64 // amount of padding (ignored) after current file entry
|
||||
curr numBytesReader // reader for current file entry
|
||||
hdrBuff [blockSize]byte // buffer to use in readHeader
|
||||
blk block // buffer to use as temporary local storage
|
||||
|
||||
RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
|
||||
rawBytes *bytes.Buffer // last raw bits
|
||||
|
@ -120,17 +119,6 @@ const (
|
|||
paxGNUSparseRealSize = "GNU.sparse.realsize"
|
||||
)
|
||||
|
||||
// Keywords for old GNU sparse headers
|
||||
const (
|
||||
oldGNUSparseMainHeaderOffset = 386
|
||||
oldGNUSparseMainHeaderIsExtendedOffset = 482
|
||||
oldGNUSparseMainHeaderNumEntries = 4
|
||||
oldGNUSparseExtendedHeaderIsExtendedOffset = 504
|
||||
oldGNUSparseExtendedHeaderNumEntries = 21
|
||||
oldGNUSparseOffsetSize = 12
|
||||
oldGNUSparseNumBytesSize = 12
|
||||
)
|
||||
|
||||
// NewReader creates a new Reader reading from r.
|
||||
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
|
||||
|
||||
|
@ -341,7 +329,7 @@ func mergePAX(hdr *Header, headers map[string]string) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hdr.Size = int64(size)
|
||||
hdr.Size = size
|
||||
default:
|
||||
if strings.HasPrefix(k, paxXattr) {
|
||||
if hdr.Xattrs == nil {
|
||||
|
@ -371,17 +359,17 @@ func parsePAXTime(t string) (time.Time, error) {
|
|||
if err != nil {
|
||||
return time.Time{}, err
|
||||
}
|
||||
nano_buf := string(buf[pos+1:])
|
||||
nanoBuf := string(buf[pos+1:])
|
||||
// Pad as needed before converting to a decimal.
|
||||
// For example .030 -> .030000000 -> 30000000 nanoseconds
|
||||
if len(nano_buf) < maxNanoSecondIntSize {
|
||||
if len(nanoBuf) < maxNanoSecondIntSize {
|
||||
// Right pad
|
||||
nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
|
||||
} else if len(nano_buf) > maxNanoSecondIntSize {
|
||||
nanoBuf += strings.Repeat("0", maxNanoSecondIntSize-len(nanoBuf))
|
||||
} else if len(nanoBuf) > maxNanoSecondIntSize {
|
||||
// Right truncate
|
||||
nano_buf = nano_buf[:maxNanoSecondIntSize]
|
||||
nanoBuf = nanoBuf[:maxNanoSecondIntSize]
|
||||
}
|
||||
nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
|
||||
nanoseconds, err = strconv.ParseInt(nanoBuf, 10, 0)
|
||||
if err != nil {
|
||||
return time.Time{}, err
|
||||
}
|
||||
|
@ -419,14 +407,14 @@ func parsePAX(r io.Reader) (map[string]string, error) {
|
|||
}
|
||||
sbuf = residual
|
||||
|
||||
keyStr := string(key)
|
||||
keyStr := key
|
||||
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
|
||||
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
|
||||
sparseMap.WriteString(value)
|
||||
sparseMap.Write([]byte{','})
|
||||
} else {
|
||||
// Normal key. Set the value in the headers map.
|
||||
headers[keyStr] = string(value)
|
||||
headers[keyStr] = value
|
||||
}
|
||||
}
|
||||
if sparseMap.Len() != 0 {
|
||||
|
@ -566,10 +554,10 @@ func (tr *Reader) skipUnread() error {
|
|||
// io.Seeker, but calling Seek always returns an error and performs
|
||||
// no action. Thus, we try an innocent seek to the current position
|
||||
// to see if Seek is really supported.
|
||||
pos1, err := sr.Seek(0, os.SEEK_CUR)
|
||||
pos1, err := sr.Seek(0, io.SeekCurrent)
|
||||
if err == nil {
|
||||
// Seek seems supported, so perform the real Seek.
|
||||
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
|
||||
pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
|
||||
if err != nil {
|
||||
tr.err = err
|
||||
return tr.err
|
||||
|
@ -586,17 +574,6 @@ func (tr *Reader) skipUnread() error {
|
|||
return tr.err
|
||||
}
|
||||
|
||||
func (tr *Reader) verifyChecksum(header []byte) bool {
|
||||
if tr.err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var p parser
|
||||
given := p.parseOctal(header[148:156])
|
||||
unsigned, signed := checksum(header)
|
||||
return p.err == nil && (given == unsigned || given == signed)
|
||||
}
|
||||
|
||||
// readHeader reads the next block header and assumes that the underlying reader
|
||||
// is already aligned to a block boundary.
|
||||
//
|
||||
|
@ -605,13 +582,10 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
|
|||
// * Exactly 1 block of zeros is read and EOF is hit.
|
||||
// * At least 2 blocks of zeros are read.
|
||||
func (tr *Reader) readHeader() *Header {
|
||||
header := tr.hdrBuff[:]
|
||||
copy(header, zeroBlock)
|
||||
|
||||
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
|
||||
if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil {
|
||||
// because it could read some of the block, but reach EOF first
|
||||
if tr.err == io.EOF && tr.RawAccounting {
|
||||
if _, err := tr.rawBytes.Write(header); err != nil {
|
||||
if _, err := tr.rawBytes.Write(tr.blk[:]); err != nil {
|
||||
tr.err = err
|
||||
return nil
|
||||
}
|
||||
|
@ -619,28 +593,28 @@ func (tr *Reader) readHeader() *Header {
|
|||
return nil // io.EOF is okay here
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Two blocks of zero bytes marks the end of the archive.
|
||||
if bytes.Equal(header, zeroBlock[0:blockSize]) {
|
||||
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
|
||||
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
||||
if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil {
|
||||
// because it could read some of the block, but reach EOF first
|
||||
if tr.err == io.EOF && tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil // io.EOF is okay here
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
|
||||
if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if bytes.Equal(header, zeroBlock[0:blockSize]) {
|
||||
if bytes.Equal(tr.blk[:], zeroBlock[:]) {
|
||||
tr.err = io.EOF
|
||||
} else {
|
||||
tr.err = ErrHeader // zero block and then non-zero block
|
||||
|
@ -648,71 +622,55 @@ func (tr *Reader) readHeader() *Header {
|
|||
return nil
|
||||
}
|
||||
|
||||
if !tr.verifyChecksum(header) {
|
||||
// Verify the header matches a known format.
|
||||
format := tr.blk.GetFormat()
|
||||
if format == formatUnknown {
|
||||
tr.err = ErrHeader
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unpack
|
||||
var p parser
|
||||
hdr := new(Header)
|
||||
s := slicer(header)
|
||||
|
||||
hdr.Name = p.parseString(s.next(100))
|
||||
hdr.Mode = p.parseNumeric(s.next(8))
|
||||
hdr.Uid = int(p.parseNumeric(s.next(8)))
|
||||
hdr.Gid = int(p.parseNumeric(s.next(8)))
|
||||
hdr.Size = p.parseNumeric(s.next(12))
|
||||
hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
|
||||
s.next(8) // chksum
|
||||
hdr.Typeflag = s.next(1)[0]
|
||||
hdr.Linkname = p.parseString(s.next(100))
|
||||
// Unpack the V7 header.
|
||||
v7 := tr.blk.V7()
|
||||
hdr.Name = p.parseString(v7.Name())
|
||||
hdr.Mode = p.parseNumeric(v7.Mode())
|
||||
hdr.Uid = int(p.parseNumeric(v7.UID()))
|
||||
hdr.Gid = int(p.parseNumeric(v7.GID()))
|
||||
hdr.Size = p.parseNumeric(v7.Size())
|
||||
hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
|
||||
hdr.Typeflag = v7.TypeFlag()[0]
|
||||
hdr.Linkname = p.parseString(v7.LinkName())
|
||||
|
||||
// The remainder of the header depends on the value of magic.
|
||||
// The original (v7) version of tar had no explicit magic field,
|
||||
// so its magic bytes, like the rest of the block, are NULs.
|
||||
magic := string(s.next(8)) // contains version field as well.
|
||||
var format string
|
||||
switch {
|
||||
case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
|
||||
if string(header[508:512]) == "tar\x00" {
|
||||
format = "star"
|
||||
} else {
|
||||
format = "posix"
|
||||
}
|
||||
case magic == "ustar \x00": // old GNU tar
|
||||
format = "gnu"
|
||||
}
|
||||
|
||||
switch format {
|
||||
case "posix", "gnu", "star":
|
||||
hdr.Uname = p.parseString(s.next(32))
|
||||
hdr.Gname = p.parseString(s.next(32))
|
||||
devmajor := s.next(8)
|
||||
devminor := s.next(8)
|
||||
// Unpack format specific fields.
|
||||
if format > formatV7 {
|
||||
ustar := tr.blk.USTAR()
|
||||
hdr.Uname = p.parseString(ustar.UserName())
|
||||
hdr.Gname = p.parseString(ustar.GroupName())
|
||||
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
|
||||
hdr.Devmajor = p.parseNumeric(devmajor)
|
||||
hdr.Devminor = p.parseNumeric(devminor)
|
||||
hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
|
||||
hdr.Devminor = p.parseNumeric(ustar.DevMinor())
|
||||
}
|
||||
|
||||
var prefix string
|
||||
switch format {
|
||||
case "posix", "gnu":
|
||||
prefix = p.parseString(s.next(155))
|
||||
case "star":
|
||||
prefix = p.parseString(s.next(131))
|
||||
hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
|
||||
hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
|
||||
case formatUSTAR, formatGNU:
|
||||
// TODO(dsnet): Do not use the prefix field for the GNU format!
|
||||
// See golang.org/issues/12594
|
||||
ustar := tr.blk.USTAR()
|
||||
prefix = p.parseString(ustar.Prefix())
|
||||
case formatSTAR:
|
||||
star := tr.blk.STAR()
|
||||
prefix = p.parseString(star.Prefix())
|
||||
hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
|
||||
hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
|
||||
}
|
||||
if len(prefix) > 0 {
|
||||
hdr.Name = prefix + "/" + hdr.Name
|
||||
}
|
||||
}
|
||||
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
|
||||
nb := hdr.Size
|
||||
if isHeaderOnlyType(hdr.Typeflag) {
|
||||
nb = 0
|
||||
|
@ -729,14 +687,14 @@ func (tr *Reader) readHeader() *Header {
|
|||
// Check for old GNU sparse format entry.
|
||||
if hdr.Typeflag == TypeGNUSparse {
|
||||
// Get the real size of the file.
|
||||
hdr.Size = p.parseNumeric(header[483:495])
|
||||
hdr.Size = p.parseNumeric(tr.blk.GNU().RealSize())
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read the sparse map.
|
||||
sp := tr.readOldGNUSparseMap(header)
|
||||
sp := tr.readOldGNUSparseMap(&tr.blk)
|
||||
if tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
|
@ -748,26 +706,24 @@ func (tr *Reader) readHeader() *Header {
|
|||
}
|
||||
}
|
||||
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
}
|
||||
|
||||
return hdr
|
||||
}
|
||||
|
||||
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
|
||||
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
|
||||
// then one or more extension headers are used to store the rest of the sparse map.
|
||||
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
|
||||
func (tr *Reader) readOldGNUSparseMap(blk *block) []sparseEntry {
|
||||
var p parser
|
||||
isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
|
||||
spCap := oldGNUSparseMainHeaderNumEntries
|
||||
if isExtended {
|
||||
spCap += oldGNUSparseExtendedHeaderNumEntries
|
||||
}
|
||||
sp := make([]sparseEntry, 0, spCap)
|
||||
s := slicer(header[oldGNUSparseMainHeaderOffset:])
|
||||
|
||||
// Read the four entries from the main tar header
|
||||
for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
|
||||
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
|
||||
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
|
||||
var s sparseArray = blk.GNU().Sparse()
|
||||
var sp = make([]sparseEntry, 0, s.MaxEntries())
|
||||
for i := 0; i < s.MaxEntries(); i++ {
|
||||
offset := p.parseOctal(s.Entry(i).Offset())
|
||||
numBytes := p.parseOctal(s.Entry(i).NumBytes())
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
|
@ -778,23 +734,23 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
|
|||
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
|
||||
}
|
||||
|
||||
for isExtended {
|
||||
for s.IsExtended()[0] > 0 {
|
||||
// There are more entries. Read an extension header and parse its entries.
|
||||
sparseHeader := make([]byte, blockSize)
|
||||
if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
|
||||
var blk block
|
||||
if _, tr.err = io.ReadFull(tr.r, blk[:]); tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
if tr.RawAccounting {
|
||||
if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
|
||||
if _, tr.err = tr.rawBytes.Write(blk[:]); tr.err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
|
||||
s = slicer(sparseHeader)
|
||||
for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
|
||||
offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
|
||||
numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
|
||||
s = blk.Sparse()
|
||||
|
||||
for i := 0; i < s.MaxEntries(); i++ {
|
||||
offset := p.parseOctal(s.Entry(i).Offset())
|
||||
numBytes := p.parseOctal(s.Entry(i).NumBytes())
|
||||
if p.err != nil {
|
||||
tr.err = p.err
|
||||
return nil
|
||||
|
|
|
@ -37,9 +37,9 @@ type Writer struct {
|
|||
pad int64 // amount of padding to write after current file entry
|
||||
closed bool
|
||||
usedBinary bool // whether the binary numeric field extension was used
|
||||
preferPax bool // use pax header instead of binary numeric header
|
||||
hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header
|
||||
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
|
||||
preferPax bool // use PAX header instead of binary numeric header
|
||||
hdrBuff block // buffer to use in writeHeader when writing a regular header
|
||||
paxHdrBuff block // buffer to use in writeHeader when writing a PAX header
|
||||
}
|
||||
|
||||
type formatter struct {
|
||||
|
@ -153,27 +153,24 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
// a map to hold pax header records, if any are needed
|
||||
paxHeaders := make(map[string]string)
|
||||
|
||||
// TODO(shanemhansen): we might want to use PAX headers for
|
||||
// TODO(dsnet): we might want to use PAX headers for
|
||||
// subsecond time resolution, but for now let's just capture
|
||||
// too long fields or non ascii characters
|
||||
|
||||
var f formatter
|
||||
var header []byte
|
||||
|
||||
// We need to select which scratch buffer to use carefully,
|
||||
// since this method is called recursively to write PAX headers.
|
||||
// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
|
||||
// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
|
||||
// already being used by the non-recursive call, so we must use paxHdrBuff.
|
||||
header = tw.hdrBuff[:]
|
||||
header := &tw.hdrBuff
|
||||
if !allowPax {
|
||||
header = tw.paxHdrBuff[:]
|
||||
header = &tw.paxHdrBuff
|
||||
}
|
||||
copy(header, zeroBlock)
|
||||
s := slicer(header)
|
||||
copy(header[:], zeroBlock[:])
|
||||
|
||||
// Wrappers around formatter that automatically sets paxHeaders if the
|
||||
// argument extends beyond the capacity of the input byte slice.
|
||||
var f formatter
|
||||
var formatString = func(b []byte, s string, paxKeyword string) {
|
||||
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
|
||||
if needsPaxHeader {
|
||||
|
@ -202,44 +199,33 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
f.formatNumeric(b, x)
|
||||
}
|
||||
|
||||
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
|
||||
pathHeaderBytes := s.next(fileNameSize)
|
||||
|
||||
formatString(pathHeaderBytes, hdr.Name, paxPath)
|
||||
|
||||
// Handle out of range ModTime carefully.
|
||||
var modTime int64
|
||||
if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
|
||||
modTime = hdr.ModTime.Unix()
|
||||
}
|
||||
|
||||
f.formatOctal(s.next(8), hdr.Mode) // 100:108
|
||||
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
|
||||
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
|
||||
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
|
||||
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
|
||||
s.next(8) // chksum (148:156)
|
||||
s.next(1)[0] = hdr.Typeflag // 156:157
|
||||
v7 := header.V7()
|
||||
formatString(v7.Name(), hdr.Name, paxPath)
|
||||
// TODO(dsnet): The GNU format permits the mode field to be encoded in
|
||||
// base-256 format. Thus, we can use formatNumeric instead of formatOctal.
|
||||
f.formatOctal(v7.Mode(), hdr.Mode)
|
||||
formatNumeric(v7.UID(), int64(hdr.Uid), paxUid)
|
||||
formatNumeric(v7.GID(), int64(hdr.Gid), paxGid)
|
||||
formatNumeric(v7.Size(), hdr.Size, paxSize)
|
||||
// TODO(dsnet): Consider using PAX for finer time granularity.
|
||||
formatNumeric(v7.ModTime(), modTime, paxNone)
|
||||
v7.TypeFlag()[0] = hdr.Typeflag
|
||||
formatString(v7.LinkName(), hdr.Linkname, paxLinkpath)
|
||||
|
||||
formatString(s.next(100), hdr.Linkname, paxLinkpath)
|
||||
ustar := header.USTAR()
|
||||
formatString(ustar.UserName(), hdr.Uname, paxUname)
|
||||
formatString(ustar.GroupName(), hdr.Gname, paxGname)
|
||||
formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone)
|
||||
formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone)
|
||||
|
||||
copy(s.next(8), []byte("ustar\x0000")) // 257:265
|
||||
formatString(s.next(32), hdr.Uname, paxUname) // 265:297
|
||||
formatString(s.next(32), hdr.Gname, paxGname) // 297:329
|
||||
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
|
||||
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
|
||||
|
||||
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
|
||||
prefixHeaderBytes := s.next(155)
|
||||
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
|
||||
|
||||
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
|
||||
if tw.usedBinary {
|
||||
copy(header[257:265], []byte("ustar \x00"))
|
||||
}
|
||||
|
||||
_, paxPathUsed := paxHeaders[paxPath]
|
||||
// try to use a ustar header when only the name is too long
|
||||
_, paxPathUsed := paxHeaders[paxPath]
|
||||
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
|
||||
prefix, suffix, ok := splitUSTARPath(hdr.Name)
|
||||
if ok {
|
||||
|
@ -247,16 +233,16 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
delete(paxHeaders, paxPath)
|
||||
|
||||
// Update the path fields
|
||||
formatString(pathHeaderBytes, suffix, paxNone)
|
||||
formatString(prefixHeaderBytes, prefix, paxNone)
|
||||
formatString(v7.Name(), suffix, paxNone)
|
||||
formatString(ustar.Prefix(), prefix, paxNone)
|
||||
}
|
||||
}
|
||||
|
||||
// The chksum field is terminated by a NUL and a space.
|
||||
// This is different from the other octal fields.
|
||||
chksum, _ := checksum(header)
|
||||
f.formatOctal(header[148:155], chksum) // Never fails
|
||||
header[155] = ' '
|
||||
if tw.usedBinary {
|
||||
header.SetFormat(formatGNU)
|
||||
} else {
|
||||
header.SetFormat(formatUSTAR)
|
||||
}
|
||||
|
||||
// Check if there were any formatting errors.
|
||||
if f.err != nil {
|
||||
|
@ -278,10 +264,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
tw.nb = int64(hdr.Size)
|
||||
tw.nb = hdr.Size
|
||||
tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
|
||||
|
||||
_, tw.err = tw.w.Write(header)
|
||||
_, tw.err = tw.w.Write(header[:])
|
||||
return tw.err
|
||||
}
|
||||
|
||||
|
@ -289,10 +275,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
// If the path is not splittable, then it will return ("", "", false).
|
||||
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
|
||||
length := len(name)
|
||||
if length <= fileNameSize || !isASCII(name) {
|
||||
if length <= nameSize || !isASCII(name) {
|
||||
return "", "", false
|
||||
} else if length > fileNamePrefixSize+1 {
|
||||
length = fileNamePrefixSize + 1
|
||||
} else if length > prefixSize+1 {
|
||||
length = prefixSize + 1
|
||||
} else if name[length-1] == '/' {
|
||||
length--
|
||||
}
|
||||
|
@ -300,7 +286,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
|
|||
i := strings.LastIndex(name[:length], "/")
|
||||
nlen := len(name) - i - 1 // nlen is length of suffix
|
||||
plen := i // plen is length of prefix
|
||||
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
|
||||
if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
|
||||
return "", "", false
|
||||
}
|
||||
return name[:i], name[i+1:], true
|
||||
|
@ -323,8 +309,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
|
|||
fullName := path.Join(dir, "PaxHeaders.0", file)
|
||||
|
||||
ascii := toASCII(fullName)
|
||||
if len(ascii) > 100 {
|
||||
ascii = ascii[:100]
|
||||
if len(ascii) > nameSize {
|
||||
ascii = ascii[:nameSize]
|
||||
}
|
||||
ext.Name = ascii
|
||||
// Construct the body
|
||||
|
@ -407,7 +393,7 @@ func (tw *Writer) Close() error {
|
|||
|
||||
// trailer: two zero blocks
|
||||
for i := 0; i < 2; i++ {
|
||||
_, tw.err = tw.w.Write(zeroBlock)
|
||||
_, tw.err = tw.w.Write(zeroBlock[:])
|
||||
if tw.err != nil {
|
||||
break
|
||||
}
|
||||
|
|
|
@ -587,17 +587,17 @@ func TestSplitUSTARPath(t *testing.T) {
|
|||
{"", "", "", false},
|
||||
{"abc", "", "", false},
|
||||
{"用戶名", "", "", false},
|
||||
{sr("a", fileNameSize), "", "", false},
|
||||
{sr("a", fileNameSize) + "/", "", "", false},
|
||||
{sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true},
|
||||
{sr("a", fileNamePrefixSize) + "/", "", "", false},
|
||||
{sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true},
|
||||
{sr("a", fileNameSize+1), "", "", false},
|
||||
{sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true},
|
||||
{sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize),
|
||||
sr("a", fileNamePrefixSize), sr("b", fileNameSize), true},
|
||||
{sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false},
|
||||
{sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true},
|
||||
{sr("a", nameSize), "", "", false},
|
||||
{sr("a", nameSize) + "/", "", "", false},
|
||||
{sr("a", nameSize) + "/a", sr("a", nameSize), "a", true},
|
||||
{sr("a", prefixSize) + "/", "", "", false},
|
||||
{sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true},
|
||||
{sr("a", nameSize+1), "", "", false},
|
||||
{sr("/", nameSize+1), sr("/", nameSize-1), "/", true},
|
||||
{sr("a", prefixSize) + "/" + sr("b", nameSize),
|
||||
sr("a", prefixSize), sr("b", nameSize), true},
|
||||
{sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false},
|
||||
{sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true},
|
||||
}
|
||||
|
||||
for _, v := range vectors {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue