diff --git a/archive/tar/common.go b/archive/tar/common.go index 36f4e23..2a1e432 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -21,10 +21,8 @@ import ( "time" ) +// Header type flags. const ( - blockSize = 512 - - // Types TypeReg = '0' // regular file TypeRegA = '\x00' // regular file TypeLink = '1' // hard link @@ -61,12 +59,6 @@ type Header struct { Xattrs map[string]string } -// File name constants from the tar spec. -const ( - fileNameSize = 100 // Maximum number of bytes in a standard tar name. - fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. -) - // FileInfo returns an os.FileInfo for the Header. func (h *Header) FileInfo() os.FileInfo { return headerFileInfo{h} @@ -279,33 +271,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { return h, nil } -var zeroBlock = make([]byte, blockSize) - -// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. -// We compute and return both. -func checksum(header []byte) (unsigned int64, signed int64) { - for i := 0; i < len(header); i++ { - if i == 148 { - // The chksum field (header[148:156]) is special: it should be treated as space bytes. - unsigned += ' ' * 8 - signed += ' ' * 8 - i += 7 - continue - } - unsigned += int64(header[i]) - signed += int64(int8(header[i])) - } - return -} - -type slicer []byte - -func (sp *slicer) next(n int) (b []byte) { - s := *sp - b, *sp = s[0:n], s[n:] - return -} - func isASCII(s string) bool { for _, c := range s { if c >= 0x80 { diff --git a/archive/tar/format.go b/archive/tar/format.go new file mode 100644 index 0000000..c2c9910 --- /dev/null +++ b/archive/tar/format.go @@ -0,0 +1,197 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// Constants to identify various tar formats. +const ( + // The format is unknown. + formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc... + + // The format of the original Unix V7 tar tool prior to standardization. + formatV7 + + // The old and new GNU formats, which are incompatible with USTAR. + // This does cover the old GNU sparse extension. + // This does not cover the GNU sparse extensions using PAX headers, + // versions 0.0, 0.1, and 1.0; these fall under the PAX format. + formatGNU + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. + formatSTAR + + // USTAR is the former standardization of tar defined in POSIX.1-1988. + // This is incompatible with the GNU and STAR formats. + formatUSTAR + + // PAX is the latest standardization of tar defined in POSIX.1-2001. + // This is an extension of USTAR and is "backwards compatible" with it. + // + // Some newer formats add their own extensions to PAX, such as GNU sparse + // files and SCHILY extended attributes. Since they are backwards compatible + // with PAX, they will be labelled as "PAX". + formatPAX +) + +// Magics used to identify various formats. +const ( + magicGNU, versionGNU = "ustar ", " \x00" + magicUSTAR, versionUSTAR = "ustar\x00", "00" + trailerSTAR = "tar\x00" +) + +// Size constants from various tar specifications. +const ( + blockSize = 512 // Size of each block in a tar stream + nameSize = 100 // Max length of the name field in USTAR format + prefixSize = 155 // Max length of the prefix field in USTAR format +) + +var zeroBlock block + +type block [blockSize]byte + +// Convert block to any number of formats. +func (b *block) V7() *headerV7 { return (*headerV7)(b) } +func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } +func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } +func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } +func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } + +// GetFormat checks that the block is a valid tar header based on the checksum. +// It then attempts to guess the specific format based on magic values. +// If the checksum fails, then formatUnknown is returned. +func (b *block) GetFormat() (format int) { + // Verify checksum. + var p parser + value := p.parseOctal(b.V7().Chksum()) + chksum1, chksum2 := b.ComputeChecksum() + if p.err != nil || (value != chksum1 && value != chksum2) { + return formatUnknown + } + + // Guess the magic values. + magic := string(b.USTAR().Magic()) + version := string(b.USTAR().Version()) + trailer := string(b.STAR().Trailer()) + switch { + case magic == magicUSTAR && trailer == trailerSTAR: + return formatSTAR + case magic == magicUSTAR: + return formatUSTAR + case magic == magicGNU && version == versionGNU: + return formatGNU + default: + return formatV7 + } +} + +// SetFormat writes the magic values necessary for specified format +// and then updates the checksum accordingly. +func (b *block) SetFormat(format int) { + // Set the magic values. + switch format { + case formatV7: + // Do nothing. + case formatGNU: + copy(b.GNU().Magic(), magicGNU) + copy(b.GNU().Version(), versionGNU) + case formatSTAR: + copy(b.STAR().Magic(), magicUSTAR) + copy(b.STAR().Version(), versionUSTAR) + copy(b.STAR().Trailer(), trailerSTAR) + case formatUSTAR, formatPAX: + copy(b.USTAR().Magic(), magicUSTAR) + copy(b.USTAR().Version(), versionUSTAR) + default: + panic("invalid format") + } + + // Update checksum. + // This field is special in that it is terminated by a NULL then space. + var f formatter + field := b.V7().Chksum() + chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 + f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 + field[7] = ' ' +} + +// ComputeChecksum computes the checksum for the header block. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. +// We compute and return both. +func (b *block) ComputeChecksum() (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. + } + unsigned += int64(uint8(c)) + signed += int64(int8(c)) + } + return unsigned, signed +} + +type headerV7 [blockSize]byte + +func (h *headerV7) Name() []byte { return h[000:][:100] } +func (h *headerV7) Mode() []byte { return h[100:][:8] } +func (h *headerV7) UID() []byte { return h[108:][:8] } +func (h *headerV7) GID() []byte { return h[116:][:8] } +func (h *headerV7) Size() []byte { return h[124:][:12] } +func (h *headerV7) ModTime() []byte { return h[136:][:12] } +func (h *headerV7) Chksum() []byte { return h[148:][:8] } +func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } +func (h *headerV7) LinkName() []byte { return h[157:][:100] } + +type headerGNU [blockSize]byte + +func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerGNU) Magic() []byte { return h[257:][:6] } +func (h *headerGNU) Version() []byte { return h[263:][:2] } +func (h *headerGNU) UserName() []byte { return h[265:][:32] } +func (h *headerGNU) GroupName() []byte { return h[297:][:32] } +func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } +func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } +func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } +func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } +func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } +func (h *headerGNU) RealSize() []byte { return h[483:][:12] } + +type headerSTAR [blockSize]byte + +func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerSTAR) Version() []byte { return h[263:][:2] } +func (h *headerSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } +func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } +func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } +func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } + +type headerUSTAR [blockSize]byte + +func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerUSTAR) Version() []byte { return h[263:][:2] } +func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } + +type sparseArray []byte + +func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) } +func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } +func (s sparseArray) MaxEntries() int { return len(s) / 24 } + +type sparseNode []byte + +func (s sparseNode) Offset() []byte { return s[00:][:12] } +func (s sparseNode) NumBytes() []byte { return s[12:][:12] } diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 3140a4f..5649ada 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -29,11 +29,11 @@ const maxNanoSecondIntSize = 9 // The Next method advances to the next file in the archive (including the first), // and then it can be treated as an io.Reader to access the file's data. type Reader struct { - r io.Reader - err error - pad int64 // amount of padding (ignored) after current file entry - curr numBytesReader // reader for current file entry - hdrBuff [blockSize]byte // buffer to use in readHeader + r io.Reader + err error + pad int64 // amount of padding (ignored) after current file entry + curr numBytesReader // reader for current file entry + blk block // buffer to use as temporary local storage RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. rawBytes *bytes.Buffer // last raw bits @@ -119,17 +119,6 @@ const ( paxGNUSparseRealSize = "GNU.sparse.realsize" ) -// Keywords for old GNU sparse headers -const ( - oldGNUSparseMainHeaderOffset = 386 - oldGNUSparseMainHeaderIsExtendedOffset = 482 - oldGNUSparseMainHeaderNumEntries = 4 - oldGNUSparseExtendedHeaderIsExtendedOffset = 504 - oldGNUSparseExtendedHeaderNumEntries = 21 - oldGNUSparseOffsetSize = 12 - oldGNUSparseNumBytesSize = 12 -) - // NewReader creates a new Reader reading from r. func NewReader(r io.Reader) *Reader { return &Reader{r: r} } @@ -585,17 +574,6 @@ func (tr *Reader) skipUnread() error { return tr.err } -func (tr *Reader) verifyChecksum(header []byte) bool { - if tr.err != nil { - return false - } - - var p parser - given := p.parseOctal(header[148:156]) - unsigned, signed := checksum(header) - return p.err == nil && (given == unsigned || given == signed) -} - // readHeader reads the next block header and assumes that the underlying reader // is already aligned to a block boundary. // @@ -604,13 +582,10 @@ func (tr *Reader) verifyChecksum(header []byte) bool { // * Exactly 1 block of zeros is read and EOF is hit. // * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { - header := tr.hdrBuff[:] - copy(header, zeroBlock) - - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header); err != nil { + if _, err := tr.rawBytes.Write(tr.blk[:]); err != nil { tr.err = err return nil } @@ -618,28 +593,28 @@ func (tr *Reader) readHeader() *Header { return nil // io.EOF is okay here } if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil { return nil } } // Two blocks of zero bytes marks the end of the archive. - if bytes.Equal(header, zeroBlock[0:blockSize]) { - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + if _, tr.err = io.ReadFull(tr.r, tr.blk[:]); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil { return nil } } return nil // io.EOF is okay here } if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + if _, tr.err = tr.rawBytes.Write(tr.blk[:]); tr.err != nil { return nil } } - if bytes.Equal(header, zeroBlock[0:blockSize]) { + if bytes.Equal(tr.blk[:], zeroBlock[:]) { tr.err = io.EOF } else { tr.err = ErrHeader // zero block and then non-zero block @@ -647,71 +622,55 @@ func (tr *Reader) readHeader() *Header { return nil } - if !tr.verifyChecksum(header) { + // Verify the header matches a known format. + format := tr.blk.GetFormat() + if format == formatUnknown { tr.err = ErrHeader return nil } - // Unpack var p parser hdr := new(Header) - s := slicer(header) - hdr.Name = p.parseString(s.next(100)) - hdr.Mode = p.parseNumeric(s.next(8)) - hdr.Uid = int(p.parseNumeric(s.next(8))) - hdr.Gid = int(p.parseNumeric(s.next(8))) - hdr.Size = p.parseNumeric(s.next(12)) - hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) - s.next(8) // chksum - hdr.Typeflag = s.next(1)[0] - hdr.Linkname = p.parseString(s.next(100)) + // Unpack the V7 header. + v7 := tr.blk.V7() + hdr.Name = p.parseString(v7.Name()) + hdr.Mode = p.parseNumeric(v7.Mode()) + hdr.Uid = int(p.parseNumeric(v7.UID())) + hdr.Gid = int(p.parseNumeric(v7.GID())) + hdr.Size = p.parseNumeric(v7.Size()) + hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) + hdr.Typeflag = v7.TypeFlag()[0] + hdr.Linkname = p.parseString(v7.LinkName()) - // The remainder of the header depends on the value of magic. - // The original (v7) version of tar had no explicit magic field, - // so its magic bytes, like the rest of the block, are NULs. - magic := string(s.next(8)) // contains version field as well. - var format string - switch { - case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) - if string(header[508:512]) == "tar\x00" { - format = "star" - } else { - format = "posix" - } - case magic == "ustar \x00": // old GNU tar - format = "gnu" - } - - switch format { - case "posix", "gnu", "star": - hdr.Uname = p.parseString(s.next(32)) - hdr.Gname = p.parseString(s.next(32)) - devmajor := s.next(8) - devminor := s.next(8) + // Unpack format specific fields. + if format > formatV7 { + ustar := tr.blk.USTAR() + hdr.Uname = p.parseString(ustar.UserName()) + hdr.Gname = p.parseString(ustar.GroupName()) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = p.parseNumeric(devmajor) - hdr.Devminor = p.parseNumeric(devminor) + hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) + hdr.Devminor = p.parseNumeric(ustar.DevMinor()) } + var prefix string switch format { - case "posix", "gnu": - prefix = p.parseString(s.next(155)) - case "star": - prefix = p.parseString(s.next(131)) - hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) - hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) + case formatUSTAR, formatGNU: + // TODO(dsnet): Do not use the prefix field for the GNU format! + // See golang.org/issues/12594 + ustar := tr.blk.USTAR() + prefix = p.parseString(ustar.Prefix()) + case formatSTAR: + star := tr.blk.STAR() + prefix = p.parseString(star.Prefix()) + hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if p.err != nil { - tr.err = p.err - return nil - } - nb := hdr.Size if isHeaderOnlyType(hdr.Typeflag) { nb = 0 @@ -728,14 +687,14 @@ func (tr *Reader) readHeader() *Header { // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = p.parseNumeric(header[483:495]) + hdr.Size = p.parseNumeric(tr.blk.GNU().RealSize()) if p.err != nil { tr.err = p.err return nil } // Read the sparse map. - sp := tr.readOldGNUSparseMap(header) + sp := tr.readOldGNUSparseMap(&tr.blk) if tr.err != nil { return nil } @@ -747,26 +706,24 @@ func (tr *Reader) readHeader() *Header { } } + if p.err != nil { + tr.err = p.err + return nil + } + return hdr } // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. -func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { +func (tr *Reader) readOldGNUSparseMap(blk *block) []sparseEntry { var p parser - isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 - spCap := oldGNUSparseMainHeaderNumEntries - if isExtended { - spCap += oldGNUSparseExtendedHeaderNumEntries - } - sp := make([]sparseEntry, 0, spCap) - s := slicer(header[oldGNUSparseMainHeaderOffset:]) - - // Read the four entries from the main tar header - for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + var s sparseArray = blk.GNU().Sparse() + var sp = make([]sparseEntry, 0, s.MaxEntries()) + for i := 0; i < s.MaxEntries(); i++ { + offset := p.parseOctal(s.Entry(i).Offset()) + numBytes := p.parseOctal(s.Entry(i).NumBytes()) if p.err != nil { tr.err = p.err return nil @@ -777,23 +734,23 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - for isExtended { + for s.IsExtended()[0] > 0 { // There are more entries. Read an extension header and parse its entries. - sparseHeader := make([]byte, blockSize) - if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { + var blk block + if _, tr.err = io.ReadFull(tr.r, blk[:]); tr.err != nil { return nil } if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil { + if _, tr.err = tr.rawBytes.Write(blk[:]); tr.err != nil { return nil } } - isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 - s = slicer(sparseHeader) - for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + s = blk.Sparse() + + for i := 0; i < s.MaxEntries(); i++ { + offset := p.parseOctal(s.Entry(i).Offset()) + numBytes := p.parseOctal(s.Entry(i).NumBytes()) if p.err != nil { tr.err = p.err return nil diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 944b2d4..426e443 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -36,10 +36,10 @@ type Writer struct { nb int64 // number of unwritten bytes for current file entry pad int64 // amount of padding to write after current file entry closed bool - usedBinary bool // whether the binary numeric field extension was used - preferPax bool // use pax header instead of binary numeric header - hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header - paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header + usedBinary bool // whether the binary numeric field extension was used + preferPax bool // use PAX header instead of binary numeric header + hdrBuff block // buffer to use in writeHeader when writing a regular header + paxHdrBuff block // buffer to use in writeHeader when writing a PAX header } type formatter struct { @@ -153,27 +153,24 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // a map to hold pax header records, if any are needed paxHeaders := make(map[string]string) - // TODO(shanemhansen): we might want to use PAX headers for + // TODO(dsnet): we might want to use PAX headers for // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters - var f formatter - var header []byte - // We need to select which scratch buffer to use carefully, // since this method is called recursively to write PAX headers. // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is // already being used by the non-recursive call, so we must use paxHdrBuff. - header = tw.hdrBuff[:] + header := &tw.hdrBuff if !allowPax { - header = tw.paxHdrBuff[:] + header = &tw.paxHdrBuff } - copy(header, zeroBlock) - s := slicer(header) + copy(header[:], zeroBlock[:]) // Wrappers around formatter that automatically sets paxHeaders if the // argument extends beyond the capacity of the input byte slice. + var f formatter var formatString = func(b []byte, s string, paxKeyword string) { needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) if needsPaxHeader { @@ -202,44 +199,33 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { f.formatNumeric(b, x) } - // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - pathHeaderBytes := s.next(fileNameSize) - - formatString(pathHeaderBytes, hdr.Name, paxPath) - // Handle out of range ModTime carefully. var modTime int64 if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { modTime = hdr.ModTime.Unix() } - f.formatOctal(s.next(8), hdr.Mode) // 100:108 - formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 - formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 - formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 - formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + v7 := header.V7() + formatString(v7.Name(), hdr.Name, paxPath) + // TODO(dsnet): The GNU format permits the mode field to be encoded in + // base-256 format. Thus, we can use formatNumeric instead of formatOctal. + f.formatOctal(v7.Mode(), hdr.Mode) + formatNumeric(v7.UID(), int64(hdr.Uid), paxUid) + formatNumeric(v7.GID(), int64(hdr.Gid), paxGid) + formatNumeric(v7.Size(), hdr.Size, paxSize) + // TODO(dsnet): Consider using PAX for finer time granularity. + formatNumeric(v7.ModTime(), modTime, paxNone) + v7.TypeFlag()[0] = hdr.Typeflag + formatString(v7.LinkName(), hdr.Linkname, paxLinkpath) - formatString(s.next(100), hdr.Linkname, paxLinkpath) + ustar := header.USTAR() + formatString(ustar.UserName(), hdr.Uname, paxUname) + formatString(ustar.GroupName(), hdr.Gname, paxGname) + formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone) + formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - formatString(s.next(32), hdr.Uname, paxUname) // 265:297 - formatString(s.next(32), hdr.Gname, paxGname) // 297:329 - formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 - formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 - - // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - prefixHeaderBytes := s.next(155) - formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix - - // Use the GNU magic instead of POSIX magic if we used any GNU extensions. - if tw.usedBinary { - copy(header[257:265], []byte("ustar \x00")) - } - - _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long + _, paxPathUsed := paxHeaders[paxPath] if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { prefix, suffix, ok := splitUSTARPath(hdr.Name) if ok { @@ -247,16 +233,16 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { delete(paxHeaders, paxPath) // Update the path fields - formatString(pathHeaderBytes, suffix, paxNone) - formatString(prefixHeaderBytes, prefix, paxNone) + formatString(v7.Name(), suffix, paxNone) + formatString(ustar.Prefix(), prefix, paxNone) } } - // The chksum field is terminated by a NUL and a space. - // This is different from the other octal fields. - chksum, _ := checksum(header) - f.formatOctal(header[148:155], chksum) // Never fails - header[155] = ' ' + if tw.usedBinary { + header.SetFormat(formatGNU) + } else { + header.SetFormat(formatUSTAR) + } // Check if there were any formatting errors. if f.err != nil { @@ -281,7 +267,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { tw.nb = hdr.Size tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize - _, tw.err = tw.w.Write(header) + _, tw.err = tw.w.Write(header[:]) return tw.err } @@ -289,10 +275,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // If the path is not splittable, then it will return ("", "", false). func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length <= fileNameSize || !isASCII(name) { + if length <= nameSize || !isASCII(name) { return "", "", false - } else if length > fileNamePrefixSize+1 { - length = fileNamePrefixSize + 1 + } else if length > prefixSize+1 { + length = prefixSize + 1 } else if name[length-1] == '/' { length-- } @@ -300,7 +286,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { i := strings.LastIndex(name[:length], "/") nlen := len(name) - i - 1 // nlen is length of suffix plen := i // plen is length of prefix - if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { return "", "", false } return name[:i], name[i+1:], true @@ -323,8 +309,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro fullName := path.Join(dir, "PaxHeaders.0", file) ascii := toASCII(fullName) - if len(ascii) > 100 { - ascii = ascii[:100] + if len(ascii) > nameSize { + ascii = ascii[:nameSize] } ext.Name = ascii // Construct the body @@ -407,7 +393,7 @@ func (tw *Writer) Close() error { // trailer: two zero blocks for i := 0; i < 2; i++ { - _, tw.err = tw.w.Write(zeroBlock) + _, tw.err = tw.w.Write(zeroBlock[:]) if tw.err != nil { break } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 6e91d90..27aa8e5 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -587,17 +587,17 @@ func TestSplitUSTARPath(t *testing.T) { {"", "", "", false}, {"abc", "", "", false}, {"用戶名", "", "", false}, - {sr("a", fileNameSize), "", "", false}, - {sr("a", fileNameSize) + "/", "", "", false}, - {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, - {sr("a", fileNamePrefixSize) + "/", "", "", false}, - {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, - {sr("a", fileNameSize+1), "", "", false}, - {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, - {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), - sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, - {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, - {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + {sr("a", nameSize), "", "", false}, + {sr("a", nameSize) + "/", "", "", false}, + {sr("a", nameSize) + "/a", sr("a", nameSize), "a", true}, + {sr("a", prefixSize) + "/", "", "", false}, + {sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true}, + {sr("a", nameSize+1), "", "", false}, + {sr("/", nameSize+1), sr("/", nameSize-1), "/", true}, + {sr("a", prefixSize) + "/" + sr("b", nameSize), + sr("a", prefixSize), sr("b", nameSize), true}, + {sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false}, + {sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true}, } for _, v := range vectors {