1
0
Fork 0
forked from mirrors/tar-split

archive/tar: centralize all information about tar header format

The Reader and Writer have hard-coded constants regarding the
offsets and lengths of certain fields in the tar format sprinkled
all over. This makes it harder to verify that the offsets are
correct since a reviewer would need to search for them throughout
the code. Instead, all information about the layout of header
fields should be centralized in one single file. This has the
advantage of being both centralized, and also acting as a form
of documentation about the header struct format.

This method was chosen over using "encoding/binary" since that
method would cause an allocation of a header struct every time
binary.Read was called. This method causes zero allocations and
its logic is no longer than if structs were declared.

Updates #12594

Change-Id: Ic7a0565d2a2cd95d955547ace3b6dea2b57fab34
Reviewed-on: https://go-review.googlesource.com/14669
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
Joe Tsai 2015-09-17 00:22:56 -07:00 committed by Vincent Batts
parent 5753de3a3b
commit 2c3c708698
5 changed files with 317 additions and 212 deletions

View file

@ -36,10 +36,10 @@ type Writer struct {
nb int64 // number of unwritten bytes for current file entry
pad int64 // amount of padding to write after current file entry
closed bool
usedBinary bool // whether the binary numeric field extension was used
preferPax bool // use pax header instead of binary numeric header
hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
usedBinary bool // whether the binary numeric field extension was used
preferPax bool // use PAX header instead of binary numeric header
hdrBuff block // buffer to use in writeHeader when writing a regular header
paxHdrBuff block // buffer to use in writeHeader when writing a PAX header
}
type formatter struct {
@ -153,27 +153,24 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
// a map to hold pax header records, if any are needed
paxHeaders := make(map[string]string)
// TODO(shanemhansen): we might want to use PAX headers for
// TODO(dsnet): we might want to use PAX headers for
// subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters
var f formatter
var header []byte
// We need to select which scratch buffer to use carefully,
// since this method is called recursively to write PAX headers.
// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
// already being used by the non-recursive call, so we must use paxHdrBuff.
header = tw.hdrBuff[:]
header := &tw.hdrBuff
if !allowPax {
header = tw.paxHdrBuff[:]
header = &tw.paxHdrBuff
}
copy(header, zeroBlock)
s := slicer(header)
copy(header[:], zeroBlock[:])
// Wrappers around formatter that automatically sets paxHeaders if the
// argument extends beyond the capacity of the input byte slice.
var f formatter
var formatString = func(b []byte, s string, paxKeyword string) {
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
@ -202,44 +199,33 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
f.formatNumeric(b, x)
}
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
pathHeaderBytes := s.next(fileNameSize)
formatString(pathHeaderBytes, hdr.Name, paxPath)
// Handle out of range ModTime carefully.
var modTime int64
if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
modTime = hdr.ModTime.Unix()
}
f.formatOctal(s.next(8), hdr.Mode) // 100:108
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157
v7 := header.V7()
formatString(v7.Name(), hdr.Name, paxPath)
// TODO(dsnet): The GNU format permits the mode field to be encoded in
// base-256 format. Thus, we can use formatNumeric instead of formatOctal.
f.formatOctal(v7.Mode(), hdr.Mode)
formatNumeric(v7.UID(), int64(hdr.Uid), paxUid)
formatNumeric(v7.GID(), int64(hdr.Gid), paxGid)
formatNumeric(v7.Size(), hdr.Size, paxSize)
// TODO(dsnet): Consider using PAX for finer time granularity.
formatNumeric(v7.ModTime(), modTime, paxNone)
v7.TypeFlag()[0] = hdr.Typeflag
formatString(v7.LinkName(), hdr.Linkname, paxLinkpath)
formatString(s.next(100), hdr.Linkname, paxLinkpath)
ustar := header.USTAR()
formatString(ustar.UserName(), hdr.Uname, paxUname)
formatString(ustar.GroupName(), hdr.Gname, paxGname)
formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone)
formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone)
copy(s.next(8), []byte("ustar\x0000")) // 257:265
formatString(s.next(32), hdr.Uname, paxUname) // 265:297
formatString(s.next(32), hdr.Gname, paxGname) // 297:329
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
prefixHeaderBytes := s.next(155)
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary {
copy(header[257:265], []byte("ustar \x00"))
}
_, paxPathUsed := paxHeaders[paxPath]
// try to use a ustar header when only the name is too long
_, paxPathUsed := paxHeaders[paxPath]
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
prefix, suffix, ok := splitUSTARPath(hdr.Name)
if ok {
@ -247,16 +233,16 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
delete(paxHeaders, paxPath)
// Update the path fields
formatString(pathHeaderBytes, suffix, paxNone)
formatString(prefixHeaderBytes, prefix, paxNone)
formatString(v7.Name(), suffix, paxNone)
formatString(ustar.Prefix(), prefix, paxNone)
}
}
// The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields.
chksum, _ := checksum(header)
f.formatOctal(header[148:155], chksum) // Never fails
header[155] = ' '
if tw.usedBinary {
header.SetFormat(formatGNU)
} else {
header.SetFormat(formatUSTAR)
}
// Check if there were any formatting errors.
if f.err != nil {
@ -281,7 +267,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
tw.nb = hdr.Size
tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
_, tw.err = tw.w.Write(header)
_, tw.err = tw.w.Write(header[:])
return tw.err
}
@ -289,10 +275,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
// If the path is not splittable, then it will return ("", "", false).
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
length := len(name)
if length <= fileNameSize || !isASCII(name) {
if length <= nameSize || !isASCII(name) {
return "", "", false
} else if length > fileNamePrefixSize+1 {
length = fileNamePrefixSize + 1
} else if length > prefixSize+1 {
length = prefixSize + 1
} else if name[length-1] == '/' {
length--
}
@ -300,7 +286,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
i := strings.LastIndex(name[:length], "/")
nlen := len(name) - i - 1 // nlen is length of suffix
plen := i // plen is length of prefix
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
return "", "", false
}
return name[:i], name[i+1:], true
@ -323,8 +309,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
fullName := path.Join(dir, "PaxHeaders.0", file)
ascii := toASCII(fullName)
if len(ascii) > 100 {
ascii = ascii[:100]
if len(ascii) > nameSize {
ascii = ascii[:nameSize]
}
ext.Name = ascii
// Construct the body
@ -407,7 +393,7 @@ func (tw *Writer) Close() error {
// trailer: two zero blocks
for i := 0; i < 2; i++ {
_, tw.err = tw.w.Write(zeroBlock)
_, tw.err = tw.w.Write(zeroBlock[:])
if tw.err != nil {
break
}