forked from mirrors/tar-split
archive/tar: centralize all information about tar header format
The Reader and Writer have hard-coded constants regarding the offsets and lengths of certain fields in the tar format sprinkled all over. This makes it harder to verify that the offsets are correct since a reviewer would need to search for them throughout the code. Instead, all information about the layout of header fields should be centralized in one single file. This has the advantage of being both centralized, and also acting as a form of documentation about the header struct format. This method was chosen over using "encoding/binary" since that method would cause an allocation of a header struct every time binary.Read was called. This method causes zero allocations and its logic is no longer than if structs were declared. Updates #12594 Change-Id: Ic7a0565d2a2cd95d955547ace3b6dea2b57fab34 Reviewed-on: https://go-review.googlesource.com/14669 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
parent
5753de3a3b
commit
2c3c708698
5 changed files with 317 additions and 212 deletions
|
@ -36,10 +36,10 @@ type Writer struct {
|
|||
nb int64 // number of unwritten bytes for current file entry
|
||||
pad int64 // amount of padding to write after current file entry
|
||||
closed bool
|
||||
usedBinary bool // whether the binary numeric field extension was used
|
||||
preferPax bool // use pax header instead of binary numeric header
|
||||
hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header
|
||||
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
|
||||
usedBinary bool // whether the binary numeric field extension was used
|
||||
preferPax bool // use PAX header instead of binary numeric header
|
||||
hdrBuff block // buffer to use in writeHeader when writing a regular header
|
||||
paxHdrBuff block // buffer to use in writeHeader when writing a PAX header
|
||||
}
|
||||
|
||||
type formatter struct {
|
||||
|
@ -153,27 +153,24 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
// a map to hold pax header records, if any are needed
|
||||
paxHeaders := make(map[string]string)
|
||||
|
||||
// TODO(shanemhansen): we might want to use PAX headers for
|
||||
// TODO(dsnet): we might want to use PAX headers for
|
||||
// subsecond time resolution, but for now let's just capture
|
||||
// too long fields or non ascii characters
|
||||
|
||||
var f formatter
|
||||
var header []byte
|
||||
|
||||
// We need to select which scratch buffer to use carefully,
|
||||
// since this method is called recursively to write PAX headers.
|
||||
// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
|
||||
// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
|
||||
// already being used by the non-recursive call, so we must use paxHdrBuff.
|
||||
header = tw.hdrBuff[:]
|
||||
header := &tw.hdrBuff
|
||||
if !allowPax {
|
||||
header = tw.paxHdrBuff[:]
|
||||
header = &tw.paxHdrBuff
|
||||
}
|
||||
copy(header, zeroBlock)
|
||||
s := slicer(header)
|
||||
copy(header[:], zeroBlock[:])
|
||||
|
||||
// Wrappers around formatter that automatically sets paxHeaders if the
|
||||
// argument extends beyond the capacity of the input byte slice.
|
||||
var f formatter
|
||||
var formatString = func(b []byte, s string, paxKeyword string) {
|
||||
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
|
||||
if needsPaxHeader {
|
||||
|
@ -202,44 +199,33 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
f.formatNumeric(b, x)
|
||||
}
|
||||
|
||||
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
|
||||
pathHeaderBytes := s.next(fileNameSize)
|
||||
|
||||
formatString(pathHeaderBytes, hdr.Name, paxPath)
|
||||
|
||||
// Handle out of range ModTime carefully.
|
||||
var modTime int64
|
||||
if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
|
||||
modTime = hdr.ModTime.Unix()
|
||||
}
|
||||
|
||||
f.formatOctal(s.next(8), hdr.Mode) // 100:108
|
||||
formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
|
||||
formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
|
||||
formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
|
||||
formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
|
||||
s.next(8) // chksum (148:156)
|
||||
s.next(1)[0] = hdr.Typeflag // 156:157
|
||||
v7 := header.V7()
|
||||
formatString(v7.Name(), hdr.Name, paxPath)
|
||||
// TODO(dsnet): The GNU format permits the mode field to be encoded in
|
||||
// base-256 format. Thus, we can use formatNumeric instead of formatOctal.
|
||||
f.formatOctal(v7.Mode(), hdr.Mode)
|
||||
formatNumeric(v7.UID(), int64(hdr.Uid), paxUid)
|
||||
formatNumeric(v7.GID(), int64(hdr.Gid), paxGid)
|
||||
formatNumeric(v7.Size(), hdr.Size, paxSize)
|
||||
// TODO(dsnet): Consider using PAX for finer time granularity.
|
||||
formatNumeric(v7.ModTime(), modTime, paxNone)
|
||||
v7.TypeFlag()[0] = hdr.Typeflag
|
||||
formatString(v7.LinkName(), hdr.Linkname, paxLinkpath)
|
||||
|
||||
formatString(s.next(100), hdr.Linkname, paxLinkpath)
|
||||
ustar := header.USTAR()
|
||||
formatString(ustar.UserName(), hdr.Uname, paxUname)
|
||||
formatString(ustar.GroupName(), hdr.Gname, paxGname)
|
||||
formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone)
|
||||
formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone)
|
||||
|
||||
copy(s.next(8), []byte("ustar\x0000")) // 257:265
|
||||
formatString(s.next(32), hdr.Uname, paxUname) // 265:297
|
||||
formatString(s.next(32), hdr.Gname, paxGname) // 297:329
|
||||
formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
|
||||
formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
|
||||
|
||||
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
|
||||
prefixHeaderBytes := s.next(155)
|
||||
formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
|
||||
|
||||
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
|
||||
if tw.usedBinary {
|
||||
copy(header[257:265], []byte("ustar \x00"))
|
||||
}
|
||||
|
||||
_, paxPathUsed := paxHeaders[paxPath]
|
||||
// try to use a ustar header when only the name is too long
|
||||
_, paxPathUsed := paxHeaders[paxPath]
|
||||
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
|
||||
prefix, suffix, ok := splitUSTARPath(hdr.Name)
|
||||
if ok {
|
||||
|
@ -247,16 +233,16 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
delete(paxHeaders, paxPath)
|
||||
|
||||
// Update the path fields
|
||||
formatString(pathHeaderBytes, suffix, paxNone)
|
||||
formatString(prefixHeaderBytes, prefix, paxNone)
|
||||
formatString(v7.Name(), suffix, paxNone)
|
||||
formatString(ustar.Prefix(), prefix, paxNone)
|
||||
}
|
||||
}
|
||||
|
||||
// The chksum field is terminated by a NUL and a space.
|
||||
// This is different from the other octal fields.
|
||||
chksum, _ := checksum(header)
|
||||
f.formatOctal(header[148:155], chksum) // Never fails
|
||||
header[155] = ' '
|
||||
if tw.usedBinary {
|
||||
header.SetFormat(formatGNU)
|
||||
} else {
|
||||
header.SetFormat(formatUSTAR)
|
||||
}
|
||||
|
||||
// Check if there were any formatting errors.
|
||||
if f.err != nil {
|
||||
|
@ -281,7 +267,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
tw.nb = hdr.Size
|
||||
tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
|
||||
|
||||
_, tw.err = tw.w.Write(header)
|
||||
_, tw.err = tw.w.Write(header[:])
|
||||
return tw.err
|
||||
}
|
||||
|
||||
|
@ -289,10 +275,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
|
|||
// If the path is not splittable, then it will return ("", "", false).
|
||||
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
|
||||
length := len(name)
|
||||
if length <= fileNameSize || !isASCII(name) {
|
||||
if length <= nameSize || !isASCII(name) {
|
||||
return "", "", false
|
||||
} else if length > fileNamePrefixSize+1 {
|
||||
length = fileNamePrefixSize + 1
|
||||
} else if length > prefixSize+1 {
|
||||
length = prefixSize + 1
|
||||
} else if name[length-1] == '/' {
|
||||
length--
|
||||
}
|
||||
|
@ -300,7 +286,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
|
|||
i := strings.LastIndex(name[:length], "/")
|
||||
nlen := len(name) - i - 1 // nlen is length of suffix
|
||||
plen := i // plen is length of prefix
|
||||
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
|
||||
if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
|
||||
return "", "", false
|
||||
}
|
||||
return name[:i], name[i+1:], true
|
||||
|
@ -323,8 +309,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
|
|||
fullName := path.Join(dir, "PaxHeaders.0", file)
|
||||
|
||||
ascii := toASCII(fullName)
|
||||
if len(ascii) > 100 {
|
||||
ascii = ascii[:100]
|
||||
if len(ascii) > nameSize {
|
||||
ascii = ascii[:nameSize]
|
||||
}
|
||||
ext.Name = ascii
|
||||
// Construct the body
|
||||
|
@ -407,7 +393,7 @@ func (tw *Writer) Close() error {
|
|||
|
||||
// trailer: two zero blocks
|
||||
for i := 0; i < 2; i++ {
|
||||
_, tw.err = tw.w.Write(zeroBlock)
|
||||
_, tw.err = tw.w.Write(zeroBlock[:])
|
||||
if tw.err != nil {
|
||||
break
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue