From 73fdb78c3630bee43d993bdd9ff77471999d0e49 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 5 Sep 2018 14:04:10 -0700 Subject: [PATCH 1/4] archive/tar: replace with one from go-1.11 The RawAccounting changes are to be ported on top. Signed-off-by: Kir Kolyshkin --- archive/tar/common.go | 689 ++++-- archive/tar/example_test.go | 33 +- archive/tar/format.go | 303 +++ archive/tar/reader.go | 1452 +++++------- archive/tar/reader_test.go | 2055 ++++++++++------- archive/tar/{stat_atim.go => stat_actime1.go} | 0 .../{stat_atimespec.go => stat_actime2.go} | 0 archive/tar/stat_unix.go | 72 +- archive/tar/strconv.go | 326 +++ archive/tar/strconv_test.go | 434 ++++ archive/tar/tar_test.go | 817 +++++-- archive/tar/testdata/file-and-dir.tar | Bin 0 -> 2560 bytes archive/tar/testdata/gnu-incremental.tar | Bin 0 -> 2560 bytes archive/tar/testdata/gnu-long-nul.tar | Bin 0 -> 2560 bytes archive/tar/testdata/gnu-nil-sparse-data.tar | Bin 0 -> 2560 bytes archive/tar/testdata/gnu-nil-sparse-hole.tar | Bin 0 -> 1536 bytes archive/tar/testdata/gnu-not-utf8.tar | Bin 0 -> 1536 bytes archive/tar/testdata/gnu-sparse-big.tar | Bin 0 -> 5120 bytes archive/tar/testdata/gnu-utf8.tar | Bin 0 -> 2560 bytes archive/tar/testdata/invalid-go17.tar | Bin 0 -> 1536 bytes archive/tar/testdata/pax-bad-hdr-file.tar | Bin 0 -> 2560 bytes archive/tar/testdata/pax-bad-mtime-file.tar | Bin 0 -> 2560 bytes archive/tar/testdata/pax-global-records.tar | Bin 0 -> 7168 bytes archive/tar/testdata/pax-nil-sparse-data.tar | Bin 0 -> 4096 bytes archive/tar/testdata/pax-nil-sparse-hole.tar | Bin 0 -> 3072 bytes archive/tar/testdata/pax-nul-path.tar | Bin 0 -> 2560 bytes archive/tar/testdata/pax-nul-xattrs.tar | Bin 0 -> 2560 bytes archive/tar/testdata/pax-pos-size-file.tar | Bin 0 -> 2560 bytes archive/tar/testdata/pax-records.tar | Bin 0 -> 2560 bytes archive/tar/testdata/pax-sparse-big.tar | Bin 0 -> 6144 bytes archive/tar/testdata/trailing-slash.tar | Bin 0 -> 2560 bytes 
archive/tar/testdata/ustar-file-devs.tar | Bin 0 -> 1536 bytes archive/tar/testdata/writer-big-long.tar | Bin 4096 -> 1536 bytes archive/tar/testdata/writer-big.tar | Bin 4096 -> 512 bytes archive/tar/writer.go | 925 +++++--- archive/tar/writer_test.go | 1309 ++++++++--- 36 files changed, 5777 insertions(+), 2638 deletions(-) create mode 100644 archive/tar/format.go rename archive/tar/{stat_atim.go => stat_actime1.go} (100%) rename archive/tar/{stat_atimespec.go => stat_actime2.go} (100%) create mode 100644 archive/tar/strconv.go create mode 100644 archive/tar/strconv_test.go create mode 100644 archive/tar/testdata/file-and-dir.tar create mode 100644 archive/tar/testdata/gnu-incremental.tar create mode 100644 archive/tar/testdata/gnu-long-nul.tar create mode 100644 archive/tar/testdata/gnu-nil-sparse-data.tar create mode 100644 archive/tar/testdata/gnu-nil-sparse-hole.tar create mode 100644 archive/tar/testdata/gnu-not-utf8.tar create mode 100644 archive/tar/testdata/gnu-sparse-big.tar create mode 100644 archive/tar/testdata/gnu-utf8.tar create mode 100644 archive/tar/testdata/invalid-go17.tar create mode 100644 archive/tar/testdata/pax-bad-hdr-file.tar create mode 100644 archive/tar/testdata/pax-bad-mtime-file.tar create mode 100644 archive/tar/testdata/pax-global-records.tar create mode 100644 archive/tar/testdata/pax-nil-sparse-data.tar create mode 100644 archive/tar/testdata/pax-nil-sparse-hole.tar create mode 100644 archive/tar/testdata/pax-nul-path.tar create mode 100644 archive/tar/testdata/pax-nul-xattrs.tar create mode 100644 archive/tar/testdata/pax-pos-size-file.tar create mode 100644 archive/tar/testdata/pax-records.tar create mode 100644 archive/tar/testdata/pax-sparse-big.tar create mode 100644 archive/tar/testdata/trailing-slash.tar create mode 100644 archive/tar/testdata/ustar-file-devs.tar diff --git a/archive/tar/common.go b/archive/tar/common.go index 36f4e23..dee9e47 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -3,70 +3,528 
@@ // license that can be found in the LICENSE file. // Package tar implements access to tar archives. -// It aims to cover most of the variations, including those produced -// by GNU and BSD tars. // -// References: -// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 -// http://www.gnu.org/software/tar/manual/html_node/Standard.html -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html +// Tape archives (tar) are a file format for storing a sequence of files that +// can be read and written in a streaming manner. +// This package aims to cover most variations of the format, +// including those produced by GNU and BSD tar tools. package tar import ( - "bytes" "errors" "fmt" + "math" "os" "path" + "reflect" + "strconv" + "strings" "time" ) -const ( - blockSize = 512 +// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit +// architectures. If a large value is encountered when decoding, the result +// stored in Header will be the truncated version. 
- // Types - TypeReg = '0' // regular file - TypeRegA = '\x00' // regular file - TypeLink = '1' // hard link - TypeSymlink = '2' // symbolic link - TypeChar = '3' // character device node - TypeBlock = '4' // block device node - TypeDir = '5' // directory - TypeFifo = '6' // fifo node - TypeCont = '7' // reserved - TypeXHeader = 'x' // extended header - TypeXGlobalHeader = 'g' // global extended header - TypeGNULongName = 'L' // Next file has a long name - TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name - TypeGNUSparse = 'S' // sparse file +var ( + ErrHeader = errors.New("archive/tar: invalid tar header") + ErrWriteTooLong = errors.New("archive/tar: write too long") + ErrFieldTooLong = errors.New("archive/tar: header field too long") + ErrWriteAfterClose = errors.New("archive/tar: write after close") + errMissData = errors.New("archive/tar: sparse file references non-existent data") + errUnrefData = errors.New("archive/tar: sparse file contains unreferenced data") + errWriteHole = errors.New("archive/tar: write non-NUL byte in sparse hole") ) +type headerError []string + +func (he headerError) Error() string { + const prefix = "archive/tar: cannot encode header" + var ss []string + for _, s := range he { + if s != "" { + ss = append(ss, s) + } + } + if len(ss) == 0 { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) +} + +// Type flags for Header.Typeflag. +const ( + // Type '0' indicates a regular file. + TypeReg = '0' + TypeRegA = '\x00' // Deprecated: Use TypeReg instead. + + // Type '1' to '6' are header-only flags and may not have a data body. + TypeLink = '1' // Hard link + TypeSymlink = '2' // Symbolic link + TypeChar = '3' // Character device node + TypeBlock = '4' // Block device node + TypeDir = '5' // Directory + TypeFifo = '6' // FIFO node + + // Type '7' is reserved. 
+ TypeCont = '7' + + // Type 'x' is used by the PAX format to store key-value records that + // are only relevant to the next file. + // This package transparently handles these types. + TypeXHeader = 'x' + + // Type 'g' is used by the PAX format to store key-value records that + // are relevant to all subsequent files. + // This package only supports parsing and composing such headers, + // but does not currently support persisting the global state across files. + TypeXGlobalHeader = 'g' + + // Type 'S' indicates a sparse file in the GNU format. + TypeGNUSparse = 'S' + + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + // This package transparently handles these types. + TypeGNULongName = 'L' + TypeGNULongLink = 'K' +) + +// Keywords for PAX extended header records. +const ( + paxNone = "" // Indicates that no PAX key is suitable + paxPath = "path" + paxLinkpath = "linkpath" + paxSize = "size" + paxUid = "uid" + paxGid = "gid" + paxUname = "uname" + paxGname = "gname" + paxMtime = "mtime" + paxAtime = "atime" + paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid + paxCharset = "charset" // Currently unused + paxComment = "comment" // Currently unused + + paxSchilyXattr = "SCHILY.xattr." + + // Keywords for GNU sparse files in a PAX extended header. + paxGNUSparse = "GNU.sparse." + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// basicKeys is a set of the PAX keys for which we have built-in support. 
+// This does not contain "charset" or "comment", which are both PAX-specific, +// so adding them as first-class features of Header is unlikely. +// Users can use the PAXRecords field to set it themselves. +var basicKeys = map[string]bool{ + paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true, + paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true, +} + // A Header represents a single header in a tar archive. // Some fields may not be populated. +// +// For forward compatibility, users that retrieve a Header from Reader.Next, +// mutate it in some ways, and then pass it back to Writer.WriteHeader +// should do so by creating a new Header and copying the fields +// that they are interested in preserving. type Header struct { - Name string // name of header file entry - Mode int64 // permission and mode bits - Uid int // user id of owner - Gid int // group id of owner - Size int64 // length in bytes - ModTime time.Time // modified time - Typeflag byte // type of header entry - Linkname string // target name of link - Uname string // user name of owner - Gname string // group name of owner - Devmajor int64 // major number of character or block device - Devminor int64 // minor number of character or block device - AccessTime time.Time // access time - ChangeTime time.Time // status change time - Xattrs map[string]string + // Typeflag is the type of header entry. + // The zero value is automatically promoted to either TypeReg or TypeDir + // depending on the presence of a trailing slash in Name. 
+ Typeflag byte + + Name string // Name of file entry + Linkname string // Target name of link (valid for TypeLink or TypeSymlink) + + Size int64 // Logical file size in bytes + Mode int64 // Permission and mode bits + Uid int // User ID of owner + Gid int // Group ID of owner + Uname string // User name of owner + Gname string // Group name of owner + + // If the Format is unspecified, then Writer.WriteHeader rounds ModTime + // to the nearest second and ignores the AccessTime and ChangeTime fields. + // + // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. + // To use sub-second resolution, specify the Format as PAX. + ModTime time.Time // Modification time + AccessTime time.Time // Access time (requires either PAX or GNU support) + ChangeTime time.Time // Change time (requires either PAX or GNU support) + + Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) + Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) + + // Xattrs stores extended attributes as PAX records under the + // "SCHILY.xattr." namespace. + // + // The following are semantically equivalent: + // h.Xattrs[key] = value + // h.PAXRecords["SCHILY.xattr."+key] = value + // + // When Writer.WriteHeader is called, the contents of Xattrs will take + // precedence over those in PAXRecords. + // + // Deprecated: Use PAXRecords instead. + Xattrs map[string]string + + // PAXRecords is a map of PAX extended header records. + // + // User-defined records should have keys of the following form: + // VENDOR.keyword + // Where VENDOR is some namespace in all uppercase, and keyword may + // not contain the '=' character (e.g., "GOLANG.pkg.version"). + // The key and value should be non-empty UTF-8 strings. + // + // When Writer.WriteHeader is called, PAX records derived from the + // other fields in Header take precedence over PAXRecords. + PAXRecords map[string]string + + // Format specifies the format of the tar header. 
+ // + // This is set by Reader.Next as a best-effort guess at the format. + // Since the Reader liberally reads some non-compliant files, + // it is possible for this to be FormatUnknown. + // + // If the format is unspecified when Writer.WriteHeader is called, + // then it uses the first format (in the order of USTAR, PAX, GNU) + // capable of encoding this Header (see Format). + Format Format } -// File name constants from the tar spec. -const ( - fileNameSize = 100 // Maximum number of bytes in a standard tar name. - fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. +// sparseEntry represents a Length-sized fragment at Offset in the file. +type sparseEntry struct{ Offset, Length int64 } + +func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length } + +// A sparse file can be represented as either a sparseDatas or a sparseHoles. +// As long as the total size is known, they are equivalent and one can be +// converted to the other form and back. The various tar formats with sparse +// file support represent sparse files in the sparseDatas form. That is, they +// specify the fragments in the file that have data, and treat everything else as +// having zero bytes. As such, the encoding and decoding logic in this package +// deals with sparseDatas. +// +// However, the external API uses sparseHoles instead of sparseDatas because the +// zero value of sparseHoles logically represents a normal file (i.e., there are +// no holes in it). On the other hand, the zero value of sparseDatas implies +// that the file has no data in it, which is rather odd.
+// +// As an example, if the underlying raw file contains the 8-byte data: +// var compactFile = "abcdefgh" +// +// And the sparse map has the following entries: +// var spd sparseDatas = []sparseEntry{ +// {Offset: 2, Length: 5}, // Data fragment for 2..6 +// {Offset: 18, Length: 3}, // Data fragment for 18..20 +// } +// var sph sparseHoles = []sparseEntry{ +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 +// } +// +// Then the content of the resulting sparse file with a Header.Size of 25 is: +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type ( + sparseDatas []sparseEntry + sparseHoles []sparseEntry ) +// validateSparseEntries reports whether sp is a valid sparse map. +// It does not matter whether sp represents data fragments or hole fragments. +func validateSparseEntries(sp []sparseEntry, size int64) bool { + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + if size < 0 { + return false + } + var pre sparseEntry + for _, cur := range sp { + switch { + case cur.Offset < 0 || cur.Length < 0: + return false // Negative values are never okay + case cur.Offset > math.MaxInt64-cur.Length: + return false // Integer overflow with large length + case cur.endOffset() > size: + return false // Region extends beyond the actual size + case pre.endOffset() > cur.Offset: + return false // Regions cannot overlap and must be in order + } + pre = cur + } + return true +} + +// alignSparseEntries mutates src and returns dst where each fragment's +// starting offset is aligned up to the nearest block edge, and each +// ending offset is aligned down to the nearest block edge. +// +// Even though the Go tar Reader and the BSD tar utility can handle entries +// with arbitrary offsets and lengths, the GNU tar utility can only handle +// offsets and lengths that are multiples of blockSize.
+func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + for _, s := range src { + pos, end := s.Offset, s.endOffset() + pos += blockPadding(+pos) // Round-up to nearest blockSize + if end != size { + end -= blockPadding(-end) // Round-down to nearest blockSize + } + if pos < end { + dst = append(dst, sparseEntry{Offset: pos, Length: end - pos}) + } + } + return dst +} + +// invertSparseEntries converts a sparse map from one form to the other. +// If the input is sparseHoles, then it will output sparseDatas and vice-versa. +// The input must have been already validated. +// +// This function mutates src and returns a normalized map where: +// * adjacent fragments are coalesced together +// * only the last fragment may be empty +// * the endOffset of the last fragment is the total size +func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + var pre sparseEntry + for _, cur := range src { + if cur.Length == 0 { + continue // Skip empty fragments + } + pre.Length = cur.Offset - pre.Offset + if pre.Length > 0 { + dst = append(dst, pre) // Only add non-empty fragments + } + pre.Offset = cur.endOffset() + } + pre.Length = size - pre.Offset // Possibly the only empty fragment + return append(dst, pre) +} + +// fileState tracks the number of logical (includes sparse holes) and physical +// (actual in tar archive) bytes remaining for the current file. +// +// Invariant: LogicalRemaining >= PhysicalRemaining +type fileState interface { + LogicalRemaining() int64 + PhysicalRemaining() int64 +} + +// allowedFormats determines which formats can be used. +// The value returned is the logical OR of multiple possible formats. +// If the value is FormatUnknown, then the input Header cannot be encoded +// and an error is returned explaining why. +// +// As a by-product of checking the fields, this function returns paxHdrs, which +// contain all fields that could not be directly encoded. 
+// A value receiver ensures that this method does not mutate the source Header. +func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { + format = FormatUSTAR | FormatPAX | FormatGNU + paxHdrs = make(map[string]string) + + var whyNoUSTAR, whyNoPAX, whyNoGNU string + var preferPAX bool // Prefer PAX over USTAR + verifyString := func(s string, size int, name, paxKey string) { + // NUL-terminator is optional for path and linkpath. + // Technically, it is required for uname and gname, + // but neither GNU nor BSD tar checks for it. + tooLong := len(s) > size + allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath + if hasNUL(s) || (tooLong && !allowLongGNU) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) + format.mustNotBe(FormatGNU) + } + if !isASCII(s) || tooLong { + canSplitUSTAR := paxKey == paxPath + if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) + format.mustNotBe(FormatUSTAR) + } + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = s + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == s { + paxHdrs[paxKey] = v + } + } + verifyNumeric := func(n int64, size int, name, paxKey string) { + if !fitsInBase256(size, n) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) + format.mustNotBe(FormatGNU) + } + if !fitsInOctal(size, n) { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) + format.mustNotBe(FormatUSTAR) + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = strconv.FormatInt(n, 10) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { + paxHdrs[paxKey] = v + } + } + verifyTime := func(ts time.Time, size int, name, paxKey string) { + if ts.IsZero() { + return // Always okay + } + if 
!fitsInBase256(size, ts.Unix()) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) + format.mustNotBe(FormatGNU) + } + isMtime := paxKey == paxMtime + fitsOctal := fitsInOctal(size, ts.Unix()) + if (isMtime && !fitsOctal) || !isMtime { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) + format.mustNotBe(FormatUSTAR) + } + needsNano := ts.Nanosecond() != 0 + if !isMtime || !fitsOctal || needsNano { + preferPAX = true // USTAR may truncate sub-second measurements + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = formatPAXTime(ts) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { + paxHdrs[paxKey] = v + } + } + + // Check basic fields. + var blk block + v7 := blk.V7() + ustar := blk.USTAR() + gnu := blk.GNU() + verifyString(h.Name, len(v7.Name()), "Name", paxPath) + verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath) + verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname) + verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname) + verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone) + verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid) + verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid) + verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize) + verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone) + verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone) + verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime) + verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime) + verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime) + + // Check for header-only types. + var whyOnlyPAX, whyOnlyGNU string + switch h.Typeflag { + case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: + // Exclude TypeLink and TypeSymlink, since they may reference directories. 
+ if strings.HasSuffix(h.Name, "/") { + return FormatUnknown, nil, headerError{"filename may not have trailing slash"} + } + case TypeXHeader, TypeGNULongName, TypeGNULongLink: + return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} + case TypeXGlobalHeader: + h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format} + if !reflect.DeepEqual(h, h2) { + return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"} + } + whyOnlyPAX = "only PAX supports TypeXGlobalHeader" + format.mayOnlyBe(FormatPAX) + } + if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { + return FormatUnknown, nil, headerError{"negative size on header-only type"} + } + + // Check PAX records. + if len(h.Xattrs) > 0 { + for k, v := range h.Xattrs { + paxHdrs[paxSchilyXattr+k] = v + } + whyOnlyPAX = "only PAX supports Xattrs" + format.mayOnlyBe(FormatPAX) + } + if len(h.PAXRecords) > 0 { + for k, v := range h.PAXRecords { + switch _, exists := paxHdrs[k]; { + case exists: + continue // Do not overwrite existing records + case h.Typeflag == TypeXGlobalHeader: + paxHdrs[k] = v // Copy all records + case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse): + paxHdrs[k] = v // Ignore local records that may conflict + } + } + whyOnlyPAX = "only PAX supports PAXRecords" + format.mayOnlyBe(FormatPAX) + } + for k, v := range paxHdrs { + if !validPAXRecord(k, v) { + return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} + } + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Check sparse files. 
+ if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return FormatUnknown, nil, headerError{"invalid sparse holes"} + } + if h.Typeflag == TypeGNUSparse { + whyOnlyGNU = "only GNU supports TypeGNUSparse" + format.mayOnlyBe(FormatGNU) + } else { + whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" + format.mustNotBe(FormatGNU) + } + whyNoUSTAR = "USTAR does not support sparse files" + format.mustNotBe(FormatUSTAR) + } + */ + + // Check desired format. + if wantFormat := h.Format; wantFormat != FormatUnknown { + if wantFormat.has(FormatPAX) && !preferPAX { + wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too + } + format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted + } + if format == FormatUnknown { + switch h.Format { + case FormatUSTAR: + err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} + case FormatPAX: + err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} + case FormatGNU: + err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} + default: + err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} + } + } + return format, paxHdrs, err +} + // FileInfo returns an os.FileInfo for the Header. func (h *Header) FileInfo() os.FileInfo { return headerFileInfo{h} @@ -97,63 +555,43 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { // Set setuid, setgid and sticky bits. if fi.h.Mode&c_ISUID != 0 { - // setuid mode |= os.ModeSetuid } if fi.h.Mode&c_ISGID != 0 { - // setgid mode |= os.ModeSetgid } if fi.h.Mode&c_ISVTX != 0 { - // sticky mode |= os.ModeSticky } - // Set file mode bits. - // clear perm, setuid, setgid and sticky bits. - m := os.FileMode(fi.h.Mode) &^ 07777 - if m == c_ISDIR { - // directory + // Set file mode bits; clear perm, setuid, setgid, and sticky bits. 
+ switch m := os.FileMode(fi.h.Mode) &^ 07777; m { + case c_ISDIR: mode |= os.ModeDir - } - if m == c_ISFIFO { - // named pipe (FIFO) + case c_ISFIFO: mode |= os.ModeNamedPipe - } - if m == c_ISLNK { - // symbolic link + case c_ISLNK: mode |= os.ModeSymlink - } - if m == c_ISBLK { - // device file + case c_ISBLK: mode |= os.ModeDevice - } - if m == c_ISCHR { - // Unix character device + case c_ISCHR: mode |= os.ModeDevice mode |= os.ModeCharDevice - } - if m == c_ISSOCK { - // Unix domain socket + case c_ISSOCK: mode |= os.ModeSocket } switch fi.h.Typeflag { case TypeSymlink: - // symbolic link mode |= os.ModeSymlink case TypeChar: - // character device node mode |= os.ModeDevice mode |= os.ModeCharDevice case TypeBlock: - // block device node mode |= os.ModeDevice case TypeDir: - // directory mode |= os.ModeDir case TypeFifo: - // fifo node mode |= os.ModeNamedPipe } @@ -163,11 +601,15 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { // sysStat, if non-nil, populates h from system-dependent fields of fi. var sysStat func(fi os.FileInfo, h *Header) error -// Mode constants from the tar spec. const ( - c_ISUID = 04000 // Set uid - c_ISGID = 02000 // Set gid - c_ISVTX = 01000 // Save text (sticky bit) + // Mode constants from the USTAR spec: + // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + + // Common Unix mode constants; these are not defined in any common tar standard. + // Header.FileInfo understands these, but FileInfoHeader will never produce these. c_ISDIR = 040000 // Directory c_ISFIFO = 010000 // FIFO c_ISREG = 0100000 // Regular file @@ -177,33 +619,16 @@ const ( c_ISSOCK = 0140000 // Socket ) -// Keywords for the PAX Extended Header -const ( - paxAtime = "atime" - paxCharset = "charset" - paxComment = "comment" - paxCtime = "ctime" // please note that ctime is not a valid pax header. 
- paxGid = "gid" - paxGname = "gname" - paxLinkpath = "linkpath" - paxMtime = "mtime" - paxPath = "path" - paxSize = "size" - paxUid = "uid" - paxUname = "uname" - paxXattr = "SCHILY.xattr." - paxNone = "" -) - // FileInfoHeader creates a partially-populated Header from fi. // If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a directory, a slash is appended to the name. -// Because os.FileInfo's Name method returns only the base name of -// the file it describes, it may be necessary to modify the Name field -// of the returned header to provide the full path name of the file. +// +// Since os.FileInfo's Name method only returns the base name of +// the file it describes, it may be necessary to modify Header.Name +// to provide the full path name of the file. func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fi == nil { - return nil, errors.New("tar: FileInfo is nil") + return nil, errors.New("archive/tar: FileInfo is nil") } fm := fi.Mode() h := &Header{ @@ -213,30 +638,24 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { } switch { case fm.IsRegular(): - h.Mode |= c_ISREG h.Typeflag = TypeReg h.Size = fi.Size() case fi.IsDir(): h.Typeflag = TypeDir - h.Mode |= c_ISDIR h.Name += "/" case fm&os.ModeSymlink != 0: h.Typeflag = TypeSymlink - h.Mode |= c_ISLNK h.Linkname = link case fm&os.ModeDevice != 0: if fm&os.ModeCharDevice != 0 { - h.Mode |= c_ISCHR h.Typeflag = TypeChar } else { - h.Mode |= c_ISBLK h.Typeflag = TypeBlock } case fm&os.ModeNamedPipe != 0: h.Typeflag = TypeFifo - h.Mode |= c_ISFIFO case fm&os.ModeSocket != 0: - h.Mode |= c_ISSOCK + return nil, fmt.Errorf("archive/tar: sockets not supported") default: return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) } @@ -272,6 +691,12 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { h.Size = 0 h.Linkname = sys.Linkname } + if sys.PAXRecords != nil { + h.PAXRecords = make(map[string]string) + 
for k, v := range sys.PAXRecords { + h.PAXRecords[k] = v + } + } } if sysStat != nil { return h, sysStat(fi, h) @@ -279,55 +704,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { return h, nil } -var zeroBlock = make([]byte, blockSize) - -// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. -// We compute and return both. -func checksum(header []byte) (unsigned int64, signed int64) { - for i := 0; i < len(header); i++ { - if i == 148 { - // The chksum field (header[148:156]) is special: it should be treated as space bytes. - unsigned += ' ' * 8 - signed += ' ' * 8 - i += 7 - continue - } - unsigned += int64(header[i]) - signed += int64(int8(header[i])) - } - return -} - -type slicer []byte - -func (sp *slicer) next(n int) (b []byte) { - s := *sp - b, *sp = s[0:n], s[n:] - return -} - -func isASCII(s string) bool { - for _, c := range s { - if c >= 0x80 { - return false - } - } - return true -} - -func toASCII(s string) string { - if isASCII(s) { - return s - } - var buf bytes.Buffer - for _, c := range s { - if c < 0x80 { - buf.WriteByte(byte(c)) - } - } - return buf.String() -} - // isHeaderOnlyType checks if the given type flag is of the type that has no // data section even if a size is specified. func isHeaderOnlyType(flag byte) bool { @@ -338,3 +714,10 @@ func isHeaderOnlyType(flag byte) bool { return false } } + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} diff --git a/archive/tar/example_test.go b/archive/tar/example_test.go index 5f0ce2f..a2474b9 100644 --- a/archive/tar/example_test.go +++ b/archive/tar/example_test.go @@ -13,14 +13,10 @@ import ( "os" ) -func Example() { - // Create a buffer to write our archive to. - buf := new(bytes.Buffer) - - // Create a new tar archive. - tw := tar.NewWriter(buf) - - // Add some files to the archive. +func Example_minimal() { + // Create and add some files to the archive. 
+ var buf bytes.Buffer + tw := tar.NewWriter(&buf) var files = []struct { Name, Body string }{ @@ -35,34 +31,29 @@ func Example() { Size: int64(len(file.Body)), } if err := tw.WriteHeader(hdr); err != nil { - log.Fatalln(err) + log.Fatal(err) } if _, err := tw.Write([]byte(file.Body)); err != nil { - log.Fatalln(err) + log.Fatal(err) } } - // Make sure to check the error on Close. if err := tw.Close(); err != nil { - log.Fatalln(err) + log.Fatal(err) } - // Open the tar archive for reading. - r := bytes.NewReader(buf.Bytes()) - tr := tar.NewReader(r) - - // Iterate through the files in the archive. + // Open and iterate through the files in the archive. + tr := tar.NewReader(&buf) for { hdr, err := tr.Next() if err == io.EOF { - // end of tar archive - break + break // End of archive } if err != nil { - log.Fatalln(err) + log.Fatal(err) } fmt.Printf("Contents of %s:\n", hdr.Name) if _, err := io.Copy(os.Stdout, tr); err != nil { - log.Fatalln(err) + log.Fatal(err) } fmt.Println() } diff --git a/archive/tar/format.go b/archive/tar/format.go new file mode 100644 index 0000000..1f89d0c --- /dev/null +++ b/archive/tar/format.go @@ -0,0 +1,303 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import "strings" + +// Format represents the tar archive format. +// +// The original tar format was introduced in Unix V7. +// Since then, there have been multiple competing formats attempting to +// standardize or extend the V7 format to overcome its limitations. +// The most common formats are the USTAR, PAX, and GNU formats, +// each with their own advantages and limitations. 
+// +// The following table captures the capabilities of each format: +// +// | USTAR | PAX | GNU +// ------------------+--------+-----------+---------- +// Name | 256B | unlimited | unlimited +// Linkname | 100B | unlimited | unlimited +// Size | uint33 | unlimited | uint89 +// Mode | uint21 | uint21 | uint57 +// Uid/Gid | uint21 | unlimited | uint57 +// Uname/Gname | 32B | unlimited | 32B +// ModTime | uint33 | unlimited | int89 +// AccessTime | n/a | unlimited | int89 +// ChangeTime | n/a | unlimited | int89 +// Devmajor/Devminor | uint21 | uint21 | uint57 +// ------------------+--------+-----------+---------- +// string encoding | ASCII | UTF-8 | binary +// sub-second times | no | yes | no +// sparse files | no | yes | yes +// +// The table's upper portion shows the Header fields, where each format reports +// the maximum number of bytes allowed for each string field and +// the integer type used to store each numeric field +// (where timestamps are stored as the number of seconds since the Unix epoch). +// +// The table's lower portion shows specialized features of each format, +// such as supported string encodings, support for sub-second timestamps, +// or support for sparse files. +// +// The Writer currently provides no support for sparse files. +type Format int + +// Constants to identify various tar formats. +const ( + // Deliberately hide the meaning of constants from public API. + _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... + + // FormatUnknown indicates that the format is unknown. + FormatUnknown + + // The format of the original Unix V7 tar tool prior to standardization. + formatV7 + + // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. + // + // While this format is compatible with most tar readers, + // the format has several limitations making it unsuitable for some usages. 
+ // Most notably, it cannot support sparse files, files larger than 8GiB, + // filenames larger than 256 characters, and non-ASCII filenames. + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + FormatUSTAR + + // FormatPAX represents the PAX header format defined in POSIX.1-2001. + // + // PAX extends USTAR by writing a special file with Typeflag TypeXHeader + // preceding the original header. This file contains a set of key-value + // records, which are used to overcome USTAR's shortcomings, in addition to + // providing the ability to have sub-second resolution for timestamps. + // + // Some newer formats add their own extensions to PAX by defining their + // own keys and assigning certain semantic meaning to the associated values. + // For example, sparse file support in PAX is implemented using keys + // defined by the GNU manual (e.g., "GNU.sparse.map"). + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html + FormatPAX + + // FormatGNU represents the GNU header format. + // + // The GNU header format is older than the USTAR and PAX standards and + // is not compatible with them. The GNU format supports + // arbitrary file sizes, filenames of arbitrary encoding and length, + // sparse files, and other features. + // + // It is recommended that PAX be chosen over GNU unless the target + // application can only parse GNU formatted archives. + // + // Reference: + // https://www.gnu.org/software/tar/manual/html_node/Standard.html + FormatGNU + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. 
+ formatSTAR + + formatMax +) + +func (f Format) has(f2 Format) bool { return f&f2 != 0 } +func (f *Format) mayBe(f2 Format) { *f |= f2 } +func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } +func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } + +var formatNames = map[Format]string{ + formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", +} + +func (f Format) String() string { + var ss []string + for f2 := Format(1); f2 < formatMax; f2 <<= 1 { + if f.has(f2) { + ss = append(ss, formatNames[f2]) + } + } + switch len(ss) { + case 0: + return "" + case 1: + return ss[0] + default: + return "(" + strings.Join(ss, " | ") + ")" + } +} + +// Magics used to identify various formats. +const ( + magicGNU, versionGNU = "ustar ", " \x00" + magicUSTAR, versionUSTAR = "ustar\x00", "00" + trailerSTAR = "tar\x00" +) + +// Size constants from various tar specifications. +const ( + blockSize = 512 // Size of each block in a tar stream + nameSize = 100 // Max length of the name field in USTAR format + prefixSize = 155 // Max length of the prefix field in USTAR format +) + +// blockPadding computes the number of bytes needed to pad offset up to the +// nearest block edge where 0 <= n < blockSize. +func blockPadding(offset int64) (n int64) { + return -offset & (blockSize - 1) +} + +var zeroBlock block + +type block [blockSize]byte + +// Convert block to any number of formats. +func (b *block) V7() *headerV7 { return (*headerV7)(b) } +func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } +func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } +func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } +func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } + +// GetFormat checks that the block is a valid tar header based on the checksum. +// It then attempts to guess the specific format based on magic values. +// If the checksum fails, then FormatUnknown is returned. 
+func (b *block) GetFormat() Format { + // Verify checksum. + var p parser + value := p.parseOctal(b.V7().Chksum()) + chksum1, chksum2 := b.ComputeChecksum() + if p.err != nil || (value != chksum1 && value != chksum2) { + return FormatUnknown + } + + // Guess the magic values. + magic := string(b.USTAR().Magic()) + version := string(b.USTAR().Version()) + trailer := string(b.STAR().Trailer()) + switch { + case magic == magicUSTAR && trailer == trailerSTAR: + return formatSTAR + case magic == magicUSTAR: + return FormatUSTAR | FormatPAX + case magic == magicGNU && version == versionGNU: + return FormatGNU + default: + return formatV7 + } +} + +// SetFormat writes the magic values necessary for specified format +// and then updates the checksum accordingly. +func (b *block) SetFormat(format Format) { + // Set the magic values. + switch { + case format.has(formatV7): + // Do nothing. + case format.has(FormatGNU): + copy(b.GNU().Magic(), magicGNU) + copy(b.GNU().Version(), versionGNU) + case format.has(formatSTAR): + copy(b.STAR().Magic(), magicUSTAR) + copy(b.STAR().Version(), versionUSTAR) + copy(b.STAR().Trailer(), trailerSTAR) + case format.has(FormatUSTAR | FormatPAX): + copy(b.USTAR().Magic(), magicUSTAR) + copy(b.USTAR().Version(), versionUSTAR) + default: + panic("invalid format") + } + + // Update checksum. + // This field is special in that it is terminated by a NULL then space. + var f formatter + field := b.V7().Chksum() + chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 + f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 + field[7] = ' ' +} + +// ComputeChecksum computes the checksum for the header block. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. +// We compute and return both. +func (b *block) ComputeChecksum() (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. 
+ } + unsigned += int64(c) + signed += int64(int8(c)) + } + return unsigned, signed +} + +// Reset clears the block with all zeros. +func (b *block) Reset() { + *b = block{} +} + +type headerV7 [blockSize]byte + +func (h *headerV7) Name() []byte { return h[000:][:100] } +func (h *headerV7) Mode() []byte { return h[100:][:8] } +func (h *headerV7) UID() []byte { return h[108:][:8] } +func (h *headerV7) GID() []byte { return h[116:][:8] } +func (h *headerV7) Size() []byte { return h[124:][:12] } +func (h *headerV7) ModTime() []byte { return h[136:][:12] } +func (h *headerV7) Chksum() []byte { return h[148:][:8] } +func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } +func (h *headerV7) LinkName() []byte { return h[157:][:100] } + +type headerGNU [blockSize]byte + +func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerGNU) Magic() []byte { return h[257:][:6] } +func (h *headerGNU) Version() []byte { return h[263:][:2] } +func (h *headerGNU) UserName() []byte { return h[265:][:32] } +func (h *headerGNU) GroupName() []byte { return h[297:][:32] } +func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } +func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } +func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } +func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } +func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } +func (h *headerGNU) RealSize() []byte { return h[483:][:12] } + +type headerSTAR [blockSize]byte + +func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerSTAR) Version() []byte { return h[263:][:2] } +func (h *headerSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerSTAR) Prefix() 
[]byte { return h[345:][:131] } +func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } +func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } +func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } + +type headerUSTAR [blockSize]byte + +func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerUSTAR) Version() []byte { return h[263:][:2] } +func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } + +type sparseArray []byte + +func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } +func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } +func (s sparseArray) MaxEntries() int { return len(s) / 24 } + +type sparseElem []byte + +func (s sparseElem) Offset() []byte { return s[00:][:12] } +func (s sparseElem) Length() []byte { return s[12:][:12] } diff --git a/archive/tar/reader.go b/archive/tar/reader.go index adf3212..3943718 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -4,392 +4,293 @@ package tar -// TODO(dsymonds): -// - pax extensions - import ( "bytes" - "errors" "io" "io/ioutil" - "math" - "os" "strconv" "strings" "time" ) -var ( - ErrHeader = errors.New("archive/tar: invalid tar header") -) - -const maxNanoSecondIntSize = 9 - -// A Reader provides sequential access to the contents of a tar archive. -// A tar archive consists of a sequence of files. -// The Next method advances to the next file in the archive (including the first), -// and then it can be treated as an io.Reader to access the file's data. +// Reader provides sequential access to the contents of a tar archive. 
+// Reader.Next advances to the next file in the archive (including the first), +// and then Reader can be treated as an io.Reader to access the file's data. type Reader struct { - r io.Reader - err error - pad int64 // amount of padding (ignored) after current file entry - curr numBytesReader // reader for current file entry - hdrBuff [blockSize]byte // buffer to use in readHeader + r io.Reader + pad int64 // Amount of padding (ignored) after current file entry + curr fileReader // Reader for current file entry + blk block // Buffer to use as temporary local storage - RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. - rawBytes *bytes.Buffer // last raw bits + // err is a persistent error. + // It is only the responsibility of every exported method of Reader to + // ensure that this error is sticky. + err error } -type parser struct { - err error // Last error seen -} - -// RawBytes accesses the raw bytes of the archive, apart from the file payload itself. -// This includes the header and padding. -// -// This call resets the current rawbytes buffer -// -// Only when RawAccounting is enabled, otherwise this returns nil -func (tr *Reader) RawBytes() []byte { - if !tr.RawAccounting { - return nil - } - if tr.rawBytes == nil { - tr.rawBytes = bytes.NewBuffer(nil) - } - // if we've read them, then flush them. - defer tr.rawBytes.Reset() - return tr.rawBytes.Bytes() -} - -// A numBytesReader is an io.Reader with a numBytes method, returning the number -// of bytes remaining in the underlying encoded data. -type numBytesReader interface { +type fileReader interface { io.Reader - numBytes() int64 + fileState + + WriteTo(io.Writer) (int64, error) } -// A regFileReader is a numBytesReader for reading file data from a tar archive. 
-type regFileReader struct { - r io.Reader // underlying reader - nb int64 // number of unread bytes for current file entry -} - -// A sparseFileReader is a numBytesReader for reading sparse file data from a -// tar archive. -type sparseFileReader struct { - rfr numBytesReader // Reads the sparse-encoded file data - sp []sparseEntry // The sparse map for the file - pos int64 // Keeps track of file position - total int64 // Total size of the file -} - -// A sparseEntry holds a single entry in a sparse file's sparse map. -// -// Sparse files are represented using a series of sparseEntrys. -// Despite the name, a sparseEntry represents an actual data fragment that -// references data found in the underlying archive stream. All regions not -// covered by a sparseEntry are logically filled with zeros. -// -// For example, if the underlying raw file contains the 10-byte data: -// var compactData = "abcdefgh" -// -// And the sparse map has the following entries: -// var sp = []sparseEntry{ -// {offset: 2, numBytes: 5} // Data fragment for [2..7] -// {offset: 18, numBytes: 3} // Data fragment for [18..21] -// } -// -// Then the content of the resulting sparse file with a "real" size of 25 is: -// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 -type sparseEntry struct { - offset int64 // Starting position of the fragment - numBytes int64 // Length of the fragment -} - -// Keywords for GNU sparse files in a PAX extended header -const ( - paxGNUSparseNumBlocks = "GNU.sparse.numblocks" - paxGNUSparseOffset = "GNU.sparse.offset" - paxGNUSparseNumBytes = "GNU.sparse.numbytes" - paxGNUSparseMap = "GNU.sparse.map" - paxGNUSparseName = "GNU.sparse.name" - paxGNUSparseMajor = "GNU.sparse.major" - paxGNUSparseMinor = "GNU.sparse.minor" - paxGNUSparseSize = "GNU.sparse.size" - paxGNUSparseRealSize = "GNU.sparse.realsize" -) - -// Keywords for old GNU sparse headers -const ( - oldGNUSparseMainHeaderOffset = 386 - oldGNUSparseMainHeaderIsExtendedOffset = 482 - 
oldGNUSparseMainHeaderNumEntries = 4 - oldGNUSparseExtendedHeaderIsExtendedOffset = 504 - oldGNUSparseExtendedHeaderNumEntries = 21 - oldGNUSparseOffsetSize = 12 - oldGNUSparseNumBytesSize = 12 -) - // NewReader creates a new Reader reading from r. -func NewReader(r io.Reader) *Reader { return &Reader{r: r} } +func NewReader(r io.Reader) *Reader { + return &Reader{r: r, curr: &regFileReader{r, 0}} +} // Next advances to the next entry in the tar archive. +// The Header.Size determines how many bytes can be read for the next file. +// Any remaining data in the current file is automatically discarded. // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - if tr.RawAccounting { - if tr.rawBytes == nil { - tr.rawBytes = bytes.NewBuffer(nil) - } else { - tr.rawBytes.Reset() - } - } - if tr.err != nil { return nil, tr.err } + hdr, err := tr.next() + tr.err = err + return hdr, err +} - var hdr *Header - var extHdrs map[string]string +func (tr *Reader) next() (*Header, error) { + var paxHdrs map[string]string + var gnuLongName, gnuLongLink string // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not // normally be visible to the outside. As such, this loop iterates through // one or more "header files" until it finds a "normal file". -loop: + format := FormatUSTAR | FormatPAX | FormatGNU for { - tr.err = tr.skipUnread() - if tr.err != nil { - return nil, tr.err + // Discard the remainder of the file and any padding.
+ if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil { + return nil, err } + if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + return nil, err + } + tr.pad = 0 - hdr = tr.readHeader() - if tr.err != nil { - return nil, tr.err + hdr, rawHdr, err := tr.readHeader() + if err != nil { + return nil, err } + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + format.mayOnlyBe(hdr.Format) + // Check for PAX/GNU special headers and files. switch hdr.Typeflag { - case TypeXHeader: - extHdrs, tr.err = parsePAX(tr) - if tr.err != nil { - return nil, tr.err + case TypeXHeader, TypeXGlobalHeader: + format.mayOnlyBe(FormatPAX) + paxHdrs, err = parsePAX(tr) + if err != nil { + return nil, err } - continue loop // This is a meta header affecting the next header + if hdr.Typeflag == TypeXGlobalHeader { + mergePAX(hdr, paxHdrs) + return &Header{ + Name: hdr.Name, + Typeflag: hdr.Typeflag, + Xattrs: hdr.Xattrs, + PAXRecords: hdr.PAXRecords, + Format: format, + }, nil + } + continue // This is a meta header affecting the next header case TypeGNULongName, TypeGNULongLink: - var realname []byte - realname, tr.err = ioutil.ReadAll(tr) - if tr.err != nil { - return nil, tr.err + format.mayOnlyBe(FormatGNU) + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { - return nil, tr.err - } - } - - // Convert GNU extensions to use PAX headers. 
- if extHdrs == nil { - extHdrs = make(map[string]string) - } var p parser switch hdr.Typeflag { case TypeGNULongName: - extHdrs[paxPath] = p.parseString(realname) + gnuLongName = p.parseString(realname) case TypeGNULongLink: - extHdrs[paxLinkpath] = p.parseString(realname) + gnuLongLink = p.parseString(realname) } - if p.err != nil { - tr.err = p.err - return nil, tr.err - } - continue loop // This is a meta header affecting the next header + continue // This is a meta header affecting the next header default: - mergePAX(hdr, extHdrs) + // The old GNU sparse format is handled here since it is technically + // just a regular file with additional attributes. - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - tr.err = err + if err := mergePAX(hdr, paxHdrs); err != nil { return nil, err } - if sp != nil { - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil, tr.err + if gnuLongName != "" { + hdr.Name = gnuLongName + } + if gnuLongLink != "" { + hdr.Linkname = gnuLongLink + } + if hdr.Typeflag == TypeRegA { + if strings.HasSuffix(hdr.Name, "/") { + hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories + } else { + hdr.Typeflag = TypeReg } } - break loop // This is a file, so stop + + // The extended headers may have updated the size. + // Thus, setup the regFileReader again after merging PAX headers. + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Sparse formats rely on being able to read from the logical data + // section; there must be a preceding call to handleRegularFile. + if err := tr.handleSparseFile(hdr, rawHdr); err != nil { + return nil, err + } + + // Set the final guess at the format. 
+ if format.has(FormatUSTAR) && format.has(FormatPAX) { + format.mayOnlyBe(FormatUSTAR) + } + hdr.Format = format + return hdr, nil // This is a file, so stop } } - return hdr, nil } -// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then -// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to -// be treated as a regular file. -func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { - var sparseFormat string - - // Check for sparse format indicators - major, majorOk := headers[paxGNUSparseMajor] - minor, minorOk := headers[paxGNUSparseMinor] - sparseName, sparseNameOk := headers[paxGNUSparseName] - _, sparseMapOk := headers[paxGNUSparseMap] - sparseSize, sparseSizeOk := headers[paxGNUSparseSize] - sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] - - // Identify which, if any, sparse format applies from which PAX headers are set - if majorOk && minorOk { - sparseFormat = major + "." + minor - } else if sparseNameOk && sparseMapOk { - sparseFormat = "0.1" - } else if sparseSizeOk { - sparseFormat = "0.0" - } else { - // Not a PAX format GNU sparse file. - return nil, nil +// handleRegularFile sets up the current file reader and padding such that it +// can only read the following logical data section. It will properly handle +// special headers that contain no data section. 
+func (tr *Reader) handleRegularFile(hdr *Header) error { + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + return ErrHeader } - // Check for unknown sparse format - if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { - return nil, nil - } + tr.pad = blockPadding(nb) + tr.curr = &regFileReader{r: tr.r, nb: nb} + return nil +} - // Update hdr from GNU sparse PAX headers - if sparseNameOk { - hdr.Name = sparseName - } - if sparseSizeOk { - realSize, err := strconv.ParseInt(sparseSize, 10, 0) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } else if sparseRealSizeOk { - realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } - - // Set up the sparse map, according to the particular sparse format in use - var sp []sparseEntry +// handleSparseFile checks if the current file is a sparse format of any type +// and sets the curr reader appropriately. +func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { + var spd sparseDatas var err error - switch sparseFormat { - case "0.0", "0.1": - sp, err = readGNUSparseMap0x1(headers) - case "1.0": - sp, err = readGNUSparseMap1x0(tr.curr) + if hdr.Typeflag == TypeGNUSparse { + spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) + } else { + spd, err = tr.readGNUSparsePAXHeaders(hdr) } - return sp, err + + // If sp is non-nil, then this is a sparse file. + // Note that it is possible for len(sp) == 0. + if err == nil && spd != nil { + if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { + return ErrHeader + } + sph := invertSparseEntries(spd, hdr.Size) + tr.curr = &sparseFileReader{tr.curr, sph, 0} + } + return err } -// mergePAX merges well known headers according to PAX standard. -// In general headers with the same name as those found -// in the header struct overwrite those found in the header -// struct with higher precision or longer values.
Esp. useful -// for name and linkname fields. -func mergePAX(hdr *Header, headers map[string]string) error { - for k, v := range headers { +// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. +// If they are found, then this function reads the sparse map and returns it. +// This assumes that 0.0 headers have already been converted to 0.1 headers +// by the PAX header parsing logic. +func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { + // Identify the version of GNU headers. + var is1x0 bool + major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] + switch { + case major == "0" && (minor == "0" || minor == "1"): + is1x0 = false + case major == "1" && minor == "0": + is1x0 = true + case major != "" || minor != "": + return nil, nil // Unknown GNU sparse PAX version + case hdr.PAXRecords[paxGNUSparseMap] != "": + is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess + default: + return nil, nil // Not a PAX format GNU sparse file. + } + hdr.Format.mayOnlyBe(FormatPAX) + + // Update hdr from GNU sparse PAX headers. + if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { + hdr.Name = name + } + size := hdr.PAXRecords[paxGNUSparseSize] + if size == "" { + size = hdr.PAXRecords[paxGNUSparseRealSize] + } + if size != "" { + n, err := strconv.ParseInt(size, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = n + } + + // Read the sparse map according to the appropriate format. + if is1x0 { + return readGNUSparseMap1x0(tr.curr) + } + return readGNUSparseMap0x1(hdr.PAXRecords) +} + +// mergePAX merges paxHdrs into hdr for all relevant fields of Header. 
+func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { + for k, v := range paxHdrs { + if v == "" { + continue // Keep the original USTAR value + } + var id64 int64 switch k { case paxPath: hdr.Name = v case paxLinkpath: hdr.Linkname = v - case paxGname: - hdr.Gname = v case paxUname: hdr.Uname = v + case paxGname: + hdr.Gname = v case paxUid: - uid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Uid = int(uid) + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Uid = int(id64) // Integer overflow possible case paxGid: - gid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Gid = int(gid) + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Gid = int(id64) // Integer overflow possible case paxAtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.AccessTime = t + hdr.AccessTime, err = parsePAXTime(v) case paxMtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ModTime = t + hdr.ModTime, err = parsePAXTime(v) case paxCtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ChangeTime = t + hdr.ChangeTime, err = parsePAXTime(v) case paxSize: - size, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Size = int64(size) + hdr.Size, err = strconv.ParseInt(v, 10, 64) default: - if strings.HasPrefix(k, paxXattr) { + if strings.HasPrefix(k, paxSchilyXattr) { if hdr.Xattrs == nil { hdr.Xattrs = make(map[string]string) } - hdr.Xattrs[k[len(paxXattr):]] = v + hdr.Xattrs[k[len(paxSchilyXattr):]] = v } } + if err != nil { + return ErrHeader + } } + hdr.PAXRecords = paxHdrs return nil } -// parsePAXTime takes a string of the form %d.%d as described in -// the PAX specification. 
-func parsePAXTime(t string) (time.Time, error) { - buf := []byte(t) - pos := bytes.IndexByte(buf, '.') - var seconds, nanoseconds int64 - var err error - if pos == -1 { - seconds, err = strconv.ParseInt(t, 10, 0) - if err != nil { - return time.Time{}, err - } - } else { - seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) - if err != nil { - return time.Time{}, err - } - nano_buf := string(buf[pos+1:]) - // Pad as needed before converting to a decimal. - // For example .030 -> .030000000 -> 30000000 nanoseconds - if len(nano_buf) < maxNanoSecondIntSize { - // Right pad - nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) - } else if len(nano_buf) > maxNanoSecondIntSize { - // Right truncate - nano_buf = nano_buf[:maxNanoSecondIntSize] - } - nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) - if err != nil { - return time.Time{}, err - } - } - ts := time.Unix(seconds, nanoseconds) - return ts, nil -} - // parsePAX parses PAX headers. // If an extended header (type 'x') is invalid, ErrHeader is returned func parsePAX(r io.Reader) (map[string]string, error) { @@ -397,21 +298,14 @@ func parsePAX(r io.Reader) (map[string]string, error) { if err != nil { return nil, err } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } sbuf := string(buf) // For GNU PAX sparse format 0.0 support. - // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. - var sparseMap bytes.Buffer + // This function transforms the sparse format 0.0 headers into format 0.1 + // headers since 0.0 headers were not PAX compliant. 
+ var sparseMap []string - headers := make(map[string]string) - // Each record is constructed as - // "%d %s=%s\n", length, keyword, value + paxHdrs := make(map[string]string) for len(sbuf) > 0 { key, value, residual, err := parsePAXRecord(sbuf) if err != nil { @@ -419,422 +313,222 @@ func parsePAX(r io.Reader) (map[string]string, error) { } sbuf = residual - keyStr := string(key) - if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { - // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.WriteString(value) - sparseMap.Write([]byte{','}) - } else { - // Normal key. Set the value in the headers map. - headers[keyStr] = string(value) - } - } - if sparseMap.Len() != 0 { - // Add sparse info to headers, chopping off the extra comma - sparseMap.Truncate(sparseMap.Len() - 1) - headers[paxGNUSparseMap] = sparseMap.String() - } - return headers, nil -} - -// parsePAXRecord parses the input PAX record string into a key-value pair. -// If parsing is successful, it will slice off the currently read record and -// return the remainder as r. -// -// A PAX record is of the following form: -// "%d %s=%s\n" % (size, key, value) -func parsePAXRecord(s string) (k, v, r string, err error) { - // The size field ends at the first space. - sp := strings.IndexByte(s, ' ') - if sp == -1 { - return "", "", s, ErrHeader - } - - // Parse the first token as a decimal integer. - n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int - if perr != nil || n < 5 || int64(len(s)) < n { - return "", "", s, ErrHeader - } - - // Extract everything between the space and the final newline. - rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] - if nl != "\n" { - return "", "", s, ErrHeader - } - - // The first equals separates the key from the value. 
- eq := strings.IndexByte(rec, '=') - if eq == -1 { - return "", "", s, ErrHeader - } - return rec[:eq], rec[eq+1:], rem, nil -} - -// parseString parses bytes as a NUL-terminated C-style string. -// If a NUL byte is not found then the whole slice is returned as a string. -func (*parser) parseString(b []byte) string { - n := 0 - for n < len(b) && b[n] != 0 { - n++ - } - return string(b[0:n]) -} - -// parseNumeric parses the input as being encoded in either base-256 or octal. -// This function may return negative numbers. -// If parsing fails or an integer overflow occurs, err will be set. -func (p *parser) parseNumeric(b []byte) int64 { - // Check for base-256 (binary) format first. - // If the first bit is set, then all following bits constitute a two's - // complement encoded number in big-endian byte order. - if len(b) > 0 && b[0]&0x80 != 0 { - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. - var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } - - var x uint64 - for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing - if i == 0 { - c &= 0x7f // Ignore signal bit in first byte + switch key { + case paxGNUSparseOffset, paxGNUSparseNumBytes: + // Validate sparse header order and value. + if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || + (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || + strings.Contains(value, ",") { + return nil, ErrHeader } - if (x >> 56) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - x = x<<8 | uint64(c) - } - if (x >> 63) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - if inv == 0xff { - return ^int64(x) - } - return int64(x) - } - - // Normal case is base-8 (octal) format. 
- return p.parseOctal(b) -} - -func (p *parser) parseOctal(b []byte) int64 { - // Because unused fields are filled with NULs, we need - // to skip leading NULs. Fields may also be padded with - // spaces or NULs. - // So we remove leading and trailing NULs and spaces to - // be sure. - b = bytes.Trim(b, " \x00") - - if len(b) == 0 { - return 0 - } - x, perr := strconv.ParseUint(p.parseString(b), 8, 64) - if perr != nil { - p.err = ErrHeader - } - return int64(x) -} - -// skipUnread skips any unread bytes in the existing file entry, as well as any -// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is -// encountered in the data portion; it is okay to hit io.EOF in the padding. -// -// Note that this function still works properly even when sparse files are being -// used since numBytes returns the bytes remaining in the underlying io.Reader. -func (tr *Reader) skipUnread() error { - dataSkip := tr.numBytes() // Number of data bytes to skip - totalSkip := dataSkip + tr.pad // Total number of bytes to skip - tr.curr, tr.pad = nil, 0 - if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) - return tr.err - } - // If possible, Seek to the last byte before the end of the data section. - // Do this because Seek is often lazy about reporting errors; this will mask - // the fact that the tar stream may be truncated. We can rely on the - // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek - if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { - // Not all io.Seeker can actually Seek. For example, os.Stdin implements - // io.Seeker, but calling Seek always returns an error and performs - // no action. Thus, we try an innocent seek to the current position - // to see if Seek is really supported. - pos1, err := sr.Seek(0, os.SEEK_CUR) - if err == nil { - // Seek seems supported, so perform the real Seek. 
- pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) - if err != nil { - tr.err = err - return tr.err - } - seekSkipped = pos2 - pos1 + sparseMap = append(sparseMap, value) + default: + paxHdrs[key] = value } } - - var copySkipped int64 // Number of bytes skipped via CopyN - copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { - tr.err = io.ErrUnexpectedEOF + if len(sparseMap) > 0 { + paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") } - return tr.err -} - -func (tr *Reader) verifyChecksum(header []byte) bool { - if tr.err != nil { - return false - } - - var p parser - given := p.parseOctal(header[148:156]) - unsigned, signed := checksum(header) - return p.err == nil && (given == unsigned || given == signed) + return paxHdrs, nil } // readHeader reads the next block header and assumes that the underlying reader -// is already aligned to a block boundary. +// is already aligned to a block boundary. It returns the raw block of the +// header in case further processing is required. // // The err will be set to io.EOF only when one of the following occurs: // * Exactly 0 bytes are read and EOF is hit. // * Exactly 1 block of zeros is read and EOF is hit. // * At least 2 blocks of zeros are read. -func (tr *Reader) readHeader() *Header { - header := tr.hdrBuff[:] - copy(header, zeroBlock) - - if n, err := io.ReadFull(tr.r, header); err != nil { - tr.err = err - // because it could read some of the block, but reach EOF first - if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header[:n]); err != nil { - tr.err = err - } - } - return nil // io.EOF is okay here - } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil - } - } - +func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive. 
- if bytes.Equal(header, zeroBlock[0:blockSize]) { - if n, err := io.ReadFull(tr.r, header); err != nil { - tr.err = err - // because it could read some of the block, but reach EOF first - if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header[:n]); err != nil { - tr.err = err - } - } - return nil // io.EOF is okay here + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 0 bytes read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 1 block of zeros read } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil - } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read } - if bytes.Equal(header, zeroBlock[0:blockSize]) { - tr.err = io.EOF - } else { - tr.err = ErrHeader // zero block and then non-zero block - } - return nil + return nil, nil, ErrHeader // Zero block and then non-zero block } - if !tr.verifyChecksum(header) { - tr.err = ErrHeader - return nil + // Verify the header matches a known format. + format := tr.blk.GetFormat() + if format == FormatUnknown { + return nil, nil, ErrHeader } - // Unpack var p parser hdr := new(Header) - s := slicer(header) - hdr.Name = p.parseString(s.next(100)) - hdr.Mode = p.parseNumeric(s.next(8)) - hdr.Uid = int(p.parseNumeric(s.next(8))) - hdr.Gid = int(p.parseNumeric(s.next(8))) - hdr.Size = p.parseNumeric(s.next(12)) - hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) - s.next(8) // chksum - hdr.Typeflag = s.next(1)[0] - hdr.Linkname = p.parseString(s.next(100)) + // Unpack the V7 header. 
+ v7 := tr.blk.V7() + hdr.Typeflag = v7.TypeFlag()[0] + hdr.Name = p.parseString(v7.Name()) + hdr.Linkname = p.parseString(v7.LinkName()) + hdr.Size = p.parseNumeric(v7.Size()) + hdr.Mode = p.parseNumeric(v7.Mode()) + hdr.Uid = int(p.parseNumeric(v7.UID())) + hdr.Gid = int(p.parseNumeric(v7.GID())) + hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) - // The remainder of the header depends on the value of magic. - // The original (v7) version of tar had no explicit magic field, - // so its magic bytes, like the rest of the block, are NULs. - magic := string(s.next(8)) // contains version field as well. - var format string - switch { - case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) - if string(header[508:512]) == "tar\x00" { - format = "star" - } else { - format = "posix" - } - case magic == "ustar \x00": // old GNU tar - format = "gnu" - } + // Unpack format specific fields. + if format > formatV7 { + ustar := tr.blk.USTAR() + hdr.Uname = p.parseString(ustar.UserName()) + hdr.Gname = p.parseString(ustar.GroupName()) + hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) + hdr.Devminor = p.parseNumeric(ustar.DevMinor()) - switch format { - case "posix", "gnu", "star": - hdr.Uname = p.parseString(s.next(32)) - hdr.Gname = p.parseString(s.next(32)) - devmajor := s.next(8) - devminor := s.next(8) - if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = p.parseNumeric(devmajor) - hdr.Devminor = p.parseNumeric(devminor) - } var prefix string - switch format { - case "posix", "gnu": - prefix = p.parseString(s.next(155)) - case "star": - prefix = p.parseString(s.next(131)) - hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) - hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) + switch { + case format.has(FormatUSTAR | FormatPAX): + hdr.Format = format + ustar := tr.blk.USTAR() + prefix = p.parseString(ustar.Prefix()) + + // For Format detection, check if block is properly formatted since + // the parser is more liberal than 
what USTAR actually permits. + notASCII := func(r rune) bool { return r >= 0x80 } + if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { + hdr.Format = FormatUnknown // Non-ASCII characters in block. + } + nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } + if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && + nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { + hdr.Format = FormatUnknown // Numeric fields must end in NUL + } + case format.has(formatSTAR): + star := tr.blk.STAR() + prefix = p.parseString(star.Prefix()) + hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) + case format.has(FormatGNU): + hdr.Format = format + var p2 parser + gnu := tr.blk.GNU() + if b := gnu.AccessTime(); b[0] != 0 { + hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) + } + if b := gnu.ChangeTime(); b[0] != 0 { + hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) + } + + // Prior to Go1.8, the Writer had a bug where it would output + // an invalid tar file in certain rare situations because the logic + // incorrectly believed that the old GNU format had a prefix field. + // This is wrong and leads to an output file that mangles the + // atime and ctime fields, which are often left unused. + // + // In order to continue reading tar files created by former, buggy + // versions of Go, we skeptically parse the atime and ctime fields. + // If we are unable to parse them and the prefix field looks like + // an ASCII string, then we fallback on the pre-Go1.8 behavior + // of treating these fields as the USTAR prefix field. + // + // Note that this will not use the fallback logic for all possible + // files generated by a pre-Go1.8 toolchain. 
If the generated file + // happened to have a prefix field that parses as valid + // atime and ctime fields (e.g., when they are valid octal strings), + // then it is impossible to distinguish between an valid GNU file + // and an invalid pre-Go1.8 file. + // + // See https://golang.org/issues/12594 + // See https://golang.org/issues/21005 + if p2.err != nil { + hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} + ustar := tr.blk.USTAR() + if s := p.parseString(ustar.Prefix()); isASCII(s) { + prefix = s + } + hdr.Format = FormatUnknown // Buggy file is not GNU + } } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - - if p.err != nil { - tr.err = p.err - return nil - } - - nb := hdr.Size - if isHeaderOnlyType(hdr.Typeflag) { - nb = 0 - } - if nb < 0 { - tr.err = ErrHeader - return nil - } - - // Set the current file reader. - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two - tr.curr = &regFileReader{r: tr.r, nb: nb} - - // Check for old GNU sparse format entry. - if hdr.Typeflag == TypeGNUSparse { - // Get the real size of the file. - hdr.Size = p.parseNumeric(header[483:495]) - if p.err != nil { - tr.err = p.err - return nil - } - - // Read the sparse map. - sp := tr.readOldGNUSparseMap(header) - if tr.err != nil { - return nil - } - - // Current file is a GNU sparse file. Update the current file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil - } - } - - return hdr + return hdr, &tr.blk, p.err } -// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. -// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, -// then one or more extension headers are used to store the rest of the sparse map. -func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough.
+// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. +// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.GetFormat() != FormatGNU { + return nil, ErrHeader + } + hdr.Format.mayOnlyBe(FormatGNU) + var p parser - isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 - spCap := oldGNUSparseMainHeaderNumEntries - if isExtended { - spCap += oldGNUSparseExtendedHeaderNumEntries + hdr.Size = p.parseNumeric(blk.GNU().RealSize()) + if p.err != nil { + return nil, p.err } - sp := make([]sparseEntry, 0, spCap) - s := slicer(header[oldGNUSparseMainHeaderOffset:]) - - // Read the four entries from the main tar header - for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) - if p.err != nil { - tr.err = p.err - return nil - } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - - for isExtended { - // There are more entries. Read an extension header and parse its entries. - sparseHeader := make([]byte, blockSize) - if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { - return nil - } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil { - return nil + s := blk.GNU().Sparse() + spd := make(sparseDatas, 0, s.MaxEntries()) + for { + for i := 0; i < s.MaxEntries(); i++ { + // This termination condition is identical to GNU and BSD tar. 
+ if s.Entry(i).Offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) } - } - - isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 - s = slicer(sparseHeader) - for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + offset := p.parseNumeric(s.Entry(i).Offset()) + length := p.parseNumeric(s.Entry(i).Length()) if p.err != nil { - tr.err = p.err - return nil + return nil, p.err } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, sparseEntry{Offset: offset, Length: length}) } + + if s.IsExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := mustReadFull(tr.r, blk[:]); err != nil { + return nil, err + } + s = blk.Sparse() + continue + } + return spd, nil // Done } - return sp } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format // version 1.0. The format of the sparse map consists of a series of // newline-terminated numeric fields. The first field is the number of entries // and is always present. Following this are the entries, consisting of two -// fields (offset, numBytes). This function must stop reading at the end +// fields (offset, length). This function must stop reading at the end // boundary of the block containing the last newline. // // Note that the GNU manual says that numeric values should be encoded in octal // format. However, the GNU tar utility itself outputs these values in decimal. // As such, this library treats values as being encoded in decimal. 
-func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - var cntNewline int64 - var buf bytes.Buffer - var blk = make([]byte, blockSize) +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { + var ( + cntNewline int64 + buf bytes.Buffer + blk block + ) - // feedTokens copies data in numBlock chunks from r into buf until there are + // feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. - var feedTokens = func(cnt int64) error { - for cntNewline < cnt { - if _, err := io.ReadFull(r, blk); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } + feedTokens := func(n int64) error { + for cntNewline < n { + if _, err := mustReadFull(r, blk[:]); err != nil { return err } - buf.Write(blk) + buf.Write(blk[:]) for _, c := range blk { if c == '\n' { cntNewline++ @@ -846,10 +540,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { // nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. - var nextToken = func() string { + nextToken := func() string { cntNewline-- tok, _ := buf.ReadString('\n') - return tok[:len(tok)-1] // Cut off newline + return strings.TrimRight(tok, "\n") } // Parse for the number of entries. 
@@ -868,197 +562,297 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { if err := feedTokens(2 * numEntries); err != nil { return nil, err } - sp := make([]sparseEntry, 0, numEntries) + spd := make(sparseDatas, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) + length, err2 := strconv.ParseInt(nextToken(), 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, sparseEntry{Offset: offset, Length: length}) } - return sp, nil + return spd, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // version 0.1. The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { +func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { // Get number of entries. // Use integer overflow resistant math to check this. - numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // There should be two numbers in sparseMap for each entry. - sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") + sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") + if len(sparseMap) == 1 && sparseMap[0] == "" { + sparseMap = sparseMap[:0] + } if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map. // numEntries is trusted now. 
- sp := make([]sparseEntry, 0, numEntries) - for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) - if err != nil { + spd := make(sparseDatas, 0, numEntries) + for len(sparseMap) >= 2 { + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + sparseMap = sparseMap[2:] } - return sp, nil + return spd, nil } -// numBytes returns the number of bytes left to read in the current file's entry -// in the tar archive, or 0 if there is no current file. -func (tr *Reader) numBytes() int64 { - if tr.curr == nil { - // No current file, so no bytes - return 0 - } - return tr.curr.numBytes() -} - -// Read reads from the current entry in the tar archive. -// It returns 0, io.EOF when it reaches the end of that entry, -// until Next is called to advance to the next entry. +// Read reads from the current file in the tar archive. +// It returns (0, io.EOF) when it reaches the end of that file, +// until Next is called to advance to the next file. // -// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, -// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// If the current file is sparse, then the regions marked as a hole +// are read back as NUL-bytes. +// +// Calling Read on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // the Header.Size claims. 
-func (tr *Reader) Read(b []byte) (n int, err error) { +func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } - if tr.curr == nil { - return 0, io.EOF - } - - n, err = tr.curr.Read(b) + n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err } - return -} - -func (rfr *regFileReader) Read(b []byte) (n int, err error) { - if rfr.nb == 0 { - // file consumed - return 0, io.EOF - } - if int64(len(b)) > rfr.nb { - b = b[0:rfr.nb] - } - n, err = rfr.r.Read(b) - rfr.nb -= int64(n) - - if err == io.EOF && rfr.nb > 0 { - err = io.ErrUnexpectedEOF - } - return -} - -// numBytes returns the number of bytes left to read in the file's data in the tar archive. -func (rfr *regFileReader) numBytes() int64 { - return rfr.nb -} - -// newSparseFileReader creates a new sparseFileReader, but validates all of the -// sparse entries before doing so. -func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { - if total < 0 { - return nil, ErrHeader // Total size cannot be negative - } - - // Validate all sparse entries. These are the same checks as performed by - // the BSD tar utility. - for i, s := range sp { - switch { - case s.offset < 0 || s.numBytes < 0: - return nil, ErrHeader // Negative values are never okay - case s.offset > math.MaxInt64-s.numBytes: - return nil, ErrHeader // Integer overflow with large length - case s.offset+s.numBytes > total: - return nil, ErrHeader // Region extends beyond the "real" size - case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: - return nil, ErrHeader // Regions can't overlap and must be in order - } - } - return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil -} - -// readHole reads a sparse hole ending at endOffset. 
-func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { - n64 := endOffset - sfr.pos - if n64 > int64(len(b)) { - n64 = int64(len(b)) - } - n := int(n64) - for i := 0; i < n; i++ { - b[i] = 0 - } - sfr.pos += n64 - return n -} - -// Read reads the sparse file data in expanded form. -func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - // Skip past all empty fragments. - for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { - sfr.sp = sfr.sp[1:] - } - - // If there are no more fragments, then it is possible that there - // is one last sparse hole. - if len(sfr.sp) == 0 { - // This behavior matches the BSD tar utility. - // However, GNU tar stops returning data even if sfr.total is unmet. - if sfr.pos < sfr.total { - return sfr.readHole(b, sfr.total), nil - } - return 0, io.EOF - } - - // In front of a data fragment, so read a hole. - if sfr.pos < sfr.sp[0].offset { - return sfr.readHole(b, sfr.sp[0].offset), nil - } - - // In a data fragment, so read from it. - // This math is overflow free since we verify that offset and numBytes can - // be safely added when creating the sparseFileReader. - endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment - bytesLeft := endPos - sfr.pos // Bytes left in fragment - if int64(len(b)) > bytesLeft { - b = b[:bytesLeft] - } - - n, err = sfr.rfr.Read(b) - sfr.pos += int64(n) - if err == io.EOF { - if sfr.pos < endPos { - err = io.ErrUnexpectedEOF // There was supposed to be more data - } else if sfr.pos < sfr.total { - err = nil // There is still an implicit sparse hole at the end - } - } - - if sfr.pos == endPos { - sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it - } return n, err } -// numBytes returns the number of bytes left to read in the sparse file's -// sparse-encoded data in the tar archive. -func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.numBytes() +// writeTo writes the content of the current file to w. 
+// The bytes written matches the number of remaining bytes in the current file. +// +// If the current file is sparse and w is an io.WriteSeeker, +// then writeTo uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are filled with NULs. +// This always writes the last byte to ensure w is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tr *Reader) writeTo(w io.Writer) (int64, error) { + if tr.err != nil { + return 0, tr.err + } + n, err := tr.curr.WriteTo(w) + if err != nil { + tr.err = err + } + return n, err +} + +// regFileReader is a fileReader for reading data from a regular file entry. +type regFileReader struct { + r io.Reader // Underlying Reader + nb int64 // Number of remaining bytes to read +} + +func (fr *regFileReader) Read(b []byte) (n int, err error) { + if int64(len(b)) > fr.nb { + b = b[:fr.nb] + } + if len(b) > 0 { + n, err = fr.r.Read(b) + fr.nb -= int64(n) + } + switch { + case err == io.EOF && fr.nb > 0: + return n, io.ErrUnexpectedEOF + case err == nil && fr.nb == 0: + return n, io.EOF + default: + return n, err + } +} + +func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) { + return io.Copy(w, struct{ io.Reader }{fr}) +} + +func (fr regFileReader) LogicalRemaining() int64 { + return fr.nb +} + +func (fr regFileReader) PhysicalRemaining() int64 { + return fr.nb +} + +// sparseFileReader is a fileReader for reading data from a sparse file entry. 
+type sparseFileReader struct { + fr fileReader // Underlying fileReader + sp sparseHoles // Normalized list of sparse holes + pos int64 // Current position in sparse file +} + +func (sr *sparseFileReader) Read(b []byte) (n int, err error) { + finished := int64(len(b)) >= sr.LogicalRemaining() + if finished { + b = b[:sr.LogicalRemaining()] + } + + b0 := b + endPos := sr.pos + int64(len(b)) + for endPos > sr.pos && err == nil { + var nf int // Bytes read in fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + bf := b[:min(int64(len(b)), holeStart-sr.pos)] + nf, err = tryReadFull(sr.fr, bf) + } else { // In a hole fragment + bf := b[:min(int64(len(b)), holeEnd-sr.pos)] + nf, err = tryReadFull(zeroReader{}, bf) + } + b = b[nf:] + sr.pos += int64(nf) + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case finished: + return n, io.EOF + default: + return n, nil + } +} + +func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) { + ws, ok := w.(io.WriteSeeker) + if ok { + if _, err := ws.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(w, struct{ io.Reader }{sr}) + } + + var writeLastByte bool + pos0 := sr.pos + for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil { + var nf int64 // Size of fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + nf = holeStart - sr.pos + nf, err = io.CopyN(ws, sr.fr, nf) + } else { // In a hole fragment + nf = holeEnd - sr.pos + if sr.PhysicalRemaining() == 0 { 
+ writeLastByte = true + nf-- + } + _, err = ws.Seek(nf, io.SeekCurrent) + } + sr.pos += nf + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // write a single byte to ensure the file is the right size. + if writeLastByte && err == nil { + _, err = ws.Write([]byte{0}) + sr.pos++ + } + + n = sr.pos - pos0 + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + default: + return n, nil + } +} + +func (sr sparseFileReader) LogicalRemaining() int64 { + return sr.sp[len(sr.sp)-1].endOffset() - sr.pos +} +func (sr sparseFileReader) PhysicalRemaining() int64 { + return sr.fr.PhysicalRemaining() +} + +type zeroReader struct{} + +func (zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} + +// mustReadFull is like io.ReadFull except it returns +// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. +func mustReadFull(r io.Reader, b []byte) (int, error) { + n, err := tryReadFull(r, b) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err +} + +// tryReadFull is like io.ReadFull except it returns +// io.EOF when it is hit before len(b) bytes are read. +func tryReadFull(r io.Reader, b []byte) (n int, err error) { + for len(b) > n && err == nil { + var nn int + nn, err = r.Read(b[n:]) + n += nn + } + if len(b) == n && err == io.EOF { + err = nil + } + return n, err +} + +// discard skips n bytes in r, reporting an error if unable to do so. +func discard(r io.Reader, n int64) error { + // If possible, Seek to the last byte before the end of the data section. 
+ // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := r.(io.Seeker); ok && n > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if pos1 >= 0 && err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(n-1, io.SeekCurrent) + if pos2 < 0 || err != nil { + return err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped) + if err == io.EOF && seekSkipped+copySkipped < n { + err = io.ErrUnexpectedEOF + } + return err } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 821b4f0..f153b66 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -7,28 +7,29 @@ package tar import ( "bytes" "crypto/md5" + "errors" "fmt" "io" "io/ioutil" "math" "os" + "path" "reflect" + "strconv" "strings" "testing" "time" ) -type untarTest struct { - file string // Test input file - headers []*Header // Expected output headers - chksums []string // MD5 checksum of files, leave as nil if not checked - err error // Expected error to occur -} - -var gnuTarTest = &untarTest{ - file: "testdata/gnu.tar", - headers: []*Header{ - { +func TestReader(t *testing.T) { + vectors := []struct { + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur + }{{ + file: "testdata/gnu.tar", + headers: []*Header{{ Name: "small.txt", Mode: 0640, Uid: 73025, @@ -38,8 +39,8 @@ var 
gnuTarTest = &untarTest{ Typeflag: '0', Uname: "dsymonds", Gname: "eng", - }, - { + Format: FormatGNU, + }, { Name: "small2.txt", Mode: 0640, Uid: 73025, @@ -49,18 +50,15 @@ var gnuTarTest = &untarTest{ Typeflag: '0', Uname: "dsymonds", Gname: "eng", + Format: FormatGNU, + }}, + chksums: []string{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - chksums: []string{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, -} - -var sparseTarTest = &untarTest{ - file: "testdata/sparse-formats.tar", - headers: []*Header{ - { + }, { + file: "testdata/sparse-formats.tar", + headers: []*Header{{ Name: "sparse-gnu", Mode: 420, Uid: 1000, @@ -73,8 +71,8 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + Format: FormatGNU, + }, { Name: "sparse-posix-0.0", Mode: 420, Uid: 1000, @@ -87,8 +85,13 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + PAXRecords: map[string]string{ + "GNU.sparse.size": "200", + "GNU.sparse.numblocks": "95", + "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + }, + Format: FormatPAX, + }, { Name: "sparse-posix-0.1", Mode: 420, Uid: 1000, @@ -101,8 +104,14 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + PAXRecords: map[string]string{ + "GNU.sparse.size": "200", + "GNU.sparse.numblocks": "95", + "GNU.sparse.map": 
"1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + "GNU.sparse.name": "sparse-posix-0.1", + }, + Format: FormatPAX, + }, { Name: "sparse-posix-1.0", Mode: 420, Uid: 1000, @@ -115,8 +124,14 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, - }, - { + PAXRecords: map[string]string{ + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "200", + "GNU.sparse.name": "sparse-posix-1.0", + }, + Format: FormatPAX, + }, { Name: "end", Mode: 420, Uid: 1000, @@ -129,703 +144,605 @@ var sparseTarTest = &untarTest{ Gname: "david", Devmajor: 0, Devminor: 0, + Format: FormatGNU, + }}, + chksums: []string{ + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "b0061974914468de549a2af8ced10316", }, - }, - chksums: []string{ - "6f53234398c2449fe67c1812d993012f", - "6f53234398c2449fe67c1812d993012f", - "6f53234398c2449fe67c1812d993012f", - "6f53234398c2449fe67c1812d993012f", - "b0061974914468de549a2af8ced10316", - }, -} - -var untarTests = []*untarTest{ - gnuTarTest, - sparseTarTest, - { + }, { file: "testdata/star.tar", - headers: []*Header{ - { - Name: "small.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 5, - ModTime: time.Unix(1244592783, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - AccessTime: time.Unix(1244592783, 0), - ChangeTime: time.Unix(1244592783, 0), - }, - { - Name: "small2.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 11, - 
ModTime: time.Unix(1244592783, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - AccessTime: time.Unix(1244592783, 0), - ChangeTime: time.Unix(1244592783, 0), - }, - }, - }, - { + headers: []*Header{{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }, { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }}, + }, { file: "testdata/v7.tar", - headers: []*Header{ - { - Name: "small.txt", - Mode: 0444, - Uid: 73025, - Gid: 5000, - Size: 5, - ModTime: time.Unix(1244593104, 0), - Typeflag: '\x00', - }, - { - Name: "small2.txt", - Mode: 0444, - Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1244593104, 0), - Typeflag: '\x00', - }, - }, - }, - { + headers: []*Header{{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244593104, 0), + Typeflag: '0', + }, { + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244593104, 0), + Typeflag: '0', + }}, + }, { file: "testdata/pax.tar", - headers: []*Header{ - { - Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - Mode: 0664, - Uid: 1000, - Gid: 1000, - Uname: "shane", - Gname: "shane", - Size: 7, - ModTime: time.Unix(1350244992, 23960108), - ChangeTime: time.Unix(1350244992, 23960108), - AccessTime: time.Unix(1350244992, 23960108), - Typeflag: TypeReg, + headers: []*Header{{ + Name: 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Mode: 0664, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 7, + ModTime: time.Unix(1350244992, 23960108), + ChangeTime: time.Unix(1350244992, 23960108), + AccessTime: time.Unix(1350244992, 23960108), + Typeflag: TypeReg, + PAXRecords: map[string]string{ + "path": "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime": "1350244992.023960108", + "atime": "1350244992.023960108", + "ctime": "1350244992.023960108", }, - { - Name: "a/b", - Mode: 0777, - Uid: 1000, - Gid: 1000, - Uname: "shane", - Gname: "shane", - Size: 0, - ModTime: time.Unix(1350266320, 910238425), - ChangeTime: time.Unix(1350266320, 910238425), - AccessTime: time.Unix(1350266320, 910238425), - Typeflag: TypeSymlink, - Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Format: FormatPAX, + }, { + Name: "a/b", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 0, + ModTime: time.Unix(1350266320, 910238425), + ChangeTime: time.Unix(1350266320, 910238425), + AccessTime: time.Unix(1350266320, 910238425), + Typeflag: TypeSymlink, + Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + PAXRecords: map[string]string{ + "linkpath": 
"123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime": "1350266320.910238425", + "atime": "1350266320.910238425", + "ctime": "1350266320.910238425", }, + Format: FormatPAX, + }}, + }, { + file: "testdata/pax-bad-hdr-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-bad-mtime-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-pos-size-file.tar", + headers: []*Header{{ + Name: "foo", + Mode: 0640, + Uid: 319973, + Gid: 5000, + Size: 999, + ModTime: time.Unix(1442282516, 0), + Typeflag: '0', + Uname: "joetsai", + Gname: "eng", + PAXRecords: map[string]string{ + "size": "000000000000000000000999", + }, + Format: FormatPAX, + }}, + chksums: []string{ + "0afb597b283fe61b5d4879669a350556", }, - }, - { + }, { + file: "testdata/pax-records.tar", + headers: []*Header{{ + Typeflag: TypeReg, + Name: "file", + Uname: strings.Repeat("long", 10), + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "GOLANG.pkg": "tar", + "comment": "Hello, 世界", + "uname": strings.Repeat("long", 10), + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/pax-global-records.tar", + headers: []*Header{{ + Typeflag: TypeXGlobalHeader, + Name: "global1", + PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, + Format: FormatPAX, + }, { + Typeflag: TypeReg, + Name: "file1", + ModTime: time.Unix(0, 0), + Format: FormatUSTAR, + }, { + Typeflag: TypeReg, + Name: "file2", + PAXRecords: map[string]string{"path": "file2"}, + ModTime: time.Unix(0, 0), + Format: FormatPAX, + }, { + Typeflag: TypeXGlobalHeader, + Name: "GlobalHead.0.0", + PAXRecords: map[string]string{"path": ""}, + Format: FormatPAX, + }, { + Typeflag: TypeReg, + Name: "file3", + ModTime: time.Unix(0, 0), + Format: FormatUSTAR, + }, { + Typeflag: TypeReg, + Name: "file4", + ModTime: time.Unix(1400000000, 0), + PAXRecords: 
map[string]string{"mtime": "1400000000"}, + Format: FormatPAX, + }}, + }, { file: "testdata/nil-uid.tar", // golang.org/issue/5290 - headers: []*Header{ - { - Name: "P1050238.JPG.log", - Mode: 0664, - Uid: 0, - Gid: 0, - Size: 14, - ModTime: time.Unix(1365454838, 0), - Typeflag: TypeReg, - Linkname: "", - Uname: "eyefi", - Gname: "eyefi", - Devmajor: 0, - Devminor: 0, - }, - }, - }, - { + headers: []*Header{{ + Name: "P1050238.JPG.log", + Mode: 0664, + Uid: 0, + Gid: 0, + Size: 14, + ModTime: time.Unix(1365454838, 0), + Typeflag: TypeReg, + Linkname: "", + Uname: "eyefi", + Gname: "eyefi", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }}, + }, { file: "testdata/xattrs.tar", - headers: []*Header{ - { - Name: "small.txt", - Mode: 0644, - Uid: 1000, - Gid: 10, - Size: 5, - ModTime: time.Unix(1386065770, 448252320), - Typeflag: '0', - Uname: "alex", - Gname: "wheel", - AccessTime: time.Unix(1389782991, 419875220), - ChangeTime: time.Unix(1389782956, 794414986), - Xattrs: map[string]string{ - "user.key": "value", - "user.key2": "value2", - // Interestingly, selinux encodes the terminating null inside the xattr - "security.selinux": "unconfined_u:object_r:default_t:s0\x00", - }, + headers: []*Header{{ + Name: "small.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 5, + ModTime: time.Unix(1386065770, 448252320), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1389782956, 794414986), + Xattrs: map[string]string{ + "user.key": "value", + "user.key2": "value2", + // Interestingly, selinux encodes the terminating null inside the xattr + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, - { - Name: "small2.txt", - Mode: 0644, - Uid: 1000, - Gid: 10, - Size: 11, - ModTime: time.Unix(1386065770, 449252304), - Typeflag: '0', - Uname: "alex", - Gname: "wheel", - AccessTime: time.Unix(1389782991, 419875220), - ChangeTime: time.Unix(1386065770, 449252304), - Xattrs: 
map[string]string{ - "security.selinux": "unconfined_u:object_r:default_t:s0\x00", - }, + PAXRecords: map[string]string{ + "mtime": "1386065770.44825232", + "atime": "1389782991.41987522", + "ctime": "1389782956.794414986", + "SCHILY.xattr.user.key": "value", + "SCHILY.xattr.user.key2": "value2", + "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, - }, - }, - { + Format: FormatPAX, + }, { + Name: "small2.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 11, + ModTime: time.Unix(1386065770, 449252304), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1386065770, 449252304), + Xattrs: map[string]string{ + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + PAXRecords: map[string]string{ + "mtime": "1386065770.449252304", + "atime": "1389782991.41987522", + "ctime": "1386065770.449252304", + "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + Format: FormatPAX, + }}, + }, { // Matches the behavior of GNU, BSD, and STAR tar utilities. file: "testdata/gnu-multi-hdrs.tar", - headers: []*Header{ - { - Name: "GNU2/GNU2/long-path-name", - Linkname: "GNU4/GNU4/long-linkpath-name", - ModTime: time.Unix(0, 0), - Typeflag: '2', - }, - }, - }, - { + headers: []*Header{{ + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + Format: FormatGNU, + }}, + }, { + // GNU tar file with atime and ctime fields set. + // Created with the GNU tar v1.27.1. 
+ // tar --incremental -S -cvf gnu-incremental.tar test2 + file: "testdata/gnu-incremental.tar", + headers: []*Header{{ + Name: "test2/", + Mode: 16877, + Uid: 1000, + Gid: 1000, + Size: 14, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'D', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }, { + Name: "test2/foo", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 64, + ModTime: time.Unix(1441973363, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }, { + Name: "test2/sparse", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 536870912, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'S', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441991948, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }}, + }, { // Matches the behavior of GNU and BSD tar utilities. file: "testdata/pax-multi-hdrs.tar", - headers: []*Header{ - { - Name: "bar", - Linkname: "PAX4/PAX4/long-linkpath-name", - ModTime: time.Unix(0, 0), - Typeflag: '2', + headers: []*Header{{ + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + PAXRecords: map[string]string{ + "linkpath": "PAX4/PAX4/long-linkpath-name", }, - }, - }, - { + Format: FormatPAX, + }}, + }, { + // Both BSD and GNU tar truncate long names at first NUL even + // if there is data following that NUL character. + // This is reasonable as GNU long names are C-strings. + file: "testdata/gnu-long-nul.tar", + headers: []*Header{{ + Name: "0123456789", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1486082191, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Format: FormatGNU, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. 
+ // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-utf8.tar", + headers: []*Header{{ + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "☺", + Gname: "⚹", + Format: FormatGNU, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-not-utf8.tar", + headers: []*Header{{ + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Format: FormatGNU, + }}, + }, { + // BSD tar v3.1.2 and GNU tar v1.27.1 both rejects PAX records + // with NULs in the key. + file: "testdata/pax-nul-xattrs.tar", + err: ErrHeader, + }, { + // BSD tar v3.1.2 rejects a PAX path with NUL in the value, while + // GNU tar v1.27.1 simply truncates at first NUL. + // We emulate the behavior of BSD since it is strange doing NUL + // truncations since PAX records are length-prefix strings instead + // of NUL-terminated C-strings. + file: "testdata/pax-nul-path.tar", + err: ErrHeader, + }, { file: "testdata/neg-size.tar", err: ErrHeader, - }, - { + }, { file: "testdata/issue10968.tar", err: ErrHeader, - }, - { + }, { file: "testdata/issue11169.tar", err: ErrHeader, - }, - { + }, { file: "testdata/issue12435.tar", err: ErrHeader, - }, -} + }, { + // Ensure that we can read back the original Header as written with + // a buggy pre-Go1.8 tar.Writer. 
+ file: "testdata/invalid-go17.tar", + headers: []*Header{{ + Name: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + Uid: 010000000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + }}, + }, { + // USTAR archive with a regular entry with non-zero device numbers. + file: "testdata/ustar-file-devs.tar", + headers: []*Header{{ + Name: "file", + Mode: 0644, + Typeflag: '0', + ModTime: time.Unix(0, 0), + Devmajor: 1, + Devminor: 1, + Format: FormatUSTAR, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "size": "1512", + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "1000", + "GNU.sparse.name": "sparse.db", + }, + Format: FormatPAX, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. 
+ file: "testdata/pax-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "size": "512", + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "1000", + "GNU.sparse.name": "sparse.db", + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/trailing-slash.tar", + headers: []*Header{{ + Typeflag: TypeDir, + Name: strings.Repeat("123456789/", 30), + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "path": strings.Repeat("123456789/", 30), + }, + Format: FormatPAX, + }}, + }} -func TestReader(t *testing.T) { - for i, v := range untarTests { - f, err := os.Open(v.file) - if err != nil { - t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) - continue - } - defer f.Close() - - // Capture all headers and checksums. - var ( - tr = NewReader(f) - hdrs []*Header - chksums []string - ) - for { - var hdr *Header - hdr, err = tr.Next() + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + f, err := os.Open(v.file) if err != nil { - if err == io.EOF { - err = nil // Expected error + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + // Capture all headers and checksums. 
+ var ( + tr = NewReader(f) + hdrs []*Header + chksums []string + rdbuf = make([]byte, 8) + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break } - break - } - hdrs = append(hdrs, hdr) + hdrs = append(hdrs, hdr) - if v.chksums == nil { - continue + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) } - h := md5.New() - _, err = io.Copy(h, tr) // Effectively an incremental read - if err != nil { - break - } - chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) - } - for j, hdr := range hdrs { - if j >= len(v.headers) { - t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v", - v.file, i, j, *hdr) - continue + for i, hdr := range hdrs { + if i >= len(v.headers) { + t.Fatalf("entry %d: unexpected header:\ngot %+v", i, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[i]) { + t.Fatalf("entry %d: incorrect header:\ngot %+v\nwant %+v", i, *hdr, *v.headers[i]) + } } - if !reflect.DeepEqual(*hdr, *v.headers[j]) { - t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot %+v\nwant %+v", - v.file, i, j, *hdr, *v.headers[j]) + if len(hdrs) != len(v.headers) { + t.Fatalf("got %d headers, want %d headers", len(hdrs), len(v.headers)) } - } - if len(hdrs) != len(v.headers) { - t.Errorf("file %s, test %d: got %d headers, want %d headers", - v.file, i, len(hdrs), len(v.headers)) - } - for j, sum := range chksums { - if j >= len(v.chksums) { - t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s", - v.file, i, j, sum) - continue + for i, sum := range chksums { + if i >= len(v.chksums) { + t.Fatalf("entry %d: unexpected sum: got %s", i, sum) + continue + } + if sum != v.chksums[i] { + t.Fatalf("entry %d: incorrect checksum: got %s, want %s", i, sum, v.chksums[i]) + } } - if sum != v.chksums[j] { - 
t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s", - v.file, i, j, sum, v.chksums[j]) - } - } - if err != v.err { - t.Errorf("file %s, test %d: unexpected error: got %v, want %v", - v.file, i, err, v.err) - } - f.Close() + if err != v.err { + t.Fatalf("unexpected error: got %v, want %v", err, v.err) + } + f.Close() + }) } } func TestPartialRead(t *testing.T) { - f, err := os.Open("testdata/gnu.tar") - if err != nil { - t.Fatalf("Unexpected error: %v", err) + type testCase struct { + cnt int // Number of bytes to read + output string // Expected value of string read } - defer f.Close() - - tr := NewReader(f) - - // Read the first four bytes; Next() should skip the last byte. - hdr, err := tr.Next() - if err != nil || hdr == nil { - t.Fatalf("Didn't get first file: %v", err) - } - buf := make([]byte, 4) - if _, err := io.ReadFull(tr, buf); err != nil { - t.Fatalf("Unexpected error: %v", err) - } - if expected := []byte("Kilt"); !bytes.Equal(buf, expected) { - t.Errorf("Contents = %v, want %v", buf, expected) - } - - // Second file - hdr, err = tr.Next() - if err != nil || hdr == nil { - t.Fatalf("Didn't get second file: %v", err) - } - buf = make([]byte, 6) - if _, err := io.ReadFull(tr, buf); err != nil { - t.Fatalf("Unexpected error: %v", err) - } - if expected := []byte("Google"); !bytes.Equal(buf, expected) { - t.Errorf("Contents = %v, want %v", buf, expected) - } -} - -func TestParsePAXHeader(t *testing.T) { - paxTests := [][3]string{ - {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths - {"a", "a=name", "9 a=name\n"}, // Test case involving multiple acceptable length - {"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}} - for _, test := range paxTests { - key, expected, raw := test[0], test[1], test[2] - reader := bytes.NewReader([]byte(raw)) - headers, err := parsePAX(reader) - if err != nil { - t.Errorf("Couldn't parse correctly formatted headers: %v", err) - continue - } - if 
strings.EqualFold(headers[key], expected) { - t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected) - continue - } - trailer := make([]byte, 100) - n, err := reader.Read(trailer) - if err != io.EOF || n != 0 { - t.Error("Buffer wasn't consumed") - } - } - badHeaderTests := [][]byte{ - []byte("3 somelongkey=\n"), - []byte("50 tooshort=\n"), - } - for _, test := range badHeaderTests { - if _, err := parsePAX(bytes.NewReader(test)); err != ErrHeader { - t.Fatal("Unexpected success when parsing bad header") - } - } -} - -func TestParsePAXTime(t *testing.T) { - // Some valid PAX time values - timestamps := map[string]time.Time{ - "1350244992.023960108": time.Unix(1350244992, 23960108), // The common case - "1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value - "1350244992.0239601089": time.Unix(1350244992, 23960108), // Higher precision value - "1350244992": time.Unix(1350244992, 0), // Low precision value - } - for input, expected := range timestamps { - ts, err := parsePAXTime(input) - if err != nil { - t.Fatal(err) - } - if !ts.Equal(expected) { - t.Fatalf("Time parsing failure %s %s", ts, expected) - } - } -} - -func TestMergePAX(t *testing.T) { - hdr := new(Header) - // Test a string, integer, and time based value. 
- headers := map[string]string{ - "path": "a/b/c", - "uid": "1000", - "mtime": "1350244992.023960108", - } - err := mergePAX(hdr, headers) - if err != nil { - t.Fatal(err) - } - want := &Header{ - Name: "a/b/c", - Uid: 1000, - ModTime: time.Unix(1350244992, 23960108), - } - if !reflect.DeepEqual(hdr, want) { - t.Errorf("incorrect merge: got %+v, want %+v", hdr, want) - } -} - -func TestSparseFileReader(t *testing.T) { - var vectors = []struct { - realSize int64 // Real size of the output file - sparseMap []sparseEntry // Input sparse map - sparseData string // Input compact data - expected string // Expected output data - err error // Expected error outcome + vectors := []struct { + file string + cases []testCase }{{ - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, + file: "testdata/gnu.tar", + cases: []testCase{ + {4, "Kilt"}, + {6, "Google"}, }, - sparseData: "abcde", - expected: "ab\x00\x00\x00cde", }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, + file: "testdata/sparse-formats.tar", + cases: []testCase{ + {2, "\x00G"}, + {4, "\x00G\x00o"}, + {6, "\x00G\x00o\x00G"}, + {8, "\x00G\x00o\x00G\x00o"}, + {4, "end\n"}, }, - sparseData: "abcde", - expected: "ab\x00\x00\x00cde\x00\x00", - }, { - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de", - }, { - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 0}, - {offset: 6, numBytes: 0}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de", - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de\x00\x00", - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - {offset: 8, 
numBytes: 0}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de\x00\x00", - }, { - realSize: 2, - sparseMap: []sparseEntry{}, - sparseData: "", - expected: "\x00\x00", - }, { - realSize: -2, - sparseMap: []sparseEntry{}, - err: ErrHeader, - }, { - realSize: -10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 5}, - }, - sparseData: "abcde", - err: io.ErrUnexpectedEOF, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: -5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: math.MaxInt64, numBytes: 3}, - {offset: 6, numBytes: -5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 2, numBytes: 2}, - }, - sparseData: "abcde", - err: ErrHeader, }} - for i, v := range vectors { - r := bytes.NewReader([]byte(v.sparseData)) - rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + f, err := os.Open(v.file) + if err != nil { + t.Fatalf("Open() error: %v", err) + } + defer f.Close() - var sfr *sparseFileReader - var err error - var buf []byte + tr := NewReader(f) + for i, tc := range v.cases { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Fatalf("entry %d, Next(): got %v, want %v", i, err, nil) + } + buf := make([]byte, tc.cnt) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("entry %d, ReadFull(): got %v, want %v", i, err, nil) + } + if string(buf) != 
tc.output { + t.Fatalf("entry %d, ReadFull(): got %q, want %q", i, string(buf), tc.output) + } + } - sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) - if err != nil { - goto fail - } - if sfr.numBytes() != int64(len(v.sparseData)) { - t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) - } - buf, err = ioutil.ReadAll(sfr) - if err != nil { - goto fail - } - if string(buf) != v.expected { - t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) - } - if sfr.numBytes() != 0 { - t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) - } - - fail: - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } - } -} - -func TestReadGNUSparseMap0x1(t *testing.T) { - const ( - maxUint = ^uint(0) - maxInt = int(maxUint >> 1) - ) - var ( - big1 = fmt.Sprintf("%d", int64(maxInt)) - big2 = fmt.Sprintf("%d", (int64(maxInt)/2)+1) - big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) - ) - - var vectors = []struct { - extHdrs map[string]string // Input data - sparseMap []sparseEntry // Expected sparse entries to be outputted - err error // Expected errors that may be raised - }{{ - extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, - err: ErrHeader, - }, { - extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big1, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big2, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big3, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - 
paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, - }} - - for i, v := range vectors { - sp, err := readGNUSparseMap0x1(v.extHdrs) - if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { - t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } - } -} - -func TestReadGNUSparseMap1x0(t *testing.T) { - var sp = []sparseEntry{{1, 2}, {3, 4}} - for i := 0; i < 98; i++ { - sp = append(sp, sparseEntry{54321, 12345}) - } - - var vectors = []struct { - input string // Input data - sparseMap []sparseEntry // Expected sparse entries to be outputted - cnt int // Expected number of bytes read - err error // Expected errors that may be raised - }{{ - input: "", - cnt: 0, - err: io.ErrUnexpectedEOF, - }, { - input: "ab", - cnt: 2, - err: io.ErrUnexpectedEOF, - }, { - input: strings.Repeat("\x00", 512), - cnt: 512, - err: io.ErrUnexpectedEOF, - }, { - input: strings.Repeat("\x00", 511) + "\n", - cnt: 512, - err: ErrHeader, - }, { - input: strings.Repeat("\n", 512), - cnt: 512, - err: ErrHeader, - }, { - input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), - sparseMap: []sparseEntry{}, - cnt: 512, - }, { - input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), - sparseMap: []sparseEntry{}, - cnt: 1024, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), - sparseMap: []sparseEntry{{2, 3}}, - cnt: 1536, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), - cnt: 1536, - err: ErrHeader, - }, { - 
input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), - cnt: 1536, - err: io.ErrUnexpectedEOF, - }, { - input: "-1\n2\n\n" + strings.Repeat("\x00", 506), - cnt: 512, - err: ErrHeader, - }, { - input: "1\nk\n2\n" + strings.Repeat("\x00", 506), - cnt: 512, - err: ErrHeader, - }, { - input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), - cnt: 2560, - sparseMap: sp, - }} - - for i, v := range vectors { - r := strings.NewReader(v.input) - sp, err := readGNUSparseMap1x0(r) - if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { - t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) - } - if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { - t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } + if _, err := tr.Next(); err != io.EOF { + t.Fatalf("Next(): got %v, want EOF", err) + } + }) } } func TestUninitializedRead(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) + f, err := os.Open("testdata/gnu.tar") if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -867,7 +784,7 @@ func TestReadTruncation(t *testing.T) { data2 += strings.Repeat("\x00", 10*512) trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes - var vectors = []struct { + vectors := []struct { input string // Input stream cnt int // Expected number of headers read err error // Expected error outcome @@ -903,8 +820,7 @@ func TestReadTruncation(t *testing.T) { {pax + trash[:1], 0, io.ErrUnexpectedEOF}, {pax + trash[:511], 0, io.ErrUnexpectedEOF}, {sparse[:511], 0, io.ErrUnexpectedEOF}, - // TODO(dsnet): This should pass, but currently fails. 
- // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:512], 0, io.ErrUnexpectedEOF}, {sparse[:3584], 1, io.EOF}, {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header {sparse[:9216], 1, io.EOF}, @@ -949,17 +865,17 @@ func TestReadTruncation(t *testing.T) { } cnt++ if s2 == "manual" { - if _, err = io.Copy(ioutil.Discard, tr); err != nil { + if _, err = tr.writeTo(ioutil.Discard); err != nil { break } } } if err != v.err { - t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + t.Errorf("test %d, NewReader(%s) with %s discard: got %v, want %v", i, s1, s2, err, v.err) } if cnt != v.cnt { - t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + t.Errorf("test %d, NewReader(%s) with %s discard: got %d headers, want %d headers", i, s1, s2, cnt, v.cnt) } } @@ -1001,7 +917,7 @@ func TestReadHeaderOnly(t *testing.T) { t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) } for i := 0; i < 8; i++ { - var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] + hdr1, hdr2 := hdrs[i+0], hdrs[i+8] hdr1.Size, hdr2.Size = 0, 0 if !reflect.DeepEqual(*hdr1, *hdr2) { t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) @@ -1009,116 +925,687 @@ func TestReadHeaderOnly(t *testing.T) { } } -func TestParsePAXRecord(t *testing.T) { - var medName = strings.Repeat("CD", 50) - var longName = strings.Repeat("AB", 100) +func TestMergePAX(t *testing.T) { + vectors := []struct { + in map[string]string + want *Header + ok bool + }{{ + in: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + want: &Header{ + Name: "a/b/c", + Uid: 1000, + ModTime: time.Unix(1350244992, 23960108), + PAXRecords: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + }, + ok: true, + }, { + in: map[string]string{ + "gid": "gtgergergersagersgers", + }, + ok: false, + }, { + in: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + want: 
&Header{ + Xattrs: map[string]string{"key": "value"}, + PAXRecords: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + }, + ok: true, + }} - var vectors = []struct { - input string - residual string - outputKey string - outputVal string - ok bool - }{ - {"6 k=v\n\n", "\n", "k", "v", true}, - {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, - {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, - {"110 path=" + medName + "\n", "", "path", medName, true}, - {"9 foo=ba\n", "", "foo", "ba", true}, - {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, - {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, - {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, - {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, - {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, - {"1 k=1\n", "1 k=1\n", "", "", false}, - {"6 k~1\n", "6 k~1\n", "", "", false}, - {"6_k=1\n", "6_k=1\n", "", "", false}, - {"6 k=1 ", "6 k=1 ", "", "", false}, - {"632 k=1\n", "632 k=1\n", "", "", false}, - {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, - {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, - {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, - } - - for _, v := range vectors { - key, val, res, err := parsePAXRecord(v.input) - ok := (err == nil) - if v.ok != ok { - if v.ok { - t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) - } else { - t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) - } + for i, v := range vectors { + got := new(Header) + err := mergePAX(got, v.in) + if v.ok && !reflect.DeepEqual(*got, *v.want) { + t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want) } - if ok && (key != v.outputKey || val != v.outputVal) { - t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", - v.input, key, val, v.outputKey, v.outputVal) - } - if res != v.residual { - 
t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", - v.input, res, v.residual) + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, mergePAX(...): got %v, want %v", i, ok, v.ok) } } } -func TestParseNumeric(t *testing.T) { - var vectors = []struct { - input string - output int64 - ok bool +func TestParsePAX(t *testing.T) { + vectors := []struct { + in string + want map[string]string + ok bool }{ - // Test base-256 (binary) encoded values. - {"", 0, true}, - {"\x80", 0, true}, - {"\x80\x00", 0, true}, - {"\x80\x00\x00", 0, true}, - {"\xbf", (1 << 6) - 1, true}, - {"\xbf\xff", (1 << 14) - 1, true}, - {"\xbf\xff\xff", (1 << 22) - 1, true}, - {"\xff", -1, true}, - {"\xff\xff", -1, true}, - {"\xff\xff\xff", -1, true}, - {"\xc0", -1 * (1 << 6), true}, - {"\xc0\x00", -1 * (1 << 14), true}, - {"\xc0\x00\x00", -1 * (1 << 22), true}, - {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, - {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, - {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, - {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, - {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, - {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, - {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, - {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, - {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, - - // Test base-8 (octal) encoded values. 
- {"0000000\x00", 0, true}, - {" \x0000000\x00", 0, true}, - {" \x0000003\x00", 3, true}, - {"00000000227\x00", 0227, true}, - {"032033\x00 ", 032033, true}, - {"320330\x00 ", 0320330, true}, - {"0000660\x00 ", 0660, true}, - {"\x00 0000660\x00 ", 0660, true}, - {"0123456789abcdef", 0, false}, - {"0123456789\x00abcdef", 0, false}, - {"01234567\x0089abcdef", 342391, true}, - {"0123\x7e\x5f\x264123", 0, false}, + {"", nil, true}, + {"6 k=1\n", map[string]string{"k": "1"}, true}, + {"10 a=name\n", map[string]string{"a": "name"}, true}, + {"9 a=name\n", map[string]string{"a": "name"}, true}, + {"30 mtime=1350244992.023960108\n", map[string]string{"mtime": "1350244992.023960108"}, true}, + {"3 somelongkey=\n", nil, false}, + {"50 tooshort=\n", nil, false}, + {"13 key1=haha\n13 key2=nana\n13 key3=kaka\n", + map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true}, + {"13 key1=val1\n13 key2=val2\n8 key1=\n", + map[string]string{"key1": "", "key2": "val2"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" + + "23 GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" + + "23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n", + map[string]string{paxGNUSparseSize: "10", paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "1,2,3,4"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.numbytes=2\n23 GNU.sparse.offset=1\n", + nil, false}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.offset=1,2\n25 GNU.sparse.numbytes=2\n", + nil, false}, } - for _, v := range vectors { - var p parser - num := p.parseNumeric([]byte(v.input)) - ok := (p.err == nil) - if v.ok != ok { - if v.ok { - t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) - } else { - t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) - } + for i, v := range vectors { + r := strings.NewReader(v.in) + got, err := parsePAX(r) + if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { + 
t.Errorf("test %d, parsePAX():\ngot %v\nwant %v", i, got, v.want) } - if ok && num != v.output { - t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, parsePAX(): got %v, want %v", i, ok, v.ok) + } + } +} + +func TestReadOldGNUSparseMap(t *testing.T) { + populateSparseMap := func(sa sparseArray, sps []string) []string { + for i := 0; len(sps) > 0 && i < sa.MaxEntries(); i++ { + copy(sa.Entry(i), sps[0]) + sps = sps[1:] + } + if len(sps) > 0 { + copy(sa.IsExtended(), "\x80") + } + return sps + } + + makeInput := func(format Format, size string, sps ...string) (out []byte) { + // Write the initial GNU header. + var blk block + gnu := blk.GNU() + sparse := gnu.Sparse() + copy(gnu.RealSize(), size) + sps = populateSparseMap(sparse, sps) + if format != FormatUnknown { + blk.SetFormat(format) + } + out = append(out, blk[:]...) + + // Write extended sparse blocks. + for len(sps) > 0 { + var blk block + sps = populateSparseMap(blk.Sparse(), sps) + out = append(out, blk[:]...) 
+ } + return out + } + + makeSparseStrings := func(sp []sparseEntry) (out []string) { + var f formatter + for _, s := range sp { + var b [24]byte + f.formatNumeric(b[:12], s.Offset) + f.formatNumeric(b[12:], s.Length) + out = append(out, string(b[:])) + } + return out + } + + vectors := []struct { + input []byte + wantMap sparseDatas + wantSize int64 + wantErr error + }{{ + input: makeInput(FormatUnknown, ""), + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "1234", "fewa"), + wantSize: 01234, + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "0031"), + wantSize: 031, + }, { + input: makeInput(FormatGNU, "80"), + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "1234", + makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(FormatGNU, "1234", + append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(FormatGNU, "3333", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + wantSize: 03333, + }, { + input: makeInput(FormatGNU, "", + append(append( + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}), + []string{"", ""}...), + makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}}, + }, 
{ + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...), + wantMap: sparseDatas{{10 << 30, 512}, {20 << 30, 512}}, + }} + + for i, v := range vectors { + var blk block + var hdr Header + v.input = v.input[copy(blk[:], v.input):] + tr := Reader{r: bytes.NewReader(v.input)} + got, err := tr.readOldGNUSparseMap(&hdr, &blk) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + } +} + +func TestReadGNUSparsePAXHeaders(t *testing.T) { + padInput := func(s string) string { + return s + string(zeroBlock[:blockPadding(int64(len(s)))]) + } + + vectors := []struct { + inputData string + inputHdrs map[string]string + wantMap sparseDatas + wantSize int64 + wantName string + wantErr error + }{{ + inputHdrs: nil, + wantErr: nil, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10), + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4\x00", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0, 1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,02,3", + paxGNUSparseRealSize: "4321", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 4321, + }, { + inputHdrs: map[string]string{ + 
paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,one1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + paxGNUSparseSize: "1234", + paxGNUSparseRealSize: "4321", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 1234, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "1", + paxGNUSparseMap: "10737418240,512", + paxGNUSparseSize: "10737418240", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{10737418240, 512}}, + wantSize: 10737418240, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "0", + paxGNUSparseMap: "", + }, + wantMap: sparseDatas{}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "1", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "1", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0\n")[:blockSize-1] + "#", + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("ab\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput("1\n2\n3\n"), + inputHdrs: 
map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{2, 3}}, + }, { + inputData: padInput("1\n2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("1\n2\n\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: string(zeroBlock[:]) + padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 1}}, + }, { + inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 2}}, + }, { + inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{10737418240, 512}, {21474836480, 512}}, + }, { + inputData: padInput("100\n" + func() string { + var ss []string + for i := 0; i < 100; i++ { + ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512)) + } + return strings.Join(ss, "") + }()), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: func() (spd sparseDatas) { + for i := 0; i < 100; i++ { + spd = append(spd, sparseEntry{int64(i) << 30, 512}) + } + return spd + }(), + }} + + for i, v := range vectors { + var hdr Header + hdr.PAXRecords = v.inputHdrs + r := strings.NewReader(v.inputData + "#") // Add canary byte + tr := Reader{curr: ®FileReader{r, 
int64(r.Len())}} + got, err := tr.readGNUSparsePAXHeaders(&hdr) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + if hdr.Name != v.wantName { + t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName) + } + if v.wantErr == nil && r.Len() == 0 { + t.Errorf("test %d, canary byte unexpectedly consumed", i) + } + } +} + +// testNonEmptyReader wraps an io.Reader and ensures that +// Read is never called with an empty buffer. +type testNonEmptyReader struct{ io.Reader } + +func (r testNonEmptyReader) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, errors.New("unexpected empty Read call") + } + return r.Reader.Read(b) +} + +func TestFileReader(t *testing.T) { + type ( + testRead struct { // Read(cnt) == (wantStr, wantErr) + cnt int + wantStr string + wantErr error + } + testWriteTo struct { // WriteTo(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt + wantLCnt int64 + wantPCnt int64 + } + testFnc interface{} // testRead | testWriteTo | testRemaining + ) + + type ( + makeReg struct { + str string + size int64 + } + makeSparse struct { + makeReg makeReg + spd sparseDatas + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{"", 0}, + tests: []testFnc{ + testRemaining{0, 0}, + testRead{0, "", io.EOF}, + testRead{1, "", io.EOF}, + testWriteTo{nil, 0, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{"", 1}, + tests: []testFnc{ + testRemaining{1, 1}, + testRead{5, "", io.ErrUnexpectedEOF}, + testWriteTo{nil, 0, 
io.ErrUnexpectedEOF}, + testRemaining{1, 1}, + }, + }, { + maker: makeReg{"hello", 5}, + tests: []testFnc{ + testRemaining{5, 5}, + testRead{5, "hello", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{"hello, world", 50}, + tests: []testFnc{ + testRemaining{50, 50}, + testRead{7, "hello, ", nil}, + testRemaining{43, 43}, + testRead{5, "world", nil}, + testRemaining{38, 38}, + testWriteTo{nil, 0, io.ErrUnexpectedEOF}, + testRead{1, "", io.ErrUnexpectedEOF}, + testRemaining{38, 38}, + }, + }, { + maker: makeReg{"hello, world", 5}, + tests: []testFnc{ + testRemaining{5, 5}, + testRead{0, "", nil}, + testRead{4, "hell", nil}, + testRemaining{1, 1}, + testWriteTo{fileOps{"o"}, 1, nil}, + testRemaining{0, 0}, + testWriteTo{nil, 0, nil}, + testRead{0, "", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{3, "ab\x00", nil}, + testRead{10, "\x00\x00cde", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWriteTo{fileOps{"ab", int64(3), "cde"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, + tests: []testFnc{ + testRemaining{10, 5}, + testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, + tests: []testFnc{ + testRemaining{10, 5}, + testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, + testRemaining{4, 2}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{8, 
"\x00abc\x00\x00de", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, + tests: []testFnc{ + testWriteTo{fileOps{int64(1), "abc", int64(2), "de", int64(1), "\x00"}, 10, nil}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2}, + tests: []testFnc{ + testRead{100, "\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00ab", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00ab", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + 
testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRemaining{15, 13}, + testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, + testWriteTo{nil, 0, errUnrefData}, + testRemaining{0, 5}, + }, + }, { + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRemaining{15, 13}, + testWriteTo{fileOps{int64(1), "abc", int64(2), "defgh", int64(4)}, 15, errUnrefData}, + testRead{100, "", errUnrefData}, + testRemaining{0, 5}, + }, + }} + + for i, v := range vectors { + var fr fileReader + switch maker := v.maker.(type) { + case makeReg: + r := testNonEmptyReader{strings.NewReader(maker.str)} + fr = ®FileReader{r, maker.size} + case makeSparse: + if !validateSparseEntries(maker.spd, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.spd) + } + sph := invertSparseEntries(maker.spd, maker.size) + r := testNonEmptyReader{strings.NewReader(maker.makeReg.str)} + fr = ®FileReader{r, maker.makeReg.size} + fr = &sparseFileReader{fr, sph, 0} + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testRead: + b := make([]byte, tf.cnt) + n, err := fr.Read(b) + if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { + t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) + } + case testWriteTo: + f := &testFile{ops: tf.ops} + got, err := fr.WriteTo(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d.%d, WriteTo(): %v", i, j, err) + } else if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, WriteTo() = (%d, %v), want (%d, %v)", i, j, got, 
err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) + } + case testRemaining: + if got := fr.LogicalRemaining(); got != tf.wantLCnt { + t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) + } + if got := fr.PhysicalRemaining(); got != tf.wantPCnt { + t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } } } } diff --git a/archive/tar/stat_atim.go b/archive/tar/stat_actime1.go similarity index 100% rename from archive/tar/stat_atim.go rename to archive/tar/stat_actime1.go diff --git a/archive/tar/stat_atimespec.go b/archive/tar/stat_actime2.go similarity index 100% rename from archive/tar/stat_atimespec.go rename to archive/tar/stat_actime2.go diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index cb843db..868105f 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -8,6 +8,10 @@ package tar import ( "os" + "os/user" + "runtime" + "strconv" + "sync" "syscall" ) @@ -15,6 +19,10 @@ func init() { sysStat = statUnix } +// userMap and groupMap caches UID and GID lookups for performance reasons. +// The downside is that renaming uname or gname by the OS never takes effect. +var userMap, groupMap sync.Map // map[int]string + func statUnix(fi os.FileInfo, h *Header) error { sys, ok := fi.Sys().(*syscall.Stat_t) if !ok { @@ -22,11 +30,67 @@ func statUnix(fi os.FileInfo, h *Header) error { } h.Uid = int(sys.Uid) h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. + + // Best effort at populating Uname and Gname. + // The os/user functions may fail for any number of reasons + // (not implemented on that platform, cgo not enabled, etc). 
+ if u, ok := userMap.Load(h.Uid); ok { + h.Uname = u.(string) + } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil { + h.Uname = u.Username + userMap.Store(h.Uid, h.Uname) + } + if g, ok := groupMap.Load(h.Gid); ok { + h.Gname = g.(string) + } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil { + h.Gname = g.Name + groupMap.Store(h.Gid, h.Gname) + } + h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? + + // Best effort at populating Devmajor and Devminor. + if h.Typeflag == TypeChar || h.Typeflag == TypeBlock { + dev := uint64(sys.Rdev) // May be int32 or uint32 + switch runtime.GOOS { + case "linux": + // Copied from golang.org/x/sys/unix/dev_linux.go. + major := uint32((dev & 0x00000000000fff00) >> 8) + major |= uint32((dev & 0xfffff00000000000) >> 32) + minor := uint32((dev & 0x00000000000000ff) >> 0) + minor |= uint32((dev & 0x00000ffffff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "darwin": + // Copied from golang.org/x/sys/unix/dev_darwin.go. + major := uint32((dev >> 24) & 0xff) + minor := uint32(dev & 0xffffff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "dragonfly": + // Copied from golang.org/x/sys/unix/dev_dragonfly.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "freebsd": + // Copied from golang.org/x/sys/unix/dev_freebsd.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "netbsd": + // Copied from golang.org/x/sys/unix/dev_netbsd.go. + major := uint32((dev & 0x000fff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xfff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "openbsd": + // Copied from golang.org/x/sys/unix/dev_openbsd.go. 
+ major := uint32((dev & 0x0000ff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xffff0000) >> 8) + h.Devmajor, h.Devminor = int64(major), int64(minor) + default: + // TODO: Implement solaris (see https://golang.org/issue/8106) + } + } return nil } diff --git a/archive/tar/strconv.go b/archive/tar/strconv.go new file mode 100644 index 0000000..d144485 --- /dev/null +++ b/archive/tar/strconv.go @@ -0,0 +1,326 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.IndexByte(s, 0) >= 0 +} + +// isASCII reports whether the input is an ASCII C-style string. +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 || c == 0x00 { + return false + } + } + return true +} + +// toASCII converts the input to an ASCII C-style string. +// This a best effort conversion, so invalid characters are dropped. +func toASCII(s string) string { + if isASCII(s) { + return s + } + b := make([]byte, 0, len(s)) + for _, c := range s { + if c < 0x80 && c != 0x00 { + b = append(b, byte(c)) + } + } + return string(b) +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) + } + return string(b) +} + +// formatString copies s into b, NUL-terminating if possible. 
+func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + } + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } + + // Some buggy readers treat regular files with a trailing slash + // in the V7 path field as a directory even though the full path + // recorded elsewhere (e.g., via PAX record) contains no trailing slash. + if len(s) > len(b) && b[len(b)-1] == '/' { + n := len(strings.TrimRight(s[:len(b)], "/")) + b[n] = 0 // Replace trailing slash with NUL terminator + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + binBits := uint(n-1) * 8 + return n >= 9 || (x >= -1< 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. 
+ return p.parseOctal(b) +} + +// formatNumeric encodes x into b using base-8 (octal) encoding if possible. +// Otherwise it will attempt to use base-256 (binary) encoding. +func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInOctal(len(b), x) { + f.formatOctal(b, x) + return + } + + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + if !fitsInOctal(len(b), x) { + x = 0 // Last resort, just write zero + f.err = ErrFieldTooLong + } + + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// fitsInOctal reports whether the integer x fits in a field n-bytes long +// using octal encoding with the appropriate NUL terminator. +func fitsInOctal(n int, x int64) bool { + octBits := uint(n-1) * 3 + return x >= 0 && (n >= 22 || x < 1<<octBits) +} + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn := s, "" + if pos := strings.IndexByte(s, '.'); pos >= 0 { + ss, sn = s[:pos], s[pos+1:] + } + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds.
+ if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*nsecs), nil // Negative correction + } + return time.Unix(secs, nsecs), nil +} + +// formatPAXTime converts ts into a time of the form %d.%d as described in the +// PAX specification. This function is capable of negative timestamps. +func formatPAXTime(ts time.Time) (s string) { + secs, nsecs := ts.Unix(), ts.Nanosecond() + if nsecs == 0 { + return strconv.FormatInt(secs, 10) + } + + // If seconds is negative, then perform correction. + sign := "" + if secs < 0 { + sign = "-" // Remember sign + secs = -(secs + 1) // Add a second to secs + nsecs = -(nsecs - 1E9) // Take that second away from nsecs + } + return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") +} + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. 
+ eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + k, v = rec[:eq], rec[eq+1:] + + if !validPAXRecord(k, v) { + return "", "", s, ErrHeader + } + return k, v, rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) (string, error) { + if !validPAXRecord(k, v) { + return "", ErrHeader + } + + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := strconv.Itoa(size) + " " + k + "=" + v + "\n" + + // Final adjustment if adding size field increased the record size. + if len(record) != size { + size = len(record) + record = strconv.Itoa(size) + " " + k + "=" + v + "\n" + } + return record, nil +} + +// validPAXRecord reports whether the key-value pair is valid where each +// record is formatted as: +// "%d %s=%s\n" % (size, key, value) +// +// Keys and values should be UTF-8, but the number of bad writers out there +// forces us to be a more liberal. +// Thus, we only reject all keys with NUL, and only reject NULs in values +// for the PAX version of the USTAR string fields. +// The key must not contain an '=' character. +func validPAXRecord(k, v string) bool { + if k == "" || strings.IndexByte(k, '=') >= 0 { + return false + } + switch k { + case paxPath, paxLinkpath, paxUname, paxGname: + return !hasNUL(v) + default: + return !hasNUL(k) + } +} diff --git a/archive/tar/strconv_test.go b/archive/tar/strconv_test.go new file mode 100644 index 0000000..4cc388c --- /dev/null +++ b/archive/tar/strconv_test.go @@ -0,0 +1,434 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package tar + +import ( + "math" + "strings" + "testing" + "time" +) + +func TestFitsInBase256(t *testing.T) { + vectors := []struct { + in int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.in) + if ok != v.ok { + t.Errorf("fitsInBase256(%d, %d): got %v, want %v", v.in, v.width, ok, v.ok) + } + } +} + +func TestParseNumeric(t *testing.T) { + vectors := []struct { + in string + want int64 + ok bool + }{ + // Test base-256 (binary) encoded values. + {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. 
+ {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + got := p.parseNumeric([]byte(v.in)) + ok := (p.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.in) + } + } + if ok && got != v.want { + t.Errorf("parseNumeric(%q): got %d, want %d", v.in, got, v.want) + } + } +} + +func TestFormatNumeric(t *testing.T) { + vectors := []struct { + in int64 + want string + ok bool + }{ + // Test base-8 (octal) encoded values. + {0, "0\x00", true}, + {7, "7\x00", true}, + {8, "\x80\x08", true}, + {077, "77\x00", true}, + {0100, "\x80\x00\x40", true}, + {0, "0000000\x00", true}, + {0123, "0000123\x00", true}, + {07654321, "7654321\x00", true}, + {07777777, "7777777\x00", true}, + {010000000, "\x80\x00\x00\x00\x00\x20\x00\x00", true}, + {0, "00000000000\x00", true}, + {000001234567, "00001234567\x00", true}, + {076543210321, "76543210321\x00", true}, + {012345670123, "12345670123\x00", true}, + {077777777777, "77777777777\x00", true}, + {0100000000000, "\x80\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", true}, + {math.MaxInt64, "777777777777777777777\x00", true}, + + // Test base-256 (binary) encoded values. 
+ {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + got := make([]byte, len(v.want)) + f.formatNumeric(got, v.in) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.in) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.in) + } + } + if string(got) != v.want { + t.Errorf("formatNumeric(%d): got %q, want %q", v.in, got, v.want) + } + } +} + +func TestFitsInOctal(t *testing.T) { + vectors := []struct { + input int64 + width int + ok bool + }{ + {-1, 1, false}, + {-1, 2, false}, + {-1, 3, false}, + {0, 1, true}, + {0 + 1, 1, false}, + {0, 2, true}, + {07, 2, true}, + {07 + 1, 2, false}, + {0, 4, true}, + {0777, 4, true}, + {0777 + 1, 4, false}, + {0, 8, true}, + {07777777, 8, true}, + {07777777 + 1, 8, false}, + {0, 12, true}, + {077777777777, 12, true}, + {077777777777 + 1, 12, false}, + 
{math.MaxInt64, 22, true}, + {012345670123, 12, true}, + {01564164, 12, true}, + {-012345670123, 12, false}, + {-01564164, 12, false}, + {-1564164, 30, false}, + } + + for _, v := range vectors { + ok := fitsInOctal(v.width, v.input) + if ok != v.ok { + t.Errorf("checkOctal(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestParsePAXTime(t *testing.T) { + vectors := []struct { + in string + want time.Time + ok bool + }{ + {"1350244992.023960108", time.Unix(1350244992, 23960108), true}, + {"1350244992.02396010", time.Unix(1350244992, 23960100), true}, + {"1350244992.0239601089", time.Unix(1350244992, 23960108), true}, + {"1350244992.3", time.Unix(1350244992, 300000000), true}, + {"1350244992", time.Unix(1350244992, 0), true}, + {"-1.000000001", time.Unix(-1, -1e0+0e0), true}, + {"-1.000001", time.Unix(-1, -1e3+0e0), true}, + {"-1.001000", time.Unix(-1, -1e6+0e0), true}, + {"-1", time.Unix(-1, -0e0+0e0), true}, + {"-1.999000", time.Unix(-1, -1e9+1e6), true}, + {"-1.999999", time.Unix(-1, -1e9+1e3), true}, + {"-1.999999999", time.Unix(-1, -1e9+1e0), true}, + {"0.000000001", time.Unix(0, 1e0+0e0), true}, + {"0.000001", time.Unix(0, 1e3+0e0), true}, + {"0.001000", time.Unix(0, 1e6+0e0), true}, + {"0", time.Unix(0, 0e0), true}, + {"0.999000", time.Unix(0, 1e9-1e6), true}, + {"0.999999", time.Unix(0, 1e9-1e3), true}, + {"0.999999999", time.Unix(0, 1e9-1e0), true}, + {"1.000000001", time.Unix(+1, +1e0-0e0), true}, + {"1.000001", time.Unix(+1, +1e3-0e0), true}, + {"1.001000", time.Unix(+1, +1e6-0e0), true}, + {"1", time.Unix(+1, +0e0-0e0), true}, + {"1.999000", time.Unix(+1, +1e9-1e6), true}, + {"1.999999", time.Unix(+1, +1e9-1e3), true}, + {"1.999999999", time.Unix(+1, +1e9-1e0), true}, + {"-1350244992.023960108", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.02396010", time.Unix(-1350244992, -23960100), true}, + {"-1350244992.0239601089", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.3", time.Unix(-1350244992, -300000000), 
true}, + {"-1350244992", time.Unix(-1350244992, 0), true}, + {"", time.Time{}, false}, + {"0", time.Unix(0, 0), true}, + {"1.", time.Unix(1, 0), true}, + {"0.0", time.Unix(0, 0), true}, + {".5", time.Time{}, false}, + {"-1.3", time.Unix(-1, -3e8), true}, + {"-1.0", time.Unix(-1, -0e0), true}, + {"-0.0", time.Unix(-0, -0e0), true}, + {"-0.1", time.Unix(-0, -1e8), true}, + {"-0.01", time.Unix(-0, -1e7), true}, + {"-0.99", time.Unix(-0, -99e7), true}, + {"-0.98", time.Unix(-0, -98e7), true}, + {"-1.1", time.Unix(-1, -1e8), true}, + {"-1.01", time.Unix(-1, -1e7), true}, + {"-2.99", time.Unix(-2, -99e7), true}, + {"-5.98", time.Unix(-5, -98e7), true}, + {"-", time.Time{}, false}, + {"+", time.Time{}, false}, + {"-1.-1", time.Time{}, false}, + {"99999999999999999999999999999999999999999999999", time.Time{}, false}, + {"0.123456789abcdef", time.Time{}, false}, + {"foo", time.Time{}, false}, + {"\x00", time.Time{}, false}, + {"𝟵𝟴𝟳𝟲𝟱.𝟰𝟯𝟮𝟭𝟬", time.Time{}, false}, // Unicode numbers (U+1D7EC to U+1D7F5) + {"98765﹒43210", time.Time{}, false}, // Unicode period (U+FE52) + } + + for _, v := range vectors { + ts, err := parsePAXTime(v.in) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXTime(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXTime(%q): got parsing success, want failure", v.in) + } + } + if ok && !ts.Equal(v.want) { + t.Errorf("parsePAXTime(%q): got (%ds %dns), want (%ds %dns)", + v.in, ts.Unix(), ts.Nanosecond(), v.want.Unix(), v.want.Nanosecond()) + } + } +} + +func TestFormatPAXTime(t *testing.T) { + vectors := []struct { + sec, nsec int64 + want string + }{ + {1350244992, 0, "1350244992"}, + {1350244992, 300000000, "1350244992.3"}, + {1350244992, 23960100, "1350244992.0239601"}, + {1350244992, 23960108, "1350244992.023960108"}, + {+1, +1E9 - 1E0, "1.999999999"}, + {+1, +1E9 - 1E3, "1.999999"}, + {+1, +1E9 - 1E6, "1.999"}, + {+1, +0E0 - 0E0, "1"}, + {+1, +1E6 - 0E0, "1.001"}, + {+1, +1E3 - 0E0, "1.000001"}, + {+1, 
+1E0 - 0E0, "1.000000001"}, + {0, 1E9 - 1E0, "0.999999999"}, + {0, 1E9 - 1E3, "0.999999"}, + {0, 1E9 - 1E6, "0.999"}, + {0, 0E0, "0"}, + {0, 1E6 + 0E0, "0.001"}, + {0, 1E3 + 0E0, "0.000001"}, + {0, 1E0 + 0E0, "0.000000001"}, + {-1, -1E9 + 1E0, "-1.999999999"}, + {-1, -1E9 + 1E3, "-1.999999"}, + {-1, -1E9 + 1E6, "-1.999"}, + {-1, -0E0 + 0E0, "-1"}, + {-1, -1E6 + 0E0, "-1.001"}, + {-1, -1E3 + 0E0, "-1.000001"}, + {-1, -1E0 + 0E0, "-1.000000001"}, + {-1350244992, 0, "-1350244992"}, + {-1350244992, -300000000, "-1350244992.3"}, + {-1350244992, -23960100, "-1350244992.0239601"}, + {-1350244992, -23960108, "-1350244992.023960108"}, + } + + for _, v := range vectors { + got := formatPAXTime(time.Unix(v.sec, v.nsec)) + if got != v.want { + t.Errorf("formatPAXTime(%ds, %dns): got %q, want %q", + v.sec, v.nsec, got, v.want) + } + } +} + +func TestParsePAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + in string + wantRes string + wantKey string + wantVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "17 \x00hello=\x00world\n", "", "", false}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", 
"", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.in) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.in) + } + } + if v.ok && (key != v.wantKey || val != v.wantVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.in, key, val, v.wantKey, v.wantVal) + } + if res != v.wantRes { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.in, res, v.wantRes) + } + } +} + +func TestFormatPAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + inKey string + inVal string + want string + ok bool + }{ + {"k", "v", "6 k=v\n", true}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n", true}, + {"path", longName, "210 path=" + longName + "\n", true}, + {"path", medName, "110 path=" + medName + "\n", true}, + {"foo", "ba", "9 foo=ba\n", true}, + {"foo", "bar", "11 foo=bar\n", true}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n", true}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n", true}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n", true}, + {"xhello", "\x00world", "17 xhello=\x00world\n", true}, + {"path", "null\x00", "", false}, + {"null\x00", "value", "", false}, + {paxSchilyXattr + "key", "null\x00", "26 SCHILY.xattr.key=null\x00\n", true}, + } + + for _, v := range vectors { + got, err := formatPAXRecord(v.inKey, v.inVal) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatPAXRecord(%q, %q): got format failure, want success", v.inKey, v.inVal) + } else { + t.Errorf("formatPAXRecord(%q, %q): got format success, want failure", v.inKey, v.inVal) + } + } + if got != v.want { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inKey, v.inVal, got, v.want) + } + } +} diff --git 
a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 9ef319a..2676853 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -6,15 +6,208 @@ package tar import ( "bytes" + "errors" + "fmt" + "internal/testenv" + "io" "io/ioutil" + "math" "os" "path" + "path/filepath" "reflect" "strings" "testing" "time" ) +type testError struct{ error } + +type fileOps []interface{} // []T where T is (string | int64) + +// testFile is an io.ReadWriteSeeker where the IO operations performed +// on it must match the list of operations in ops. +type testFile struct { + ops fileOps + pos int64 +} + +func (f *testFile) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if len(f.ops) == 0 { + return 0, io.EOF + } + s, ok := f.ops[0].(string) + if !ok { + return 0, errors.New("unexpected Read operation") + } + + n := copy(b, s) + if len(s) > n { + f.ops[0] = s[n:] + } else { + f.ops = f.ops[1:] + } + f.pos += int64(len(b)) + return n, nil +} + +func (f *testFile) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if len(f.ops) == 0 { + return 0, errors.New("unexpected Write operation") + } + s, ok := f.ops[0].(string) + if !ok { + return 0, errors.New("unexpected Write operation") + } + + if !strings.HasPrefix(s, string(b)) { + return 0, testError{fmt.Errorf("got Write(%q), want Write(%q)", b, s)} + } + if len(s) > len(b) { + f.ops[0] = s[len(b):] + } else { + f.ops = f.ops[1:] + } + f.pos += int64(len(b)) + return len(b), nil +} + +func (f *testFile) Seek(pos int64, whence int) (int64, error) { + if pos == 0 && whence == io.SeekCurrent { + return f.pos, nil + } + if len(f.ops) == 0 { + return 0, errors.New("unexpected Seek operation") + } + s, ok := f.ops[0].(int64) + if !ok { + return 0, errors.New("unexpected Seek operation") + } + + if s != pos || whence != io.SeekCurrent { + return 0, testError{fmt.Errorf("got Seek(%d, %d), want Seek(%d, %d)", pos, whence, s, io.SeekCurrent)} + } + f.pos += s + f.ops = f.ops[1:] + return 
f.pos, nil +} + +func equalSparseEntries(x, y []sparseEntry) bool { + return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y) +} + +func TestSparseEntries(t *testing.T) { + vectors := []struct { + in []sparseEntry + size int64 + + wantValid bool // Result of validateSparseEntries + wantAligned []sparseEntry // Result of alignSparseEntries + wantInverted []sparseEntry // Result of invertSparseEntries + }{{ + in: []sparseEntry{}, size: 0, + wantValid: true, + wantInverted: []sparseEntry{{0, 0}}, + }, { + in: []sparseEntry{}, size: 5000, + wantValid: true, + wantInverted: []sparseEntry{{0, 5000}}, + }, { + in: []sparseEntry{{0, 5000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{0, 5000}}, + wantInverted: []sparseEntry{{5000, 0}}, + }, { + in: []sparseEntry{{1000, 4000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{1024, 3976}}, + wantInverted: []sparseEntry{{0, 1000}, {5000, 0}}, + }, { + in: []sparseEntry{{0, 3000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{0, 2560}}, + wantInverted: []sparseEntry{{3000, 2000}}, + }, { + in: []sparseEntry{{3000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{3072, 1928}}, + wantInverted: []sparseEntry{{0, 3000}, {5000, 0}}, + }, { + in: []sparseEntry{{2000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{2048, 1536}}, + wantInverted: []sparseEntry{{0, 2000}, {4000, 1000}}, + }, { + in: []sparseEntry{{0, 2000}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []sparseEntry{{0, 1536}, {8192, 1808}}, + wantInverted: []sparseEntry{{2000, 6000}, {10000, 0}}, + }, { + in: []sparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []sparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}}, + wantInverted: []sparseEntry{{10000, 0}}, + }, { + in: []sparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, 
{5000, 0}}, size: 5000, + wantValid: true, + wantInverted: []sparseEntry{{0, 5000}}, + }, { + in: []sparseEntry{{1, 0}}, size: 0, + wantValid: false, + }, { + in: []sparseEntry{{-1, 0}}, size: 100, + wantValid: false, + }, { + in: []sparseEntry{{0, -1}}, size: 100, + wantValid: false, + }, { + in: []sparseEntry{{0, 0}}, size: -100, + wantValid: false, + }, { + in: []sparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []sparseEntry{{1, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []sparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64, + wantValid: false, + }, { + in: []sparseEntry{{3, 3}}, size: 5, + wantValid: false, + }, { + in: []sparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3, + wantValid: false, + }, { + in: []sparseEntry{{1, 3}, {2, 2}}, size: 10, + wantValid: false, + }} + + for i, v := range vectors { + gotValid := validateSparseEntries(v.in, v.size) + if gotValid != v.wantValid { + t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid) + } + if !v.wantValid { + continue + } + gotAligned := alignSparseEntries(append([]sparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotAligned, v.wantAligned) { + t.Errorf("test %d, alignSparseEntries():\ngot %v\nwant %v", i, gotAligned, v.wantAligned) + } + gotInverted := invertSparseEntries(append([]sparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotInverted, v.wantInverted) { + t.Errorf("test %d, inverseSparseEntries():\ngot %v\nwant %v", i, gotInverted, v.wantInverted) + } + } +} + func TestFileInfoHeader(t *testing.T) { fi, err := os.Stat("testdata/small.txt") if err != nil { @@ -27,7 +220,7 @@ func TestFileInfoHeader(t *testing.T) { if g, e := h.Name, "small.txt"; g != e { t.Errorf("Name = %q; want %q", g, e) } - if g, e := h.Mode, int64(fi.Mode().Perm())|c_ISREG; g != e { + if g, e := h.Mode, int64(fi.Mode().Perm()); g != e { t.Errorf("Mode = %#o; want %#o", g, e) } if g, e := h.Size, int64(5); g != e { @@ -55,7 
+248,7 @@ func TestFileInfoHeaderDir(t *testing.T) { t.Errorf("Name = %q; want %q", g, e) } // Ignoring c_ISGID for golang.org/issue/4867 - if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm())|c_ISDIR; g != e { + if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm()); g != e { t.Errorf("Mode = %#o; want %#o", g, e) } if g, e := h.Size, int64(0); g != e { @@ -67,38 +260,53 @@ func TestFileInfoHeaderDir(t *testing.T) { } func TestFileInfoHeaderSymlink(t *testing.T) { - h, err := FileInfoHeader(symlink{}, "some-target") + testenv.MustHaveSymlink(t) + + tmpdir, err := ioutil.TempDir("", "TestFileInfoHeaderSymlink") if err != nil { t.Fatal(err) } - if g, e := h.Name, "some-symlink"; g != e { + defer os.RemoveAll(tmpdir) + + link := filepath.Join(tmpdir, "link") + target := tmpdir + err = os.Symlink(target, link) + if err != nil { + t.Fatal(err) + } + fi, err := os.Lstat(link) + if err != nil { + t.Fatal(err) + } + + h, err := FileInfoHeader(fi, target) + if err != nil { + t.Fatal(err) + } + if g, e := h.Name, fi.Name(); g != e { t.Errorf("Name = %q; want %q", g, e) } - if g, e := h.Linkname, "some-target"; g != e { + if g, e := h.Linkname, target; g != e { t.Errorf("Linkname = %q; want %q", g, e) } + if g, e := h.Typeflag, byte(TypeSymlink); g != e { + t.Errorf("Typeflag = %v; want %v", g, e) + } } -type symlink struct{} - -func (symlink) Name() string { return "some-symlink" } -func (symlink) Size() int64 { return 0 } -func (symlink) Mode() os.FileMode { return os.ModeSymlink } -func (symlink) ModTime() time.Time { return time.Time{} } -func (symlink) IsDir() bool { return false } -func (symlink) Sys() interface{} { return nil } - func TestRoundTrip(t *testing.T) { data := []byte("some file contents") var b bytes.Buffer tw := NewWriter(&b) hdr := &Header{ - Name: "file.txt", - Uid: 1 << 21, // too big for 8 octal digits - Size: int64(len(data)), - // https://github.com/golang/go/commit/0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb#diff-d7bf2a98d7b57b6ff754ca406f1b7581R105 - 
ModTime: time.Now().AddDate(0, 0, 0).Round(1 * time.Second), + Name: "file.txt", + Uid: 1 << 21, // Too big for 8 octal digits + Size: int64(len(data)), + ModTime: time.Now().Round(time.Second), + PAXRecords: map[string]string{"uid": "2097152"}, + Format: FormatPAX, + Typeflag: TypeReg, } if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("tw.WriteHeader: %v", err) @@ -134,191 +342,514 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { - golden := []headerRoundTripTest{ + vectors := []headerRoundTripTest{{ // regular file. - { - h: &Header{ - Name: "test.txt", - Mode: 0644 | c_ISREG, - Size: 12, - ModTime: time.Unix(1360600916, 0), - Typeflag: TypeReg, - }, - fm: 0644, + h: &Header{ + Name: "test.txt", + Mode: 0644, + Size: 12, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeReg, }, + fm: 0644, + }, { // symbolic link. - { - h: &Header{ - Name: "link.txt", - Mode: 0777 | c_ISLNK, - Size: 0, - ModTime: time.Unix(1360600852, 0), - Typeflag: TypeSymlink, - }, - fm: 0777 | os.ModeSymlink, + h: &Header{ + Name: "link.txt", + Mode: 0777, + Size: 0, + ModTime: time.Unix(1360600852, 0), + Typeflag: TypeSymlink, }, + fm: 0777 | os.ModeSymlink, + }, { // character device node. - { - h: &Header{ - Name: "dev/null", - Mode: 0666 | c_ISCHR, - Size: 0, - ModTime: time.Unix(1360578951, 0), - Typeflag: TypeChar, - }, - fm: 0666 | os.ModeDevice | os.ModeCharDevice, + h: &Header{ + Name: "dev/null", + Mode: 0666, + Size: 0, + ModTime: time.Unix(1360578951, 0), + Typeflag: TypeChar, }, + fm: 0666 | os.ModeDevice | os.ModeCharDevice, + }, { // block device node. - { - h: &Header{ - Name: "dev/sda", - Mode: 0660 | c_ISBLK, - Size: 0, - ModTime: time.Unix(1360578954, 0), - Typeflag: TypeBlock, - }, - fm: 0660 | os.ModeDevice, + h: &Header{ + Name: "dev/sda", + Mode: 0660, + Size: 0, + ModTime: time.Unix(1360578954, 0), + Typeflag: TypeBlock, }, + fm: 0660 | os.ModeDevice, + }, { // directory. 
- { - h: &Header{ - Name: "dir/", - Mode: 0755 | c_ISDIR, - Size: 0, - ModTime: time.Unix(1360601116, 0), - Typeflag: TypeDir, - }, - fm: 0755 | os.ModeDir, + h: &Header{ + Name: "dir/", + Mode: 0755, + Size: 0, + ModTime: time.Unix(1360601116, 0), + Typeflag: TypeDir, }, + fm: 0755 | os.ModeDir, + }, { // fifo node. - { - h: &Header{ - Name: "dev/initctl", - Mode: 0600 | c_ISFIFO, - Size: 0, - ModTime: time.Unix(1360578949, 0), - Typeflag: TypeFifo, - }, - fm: 0600 | os.ModeNamedPipe, + h: &Header{ + Name: "dev/initctl", + Mode: 0600, + Size: 0, + ModTime: time.Unix(1360578949, 0), + Typeflag: TypeFifo, }, + fm: 0600 | os.ModeNamedPipe, + }, { // setuid. - { - h: &Header{ - Name: "bin/su", - Mode: 0755 | c_ISREG | c_ISUID, - Size: 23232, - ModTime: time.Unix(1355405093, 0), - Typeflag: TypeReg, - }, - fm: 0755 | os.ModeSetuid, + h: &Header{ + Name: "bin/su", + Mode: 0755 | c_ISUID, + Size: 23232, + ModTime: time.Unix(1355405093, 0), + Typeflag: TypeReg, }, + fm: 0755 | os.ModeSetuid, + }, { // setguid. - { - h: &Header{ - Name: "group.txt", - Mode: 0750 | c_ISREG | c_ISGID, - Size: 0, - ModTime: time.Unix(1360602346, 0), - Typeflag: TypeReg, - }, - fm: 0750 | os.ModeSetgid, + h: &Header{ + Name: "group.txt", + Mode: 0750 | c_ISGID, + Size: 0, + ModTime: time.Unix(1360602346, 0), + Typeflag: TypeReg, }, + fm: 0750 | os.ModeSetgid, + }, { // sticky. - { - h: &Header{ - Name: "sticky.txt", - Mode: 0600 | c_ISREG | c_ISVTX, - Size: 7, - ModTime: time.Unix(1360602540, 0), - Typeflag: TypeReg, - }, - fm: 0600 | os.ModeSticky, + h: &Header{ + Name: "sticky.txt", + Mode: 0600 | c_ISVTX, + Size: 7, + ModTime: time.Unix(1360602540, 0), + Typeflag: TypeReg, }, + fm: 0600 | os.ModeSticky, + }, { // hard link. 
- { - h: &Header{ - Name: "hard.txt", - Mode: 0644 | c_ISREG, - Size: 0, - Linkname: "file.txt", - ModTime: time.Unix(1360600916, 0), - Typeflag: TypeLink, - }, - fm: 0644, + h: &Header{ + Name: "hard.txt", + Mode: 0644, + Size: 0, + Linkname: "file.txt", + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, }, + fm: 0644, + }, { // More information. - { - h: &Header{ - Name: "info.txt", - Mode: 0600 | c_ISREG, - Size: 0, - Uid: 1000, - Gid: 1000, - ModTime: time.Unix(1360602540, 0), - Uname: "slartibartfast", - Gname: "users", - Typeflag: TypeReg, - }, - fm: 0600, + h: &Header{ + Name: "info.txt", + Mode: 0600, + Size: 0, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1360602540, 0), + Uname: "slartibartfast", + Gname: "users", + Typeflag: TypeReg, }, - } + fm: 0600, + }} - for i, g := range golden { - fi := g.h.FileInfo() + for i, v := range vectors { + fi := v.h.FileInfo() h2, err := FileInfoHeader(fi, "") if err != nil { t.Error(err) continue } if strings.Contains(fi.Name(), "/") { - t.Errorf("FileInfo of %q contains slash: %q", g.h.Name, fi.Name()) + t.Errorf("FileInfo of %q contains slash: %q", v.h.Name, fi.Name()) } - name := path.Base(g.h.Name) + name := path.Base(v.h.Name) if fi.IsDir() { name += "/" } if got, want := h2.Name, name; got != want { t.Errorf("i=%d: Name: got %v, want %v", i, got, want) } - if got, want := h2.Size, g.h.Size; got != want { + if got, want := h2.Size, v.h.Size; got != want { t.Errorf("i=%d: Size: got %v, want %v", i, got, want) } - if got, want := h2.Uid, g.h.Uid; got != want { + if got, want := h2.Uid, v.h.Uid; got != want { t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) } - if got, want := h2.Gid, g.h.Gid; got != want { + if got, want := h2.Gid, v.h.Gid; got != want { t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) } - if got, want := h2.Uname, g.h.Uname; got != want { + if got, want := h2.Uname, v.h.Uname; got != want { t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) } - if got, want := h2.Gname, 
g.h.Gname; got != want { + if got, want := h2.Gname, v.h.Gname; got != want { t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) } - if got, want := h2.Linkname, g.h.Linkname; got != want { + if got, want := h2.Linkname, v.h.Linkname; got != want { t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) } - if got, want := h2.Typeflag, g.h.Typeflag; got != want { - t.Logf("%#v %#v", g.h, fi.Sys()) + if got, want := h2.Typeflag, v.h.Typeflag; got != want { + t.Logf("%#v %#v", v.h, fi.Sys()) t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) } - if got, want := h2.Mode, g.h.Mode; got != want { + if got, want := h2.Mode, v.h.Mode; got != want { t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) } - if got, want := fi.Mode(), g.fm; got != want { + if got, want := fi.Mode(), v.fm; got != want { t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) } - if got, want := h2.AccessTime, g.h.AccessTime; got != want { + if got, want := h2.AccessTime, v.h.AccessTime; got != want { t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) } - if got, want := h2.ChangeTime, g.h.ChangeTime; got != want { + if got, want := h2.ChangeTime, v.h.ChangeTime; got != want { t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) } - if got, want := h2.ModTime, g.h.ModTime; got != want { + if got, want := h2.ModTime, v.h.ModTime; got != want { t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) } - if sysh, ok := fi.Sys().(*Header); !ok || sysh != g.h { + if sysh, ok := fi.Sys().(*Header); !ok || sysh != v.h { t.Errorf("i=%d: Sys didn't return original *Header", i) } } } + +func TestHeaderAllowedFormats(t *testing.T) { + vectors := []struct { + header *Header // Input header + paxHdrs map[string]string // Expected PAX headers that may be needed + formats Format // Expected formats that can encode the header + }{{ + header: &Header{}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777}, + formats: FormatUSTAR | FormatPAX 
| FormatGNU, + }, { + header: &Header{Size: 077777777777, Format: FormatUSTAR}, + formats: FormatUSTAR, + }, { + header: &Header{Size: 077777777777, Format: FormatPAX}, + formats: FormatUSTAR | FormatPAX, + }, { + header: &Header{Size: 077777777777, Format: FormatGNU}, + formats: FormatGNU, + }, { + header: &Header{Size: 077777777777 + 1}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777 + 1, Format: FormatPAX}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatPAX, + }, { + header: &Header{Size: 077777777777 + 1, Format: FormatGNU}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatGNU, + }, { + header: &Header{Mode: 07777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Mode: 07777777 + 1}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: -123}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: 1<<56 - 1}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: 1 << 56}, + formats: FormatUnknown, + }, { + header: &Header{Devmajor: -1 << 56}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: -1<<56 - 1}, + formats: FormatUnknown, + }, { + header: &Header{Name: "用戶名", Devmajor: -1 << 56}, + formats: FormatGNU, + }, { + header: &Header{Size: math.MaxInt64}, + paxHdrs: map[string]string{paxSize: "9223372036854775807"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Size: math.MinInt64}, + paxHdrs: map[string]string{paxSize: "-9223372036854775808"}, + formats: FormatUnknown, + }, { + header: &Header{Uname: "0123456789abcdef0123456789abcdef"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Uname: "0123456789abcdef0123456789abcdefx"}, + paxHdrs: map[string]string{paxUname: "0123456789abcdef0123456789abcdefx"}, + formats: FormatPAX, + }, { + header: &Header{Name: "foobar"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: 
strings.Repeat("a", nameSize)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: strings.Repeat("a", nameSize+1)}, + paxHdrs: map[string]string{paxPath: strings.Repeat("a", nameSize+1)}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Linkname: "用戶名"}, + paxHdrs: map[string]string{paxLinkpath: "用戶名"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Linkname: strings.Repeat("用戶名\x00", nameSize)}, + paxHdrs: map[string]string{paxLinkpath: strings.Repeat("用戶名\x00", nameSize)}, + formats: FormatUnknown, + }, { + header: &Header{Linkname: "\x00hello"}, + paxHdrs: map[string]string{paxLinkpath: "\x00hello"}, + formats: FormatUnknown, + }, { + header: &Header{Uid: 07777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Uid: 07777777 + 1}, + paxHdrs: map[string]string{paxUid: "2097152"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Xattrs: nil}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, + formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}, Format: FormatGNU}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, + formats: FormatUnknown, + }, { + header: &Header{Xattrs: map[string]string{"用戶名": "\x00hello"}}, + paxHdrs: map[string]string{paxSchilyXattr + "用戶名": "\x00hello"}, + formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo=bar": "baz"}}, + formats: FormatUnknown, + }, { + header: &Header{Xattrs: map[string]string{"foo": ""}}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": ""}, + formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(0, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(077777777777, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(077777777777+1, 0)}, + 
paxHdrs: map[string]string{paxMtime: "8589934592"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0)}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatUnknown, + }, { + header: &Header{ModTime: time.Unix(-1, 0)}, + paxHdrs: map[string]string{paxMtime: "-1"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 500)}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 0), Format: FormatPAX}, + formats: FormatUSTAR | FormatPAX, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatUSTAR, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatPAX}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatGNU, + }, { + header: &Header{ModTime: time.Unix(-1, 500)}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(0, 0)}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatUnknown, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: 
FormatPAX}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatPAX, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatGNU}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(-123, 0)}, + paxHdrs: map[string]string{paxAtime: "-123"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(-123, 0), Format: FormatPAX}, + paxHdrs: map[string]string{paxAtime: "-123"}, + formats: FormatPAX, + }, { + header: &Header{ChangeTime: time.Unix(123, 456)}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatUnknown, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatGNU, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatPAX}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatPAX, + }, { + header: &Header{Name: "foo/", Typeflag: TypeDir}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: "foo/", Typeflag: TypeReg}, + formats: FormatUnknown, + }, { + header: &Header{Name: "foo/", Typeflag: TypeSymlink}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }} + + for i, v := range vectors { + formats, paxHdrs, err := v.header.allowedFormats() + if formats != v.formats { + t.Errorf("test %d, allowedFormats(): got %v, want %v", i, formats, v.formats) + } + if formats&FormatPAX > 0 && !reflect.DeepEqual(paxHdrs, v.paxHdrs) && !(len(paxHdrs) == 0 && len(v.paxHdrs) == 0) { + t.Errorf("test %d, allowedFormats():\ngot %v\nwant %s", i, paxHdrs, v.paxHdrs) + } + if (formats != FormatUnknown) && (err != nil) { + t.Errorf("test %d, unexpected error: %v", i, err) + } + if (formats == FormatUnknown) && (err == nil) { + 
t.Errorf("test %d, got nil-error, want non-nil error", i) + } + } +} + +func Benchmark(b *testing.B) { + type file struct { + hdr *Header + body []byte + } + + vectors := []struct { + label string + files []file + }{{ + "USTAR", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3)}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5)}, + []byte("hello"), + }}, + }, { + "GNU", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3), Devmajor: -1}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5), Devmajor: -1}, + []byte("hello"), + }}, + }, { + "PAX", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3), Xattrs: map[string]string{"foo": "bar"}}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5), Xattrs: map[string]string{"foo": "bar"}}, + []byte("hello"), + }}, + }} + + b.Run("Writer", func(b *testing.B) { + for _, v := range vectors { + b.Run(v.label, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + // Writing to ioutil.Discard because we want to + // test purely the writer code and not bring in disk performance into this. + tw := NewWriter(ioutil.Discard) + for _, file := range v.files { + if err := tw.WriteHeader(file.hdr); err != nil { + b.Errorf("unexpected WriteHeader error: %v", err) + } + if _, err := tw.Write(file.body); err != nil { + b.Errorf("unexpected Write error: %v", err) + } + } + if err := tw.Close(); err != nil { + b.Errorf("unexpected Close error: %v", err) + } + } + }) + } + }) + + b.Run("Reader", func(b *testing.B) { + for _, v := range vectors { + var buf bytes.Buffer + var r bytes.Reader + + // Write the archive to a byte buffer. + tw := NewWriter(&buf) + for _, file := range v.files { + tw.WriteHeader(file.hdr) + tw.Write(file.body) + } + tw.Close() + b.Run(v.label, func(b *testing.B) { + b.ReportAllocs() + // Read from the byte buffer. 
+ for i := 0; i < b.N; i++ { + r.Reset(buf.Bytes()) + tr := NewReader(&r) + if _, err := tr.Next(); err != nil { + b.Errorf("unexpected Next error: %v", err) + } + if _, err := io.Copy(ioutil.Discard, tr); err != nil { + b.Errorf("unexpected Copy error : %v", err) + } + } + }) + } + }) + +} diff --git a/archive/tar/testdata/file-and-dir.tar b/archive/tar/testdata/file-and-dir.tar new file mode 100644 index 0000000000000000000000000000000000000000..c18d4283e38097edf81165094a4635a473693e94 GIT binary patch literal 2560 zcmXTUP0Y#BE2$`9pdB!P00ta1P}meE2U5adU}$JyY|NlwKszTPOfD@hNh|_dj}Re` zh3+}>Org6j@64Q%V!GQpYT;lBft1W5eOkK(J!_!xL0r`QZ(we2V#J_eN^6IK%pc+T SpB_OxYR6~@jE2DA3;_VVvLvAZ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/gnu-incremental.tar b/archive/tar/testdata/gnu-incremental.tar new file mode 100644 index 0000000000000000000000000000000000000000..4c442e5b82d1977231c83167324dc6cbb39f090e GIT binary patch literal 2560 zcmeH}%?`pK41jyyQ}6~BR^SDE15X}Fgm`jhV0e9D|52kCnK9aBtYckin|)=$`XDw? 
zR1gWZlz@m_OI%*lRGwA9h14WT2vq~}S_lHREgIF}{NjUY8H3iu_St#|f3o43!OgQF zA*xBv$!WT=`uOeMH4W_j*|gq%JeYp~t5+a&{O6CLoFGS3L`&|+KG5TjI3dD&{*}_e zuv|#9=O5?a*=X`v4TFi!9!)-~JQx>kr#Lex*}O{T(ROdlh5T#ZSb?7Zvi<)R|EUQ~ p{P+96mNf#~tx?dT{HxUWIL*mD+W*tf(I~k?j`F_TmkZo^y#f_=Y61WN literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/gnu-long-nul.tar b/archive/tar/testdata/gnu-long-nul.tar new file mode 100644 index 0000000000000000000000000000000000000000..28bc812aa60e81ea324297c81c738486acffc09c GIT binary patch literal 2560 zcmdPX*VA|K$8wwCI?c&U|<9U<_rqKG;!KK9|iA4$uNFwAh z(LG1BDGd;nOd;HlV4+|L3I(V*%L?&h*u>fXD6hgM&KaQJha%1hHHi0zdj!NPWD+XL zy~Mf*MHfO-7>YQu-cj~w2#kgR?Lxr7(8$=t)Xdz%l6EZGxqj4SN<)Blv5qjAQa6t3 zr(FmjVu^Mv+PQwzWJ*JTcCn5ynNl~7>Ze@@AYzGjEZVt#)MQFSfOfHtFqu*}j_Mx` Lfzc2cx*-4no2;l4 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/gnu-utf8.tar b/archive/tar/testdata/gnu-utf8.tar new file mode 100644 index 0000000000000000000000000000000000000000..2c9c8079cf651d4271ed78ac12bed01df5882f16 GIT binary patch literal 2560 zcmdPX*VA|K$>Jxw74X(NP%*@pq_zwgdvgE SfDU0ZYQ<;>jE2Cl4gmlQRWrx{ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-bad-hdr-file.tar b/archive/tar/testdata/pax-bad-hdr-file.tar new file mode 100644 index 0000000000000000000000000000000000000000..b97cc981f29b849420ffb5c249103d2990d3ea20 GIT binary patch literal 2560 zcmeHG%Z{Tu5Y4Cb9*w}fS@eP=G$bS#Xb5h0CLuB54F(dv{-$R%n=7?+rPlPFk>)I_ zELW9JRmrEpl(4jByA+?w^w&7<2NoZAjuXMp*n7%?6L^l77Eiau7hvcq?jH1?rTX`9 zpMCF(>ILDcd3=FqS4V*TZi>3Pe0#t9R^c-JqBj20{7)M=*Yz)FewT(Uati=tP7$St z*d8bF(D@GlUJ@4%qe!wMbG*3V!1wg0PiykOO!ia%*E*#=&OR(jtILP6KRA&2FJ*Kg zy|A)1mbYF<|BA(6Myl2O)0`aM3Wji&pIh4ClA2$dWoJs58yl;4D?U=(N)W;u%XInf zRTs< z4xd;AU1Jf%`IN4#I124Rb^4Q)!o;hPejoEtY5^qa+2ELA%=<%v`PjLWfatmzU$WMq z0o-nPr8-}Z8wrod+8?IEmrDw+X#Mz zXg5Tql$=bVJ~k+v0GLO)^pr}@?=ab|SNj)8^HW8!blD3B%SS zGI^{kUFR(2?R5l=l)&2sw&#PX@ZqpeYkq_8j?Fe9>W(2A?o+GT0h2`D?X>ODJmvX4 zP~kX>b+(x;9Ro3CH;IaFGqbd&#t{MvQE$!J(C*Cr+>WZOtS-2k`j?wym_80XdBrEb 
z&A?kq@r`(G3QRVPrgJ7?%^kKYVLESuSIlk5vZfj_byJWIheX`J6djolbyUd_XNeVjzuL>ej5D3crM z9<{1ks>i7+=~RSKo|Sx)l4Ftn9w&Uq>>V!%2>ghBO_>Q$6hvwEG|oN&UCT)Gp#Lb< z&&z$Z-zU{W_*Ag0NS{?z9mi3HBKj9S%;SB-Kc6(m|1^K*0;jV4?ZjWDAtPZ1fGj9T zdWh}u0}q{l2M{GRdl*HM6kU(~f4g#2FumMVJBOCy!YCR;B0^u8t#cDY#koNkj6B!m z+gA-50gm`Ea|(x}3iLrEtm5@*6ES>#3)cZ{>p&N#U;DIMQmo+M9%`ue| z{bY-YNux`QK%5Wh(u~8v^5=GYG*g_ow@AD9c_7sQ)AXbZT-WFAJ|SXk->FYE&4|xg ztur5QHrrx8oeyh?^pUyS4_8kv7`UVrJ}`7~?W=aI@}j+3AzXCXux$rJYX^m~hOa!r zmrEa+LAeMIZ*Wj$&Nn={i&&qm@J6B#83l7VWi#(R;YfjHvISDB&a%Ex`ai(?z@ zv4nf8qI1UR(SYTlih==vsh0~^xykt*C)?F>_kvk|yis=%@C!q?0HyponMdQqEzF^^q0eyCnp4)*TVprX{ajX-XL1|bm(kV@kNB>< z6cf+l@R%|(QnyZl!3W`R$|RyX-DU}g)7pO}!kR8hrcy&QptRp7Xxn=F^It5p|Lo{r E07OVKq5uE@ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-global-records.tar b/archive/tar/testdata/pax-global-records.tar new file mode 100644 index 0000000000000000000000000000000000000000..3d3d241e65c3a18feae324e8c40c9e5c76515dde GIT binary patch literal 7168 zcmeHL!D_=W4E4ER;V;M}*-LlnxjXI`JeGBZl#Mone*Wa9rr4nrhBeBfTpT&!#d^cpD(885)#?nu-$bl zd_2p;+hBpM{R+d!YIE8hxR?tY^UL#n+jl6O_LaKNYd-D1vHp<<`zJM7tNzO*g*mMS z{}PVT{JZ_3frq#ms21`Cr=+Hc0kHIcL{G*ZFA#Efwe*);p(rx?(dkdptIvyaeUIQm zC7-2Q)O+&%G{(pIJ#q(#>i-#uLc#hT`hT40Pta=rFC=*e`!w3WI)BE>e1hozPB`6P z`9Jr6+W!j+!|;>--`D@FR+%d@^62z z6WvCNQYsy?g}w--Wmq1GRb#yo!^n6FL+rVPFb)i25_DFxFl=_ySE2CthOp~yfMC5n zgzw5X;&fI8rxS10qL)!vj@P!Lk;SKDDCFaylA_ktT^1Y*N_iCY9|;HQx}x(s9~OWA zqJCA?@<&&If%}1Z$P#4!Ysi0;#K`|&n6$PSPx2 Y`kB|j_kpV{SiUy=c3uImfCCCV00+Y!FaQ7m literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-nil-sparse-hole.tar b/archive/tar/testdata/pax-nil-sparse-hole.tar new file mode 100644 index 0000000000000000000000000000000000000000..b44327bdbfb343658e5142f2129d7688f3044021 GIT binary patch literal 3072 zcmeHH%?iRW4DPv4(I=Qq#{4<#s-8p;AE2@vCi{_AVfgfR)5#e0R)vk`(l!Zg@_qSw zRL!1LM%qMjVQxwrin0syYVvK&u}WA%A8S*X1e6#v(Ri)GaoNM%mA212!gF^cirwKH zeV8xRwbm){kvFfbS9078b1PL+`kfe#>tRrnDop;Q6fB_}P;`d`kk1{^iS=j6KXt0# 
z<^}!e3vlSZ*L*u7{k~1L`i~P_G4U5o`qKZn$%)JX@J^vJ=yD>2g)##I0fB&kCj?&3 CI8e<1 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-nul-path.tar b/archive/tar/testdata/pax-nul-path.tar new file mode 100644 index 0000000000000000000000000000000000000000..c78f82b16e85363143404ec50c3e77e5174ba696 GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vIFf=kYF*P%{u%s%>00J0r&<4gv#xOaM5(Wb!BV%(j289X+ zI)KvRlEfmQ^^{tL?m0@$qmzCkLqmmv#F7kKs>V3AQxFN}F>ui-ycfm?#?I`Jb2|8dWnI0;3@?JVO8g0FZP& literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-nul-xattrs.tar b/archive/tar/testdata/pax-nul-xattrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..881f51768f9872f8d9bfcd2cbb637a64c88b59fb GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vgN=(tsE6vH#E2$`9pdK)Q00tbifq}6(Ob(=k!NAbi+{l1I zp@Mo&z->}#aYE@j+a){BLMtXu_aCah^nR1rE*#&;K|=WmI@H1V%$( H1cU$p$tyMF literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-pos-size-file.tar b/archive/tar/testdata/pax-pos-size-file.tar new file mode 100644 index 0000000000000000000000000000000000000000..ea5ccf916426a5b6300dd341dffeeb349e51ad90 GIT binary patch literal 2560 zcmeHGO^=&86wSK7V)iVI`DW1tNf<~-Ffb4pb|xV)A%qVbNciYIMU^VD+e5mrD1N`@xy@5P0!5`e$lo>Ayydck>ZnF=Fo-*7$%B*6VQy8OC1ZYQZ0cQwSoN-=7~KHZm75nsv?7#% zxbP~EBdQp}j$fv$ zm0((q`NB@)$O$!<9_$=t{xvb~Lm9~}L?l0&Jl78;-DOxv-8%`1VOZ&@XiXNP^?Flj z(`mnw@sQZt-FOL46N~e{7&mmZ3_7Zeb)X3_s28DRNA*13 zzlx)-aG@0_eZq!piMD!7yu_+Gn2p4s1_%<*fN8?jFA9^o3rL4@o2IU`LoynihG>kD zgC*AcCW%G}r$MQF^{SM2lx>!a?K5KK;Y$2@1m>3MDAg#{VSu51&>@#aQR@?hJl2&j zOTYwY86z_%=ypQwac8MQ)Nk`zSmC?tfF?@1eU$X~vDKZB%VPg_Skq)0%kmy-XqcqB zSWUKpp_GGFrX{zDo7-dKiXlS@wWp$QH`aD)2Tf5ICwv|0^UXD_0EfP^5VFAG&@E^9 wM*KF17Kq~USjbrS`t?E_kE`$n^D8p1nNBRj66M`4lQuLkKmWlt`)ynQ0y{e~UjP6A literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-records.tar b/archive/tar/testdata/pax-records.tar new file mode 100644 index 0000000000000000000000000000000000000000..276c211baa388cd4857f60be3355dc710c079adf GIT binary patch literal 2560 
zcmWGYtnf%pOi3*&)-%vg%gjk-pgu5w00tbifuXS}Ob(=k!NAbaz|4q2p@RBO!8Eb7 zxFoR%Xg#LPh!8QfP;mG6arAT7E67f_1qKC|k*Pv*er|4RUWu(oYEDkRj>3~2)1FW5 z;W9N)D9uaEO|{L*&r2r;bB%})9NM0wQI(8HJV7=Ju?a?-j1kv{3qm&5xc=w?9rhykjgnT@;M()2-EV}6ypIW4 zm*mBPoymBf{O?}zAMMUW`UKowz+L0vu6oVGGx7QtN_hQ0MUzwMKOAx<{8wZ(tg^0& zWSp?T=YcN*zYKgC_%p!)+e8FXL%{zBiBeXoUd6Lgn;H871mdv{nV}k5sUZL?#j{eI p8T$bQ;;{~yp&DALApk7Jvr?NG`vC;vu@0G`8d`_|BJgeqTmi{^;4uII literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/trailing-slash.tar b/archive/tar/testdata/trailing-slash.tar new file mode 100644 index 0000000000000000000000000000000000000000..93718b3034879fba292186c38fae7a9e7be69801 GIT binary patch literal 2560 zcmXpsGBz4Gd!2kkq(FP`FW-vLBN(KWX14Anam zFj`@3XrNG#Sdw8&v*@Q?!WuA>xdvlQlIJEmL^~{RXSF)#oEGZPaA5Df(AG_n|kZD?R%Y|fxyKx21-jV~=ONh|_7iBh{jE;leR XgnG`9QsYMTkA}c#2#kgRtwR6+ua*#N literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/writer-big-long.tar b/archive/tar/testdata/writer-big-long.tar index 5960ee824784ffeacb976a9c648be41b0281508b..09fc5dd3dd7fc5de3b6d22461fa23152fd499a41 100644 GIT binary patch literal 1536 zcmdT@%L>CF5cHg{@C({q<0CobV>|>an6xwspR=A26VBn zaX^Kt?_`f1Z)$wO|DLIY?J3lf^OSK^BIf(k6O@V|Yg?ui2v9Nql1oNtkxU->H^Dlm V!!&06{D$v3HgTJv%r&)bzX73UunhnJ literal 4096 zcmeIuJqp7x3 0 { - tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) - return tw.err - } - - n := tw.nb + tw.pad - for n > 0 && tw.err == nil { - nr := n - if nr > blockSize { - nr = blockSize - } - var nw int - nw, tw.err = tw.w.Write(zeroBlock[0:nr]) - n -= int64(nw) - } - tw.nb = 0 - tw.pad = 0 - return tw.err +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: ®FileWriter{w, 0}} } -// Write s into b, terminating it with a NUL if there is room. 
-func (f *formatter) formatString(b []byte, s string) { - if len(s) > len(b) { - f.err = ErrFieldTooLong - return - } - ascii := toASCII(s) - copy(b, ascii) - if len(ascii) < len(b) { - b[len(ascii)] = 0 - } +type fileWriter interface { + io.Writer + fileState + + ReadFrom(io.Reader) (int64, error) } -// Encode x as an octal ASCII string and write it into b with leading zeros. -func (f *formatter) formatOctal(b []byte, x int64) { - s := strconv.FormatInt(x, 8) - // leading zeros, but leave room for a NUL. - for len(s)+1 < len(b) { - s = "0" + s - } - f.formatString(b, s) -} - -// fitsInBase256 reports whether x can be encoded into n bytes using base-256 -// encoding. Unlike octal encoding, base-256 encoding does not require that the -// string ends with a NUL character. Thus, all n bytes are available for output. +// Flush finishes writing the current file's block padding. +// The current file must be fully written before Flush can be called. // -// If operating in binary mode, this assumes strict GNU binary mode; which means -// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is -// equivalent to the sign bit in two's complement form. -func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 - return n >= 9 || (x >= -1<= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // Highest bit indicates binary format - return - } - - f.formatOctal(b, 0) // Last resort, just write zero - f.err = ErrFieldTooLong -} - -var ( - minTime = time.Unix(0, 0) - // There is room for 11 octal digits (33 bits) of mtime. - maxTime = minTime.Add((1<<33 - 1) * time.Second) -) - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -func (tw *Writer) WriteHeader(hdr *Header) error { - return tw.writeHeader(hdr, true) -} - -// WriteHeader writes hdr and prepares to accept the file's contents. 
-// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -// As this method is called internally by writePax header to allow it to -// suppress writing the pax header. -func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { - if tw.closed { - return ErrWriteAfterClose - } - if tw.err == nil { - tw.Flush() - } +// This is unnecessary as the next call to WriteHeader or Close +// will implicitly flush out the file's padding. +func (tw *Writer) Flush() error { if tw.err != nil { return tw.err } - - // a map to hold pax header records, if any are needed - paxHeaders := make(map[string]string) - - // TODO(shanemhansen): we might want to use PAX headers for - // subsecond time resolution, but for now let's just capture - // too long fields or non ascii characters - - var f formatter - var header []byte - - // We need to select which scratch buffer to use carefully, - // since this method is called recursively to write PAX headers. - // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. - // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is - // already being used by the non-recursive call, so we must use paxHdrBuff. - header = tw.hdrBuff[:] - if !allowPax { - header = tw.paxHdrBuff[:] + if nb := tw.curr.LogicalRemaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) } - copy(header, zeroBlock) - s := slicer(header) - - // Wrappers around formatter that automatically sets paxHeaders if the - // argument extends beyond the capacity of the input byte slice. - var formatString = func(b []byte, s string, paxKeyword string) { - needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } - f.formatString(b, s) - } - var formatNumeric = func(b []byte, x int64, paxKeyword string) { - // Try octal first. 
- s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - f.formatOctal(b, x) - return - } - - // If it is too long for octal, and PAX is preferred, use a PAX header. - if paxKeyword != paxNone && tw.preferPax { - f.formatOctal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - tw.usedBinary = true - f.formatNumeric(b, x) - } - - // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - pathHeaderBytes := s.next(fileNameSize) - - formatString(pathHeaderBytes, hdr.Name, paxPath) - - // Handle out of range ModTime carefully. - var modTime int64 - if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { - modTime = hdr.ModTime.Unix() - } - - f.formatOctal(s.next(8), hdr.Mode) // 100:108 - formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 - formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 - formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 - formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 - - formatString(s.next(100), hdr.Linkname, paxLinkpath) - - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - formatString(s.next(32), hdr.Uname, paxUname) // 265:297 - formatString(s.next(32), hdr.Gname, paxGname) // 297:329 - formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 - formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 - - // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - prefixHeaderBytes := s.next(155) - formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix - - // Use the GNU magic instead of POSIX magic if we used any GNU extensions. 
- if tw.usedBinary { - copy(header[257:265], []byte("ustar \x00")) - } - - _, paxPathUsed := paxHeaders[paxPath] - // try to use a ustar header when only the name is too long - if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - prefix, suffix, ok := splitUSTARPath(hdr.Name) - if ok { - // Since we can encode in USTAR format, disable PAX header. - delete(paxHeaders, paxPath) - - // Update the path fields - formatString(pathHeaderBytes, suffix, paxNone) - formatString(prefixHeaderBytes, prefix, paxNone) - } - } - - // The chksum field is terminated by a NUL and a space. - // This is different from the other octal fields. - chksum, _ := checksum(header) - f.formatOctal(header[148:155], chksum) // Never fails - header[155] = ' ' - - // Check if there were any formatting errors. - if f.err != nil { - tw.err = f.err + if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { return tw.err } + tw.pad = 0 + return nil +} - if allowPax { - for k, v := range hdr.Xattrs { - paxHeaders[paxXattr+k] = v +// WriteHeader writes hdr and prepares to accept the file's contents. +// The Header.Size determines how many bytes can be written for the next file. +// If the current file is not fully written, then this returns an error. +// This implicitly flushes any padding necessary before writing the header. +func (tw *Writer) WriteHeader(hdr *Header) error { + if err := tw.Flush(); err != nil { + return err + } + tw.hdr = *hdr // Shallow copy of Header + + // Avoid usage of the legacy TypeRegA flag, and automatically promote + // it to use TypeReg or TypeDir. + if tw.hdr.Typeflag == TypeRegA { + if strings.HasSuffix(tw.hdr.Name, "/") { + tw.hdr.Typeflag = TypeDir + } else { + tw.hdr.Typeflag = TypeReg } } - if len(paxHeaders) > 0 { - if !allowPax { - return errInvalidHeader + // Round ModTime and ignore AccessTime and ChangeTime unless + // the format is explicitly chosen. 
+ // This ensures nominal usage of WriteHeader (without specifying the format) + // does not always result in the PAX format being chosen, which + // causes a 1KiB increase to every header. + if tw.hdr.Format == FormatUnknown { + tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) + tw.hdr.AccessTime = time.Time{} + tw.hdr.ChangeTime = time.Time{} + } + + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { + case allowedFormats.has(FormatUSTAR): + tw.err = tw.writeUSTARHeader(&tw.hdr) + return tw.err + case allowedFormats.has(FormatPAX): + tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) + return tw.err + case allowedFormats.has(FormatGNU): + tw.err = tw.writeGNUHeader(&tw.hdr) + return tw.err + default: + return err // Non-fatal error + } +} + +func (tw *Writer) writeUSTARHeader(hdr *Header) error { + // Check if we can use USTAR prefix/suffix splitting. + var namePrefix string + if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { + namePrefix, hdr.Name = prefix, suffix + } + + // Pack the main header. + var f formatter + blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + f.formatString(blk.USTAR().Prefix(), namePrefix) + blk.SetFormat(FormatUSTAR) + if f.err != nil { + return f.err // Should never happen since header is validated + } + return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) +} + +func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. 
+ hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName } - if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { + */ + _ = realSize + + // Write PAX records to the output. + isGlobal := hdr.Typeflag == TypeXGlobalHeader + if len(paxHdrs) > 0 || isGlobal { + // Sort keys for deterministic ordering. + var keys []string + for k := range paxHdrs { + keys = append(keys, k) + } + sort.Strings(keys) + + // Write each record to a buffer. + var buf strings.Builder + for _, k := range keys { + rec, err := formatPAXRecord(k, paxHdrs[k]) + if err != nil { + return err + } + buf.WriteString(rec) + } + + // Write the extended header file. + var name string + var flag byte + if isGlobal { + name = realName + if name == "" { + name = "GlobalHead.0.0" + } + flag = TypeXGlobalHeader + } else { + dir, file := path.Split(realName) + name = path.Join(dir, "PaxHeaders.0", file) + flag = TypeXHeader + } + data := buf.String() + if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { + return err // Global headers return here + } + } + + // Pack the main header. 
+ var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) + blk.SetFormat(FormatPAX) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + */ + return nil +} + +func (tw *Writer) writeGNUHeader(hdr *Header) error { + // Use long-link files if Name or Linkname exceeds the field size. + const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { + return err + } + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { return err } } - tw.nb = int64(hdr.Size) - tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize - _, tw.err = tw.w.Write(header) - return tw.err + // Pack the main header. + var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte + blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) + if !hdr.AccessTime.IsZero() { + f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) + } + if !hdr.ChangeTime.IsZero() { + f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) + } + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + if hdr.Typeflag == TypeGNUSparse { + sph := append([]sparseEntry{}, hdr.SparseHoles...) 
// Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } + */ + blk.SetFormat(FormatGNU) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil +} + +type ( + stringFormatter func([]byte, string) + numberFormatter func([]byte, int64) +) + +// templateV7Plus fills out the V7 fields of a block using values from hdr. +// It also fills out fields (uname, gname, devmajor, devminor) that are +// shared in the USTAR, PAX, and GNU formats using the provided formatters. +// +// The block returned is only valid until the next call to +// templateV7Plus or writeRawFile. 
+func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { + tw.blk.Reset() + + modTime := hdr.ModTime + if modTime.IsZero() { + modTime = time.Unix(0, 0) + } + + v7 := tw.blk.V7() + v7.TypeFlag()[0] = hdr.Typeflag + fmtStr(v7.Name(), hdr.Name) + fmtStr(v7.LinkName(), hdr.Linkname) + fmtNum(v7.Mode(), hdr.Mode) + fmtNum(v7.UID(), int64(hdr.Uid)) + fmtNum(v7.GID(), int64(hdr.Gid)) + fmtNum(v7.Size(), hdr.Size) + fmtNum(v7.ModTime(), modTime.Unix()) + + ustar := tw.blk.USTAR() + fmtStr(ustar.UserName(), hdr.Uname) + fmtStr(ustar.GroupName(), hdr.Gname) + fmtNum(ustar.DevMajor(), hdr.Devmajor) + fmtNum(ustar.DevMinor(), hdr.Devminor) + + return &tw.blk +} + +// writeRawFile writes a minimal file with the given name and flag type. +// It uses format to encode the header format and will write data as the body. +// It uses default values for all of the other fields (as BSD and GNU tar does). +func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { + tw.blk.Reset() + + // Best effort for the filename. + name = toASCII(name) + if len(name) > nameSize { + name = name[:nameSize] + } + name = strings.TrimRight(name, "/") + + var f formatter + v7 := tw.blk.V7() + v7.TypeFlag()[0] = flag + f.formatString(v7.Name(), name) + f.formatOctal(v7.Mode(), 0) + f.formatOctal(v7.UID(), 0) + f.formatOctal(v7.GID(), 0) + f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB + f.formatOctal(v7.ModTime(), 0) + tw.blk.SetFormat(format) + if f.err != nil { + return f.err // Only occurs if size condition is violated + } + + // Write the header and data. + if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { + return err + } + _, err := io.WriteString(tw, data) + return err +} + +// writeRawHeader writes the value of blk, regardless of its value. +// It sets up the Writer such that it can accept a file of the given size. 
+// If the flag is a special header-only flag, then the size is treated as zero. +func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { + if err := tw.Flush(); err != nil { + return err + } + if _, err := tw.w.Write(blk[:]); err != nil { + return err + } + if isHeaderOnlyType(flag) { + size = 0 + } + tw.curr = &regFileWriter{tw.w, size} + tw.pad = blockPadding(size) + return nil } // splitUSTARPath splits a path according to USTAR prefix and suffix rules. // If the path is not splittable, then it will return ("", "", false). func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length <= fileNameSize || !isASCII(name) { + if length <= nameSize || !isASCII(name) { return "", "", false - } else if length > fileNamePrefixSize+1 { - length = fileNamePrefixSize + 1 + } else if length > prefixSize+1 { + length = prefixSize + 1 } else if name[length-1] == '/' { length-- } @@ -300,117 +415,239 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { i := strings.LastIndex(name[:length], "/") nlen := len(name) - i - 1 // nlen is length of suffix plen := i // plen is length of prefix - if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { return "", "", false } return name[:i], name[i+1:], true } -// writePaxHeader writes an extended pax header to the -// archive. -func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { - // Prepare extended header - ext := new(Header) - ext.Typeflag = TypeXHeader - // Setting ModTime is required for reader parsing to - // succeed, and seems harmless enough. - ext.ModTime = hdr.ModTime - // The spec asks that we namespace our pseudo files - // with the current pid. However, this results in differing outputs - // for identical inputs. As such, the constant 0 is now used instead.
- // golang.org/issue/12358 - dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, "PaxHeaders.0", file) - - ascii := toASCII(fullName) - if len(ascii) > 100 { - ascii = ascii[:100] - } - ext.Name = ascii - // Construct the body - var buf bytes.Buffer - - // Keys are sorted before writing to body to allow deterministic output. - var keys []string - for k := range paxHeaders { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) - } - - ext.Size = int64(len(buf.Bytes())) - if err := tw.writeHeader(ext, false); err != nil { - return err - } - if _, err := tw.Write(buf.Bytes()); err != nil { - return err - } - if err := tw.Flush(); err != nil { - return err - } - return nil -} - -// formatPAXRecord formats a single PAX record, prefixing it with the -// appropriate length. -func formatPAXRecord(k, v string) string { - const padding = 3 // Extra padding for ' ', '=', and '\n' - size := len(k) + len(v) + padding - size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s=%s\n", size, k, v) - - // Final adjustment if adding size field increased the record size. - if len(record) != size { - size = len(record) - record = fmt.Sprintf("%d %s=%s\n", size, k, v) - } - return record -} - -// Write writes to the current entry in the tar archive. +// Write writes to the current file in the tar archive. // Write returns the error ErrWriteTooLong if more than -// hdr.Size bytes are written after WriteHeader. -func (tw *Writer) Write(b []byte) (n int, err error) { - if tw.closed { - err = ErrWriteAfterClose - return +// Header.Size bytes are written after WriteHeader. +// +// Calling Write on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless +// of what the Header.Size claims. 
+func (tw *Writer) Write(b []byte) (int, error) { + if tw.err != nil { + return 0, tw.err } - overwrite := false - if int64(len(b)) > tw.nb { - b = b[0:tw.nb] - overwrite = true + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err } - n, err = tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - err = ErrWriteTooLong - return - } - tw.err = err - return + return n, err } -// Close closes the tar archive, flushing any unwritten -// data to the underlying writer. -func (tw *Writer) Close() error { - if tw.err != nil || tw.closed { - return tw.err +// readFrom populates the content of the current file by reading from r. +// The bytes read must match the number of remaining bytes in the current file. +// +// If the current file is sparse and r is an io.ReadSeeker, +// then readFrom uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are all NULs. +// This always reads the last byte to ensure r is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tw *Writer) readFrom(r io.Reader) (int64, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.ReadFrom(r) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// Close closes the tar archive by flushing the padding, and writing the footer. +// If the current file (from a prior call to WriteHeader) is not fully written, +// then this returns an error. +func (tw *Writer) Close() error { + if tw.err == ErrWriteAfterClose { + return nil } - tw.Flush() - tw.closed = true if tw.err != nil { return tw.err } - // trailer: two zero blocks - for i := 0; i < 2; i++ { - _, tw.err = tw.w.Write(zeroBlock) - if tw.err != nil { - break + // Trailer: two zero blocks. + err := tw.Flush() + for i := 0; i < 2 && err == nil; i++ { + _, err = tw.w.Write(zeroBlock[:]) + } + + // Ensure all future actions are invalid. 
+ tw.err = ErrWriteAfterClose + return err // Report IO errors +} + +// regFileWriter is a fileWriter for writing data to a regular file entry. +type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + if len(b) > 0 { + n, err = fw.w.Write(b) + fw.nb -= int64(n) + } + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { + return io.Copy(struct{ io.Writer }{fw}, r) +} + +func (fw regFileWriter) LogicalRemaining() int64 { + return fw.nb +} +func (fw regFileWriter) PhysicalRemaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.LogicalRemaining() + if overwrite { + b = b[:sw.LogicalRemaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains } } - return tw.err + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in 
validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { + rs, ok := r.(io.ReadSeeker) + if ok { + if _, err := rs.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(struct{ io.Writer }{sw}, r) + } + + var readLastByte bool + pos0 := sw.pos + for sw.LogicalRemaining() > 0 && !readLastByte && err == nil { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = dataStart - sw.pos + if sw.PhysicalRemaining() == 0 { + readLastByte = true + nf-- + } + _, err = rs.Seek(nf, io.SeekCurrent) + } else { // In a data fragment + nf = dataEnd - sw.pos + nf, err = io.CopyN(sw.fw, rs, nf) + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // read a single byte to ensure the file is the right size. 
+ if readLastByte && err == nil { + _, err = mustReadFull(rs, []byte{0}) + sw.pos++ + } + + n = sw.pos - pos0 + switch { + case err == io.EOF: + return n, io.ErrUnexpectedEOF + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + default: + return n, ensureEOF(rs) + } +} + +func (sw sparseFileWriter) LogicalRemaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} +func (sw sparseFileWriter) PhysicalRemaining() int64 { + return sw.fw.PhysicalRemaining() +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. +type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} + +// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. 
+func ensureEOF(r io.Reader) error { + n, err := tryReadFull(r, []byte{0}) + switch { + case n > 0: + return ErrWriteTooLong + case err == io.EOF: + return nil + default: + return err + } } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 6e91d90..30556d2 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -6,11 +6,12 @@ package tar import ( "bytes" - "fmt" + "encoding/hex" + "errors" "io" "io/ioutil" - "math" "os" + "path" "reflect" "sort" "strings" @@ -19,251 +20,516 @@ import ( "time" ) -type writerTestEntry struct { - header *Header - contents string -} - -type writerTest struct { - file string // filename of expected output - entries []*writerTestEntry -} - -var writerTests = []*writerTest{ - // The writer test file was produced with this command: - // tar (GNU tar) 1.26 - // ln -s small.txt link.txt - // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt - { - file: "testdata/writer.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: "small.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 5, - ModTime: time.Unix(1246508266, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - }, - contents: "Kilts", - }, - { - header: &Header{ - Name: "small2.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 11, - ModTime: time.Unix(1245217492, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - }, - contents: "Google.com\n", - }, - { - header: &Header{ - Name: "link.txt", - Mode: 0777, - Uid: 1000, - Gid: 1000, - Size: 0, - ModTime: time.Unix(1314603082, 0), - Typeflag: '2', - Linkname: "small.txt", - Uname: "strings", - Gname: "strings", - }, - // no contents - }, - }, - }, - // The truncated test file was produced using these commands: - // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt - // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar - { - file: "testdata/writer-big.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - 
Name: "tmp/16gig.txt", - Mode: 0640, - Uid: 73025, - Gid: 5000, - Size: 16 << 30, - ModTime: time.Unix(1254699560, 0), - Typeflag: '0', - Uname: "dsymonds", - Gname: "eng", - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }, - }, - }, - // The truncated test file was produced using these commands: - // dd if=/dev/zero bs=1048576 count=16384 > (longname/)*15 /16gig.txt - // tar -b 1 -c -f- (longname/)*15 /16gig.txt | dd bs=512 count=8 > writer-big-long.tar - { - file: "testdata/writer-big-long.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: strings.Repeat("longname/", 15) + "16gig.txt", - Mode: 0644, - Uid: 1000, - Gid: 1000, - Size: 16 << 30, - ModTime: time.Unix(1399583047, 0), - Typeflag: '0', - Uname: "guillaume", - Gname: "guillaume", - }, - // fake contents - contents: strings.Repeat("\x00", 4<<10), - }, - }, - }, - // This file was produced using gnu tar 1.17 - // gnutar -b 4 --format=ustar (longname/)*15 + file.txt - { - file: "testdata/ustar.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: strings.Repeat("longname/", 15) + "file.txt", - Mode: 0644, - Uid: 0765, - Gid: 024, - Size: 06, - ModTime: time.Unix(1360135598, 0), - Typeflag: '0', - Uname: "shane", - Gname: "staff", - }, - contents: "hello\n", - }, - }, - }, - // This file was produced using gnu tar 1.26 - // echo "Slartibartfast" > file.txt - // ln file.txt hard.txt - // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt - { - file: "testdata/hardlink.tar", - entries: []*writerTestEntry{ - { - header: &Header{ - Name: "file.txt", - Mode: 0644, - Uid: 1000, - Gid: 100, - Size: 15, - ModTime: time.Unix(1425484303, 0), - Typeflag: '0', - Uname: "vbatts", - Gname: "users", - }, - contents: "Slartibartfast\n", - }, - { - header: &Header{ - Name: "hard.txt", - Mode: 0644, - Uid: 1000, - Gid: 100, - Size: 0, - ModTime: time.Unix(1425484303, 0), - Typeflag: '1', - Linkname: "file.txt", - Uname: "vbatts", - Gname: "users", - }, - // no 
contents - }, - }, - }, -} - -// Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. -func bytestr(offset int, b []byte) string { - const rowLen = 32 - s := fmt.Sprintf("%04x ", offset) - for _, ch := range b { - switch { - case '0' <= ch && ch <= '9', 'A' <= ch && ch <= 'Z', 'a' <= ch && ch <= 'z': - s += fmt.Sprintf(" %c", ch) - default: - s += fmt.Sprintf(" %02x", ch) +func bytediff(a, b []byte) string { + const ( + uniqueA = "- " + uniqueB = "+ " + identity = " " + ) + var ss []string + sa := strings.Split(strings.TrimSpace(hex.Dump(a)), "\n") + sb := strings.Split(strings.TrimSpace(hex.Dump(b)), "\n") + for len(sa) > 0 && len(sb) > 0 { + if sa[0] == sb[0] { + ss = append(ss, identity+sa[0]) + } else { + ss = append(ss, uniqueA+sa[0]) + ss = append(ss, uniqueB+sb[0]) } + sa, sb = sa[1:], sb[1:] } - return s -} - -// Render a pseudo-diff between two blocks of bytes. -func bytediff(a []byte, b []byte) string { - const rowLen = 32 - s := fmt.Sprintf("(%d bytes vs. 
%d bytes)\n", len(a), len(b)) - for offset := 0; len(a)+len(b) > 0; offset += rowLen { - na, nb := rowLen, rowLen - if na > len(a) { - na = len(a) - } - if nb > len(b) { - nb = len(b) - } - sa := bytestr(offset, a[0:na]) - sb := bytestr(offset, b[0:nb]) - if sa != sb { - s += fmt.Sprintf("-%v\n+%v\n", sa, sb) - } - a = a[na:] - b = b[nb:] + for len(sa) > 0 { + ss = append(ss, uniqueA+sa[0]) + sa = sa[1:] } - return s + for len(sb) > 0 { + ss = append(ss, uniqueB+sb[0]) + sb = sb[1:] + } + return strings.Join(ss, "\n") } func TestWriter(t *testing.T) { -testLoop: - for i, test := range writerTests { - expected, err := ioutil.ReadFile(test.file) - if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) - continue + type ( + testHeader struct { // WriteHeader(hdr) == wantErr + hdr Header + wantErr error } + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testClose struct { // Close() == wantErr + wantErr error + } + testFnc interface{} // testHeader | testWrite | testReadFrom | testClose + ) - buf := new(bytes.Buffer) - tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB - big := false - for j, entry := range test.entries { - big = big || entry.header.Size > 1<<10 - if err := tw.WriteHeader(entry.header); err != nil { - t.Errorf("test %d, entry %d: Failed writing header: %v", i, j, err) - continue testLoop - } - if _, err := io.WriteString(tw, entry.contents); err != nil { - t.Errorf("test %d, entry %d: Failed writing contents: %v", i, j, err) - continue testLoop - } - } - // Only interested in Close failures for the small tests. 
- if err := tw.Close(); err != nil && !big { - t.Errorf("test %d: Failed closing archive: %v", i, err) - continue testLoop - } + vectors := []struct { + file string // Optional filename of expected output + tests []testFnc + }{{ + // The writer test file was produced with this command: + // tar (GNU tar) 1.26 + // ln -s small.txt link.txt + // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt + file: "testdata/writer.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "small.txt", + Size: 5, + Mode: 0640, + Uid: 73025, + Gid: 5000, + Uname: "dsymonds", + Gname: "eng", + ModTime: time.Unix(1246508266, 0), + }, nil}, + testWrite{"Kilts", 5, nil}, - actual := buf.Bytes() - if !bytes.Equal(expected, actual) { - t.Errorf("test %d: Incorrect result: (-=expected, +=actual)\n%v", - i, bytediff(expected, actual)) - } - if testing.Short() { // The second test is expensive. - break + testHeader{Header{ + Typeflag: TypeReg, + Name: "small2.txt", + Size: 11, + Mode: 0640, + Uid: 73025, + Uname: "dsymonds", + Gname: "eng", + Gid: 5000, + ModTime: time.Unix(1245217492, 0), + }, nil}, + testWrite{"Google.com\n", 11, nil}, + + testHeader{Header{ + Typeflag: TypeSymlink, + Name: "link.txt", + Linkname: "small.txt", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "strings", + Gname: "strings", + ModTime: time.Unix(1314603082, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, + }, { + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + file: "testdata/writer-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "tmp/16gig.txt", + Size: 16 << 30, + Mode: 0640, + Uid: 73025, + Gid: 5000, + Uname: "dsymonds", + Gname: "eng", + ModTime: time.Unix(1254699560, 0), + Format: FormatGNU, + }, nil}, + }, + }, { + // This truncated file was produced using this 
library. + // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. + // dd if=/dev/zero bs=1G count=16 >> writer-big-long.tar + // gnutar -xvf writer-big-long.tar + // bsdtar -xvf writer-big-long.tar + // + // This file is in PAX format. + file: "testdata/writer-big-long.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: strings.Repeat("longname/", 15) + "16gig.txt", + Size: 16 << 30, + Mode: 0644, + Uid: 1000, + Gid: 1000, + Uname: "guillaume", + Gname: "guillaume", + ModTime: time.Unix(1399583047, 0), + }, nil}, + }, + }, { + // This file was produced using GNU tar v1.17. + // gnutar -b 4 --format=ustar (longname/)*15 + file.txt + file: "testdata/ustar.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: strings.Repeat("longname/", 15) + "file.txt", + Size: 6, + Mode: 0644, + Uid: 501, + Gid: 20, + Uname: "shane", + Gname: "staff", + ModTime: time.Unix(1360135598, 0), + }, nil}, + testWrite{"hello\n", 6, nil}, + testClose{nil}, + }, + }, { + // This file was produced using GNU tar v1.26: + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + file: "testdata/hardlink.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "file.txt", + Size: 15, + Mode: 0644, + Uid: 1000, + Gid: 100, + Uname: "vbatts", + Gname: "users", + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"Slartibartfast\n", 15, nil}, + + testHeader{Header{ + Typeflag: TypeLink, + Name: "hard.txt", + Linkname: "file.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Uname: "vbatts", + Gname: "users", + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, + }, { + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "bad-null.txt", + Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, + }, headerError{}}, + }, + }, { + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + 
Name: "null\x00.txt", + }, headerError{}}, + }, + }, { + file: "testdata/pax-records.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "file", + Uname: strings.Repeat("long", 10), + PAXRecords: map[string]string{ + "path": "FILE", // Should be ignored + "GNU.sparse.map": "0,0", // Should be ignored + "comment": "Hello, 世界", + "GOLANG.pkg": "tar", + }, + }, nil}, + testClose{nil}, + }, + }, { + // Craft a theoretically valid PAX archive with global headers. + // The GNU and BSD tar tools do not parse these the same way. + // + // BSD tar v3.1.2 parses and ignores all global headers; + // the behavior is verified by researching the source code. + // + // $ bsdtar -tvf pax-global-records.tar + // ---------- 0 0 0 0 Dec 31 1969 file1 + // ---------- 0 0 0 0 Dec 31 1969 file2 + // ---------- 0 0 0 0 Dec 31 1969 file3 + // ---------- 0 0 0 0 May 13 2014 file4 + // + // GNU tar v1.27.1 applies global headers to subsequent records, + // but does not do the following properly: + // * It does not treat an empty record as deletion. + // * It does not use subsequent global headers to update previous ones. + // + // $ gnutar -tvf pax-global-records.tar + // ---------- 0/0 0 2017-07-13 19:40 global1 + // ---------- 0/0 0 2017-07-13 19:40 file2 + // gnutar: Substituting `.' for empty member name + // ---------- 0/0 0 1969-12-31 16:00 + // gnutar: Substituting `.' 
for empty member name + // ---------- 0/0 0 2014-05-13 09:53 + // + // According to the PAX specification, this should have been the result: + // ---------- 0/0 0 2017-07-13 19:40 global1 + // ---------- 0/0 0 2017-07-13 19:40 file2 + // ---------- 0/0 0 2017-07-13 19:40 file3 + // ---------- 0/0 0 2014-05-13 09:53 file4 + file: "testdata/pax-global-records.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeXGlobalHeader, + PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, Name: "file1", + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, + Name: "file2", + PAXRecords: map[string]string{"path": "file2"}, + }, nil}, + testHeader{Header{ + Typeflag: TypeXGlobalHeader, + PAXRecords: map[string]string{"path": ""}, // Should delete "path", but keep "mtime" + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, Name: "file3", + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, + Name: "file4", + ModTime: time.Unix(1400000000, 0), + PAXRecords: map[string]string{"mtime": "1400000000"}, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-utf8.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + Uname: "☺", + Gname: "⚹", + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-not-utf8.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + Uname: "rawr", + Gname: "dsnet", + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }, nil}, + testClose{nil}, + }, + // TODO(dsnet): Re-enable this test when adding sparse support. 
+ // See https://golang.org/issue/22735 + /* + }, { + file: "testdata/gnu-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "gnu-sparse", + Size: 6e10, + SparseHoles: []sparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testReadFrom{fileOps{ + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - 
blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + }, 6e10, nil}, + testClose{nil}, + }, + }, { + file: "testdata/pax-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "pax-sparse", + Size: 6e10, + SparseHoles: []sparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testReadFrom{fileOps{ + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + }, 6e10, nil}, + testClose{nil}, + }, + */ + }, { + file: "testdata/trailing-slash.tar", + tests: []testFnc{ + testHeader{Header{Name: strings.Repeat("123456789/", 30)}, nil}, + testClose{nil}, + }, + }, { + // Automatically promote zero value of Typeflag depending on the name. 
+ file: "testdata/file-and-dir.tar", + tests: []testFnc{ + testHeader{Header{Name: "small.txt", Size: 5}, nil}, + testWrite{"Kilts", 5, nil}, + testHeader{Header{Name: "dir/"}, nil}, + testClose{nil}, + }, + }} + + equalError := func(x, y error) bool { + _, ok1 := x.(headerError) + _, ok2 := y.(headerError) + if ok1 || ok2 { + return ok1 && ok2 } + return x == y + } + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + const maxSize = 10 << 10 // 10KiB + buf := new(bytes.Buffer) + tw := NewWriter(iotest.TruncateWriter(buf, maxSize)) + + for i, tf := range v.tests { + switch tf := tf.(type) { + case testHeader: + err := tw.WriteHeader(&tf.hdr) + if !equalError(err, tf.wantErr) { + t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) + } + case testWrite: + got, err := tw.Write([]byte(tf.str)) + if got != tf.wantCnt || !equalError(err, tf.wantErr) { + t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testReadFrom: + f := &testFile{ops: tf.ops} + got, err := tw.readFrom(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d, ReadFrom(): %v", i, err) + } else if got != tf.wantCnt || !equalError(err, tf.wantErr) { + t.Errorf("test %d, ReadFrom() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d, expected %d more operations", i, len(f.ops)) + } + case testClose: + err := tw.Close() + if !equalError(err, tf.wantErr) { + t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) + } + default: + t.Fatalf("test %d, unknown test operation: %T", i, tf) + } + } + + if v.file != "" { + want, err := ioutil.ReadFile(v.file) + if err != nil { + t.Fatalf("ReadFile() = %v, want nil", err) + } + got := buf.Bytes() + if !bytes.Equal(want, got) { + t.Fatalf("incorrect result: (-got +want)\n%v", bytediff(got, want)) + } + } + }) } } @@ -552,33 +818,116 @@ func TestValidTypeflagWithPAXHeader(t *testing.T) { if err != nil { 
t.Fatalf("Failed to read header: %s", err) } - if header.Typeflag != 0 { - t.Fatalf("Typeflag should've been 0, found %d", header.Typeflag) + if header.Typeflag != TypeReg { + t.Fatalf("Typeflag should've been %d, found %d", TypeReg, header.Typeflag) } } } -func TestWriteAfterClose(t *testing.T) { - var buffer bytes.Buffer - tw := NewWriter(&buffer) +// failOnceWriter fails exactly once and then always reports success. +type failOnceWriter bool - hdr := &Header{ - Name: "small.txt", - Size: 5, - } - if err := tw.WriteHeader(hdr); err != nil { - t.Fatalf("Failed to write header: %s", err) - } - tw.Close() - if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { - t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) +func (w *failOnceWriter) Write(b []byte) (int, error) { + if !*w { + return 0, io.ErrShortWrite } + *w = true + return len(b), nil +} + +func TestWriterErrors(t *testing.T) { + t.Run("HeaderOnly", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "dir/", Typeflag: TypeDir} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if _, err := tw.Write([]byte{0x00}); err != ErrWriteTooLong { + t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) + } + }) + + t.Run("NegativeSize", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: -1} + if err := tw.WriteHeader(hdr); err == nil { + t.Fatalf("WriteHeader() = nil, want non-nil error") + } + }) + + t.Run("BeforeHeader", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteTooLong { + t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) + } + }) + + t.Run("AfterClose", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt"} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Close(); err != nil { + 
t.Fatalf("Close() = %v, want nil", err) + } + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { + t.Fatalf("Write() = %v, want %v", err, ErrWriteAfterClose) + } + if err := tw.Flush(); err != ErrWriteAfterClose { + t.Fatalf("Flush() = %v, want %v", err, ErrWriteAfterClose) + } + if err := tw.Close(); err != nil { + t.Fatalf("Close() = %v, want nil", err) + } + }) + + t.Run("PrematureFlush", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: 5} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Flush(); err == nil { + t.Fatalf("Flush() = %v, want non-nil error", err) + } + }) + + t.Run("PrematureClose", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: 5} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Close(); err == nil { + t.Fatalf("Close() = %v, want non-nil error", err) + } + }) + + t.Run("Persistence", func(t *testing.T) { + tw := NewWriter(new(failOnceWriter)) + if err := tw.WriteHeader(&Header{}); err != io.ErrShortWrite { + t.Fatalf("WriteHeader() = %v, want %v", err, io.ErrShortWrite) + } + if err := tw.WriteHeader(&Header{Name: "small.txt"}); err == nil { + t.Errorf("WriteHeader() = got %v, want non-nil error", err) + } + if _, err := tw.Write(nil); err == nil { + t.Errorf("Write() = %v, want non-nil error", err) + } + if err := tw.Flush(); err == nil { + t.Errorf("Flush() = %v, want non-nil error", err) + } + if err := tw.Close(); err == nil { + t.Errorf("Close() = %v, want non-nil error", err) + } + }) } func TestSplitUSTARPath(t *testing.T) { - var sr = strings.Repeat + sr := strings.Repeat - var vectors = []struct { + vectors := []struct { input string // Input path prefix string // Expected output prefix suffix string // Expected output suffix @@ -587,17 +936,17 @@ func TestSplitUSTARPath(t *testing.T) { {"", 
"", "", false}, {"abc", "", "", false}, {"用戶名", "", "", false}, - {sr("a", fileNameSize), "", "", false}, - {sr("a", fileNameSize) + "/", "", "", false}, - {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, - {sr("a", fileNamePrefixSize) + "/", "", "", false}, - {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, - {sr("a", fileNameSize+1), "", "", false}, - {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, - {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), - sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, - {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, - {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + {sr("a", nameSize), "", "", false}, + {sr("a", nameSize) + "/", "", "", false}, + {sr("a", nameSize) + "/a", sr("a", nameSize), "a", true}, + {sr("a", prefixSize) + "/", "", "", false}, + {sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true}, + {sr("a", nameSize+1), "", "", false}, + {sr("/", nameSize+1), sr("/", nameSize-1), "/", true}, + {sr("a", prefixSize) + "/" + sr("b", nameSize), + sr("a", prefixSize), sr("b", nameSize), true}, + {sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false}, + {sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true}, } for _, v := range vectors { @@ -609,114 +958,354 @@ func TestSplitUSTARPath(t *testing.T) { } } -func TestFormatPAXRecord(t *testing.T) { - var medName = strings.Repeat("CD", 50) - var longName = strings.Repeat("AB", 100) - - var vectors = []struct { - inputKey string - inputVal string - output string - }{ - {"k", "v", "6 k=v\n"}, - {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, - {"path", longName, "210 path=" + longName + "\n"}, - {"path", medName, "110 path=" + medName + "\n"}, - {"foo", "ba", "9 foo=ba\n"}, - {"foo", "bar", "11 foo=bar\n"}, - {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, - {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, - 
{"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, - {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, +// TestIssue12594 tests that the Writer does not attempt to populate the prefix +// field when encoding a header in the GNU format. The prefix field is valid +// in USTAR and PAX, but not GNU. +func TestIssue12594(t *testing.T) { + names := []string{ + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/333/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/file.txt", + "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/file.txt", + "/home/support/.openoffice.org/3/user/uno_packages/cache/registry/com.sun.star.comp.deployment.executable.PackageRegistryBackend", } - for _, v := range vectors { - output := formatPAXRecord(v.inputKey, v.inputVal) - if output != v.output { - t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", - v.inputKey, v.inputVal, output, v.output) + for i, name := range names { + var b bytes.Buffer + + tw := NewWriter(&b) + if err := tw.WriteHeader(&Header{ + Name: name, + Uid: 1 << 25, // Prevent USTAR format + }); err != nil { + t.Errorf("test %d, unexpected WriteHeader error: %v", i, err) + } + if err := tw.Close(); err != nil { + t.Errorf("test %d, unexpected Close error: %v", i, err) + } + + // The prefix field should never appear in the GNU format. 
+ var blk block + copy(blk[:], b.Bytes()) + prefix := string(blk.USTAR().Prefix()) + if i := strings.IndexByte(prefix, 0); i >= 0 { + prefix = prefix[:i] // Truncate at the NUL terminator + } + if blk.GetFormat() == FormatGNU && len(prefix) > 0 && strings.HasPrefix(name, prefix) { + t.Errorf("test %d, found prefix in GNU format: %s", i, prefix) + } + + tr := NewReader(&b) + hdr, err := tr.Next() + if err != nil { + t.Errorf("test %d, unexpected Next error: %v", i, err) + } + if hdr.Name != name { + t.Errorf("test %d, hdr.Name = %s, want %s", i, hdr.Name, name) } } } -func TestFitsInBase256(t *testing.T) { - var vectors = []struct { - input int64 - width int - ok bool - }{ - {+1, 8, true}, - {0, 8, true}, - {-1, 8, true}, - {1 << 56, 8, false}, - {(1 << 56) - 1, 8, true}, - {-1 << 56, 8, true}, - {(-1 << 56) - 1, 8, false}, - {121654, 8, true}, - {-9849849, 8, true}, - {math.MaxInt64, 9, true}, - {0, 9, true}, - {math.MinInt64, 9, true}, - {math.MaxInt64, 12, true}, - {0, 12, true}, - {math.MinInt64, 12, true}, - } +// testNonEmptyWriter wraps an io.Writer and ensures that +// Write is never called with an empty buffer. +type testNonEmptyWriter struct{ io.Writer } - for _, v := range vectors { - ok := fitsInBase256(v.width, v.input) - if ok != v.ok { - t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) - } +func (w testNonEmptyWriter) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, errors.New("unexpected empty Write call") } + return w.Writer.Write(b) } -func TestFormatNumeric(t *testing.T) { - var vectors = []struct { - input int64 - output string - ok bool - }{ - // Test base-256 (binary) encoded values. 
- {-1, "\xff", true}, - {-1, "\xff\xff", true}, - {-1, "\xff\xff\xff", true}, - {(1 << 0), "0", false}, - {(1 << 8) - 1, "\x80\xff", true}, - {(1 << 8), "0\x00", false}, - {(1 << 16) - 1, "\x80\xff\xff", true}, - {(1 << 16), "00\x00", false}, - {-1 * (1 << 0), "\xff", true}, - {-1*(1<<0) - 1, "0", false}, - {-1 * (1 << 8), "\xff\x00", true}, - {-1*(1<<8) - 1, "0\x00", false}, - {-1 * (1 << 16), "\xff\x00\x00", true}, - {-1*(1<<16) - 1, "00\x00", false}, - {537795476381659745, "0000000\x00", false}, - {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, - {-615126028225187231, "0000000\x00", false}, - {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, - {math.MaxInt64, "0000000\x00", false}, - {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, - {math.MinInt64, "0000000\x00", false}, - {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, - {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, - {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, - } +func TestFileWriter(t *testing.T) { + type ( + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt + wantLCnt int64 + wantPCnt int64 + } + testFnc interface{} // testWrite | testReadFrom | testRemaining + ) - for _, v := range vectors { - var f formatter - output := make([]byte, len(v.output)) - f.formatNumeric(output, v.input) - ok := (f.err == nil) - if ok != v.ok { - if v.ok { - t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) - } else { - t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) + type ( + makeReg struct { + size int64 + wantStr string + } + makeSparse struct { + makeReg 
makeReg + sph sparseHoles + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{0, ""}, + tests: []testFnc{ + testRemaining{0, 0}, + testWrite{"", 0, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testReadFrom{fileOps{""}, 0, nil}, + testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{1, "a"}, + tests: []testFnc{ + testRemaining{1, 1}, + testWrite{"", 0, nil}, + testWrite{"a", 1, nil}, + testWrite{"bcde", 0, ErrWriteTooLong}, + testWrite{"", 0, nil}, + testReadFrom{fileOps{""}, 0, nil}, + testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "hello"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"hello", 5, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testReadFrom{fileOps{"\x00\x00\x00\x00\x00"}, 5, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testReadFrom{fileOps{"\x00\x00\x00\x00\x00extra"}, 5, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "abc\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"abc", 3, nil}, + testRemaining{2, 2}, + testReadFrom{fileOps{"\x00\x00"}, 2, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00abc"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"\x00\x00", 2, nil}, + testRemaining{3, 3}, + testWrite{"abc", 3, nil}, + testReadFrom{fileOps{"z"}, 0, ErrWriteTooLong}, + testWrite{"z", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWrite{"ab\x00\x00\x00cde", 8, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, 
sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00", 3, nil}, + testRemaining{5, 3}, + testWrite{"\x00\x00cde", 5, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 3}, + testReadFrom{fileOps{int64(3), "cde"}, 6, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cdeX"}, 8, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cd"}, 7, io.ErrUnexpectedEOF}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 7, errMissData}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, errUnrefData}, + testRemaining{0, 1}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 2}, + testWrite{"\x00\x00\x00", 3, nil}, + testRemaining{3, 2}, + testWrite{"cde", 2, errMissData}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 4}, + testWrite{"\x00\x00\x00", 3, nil}, + testRemaining{3, 4}, + 
testWrite{"cde", 3, errUnrefData}, + testRemaining{0, 1}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7, 3}, + testWrite{"\x00\x00abc\x00\x00", 7, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7, 3}, + testReadFrom{fileOps{int64(2), "abc", int64(1), "\x00"}, 7, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"abcdefg", 0, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abcde", 5, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testRemaining{5, 3}, + testWrite{"abc", 3, nil}, + testRemaining{2, 0}, + testWrite{"\x00\x00", 2, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testWrite{"abc", 2, errMissData}, + testWrite{"\x00\x00", 0, errMissData}, + }, + }, { + maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testWrite{"abc", 3, nil}, + testWrite{"\x00\x00", 2, errUnrefData}, + }, + }} + + for i, v := range vectors { + var wantStr string + bb := new(bytes.Buffer) + w := testNonEmptyWriter{bb} + var fw fileWriter + switch maker := v.maker.(type) { + case makeReg: + fw = &regFileWriter{w, maker.size} + wantStr = maker.wantStr + case makeSparse: + if !validateSparseEntries(maker.sph, maker.size) { + t.Fatalf("invalid sparse map: %v",
maker.sph) + } + spd := invertSparseEntries(maker.sph, maker.size) + fw = &regFileWriter{w, maker.makeReg.size} + fw = &sparseFileWriter{fw, spd, 0} + wantStr = maker.makeReg.wantStr + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testWrite: + got, err := fw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, Write(%s):\ngot (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr) + } + case testReadFrom: + f := &testFile{ops: tf.ops} + got, err := fw.ReadFrom(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d.%d, ReadFrom(): %v", i, j, err) + } else if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, ReadFrom() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) + } + case testRemaining: + if got := fw.LogicalRemaining(); got != tf.wantLCnt { + t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) + } + if got := fw.PhysicalRemaining(); got != tf.wantPCnt { + t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) } } - if string(output) != v.output { - t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) + + if got := bb.String(); got != wantStr { + t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr) } } } From 9a95e026024d3929928021244d72e59f758616b3 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 5 Sep 2018 13:37:46 -0700 Subject: [PATCH 2/4] archive/tar: port RawHeader() changes This is a port of commits adding RawHeader() to go-1.11 archive/tar.
In addition: * simplify the rawBytes.Write() code in readHeader() * ignore errors from rawBytes.Write(), as (at least for go-1.11) it never returns an error, only panics (if the buffer grew too large) Also, remove the internal/testenv from tar_test.go to enable go test. As working symlink detection is non-trivial on Windows, just skip the test on that platform. In addition to `go test`, I did some minimal manual testing, and it seems this code creates tar-data.json.gz which is identical to the one made by the old version. Signed-off-by: Kir Kolyshkin --- archive/tar/reader.go | 79 +++++++++++++++++++++++++++++++++++++---- archive/tar/tar_test.go | 8 +++-- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 3943718..ea64a38 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -26,6 +26,9 @@ type Reader struct { // It is only the responsibility of every exported method of Reader to // ensure that this error is sticky. err error + + RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. + rawBytes *bytes.Buffer // last raw bits } type fileReader interface { @@ -35,6 +38,25 @@ type fileReader interface { WriteTo(io.Writer) (int64, error) } +// RawBytes accesses the raw bytes of the archive, apart from the file payload itself. +// This includes the header and padding. +// +// This call resets the current rawbytes buffer +// +// Only when RawAccounting is enabled, otherwise this returns nil +func (tr *Reader) RawBytes() []byte { + if !tr.RawAccounting { + return nil + } + if tr.rawBytes == nil { + tr.rawBytes = bytes.NewBuffer(nil) + } + defer tr.rawBytes.Reset() // if we've read them, then flush them. + + return tr.rawBytes.Bytes() + +} + // NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r, curr: &regFileReader{r, 0}} @@ -58,6 +80,14 @@ func (tr *Reader) next() (*Header, error) { var paxHdrs map[string]string var gnuLongName, gnuLongLink string + if tr.RawAccounting { + if tr.rawBytes == nil { + tr.rawBytes = bytes.NewBuffer(nil) + } else { + tr.rawBytes.Reset() + } + } + // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not @@ -66,12 +96,16 @@ func (tr *Reader) next() (*Header, error) { format := FormatUSTAR | FormatPAX | FormatGNU for { // Discard the remainder of the file and any padding. - if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil { + if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil { return nil, err } - if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + n, err := tryReadFull(tr.r, tr.blk[:tr.pad]) + if err != nil { return nil, err } + if tr.RawAccounting { + tr.rawBytes.Write(tr.blk[:n]) + } tr.pad = 0 hdr, rawHdr, err := tr.readHeader() @@ -109,6 +143,10 @@ func (tr *Reader) next() (*Header, error) { return nil, err } + if tr.RawAccounting { + tr.rawBytes.Write(realname) + } + var p parser switch hdr.Typeflag { case TypeGNULongName: @@ -298,6 +336,12 @@ func parsePAX(r io.Reader) (map[string]string, error) { if err != nil { return nil, err } + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err = tr.rawBytes.Write(buf); err != nil { + return nil, err + } + } sbuf := string(buf) // For GNU PAX sparse format 0.0 support. @@ -342,11 +386,20 @@ func parsePAX(r io.Reader) (map[string]string, error) { // * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive.
- if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + n, err := io.ReadFull(tr.r, tr.blk[:]) + if tr.RawAccounting && (err == nil || err == io.EOF) { + tr.rawBytes.Write(tr.blk[:n]) + } + if err != nil { return nil, nil, err // EOF is okay here; exactly 0 bytes read } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { - if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + n, err = io.ReadFull(tr.r, tr.blk[:]) + if tr.RawAccounting && (err == nil || err == io.EOF) { + tr.rawBytes.Write(tr.blk[:n]) + } + if err != nil { return nil, nil, err // EOF is okay here; exactly 1 block of zeros read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { @@ -497,6 +550,9 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err if _, err := mustReadFull(tr.r, blk[:]); err != nil { return nil, err } + if tr.RawAccounting { + tr.rawBytes.Write(blk[:]) + } s = blk.Sparse() continue } @@ -828,12 +884,20 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) { } // discard skips n bytes in r, reporting an error if unable to do so. -func discard(r io.Reader, n int64) error { +func discard(tr *Reader, n int64) error { + var seekSkipped, copySkipped int64 + var err error + r := tr.r + if tr.RawAccounting { + + copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n) + goto out + } + // If possible, Seek to the last byte before the end of the data section. // Do this because Seek is often lazy about reporting errors; this will mask // the fact that the stream may be truncated. We can rely on the // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek if sr, ok := r.(io.Seeker); ok && n > 1 { // Not all io.Seeker can actually Seek. 
For example, os.Stdin implements // io.Seeker, but calling Seek always returns an error and performs @@ -850,7 +914,8 @@ func discard(r io.Reader, n int64) error { } } - copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped) + copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped) +out: if err == io.EOF && seekSkipped+copySkipped < n { err = io.ErrUnexpectedEOF } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 2676853..6227e24 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -8,7 +8,6 @@ import ( "bytes" "errors" "fmt" - "internal/testenv" "io" "io/ioutil" "math" @@ -16,6 +15,7 @@ import ( "path" "path/filepath" "reflect" + "runtime" "strings" "testing" "time" @@ -260,8 +260,10 @@ func TestFileInfoHeaderDir(t *testing.T) { } func TestFileInfoHeaderSymlink(t *testing.T) { - testenv.MustHaveSymlink(t) - + switch runtime.GOOS { + case "android", "nacl", "plan9", "windows": + t.Skip("symlinks not supported") + } tmpdir, err := ioutil.TempDir("", "TestFileInfoHeaderSymlink") if err != nil { t.Fatal(err) From a555806af98473213fd3bf93ab352bf7dacb4e03 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 5 Sep 2018 17:08:01 -0700 Subject: [PATCH 3/4] README.md: update The fork of archive/tar is now from go-1.11. Signed-off-by: Kir Kolyshkin --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 03e3ec4..fe997f6 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.6 +The version of golang stdlib `archive/tar` is from go1.11 It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. @@ -135,4 +135,3 @@ bytes-per-file rate for the storage implications. 
## License See [LICENSE](LICENSE) - From 3a386a2750853734ae14d8d7416e73e2b334071d Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 5 Sep 2018 17:04:57 -0700 Subject: [PATCH 4/4] travis: update golang versions Now that golang 1.11 is out, 1.9 and older versions are no longer supported. Moreover, since the archive/tar is from go-1.11, it uses some features from newer Go versions (strings.Builder and sync.Map) not supported by anything older than Go 1.10. Signed-off-by: Kir Kolyshkin --- .travis.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index dcce57a..d6474eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,8 @@ language: go go: - tip - - 1.x - - 1.8.x - - 1.7.x - - 1.6.x - - 1.5.x + - 1.11.x + - 1.10.x # let us have pretty, fast Docker-based Travis workers! sudo: false