diff --git a/archive/tar/common.go b/archive/tar/common.go index c31df06..36f4e23 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -327,3 +327,14 @@ func toASCII(s string) string { } return buf.String() } + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} diff --git a/archive/tar/example_test.go b/archive/tar/example_test.go index 2317f44..5f0ce2f 100644 --- a/archive/tar/example_test.go +++ b/archive/tar/example_test.go @@ -26,7 +26,7 @@ func Example() { }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, - {"todo.txt", "Get animal handling licence."}, + {"todo.txt", "Get animal handling license."}, } for _, file := range files { hdr := &tar.Header{ @@ -76,5 +76,5 @@ func Example() { // Geoffrey // Gonzo // Contents of todo.txt: - // Get animal handling licence. + // Get animal handling license. } diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 4168ea2..a8b63a2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -12,6 +12,7 @@ import ( "errors" "io" "io/ioutil" + "math" "os" "strconv" "strings" @@ -39,6 +40,10 @@ type Reader struct { rawBytes *bytes.Buffer // last raw bits } +type parser struct { + err error // Last error seen +} + // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. // @@ -70,12 +75,36 @@ type regFileReader struct { nb int64 // number of unread bytes for current file entry } -// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. +// A sparseFileReader is a numBytesReader for reading sparse file data from a +// tar archive. type sparseFileReader struct { - rfr *regFileReader // reads the sparse-encoded file data - sp []sparseEntry // the sparse map for the file - pos int64 // keeps track of file position - tot int64 // total size of the file + rfr numBytesReader // Reads the sparse-encoded file data + sp []sparseEntry // The sparse map for the file + pos int64 // Keeps track of file position + total int64 // Total size of the file +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// +// Sparse files are represented using a series of sparseEntrys. +// Despite the name, a sparseEntry represents an actual data fragment that +// references data found in the underlying archive stream. All regions not +// covered by a sparseEntry are logically filled with zeros. +// +// For example, if the underlying raw file contains the 10-byte data: +// var compactData = "abcdefgh" +// +// And the sparse map has the following entries: +// var sp = []sparseEntry{ +// {offset: 2, numBytes: 5} // Data fragment for [2..7] +// {offset: 18, numBytes: 3} // Data fragment for [18..21] +// } +// +// Then the content of the resulting sparse file with a "real" size of 25 is: +// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type sparseEntry struct { + offset int64 // Starting position of the fragment + numBytes int64 // Length of the fragment } // Keywords for GNU sparse files in a PAX extended header @@ -109,7 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -117,98 +145,88 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - if tr.err == nil { - tr.skipUnread() - } + if tr.err != nil { - return hdr, tr.err + return nil, tr.err } - hdr = tr.readHeader() - if hdr == nil { - return hdr, tr.err - } - // Check for PAX/GNU header. - switch hdr.Typeflag { - case TypeXHeader: - // PAX extended header - headers, err := parsePAX(tr) - if err != nil { - return nil, err - } - // We actually read the whole file, - // but this skips alignment padding - tr.skipUnread() + + var hdr *Header + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + tr.err = tr.skipUnread() if tr.err != nil { return nil, tr.err } + hdr = tr.readHeader() - if hdr == nil { + if tr.err != nil { return nil, tr.err } - mergePAX(hdr, headers) + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, tr.err = parsePAX(tr) + if tr.err != nil { + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + var realname []byte + realname, tr.err = ioutil.ReadAll(tr) + if tr.err != nil { + return nil, tr.err + } - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} - } - return hdr, nil - case TypeGNULongName: - // We have a GNU long name header. Its contents are the real file name. - realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { + return nil, tr.err + } + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + tr.err = p.err + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + default: + mergePAX(hdr, extHdrs) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + tr.err = err return nil, err } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } + break loop // This is a file, so stop } - hdr.Name = cString(realname) - return hdr, err - case TypeGNULongLink: - // We have a GNU long link header. - realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { - return nil, err - } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } - hdr.Linkname = cString(realname) - return hdr, err } - return hdr, tr.err + return hdr, nil } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then @@ -385,6 +403,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { return nil, err } } + sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. @@ -393,35 +412,17 @@ func parsePAX(r io.Reader) (map[string]string, error) { headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value - for len(buf) > 0 { - // or the header was empty to start with. - var sp int - // The size field ends at the first space. - sp = bytes.IndexByte(buf, ' ') - if sp == -1 { + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { return nil, ErrHeader } - // Parse the first token as a decimal integer. - n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil || n < 5 || int64(len(buf)) < n { - return nil, ErrHeader - } - // Extract everything between the decimal and the n -1 on the - // beginning to eat the ' ', -1 on the end to skip the newline. - var record []byte - record, buf = buf[sp+1:n-1], buf[n:] - // The first equals is guaranteed to mark the end of the key. - // Everything else is value. - eq := bytes.IndexByte(record, '=') - if eq == -1 { - return nil, ErrHeader - } - key, value := record[:eq], record[eq+1:] + sbuf = residual keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.Write(value) + sparseMap.WriteString(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. @@ -436,9 +437,42 @@ func parsePAX(r io.Reader) (map[string]string, error) { return headers, nil } -// cString parses bytes as a NUL-terminated C-style string. +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. -func cString(b []byte) string { +func (*parser) parseString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ @@ -446,19 +480,51 @@ func cString(b []byte) string { return string(b[0:n]) } -func (tr *Reader) octal(b []byte) int64 { - // Check for binary format first. +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. +func (p *parser) parseNumeric(b []byte) int64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. if len(b) > 0 && b[0]&0x80 != 0 { - var x int64 - for i, c := range b { - if i == 0 { - c &= 0x7f // ignore signal bit in first byte - } - x = x<<8 | int64(c) + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff } - return x + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) } + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. @@ -469,27 +535,55 @@ func (tr *Reader) octal(b []byte) int64 { if len(b) == 0 { return 0 } - x, err := strconv.ParseUint(cString(b), 8, 64) - if err != nil { - tr.err = err + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader } return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. +func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) - return + _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) + return tr.err } - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -497,11 +591,19 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return false } - given := tr.octal(header[148:156]) + var p parser + given := p.parseOctal(header[148:156]) unsigned, signed := checksum(header) - return given == unsigned || given == signed + return p.err == nil && (given == unsigned || given == signed) } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) @@ -513,7 +615,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -530,7 +632,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -551,22 +653,19 @@ func (tr *Reader) readHeader() *Header { } // Unpack + var p parser hdr := new(Header) s := slicer(header) - hdr.Name = cString(s.next(100)) - hdr.Mode = tr.octal(s.next(8)) - hdr.Uid = int(tr.octal(s.next(8))) - hdr.Gid = int(tr.octal(s.next(8))) - hdr.Size = tr.octal(s.next(12)) - if hdr.Size < 0 { - tr.err = ErrHeader - return nil - } - hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.Name = p.parseString(s.next(100)) + hdr.Mode = p.parseNumeric(s.next(8)) + hdr.Uid = int(p.parseNumeric(s.next(8))) + hdr.Gid = int(p.parseNumeric(s.next(8))) + hdr.Size = p.parseNumeric(s.next(12)) + hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] - hdr.Linkname = cString(s.next(100)) + hdr.Linkname = p.parseString(s.next(100)) // The remainder of the header depends on the value of magic. // The original (v7) version of tar had no explicit magic field, @@ -586,70 +685,76 @@ func (tr *Reader) readHeader() *Header { switch format { case "posix", "gnu", "star": - hdr.Uname = cString(s.next(32)) - hdr.Gname = cString(s.next(32)) + hdr.Uname = p.parseString(s.next(32)) + hdr.Gname = p.parseString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = tr.octal(devmajor) - hdr.Devminor = tr.octal(devminor) + hdr.Devmajor = p.parseNumeric(devmajor) + hdr.Devminor = p.parseNumeric(devminor) } var prefix string switch format { case "posix", "gnu": - prefix = cString(s.next(155)) + prefix = p.parseString(s.next(155)) case "star": - prefix = cString(s.next(131)) - hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) - hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) + prefix = p.parseString(s.next(131)) + hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if tr.err != nil { + if p.err != nil { + tr.err = p.err + return nil + } + + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { tr.err = ErrHeader return nil } - // Maximum value of hdr.Size is 64 GB (12 octal digits), - // so there's no risk of int64 overflowing. - nb := int64(hdr.Size) - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two - // Set the current file reader. + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = tr.octal(header[483:495]) + hdr.Size = p.parseNumeric(header[483:495]) + if p.err != nil { + tr.err = p.err + return nil + } // Read the sparse map. sp := tr.readOldGNUSparseMap(header) if tr.err != nil { return nil } + // Current file is a GNU sparse file. Update the current file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil + } } return hdr } -// A sparseEntry holds a single entry in a sparse file's sparse map. -// A sparse entry indicates the offset and size in a sparse file of a -// block of data. -type sparseEntry struct { - offset int64 - numBytes int64 -} - // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { + var p parser isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { @@ -660,10 +765,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -687,10 +792,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -702,134 +807,111 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { return sp } -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. -// The sparse map is stored just before the file data and padded out to the nearest block boundary. +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, numBytes). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - buf := make([]byte, 2*blockSize) - sparseHeader := buf[:blockSize] + var cntNewline int64 + var buf bytes.Buffer + var blk = make([]byte, blockSize) - // readDecimal is a helper function to read a decimal integer from the sparse map - // while making sure to read from the file in blocks of size blockSize - readDecimal := func() (int64, error) { - // Look for newline - nl := bytes.IndexByte(sparseHeader, '\n') - if nl == -1 { - if len(sparseHeader) >= blockSize { - // This is an error - return 0, ErrHeader + // feedTokens copies data in numBlock chunks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + var feedTokens = func(cnt int64) error { + for cntNewline < cnt { + if _, err := io.ReadFull(r, blk); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err } - oldLen := len(sparseHeader) - newLen := oldLen + blockSize - if cap(sparseHeader) < newLen { - // There's more header, but we need to make room for the next block - copy(buf, sparseHeader) - sparseHeader = buf[:newLen] - } else { - // There's more header, and we can just reslice - sparseHeader = sparseHeader[:newLen] - } - - // Now that sparseHeader is large enough, read next block - if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { - return 0, err - } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { - return 0, err + buf.Write(blk) + for _, c := range blk { + if c == '\n' { + cntNewline++ } } - - // Look for a newline in the new data - nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') - if nl == -1 { - // This is an error - return 0, ErrHeader - } - nl += oldLen // We want the position from the beginning } - // Now that we've found a newline, read a number - n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) - if err != nil { - return 0, ErrHeader - } - - // Update sparseHeader to consume this number - sparseHeader = sparseHeader[nl+1:] - return n, nil + return nil } - // Read the first block - if _, err := io.ReadFull(r, sparseHeader); err != nil { + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + var nextToken = func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return tok[:len(tok)-1] // Cut off newline + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { return nil, err } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader); err != nil { - return nil, err - } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader } - // The first line contains the number of entries - numEntries, err := readDecimal() - if err != nil { + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. + if err := feedTokens(2 * numEntries); err != nil { return nil, err } - - // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - // Read the offset - offset, err := readDecimal() + offset, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - // Read numBytes - numBytes, err := readDecimal() + numBytes, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. -// The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { - // Get number of entries - numEntriesStr, ok := headers[paxGNUSparseNumBlocks] - if !ok { - return nil, ErrHeader - } - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) - if err != nil { +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } - sparseMap := strings.Split(headers[paxGNUSparseMap], ",") - - // There should be two numbers in sparseMap for each entry + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } - // Loop through the entries in the sparse map + // Loop through the entries in the sparse map. + // numEntries is trusted now. sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) if err != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } @@ -846,10 +928,18 @@ func (tr *Reader) numBytes() int64 { // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. +// +// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// the Header.Size claims. func (tr *Reader) Read(b []byte) (n int, err error) { + if tr.err != nil { + return 0, tr.err + } if tr.curr == nil { return 0, io.EOF } + n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err @@ -879,9 +969,33 @@ func (rfr *regFileReader) numBytes() int64 { return rfr.nb } -// readHole reads a sparse file hole ending at offset toOffset -func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { - n64 := toOffset - sfr.pos +// newSparseFileReader creates a new sparseFileReader, but validates all of the +// sparse entries before doing so. +func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { + if total < 0 { + return nil, ErrHeader // Total size cannot be negative + } + + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + for i, s := range sp { + switch { + case s.offset < 0 || s.numBytes < 0: + return nil, ErrHeader // Negative values are never okay + case s.offset > math.MaxInt64-s.numBytes: + return nil, ErrHeader // Integer overflow with large length + case s.offset+s.numBytes > total: + return nil, ErrHeader // Region extends beyond the "real" size + case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: + return nil, ErrHeader // Regions can't overlap and must be in order + } + } + return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil +} + +// readHole reads a sparse hole ending at endOffset. +func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { + n64 := endOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } @@ -895,49 +1009,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - if len(sfr.sp) == 0 { - // No more data fragments to read from. - if sfr.pos < sfr.tot { - // We're in the last hole - n = sfr.readHole(b, sfr.tot) - return - } - // Otherwise, we're at the end of the file - return 0, io.EOF - } - if sfr.tot < sfr.sp[0].offset { - return 0, io.ErrUnexpectedEOF - } - if sfr.pos < sfr.sp[0].offset { - // We're in a hole - n = sfr.readHole(b, sfr.sp[0].offset) - return + // Skip past all empty fragments. + for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { + sfr.sp = sfr.sp[1:] } - // We're not in a hole, so we'll read from the next data fragment - posInFragment := sfr.pos - sfr.sp[0].offset - bytesLeft := sfr.sp[0].numBytes - posInFragment + // If there are no more fragments, then it is possible that there + // is one last sparse hole. + if len(sfr.sp) == 0 { + // This behavior matches the BSD tar utility. + // However, GNU tar stops returning data even if sfr.total is unmet. + if sfr.pos < sfr.total { + return sfr.readHole(b, sfr.total), nil + } + return 0, io.EOF + } + + // In front of a data fragment, so read a hole. + if sfr.pos < sfr.sp[0].offset { + return sfr.readHole(b, sfr.sp[0].offset), nil + } + + // In a data fragment, so read from it. + // This math is overflow free since we verify that offset and numBytes can + // be safely added when creating the sparseFileReader. + endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment + bytesLeft := endPos - sfr.pos // Bytes left in fragment if int64(len(b)) > bytesLeft { - b = b[0:bytesLeft] + b = b[:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) - - if int64(n) == bytesLeft { - // We're done with this fragment - sfr.sp = sfr.sp[1:] + if err == io.EOF { + if sfr.pos < endPos { + err = io.ErrUnexpectedEOF // There was supposed to be more data + } else if sfr.pos < sfr.total { + err = nil // There is still an implicit sparse hole at the end + } } - if err == io.EOF && sfr.pos < sfr.tot { - // We reached the end of the last fragment's data, but there's a final hole - err = nil + if sfr.pos == endPos { + sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it } - return + return n, err } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.nb + return sfr.rfr.numBytes() } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index da01f26..821b4f0 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "strings" @@ -18,9 +19,10 @@ import ( ) type untarTest struct { - file string - headers []*Header - cksums []string + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur } var gnuTarTest = &untarTest{ @@ -49,7 +51,7 @@ var gnuTarTest = &untarTest{ Gname: "eng", }, }, - cksums: []string{ + chksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, @@ -129,7 +131,7 @@ var sparseTarTest = &untarTest{ Devminor: 0, }, }, - cksums: []string{ + chksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", @@ -286,37 +288,118 @@ var untarTests = []*untarTest{ }, }, }, + { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{ + { + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, + { + // Matches the behavior of GNU and BSD tar utilities. + file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{ + { + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, + { + file: "testdata/neg-size.tar", + err: ErrHeader, + }, + { + file: "testdata/issue10968.tar", + err: ErrHeader, + }, + { + file: "testdata/issue11169.tar", + err: ErrHeader, + }, + { + file: "testdata/issue12435.tar", + err: ErrHeader, + }, } func TestReader(t *testing.T) { -testLoop: - for i, test := range untarTests { - f, err := os.Open(test.file) + for i, v := range untarTests { + f, err := os.Open(v.file) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) continue } defer f.Close() - tr := NewReader(f) - for j, header := range test.headers { - hdr, err := tr.Next() - if err != nil || hdr == nil { - t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) - f.Close() - continue testLoop + + // Capture all headers and checksums. + var ( + tr = NewReader(f) + hdrs []*Header + chksums []string + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break } - if !reflect.DeepEqual(*hdr, *header) { - t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", - i, j, *hdr, *header) + hdrs = append(hdrs, hdr) + + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.Copy(h, tr) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) + } + + for j, hdr := range hdrs { + if j >= len(v.headers) { + t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v", + v.file, i, j, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[j]) { + t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot %+v\nwant %+v", + v.file, i, j, *hdr, *v.headers[j]) } } - hdr, err := tr.Next() - if err == io.EOF { - continue testLoop + if len(hdrs) != len(v.headers) { + t.Errorf("file %s, test %d: got %d headers, want %d headers", + v.file, i, len(hdrs), len(v.headers)) } - if hdr != nil || err != nil { - t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) + + for j, sum := range chksums { + if j >= len(v.chksums) { + t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s", + v.file, i, j, sum) + continue + } + if sum != v.chksums[j] { + t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s", + v.file, i, j, sum, v.chksums[j]) + } } + + if err != v.err { + t.Errorf("file %s, test %d: unexpected error: got %v, want %v", + v.file, i, err, v.err) + } + f.Close() } } @@ -356,89 +439,6 @@ func TestPartialRead(t *testing.T) { } } -func TestIncrementalRead(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read file contents in little chunks EOF, - // checksumming all the way - h := md5.New() - rdbuf := make([]uint8, 8) - for { - nr, err := tr.Read(rdbuf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - break - } - h.Write(rdbuf[0:nr]) - } - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - -func TestNonSeekable(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - type readerOnly struct { - io.Reader - } - tr := NewReader(readerOnly{f}) - nread := 0 - - for ; ; nread++ { - _, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - if nread != len(test.headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) - } -} - func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -514,220 +514,312 @@ func TestMergePAX(t *testing.T) { } } -func TestSparseEndToEnd(t *testing.T) { - test := sparseTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read and checksum the file data - h := md5.New() - _, err = io.Copy(h, tr) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - -type sparseFileReadTest struct { - sparseData []byte - sparseMap []sparseEntry - realSize int64 - expected []byte -} - -var sparseFileReadTests = []sparseFileReadTest{ - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 8, - expected: []byte("ab\x00\x00\x00cde"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 10, - expected: []byte("ab\x00\x00\x00cde\x00\x00"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - realSize: 8, - expected: []byte("\x00abc\x00\x00de"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - realSize: 10, - expected: []byte("\x00abc\x00\x00de\x00\x00"), - }, - { - sparseData: []byte(""), - sparseMap: nil, - realSize: 2, - expected: []byte("\x00\x00"), - }, -} - func TestSparseFileReader(t *testing.T) { - for i, test := range sparseFileReadTests { - r := bytes.NewReader(test.sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: test.sparseMap, - pos: 0, - tot: test.realSize, - } - if sfr.numBytes() != nb { - t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) - } - buf, err := ioutil.ReadAll(sfr) + var vectors = []struct { + realSize int64 // Real size of the output file + sparseMap []sparseEntry // Input sparse map + sparseData string // Input compact data + expected string // Expected output data + err error // Expected error outcome + }{{ + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde\x00\x00", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 2, + sparseMap: []sparseEntry{}, + sparseData: "", + expected: "\x00\x00", + }, { + realSize: -2, + sparseMap: []sparseEntry{}, + err: ErrHeader, + }, { + realSize: -10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: io.ErrUnexpectedEOF, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: math.MaxInt64, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 2, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }} + + for i, v := range vectors { + r := bytes.NewReader([]byte(v.sparseData)) + rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} + + var sfr *sparseFileReader + var err error + var buf []byte + + sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + goto fail } - if e := test.expected; !bytes.Equal(buf, e) { - t.Errorf("test %d: Contents = %v, want %v", i, buf, e) + if sfr.numBytes() != int64(len(v.sparseData)) { + t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) + } + buf, err = ioutil.ReadAll(sfr) + if err != nil { + goto fail + } + if string(buf) != v.expected { + t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) } if sfr.numBytes() != 0 { - t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) + t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) } - } -} -func TestSparseIncrementalRead(t *testing.T) { - sparseMap := []sparseEntry{{10, 2}} - sparseData := []byte("Go") - expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" - - r := bytes.NewReader(sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - tot: int64(len(expected)), - } - - // We'll read the data 6 bytes at a time, with a hole of size 10 at - // the beginning and one of size 8 at the end. - var outputBuf bytes.Buffer - buf := make([]byte, 6) - for { - n, err := sfr.Read(buf) - if err == io.EOF { - break + fail: + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - } - if n > 0 { - _, err := outputBuf.Write(buf[:n]) - if err != nil { - t.Errorf("Write: unexpected error %v\n", err) - } - } - } - got := outputBuf.String() - if got != expected { - t.Errorf("Contents = %v, want %v", got, expected) } } func TestReadGNUSparseMap0x1(t *testing.T) { - headers := map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - } - expected := []sparseEntry{ - {offset: 0, numBytes: 5}, - {offset: 10, numBytes: 5}, - {offset: 20, numBytes: 5}, - {offset: 30, numBytes: 5}, - } + const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) + ) + var ( + big1 = fmt.Sprintf("%d", int64(maxInt)) + big2 = fmt.Sprintf("%d", (int64(maxInt)/2)+1) + big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) + ) - sp, err := readGNUSparseMap0x1(headers) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + var vectors = []struct { + extHdrs map[string]string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + err error // Expected errors that may be raised + }{{ + extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, + err: ErrHeader, + }, { + extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big1, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big2, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big3, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }} + + for i, v := range vectors { + sp, err := readGNUSparseMap0x1(v.extHdrs) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } } } func TestReadGNUSparseMap1x0(t *testing.T) { - // This test uses lots of holes so the sparse header takes up more than two blocks - numEntries := 100 - expected := make([]sparseEntry, 0, numEntries) - sparseMap := new(bytes.Buffer) - - fmt.Fprintf(sparseMap, "%d\n", numEntries) - for i := 0; i < numEntries; i++ { - offset := int64(2048 * i) - numBytes := int64(1024) - expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) - fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) + var sp = []sparseEntry{{1, 2}, {3, 4}} + for i := 0; i < 98; i++ { + sp = append(sp, sparseEntry{54321, 12345}) } - // Make the header the smallest multiple of blockSize that fits the sparseMap - headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize - bufLen := blockSize * headerBlocks - buf := make([]byte, bufLen) - copy(buf, sparseMap.Bytes()) + var vectors = []struct { + input string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + cnt int // Expected number of bytes read + err error // Expected errors that may be raised + }{{ + input: "", + cnt: 0, + err: io.ErrUnexpectedEOF, + }, { + input: "ab", + cnt: 2, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 512), + cnt: 512, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 511) + "\n", + cnt: 512, + err: ErrHeader, + }, { + input: strings.Repeat("\n", 512), + cnt: 512, + err: ErrHeader, + }, { + input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), + sparseMap: []sparseEntry{}, + cnt: 512, + }, { + input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), + sparseMap: []sparseEntry{}, + cnt: 1024, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), + sparseMap: []sparseEntry{{2, 3}}, + cnt: 1536, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), + cnt: 1536, + err: ErrHeader, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), + cnt: 1536, + err: io.ErrUnexpectedEOF, + }, { + input: "-1\n2\n\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "1\nk\n2\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), + cnt: 2560, + sparseMap: sp, + }} - // Get an reader to read the sparse map - r := bytes.NewReader(buf) - - // Read the sparse map - sp, err := readGNUSparseMap1x0(r) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + for i, v := range vectors { + r := strings.NewReader(v.input) + sp, err := readGNUSparseMap1x0(r) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) + } + if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { + t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } } } @@ -747,52 +839,286 @@ func TestUninitializedRead(t *testing.T) { } -// Negative header size should not cause panic. -// Issues 10959 and 10960. -func TestNegativeHdrSize(t *testing.T) { - f, err := os.Open("testdata/neg-size.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err != ErrHeader { - t.Error("want ErrHeader, got", err) - } - io.Copy(ioutil.Discard, r) -} +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } -// This used to hang in (*sparseFileReader).readHole due to missing -// verification of sparse offsets against file size. -func TestIssue10968(t *testing.T) { - f, err := os.Open("testdata/issue10968.tar") - if err != nil { - t.Fatal(err) +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := ioutil.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err != nil { - t.Fatal(err) + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + var vectors = []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + // TODO(dsnet): This should pass, but currently fails. + // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, } - _, err = io.Copy(ioutil.Discard, r) - if err != io.ErrUnexpectedEOF { - t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = io.Copy(ioutil.Discard, tr); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } } } -// Do not panic if there are errors in header blocks after the pax header. -// Issue 11169 -func TestIssue11169(t *testing.T) { - f, err := os.Open("testdata/issue11169.tar") +// TestReadHeaderOnly tests that Reader does not attempt to read special +// header-only files. +func TestReadHeaderOnly(t *testing.T) { + f, err := os.Open("testdata/hdr-only.tar") if err != nil { - t.Fatal(err) + t.Fatalf("unexpected error: %v", err) } defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") + + var hdrs []*Header + tr := NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Next(): got %v, want %v", err, nil) + continue + } + hdrs = append(hdrs, hdr) + + // If a special flag, we should read nothing. + cnt, _ := io.ReadFull(tr, []byte{0}) + if cnt > 0 && hdr.Typeflag != TypeReg { + t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) + } + } + + // File is crafted with 16 entries. The later 8 are identical to the first + // 8 except that the size is set. + if len(hdrs) != 16 { + t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) + } + for i := 0; i < 8; i++ { + var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] + hdr1.Size, hdr2.Size = 0, 0 + if !reflect.DeepEqual(*hdr1, *hdr2) { + t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) + } + } +} + +func TestParsePAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + input string + residual string + outputKey string + outputVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.input) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) + } + } + if ok && (key != v.outputKey || val != v.outputVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.input, key, val, v.outputKey, v.outputVal) + } + if res != v.residual { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.input, res, v.residual) + } + } +} + +func TestParseNumeric(t *testing.T) { + var vectors = []struct { + input string + output int64 + ok bool + }{ + // Test base-256 (binary) encoded values. + {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. + {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + num := p.parseNumeric([]byte(v.input)) + ok := (p.err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) + } + } + if ok && num != v.output { + t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) + } } } diff --git a/archive/tar/testdata/gnu-multi-hdrs.tar b/archive/tar/testdata/gnu-multi-hdrs.tar new file mode 100644 index 0000000..8bcad55 Binary files /dev/null and b/archive/tar/testdata/gnu-multi-hdrs.tar differ diff --git a/archive/tar/testdata/hdr-only.tar b/archive/tar/testdata/hdr-only.tar new file mode 100644 index 0000000..f250340 Binary files /dev/null and b/archive/tar/testdata/hdr-only.tar differ diff --git a/archive/tar/testdata/issue12435.tar b/archive/tar/testdata/issue12435.tar new file mode 100644 index 0000000..3542dd8 Binary files /dev/null and b/archive/tar/testdata/issue12435.tar differ diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar index 5deea3d..21edf38 100644 Binary files a/archive/tar/testdata/neg-size.tar and b/archive/tar/testdata/neg-size.tar differ diff --git a/archive/tar/testdata/pax-multi-hdrs.tar b/archive/tar/testdata/pax-multi-hdrs.tar new file mode 100644 index 0000000..14bc759 Binary files /dev/null and b/archive/tar/testdata/pax-multi-hdrs.tar differ diff --git a/archive/tar/testdata/pax-path-hdr.tar b/archive/tar/testdata/pax-path-hdr.tar new file mode 100644 index 0000000..ab8fc32 Binary files /dev/null and b/archive/tar/testdata/pax-path-hdr.tar differ diff --git a/archive/tar/testdata/ustar-file-reg.tar b/archive/tar/testdata/ustar-file-reg.tar new file mode 100644 index 0000000..c84fa27 Binary files /dev/null and b/archive/tar/testdata/ustar-file-reg.tar differ diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 9dbc01a..0426381 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" "io" - "os" "path" + "sort" "strconv" "strings" "time" @@ -23,7 +23,6 @@ var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") - errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) @@ -43,6 +42,10 @@ type Writer struct { paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } +type formatter struct { + err error // Last error seen +} + // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -69,17 +72,9 @@ func (tw *Writer) Flush() error { } // Write s into b, terminating it with a NUL if there is room. -// If the value is too long for the field and allowPax is true add a paxheader record instead -func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } +func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { - if tw.err == nil { - tw.err = ErrFieldTooLong - } + f.err = ErrFieldTooLong return } ascii := toASCII(s) @@ -90,40 +85,40 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, } // Encode x as an octal ASCII string and write it into b with leading zeros. -func (tw *Writer) octal(b []byte, x int64) { +func (f *formatter) formatOctal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL. for len(s)+1 < len(b) { s = "0" + s } - tw.cString(b, s, false, paxNone, nil) + f.formatString(b, s) } -// Write x into b, either as octal or as binary (GNUtar/star extension). -// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead -func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - tw.octal(b, x) +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1<= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format return } - // If it is too long for octal, and pax is preferred, use a pax header - if allowPax && tw.preferPax { - tw.octal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - // Too big: use binary (big-endian). - tw.usedBinary = true - for i := len(b) - 1; x > 0 && i >= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // highest bit indicates binary format + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong } var ( @@ -162,6 +157,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters + var f formatter var header []byte // We need to select which scratch buffer to use carefully, @@ -176,10 +172,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { copy(header, zeroBlock) s := slicer(header) + // Wrappers around formatter that automatically sets paxHeaders if the + // argument extends beyond the capacity of the input byte slice. + var formatString = func(b []byte, s string, paxKeyword string) { + needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + f.formatString(b, s) + } + var formatNumeric = func(b []byte, x int64, paxKeyword string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + f.formatOctal(b, x) + return + } + + // If it is too long for octal, and PAX is preferred, use a PAX header. + if paxKeyword != paxNone && tw.preferPax { + f.formatOctal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + tw.usedBinary = true + f.formatNumeric(b, x) + } + // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) - tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) + formatString(pathHeaderBytes, hdr.Name, paxPath) // Handle out of range ModTime carefully. var modTime int64 @@ -187,25 +213,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { modTime = hdr.ModTime.Unix() } - tw.octal(s.next(8), hdr.Mode) // 100:108 - tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 - tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 - tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 - tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + f.formatOctal(s.next(8), hdr.Mode) // 100:108 + formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 + formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 + formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 + formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 - tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) + formatString(s.next(100), hdr.Linkname, paxLinkpath) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 - tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 - tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 - tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 + copy(s.next(8), []byte("ustar\x0000")) // 257:265 + formatString(s.next(32), hdr.Uname, paxUname) // 265:297 + formatString(s.next(32), hdr.Gname, paxGname) // 297:329 + formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 + formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) - tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix + formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { @@ -215,37 +241,26 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - suffix := hdr.Name - prefix := "" - if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { - var err error - prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) - if err == nil { - // ok we can use a ustar long name instead of pax, now correct the fields + prefix, suffix, ok := splitUSTARPath(hdr.Name) + if ok { + // Since we can encode in USTAR format, disable PAX header. + delete(paxHeaders, paxPath) - // remove the path field from the pax header. this will suppress the pax header - delete(paxHeaders, paxPath) - - // update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) - - // Use the ustar magic if we used ustar long names. - if len(prefix) > 0 && !tw.usedBinary { - copy(header[257:265], []byte("ustar\x00")) - } - } + // Update the path fields + formatString(pathHeaderBytes, suffix, paxNone) + formatString(prefixHeaderBytes, prefix, paxNone) } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. chksum, _ := checksum(header) - tw.octal(header[148:155], chksum) + f.formatOctal(header[148:155], chksum) // Never fails header[155] = ' ' - if tw.err != nil { - // problem with header; probably integer too big for a field. + // Check if there were any formatting errors. + if f.err != nil { + tw.err = f.err return tw.err } @@ -270,28 +285,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { return tw.err } -// writeUSTARLongName splits a USTAR long name hdr.Name. -// name must be < 256 characters. errNameTooLong is returned -// if hdr.Name can't be split. The splitting heuristic -// is compatible with gnu tar. -func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length > fileNamePrefixSize+1 { + if length <= fileNameSize || !isASCII(name) { + return "", "", false + } else if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } + i := strings.LastIndex(name[:length], "/") - // nlen contains the resulting length in the name field. - // plen contains the resulting length in the prefix field. - nlen := len(name) - i - 1 - plen := i + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - err = errNameTooLong - return + return "", "", false } - prefix, suffix = name[:i], name[i+1:] - return + return name[:i], name[i+1:], true } // writePaxHeader writes an extended pax header to the @@ -304,11 +316,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files - // with the current pid. - pid := os.Getpid() + // with the current pid. However, this results in differing outputs + // for identical inputs. As such, the constant 0 is now used instead. + // golang.org/issue/12358 dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, - fmt.Sprintf("PaxHeaders.%d", pid), file) + fullName := path.Join(dir, "PaxHeaders.0", file) ascii := toASCII(fullName) if len(ascii) > 100 { @@ -318,8 +330,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // Construct the body var buf bytes.Buffer - for k, v := range paxHeaders { - fmt.Fprint(&buf, paxHeader(k+"="+v)) + // Keys are sorted before writing to body to allow deterministic output. + var keys []string + for k := range paxHeaders { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) @@ -335,17 +354,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// paxHeader formats a single pax record, prefixing it with the appropriate length -func paxHeader(msg string) string { - const padding = 2 // Extra padding for space and newline - size := len(msg) + padding +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s\n", size, msg) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. if len(record) != size { - // Final adjustment if adding size increased - // the number of digits in size size = len(record) - record = fmt.Sprintf("%d %s\n", size, msg) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) } return record } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index fe46a67..6e91d90 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -9,8 +9,10 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" + "sort" "strings" "testing" "testing/iotest" @@ -291,7 +293,7 @@ func TestPax(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -330,7 +332,7 @@ func TestPaxSymlink(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -380,7 +382,7 @@ func TestPaxNonAscii(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -439,21 +441,49 @@ func TestPaxXattrs(t *testing.T) { } } -func TestPAXHeader(t *testing.T) { - medName := strings.Repeat("CD", 50) - longName := strings.Repeat("AB", 100) - paxTests := [][2]string{ - {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, - {"a=b", "6 a=b\n"}, // Single digit length - {"a=names", "11 a=names\n"}, // Test case involving carries - {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, - {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} +func TestPaxHeadersSorted(t *testing.T) { + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := strings.Repeat(" ", int(hdr.Size)) - for _, test := range paxTests { - key, expected := test[0], test[1] - if result := paxHeader(key); result != expected { - t.Fatalf("paxHeader: got %s, expected %s", result, expected) - } + hdr.Xattrs = map[string]string{ + "foo": "foo", + "bar": "bar", + "baz": "baz", + "qux": "qux", + } + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + + // xattr bar should always appear before others + indices := []int{ + bytes.Index(buf.Bytes(), []byte("bar=bar")), + bytes.Index(buf.Bytes(), []byte("baz=baz")), + bytes.Index(buf.Bytes(), []byte("foo=foo")), + bytes.Index(buf.Bytes(), []byte("qux=qux")), + } + if !sort.IntsAreSorted(indices) { + t.Fatal("PAX headers are not sorted") } } @@ -544,3 +574,149 @@ func TestWriteAfterClose(t *testing.T) { t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) } } + +func TestSplitUSTARPath(t *testing.T) { + var sr = strings.Repeat + + var vectors = []struct { + input string // Input path + prefix string // Expected output prefix + suffix string // Expected output suffix + ok bool // Split success? + }{ + {"", "", "", false}, + {"abc", "", "", false}, + {"用戶名", "", "", false}, + {sr("a", fileNameSize), "", "", false}, + {sr("a", fileNameSize) + "/", "", "", false}, + {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, + {sr("a", fileNamePrefixSize) + "/", "", "", false}, + {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, + {sr("a", fileNameSize+1), "", "", false}, + {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, + {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), + sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, + {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, + {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + } + + for _, v := range vectors { + prefix, suffix, ok := splitUSTARPath(v.input) + if prefix != v.prefix || suffix != v.suffix || ok != v.ok { + t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", + v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) + } + } +} + +func TestFormatPAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + inputKey string + inputVal string + output string + }{ + {"k", "v", "6 k=v\n"}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, + {"path", longName, "210 path=" + longName + "\n"}, + {"path", medName, "110 path=" + medName + "\n"}, + {"foo", "ba", "9 foo=ba\n"}, + {"foo", "bar", "11 foo=bar\n"}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, + {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, + } + + for _, v := range vectors { + output := formatPAXRecord(v.inputKey, v.inputVal) + if output != v.output { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inputKey, v.inputVal, output, v.output) + } + } +} + +func TestFitsInBase256(t *testing.T) { + var vectors = []struct { + input int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.input) + if ok != v.ok { + t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestFormatNumeric(t *testing.T) { + var vectors = []struct { + input int64 + output string + ok bool + }{ + // Test base-256 (binary) encoded values. + {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + output := make([]byte, len(v.output)) + f.formatNumeric(output, v.input) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) + } + } + if string(output) != v.output { + t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) + } + } +}