diff --git a/.travis.yml b/.travis.yml index dcce57a..a053d3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,17 +1,17 @@ language: go go: - tip - - 1.x - - 1.8.x - - 1.7.x - - 1.6.x - - 1.5.x + - 1.5.1 + - 1.4.3 + - 1.3.3 + - 1.2.2 # let us have pretty, fast Docker-based Travis workers! sudo: false install: - go get -d ./... + - go get golang.org/x/tools/cmd/vet script: - go test -v ./... diff --git a/LICENSE b/LICENSE index ca03685..8ba5491 100644 --- a/LICENSE +++ b/LICENSE @@ -1,28 +1,19 @@ Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA -All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors -may be used to endorse or promote products derived from this software without -specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/README.md b/README.md index c2e7f48..b89afe5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # tar-split [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) -[![Go Report Card](https://goreportcard.com/badge/github.com/vbatts/tar-split)](https://goreportcard.com/report/github.com/vbatts/tar-split) Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. @@ -26,23 +25,6 @@ go get github.com/vbatts/tar-split/cmd/tar-split For cli usage, see its [README.md](cmd/tar-split/README.md). For the library see the [docs](#docs) -## Demo - -### Basic disassembly and assembly - -This demonstrates the `tar-split` command and how to assemble a tar archive from the `tar-data.json.gz` - - -![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805) -[youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc) - -### Docker layer preservation - -This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content. - -![docker tar-split demo](https://i.ytimg.com/vi_webp/vh5wyjIOBtc/default.webp) -[youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw) - ## Caveat Eventually this should detect TARs that this is not possible with. @@ -67,7 +49,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.6 +The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f). It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. diff --git a/archive/tar/common.go b/archive/tar/common.go index 36f4e23..c31df06 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -327,14 +327,3 @@ func toASCII(s string) string { } return buf.String() } - -// isHeaderOnlyType checks if the given type flag is of the type that has no -// data section even if a size is specified. -func isHeaderOnlyType(flag byte) bool { - switch flag { - case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: - return true - default: - return false - } -} diff --git a/archive/tar/example_test.go b/archive/tar/example_test.go index 5f0ce2f..2317f44 100644 --- a/archive/tar/example_test.go +++ b/archive/tar/example_test.go @@ -26,7 +26,7 @@ func Example() { }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, - {"todo.txt", "Get animal handling license."}, + {"todo.txt", "Get animal handling licence."}, } for _, file := range files { hdr := &tar.Header{ @@ -76,5 +76,5 @@ func Example() { // Geoffrey // Gonzo // Contents of todo.txt: - // Get animal handling license. + // Get animal handling licence. } diff --git a/archive/tar/reader.go b/archive/tar/reader.go index adf3212..4168ea2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -12,7 +12,6 @@ import ( "errors" "io" "io/ioutil" - "math" "os" "strconv" "strings" @@ -40,10 +39,6 @@ type Reader struct { rawBytes *bytes.Buffer // last raw bits } -type parser struct { - err error // Last error seen -} - // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. 
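Reviewer note: `RawBytes` together with `RawAccounting` is the distinguishing API of this fork, so the intended call pattern is worth having in front of you. A minimal sketch, assuming the exported field and method behave as the doc comment above describes; the input path is hypothetical:

```go
package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"

	"github.com/vbatts/tar-split/archive/tar"
)

func main() {
	f, err := os.Open("example.tar") // hypothetical input path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	tr := tar.NewReader(f)
	tr.RawAccounting = true // retain the non-payload bytes for each entry
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		// After Next, RawBytes holds everything consumed since the previous
		// payload: any alignment padding plus this entry's header block(s).
		fmt.Printf("%s: %d raw bytes\n", hdr.Name, len(tr.RawBytes()))
		if _, err := io.Copy(ioutil.Discard, tr); err != nil {
			log.Fatal(err)
		}
	}
}
```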
// @@ -75,36 +70,12 @@ type regFileReader struct { nb int64 // number of unread bytes for current file entry } -// A sparseFileReader is a numBytesReader for reading sparse file data from a -// tar archive. +// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. type sparseFileReader struct { - rfr numBytesReader // Reads the sparse-encoded file data - sp []sparseEntry // The sparse map for the file - pos int64 // Keeps track of file position - total int64 // Total size of the file -} - -// A sparseEntry holds a single entry in a sparse file's sparse map. -// -// Sparse files are represented using a series of sparseEntrys. -// Despite the name, a sparseEntry represents an actual data fragment that -// references data found in the underlying archive stream. All regions not -// covered by a sparseEntry are logically filled with zeros. -// -// For example, if the underlying raw file contains the 10-byte data: -// var compactData = "abcdefgh" -// -// And the sparse map has the following entries: -// var sp = []sparseEntry{ -// {offset: 2, numBytes: 5} // Data fragment for [2..7] -// {offset: 18, numBytes: 3} // Data fragment for [18..21] -// } -// -// Then the content of the resulting sparse file with a "real" size of 25 is: -// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 -type sparseEntry struct { - offset int64 // Starting position of the fragment - numBytes int64 // Length of the fragment + rfr *regFileReader // reads the sparse-encoded file data + sp []sparseEntry // the sparse map for the file + pos int64 // keeps track of file position + tot int64 // total size of the file } // Keywords for GNU sparse files in a PAX extended header @@ -138,6 +109,7 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { + var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -145,88 +117,98 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - - if tr.err != nil { - return nil, tr.err + if tr.err == nil { + tr.skipUnread() } - - var hdr *Header - var extHdrs map[string]string - - // Externally, Next iterates through the tar archive as if it is a series of - // files. Internally, the tar format often uses fake "files" to add meta - // data that describes the next file. These meta data "files" should not - // normally be visible to the outside. As such, this loop iterates through - // one or more "header files" until it finds a "normal file". -loop: - for { - tr.err = tr.skipUnread() + if tr.err != nil { + return hdr, tr.err + } + hdr = tr.readHeader() + if hdr == nil { + return hdr, tr.err + } + // Check for PAX/GNU header. + switch hdr.Typeflag { + case TypeXHeader: + // PAX extended header + headers, err := parsePAX(tr) + if err != nil { + return nil, err + } + // We actually read the whole file, + // but this skips alignment padding + tr.skipUnread() if tr.err != nil { return nil, tr.err } - hdr = tr.readHeader() - if tr.err != nil { + if hdr == nil { return nil, tr.err } - // Check for PAX/GNU special headers and files. 
- switch hdr.Typeflag { - case TypeXHeader: - extHdrs, tr.err = parsePAX(tr) - if tr.err != nil { - return nil, tr.err - } - continue loop // This is a meta header affecting the next header - case TypeGNULongName, TypeGNULongLink: - var realname []byte - realname, tr.err = ioutil.ReadAll(tr) - if tr.err != nil { - return nil, tr.err - } + mergePAX(hdr, headers) - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { - return nil, tr.err - } - } - - // Convert GNU extensions to use PAX headers. - if extHdrs == nil { - extHdrs = make(map[string]string) - } - var p parser - switch hdr.Typeflag { - case TypeGNULongName: - extHdrs[paxPath] = p.parseString(realname) - case TypeGNULongLink: - extHdrs[paxLinkpath] = p.parseString(realname) - } - if p.err != nil { - tr.err = p.err - return nil, tr.err - } - continue loop // This is a meta header affecting the next header - default: - mergePAX(hdr, extHdrs) - - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - tr.err = err + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) + if err != nil { + tr.err = err + return nil, err + } + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + } + return hdr, nil + case TypeGNULongName: + // We have a GNU long name header. Its contents are the real file name. + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + var buf []byte + if tr.RawAccounting { + if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - if sp != nil { - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil, tr.err - } - } - break loop // This is a file, so stop + buf = make([]byte, tr.rawBytes.Len()) + copy(buf[:], tr.RawBytes()) } + hdr, err := tr.Next() + // since the above call to Next() resets the buffer, we need to throw the bytes over + if tr.RawAccounting { + buf = append(buf, tr.RawBytes()...) + if _, err = tr.rawBytes.Write(buf); err != nil { + return nil, err + } + } + hdr.Name = cString(realname) + return hdr, err + case TypeGNULongLink: + // We have a GNU long link header. + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + var buf []byte + if tr.RawAccounting { + if _, err = tr.rawBytes.Write(realname); err != nil { + return nil, err + } + buf = make([]byte, tr.rawBytes.Len()) + copy(buf[:], tr.RawBytes()) + } + hdr, err := tr.Next() + // since the above call to Next() resets the buffer, we need to throw the bytes over + if tr.RawAccounting { + buf = append(buf, tr.RawBytes()...) + if _, err = tr.rawBytes.Write(buf); err != nil { + return nil, err + } + } + hdr.Linkname = cString(realname) + return hdr, err } - return hdr, nil + return hdr, tr.err } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then @@ -403,7 +385,6 @@ func parsePAX(r io.Reader) (map[string]string, error) { return nil, err } } - sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. 
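Reviewer note: the parsePAX rewrite that follows inlines the record grammar, so a standalone sketch of that grammar may help review. This is an illustrative parser of a single `"%d %s=%s\n"` record, not the fork's code; the leading decimal counts the entire record, itself, the space, and the trailing newline included:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseRecord decodes one PAX record of the form "%d %s=%s\n" and returns
// the key, the value, and the unconsumed remainder of the input.
func parseRecord(s string) (key, value, rest string, err error) {
	sp := strings.IndexByte(s, ' ')
	if sp == -1 {
		return "", "", s, fmt.Errorf("no length field")
	}
	n, err := strconv.Atoi(s[:sp])
	if err != nil || n < sp+2 || n > len(s) || s[n-1] != '\n' {
		return "", "", s, fmt.Errorf("bad record length")
	}
	rec := s[sp+1 : n-1] // between the space and the newline
	eq := strings.IndexByte(rec, '=')
	if eq == -1 {
		return "", "", s, fmt.Errorf("no key/value separator")
	}
	return rec[:eq], rec[eq+1:], s[n:], nil
}

func main() {
	k, v, rest, err := parseRecord("30 mtime=1350244992.023960108\n")
	fmt.Println(k, v, len(rest), err) // mtime 1350244992.023960108 0 <nil>
}
```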
@@ -412,17 +393,35 @@ func parsePAX(r io.Reader) (map[string]string, error) { headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value - for len(sbuf) > 0 { - key, value, residual, err := parsePAXRecord(sbuf) - if err != nil { + for len(buf) > 0 { + // or the header was empty to start with. + var sp int + // The size field ends at the first space. + sp = bytes.IndexByte(buf, ' ') + if sp == -1 { return nil, ErrHeader } - sbuf = residual + // Parse the first token as a decimal integer. + n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) + if err != nil || n < 5 || int64(len(buf)) < n { + return nil, ErrHeader + } + // Extract everything between the decimal and the n -1 on the + // beginning to eat the ' ', -1 on the end to skip the newline. + var record []byte + record, buf = buf[sp+1:n-1], buf[n:] + // The first equals is guaranteed to mark the end of the key. + // Everything else is value. + eq := bytes.IndexByte(record, '=') + if eq == -1 { + return nil, ErrHeader + } + key, value := record[:eq], record[eq+1:] keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.WriteString(value) + sparseMap.Write(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. @@ -437,42 +436,9 @@ func parsePAX(r io.Reader) (map[string]string, error) { return headers, nil } -// parsePAXRecord parses the input PAX record string into a key-value pair. -// If parsing is successful, it will slice off the currently read record and -// return the remainder as r. -// -// A PAX record is of the following form: -// "%d %s=%s\n" % (size, key, value) -func parsePAXRecord(s string) (k, v, r string, err error) { - // The size field ends at the first space. - sp := strings.IndexByte(s, ' ') - if sp == -1 { - return "", "", s, ErrHeader - } - - // Parse the first token as a decimal integer. - n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int - if perr != nil || n < 5 || int64(len(s)) < n { - return "", "", s, ErrHeader - } - - // Extract everything between the space and the final newline. - rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] - if nl != "\n" { - return "", "", s, ErrHeader - } - - // The first equals separates the key from the value. - eq := strings.IndexByte(rec, '=') - if eq == -1 { - return "", "", s, ErrHeader - } - return rec[:eq], rec[eq+1:], rem, nil -} - -// parseString parses bytes as a NUL-terminated C-style string. +// cString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. -func (*parser) parseString(b []byte) string { +func cString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ @@ -480,51 +446,19 @@ func (*parser) parseString(b []byte) string { return string(b[0:n]) } -// parseNumeric parses the input as being encoded in either base-256 or octal. -// This function may return negative numbers. -// If parsing fails or an integer overflow occurs, err will be set. -func (p *parser) parseNumeric(b []byte) int64 { - // Check for base-256 (binary) format first. - // If the first bit is set, then all following bits constitute a two's - // complement encoded number in big-endian byte order. +func (tr *Reader) octal(b []byte) int64 { + // Check for binary format first. 
if len(b) > 0 && b[0]&0x80 != 0 { - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. - var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } - - var x uint64 + var x int64 for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing if i == 0 { - c &= 0x7f // Ignore signal bit in first byte + c &= 0x7f // ignore signal bit in first byte } - if (x >> 56) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - x = x<<8 | uint64(c) + x = x<<8 | int64(c) } - if (x >> 63) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - if inv == 0xff { - return ^int64(x) - } - return int64(x) + return x } - // Normal case is base-8 (octal) format. - return p.parseOctal(b) -} - -func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. @@ -535,55 +469,27 @@ func (p *parser) parseOctal(b []byte) int64 { if len(b) == 0 { return 0 } - x, perr := strconv.ParseUint(p.parseString(b), 8, 64) - if perr != nil { - p.err = ErrHeader + x, err := strconv.ParseUint(cString(b), 8, 64) + if err != nil { + tr.err = err } return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any -// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is -// encountered in the data portion; it is okay to hit io.EOF in the padding. -// -// Note that this function still works properly even when sparse files are being -// used since numBytes returns the bytes remaining in the underlying io.Reader. -func (tr *Reader) skipUnread() error { - dataSkip := tr.numBytes() // Number of data bytes to skip - totalSkip := dataSkip + tr.pad // Total number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. +func (tr *Reader) skipUnread() { + nr := tr.numBytes() + tr.pad // number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) - return tr.err + _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) + return } - // If possible, Seek to the last byte before the end of the data section. - // Do this because Seek is often lazy about reporting errors; this will mask - // the fact that the tar stream may be truncated. We can rely on the - // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek - if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { - // Not all io.Seeker can actually Seek. For example, os.Stdin implements - // io.Seeker, but calling Seek always returns an error and performs - // no action. Thus, we try an innocent seek to the current position - // to see if Seek is really supported. - pos1, err := sr.Seek(0, os.SEEK_CUR) - if err == nil { - // Seek seems supported, so perform the real Seek. 
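Reviewer note: the `octal` method above folds two encodings into one helper, which is easy to misread. Here is a self-contained sketch of the same two cases, restricted to non-negative values just as the fork's version is; the NUL/space trimming stands in for the cString handling of real header fields (an assumption for brevity):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseNumeric mirrors the two encodings of numeric header fields: if the
// high bit of the first byte is set, the field is a big-endian base-256
// (binary) number with the marker bit masked off; otherwise it is
// NUL/space-padded octal text.
func parseNumeric(b []byte) (int64, error) {
	if len(b) > 0 && b[0]&0x80 != 0 {
		var x int64
		for i, c := range b {
			if i == 0 {
				c &= 0x7f // strip the binary-format marker bit
			}
			x = x<<8 | int64(c)
		}
		return x, nil
	}
	s := strings.Trim(string(b), " \x00")
	if s == "" {
		return 0, nil
	}
	x, err := strconv.ParseUint(s, 8, 64)
	return int64(x), err
}

func main() {
	fmt.Println(parseNumeric([]byte("0000644\x00")))       // 420 <nil>
	fmt.Println(parseNumeric([]byte{0x80, 0, 0, 1, 0, 0})) // 65536 <nil>
}
```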
- pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) - if err != nil { - tr.err = err - return tr.err - } - seekSkipped = pos2 - pos1 + if sr, ok := tr.r.(io.Seeker); ok { + if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { + return } } - - var copySkipped int64 // Number of bytes skipped via CopyN - copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { - tr.err = io.ErrUnexpectedEOF - } - return tr.err + _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -591,32 +497,23 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return false } - var p parser - given := p.parseOctal(header[148:156]) + given := tr.octal(header[148:156]) unsigned, signed := checksum(header) - return p.err == nil && (given == unsigned || given == signed) + return given == unsigned || given == signed } -// readHeader reads the next block header and assumes that the underlying reader -// is already aligned to a block boundary. -// -// The err will be set to io.EOF only when one of the following occurs: -// * Exactly 0 bytes are read and EOF is hit. -// * Exactly 1 block of zeros is read and EOF is hit. -// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) - if n, err := io.ReadFull(tr.r, header); err != nil { - tr.err = err + if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header[:n]); err != nil { - tr.err = err + if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + return nil } } - return nil // io.EOF is okay here + return nil } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -626,15 +523,14 @@ func (tr *Reader) readHeader() *Header { // Two blocks of zero bytes marks the end of the archive. if bytes.Equal(header, zeroBlock[0:blockSize]) { - if n, err := io.ReadFull(tr.r, header); err != nil { - tr.err = err + if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header[:n]); err != nil { - tr.err = err + if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + return nil } } - return nil // io.EOF is okay here + return nil } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -655,19 +551,22 @@ func (tr *Reader) readHeader() *Header { } // Unpack - var p parser hdr := new(Header) s := slicer(header) - hdr.Name = p.parseString(s.next(100)) - hdr.Mode = p.parseNumeric(s.next(8)) - hdr.Uid = int(p.parseNumeric(s.next(8))) - hdr.Gid = int(p.parseNumeric(s.next(8))) - hdr.Size = p.parseNumeric(s.next(12)) - hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) + hdr.Name = cString(s.next(100)) + hdr.Mode = tr.octal(s.next(8)) + hdr.Uid = int(tr.octal(s.next(8))) + hdr.Gid = int(tr.octal(s.next(8))) + hdr.Size = tr.octal(s.next(12)) + if hdr.Size < 0 { + tr.err = ErrHeader + return nil + } + hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] - hdr.Linkname = p.parseString(s.next(100)) + hdr.Linkname = cString(s.next(100)) // The remainder of the header depends on the value of magic. 
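Reviewer note: `verifyChecksum` accepts either of the two sums produced by `checksum(header)`. A sketch of how those two interpretations differ; the function name here is illustrative, not the package's:

```go
package main

import "fmt"

// headerChecksums computes the two checksum interpretations the reader
// accepts: the standard one treating header bytes as unsigned, and a
// legacy one treating them as signed. The 8-byte checksum field itself
// (bytes 148..155) is counted as ASCII spaces, per the tar spec.
func headerChecksums(header []byte) (unsigned, signed int64) {
	for i, c := range header {
		if 148 <= i && i < 156 {
			c = ' '
		}
		unsigned += int64(c)
		signed += int64(int8(c))
	}
	return unsigned, signed
}

func main() {
	block := make([]byte, 512)
	copy(block, "foo.txt")
	u, s := headerChecksums(block)
	fmt.Println(u, s) // identical here, since no byte has the high bit set
}
```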
// The original (v7) version of tar had no explicit magic field, @@ -687,76 +586,70 @@ func (tr *Reader) readHeader() *Header { switch format { case "posix", "gnu", "star": - hdr.Uname = p.parseString(s.next(32)) - hdr.Gname = p.parseString(s.next(32)) + hdr.Uname = cString(s.next(32)) + hdr.Gname = cString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = p.parseNumeric(devmajor) - hdr.Devminor = p.parseNumeric(devminor) + hdr.Devmajor = tr.octal(devmajor) + hdr.Devminor = tr.octal(devminor) } var prefix string switch format { case "posix", "gnu": - prefix = p.parseString(s.next(155)) + prefix = cString(s.next(155)) case "star": - prefix = p.parseString(s.next(131)) - hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) - hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) + prefix = cString(s.next(131)) + hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if p.err != nil { - tr.err = p.err - return nil - } - - nb := hdr.Size - if isHeaderOnlyType(hdr.Typeflag) { - nb = 0 - } - if nb < 0 { + if tr.err != nil { tr.err = ErrHeader return nil } - // Set the current file reader. + // Maximum value of hdr.Size is 64 GB (12 octal digits), + // so there's no risk of int64 overflowing. + nb := int64(hdr.Size) tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + + // Set the current file reader. tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = p.parseNumeric(header[483:495]) - if p.err != nil { - tr.err = p.err - return nil - } + hdr.Size = tr.octal(header[483:495]) // Read the sparse map. sp := tr.readOldGNUSparseMap(header) if tr.err != nil { return nil } - // Current file is a GNU sparse file. Update the current file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil - } + tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} } return hdr } +// A sparseEntry holds a single entry in a sparse file's sparse map. +// A sparse entry indicates the offset and size in a sparse file of a +// block of data. +type sparseEntry struct { + offset int64 + numBytes int64 +} + // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. 
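Reviewer note: the padding line `tr.pad = -nb & (blockSize - 1)` is correct but terse. A tiny demonstration that the bit trick matches round-up-to-block semantics:

```go
package main

import "fmt"

const blockSize = 512

func main() {
	// File data is padded to a 512-byte boundary. Because blockSize is a
	// power of two, the padding after nb bytes can be computed without a
	// modulo: -nb & (blockSize - 1), exactly as readHeader does.
	for _, nb := range []int64{0, 1, 511, 512, 513, 1196} {
		pad := -nb & (blockSize - 1)
		fmt.Printf("size %4d -> pad %3d (total %4d)\n", nb, pad, nb+pad)
	}
}
```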
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { - var p parser isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { @@ -767,10 +660,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) - if p.err != nil { - tr.err = p.err + offset := tr.octal(s.next(oldGNUSparseOffsetSize)) + numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) + if tr.err != nil { + tr.err = ErrHeader return nil } if offset == 0 && numBytes == 0 { @@ -794,10 +687,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) - if p.err != nil { - tr.err = p.err + offset := tr.octal(s.next(oldGNUSparseOffsetSize)) + numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) + if tr.err != nil { + tr.err = ErrHeader return nil } if offset == 0 && numBytes == 0 { @@ -809,111 +702,134 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { return sp } -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format -// version 1.0. The format of the sparse map consists of a series of -// newline-terminated numeric fields. The first field is the number of entries -// and is always present. Following this are the entries, consisting of two -// fields (offset, numBytes). This function must stop reading at the end -// boundary of the block containing the last newline. -// -// Note that the GNU manual says that numeric values should be encoded in octal -// format. However, the GNU tar utility itself outputs these values in decimal. -// As such, this library treats values as being encoded in decimal. +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. +// The sparse map is stored just before the file data and padded out to the nearest block boundary. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - var cntNewline int64 - var buf bytes.Buffer - var blk = make([]byte, blockSize) + buf := make([]byte, 2*blockSize) + sparseHeader := buf[:blockSize] - // feedTokens copies data in numBlock chunks from r into buf until there are - // at least cnt newlines in buf. It will not read more blocks than needed. 
- var feedTokens = func(cnt int64) error { - for cntNewline < cnt { - if _, err := io.ReadFull(r, blk); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - return err + // readDecimal is a helper function to read a decimal integer from the sparse map + // while making sure to read from the file in blocks of size blockSize + readDecimal := func() (int64, error) { + // Look for newline + nl := bytes.IndexByte(sparseHeader, '\n') + if nl == -1 { + if len(sparseHeader) >= blockSize { + // This is an error + return 0, ErrHeader } - buf.Write(blk) - for _, c := range blk { - if c == '\n' { - cntNewline++ + oldLen := len(sparseHeader) + newLen := oldLen + blockSize + if cap(sparseHeader) < newLen { + // There's more header, but we need to make room for the next block + copy(buf, sparseHeader) + sparseHeader = buf[:newLen] + } else { + // There's more header, and we can just reslice + sparseHeader = sparseHeader[:newLen] + } + + // Now that sparseHeader is large enough, read next block + if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { + return 0, err + } + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { + return 0, err } } + + // Look for a newline in the new data + nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') + if nl == -1 { + // This is an error + return 0, ErrHeader + } + nl += oldLen // We want the position from the beginning } - return nil + // Now that we've found a newline, read a number + n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) + if err != nil { + return 0, ErrHeader + } + + // Update sparseHeader to consume this number + sparseHeader = sparseHeader[nl+1:] + return n, nil } - // nextToken gets the next token delimited by a newline. This assumes that - // at least one newline exists in the buffer. - var nextToken = func() string { - cntNewline-- - tok, _ := buf.ReadString('\n') - return tok[:len(tok)-1] // Cut off newline - } - - // Parse for the number of entries. - // Use integer overflow resistant math to check this. - if err := feedTokens(1); err != nil { + // Read the first block + if _, err := io.ReadFull(r, sparseHeader); err != nil { return nil, err } - numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int - if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { - return nil, ErrHeader + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err := tr.rawBytes.Write(sparseHeader); err != nil { + return nil, err + } } - // Parse for all member entries. - // numEntries is trusted after this since a potential attacker must have - // committed resources proportional to what this library used. 
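Reviewer note: readGNUSparseMap1x0 is easier to follow with the wire format in front of you. A sketch that builds a sparse-1.0 map the way GNU tar lays it out, per the comment above (a decimal entry count, then decimal offset/size pairs, each newline-terminated, padded to a block boundary); the helper name is mine, not the package's:

```go
package main

import (
	"bytes"
	"fmt"
)

const blockSize = 512

type sparseEntry struct{ offset, numBytes int64 }

// buildSparse1x0 renders a PAX sparse-1.0 map: entry count, then
// offset/size pairs, padded out to a block boundary so the file data
// that follows stays block-aligned.
func buildSparse1x0(sp []sparseEntry) []byte {
	var b bytes.Buffer
	fmt.Fprintf(&b, "%d\n", len(sp))
	for _, s := range sp {
		fmt.Fprintf(&b, "%d\n%d\n", s.offset, s.numBytes)
	}
	pad := (blockSize - b.Len()%blockSize) % blockSize
	b.Write(make([]byte, pad))
	return b.Bytes()
}

func main() {
	m := buildSparse1x0([]sparseEntry{{0, 5}, {10, 5}})
	fmt.Println(len(m))        // 512: the map fits in one block
	fmt.Printf("%q\n", m[:11]) // "2\n0\n5\n10\n5\n"
}
```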
- if err := feedTokens(2 * numEntries); err != nil { + // The first line contains the number of entries + numEntries, err := readDecimal() + if err != nil { return nil, err } + + // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(nextToken(), 10, 64) + // Read the offset + offset, err := readDecimal() if err != nil { - return nil, ErrHeader + return nil, err } - numBytes, err := strconv.ParseInt(nextToken(), 10, 64) + // Read numBytes + numBytes, err := readDecimal() if err != nil { - return nil, ErrHeader + return nil, err } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } + return sp, nil } -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format -// version 0.1. The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { - // Get number of entries. - // Use integer overflow resistant math to check this. - numEntriesStr := extHdrs[paxGNUSparseNumBlocks] - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int - if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. +// The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { + // Get number of entries + numEntriesStr, ok := headers[paxGNUSparseNumBlocks] + if !ok { + return nil, ErrHeader + } + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) + if err != nil { return nil, ErrHeader } - // There should be two numbers in sparseMap for each entry. - sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") + sparseMap := strings.Split(headers[paxGNUSparseMap], ",") + + // There should be two numbers in sparseMap for each entry if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } - // Loop through the entries in the sparse map. - // numEntries is trusted now. + // Loop through the entries in the sparse map sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) if err != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } + return sp, nil } @@ -930,18 +846,10 @@ func (tr *Reader) numBytes() int64 { // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. -// -// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, -// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what -// the Header.Size claims. func (tr *Reader) Read(b []byte) (n int, err error) { - if tr.err != nil { - return 0, tr.err - } if tr.curr == nil { return 0, io.EOF } - n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err @@ -971,33 +879,9 @@ func (rfr *regFileReader) numBytes() int64 { return rfr.nb } -// newSparseFileReader creates a new sparseFileReader, but validates all of the -// sparse entries before doing so. 
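Reviewer note: the 0.1 variant keeps the whole map in PAX headers (`GNU.sparse.numblocks` and `GNU.sparse.map`). A standalone mirror of readGNUSparseMap0x1's parsing, usable for eyeballing the format:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

type sparseEntry struct{ offset, numBytes int64 }

// parseSparse0x1 mirrors readGNUSparseMap0x1: numBlocks is the entry
// count, and sparseMap is a flat comma-separated list of offset,size
// pairs, two fields per entry.
func parseSparse0x1(numBlocks, sparseMap string) ([]sparseEntry, error) {
	n, err := strconv.ParseInt(numBlocks, 10, 0)
	if err != nil {
		return nil, err
	}
	fields := strings.Split(sparseMap, ",")
	if int64(len(fields)) != 2*n {
		return nil, fmt.Errorf("map has %d fields, want %d", len(fields), 2*n)
	}
	sp := make([]sparseEntry, 0, n)
	for i := int64(0); i < n; i++ {
		off, err1 := strconv.ParseInt(fields[2*i], 10, 64)
		sz, err2 := strconv.ParseInt(fields[2*i+1], 10, 64)
		if err1 != nil || err2 != nil {
			return nil, fmt.Errorf("bad entry %d", i)
		}
		sp = append(sp, sparseEntry{off, sz})
	}
	return sp, nil
}

func main() {
	sp, err := parseSparse0x1("2", "0,5,10,5")
	fmt.Println(sp, err) // [{0 5} {10 5}] <nil>
}
```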
-func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { - if total < 0 { - return nil, ErrHeader // Total size cannot be negative - } - - // Validate all sparse entries. These are the same checks as performed by - // the BSD tar utility. - for i, s := range sp { - switch { - case s.offset < 0 || s.numBytes < 0: - return nil, ErrHeader // Negative values are never okay - case s.offset > math.MaxInt64-s.numBytes: - return nil, ErrHeader // Integer overflow with large length - case s.offset+s.numBytes > total: - return nil, ErrHeader // Region extends beyond the "real" size - case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: - return nil, ErrHeader // Regions can't overlap and must be in order - } - } - return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil -} - -// readHole reads a sparse hole ending at endOffset. -func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { - n64 := endOffset - sfr.pos +// readHole reads a sparse file hole ending at offset toOffset +func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { + n64 := toOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } @@ -1011,54 +895,49 @@ func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - // Skip past all empty fragments. - for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { - sfr.sp = sfr.sp[1:] - } - - // If there are no more fragments, then it is possible that there - // is one last sparse hole. if len(sfr.sp) == 0 { - // This behavior matches the BSD tar utility. - // However, GNU tar stops returning data even if sfr.total is unmet. - if sfr.pos < sfr.total { - return sfr.readHole(b, sfr.total), nil + // No more data fragments to read from. + if sfr.pos < sfr.tot { + // We're in the last hole + n = sfr.readHole(b, sfr.tot) + return } + // Otherwise, we're at the end of the file return 0, io.EOF } - - // In front of a data fragment, so read a hole. + if sfr.tot < sfr.sp[0].offset { + return 0, io.ErrUnexpectedEOF + } if sfr.pos < sfr.sp[0].offset { - return sfr.readHole(b, sfr.sp[0].offset), nil + // We're in a hole + n = sfr.readHole(b, sfr.sp[0].offset) + return } - // In a data fragment, so read from it. - // This math is overflow free since we verify that offset and numBytes can - // be safely added when creating the sparseFileReader. 
- endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment - bytesLeft := endPos - sfr.pos // Bytes left in fragment + // We're not in a hole, so we'll read from the next data fragment + posInFragment := sfr.pos - sfr.sp[0].offset + bytesLeft := sfr.sp[0].numBytes - posInFragment if int64(len(b)) > bytesLeft { - b = b[:bytesLeft] + b = b[0:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) - if err == io.EOF { - if sfr.pos < endPos { - err = io.ErrUnexpectedEOF // There was supposed to be more data - } else if sfr.pos < sfr.total { - err = nil // There is still an implicit sparse hole at the end - } + + if int64(n) == bytesLeft { + // We're done with this fragment + sfr.sp = sfr.sp[1:] } - if sfr.pos == endPos { - sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it + if err == io.EOF && sfr.pos < sfr.tot { + // We reached the end of the last fragment's data, but there's a final hole + err = nil } - return n, err + return } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.numBytes() + return sfr.rfr.nb } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 821b4f0..da01f26 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -10,7 +10,6 @@ import ( "fmt" "io" "io/ioutil" - "math" "os" "reflect" "strings" @@ -19,10 +18,9 @@ import ( ) type untarTest struct { - file string // Test input file - headers []*Header // Expected output headers - chksums []string // MD5 checksum of files, leave as nil if not checked - err error // Expected error to occur + file string + headers []*Header + cksums []string } var gnuTarTest = &untarTest{ @@ -51,7 +49,7 @@ var gnuTarTest = &untarTest{ Gname: "eng", }, }, - chksums: []string{ + cksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, @@ -131,7 +129,7 @@ var sparseTarTest = &untarTest{ Devminor: 0, }, }, - chksums: []string{ + cksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", @@ -288,118 +286,37 @@ var untarTests = []*untarTest{ }, }, }, - { - // Matches the behavior of GNU, BSD, and STAR tar utilities. - file: "testdata/gnu-multi-hdrs.tar", - headers: []*Header{ - { - Name: "GNU2/GNU2/long-path-name", - Linkname: "GNU4/GNU4/long-linkpath-name", - ModTime: time.Unix(0, 0), - Typeflag: '2', - }, - }, - }, - { - // Matches the behavior of GNU and BSD tar utilities. - file: "testdata/pax-multi-hdrs.tar", - headers: []*Header{ - { - Name: "bar", - Linkname: "PAX4/PAX4/long-linkpath-name", - ModTime: time.Unix(0, 0), - Typeflag: '2', - }, - }, - }, - { - file: "testdata/neg-size.tar", - err: ErrHeader, - }, - { - file: "testdata/issue10968.tar", - err: ErrHeader, - }, - { - file: "testdata/issue11169.tar", - err: ErrHeader, - }, - { - file: "testdata/issue12435.tar", - err: ErrHeader, - }, } func TestReader(t *testing.T) { - for i, v := range untarTests { - f, err := os.Open(v.file) +testLoop: + for i, test := range untarTests { + f, err := os.Open(test.file) if err != nil { - t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) + t.Errorf("test %d: Unexpected error: %v", i, err) continue } defer f.Close() - - // Capture all headers and checksums. 
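Reviewer note: the sparse Read logic above is the trickiest part of this revert, and its semantics are easiest to check against a direct expansion. A sketch that reconstructs the logical file from a compact stream plus sparse map, matching the first test vector below:

```go
package main

import (
	"bytes"
	"fmt"
)

type sparseEntry struct {
	offset   int64
	numBytes int64
}

// expand reconstructs the logical file content from the compact data
// fragments and the sparse map, filling uncovered regions with zeros.
func expand(compact []byte, sp []sparseEntry, realSize int64) []byte {
	out := make([]byte, realSize)
	var pos int64 // read position within the compact data
	for _, s := range sp {
		copy(out[s.offset:s.offset+s.numBytes], compact[pos:pos+s.numBytes])
		pos += s.numBytes
	}
	return out
}

func main() {
	sp := []sparseEntry{{offset: 0, numBytes: 2}, {offset: 5, numBytes: 3}}
	got := expand([]byte("abcde"), sp, 8)
	fmt.Printf("%q\n", got) // "ab\x00\x00\x00cde"
	fmt.Println(bytes.Equal(got, []byte("ab\x00\x00\x00cde"))) // true
}
```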
- var ( - tr = NewReader(f) - hdrs []*Header - chksums []string - ) - for { - var hdr *Header - hdr, err = tr.Next() - if err != nil { - if err == io.EOF { - err = nil // Expected error - } - break + tr := NewReader(f) + for j, header := range test.headers { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) + f.Close() + continue testLoop } - hdrs = append(hdrs, hdr) - - if v.chksums == nil { - continue - } - h := md5.New() - _, err = io.Copy(h, tr) // Effectively an incremental read - if err != nil { - break - } - chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) - } - - for j, hdr := range hdrs { - if j >= len(v.headers) { - t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v", - v.file, i, j, *hdr) - continue - } - if !reflect.DeepEqual(*hdr, *v.headers[j]) { - t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot %+v\nwant %+v", - v.file, i, j, *hdr, *v.headers[j]) + if !reflect.DeepEqual(*hdr, *header) { + t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", + i, j, *hdr, *header) } } - if len(hdrs) != len(v.headers) { - t.Errorf("file %s, test %d: got %d headers, want %d headers", - v.file, i, len(hdrs), len(v.headers)) + hdr, err := tr.Next() + if err == io.EOF { + continue testLoop } - - for j, sum := range chksums { - if j >= len(v.chksums) { - t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s", - v.file, i, j, sum) - continue - } - if sum != v.chksums[j] { - t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s", - v.file, i, j, sum, v.chksums[j]) - } + if hdr != nil || err != nil { + t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) } - - if err != v.err { - t.Errorf("file %s, test %d: unexpected error: got %v, want %v", - v.file, i, err, v.err) - } - f.Close() } } @@ -439,6 +356,89 @@ func TestPartialRead(t *testing.T) { } } +func TestIncrementalRead(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + headers := test.headers + cksums := test.cksums + nread := 0 + + // loop over all files + for ; ; nread++ { + hdr, err := tr.Next() + if hdr == nil || err == io.EOF { + break + } + + // check the header + if !reflect.DeepEqual(*hdr, *headers[nread]) { + t.Errorf("Incorrect header:\nhave %+v\nwant %+v", + *hdr, headers[nread]) + } + + // read file contents in little chunks EOF, + // checksumming all the way + h := md5.New() + rdbuf := make([]uint8, 8) + for { + nr, err := tr.Read(rdbuf) + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Read: unexpected error %v\n", err) + break + } + h.Write(rdbuf[0:nr]) + } + // verify checksum + have := fmt.Sprintf("%x", h.Sum(nil)) + want := cksums[nread] + if want != have { + t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) + } + } + if nread != len(headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) + } +} + +func TestNonSeekable(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + type readerOnly struct { + io.Reader + } + tr := NewReader(readerOnly{f}) + nread := 0 + + for ; ; nread++ { + _, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + } + + if nread != len(test.headers) { + t.Errorf("Didn't 
process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) + } +} + func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -514,312 +514,220 @@ func TestMergePAX(t *testing.T) { } } -func TestSparseFileReader(t *testing.T) { - var vectors = []struct { - realSize int64 // Real size of the output file - sparseMap []sparseEntry // Input sparse map - sparseData string // Input compact data - expected string // Expected output data - err error // Expected error outcome - }{{ - realSize: 8, +func TestSparseEndToEnd(t *testing.T) { + test := sparseTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + headers := test.headers + cksums := test.cksums + nread := 0 + + // loop over all files + for ; ; nread++ { + hdr, err := tr.Next() + if hdr == nil || err == io.EOF { + break + } + + // check the header + if !reflect.DeepEqual(*hdr, *headers[nread]) { + t.Errorf("Incorrect header:\nhave %+v\nwant %+v", + *hdr, headers[nread]) + } + + // read and checksum the file data + h := md5.New() + _, err = io.Copy(h, tr) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // verify checksum + have := fmt.Sprintf("%x", h.Sum(nil)) + want := cksums[nread] + if want != have { + t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) + } + } + if nread != len(headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) + } +} + +type sparseFileReadTest struct { + sparseData []byte + sparseMap []sparseEntry + realSize int64 + expected []byte +} + +var sparseFileReadTests = []sparseFileReadTest{ + { + sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 0, numBytes: 2}, {offset: 5, numBytes: 3}, }, - sparseData: "abcde", - expected: "ab\x00\x00\x00cde", - }, { - realSize: 10, + realSize: 8, + expected: []byte("ab\x00\x00\x00cde"), + }, + { + sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 0, numBytes: 2}, {offset: 5, numBytes: 3}, }, - sparseData: "abcde", - expected: "ab\x00\x00\x00cde\x00\x00", - }, { - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de", - }, { - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 0}, - {offset: 6, numBytes: 0}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de", - }, { realSize: 10, + expected: []byte("ab\x00\x00\x00cde\x00\x00"), + }, + { + sparseData: []byte("abcde"), + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + realSize: 8, + expected: []byte("\x00abc\x00\x00de"), + }, + { + sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 1, numBytes: 3}, {offset: 6, numBytes: 2}, }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de\x00\x00", - }, { realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de\x00\x00", - }, { + expected: []byte("\x00abc\x00\x00de\x00\x00"), + }, + { + sparseData: []byte(""), + sparseMap: nil, realSize: 2, - sparseMap: []sparseEntry{}, - sparseData: "", - expected: "\x00\x00", - }, { - realSize: -2, - 
sparseMap: []sparseEntry{}, - err: ErrHeader, - }, { - realSize: -10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 5}, - }, - sparseData: "abcde", - err: io.ErrUnexpectedEOF, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: -5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: math.MaxInt64, numBytes: 3}, - {offset: 6, numBytes: -5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 2, numBytes: 2}, - }, - sparseData: "abcde", - err: ErrHeader, - }} + expected: []byte("\x00\x00"), + }, +} - for i, v := range vectors { - r := bytes.NewReader([]byte(v.sparseData)) - rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} - - var sfr *sparseFileReader - var err error - var buf []byte - - sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) +func TestSparseFileReader(t *testing.T) { + for i, test := range sparseFileReadTests { + r := bytes.NewReader(test.sparseData) + nb := int64(r.Len()) + sfr := &sparseFileReader{ + rfr: ®FileReader{r: r, nb: nb}, + sp: test.sparseMap, + pos: 0, + tot: test.realSize, + } + if sfr.numBytes() != nb { + t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) + } + buf, err := ioutil.ReadAll(sfr) if err != nil { - goto fail + t.Errorf("test %d: Unexpected error: %v", i, err) } - if sfr.numBytes() != int64(len(v.sparseData)) { - t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) - } - buf, err = ioutil.ReadAll(sfr) - if err != nil { - goto fail - } - if string(buf) != v.expected { - t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) + if e := test.expected; !bytes.Equal(buf, e) { + t.Errorf("test %d: Contents = %v, want %v", i, buf, e) } if sfr.numBytes() != 0 { - t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) + t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) } + } +} - fail: - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) +func TestSparseIncrementalRead(t *testing.T) { + sparseMap := []sparseEntry{{10, 2}} + sparseData := []byte("Go") + expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" + + r := bytes.NewReader(sparseData) + nb := int64(r.Len()) + sfr := &sparseFileReader{ + rfr: ®FileReader{r: r, nb: nb}, + sp: sparseMap, + pos: 0, + tot: int64(len(expected)), + } + + // We'll read the data 6 bytes at a time, with a hole of size 10 at + // the beginning and one of size 8 at the end. 
+ var outputBuf bytes.Buffer + buf := make([]byte, 6) + for { + n, err := sfr.Read(buf) + if err == io.EOF { + break } + if err != nil { + t.Errorf("Read: unexpected error %v\n", err) + } + if n > 0 { + _, err := outputBuf.Write(buf[:n]) + if err != nil { + t.Errorf("Write: unexpected error %v\n", err) + } + } + } + got := outputBuf.String() + if got != expected { + t.Errorf("Contents = %v, want %v", got, expected) } } func TestReadGNUSparseMap0x1(t *testing.T) { - const ( - maxUint = ^uint(0) - maxInt = int(maxUint >> 1) - ) - var ( - big1 = fmt.Sprintf("%d", int64(maxInt)) - big2 = fmt.Sprintf("%d", (int64(maxInt)/2)+1) - big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) - ) + headers := map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + } + expected := []sparseEntry{ + {offset: 0, numBytes: 5}, + {offset: 10, numBytes: 5}, + {offset: 20, numBytes: 5}, + {offset: 30, numBytes: 5}, + } - var vectors = []struct { - extHdrs map[string]string // Input data - sparseMap []sparseEntry // Expected sparse entries to be outputted - err error // Expected errors that may be raised - }{{ - extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, - err: ErrHeader, - }, { - extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big1, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big2, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big3, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, - }} - - for i, v := range vectors { - sp, err := readGNUSparseMap0x1(v.extHdrs) - if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { - t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } + sp, err := readGNUSparseMap0x1(headers) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(sp, expected) { + t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) } } func TestReadGNUSparseMap1x0(t *testing.T) { - var sp = []sparseEntry{{1, 2}, {3, 4}} - for i := 0; i < 98; i++ { - sp = append(sp, sparseEntry{54321, 12345}) + // This test uses lots of holes so the sparse header takes up more than two blocks + numEntries := 100 + expected := make([]sparseEntry, 0, numEntries) + sparseMap := new(bytes.Buffer) + + fmt.Fprintf(sparseMap, "%d\n", numEntries) + for i := 0; i < numEntries; i++ { + offset := int64(2048 * i) + numBytes := int64(1024) + expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) + fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) } - var vectors = []struct { - input string // Input data - sparseMap []sparseEntry // 
Expected sparse entries to be outputted - cnt int // Expected number of bytes read - err error // Expected errors that may be raised - }{{ - input: "", - cnt: 0, - err: io.ErrUnexpectedEOF, - }, { - input: "ab", - cnt: 2, - err: io.ErrUnexpectedEOF, - }, { - input: strings.Repeat("\x00", 512), - cnt: 512, - err: io.ErrUnexpectedEOF, - }, { - input: strings.Repeat("\x00", 511) + "\n", - cnt: 512, - err: ErrHeader, - }, { - input: strings.Repeat("\n", 512), - cnt: 512, - err: ErrHeader, - }, { - input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), - sparseMap: []sparseEntry{}, - cnt: 512, - }, { - input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), - sparseMap: []sparseEntry{}, - cnt: 1024, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), - sparseMap: []sparseEntry{{2, 3}}, - cnt: 1536, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), - cnt: 1536, - err: ErrHeader, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), - cnt: 1536, - err: io.ErrUnexpectedEOF, - }, { - input: "-1\n2\n\n" + strings.Repeat("\x00", 506), - cnt: 512, - err: ErrHeader, - }, { - input: "1\nk\n2\n" + strings.Repeat("\x00", 506), - cnt: 512, - err: ErrHeader, - }, { - input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), - cnt: 2560, - sparseMap: sp, - }} + // Make the header the smallest multiple of blockSize that fits the sparseMap + headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize + bufLen := blockSize * headerBlocks + buf := make([]byte, bufLen) + copy(buf, sparseMap.Bytes()) - for i, v := range vectors { - r := strings.NewReader(v.input) - sp, err := readGNUSparseMap1x0(r) - if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { - t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) - } - if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { - t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } + // Get an reader to read the sparse map + r := bytes.NewReader(buf) + + // Read the sparse map + sp, err := readGNUSparseMap1x0(r) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(sp, expected) { + t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) } } @@ -839,286 +747,52 @@ func TestUninitializedRead(t *testing.T) { } -type reader struct{ io.Reader } -type readSeeker struct{ io.ReadSeeker } -type readBadSeeker struct{ io.ReadSeeker } - -func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } - -// TestReadTruncation test the ending condition on various truncated files and -// that truncated files are still detected even if the underlying io.Reader -// satisfies io.Seeker. 
-func TestReadTruncation(t *testing.T) { - var ss []string - for _, p := range []string{ - "testdata/gnu.tar", - "testdata/ustar-file-reg.tar", - "testdata/pax-path-hdr.tar", - "testdata/sparse-formats.tar", - } { - buf, err := ioutil.ReadFile(p) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - ss = append(ss, string(buf)) - } - - data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] - data2 += strings.Repeat("\x00", 10*512) - trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes - - var vectors = []struct { - input string // Input stream - cnt int // Expected number of headers read - err error // Expected error outcome - }{ - {"", 0, io.EOF}, // Empty file is a "valid" tar file - {data1[:511], 0, io.ErrUnexpectedEOF}, - {data1[:512], 1, io.ErrUnexpectedEOF}, - {data1[:1024], 1, io.EOF}, - {data1[:1536], 2, io.ErrUnexpectedEOF}, - {data1[:2048], 2, io.EOF}, - {data1, 2, io.EOF}, - {data1[:2048] + data2[:1536], 3, io.EOF}, - {data2[:511], 0, io.ErrUnexpectedEOF}, - {data2[:512], 1, io.ErrUnexpectedEOF}, - {data2[:1195], 1, io.ErrUnexpectedEOF}, - {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding - {data2[:1200], 1, io.EOF}, - {data2[:1535], 1, io.EOF}, - {data2[:1536], 1, io.EOF}, // Exact end of padding - {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, - {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, - {data2[:1536] + trash, 1, ErrHeader}, - {data2[:2048], 1, io.EOF}, // Exactly 1 empty block - {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, - {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, - {data2[:2048] + trash, 1, ErrHeader}, - {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) - {data2[:2560] + trash[:1], 1, io.EOF}, - {data2[:2560] + trash[:511], 1, io.EOF}, - {data2[:2560] + trash, 1, io.EOF}, - {data2[:3072], 1, io.EOF}, - {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file - {pax + trash[:1], 0, io.ErrUnexpectedEOF}, - {pax + trash[:511], 0, io.ErrUnexpectedEOF}, - {sparse[:511], 0, io.ErrUnexpectedEOF}, - // TODO(dsnet): This should pass, but currently fails. 
- // {sparse[:512], 0, io.ErrUnexpectedEOF}, - {sparse[:3584], 1, io.EOF}, - {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header - {sparse[:9216], 1, io.EOF}, - {sparse[:9728], 2, io.ErrUnexpectedEOF}, - {sparse[:10240], 2, io.EOF}, - {sparse[:11264], 2, io.ErrUnexpectedEOF}, - {sparse, 5, io.EOF}, - {sparse + trash, 5, io.EOF}, - } - - for i, v := range vectors { - for j := 0; j < 6; j++ { - var tr *Reader - var s1, s2 string - - switch j { - case 0: - tr = NewReader(&reader{strings.NewReader(v.input)}) - s1, s2 = "io.Reader", "auto" - case 1: - tr = NewReader(&reader{strings.NewReader(v.input)}) - s1, s2 = "io.Reader", "manual" - case 2: - tr = NewReader(&readSeeker{strings.NewReader(v.input)}) - s1, s2 = "io.ReadSeeker", "auto" - case 3: - tr = NewReader(&readSeeker{strings.NewReader(v.input)}) - s1, s2 = "io.ReadSeeker", "manual" - case 4: - tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) - s1, s2 = "ReadBadSeeker", "auto" - case 5: - tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) - s1, s2 = "ReadBadSeeker", "manual" - } - - var cnt int - var err error - for { - if _, err = tr.Next(); err != nil { - break - } - cnt++ - if s2 == "manual" { - if _, err = io.Copy(ioutil.Discard, tr); err != nil { - break - } - } - } - if err != v.err { - t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", - i, s1, s2, err, v.err) - } - if cnt != v.cnt { - t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", - i, s1, s2, cnt, v.cnt) - } - } - } -} - -// TestReadHeaderOnly tests that Reader does not attempt to read special -// header-only files. -func TestReadHeaderOnly(t *testing.T) { - f, err := os.Open("testdata/hdr-only.tar") +// Negative header size should not cause panic. +// Issues 10959 and 10960. +func TestNegativeHdrSize(t *testing.T) { + f, err := os.Open("testdata/neg-size.tar") if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Fatal(err) } defer f.Close() - - var hdrs []*Header - tr := NewReader(f) - for { - hdr, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Next(): got %v, want %v", err, nil) - continue - } - hdrs = append(hdrs, hdr) - - // If a special flag, we should read nothing. - cnt, _ := io.ReadFull(tr, []byte{0}) - if cnt > 0 && hdr.Typeflag != TypeReg { - t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) - } + r := NewReader(f) + _, err = r.Next() + if err != ErrHeader { + t.Error("want ErrHeader, got", err) } + io.Copy(ioutil.Discard, r) +} - // File is crafted with 16 entries. The later 8 are identical to the first - // 8 except that the size is set. - if len(hdrs) != 16 { - t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) +// This used to hang in (*sparseFileReader).readHole due to missing +// verification of sparse offsets against file size. 
+func TestIssue10968(t *testing.T) { + f, err := os.Open("testdata/issue10968.tar") + if err != nil { + t.Fatal(err) } - for i := 0; i < 8; i++ { - var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] - hdr1.Size, hdr2.Size = 0, 0 - if !reflect.DeepEqual(*hdr1, *hdr2) { - t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) - } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err != nil { + t.Fatal(err) + } + _, err = io.Copy(ioutil.Discard, r) + if err != io.ErrUnexpectedEOF { + t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) } } -func TestParsePAXRecord(t *testing.T) { - var medName = strings.Repeat("CD", 50) - var longName = strings.Repeat("AB", 100) - - var vectors = []struct { - input string - residual string - outputKey string - outputVal string - ok bool - }{ - {"6 k=v\n\n", "\n", "k", "v", true}, - {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, - {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, - {"110 path=" + medName + "\n", "", "path", medName, true}, - {"9 foo=ba\n", "", "foo", "ba", true}, - {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, - {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, - {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, - {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, - {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, - {"1 k=1\n", "1 k=1\n", "", "", false}, - {"6 k~1\n", "6 k~1\n", "", "", false}, - {"6_k=1\n", "6_k=1\n", "", "", false}, - {"6 k=1 ", "6 k=1 ", "", "", false}, - {"632 k=1\n", "632 k=1\n", "", "", false}, - {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, - {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, - {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, +// Do not panic if there are errors in header blocks after the pax header. +// Issue 11169 +func TestIssue11169(t *testing.T) { + f, err := os.Open("testdata/issue11169.tar") + if err != nil { + t.Fatal(err) } - - for _, v := range vectors { - key, val, res, err := parsePAXRecord(v.input) - ok := (err == nil) - if v.ok != ok { - if v.ok { - t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) - } else { - t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) - } - } - if ok && (key != v.outputKey || val != v.outputVal) { - t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", - v.input, key, val, v.outputKey, v.outputVal) - } - if res != v.residual { - t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", - v.input, res, v.residual) - } - } -} - -func TestParseNumeric(t *testing.T) { - var vectors = []struct { - input string - output int64 - ok bool - }{ - // Test base-256 (binary) encoded values. 
- {"", 0, true}, - {"\x80", 0, true}, - {"\x80\x00", 0, true}, - {"\x80\x00\x00", 0, true}, - {"\xbf", (1 << 6) - 1, true}, - {"\xbf\xff", (1 << 14) - 1, true}, - {"\xbf\xff\xff", (1 << 22) - 1, true}, - {"\xff", -1, true}, - {"\xff\xff", -1, true}, - {"\xff\xff\xff", -1, true}, - {"\xc0", -1 * (1 << 6), true}, - {"\xc0\x00", -1 * (1 << 14), true}, - {"\xc0\x00\x00", -1 * (1 << 22), true}, - {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, - {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, - {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, - {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, - {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, - {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, - {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, - {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, - {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, - - // Test base-8 (octal) encoded values. - {"0000000\x00", 0, true}, - {" \x0000000\x00", 0, true}, - {" \x0000003\x00", 3, true}, - {"00000000227\x00", 0227, true}, - {"032033\x00 ", 032033, true}, - {"320330\x00 ", 0320330, true}, - {"0000660\x00 ", 0660, true}, - {"\x00 0000660\x00 ", 0660, true}, - {"0123456789abcdef", 0, false}, - {"0123456789\x00abcdef", 0, false}, - {"01234567\x0089abcdef", 342391, true}, - {"0123\x7e\x5f\x264123", 0, false}, - } - - for _, v := range vectors { - var p parser - num := p.parseNumeric([]byte(v.input)) - ok := (p.err == nil) - if v.ok != ok { - if v.ok { - t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) - } else { - t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) - } - } - if ok && num != v.output { - t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) - } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err == nil { + t.Fatal("Unexpected success") } } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 9ef319a..d63c072 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -94,12 +94,13 @@ func TestRoundTrip(t *testing.T) { var b bytes.Buffer tw := NewWriter(&b) hdr := &Header{ - Name: "file.txt", - Uid: 1 << 21, // too big for 8 octal digits - Size: int64(len(data)), - // https://github.com/golang/go/commit/0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb#diff-d7bf2a98d7b57b6ff754ca406f1b7581R105 - ModTime: time.Now().AddDate(0, 0, 0).Round(1 * time.Second), + Name: "file.txt", + Uid: 1 << 21, // too big for 8 octal digits + Size: int64(len(data)), + ModTime: time.Now(), } + // tar only supports second precision. 
+ hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond) if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("tw.WriteHeader: %v", err) } diff --git a/archive/tar/testdata/gnu-multi-hdrs.tar b/archive/tar/testdata/gnu-multi-hdrs.tar deleted file mode 100644 index 8bcad55..0000000 Binary files a/archive/tar/testdata/gnu-multi-hdrs.tar and /dev/null differ diff --git a/archive/tar/testdata/hdr-only.tar b/archive/tar/testdata/hdr-only.tar deleted file mode 100644 index f250340..0000000 Binary files a/archive/tar/testdata/hdr-only.tar and /dev/null differ diff --git a/archive/tar/testdata/issue12435.tar b/archive/tar/testdata/issue12435.tar deleted file mode 100644 index 3542dd8..0000000 Binary files a/archive/tar/testdata/issue12435.tar and /dev/null differ diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar index 21edf38..5deea3d 100644 Binary files a/archive/tar/testdata/neg-size.tar and b/archive/tar/testdata/neg-size.tar differ diff --git a/archive/tar/testdata/pax-multi-hdrs.tar b/archive/tar/testdata/pax-multi-hdrs.tar deleted file mode 100644 index 14bc759..0000000 Binary files a/archive/tar/testdata/pax-multi-hdrs.tar and /dev/null differ diff --git a/archive/tar/testdata/pax-path-hdr.tar b/archive/tar/testdata/pax-path-hdr.tar deleted file mode 100644 index ab8fc32..0000000 Binary files a/archive/tar/testdata/pax-path-hdr.tar and /dev/null differ diff --git a/archive/tar/testdata/ustar-file-reg.tar b/archive/tar/testdata/ustar-file-reg.tar deleted file mode 100644 index c84fa27..0000000 Binary files a/archive/tar/testdata/ustar-file-reg.tar and /dev/null differ diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 0426381..9dbc01a 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" "io" + "os" "path" - "sort" "strconv" "strings" "time" ) @@ -23,6 +23,7 @@ var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") + errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) @@ -42,10 +43,6 @@ type Writer struct { paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } -type formatter struct { - err error // Last error seen -} - // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -72,9 +69,17 @@ func (tw *Writer) Flush() error { } // Write s into b, terminating it with a NUL if there is room. -func (f *formatter) formatString(b []byte, s string) { +// If the value is too long for the field and allowPax is true, add a pax header record instead. +func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { + needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } if len(s) > len(b) { - f.err = ErrFieldTooLong + if tw.err == nil { + tw.err = ErrFieldTooLong + } return } ascii := toASCII(s) @@ -85,40 +90,40 @@ func (f *formatter) formatString(b []byte, s string) { } // Encode x as an octal ASCII string and write it into b with leading zeros. -func (f *formatter) formatOctal(b []byte, x int64) { +func (tw *Writer) octal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL.
for len(s)+1 < len(b) { s = "0" + s } - f.formatString(b, s) + tw.cString(b, s, false, paxNone, nil) } -// fitsInBase256 reports whether x can be encoded into n bytes using base-256 -// encoding. Unlike octal encoding, base-256 encoding does not require that the -// string ends with a NUL character. Thus, all n bytes are available for output. -// -// If operating in binary mode, this assumes strict GNU binary mode; which means -// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is -// equivalent to the sign bit in two's complement form. -func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 - return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) -} - -// Write x into b, as binary (GNUtar/star extension). -func (f *formatter) formatNumeric(b []byte, x int64) { - if fitsInBase256(len(b), x) { - for i := len(b) - 1; i >= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // Highest bit indicates binary format +// Write x into b, either as octal or as binary (GNUtar/star extension). +// If the value is too long for the field, and writing pax headers is enabled both for this field and the writer, add a pax header record instead. +func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + tw.octal(b, x) return } - f.formatOctal(b, 0) // Last resort, just write zero - f.err = ErrFieldTooLong + // If it is too long for octal, and pax is preferred, use a pax header + if allowPax && tw.preferPax { + tw.octal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + // Too big: use binary (big-endian). + tw.usedBinary = true + for i := len(b) - 1; x > 0 && i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // highest bit indicates binary format } var ( @@ -157,7 +162,6 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters - var f formatter var header []byte // We need to select which scratch buffer to use carefully, @@ -172,40 +176,10 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { copy(header, zeroBlock) s := slicer(header) - // Wrappers around formatter that automatically sets paxHeaders if the - // argument extends beyond the capacity of the input byte slice. - var formatString = func(b []byte, s string, paxKeyword string) { - needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } - f.formatString(b, s) - } - var formatNumeric = func(b []byte, x int64, paxKeyword string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - f.formatOctal(b, x) - return - } - - // If it is too long for octal, and PAX is preferred, use a PAX header. - if paxKeyword != paxNone && tw.preferPax { - f.formatOctal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - tw.usedBinary = true - f.formatNumeric(b, x) - } - // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) - formatString(pathHeaderBytes, hdr.Name, paxPath) + tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) // Handle out of range ModTime carefully.
var modTime int64 @@ -213,25 +187,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { modTime = hdr.ModTime.Unix() } - f.formatOctal(s.next(8), hdr.Mode) // 100:108 - formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 - formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 - formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 - formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + tw.octal(s.next(8), hdr.Mode) // 100:108 + tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 + tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 + tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 + tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 - formatString(s.next(100), hdr.Linkname, paxLinkpath) + tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - formatString(s.next(32), hdr.Uname, paxUname) // 265:297 - formatString(s.next(32), hdr.Gname, paxGname) // 297:329 - formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 - formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 + copy(s.next(8), []byte("ustar\x0000")) // 257:265 + tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 + tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 + tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 + tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) - formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix + tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { @@ -241,26 +215,37 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - prefix, suffix, ok := splitUSTARPath(hdr.Name) - if ok { - // Since we can encode in USTAR format, disable PAX header. - delete(paxHeaders, paxPath) + suffix := hdr.Name + prefix := "" + if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { + var err error + prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) + if err == nil { + // ok we can use a ustar long name instead of pax, now correct the fields - // Update the path fields - formatString(pathHeaderBytes, suffix, paxNone) - formatString(prefixHeaderBytes, prefix, paxNone) + // remove the path field from the pax header. this will suppress the pax header + delete(paxHeaders, paxPath) + + // update the path fields + tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) + tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) + + // Use the ustar magic if we used ustar long names. + if len(prefix) > 0 && !tw.usedBinary { + copy(header[257:265], []byte("ustar\x00")) + } + } } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. 
chksum, _ := checksum(header) - f.formatOctal(header[148:155], chksum) // Never fails + tw.octal(header[148:155], chksum) header[155] = ' ' - // Check if there were any formatting errors. - if f.err != nil { - tw.err = f.err + if tw.err != nil { + // problem with header; probably integer too big for a field. return tw.err } @@ -285,25 +270,28 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { return tw.err } -// splitUSTARPath splits a path according to USTAR prefix and suffix rules. -// If the path is not splittable, then it will return ("", "", false). -func splitUSTARPath(name string) (prefix, suffix string, ok bool) { +// splitUSTARLongName splits a USTAR long name hdr.Name. +// name must be < 256 characters. errNameTooLong is returned +// if hdr.Name can't be split. The splitting heuristic +// is compatible with GNU tar. +func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { length := len(name) - if length <= fileNameSize || !isASCII(name) { - return "", "", false - } else if length > fileNamePrefixSize+1 { + if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } - i := strings.LastIndex(name[:length], "/") - nlen := len(name) - i - 1 // nlen is length of suffix - plen := i // plen is length of prefix + // nlen contains the resulting length in the name field. + // plen contains the resulting length in the prefix field. + nlen := len(name) - i - 1 + plen := i if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - return "", "", false + err = errNameTooLong + return } - return name[:i], name[i+1:], true + prefix, suffix = name[:i], name[i+1:] + return } // writePaxHeader writes an extended pax header to the @@ -316,11 +304,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files - // with the current pid. However, this results in differing outputs - // for identical inputs. As such, the constant 0 is now used instead. - // golang.org/issue/12358 + // with the current pid. + pid := os.Getpid() dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, "PaxHeaders.0", file) + fullName := path.Join(dir, + fmt.Sprintf("PaxHeaders.%d", pid), file) ascii := toASCII(fullName) if len(ascii) > 100 { @@ -330,15 +318,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // Construct the body var buf bytes.Buffer - // Keys are sorted before writing to body to allow deterministic output. - var keys []string - for k := range paxHeaders { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) + for k, v := range paxHeaders { + fmt.Fprint(&buf, paxHeader(k+"="+v)) } ext.Size = int64(len(buf.Bytes())) @@ -354,18 +335,17 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// formatPAXRecord formats a single PAX record, prefixing it with the -// appropriate length.
-func formatPAXRecord(k, v string) string { - const padding = 3 // Extra padding for ' ', '=', and '\n' - size := len(k) + len(v) + padding +// paxHeader formats a single pax record, prefixing it with the appropriate length +func paxHeader(msg string) string { + const padding = 2 // Extra padding for space and newline + size := len(msg) + padding size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s=%s\n", size, k, v) - - // Final adjustment if adding size field increased the record size. + record := fmt.Sprintf("%d %s\n", size, msg) if len(record) != size { + // Final adjustment if adding size increased + // the number of digits in size size = len(record) - record = fmt.Sprintf("%d %s=%s\n", size, k, v) + record = fmt.Sprintf("%d %s\n", size, msg) } return record } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 6e91d90..fe46a67 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -9,10 +9,8 @@ import ( "fmt" "io" "io/ioutil" - "math" "os" "reflect" - "sort" "strings" "testing" "testing/iotest" @@ -293,7 +291,7 @@ func TestPax(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -332,7 +330,7 @@ func TestPaxSymlink(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -382,7 +380,7 @@ func TestPaxNonAscii(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. 
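[Editorial aside] The paxHeader helper restored above sizes each record self-referentially: the decimal length prefix at the front of "size key=value\n" counts its own digits, so the record may need to be re-measured once after formatting. Below is a minimal standalone sketch of that computation; formatRecord is a hypothetical name used for illustration and is not part of this patch. Its output for "path=/etc/hosts" matches the "19 path=/etc/hosts\n" vector in the TestPAXHeader hunk that follows.

package main

import (
	"fmt"
	"strconv"
)

// formatRecord builds a "size key=value\n" PAX record, where size counts
// its own decimal digits; a single re-measurement absorbs the digit carry.
func formatRecord(k, v string) string {
	const padding = 3 // the ' ', '=', and '\n' bytes
	size := len(k) + len(v) + padding
	size += len(strconv.Itoa(size)) // first guess at the prefix width
	record := fmt.Sprintf("%d %s=%s\n", size, k, v)
	if len(record) != size {
		// The prefix gained a digit; re-measure once and reformat.
		size = len(record)
		record = fmt.Sprintf("%d %s=%s\n", size, k, v)
	}
	return record
}

func main() {
	fmt.Print(formatRecord("path", "/etc/hosts")) // prints "19 path=/etc/hosts\n"
}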
@@ -441,49 +439,21 @@ func TestPaxXattrs(t *testing.T) { } } -func TestPaxHeadersSorted(t *testing.T) { - fileinfo, err := os.Stat("testdata/small.txt") - if err != nil { - t.Fatal(err) - } - hdr, err := FileInfoHeader(fileinfo, "") - if err != nil { - t.Fatalf("os.Stat: %v", err) - } - contents := strings.Repeat(" ", int(hdr.Size)) +func TestPAXHeader(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + paxTests := [][2]string{ + {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, + {"a=b", "6 a=b\n"}, // Single digit length + {"a=names", "11 a=names\n"}, // Test case involving carries + {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, + {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} - hdr.Xattrs = map[string]string{ - "foo": "foo", - "bar": "bar", - "baz": "baz", - "qux": "qux", - } - - var buf bytes.Buffer - writer := NewWriter(&buf) - if err := writer.WriteHeader(hdr); err != nil { - t.Fatal(err) - } - if _, err = writer.Write([]byte(contents)); err != nil { - t.Fatal(err) - } - if err := writer.Close(); err != nil { - t.Fatal(err) - } - // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { - t.Fatal("Expected at least one PAX header to be written.") - } - - // xattr bar should always appear before others - indices := []int{ - bytes.Index(buf.Bytes(), []byte("bar=bar")), - bytes.Index(buf.Bytes(), []byte("baz=baz")), - bytes.Index(buf.Bytes(), []byte("foo=foo")), - bytes.Index(buf.Bytes(), []byte("qux=qux")), - } - if !sort.IntsAreSorted(indices) { - t.Fatal("PAX headers are not sorted") + for _, test := range paxTests { + key, expected := test[0], test[1] + if result := paxHeader(key); result != expected { + t.Fatalf("paxHeader: got %s, expected %s", result, expected) + } } } @@ -574,149 +544,3 @@ func TestWriteAfterClose(t *testing.T) { t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) } } - -func TestSplitUSTARPath(t *testing.T) { - var sr = strings.Repeat - - var vectors = []struct { - input string // Input path - prefix string // Expected output prefix - suffix string // Expected output suffix - ok bool // Split success? 
- }{ - {"", "", "", false}, - {"abc", "", "", false}, - {"用戶名", "", "", false}, - {sr("a", fileNameSize), "", "", false}, - {sr("a", fileNameSize) + "/", "", "", false}, - {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, - {sr("a", fileNamePrefixSize) + "/", "", "", false}, - {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, - {sr("a", fileNameSize+1), "", "", false}, - {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, - {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), - sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, - {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, - {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, - } - - for _, v := range vectors { - prefix, suffix, ok := splitUSTARPath(v.input) - if prefix != v.prefix || suffix != v.suffix || ok != v.ok { - t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", - v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) - } - } -} - -func TestFormatPAXRecord(t *testing.T) { - var medName = strings.Repeat("CD", 50) - var longName = strings.Repeat("AB", 100) - - var vectors = []struct { - inputKey string - inputVal string - output string - }{ - {"k", "v", "6 k=v\n"}, - {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, - {"path", longName, "210 path=" + longName + "\n"}, - {"path", medName, "110 path=" + medName + "\n"}, - {"foo", "ba", "9 foo=ba\n"}, - {"foo", "bar", "11 foo=bar\n"}, - {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, - {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, - {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, - {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, - } - - for _, v := range vectors { - output := formatPAXRecord(v.inputKey, v.inputVal) - if output != v.output { - t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", - v.inputKey, v.inputVal, output, v.output) - } - } -} - -func TestFitsInBase256(t *testing.T) { - var vectors = []struct { - input int64 - width int - ok bool - }{ - {+1, 8, true}, - {0, 8, true}, - {-1, 8, true}, - {1 << 56, 8, false}, - {(1 << 56) - 1, 8, true}, - {-1 << 56, 8, true}, - {(-1 << 56) - 1, 8, false}, - {121654, 8, true}, - {-9849849, 8, true}, - {math.MaxInt64, 9, true}, - {0, 9, true}, - {math.MinInt64, 9, true}, - {math.MaxInt64, 12, true}, - {0, 12, true}, - {math.MinInt64, 12, true}, - } - - for _, v := range vectors { - ok := fitsInBase256(v.width, v.input) - if ok != v.ok { - t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) - } - } -} - -func TestFormatNumeric(t *testing.T) { - var vectors = []struct { - input int64 - output string - ok bool - }{ - // Test base-256 (binary) encoded values. 
- {-1, "\xff", true}, - {-1, "\xff\xff", true}, - {-1, "\xff\xff\xff", true}, - {(1 << 0), "0", false}, - {(1 << 8) - 1, "\x80\xff", true}, - {(1 << 8), "0\x00", false}, - {(1 << 16) - 1, "\x80\xff\xff", true}, - {(1 << 16), "00\x00", false}, - {-1 * (1 << 0), "\xff", true}, - {-1*(1<<0) - 1, "0", false}, - {-1 * (1 << 8), "\xff\x00", true}, - {-1*(1<<8) - 1, "0\x00", false}, - {-1 * (1 << 16), "\xff\x00\x00", true}, - {-1*(1<<16) - 1, "00\x00", false}, - {537795476381659745, "0000000\x00", false}, - {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, - {-615126028225187231, "0000000\x00", false}, - {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, - {math.MaxInt64, "0000000\x00", false}, - {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, - {math.MinInt64, "0000000\x00", false}, - {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, - {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, - {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, - } - - for _, v := range vectors { - var f formatter - output := make([]byte, len(v.output)) - f.formatNumeric(output, v.input) - ok := (f.err == nil) - if ok != v.ok { - if v.ok { - t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) - } else { - t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) - } - } - if string(output) != v.output { - t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) - } - } -} diff --git a/cmd/tar-split/asm.go b/cmd/tar-split/asm.go index e188ce1..312e54b 100644 --- a/cmd/tar-split/asm.go +++ b/cmd/tar-split/asm.go @@ -6,7 +6,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/urfave/cli" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/checksize.go b/cmd/tar-split/checksize.go index 1e5eed7..38f830e 100644 --- a/cmd/tar-split/checksize.go +++ b/cmd/tar-split/checksize.go @@ -10,7 +10,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/urfave/cli" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go index 5472894..b7b0dfe 100644 --- a/cmd/tar-split/disasm.go +++ b/cmd/tar-split/disasm.go @@ -3,11 +3,10 @@ package main import ( "compress/gzip" "io" - "io/ioutil" "os" "github.com/Sirupsen/logrus" - "github.com/urfave/cli" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) @@ -49,13 +48,7 @@ func CommandDisasm(c *cli.Context) { if err != nil { logrus.Fatal(err) } - var out io.Writer - if c.Bool("no-stdout") { - out = ioutil.Discard - } else { - out = os.Stdout - } - i, err := io.Copy(out, its) + i, err := io.Copy(os.Stdout, its) if err != nil { logrus.Fatal(err) } diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index 8b4035f..b417120 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -4,7 +4,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/urfave/cli" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/version" ) @@ -42,10 +42,6 @@ func main() { Value: "tar-data.json.gz", Usage: "output of disassembled tar stream", }, - cli.BoolFlag{ - Name: "no-stdout", - Usage: "do not throughput the stream to STDOUT", - }, }, }, { diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index d624450..83d6426 
100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -3,10 +3,8 @@ package asm import ( "bytes" "fmt" - "hash" "hash/crc64" "io" - "sync" "github.com/vbatts/tar-split/tar/storage" ) @@ -25,106 +23,45 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose } pr, pw := io.Pipe() go func() { - err := WriteOutputTarStream(fg, up, pw) - if err != nil { - pw.CloseWithError(err) - } else { - pw.Close() + for { + entry, err := up.Next() + if err != nil { + pw.CloseWithError(err) + return + } + switch entry.Type { + case storage.SegmentType: + if _, err := pw.Write(entry.Payload); err != nil { + pw.CloseWithError(err) + return + } + case storage.FileType: + if entry.Size == 0 { + continue + } + fh, err := fg.Get(entry.GetName()) + if err != nil { + pw.CloseWithError(err) + return + } + c := crc64.New(storage.CRCTable) + tRdr := io.TeeReader(fh, c) + if _, err := io.Copy(pw, tRdr); err != nil { + fh.Close() + pw.CloseWithError(err) + return + } + if !bytes.Equal(c.Sum(nil), entry.Payload) { + // I would rather this be a comparable ErrInvalidChecksum or such, + // but since it's coming through the PipeReader, the context of + // _which_ file would be lost... + fh.Close() + pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) + return + } + fh.Close() + } } }() return pr } - -// WriteOutputTarStream writes assembled tar archive to a writer. -func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error { - // ... Since these are interfaces, this is possible, so let's not have a nil pointer - if fg == nil || up == nil { - return nil - } - var copyBuffer []byte - var crcHash hash.Hash - var crcSum []byte - var multiWriter io.Writer - for { - entry, err := up.Next() - if err != nil { - if err == io.EOF { - return nil - } - return err - } - switch entry.Type { - case storage.SegmentType: - if _, err := w.Write(entry.Payload); err != nil { - return err - } - case storage.FileType: - if entry.Size == 0 { - continue - } - fh, err := fg.Get(entry.GetName()) - if err != nil { - return err - } - if crcHash == nil { - crcHash = crc64.New(storage.CRCTable) - crcSum = make([]byte, 8) - multiWriter = io.MultiWriter(w, crcHash) - copyBuffer = byteBufferPool.Get().([]byte) - defer byteBufferPool.Put(copyBuffer) - } else { - crcHash.Reset() - } - - if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { - fh.Close() - return err - } - - if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { - // I would rather this be a comparable ErrInvalidChecksum or such, - // but since it's coming through the PipeReader, the context of - // _which_ file would be lost... 
- fh.Close() - return fmt.Errorf("file integrity checksum failed for %q", entry.GetName()) - } - fh.Close() - } - } -} - -var byteBufferPool = &sync.Pool{ - New: func() interface{} { - return make([]byte, 32*1024) - }, -} - -// copyWithBuffer is taken from stdlib io.Copy implementation -// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367 -func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { - for { - nr, er := src.Read(buf) - if nr > 0 { - nw, ew := dst.Write(buf[0:nr]) - if nw > 0 { - written += int64(nw) - } - if ew != nil { - err = ew - break - } - if nr != nw { - err = io.ErrShortWrite - break - } - } - if er == io.EOF { - break - } - if er != nil { - err = er - break - } - } - return written, err -} diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index afdce9d..3d0c99c 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -130,20 +130,17 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } } -var testCases = []struct { - path string - expectedSHA1Sum string - expectedSize int64 -}{ - {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, - {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, - {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, - {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, - {"./testdata/extranils.tar.gz", "e187b4b3e739deaccc257342f4940f34403dc588", 10648}, - {"./testdata/notenoughnils.tar.gz", "72f93f41efd95290baa5c174c234f5d4c22ce601", 512}, -} - func TestTarStream(t *testing.T) { + testCases := []struct { + path string + expectedSHA1Sum string + expectedSize int64 + }{ + {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, + {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, + {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, + {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, + } for _, tc := range testCases { fh, err := os.Open(tc.path) @@ -170,7 +167,10 @@ func TestTarStream(t *testing.T) { // get a sum of the stream after it has passed through to ensure it's the same. h0 := sha1.New() - i, err := io.Copy(h0, tarStream) + tRdr0 := io.TeeReader(tarStream, h0) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) if err != nil { t.Fatal(err) } @@ -205,52 +205,3 @@ func TestTarStream(t *testing.T) { } } } - -func BenchmarkAsm(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, tc := range testCases { - func() { - fh, err := os.Open(tc.path) - if err != nil { - b.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - b.Fatal(err) - } - defer gzRdr.Close() - - // Setup where we'll store the metadata - w := bytes.NewBuffer([]byte{}) - sp := storage.NewJSONPacker(w) - fgp := storage.NewBufferFileGetPutter() - - // wrap the disassembly stream - tarStream, err := NewInputTarStream(gzRdr, sp, fgp) - if err != nil { - b.Fatal(err) - } - // read it all to the bit bucket - i1, err := io.Copy(ioutil.Discard, tarStream) - if err != nil { - b.Fatal(err) - } - - r := bytes.NewBuffer(w.Bytes()) - sup := storage.NewJSONUnpacker(r) - // and reuse the fgp that we Put the payloads to. 
- - rc := NewOutputTarStream(fgp, sup) - - i2, err := io.Copy(ioutil.Discard, rc) - if err != nil { - b.Fatal(err) - } - if i1 != i2 { - b.Errorf("%s: input(%d) and output(%d) byte count didn't match", tc.path, i1, i2) - } - }() - } - } -} diff --git a/tar/asm/testdata/extranils.tar.gz b/tar/asm/testdata/extranils.tar.gz deleted file mode 100644 index 70caf6e..0000000 Binary files a/tar/asm/testdata/extranils.tar.gz and /dev/null differ diff --git a/tar/asm/testdata/notenoughnils.tar.gz b/tar/asm/testdata/notenoughnils.tar.gz deleted file mode 100644 index 146bb00..0000000 Binary files a/tar/asm/testdata/notenoughnils.tar.gz and /dev/null differ diff --git a/tar/storage/packer.go b/tar/storage/packer.go index aba6948..0c9d99b 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -1,6 +1,7 @@ package storage import ( + "bufio" "encoding/json" "errors" "io" @@ -32,15 +33,31 @@ type PackUnpacker interface { */ type jsonUnpacker struct { - seen seenNames - dec *json.Decoder + r io.Reader + b *bufio.Reader + isEOF bool + seen seenNames } func (jup *jsonUnpacker) Next() (*Entry, error) { var e Entry - err := jup.dec.Decode(&e) - if err != nil { + if jup.isEOF { + // since ReadBytes() will return read bytes AND an EOF, we handle it this + // roundabout way so we can Unmarshal the tail with relevant errors, but + // still get an io.EOF when the stream is ended. + return nil, io.EOF + } + line, err := jup.b.ReadBytes('\n') + if err != nil && err != io.EOF { return nil, err + } else if err == io.EOF { + jup.isEOF = true + } + + err = json.Unmarshal(line, &e) + if err != nil && jup.isEOF { + // if the remainder actually _wasn't_ a remaining json structure, then just EOF + return nil, io.EOF } // check for dup name @@ -61,7 +78,8 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // Each Entry read is expected to be delimited by a new line.
func NewJSONUnpacker(r io.Reader) Unpacker { return &jsonUnpacker{ - dec: json.NewDecoder(r), + r: r, + b: bufio.NewReader(r), seen: seenNames{}, } } diff --git a/tar/storage/packer_test.go b/tar/storage/packer_test.go index 7d93371..1c6101f 100644 --- a/tar/storage/packer_test.go +++ b/tar/storage/packer_test.go @@ -4,8 +4,6 @@ import ( "bytes" "compress/gzip" "io" - "io/ioutil" - "os" "testing" ) @@ -161,58 +159,5 @@ func TestGzip(t *testing.T) { if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } -} - -func BenchmarkGetPut(b *testing.B) { - e := []Entry{ - Entry{ - Type: SegmentType, - Payload: []byte("how"), - }, - Entry{ - Type: SegmentType, - Payload: []byte("y'all"), - }, - Entry{ - Type: FileType, - Name: "./hurr.txt", - Payload: []byte("deadbeef"), - }, - Entry{ - Type: SegmentType, - Payload: []byte("doin"), - }, - } - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - func() { - fh, err := ioutil.TempFile("", "tar-split.") - if err != nil { - b.Fatal(err) - } - defer os.Remove(fh.Name()) - defer fh.Close() - - jp := NewJSONPacker(fh) - for i := range e { - if _, err := jp.AddEntry(e[i]); err != nil { - b.Fatal(err) - } - } - fh.Sync() - - up := NewJSONUnpacker(fh) - for { - _, err := up.Next() - if err != nil { - if err == io.EOF { - break - } - b.Fatal(err) - } - } - - }() - } - }) + } diff --git a/tar_benchmark_test.go b/tar_benchmark_test.go deleted file mode 100644 index d946f2a..0000000 --- a/tar_benchmark_test.go +++ /dev/null @@ -1,84 +0,0 @@ -package tartest - -import ( - "io" - "io/ioutil" - "os" - "testing" - - upTar "archive/tar" - - ourTar "github.com/vbatts/tar-split/archive/tar" -) - -var testfile = "./archive/tar/testdata/sparse-formats.tar" - -func BenchmarkUpstreamTar(b *testing.B) { - for n := 0; n < b.N; n++ { - fh, err := os.Open(testfile) - if err != nil { - b.Fatal(err) - } - tr := upTar.NewReader(fh) - for { - _, err := tr.Next() - if err != nil { - if err == io.EOF { - break - } - fh.Close() - b.Fatal(err) - } - io.Copy(ioutil.Discard, tr) - } - fh.Close() - } -} - -func BenchmarkOurTarNoAccounting(b *testing.B) { - for n := 0; n < b.N; n++ { - fh, err := os.Open(testfile) - if err != nil { - b.Fatal(err) - } - tr := ourTar.NewReader(fh) - tr.RawAccounting = false // this is default, but explicit here - for { - _, err := tr.Next() - if err != nil { - if err == io.EOF { - break - } - fh.Close() - b.Fatal(err) - } - io.Copy(ioutil.Discard, tr) - } - fh.Close() - } -} -func BenchmarkOurTarYesAccounting(b *testing.B) { - for n := 0; n < b.N; n++ { - fh, err := os.Open(testfile) - if err != nil { - b.Fatal(err) - } - tr := ourTar.NewReader(fh) - tr.RawAccounting = true // This enables mechanics for collecting raw bytes - for { - _ = tr.RawBytes() - _, err := tr.Next() - _ = tr.RawBytes() - if err != nil { - if err == io.EOF { - break - } - fh.Close() - b.Fatal(err) - } - io.Copy(ioutil.Discard, tr) - _ = tr.RawBytes() - } - fh.Close() - } -} diff --git a/version/version.go b/version/version.go index f317010..0b86fbf 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version - // AUTO-GENEREATED. DO NOT EDIT -// 2016-09-26 19:53:30.825879 -0400 EDT +// 2015-08-14 09:56:50.742727493 -0400 EDT // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.10.1-4-gf280282" +var VERSION = "v0.9.6-1-gc76e420" + \ No newline at end of file
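[Editorial aside] The packer change above trades json.Decoder for line-oriented reads: each Entry becomes one newline-delimited JSON document, and a final unterminated line is still decoded before io.EOF is surfaced. The following is a self-contained sketch of that decoding loop under those assumptions; the record type and sample payload are made up for illustration and do not reproduce the storage.Entry schema.

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

// record is a stand-in for a newline-delimited JSON entry type.
type record struct {
	Type int    `json:"type"`
	Name string `json:"name,omitempty"`
}

func main() {
	// The second document deliberately lacks a trailing newline.
	input := "{\"type\":1,\"name\":\"a.txt\"}\n{\"type\":0}"
	br := bufio.NewReader(strings.NewReader(input))
	for {
		line, err := br.ReadBytes('\n')
		if err != nil && err != io.EOF {
			panic(err)
		}
		if len(line) > 0 {
			var r record
			// A non-JSON tail would fail here; the unpacker maps that case to io.EOF.
			if jerr := json.Unmarshal(line, &r); jerr != nil {
				panic(jerr)
			}
			fmt.Printf("%+v\n", r)
		}
		if err == io.EOF {
			return
		}
	}
}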