From 440ba9e519d0481f35a916c60be51d3f58f1a6a1 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 17 Sep 2015 16:07:38 -0700 Subject: [PATCH 01/17] archive/tar: remove dead code with USTAR path splitting Convert splitUSTARPath to return a bool rather than an error since the caller never ever uses the error other than to check if it is nil. Thus, we can remove errNameTooLong as well. Also, fold the checking of the length <= fileNameSize and whether the string is ASCII into the split function itself. Lastly, remove logic to set the MAGIC since that's already done on L200. Thus, setting the magic is redundant. There is no overall logic change. Updates #12638 Change-Id: I26b6992578199abad723c2a2af7f4fc078af9c17 Reviewed-on: https://go-review.googlesource.com/14723 Reviewed-by: David Symonds Run-TryBot: David Symonds --- archive/tar/writer.go | 52 +++++++++++++------------------------- archive/tar/writer_test.go | 34 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 9dbc01a..3547c17 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -23,7 +23,6 @@ var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") - errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) @@ -215,26 +214,14 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - suffix := hdr.Name - prefix := "" - if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { - var err error - prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) - if err == nil { - // ok we can use a ustar long name instead of 
pax, now correct the fields + prefix, suffix, ok := splitUSTARPath(hdr.Name) + if ok { + // Since we can encode in USTAR format, disable PAX header. + delete(paxHeaders, paxPath) - // remove the path field from the pax header. this will suppress the pax header - delete(paxHeaders, paxPath) - - // update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) - - // Use the ustar magic if we used ustar long names. - if len(prefix) > 0 && !tw.usedBinary { - copy(header[257:265], []byte("ustar\x00")) - } - } + // Update the path fields + tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) + tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) } } @@ -270,28 +257,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { return tw.err } -// writeUSTARLongName splits a USTAR long name hdr.Name. -// name must be < 256 characters. errNameTooLong is returned -// if hdr.Name can't be split. The splitting heuristic -// is compatible with gnu tar. -func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length > fileNamePrefixSize+1 { + if length <= fileNameSize || !isASCII(name) { + return "", "", false + } else if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } + i := strings.LastIndex(name[:length], "/") - // nlen contains the resulting length in the name field. - // plen contains the resulting length in the prefix field. 
- nlen := len(name) - i - 1 - plen := i + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - err = errNameTooLong - return + return "", "", false } - prefix, suffix = name[:i], name[i+1:] - return + return name[:i], name[i+1:], true } // writePaxHeader writes an extended pax header to the diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index fe46a67..caf40a8 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -544,3 +544,37 @@ func TestWriteAfterClose(t *testing.T) { t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) } } + +func TestSplitUSTARPath(t *testing.T) { + var sr = strings.Repeat + + var vectors = []struct { + input string // Input path + prefix string // Expected output prefix + suffix string // Expected output suffix + ok bool // Split success? + }{ + {"", "", "", false}, + {"abc", "", "", false}, + {"用戶名", "", "", false}, + {sr("a", fileNameSize), "", "", false}, + {sr("a", fileNameSize) + "/", "", "", false}, + {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, + {sr("a", fileNamePrefixSize) + "/", "", "", false}, + {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, + {sr("a", fileNameSize+1), "", "", false}, + {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, + {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), + sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, + {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, + {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + } + + for _, v := range vectors { + prefix, suffix, ok := splitUSTARPath(v.input) + if prefix != v.prefix || suffix != v.suffix || ok != v.ok { + t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", + v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) + } + } +} From 
af15385a0daa2a76ac99546a89e1dc38ec289b8f Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 28 Sep 2015 16:38:16 -0700 Subject: [PATCH 02/17] archive/tar: fix bugs with sparseFileReader The sparseFileReader is prone to two different forms of denial-of-service attacks: * A malicious tar file can cause an infinite loop * A malicious tar file can cause arbitrary panics This results because of poor error checking/handling, which this CL fixes. While we are at it, add a plethora of unit tests to test for possible malicious inputs. Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929 Reviewed-on: https://go-review.googlesource.com/15115 Reviewed-by: Andrew Gerrand Run-TryBot: Andrew Gerrand TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 149 +++++++++++++++-------- archive/tar/reader_test.go | 236 ++++++++++++++++++++++++------------- 2 files changed, 258 insertions(+), 127 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 4168ea2..1f57508 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -12,6 +12,7 @@ import ( "errors" "io" "io/ioutil" + "math" "os" "strconv" "strings" @@ -70,12 +71,36 @@ type regFileReader struct { nb int64 // number of unread bytes for current file entry } -// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. +// A sparseFileReader is a numBytesReader for reading sparse file data from a +// tar archive. type sparseFileReader struct { - rfr *regFileReader // reads the sparse-encoded file data - sp []sparseEntry // the sparse map for the file - pos int64 // keeps track of file position - tot int64 // total size of the file + rfr numBytesReader // Reads the sparse-encoded file data + sp []sparseEntry // The sparse map for the file + pos int64 // Keeps track of file position + total int64 // Total size of the file +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// +// Sparse files are represented using a series of sparseEntrys. 
+// Despite the name, a sparseEntry represents an actual data fragment that +// references data found in the underlying archive stream. All regions not +// covered by a sparseEntry are logically filled with zeros. +// +// For example, if the underlying raw file contains the 10-byte data: +// var compactData = "abcdefgh" +// +// And the sparse map has the following entries: +// var sp = []sparseEntry{ +// {offset: 2, numBytes: 5} // Data fragment for [2..7] +// {offset: 18, numBytes: 3} // Data fragment for [18..21] +// } +// +// Then the content of the resulting sparse file with a "real" size of 25 is: +// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type sparseEntry struct { + offset int64 // Starting position of the fragment + numBytes int64 // Length of the fragment } // Keywords for GNU sparse files in a PAX extended header @@ -156,7 +181,10 @@ func (tr *Reader) Next() (*Header, error) { if sp != nil { // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } return hdr, nil case TypeGNULongName: @@ -631,21 +659,17 @@ func (tr *Reader) readHeader() *Header { if tr.err != nil { return nil } + // Current file is a GNU sparse file. Update the current file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil + } } return hdr } -// A sparseEntry holds a single entry in a sparse file's sparse map. -// A sparse entry indicates the offset and size in a sparse file of a -// block of data. -type sparseEntry struct { - offset int64 - numBytes int64 -} - // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. 
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. @@ -879,9 +903,33 @@ func (rfr *regFileReader) numBytes() int64 { return rfr.nb } -// readHole reads a sparse file hole ending at offset toOffset -func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { - n64 := toOffset - sfr.pos +// newSparseFileReader creates a new sparseFileReader, but validates all of the +// sparse entries before doing so. +func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { + if total < 0 { + return nil, ErrHeader // Total size cannot be negative + } + + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + for i, s := range sp { + switch { + case s.offset < 0 || s.numBytes < 0: + return nil, ErrHeader // Negative values are never okay + case s.offset > math.MaxInt64-s.numBytes: + return nil, ErrHeader // Integer overflow with large length + case s.offset+s.numBytes > total: + return nil, ErrHeader // Region extends beyond the "real" size + case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: + return nil, ErrHeader // Regions can't overlap and must be in order + } + } + return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil +} + +// readHole reads a sparse hole ending at endOffset. +func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { + n64 := endOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } @@ -895,49 +943,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - if len(sfr.sp) == 0 { - // No more data fragments to read from. 
- if sfr.pos < sfr.tot { - // We're in the last hole - n = sfr.readHole(b, sfr.tot) - return - } - // Otherwise, we're at the end of the file - return 0, io.EOF - } - if sfr.tot < sfr.sp[0].offset { - return 0, io.ErrUnexpectedEOF - } - if sfr.pos < sfr.sp[0].offset { - // We're in a hole - n = sfr.readHole(b, sfr.sp[0].offset) - return + // Skip past all empty fragments. + for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { + sfr.sp = sfr.sp[1:] } - // We're not in a hole, so we'll read from the next data fragment - posInFragment := sfr.pos - sfr.sp[0].offset - bytesLeft := sfr.sp[0].numBytes - posInFragment + // If there are no more fragments, then it is possible that there + // is one last sparse hole. + if len(sfr.sp) == 0 { + // This behavior matches the BSD tar utility. + // However, GNU tar stops returning data even if sfr.total is unmet. + if sfr.pos < sfr.total { + return sfr.readHole(b, sfr.total), nil + } + return 0, io.EOF + } + + // In front of a data fragment, so read a hole. + if sfr.pos < sfr.sp[0].offset { + return sfr.readHole(b, sfr.sp[0].offset), nil + } + + // In a data fragment, so read from it. + // This math is overflow free since we verify that offset and numBytes can + // be safely added when creating the sparseFileReader. 
+ endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment + bytesLeft := endPos - sfr.pos // Bytes left in fragment if int64(len(b)) > bytesLeft { - b = b[0:bytesLeft] + b = b[:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) - - if int64(n) == bytesLeft { - // We're done with this fragment - sfr.sp = sfr.sp[1:] + if err == io.EOF { + if sfr.pos < endPos { + err = io.ErrUnexpectedEOF // There was supposed to be more data + } else if sfr.pos < sfr.total { + err = nil // There is still an implicit sparse hole at the end + } } - if err == io.EOF && sfr.pos < sfr.tot { - // We reached the end of the last fragment's data, but there's a final hole - err = nil + if sfr.pos == endPos { + sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it } - return + return n, err } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.nb + return sfr.rfr.numBytes() } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index da01f26..bca0c05 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "strings" @@ -560,80 +561,155 @@ func TestSparseEndToEnd(t *testing.T) { } } -type sparseFileReadTest struct { - sparseData []byte - sparseMap []sparseEntry - realSize int64 - expected []byte -} - -var sparseFileReadTests = []sparseFileReadTest{ - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 8, - expected: []byte("ab\x00\x00\x00cde"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 10, - expected: []byte("ab\x00\x00\x00cde\x00\x00"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, 
numBytes: 2}, - }, - realSize: 8, - expected: []byte("\x00abc\x00\x00de"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - realSize: 10, - expected: []byte("\x00abc\x00\x00de\x00\x00"), - }, - { - sparseData: []byte(""), - sparseMap: nil, - realSize: 2, - expected: []byte("\x00\x00"), - }, -} - func TestSparseFileReader(t *testing.T) { - for i, test := range sparseFileReadTests { - r := bytes.NewReader(test.sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: test.sparseMap, - pos: 0, - tot: test.realSize, - } - if sfr.numBytes() != nb { - t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) - } - buf, err := ioutil.ReadAll(sfr) + var vectors = []struct { + realSize int64 // Real size of the output file + sparseMap []sparseEntry // Input sparse map + sparseData string // Input compact data + expected string // Expected output data + err error // Expected error outcome + }{{ + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde\x00\x00", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 10, + sparseMap: 
[]sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 2, + sparseMap: []sparseEntry{}, + sparseData: "", + expected: "\x00\x00", + }, { + realSize: -2, + sparseMap: []sparseEntry{}, + err: ErrHeader, + }, { + realSize: -10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: io.ErrUnexpectedEOF, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: math.MaxInt64, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 2, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }} + + for i, v := range vectors { + r := bytes.NewReader([]byte(v.sparseData)) + rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} + + var sfr *sparseFileReader + var err error + var buf []byte + + sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + goto fail } - if e := test.expected; !bytes.Equal(buf, e) { - t.Errorf("test %d: Contents = %v, want %v", i, buf, e) + if sfr.numBytes() != int64(len(v.sparseData)) { + t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) + } + buf, err = 
ioutil.ReadAll(sfr) + if err != nil { + goto fail + } + if string(buf) != v.expected { + t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) } if sfr.numBytes() != 0 { - t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) + t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) + } + + fail: + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) } } } @@ -646,10 +722,10 @@ func TestSparseIncrementalRead(t *testing.T) { r := bytes.NewReader(sparseData) nb := int64(r.Len()) sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - tot: int64(len(expected)), + rfr: ®FileReader{r: r, nb: nb}, + sp: sparseMap, + pos: 0, + total: int64(len(expected)), } // We'll read the data 6 bytes at a time, with a hole of size 10 at @@ -747,6 +823,11 @@ func TestUninitializedRead(t *testing.T) { } +// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests +// that Reader properly handles corrupted tar files. Given the increasing number +// of invalid/malicious that can crash Reader, we should modify TestReader to +// be able to test that intentionally corrupt tar files don't succeed or crash. + // Negative header size should not cause panic. // Issues 10959 and 10960. 
func TestNegativeHdrSize(t *testing.T) { @@ -771,14 +852,11 @@ func TestIssue10968(t *testing.T) { t.Fatal(err) } defer f.Close() + r := NewReader(f) _, err = r.Next() - if err != nil { - t.Fatal(err) - } - _, err = io.Copy(ioutil.Discard, r) - if err != io.ErrUnexpectedEOF { - t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) + if err == nil { + t.Fatal("Unexpected success") } } From f0fc67b3a8643a174215d1e514d25414feb83dcf Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 03:08:18 -0700 Subject: [PATCH 03/17] archive/tar: make Reader.Read errors persistent If the stream is in an inconsistent state, it does not make sense that Reader.Read can be called and possibly succeed. Change-Id: I9d1c5a1300b2c2b45232188aa7999e350809dcf2 Reviewed-on: https://go-review.googlesource.com/15177 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick --- archive/tar/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 1f57508..7d05d7d 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -871,9 +871,13 @@ func (tr *Reader) numBytes() int64 { // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. func (tr *Reader) Read(b []byte) (n int, err error) { + if tr.err != nil { + return 0, tr.err + } if tr.curr == nil { return 0, io.EOF } + n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err From 4ad443d1668a7ac6cfe49b02265247bb6fb636fa Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:59:49 -0700 Subject: [PATCH 04/17] archive/tar: expand abilities of TestReader Motivation: * There are an increasing number of "one-off" corrupt files added to make sure that package does not succeed or crash on them. Instead, allow for the test to specify the error that is expected to occur (if any). * Also, fold in the logic to check the MD5 checksum into this function. 
The following tests are being removed: * TestIncrementalRead: Done by TestReader by using io.CopyBuffer with a buffer of 8. This achieves the same behavior as this test. * TestSparseEndToEnd: Since TestReader checks the MD5 checksums if the input corpus provides them, then this is redundant. * TestSparseIncrementalRead: Redundant for the same reasons that TestIncrementalRead is now redundant * TestNegativeHdrSize: Added to TestReader corpus * TestIssue10968: Added to TestReader corpus * TestIssue11169: Added to TestReader corpus With this change, code coverage did not change: 85.3% Change-Id: I8550d48657d4dbb8f47dfc3dc280758ef73b47ec Reviewed-on: https://go-review.googlesource.com/15176 Reviewed-by: Andrew Gerrand --- archive/tar/reader_test.go | 296 ++++++++++--------------------------- 1 file changed, 81 insertions(+), 215 deletions(-) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index bca0c05..4d065a9 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -19,9 +19,10 @@ import ( ) type untarTest struct { - file string - headers []*Header - cksums []string + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur } var gnuTarTest = &untarTest{ @@ -50,7 +51,7 @@ var gnuTarTest = &untarTest{ Gname: "eng", }, }, - cksums: []string{ + chksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, @@ -130,7 +131,7 @@ var sparseTarTest = &untarTest{ Devminor: 0, }, }, - cksums: []string{ + chksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", @@ -287,37 +288,93 @@ var untarTests = []*untarTest{ }, }, }, + { + file: "testdata/neg-size.tar", + err: ErrHeader, + }, + { + file: "testdata/issue10968.tar", + err: ErrHeader, + }, + { + file: "testdata/issue11169.tar", + // TODO(dsnet): Currently 
the library does not detect that this file is + // malformed. Instead it incorrectly believes that file just ends. + // err: ErrHeader, + }, } func TestReader(t *testing.T) { -testLoop: - for i, test := range untarTests { - f, err := os.Open(test.file) + for i, v := range untarTests { + f, err := os.Open(v.file) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) continue } defer f.Close() - tr := NewReader(f) - for j, header := range test.headers { - hdr, err := tr.Next() - if err != nil || hdr == nil { - t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) - f.Close() - continue testLoop + + // Capture all headers and checksums. + var ( + tr = NewReader(f) + hdrs []*Header + chksums []string + rdbuf = make([]byte, 8) + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break } - if !reflect.DeepEqual(*hdr, *header) { - t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", - i, j, *hdr, *header) + hdrs = append(hdrs, hdr) + + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) + } + + for j, hdr := range hdrs { + if j >= len(v.headers) { + t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v", + v.file, i, j, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[j]) { + t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot %+v\nwant %+v", + v.file, i, j, *hdr, *v.headers[j]) } } - hdr, err := tr.Next() - if err == io.EOF { - continue testLoop + if len(hdrs) != len(v.headers) { + t.Errorf("file %s, test %d: got %d headers, want %d headers", + v.file, i, len(hdrs), len(v.headers)) } - if hdr != nil || err != nil { - t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) + + 
for j, sum := range chksums { + if j >= len(v.chksums) { + t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s", + v.file, i, j, sum) + continue + } + if sum != v.chksums[j] { + t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s", + v.file, i, j, sum, v.chksums[j]) + } } + + if err != v.err { + t.Errorf("file %s, test %d: unexpected error: got %v, want %v", + v.file, i, err, v.err) + } + f.Close() } } @@ -357,60 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestIncrementalRead(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read file contents in little chunks EOF, - // checksumming all the way - h := md5.New() - rdbuf := make([]uint8, 8) - for { - nr, err := tr.Read(rdbuf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - break - } - h.Write(rdbuf[0:nr]) - } - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - func TestNonSeekable(t *testing.T) { test := gnuTarTest f, err := os.Open(test.file) @@ -515,52 +518,6 @@ func TestMergePAX(t *testing.T) { } } -func TestSparseEndToEnd(t *testing.T) { - test := sparseTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - 
headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read and checksum the file data - h := md5.New() - _, err = io.Copy(h, tr) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - func TestSparseFileReader(t *testing.T) { var vectors = []struct { realSize int64 // Real size of the output file @@ -714,45 +671,6 @@ func TestSparseFileReader(t *testing.T) { } } -func TestSparseIncrementalRead(t *testing.T) { - sparseMap := []sparseEntry{{10, 2}} - sparseData := []byte("Go") - expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" - - r := bytes.NewReader(sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - total: int64(len(expected)), - } - - // We'll read the data 6 bytes at a time, with a hole of size 10 at - // the beginning and one of size 8 at the end. 
- var outputBuf bytes.Buffer - buf := make([]byte, 6) - for { - n, err := sfr.Read(buf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - } - if n > 0 { - _, err := outputBuf.Write(buf[:n]) - if err != nil { - t.Errorf("Write: unexpected error %v\n", err) - } - } - } - got := outputBuf.String() - if got != expected { - t.Errorf("Contents = %v, want %v", got, expected) - } -} - func TestReadGNUSparseMap0x1(t *testing.T) { headers := map[string]string{ paxGNUSparseNumBlocks: "4", @@ -822,55 +740,3 @@ func TestUninitializedRead(t *testing.T) { } } - -// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests -// that Reader properly handles corrupted tar files. Given the increasing number -// of invalid/malicious that can crash Reader, we should modify TestReader to -// be able to test that intentionally corrupt tar files don't succeed or crash. - -// Negative header size should not cause panic. -// Issues 10959 and 10960. -func TestNegativeHdrSize(t *testing.T) { - f, err := os.Open("testdata/neg-size.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err != ErrHeader { - t.Error("want ErrHeader, got", err) - } - io.Copy(ioutil.Discard, r) -} - -// This used to hang in (*sparseFileReader).readHole due to missing -// verification of sparse offsets against file size. -func TestIssue10968(t *testing.T) { - f, err := os.Open("testdata/issue10968.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") - } -} - -// Do not panic if there are errors in header blocks after the pax header. 
-// Issue 11169 -func TestIssue11169(t *testing.T) { - f, err := os.Open("testdata/issue11169.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") - } -} From cb423795ebbea7ab1f8570fa6811ffbd43c04c96 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 6 Oct 2015 01:04:18 -0700 Subject: [PATCH 05/17] archive/tar: add missing error checks to Reader.Next A recursive call to Reader.Next did not check the error before trying to use the result, leading to a nil pointer panic. This specific CL addresses the immediate issue, which is the panic, but does not solve the root issue, which is due to an integer overflow in the base-256 parser. Updates #12435 Change-Id: Ia908671f0f411a409a35e24f2ebf740d46734072 Reviewed-on: https://go-review.googlesource.com/15437 Run-TryBot: Brad Fitzpatrick Reviewed-by: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 31 ++++++-------- archive/tar/reader_test.go | 87 +++++++++++++++++++++++++++++++------- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 7d05d7d..dc23085 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -820,40 +820,37 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { return sp, nil } -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. -// The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { - // Get number of entries - numEntriesStr, ok := headers[paxGNUSparseNumBlocks] - if !ok { - return nil, ErrHeader - } - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) - if err != nil { +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. 
+func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } - sparseMap := strings.Split(headers[paxGNUSparseMap], ",") - - // There should be two numbers in sparseMap for each entry + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } - // Loop through the entries in the sparse map + // Loop through the entries in the sparse map. + // numEntries is trusted now. sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) if err != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 4d065a9..d9d089b 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -672,23 +672,78 @@ func TestSparseFileReader(t *testing.T) { } func TestReadGNUSparseMap0x1(t *testing.T) { - headers := map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - } - expected := []sparseEntry{ - {offset: 0, numBytes: 5}, - {offset: 10, numBytes: 5}, - {offset: 20, numBytes: 5}, - {offset: 30, numBytes: 5}, - } + const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) + ) + var ( + big1 = fmt.Sprintf("%d", int64(maxInt)) + big2 = 
fmt.Sprintf("%d", (int64(maxInt)/2)+1) + big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) + ) - sp, err := readGNUSparseMap0x1(headers) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + var vectors = []struct { + extHdrs map[string]string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + err error // Expected errors that may be raised + }{{ + extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, + err: ErrHeader, + }, { + extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big1, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big2, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big3, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }} + + for i, v := range vectors { + sp, err := readGNUSparseMap0x1(v.extHdrs) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } } } 
From cf83c95de838674ba781bb4d0684a3e77c1bfc87 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 01:04:24 -0700 Subject: [PATCH 06/17] archive/tar: fix numeric overflow issues in readGNUSparseMap0x1 Motivation: * The logic to verify the numEntries can overflow and incorrectly pass, allowing a malicious file to allocate arbitrary memory. * The use of strconv.ParseInt does not set the integer precision to 64bit, causing this code to work incorrectly on 32bit machines. Change-Id: I1b1571a750a84f2dde97cc329ed04fe2342aaa60 Reviewed-on: https://go-review.googlesource.com/15173 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 57 +++++++++++--- archive/tar/reader_test.go | 156 ++++++++++++++++++++++++++++++------- 2 files changed, 173 insertions(+), 40 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index dc23085..cce9d23 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -504,20 +504,48 @@ func (tr *Reader) octal(b []byte) int64 { return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. 
+func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) - return + _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) + return tr.err } - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -530,6 +558,13 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return given == unsigned || given == signed } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. 
+// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) @@ -541,7 +576,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -558,7 +593,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index d9d089b..90b8b46 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -414,35 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestNonSeekable(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - type readerOnly struct { - io.Reader - } - tr := NewReader(readerOnly{f}) - nread := 0 - - for ; ; nread++ { - _, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - if nread != len(test.headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) - } -} - func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -795,3 +766,130 @@ func TestUninitializedRead(t *testing.T) { } } + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various 
truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := ioutil.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + var vectors = []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX 
header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + // TODO(dsnet): This should pass, but currently fails. + // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = io.Copy(ioutil.Discard, tr); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +} From bffda594f770add2c260a42feaf0e1e3c0651a56 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:30:29 -0700 Subject: [PATCH 07/17] archive/tar: detect truncated files Motivation: * Reader.skipUnread 
never reports io.ErrUnexpectedEOF. This is strange given that io.ErrUnexpectedEOF is given through Reader.Read if the user manually reads the file. * Reader.skipUnread fails to detect truncated files since io.Seeker is lazy about reporting errors. Thus, the behavior of Reader differs whether the input io.Reader also satisfies io.Seeker or not. To solve this, we seek to one before the end of the data section and always rely on at least one call to io.CopyN. If the tr.r satisfies io.Seeker, this is guarunteed to never read more than blockSize. Fixes #12557 Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279 Reviewed-on: https://go-review.googlesource.com/15175 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/writer.go | 21 ++++++++++----- archive/tar/writer_test.go | 53 +++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 3547c17..0165b22 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" "io" - "os" "path" + "sort" "strconv" "strings" "time" @@ -288,11 +288,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files - // with the current pid. - pid := os.Getpid() + // with the current pid. However, this results in differing outputs + // for identical inputs. As such, the constant 0 is now used instead. 
+ // golang.org/issue/12358 dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, - fmt.Sprintf("PaxHeaders.%d", pid), file) + fullName := path.Join(dir, "PaxHeaders.0", file) ascii := toASCII(fullName) if len(ascii) > 100 { @@ -302,8 +302,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // Construct the body var buf bytes.Buffer - for k, v := range paxHeaders { - fmt.Fprint(&buf, paxHeader(k+"="+v)) + // Keys are sorted before writing to body to allow deterministic output. + var keys []string + for k := range paxHeaders { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index caf40a8..25d88dc 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -11,6 +11,7 @@ import ( "io/ioutil" "os" "reflect" + "sort" "strings" "testing" "testing/iotest" @@ -291,7 +292,7 @@ func TestPax(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -330,7 +331,7 @@ func TestPaxSymlink(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. 
@@ -380,7 +381,7 @@ func TestPaxNonAscii(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -439,6 +440,52 @@ func TestPaxXattrs(t *testing.T) { } } +func TestPaxHeadersSorted(t *testing.T) { + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := strings.Repeat(" ", int(hdr.Size)) + + hdr.Xattrs = map[string]string{ + "foo": "foo", + "bar": "bar", + "baz": "baz", + "qux": "qux", + } + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + + // xattr bar should always appear before others + indices := []int{ + bytes.Index(buf.Bytes(), []byte("bar=bar")), + bytes.Index(buf.Bytes(), []byte("baz=baz")), + bytes.Index(buf.Bytes(), []byte("foo=foo")), + bytes.Index(buf.Bytes(), []byte("qux=qux")), + } + if !sort.IntsAreSorted(indices) { + t.Fatal("PAX headers are not sorted") + } +} + func TestPAXHeader(t *testing.T) { medName := strings.Repeat("CD", 50) longName := strings.Repeat("AB", 100) From 2424f4e36723fbc7a4e06fff5878a151ae270952 Mon Sep 17 00:00:00 2001 From: Matt Layher Date: Thu, 27 Aug 2015 14:52:06 -0400 Subject: [PATCH 08/17] archive/tar: make output deterministic Replaces PID in PaxHeaders with 0. Sorts PAX header keys before writing them to the archive. 
Fixes #12358 Change-Id: If239f89c85f1c9d9895a253fb06a47ad44960124 Reviewed-on: https://go-review.googlesource.com/13975 Reviewed-by: Russ Cox Reviewed-by: Joe Tsai --- archive/tar/common.go | 11 ++++++++ archive/tar/reader.go | 24 +++++++++++------ archive/tar/reader_test.go | 43 ++++++++++++++++++++++++++++++ archive/tar/testdata/hdr-only.tar | Bin 0 -> 10240 bytes archive/tar/testdata/neg-size.tar | Bin 512 -> 512 bytes 5 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 archive/tar/testdata/hdr-only.tar diff --git a/archive/tar/common.go b/archive/tar/common.go index c31df06..36f4e23 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -327,3 +327,14 @@ func toASCII(s string) string { } return buf.String() } + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} diff --git a/archive/tar/reader.go b/archive/tar/reader.go index cce9d23..6360b4e 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -179,6 +179,13 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } if sp != nil { + // Sparse files do not make sense when applied to the special header + // types that never have a data section. + if isHeaderOnlyType(hdr.Typeflag) { + tr.err = ErrHeader + return nil, tr.err + } + // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. 
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) @@ -622,10 +629,6 @@ func (tr *Reader) readHeader() *Header { hdr.Uid = int(tr.octal(s.next(8))) hdr.Gid = int(tr.octal(s.next(8))) hdr.Size = tr.octal(s.next(12)) - if hdr.Size < 0 { - tr.err = ErrHeader - return nil - } hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] @@ -676,12 +679,17 @@ func (tr *Reader) readHeader() *Header { return nil } - // Maximum value of hdr.Size is 64 GB (12 octal digits), - // so there's no risk of int64 overflowing. - nb := int64(hdr.Size) - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + tr.err = ErrHeader + return nil + } // Set the current file reader. + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 90b8b46..3c98f4d 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -893,3 +893,46 @@ func TestReadTruncation(t *testing.T) { } } } + +// TestReadHeaderOnly tests that Reader does not attempt to read special +// header-only files. +func TestReadHeaderOnly(t *testing.T) { + f, err := os.Open("testdata/hdr-only.tar") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + var hdrs []*Header + tr := NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Next(): got %v, want %v", err, nil) + continue + } + hdrs = append(hdrs, hdr) + + // If a special flag, we should read nothing. + cnt, _ := io.ReadFull(tr, []byte{0}) + if cnt > 0 && hdr.Typeflag != TypeReg { + t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) + } + } + + // File is crafted with 16 entries. The later 8 are identical to the first + // 8 except that the size is set. 
+ if len(hdrs) != 16 { + t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) + } + for i := 0; i < 8; i++ { + var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] + hdr1.Size, hdr2.Size = 0, 0 + if !reflect.DeepEqual(*hdr1, *hdr2) { + t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) + } + } +} diff --git a/archive/tar/testdata/hdr-only.tar b/archive/tar/testdata/hdr-only.tar new file mode 100644 index 0000000000000000000000000000000000000000..f25034083de6e0176e429f939875def6eb78cc73 GIT binary patch literal 10240 zcmeI2ZE}J@42Ji2Pq95gv)>o#1+ajk2rWokd-`UnK%I_CXNW`V?jLp5$;Lb+o4jM3 zRS%4K0WN2N31Pu$!vOG|0DSEi6VfBdZFVz=+!#Geo7WlKr zRl;AI>}kUnRryx%w0!65X8WAPynIb6zQg@I`q=ZhT;AVZ14uaInh{tzn(ux&A2{mb)wBl`42&RQXR%ispcL7W$7F z`oDwzV^q+8Xow$MornI@^B?pdod1LVbIgk3(>1Qxw*Nb){{{Vr0_`Z9LH`*QrhogT zdFVfV{nxJ3f3W@s{fGXsn}`0>@$ct<6aa(%Lr=2_XU@0=FB1PuCY>1poj5 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar index 5deea3d05c4da5a4ddda34ef7ad781088464e71b..21edf38cc3c3d98c834d07b6d31e8325898ec492 100644 GIT binary patch delta 20 bcmZo*X<(T!h11Z`)Xd0`LBU|-++;=oIaUQ| delta 20 ZcmZo*X<(T!g|mSH1ZGb%+&DLx5db<)1;zjX From 7500c932c7210168610e6ee8ff136f9fb0329a04 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 3 Nov 2015 18:12:31 -0800 Subject: [PATCH 09/17] archive/tar: properly handle header-only "files" in Reader Certain special type-flags, specifically 1, 2, 3, 4, 5, 6, do not have a data section. Thus, regardless of what the size field says, we should not attempt to read any data for these special types. The relevant PAX and USTAR specification says: <<< If the typeflag field is set to specify a file to be of type 1 (a link) or 2 (a symbolic link), the size field shall be specified as zero. If the typeflag field is set to specify a file of type 5 (directory), the size field shall be interpreted as described under the definition of that record type. No data logical records are stored for types 1, 2, or 5. 
If the typeflag field is set to 3 (character special file), 4 (block special file), or 6 (FIFO), the meaning of the size field is unspecified by this volume of POSIX.1-2008, and no data logical records shall be stored on the medium. Additionally, for type 6, the size field shall be ignored when reading. If the typeflag field is set to any other value, the number of logical records written following the header shall be (size+511)/512, ignoring any fraction in the result of the division. >>> Contrary to the specification, we do not assert that the size field is zero for type 1 and 2 since we liberally accept non-conforming formats. Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed Reviewed-on: https://go-review.googlesource.com/16614 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 118 +++++++++++++++---------------------- archive/tar/reader_test.go | 99 +++++++++++++++++++++++-------- 2 files changed, 122 insertions(+), 95 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6360b4e..6948471 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -769,97 +769,77 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { return sp } -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. -// The sparse map is stored just before the file data and padded out to the nearest block boundary. +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, numBytes). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. 
However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - buf := make([]byte, 2*blockSize) - sparseHeader := buf[:blockSize] + var cntNewline int64 + var buf bytes.Buffer + var blk = make([]byte, blockSize) - // readDecimal is a helper function to read a decimal integer from the sparse map - // while making sure to read from the file in blocks of size blockSize - readDecimal := func() (int64, error) { - // Look for newline - nl := bytes.IndexByte(sparseHeader, '\n') - if nl == -1 { - if len(sparseHeader) >= blockSize { - // This is an error - return 0, ErrHeader + // feedTokens copies data in numBlock chunks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + var feedTokens = func(cnt int64) error { + for cntNewline < cnt { + if _, err := io.ReadFull(r, blk); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err } - oldLen := len(sparseHeader) - newLen := oldLen + blockSize - if cap(sparseHeader) < newLen { - // There's more header, but we need to make room for the next block - copy(buf, sparseHeader) - sparseHeader = buf[:newLen] - } else { - // There's more header, and we can just reslice - sparseHeader = sparseHeader[:newLen] - } - - // Now that sparseHeader is large enough, read next block - if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { - return 0, err - } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { - return 0, err + buf.Write(blk) + for _, c := range blk { + if c == '\n' { + cntNewline++ } } - - // Look for a newline in the new data - nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') - if nl == -1 { - // This is an error - return 0, ErrHeader - } - nl += 
oldLen // We want the position from the beginning } - // Now that we've found a newline, read a number - n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) - if err != nil { - return 0, ErrHeader - } - - // Update sparseHeader to consume this number - sparseHeader = sparseHeader[nl+1:] - return n, nil + return nil } - // Read the first block - if _, err := io.ReadFull(r, sparseHeader); err != nil { + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + var nextToken = func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return tok[:len(tok)-1] // Cut off newline + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { return nil, err } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader); err != nil { - return nil, err - } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader } - // The first line contains the number of entries - numEntries, err := readDecimal() - if err != nil { + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. 
+ if err := feedTokens(2 * numEntries); err != nil { return nil, err } - - // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - // Read the offset - offset, err := readDecimal() + offset, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - // Read numBytes - numBytes, err := readDecimal() + numBytes, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 3c98f4d..5166403 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -719,35 +719,82 @@ func TestReadGNUSparseMap0x1(t *testing.T) { } func TestReadGNUSparseMap1x0(t *testing.T) { - // This test uses lots of holes so the sparse header takes up more than two blocks - numEntries := 100 - expected := make([]sparseEntry, 0, numEntries) - sparseMap := new(bytes.Buffer) - - fmt.Fprintf(sparseMap, "%d\n", numEntries) - for i := 0; i < numEntries; i++ { - offset := int64(2048 * i) - numBytes := int64(1024) - expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) - fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) + var sp = []sparseEntry{{1, 2}, {3, 4}} + for i := 0; i < 98; i++ { + sp = append(sp, sparseEntry{54321, 12345}) } - // Make the header the smallest multiple of blockSize that fits the sparseMap - headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize - bufLen := blockSize * headerBlocks - buf := make([]byte, bufLen) - copy(buf, sparseMap.Bytes()) + var vectors = []struct { + input string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + cnt int // Expected number of bytes read + err error // Expected errors that may be raised + }{{ + input: "", + cnt: 0, + err: io.ErrUnexpectedEOF, + }, { + input: "ab", + 
cnt: 2, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 512), + cnt: 512, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 511) + "\n", + cnt: 512, + err: ErrHeader, + }, { + input: strings.Repeat("\n", 512), + cnt: 512, + err: ErrHeader, + }, { + input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), + sparseMap: []sparseEntry{}, + cnt: 512, + }, { + input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), + sparseMap: []sparseEntry{}, + cnt: 1024, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), + sparseMap: []sparseEntry{{2, 3}}, + cnt: 1536, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), + cnt: 1536, + err: ErrHeader, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), + cnt: 1536, + err: io.ErrUnexpectedEOF, + }, { + input: "-1\n2\n\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "1\nk\n2\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), + cnt: 2560, + sparseMap: sp, + }} - // Get an reader to read the sparse map - r := bytes.NewReader(buf) - - // Read the sparse map - sp, err := readGNUSparseMap1x0(r) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + for i, v := range vectors { + r := strings.NewReader(v.input) + sp, err := readGNUSparseMap1x0(r) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) + } + if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { + t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) + } + if err != v.err { + t.Errorf("test %d, unexpected 
error: got %v, want %v", i, err, v.err) + } } } From b598ba3ee75317907dec365b25d0ba2b6f3d32fe Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 01:35:15 -0700 Subject: [PATCH 10/17] archive/tar: fix issues with readGNUSparseMap1x0 Motivations: * Use of strconv.ParseInt does not properly treat integers as 64bit, preventing this function from working properly on 32bit machines. * Use of io.ReadFull does not properly detect truncated streams when the file suddenly ends on a block boundary. * The function blindly trusts user input for numEntries and allocates memory accordingly. * The function does not validate that numEntries is not negative, allowing a malicious sparse file to cause a panic during make. In general, this function was overly complicated for what it was accomplishing and it was hard to reason that it was free from bounds errors. Instead, it has been rewritten and relies on bytes.Buffer.ReadString to do the main work. So long as invariants about the number of '\n' in the buffer are maintained, it is much easier to see why this approach is correct. Change-Id: Ibb12c4126c26e0ea460ea063cd17af68e3cf609e Reviewed-on: https://go-review.googlesource.com/15174 Reviewed-by: Russ Cox Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 160 +++++++++++++++++++++++-------------- archive/tar/reader_test.go | 56 ++++++++++++- archive/tar/writer.go | 133 ++++++++++++++++-------------- archive/tar/writer_test.go | 48 ++++++----- 4 files changed, 254 insertions(+), 143 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6948471..02df550 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -40,6 +40,10 @@ type Reader struct { rawBytes *bytes.Buffer // last raw bits } +type parser struct { + err error // Last error seen +} + // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. 
// @@ -134,6 +138,7 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { + var p parser var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { @@ -216,8 +221,11 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } } - hdr.Name = cString(realname) - return hdr, err + hdr.Name = p.parseString(realname) + if p.err != nil { + return nil, p.err + } + return hdr, nil case TypeGNULongLink: // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) @@ -240,8 +248,11 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } } - hdr.Linkname = cString(realname) - return hdr, err + hdr.Linkname = p.parseString(realname) + if p.err != nil { + return nil, p.err + } + return hdr, nil } return hdr, tr.err } @@ -420,6 +431,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { return nil, err } } + sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. @@ -428,35 +440,17 @@ func parsePAX(r io.Reader) (map[string]string, error) { headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value - for len(buf) > 0 { - // or the header was empty to start with. - var sp int - // The size field ends at the first space. - sp = bytes.IndexByte(buf, ' ') - if sp == -1 { + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { return nil, ErrHeader } - // Parse the first token as a decimal integer. - n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil || n < 5 || int64(len(buf)) < n { - return nil, ErrHeader - } - // Extract everything between the decimal and the n -1 on the - // beginning to eat the ' ', -1 on the end to skip the newline. - var record []byte - record, buf = buf[sp+1:n-1], buf[n:] - // The first equals is guaranteed to mark the end of the key. 
- // Everything else is value. - eq := bytes.IndexByte(record, '=') - if eq == -1 { - return nil, ErrHeader - } - key, value := record[:eq], record[eq+1:] + sbuf = residual keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.Write(value) + sparseMap.WriteString(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. @@ -471,9 +465,42 @@ func parsePAX(r io.Reader) (map[string]string, error) { return headers, nil } -// cString parses bytes as a NUL-terminated C-style string. +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. 
-func cString(b []byte) string { +func (*parser) parseString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ @@ -481,7 +508,7 @@ func cString(b []byte) string { return string(b[0:n]) } -func (tr *Reader) octal(b []byte) int64 { +func (p *parser) parseNumeric(b []byte) int64 { // Check for binary format first. if len(b) > 0 && b[0]&0x80 != 0 { var x int64 @@ -494,6 +521,10 @@ func (tr *Reader) octal(b []byte) int64 { return x } + return p.parseOctal(b) +} + +func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. @@ -504,9 +535,9 @@ func (tr *Reader) octal(b []byte) int64 { if len(b) == 0 { return 0 } - x, err := strconv.ParseUint(cString(b), 8, 64) - if err != nil { - tr.err = err + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader } return int64(x) } @@ -560,9 +591,10 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return false } - given := tr.octal(header[148:156]) + var p parser + given := p.parseOctal(header[148:156]) unsigned, signed := checksum(header) - return given == unsigned || given == signed + return p.err == nil && (given == unsigned || given == signed) } // readHeader reads the next block header and assumes that the underlying reader @@ -621,18 +653,19 @@ func (tr *Reader) readHeader() *Header { } // Unpack + var p parser hdr := new(Header) s := slicer(header) - hdr.Name = cString(s.next(100)) - hdr.Mode = tr.octal(s.next(8)) - hdr.Uid = int(tr.octal(s.next(8))) - hdr.Gid = int(tr.octal(s.next(8))) - hdr.Size = tr.octal(s.next(12)) - hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.Name = p.parseString(s.next(100)) + hdr.Mode = p.parseNumeric(s.next(8)) + hdr.Uid = int(p.parseNumeric(s.next(8))) + hdr.Gid = int(p.parseNumeric(s.next(8))) + hdr.Size = p.parseNumeric(s.next(12)) + hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) s.next(8) // chksum 
hdr.Typeflag = s.next(1)[0] - hdr.Linkname = cString(s.next(100)) + hdr.Linkname = p.parseString(s.next(100)) // The remainder of the header depends on the value of magic. // The original (v7) version of tar had no explicit magic field, @@ -652,30 +685,30 @@ func (tr *Reader) readHeader() *Header { switch format { case "posix", "gnu", "star": - hdr.Uname = cString(s.next(32)) - hdr.Gname = cString(s.next(32)) + hdr.Uname = p.parseString(s.next(32)) + hdr.Gname = p.parseString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = tr.octal(devmajor) - hdr.Devminor = tr.octal(devminor) + hdr.Devmajor = p.parseNumeric(devmajor) + hdr.Devminor = p.parseNumeric(devminor) } var prefix string switch format { case "posix", "gnu": - prefix = cString(s.next(155)) + prefix = p.parseString(s.next(155)) case "star": - prefix = cString(s.next(131)) - hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) - hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) + prefix = p.parseString(s.next(131)) + hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if tr.err != nil { - tr.err = ErrHeader + if p.err != nil { + tr.err = p.err return nil } @@ -695,7 +728,11 @@ func (tr *Reader) readHeader() *Header { // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = tr.octal(header[483:495]) + hdr.Size = p.parseNumeric(header[483:495]) + if p.err != nil { + tr.err = p.err + return nil + } // Read the sparse map. sp := tr.readOldGNUSparseMap(header) @@ -717,6 +754,7 @@ func (tr *Reader) readHeader() *Header { // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. 
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { + var p parser isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { @@ -727,10 +765,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -754,10 +792,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 5166403..f0dbd94 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -298,9 +298,7 @@ var untarTests = []*untarTest{ }, { file: "testdata/issue11169.tar", - // TODO(dsnet): Currently the library does not detect that this file is - // malformed. Instead it incorrectly believes that file just ends. 
- // err: ErrHeader, + err: ErrHeader, }, } @@ -983,3 +981,55 @@ func TestReadHeaderOnly(t *testing.T) { } } } + +func TestParsePAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + input string + residual string + outputKey string + outputVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.input) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) + } + } + if ok && (key != v.outputKey || val != v.outputVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.input, key, val, v.outputKey, v.outputVal) + } + if res != v.residual { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.input, res, v.residual) + } + } +} diff --git a/archive/tar/writer.go 
b/archive/tar/writer.go index 0165b22..688455d 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -42,6 +42,10 @@ type Writer struct { paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } +type formatter struct { + err error // Last error seen +} + // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -68,17 +72,9 @@ func (tw *Writer) Flush() error { } // Write s into b, terminating it with a NUL if there is room. -// If the value is too long for the field and allowPax is true add a paxheader record instead -func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } +func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { - if tw.err == nil { - tw.err = ErrFieldTooLong - } + f.err = ErrFieldTooLong return } ascii := toASCII(s) @@ -89,35 +85,17 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, } // Encode x as an octal ASCII string and write it into b with leading zeros. -func (tw *Writer) octal(b []byte, x int64) { +func (f *formatter) formatOctal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL. for len(s)+1 < len(b) { s = "0" + s } - tw.cString(b, s, false, paxNone, nil) + f.formatString(b, s) } -// Write x into b, either as octal or as binary (GNUtar/star extension). -// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead -func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - // Try octal first. 
- s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - tw.octal(b, x) - return - } - - // If it is too long for octal, and pax is preferred, use a pax header - if allowPax && tw.preferPax { - tw.octal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - // Too big: use binary (big-endian). - tw.usedBinary = true +// Write x into b, as binary (GNUtar/star extension). +func (f *formatter) formatNumeric(b []byte, x int64) { for i := len(b) - 1; x > 0 && i >= 0; i-- { b[i] = byte(x) x >>= 8 @@ -161,6 +139,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters + var f formatter var header []byte // We need to select which scratch buffer to use carefully, @@ -175,10 +154,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { copy(header, zeroBlock) s := slicer(header) + // Wrappers around formatter that automatically sets paxHeaders if the + // argument extends beyond the capacity of the input byte slice. + var formatString = func(b []byte, s string, paxKeyword string) { + needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + f.formatString(b, s) + } + var formatNumeric = func(b []byte, x int64, paxKeyword string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + f.formatOctal(b, x) + return + } + + // If it is too long for octal, and PAX is preferred, use a PAX header. 
+ if paxKeyword != paxNone && tw.preferPax { + f.formatOctal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + tw.usedBinary = true + f.formatNumeric(b, x) + } + // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) - tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) + formatString(pathHeaderBytes, hdr.Name, paxPath) // Handle out of range ModTime carefully. var modTime int64 @@ -186,25 +195,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { modTime = hdr.ModTime.Unix() } - tw.octal(s.next(8), hdr.Mode) // 100:108 - tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 - tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 - tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 - tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + f.formatOctal(s.next(8), hdr.Mode) // 100:108 + formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 + formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 + formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 + formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 - tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) + formatString(s.next(100), hdr.Linkname, paxLinkpath) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 - tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 - tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 - tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 + 
copy(s.next(8), []byte("ustar\x0000")) // 257:265 + formatString(s.next(32), hdr.Uname, paxUname) // 265:297 + formatString(s.next(32), hdr.Gname, paxGname) // 297:329 + formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 + formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) - tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix + formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { @@ -220,19 +229,20 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { delete(paxHeaders, paxPath) // Update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) + formatString(pathHeaderBytes, suffix, paxNone) + formatString(prefixHeaderBytes, prefix, paxNone) } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. chksum, _ := checksum(header) - tw.octal(header[148:155], chksum) + f.formatOctal(header[148:155], chksum) // Never fails header[155] = ' ' - if tw.err != nil { - // problem with header; probably integer too big for a field. + // Check if there were any formatting errors. 
+ if f.err != nil { + tw.err = f.err return tw.err } @@ -310,7 +320,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro sort.Strings(keys) for _, k := range keys { - fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k])) + fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) @@ -326,17 +336,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// paxHeader formats a single pax record, prefixing it with the appropriate length -func paxHeader(msg string) string { - const padding = 2 // Extra padding for space and newline - size := len(msg) + padding +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s\n", size, msg) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. 
if len(record) != size { - // Final adjustment if adding size increased - // the number of digits in size size = len(record) - record = fmt.Sprintf("%d %s\n", size, msg) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) } return record } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 25d88dc..69a44a6 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -486,24 +486,6 @@ func TestPaxHeadersSorted(t *testing.T) { } } -func TestPAXHeader(t *testing.T) { - medName := strings.Repeat("CD", 50) - longName := strings.Repeat("AB", 100) - paxTests := [][2]string{ - {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, - {"a=b", "6 a=b\n"}, // Single digit length - {"a=names", "11 a=names\n"}, // Test case involving carries - {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, - {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} - - for _, test := range paxTests { - key, expected := test[0], test[1] - if result := paxHeader(key); result != expected { - t.Fatalf("paxHeader: got %s, expected %s", result, expected) - } - } -} - func TestUSTARLongName(t *testing.T) { // Create an archive with a path that failed to split with USTAR extension in previous versions. 
fileinfo, err := os.Stat("testdata/small.txt") @@ -625,3 +607,33 @@ func TestSplitUSTARPath(t *testing.T) { } } } + +func TestFormatPAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + inputKey string + inputVal string + output string + }{ + {"k", "v", "6 k=v\n"}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, + {"path", longName, "210 path=" + longName + "\n"}, + {"path", medName, "110 path=" + medName + "\n"}, + {"foo", "ba", "9 foo=ba\n"}, + {"foo", "bar", "11 foo=bar\n"}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, + {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, + } + + for _, v := range vectors { + output := formatPAXRecord(v.inputKey, v.inputVal) + if output != v.output { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inputKey, v.inputVal, output, v.output) + } + } +} From 64935a5f0f25d74240cd2e7174a2a1aa7652a032 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 28 Sep 2015 13:49:35 -0700 Subject: [PATCH 11/17] archive/tar: move parse/format methods to standalone receiver Motivations for this change: * It allows these functions to be used outside of Reader/Writer. * It allows these functions to be more easily unit tested. 
Change-Id: Iebe2b70bdb8744371c9ffa87c24316cbbf025b59 Reviewed-on: https://go-review.googlesource.com/15113 Reviewed-by: Russ Cox Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/testdata/pax-path-hdr.tar | Bin 0 -> 1024 bytes archive/tar/testdata/ustar-file-reg.tar | Bin 0 -> 1536 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 archive/tar/testdata/pax-path-hdr.tar create mode 100644 archive/tar/testdata/ustar-file-reg.tar diff --git a/archive/tar/testdata/pax-path-hdr.tar b/archive/tar/testdata/pax-path-hdr.tar new file mode 100644 index 0000000000000000000000000000000000000000..ab8fc325b26159f4fed6bfb59fe5f616d35fec74 GIT binary patch literal 1024 zcmXR&EXmL>$=5GRO-#v6r43~O0Sq{30|OI7m>ft6gMqP;fsrYLLIndIKxuJFViC}K xO07co9Hr*bNx!kNLIE%d*akR880v$Gocz3WU67b=USe)47oFTOYR$le004hRKE?n5 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/ustar-file-reg.tar b/archive/tar/testdata/ustar-file-reg.tar new file mode 100644 index 0000000000000000000000000000000000000000..c84fa27ffb8613d20f7ac9690cd59827b04028f6 GIT binary patch literal 1536 zcmdUuJ(Hq95QhCJmlMG7O>hA!%P7hKg3OW)iVh<3F#{;S{=CgM+^V}b=Pyosep=F7x+*OI&?Q6F7LxR?fb`B^0 zttmJo<+m$~$Msw9KQ_wfqfW1sDJ#zsWq25)8&TsKn~+7*oF6~$0+nD?L2C$TBQM>$ zcQqo7Eo8w%PL1H9D9KY4`f7Ku6*oaxOv|7S1ZpTT=%sbGS#L2%*Uxm5P}U`mG$%9I zIRuF>849Ugh}k{mmgLJGtca9XnA{r2KBJ`fRXOnPqEZjWtz4z5Mq_`uZWX)VuFVko z#$DNd>@Saj1w+|ef@dPC=g!5Kb4c+mQ$bcu#R_I=;>D)V&agmv_`vpStP-sQh!z(| z5{9v2$DGKS|DrLqZ8y7?ox@|a-R^30zMeK388O@+r+cK=9NdZ)ow#}n{kwf`tD4=t zR9Oz?v}2QNv&rDR7u$%4%_>%5(k#={r!rZ(5WA5+U_Rz+w6{_kV7C!KK2e+5VuS-5 zWLRhpI#>Iq%|mhyZxDfRHCi7gC+R&sD4|k;~pXMNUD{^NPC5(KX~`*cXkwAurz(+XVNg z1P`yNv%FZy)9`uTGCWUJ^@>e2&T3O`W@re{ZN S9gn%XW0CXwKYp`+7X1&(X!Xnh literal 0 HcmV?d00001 From be9ac88117e8f7c1666e9f3c241b03c505dc52f3 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Sep 2015 00:58:56 -0700 Subject: [PATCH 12/17] archive/tar: convert Reader.Next to be loop based Motivation for change: * Recursive 
logic is hard to follow, since it tends to apply things in reverse. On the other hand, the tar formats tend to describe meta headers as affecting the next entry. * Recursion also applies changes in the wrong order. Two test files are attached that use multiple headers. The previous Go behavior differs from what GNU and BSD tar do. Change-Id: Ic1557256fc1363c5cb26570e5d0b9f65a9e57341 Reviewed-on: https://go-review.googlesource.com/14624 Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/reader.go | 160 ++++++++++-------------- archive/tar/reader_test.go | 24 ++++ archive/tar/testdata/gnu-multi-hdrs.tar | Bin 0 -> 4608 bytes archive/tar/testdata/pax-multi-hdrs.tar | Bin 0 -> 4608 bytes 4 files changed, 90 insertions(+), 94 deletions(-) create mode 100644 archive/tar/testdata/gnu-multi-hdrs.tar create mode 100644 archive/tar/testdata/pax-multi-hdrs.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 02df550..ba34ed7 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -138,8 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - var p parser - var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -147,114 +145,88 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - if tr.err == nil { - tr.skipUnread() - } + if tr.err != nil { - return hdr, tr.err + return nil, tr.err } - hdr = tr.readHeader() - if hdr == nil { - return hdr, tr.err - } - // Check for PAX/GNU header. 
- switch hdr.Typeflag { - case TypeXHeader: - // PAX extended header - headers, err := parsePAX(tr) - if err != nil { - return nil, err - } - // We actually read the whole file, - // but this skips alignment padding - tr.skipUnread() + + var hdr *Header + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + tr.err = tr.skipUnread() if tr.err != nil { return nil, tr.err } + hdr = tr.readHeader() - if hdr == nil { + if tr.err != nil { return nil, tr.err } - mergePAX(hdr, headers) - - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Sparse files do not make sense when applied to the special header - // types that never have a data section. - if isHeaderOnlyType(hdr.Typeflag) { - tr.err = ErrHeader - return nil, tr.err - } - - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, tr.err = parsePAX(tr) if tr.err != nil { return nil, tr.err } - } - return hdr, nil - case TypeGNULongName: - // We have a GNU long name header. Its contents are the real file name. 
- realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + var realname []byte + realname, tr.err = ioutil.ReadAll(tr) + if tr.err != nil { + return nil, tr.err + } + + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { + return nil, tr.err + } + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + tr.err = p.err + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + default: + mergePAX(hdr, extHdrs) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + tr.err = err return nil, err } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } + break loop // This is a file, so stop } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil - case TypeGNULongLink: - // We have a GNU long link header. 
- realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { - return nil, err - } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } - hdr.Linkname = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil } - return hdr, tr.err + return hdr, nil } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index f0dbd94..861d1a5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -288,6 +288,30 @@ var untarTests = []*untarTest{ }, }, }, + { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{ + { + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, + { + // Matches the behavior of GNU and BSD tar utilities. 
+ file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{ + { + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, { file: "testdata/neg-size.tar", err: ErrHeader, diff --git a/archive/tar/testdata/gnu-multi-hdrs.tar b/archive/tar/testdata/gnu-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..8bcad55d06e8f9fde3641d2a8df370503a582ce6 GIT binary patch literal 4608 zcmdPX*VA|K$%Afaj^QI J<`xZ33jpEZfW-g+ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-multi-hdrs.tar b/archive/tar/testdata/pax-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..14bc7597808020d7bc37e6610482fd9662814a24 GIT binary patch literal 4608 zcmeH~OAf*y5QbTM3NFy_tgc*m1D9?w)<^3;ld-=Ii;m6ILFVT3VXy>=Udb`;P z_AF}Ko(kwITGLdEM1G)5o<9H!jr=439(@V?ONRXCAu*5YPw-#9x&N1V|EJgyTG0B^ zUZ)s9!5N^&Ghph+I3UGBWYR$X|2zH<_}9R{M*cI=m|k}8li%1Drp7@Vny>jkUkM=z Sl}Br1`$oQ%|3`N;j=%%-SrC5! literal 0 HcmV?d00001 From ce5aac17f91d978a37dd742761cf57cd5bdb8ef2 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 2 Dec 2015 15:48:06 -0800 Subject: [PATCH 13/17] archive/tar: properly format GNU base-256 encoding Motivation: * Previous implementation silently failed when an integer overflow occurred. Now, we report an ErrFieldTooLong. * Previous implementation did not encode in two's complement format and was unable to encode negative numbers. The relevant GNU specification says: <<< GNU format uses two's-complement base-256 notation to store values that do not fit into standard ustar range. 
>>> Fixes #12436 Change-Id: I09c20602eabf8ae3a7e0db35b79440a64bfaf807 Reviewed-on: https://go-review.googlesource.com/17425 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/writer.go | 26 ++++++++++-- archive/tar/writer_test.go | 83 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 4 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 688455d..0426381 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -94,13 +94,31 @@ func (f *formatter) formatOctal(b []byte, x int64) { f.formatString(b, s) } +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. 
+func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1< 0 && i >= 0; i-- { - b[i] = byte(x) - x >>= 8 + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return } - b[0] |= 0x80 // highest bit indicates binary format + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong } var ( diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 69a44a6..6e91d90 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "sort" @@ -637,3 +638,85 @@ func TestFormatPAXRecord(t *testing.T) { } } } + +func TestFitsInBase256(t *testing.T) { + var vectors = []struct { + input int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.input) + if ok != v.ok { + t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestFormatNumeric(t *testing.T) { + var vectors = []struct { + input int64 + output string + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + output := make([]byte, len(v.output)) + f.formatNumeric(output, v.input) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) + } + } + if string(output) != v.output { + t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) + } + } +} From a04b4ddba428a52a96f3c046d284916313dc6d2e Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 2 Dec 2015 15:41:44 -0800 Subject: [PATCH 14/17] archive/tar: properly parse GNU base-256 encoding Motivation: * Previous implementation did not detect integer overflow when parsing a base-256 encoded field. * Previous implementation did not treat the integer as a two's complement value as specified by GNU. 
The relevant GNU specification says: <<< GNU format uses two's-complement base-256 notation to store values that do not fit into standard ustar range. >>> Fixes #12435 Change-Id: I4639bcffac8d12e1cb040b76bd05c9d7bc6c23a8 Reviewed-on: https://go-review.googlesource.com/17424 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 44 +++++++++++++++---- archive/tar/reader_test.go | 66 ++++++++++++++++++++++++++++ archive/tar/testdata/issue12435.tar | Bin 0 -> 512 bytes 3 files changed, 102 insertions(+), 8 deletions(-) create mode 100644 archive/tar/testdata/issue12435.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index ba34ed7..6e77cbe 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -480,19 +480,47 @@ func (*parser) parseString(b []byte) string { return string(b[0:n]) } +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. func (p *parser) parseNumeric(b []byte) int64 { - // Check for binary format first. + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. if len(b) > 0 && b[0]&0x80 != 0 { - var x int64 - for i, c := range b { - if i == 0 { - c &= 0x7f // ignore signal bit in first byte - } - x = x<<8 | int64(c) + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. 
+ var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff } - return x + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) } + // Normal case is base-8 (octal) format. return p.parseOctal(b) } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 861d1a5..7b148b5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -324,6 +324,10 @@ var untarTests = []*untarTest{ file: "testdata/issue11169.tar", err: ErrHeader, }, + { + file: "testdata/issue12435.tar", + err: ErrHeader, + }, } func TestReader(t *testing.T) { @@ -1057,3 +1061,65 @@ func TestParsePAXRecord(t *testing.T) { } } } + +func TestParseNumeric(t *testing.T) { + var vectors = []struct { + input string + output int64 + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. 
+ {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + num := p.parseNumeric([]byte(v.input)) + ok := (p.err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) + } + } + if ok && num != v.output { + t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) + } + } +} diff --git a/archive/tar/testdata/issue12435.tar b/archive/tar/testdata/issue12435.tar new file mode 100644 index 0000000000000000000000000000000000000000..3542dd8efd5d486b99ae03f39a56860af1c09af0 GIT binary patch literal 512 lcmZQzpgs7{2(bf|=G;FWht&p9;C+0@6=oc2Mun*p0sxa^2!Q|q literal 0 HcmV?d00001 From 962540fec3dc41e7256a85182b22926921231518 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Dec 2015 11:26:26 -0800 Subject: [PATCH 15/17] archive/tar: spell license correctly in example Change-Id: Ice85d161f026a991953bd63ecc6ec80f8d06dfbd Reviewed-on: https://go-review.googlesource.com/17901 Run-TryBot: Joe Tsai Reviewed-by: Brad Fitzpatrick --- archive/tar/example_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archive/tar/example_test.go b/archive/tar/example_test.go index 2317f44..5f0ce2f 100644 --- a/archive/tar/example_test.go +++ b/archive/tar/example_test.go @@ -26,7 +26,7 @@ func Example() { }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, - {"todo.txt", "Get animal handling licence."}, + {"todo.txt", "Get animal handling license."}, 
} for _, file := range files { hdr := &tar.Header{ @@ -76,5 +76,5 @@ func Example() { // Geoffrey // Gonzo // Contents of todo.txt: - // Get animal handling licence. + // Get animal handling license. } From 10db8408f660956a312a7fd9a5b8d0f74175e8ab Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Dec 2015 23:10:14 -0800 Subject: [PATCH 16/17] archive/tar: document how Reader.Read handles header-only files Commit dd5e14a7511465d20c6e95bf54c9b8f999abbbf6 ensured that no data could be read for header-only files regardless of what the Header.Size said. We should document this fact in Reader.Read. Updates #13647 Change-Id: I4df9a2892bc66b49e0279693d08454bf696cfa31 Reviewed-on: https://go-review.googlesource.com/17913 Reviewed-by: Russ Cox --- archive/tar/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6e77cbe..a8b63a2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -928,6 +928,10 @@ func (tr *Reader) numBytes() int64 { // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. +// +// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// the Header.Size claims. 
func (tr *Reader) Read(b []byte) (n int, err error) { if tr.err != nil { return 0, tr.err From c32966b9e8c3b429d6c7999ab2037bd537d60420 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Feb 2016 09:38:46 -0500 Subject: [PATCH 17/17] archive/tar: go1.3 and go1.4 compatibility Signed-off-by: Vincent Batts --- archive/tar/reader_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 7b148b5..821b4f0 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -344,7 +344,6 @@ func TestReader(t *testing.T) { tr = NewReader(f) hdrs []*Header chksums []string - rdbuf = make([]byte, 8) ) for { var hdr *Header @@ -361,7 +360,7 @@ func TestReader(t *testing.T) { continue } h := md5.New() - _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + _, err = io.Copy(h, tr) // Effectively an incremental read if err != nil { break }