From e33913bf758ae6e960d3802ccbc25201b6a245f8 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 15 Jul 2015 13:43:48 -0400 Subject: [PATCH 01/95] tar/asm: don't defer file closing this `for {}` can read many files. deferring the file handle close can cause an EMFILE (too many open files). --- tar/asm/assemble.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index ec15612..d18bfc5 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -44,10 +44,10 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose pw.CloseWithError(err) break } - defer fh.Close() c := crc64.New(storage.CRCTable) tRdr := io.TeeReader(fh, c) if _, err := io.Copy(pw, tRdr); err != nil { + fh.Close() pw.CloseWithError(err) break } @@ -55,9 +55,11 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // I would rather this be a comparable ErrInvalidChecksum or such, // but since it's coming through the PipeReader, the context of // _which_ file would be lost... 
+ fh.Close() pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) break } + fh.Close() } } pw.Close() From 6094dcaecab45e4ce00583d1ae52f777896b4f69 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 15:47:10 -0400 Subject: [PATCH 02/95] concept: move the PoC out of the root directory --- main.go => concept/main.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename main.go => concept/main.go (100%) diff --git a/main.go b/concept/main.go similarity index 100% rename from main.go rename to concept/main.go From fd84b2fdfd88435e133e4aba59a3facded3cc01d Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 15:51:20 -0400 Subject: [PATCH 03/95] cmd/tar-split: adding a cli tool for asm/disasm --- cmd/tar-split/README.md | 25 ++++++ cmd/tar-split/main.go | 175 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 cmd/tar-split/README.md create mode 100644 cmd/tar-split/main.go diff --git a/cmd/tar-split/README.md b/cmd/tar-split/README.md new file mode 100644 index 0000000..5451be0 --- /dev/null +++ b/cmd/tar-split/README.md @@ -0,0 +1,25 @@ +## tar-split utility + + +## Usage + +### Disassembly + +```bash +$ sha256sum archive.tar +d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar +$ mkdir ./x +$ tar-split d --output tar-data.json.gz ./archive.tar | tar -C ./x -x +time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" +``` + +### Assembly + +```bash +$ tar-split a --output new.tar --input ./tar-data.json.gz --path ./x/ +INFO[0000] created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) +$ sha256sum new.tar +d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar +``` + + diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go new file mode 100644 index 0000000..8c631b0 --- /dev/null +++ b/cmd/tar-split/main.go @@ -0,0 +1,175 @@ +// go:generate git 
tag | tail -1 +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func main() { + app := cli.NewApp() + app.Name = "tar-split" + app.Usage = "tar assembly and disassembly utility" + app.Version = "0.9.2" + app.Author = "Vincent Batts" + app.Email = "vbatts@hashbangbash.com" + app.Action = cli.ShowAppHelp + app.Before = func(c *cli.Context) error { + logrus.SetOutput(os.Stderr) + if c.Bool("debug") { + logrus.SetLevel(logrus.DebugLevel) + } + return nil + } + app.Flags = []cli.Flag{ + cli.BoolFlag{ + Name: "debug, D", + Usage: "debug output", + // defaults to false + }, + } + app.Commands = []cli.Command{ + { + Name: "disasm", + Aliases: []string{"d"}, + Usage: "disassemble the input tar stream", + Action: CommandDisasm, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "output", + Value: "tar-data.json.gz", + Usage: "output of disassembled tar stream", + }, + }, + }, + { + Name: "asm", + Aliases: []string{"a"}, + Usage: "assemble tar stream", + Action: CommandAsm, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "input", + Value: "tar-data.json.gz", + Usage: "input of disassembled tar stream", + }, + cli.StringFlag{ + Name: "output", + Value: "-", + Usage: "reassembled tar archive", + }, + cli.StringFlag{ + Name: "path", + Value: "", + Usage: "relative path of extracted tar", + }, + }, + }, + } + + if err := app.Run(os.Args); err != nil { + logrus.Fatal(err) + } +} + +func CommandDisasm(c *cli.Context) { + if len(c.Args()) != 1 { + logrus.Fatalf("please specify tar to be disabled ") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set") + } + + // Set up the tar input stream + var inputStream io.Reader + if c.Args()[0] == "-" { + inputStream = os.Stdin + } else { + fh, err := os.Open(c.Args()[0]) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + inputStream = fh + } 
+ + // Set up the metadata storage + mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz := gzip.NewWriter(mf) + defer mfz.Close() + metaPacker := storage.NewJSONPacker(mfz) + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) + if err != nil { + logrus.Fatal(err) + } + i, err := io.Copy(os.Stdout, its) + if err != nil { + logrus.Fatal(err) + } + logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) +} + +func CommandAsm(c *cli.Context) { + if len(c.Args()) > 0 { + logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) + } + if len(c.String("input")) == 0 { + logrus.Fatalf("--input filename must be set") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set ([FILENAME|-])") + } + if len(c.String("path")) == 0 { + logrus.Fatalf("--path must be set") + } + + var outputStream io.Writer + if c.String("output") == "-" { + outputStream = os.Stdout + } else { + fh, err := os.Create(c.String("output")) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + outputStream = fh + } + + // Get the tar metadata reader + mf, err := os.Open(c.String("input")) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz, err := gzip.NewReader(mf) + if err != nil { + logrus.Fatal(err) + } + defer mfz.Close() + + metaUnpacker := storage.NewJSONUnpacker(mfz) + // XXX maybe get the absolute path here + fileGetter := storage.NewPathFileGetter(c.String("path")) + + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + i, err := io.Copy(outputStream, ots) + if err != nil { + logrus.Fatal(err) + } + + logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) +} From 
04172717dedf3cb868310a286ab87a62c02a08f1 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 16:46:22 -0400 Subject: [PATCH 04/95] tar/asm: test for failure when mangling --- tar/asm/assemble_test.go | 57 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 203e716..b8a70ef 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -5,6 +5,7 @@ import ( "compress/gzip" "crypto/sha1" "fmt" + "hash/crc64" "io" "io/ioutil" "os" @@ -33,12 +34,36 @@ var entries = []struct { Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, - Body: []byte("café con leche, por favor"), }, } +var entriesMangled = []struct { + Entry storage.Entry + Body []byte +}{ + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./hurr.txt", + Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78}, + Size: 20, + }, + // switch + Body: []byte("imma derp til I hurr"), + }, + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./ermahgerd.txt", + Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + // san not con + Body: []byte("café sans leche, por favor"), + }, +} -func TestTarStreamOld(t *testing.T) { +func TestTarStreamMangledGetterPutter(t *testing.T) { fgp := storage.NewBufferFileGetPutter() // first lets prep a GetPutter and Packer @@ -63,9 +88,33 @@ func TestTarStreamOld(t *testing.T) { } } + for _, e := range entriesMangled { + if e.Entry.Type == storage.FileType { + rdr, err := fgp.Get(e.Entry.Name) + if err != nil { + t.Error(err) + } + c := crc64.New(storage.CRCTable) + i, err := io.Copy(c, rdr) + if err != nil { + t.Fatal(err) + } + rdr.Close() + + csum := c.Sum(nil) + if !bytes.Equal(csum, e.Entry.Payload) { + t.Errorf("wrote %d bytes. 
checksum %q: expected %v; got %v", + i, + e.Entry.Name, + e.Entry.Payload, + csum) + } + } + } + + // TODO test a mangled relative path assembly // next we'll use these to produce a tar stream. - _ = NewOutputTarStream(fgp, nil) - // TODO finish this + //_ = NewOutputTarStream(fgp, nil) } func TestTarStream(t *testing.T) { From 97acaa9e83a16a2f816754fac034d9796fbc4a35 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 17:22:10 -0400 Subject: [PATCH 05/95] travis: needing to fetch the cmd dependencies --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f700909..bdf0ea7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ go: sudo: false # we don't need "go get" here <3 -install: true +install: go get -d ./... script: - go test -v ./... From c74af0bae74b4ac842b75ac969a3b86502c55d0d Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 17:26:16 -0400 Subject: [PATCH 06/95] tar/asm: test was flipped --- tar/asm/assemble_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index b8a70ef..c3bda7a 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -102,11 +102,10 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { rdr.Close() csum := c.Sum(nil) - if !bytes.Equal(csum, e.Entry.Payload) { - t.Errorf("wrote %d bytes. checksum %q: expected %v; got %v", + if bytes.Equal(csum, e.Entry.Payload) { + t.Errorf("wrote %d bytes. checksum for %q should not have matched! 
%v", i, e.Entry.Name, - e.Entry.Payload, csum) } } From d3556a05511eb4e0bbc6ca66ea0b2083e1992c04 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 20:16:42 -0400 Subject: [PATCH 07/95] travis: go1.4.1 -> go1.4.2 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bdf0ea7..fc1571c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: go go: - - 1.4.1 + - 1.4.2 - 1.3.3 # let us have pretty, fast Docker-based Travis workers! From 6d59e7bc76156496c4c7b30ddcd9364b592100a0 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 21 Jul 2015 12:08:57 -0400 Subject: [PATCH 08/95] tar/asm: clean up return on errors This closure on error message needs returns so that the error message is bubbled up to the reader. --- tar/asm/disassemble.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index de25db0..785e194 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -61,6 +61,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) + return } break // not return. We need the end of the reader. 
} @@ -73,6 +74,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io Payload: tr.RawBytes(), }); err != nil { pW.CloseWithError(err) + return } var csum []byte @@ -81,6 +83,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io _, csum, err = fp.Put(hdr.Name, tr) if err != nil { pW.CloseWithError(err) + return } } @@ -93,6 +96,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) + return } if b := tr.RawBytes(); len(b) > 0 { @@ -102,6 +106,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) + return } } } @@ -111,6 +116,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io remainder, err := ioutil.ReadAll(outputRdr) if err != nil && err != io.EOF { pW.CloseWithError(err) + return } _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, @@ -118,9 +124,9 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) - } else { - pW.Close() + return } + pW.Close() }() return pR, nil From c2c2dde4cbcb1db413c244c22ea189a60722ae2f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 22 Jul 2015 10:27:53 -0400 Subject: [PATCH 09/95] tar/storage: use `filepath` instead of `path` --- tar/storage/getter.go | 13 ++++++------- tar/storage/packer.go | 6 +++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tar/storage/getter.go b/tar/storage/getter.go index 5d46e6a..c44b15e 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -7,23 +7,23 @@ import ( "io" "io/ioutil" "os" - "path" + "path/filepath" ) // FileGetter is the interface for getting a stream of a file payload, address -// by name/filepath. Presumably, the names will be scoped to relative file +// by name/filename. Presumably, the names will be scoped to relative file // paths. 
type FileGetter interface { // Get returns a stream for the provided file path - Get(filepath string) (output io.ReadCloser, err error) + Get(filename string) (output io.ReadCloser, err error) } // FilePutter is the interface for storing a stream of a file payload, -// addressed by name/filepath. +// addressed by name/filename. type FilePutter interface { // Put returns the size of the stream received, and the crc64 checksum for // the provided stream - Put(filepath string, input io.Reader) (size int64, checksum []byte, err error) + Put(filename string, input io.Reader) (size int64, checksum []byte, err error) } // FileGetPutter is the interface that groups both Getting and Putting file @@ -44,8 +44,7 @@ type pathFileGetter struct { } func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) { - // FIXME might should have a check for '../../../../etc/passwd' attempts? - return os.Open(path.Join(pfg.root, filename)) + return os.Open(filepath.Join(pfg.root, filename)) } type bufferFileGetPutter struct { diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 6c4364b..c0070a6 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" "io" - "path" + "path/filepath" ) // ErrDuplicatePath is occured when a tar archive has more than one entry for @@ -61,7 +61,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // check for dup name if e.Type == FileType { - cName := path.Clean(e.Name) + cName := filepath.Clean(e.Name) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } @@ -99,7 +99,7 @@ const emptyByte byte = 0 func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // check early for dup name if e.Type == FileType { - cName := path.Clean(e.Name) + cName := filepath.Clean(e.Name) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } From e0e9886972e2ab8ae9190428d1d3030c5c80c483 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 22 Jul 2015 11:32:18 -0400 Subject: 
[PATCH 10/95] tar/asm: return instead of break https://github.com/vbatts/docker/commit/5ddec2ae4a74552cb358bae90e272398a957cfc4#commitcomment-12290378 Reported-by: Tibor Vass --- tar/asm/assemble.go | 10 +++++----- tar/asm/assemble_test.go | 9 +-------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index d18bfc5..1bef97b 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -27,13 +27,13 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose entry, err := up.Next() if err != nil { pw.CloseWithError(err) - break + return } switch entry.Type { case storage.SegmentType: if _, err := pw.Write(entry.Payload); err != nil { pw.CloseWithError(err) - break + return } case storage.FileType: if entry.Size == 0 { @@ -42,14 +42,14 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose fh, err := fg.Get(entry.Name) if err != nil { pw.CloseWithError(err) - break + return } c := crc64.New(storage.CRCTable) tRdr := io.TeeReader(fh, c) if _, err := io.Copy(pw, tRdr); err != nil { fh.Close() pw.CloseWithError(err) - break + return } if !bytes.Equal(c.Sum(nil), entry.Payload) { // I would rather this be a comparable ErrInvalidChecksum or such, @@ -57,7 +57,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // _which_ file would be lost... fh.Close() pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) - break + return } fh.Close() } diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index c3bda7a..7cf44dc 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -110,10 +110,6 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } } } - - // TODO test a mangled relative path assembly - // next we'll use these to produce a tar stream. 
- //_ = NewOutputTarStream(fgp, nil) } func TestTarStream(t *testing.T) { @@ -171,10 +167,7 @@ func TestTarStream(t *testing.T) { rc := NewOutputTarStream(fgp, sup) h1 := sha1.New() - tRdr1 := io.TeeReader(rc, h1) - - // read it all to the bit bucket - i, err = io.Copy(ioutil.Discard, tRdr1) + i, err = io.Copy(h1, rc) if err != nil { t.Fatal(err) } From 002d19f0b0eaac11e4e811097dae981f3cd64331 Mon Sep 17 00:00:00 2001 From: Jonathan Boulle Date: Tue, 23 Jun 2015 13:13:29 -0700 Subject: [PATCH 11/95] *: clean up assorted spelling/grammar issues Various minor fixes noticed on walking through --- tar/asm/assemble.go | 2 +- tar/asm/disassemble.go | 6 +++--- tar/storage/doc.go | 2 +- tar/storage/entry.go | 6 +++--- tar/storage/getter.go | 8 ++++---- tar/storage/packer.go | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 1bef97b..b421db0 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -9,7 +9,7 @@ import ( "github.com/vbatts/tar-split/tar/storage" ) -// NewOutputTarStream returns an io.ReadCloser that is an assemble tar archive +// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive // stream. // // It takes a storage.FileGetter, for mapping the file payloads that are to be read in, diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 785e194..4a8ed94 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -22,8 +22,8 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io // What to do here... folks will want their own access to the Reader that is // their tar archive stream, but we'll need that same stream to use our // forked 'archive/tar'. - // Perhaps do an io.TeeReader that hand back an io.Reader for them to read - // from, and we'll mitm the stream to store metadata. + // Perhaps do an io.TeeReader that hands back an io.Reader for them to read + // from, and we'll MITM the stream to store metadata. 
// We'll need a storage.FilePutter too ... // Another concern, whether to do any storage.FilePutter operations, such that we @@ -32,7 +32,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io // Perhaps we have a DiscardFilePutter that is a bit bucket. // we'll return the pipe reader, since TeeReader does not buffer and will - // only read what the outputRdr Read's. Since Tar archive's have padding on + // only read what the outputRdr Read's. Since Tar archives have padding on // the end, we want to be the one reading the padding, even if the user's // `archive/tar` doesn't care. pR, pW := io.Pipe() diff --git a/tar/storage/doc.go b/tar/storage/doc.go index 57b61bc..83f7089 100644 --- a/tar/storage/doc.go +++ b/tar/storage/doc.go @@ -5,7 +5,7 @@ Packing and unpacking the Entries of the stream. The types of streams are either segments of raw bytes (for the raw headers and various padding) and for an entry marking a file payload. -The raw bytes are stored precisely in the packed (marshalled) Entry. Where as +The raw bytes are stored precisely in the packed (marshalled) Entry, whereas the file payload marker include the name of the file, size, and crc64 checksum (for basic file integrity). */ diff --git a/tar/storage/entry.go b/tar/storage/entry.go index 961af49..57a0256 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -19,11 +19,11 @@ const ( // SegmentType represents a raw bytes segment from the archive stream. These raw // byte segments consist of the raw headers and various padding. // - // It's payload is to be marshalled base64 encoded. + // Its payload is to be marshalled base64 encoded. SegmentType ) -// Entry is a the structure for packing and unpacking the information read from +// Entry is the structure for packing and unpacking the information read from // the Tar archive. 
// // FileType Payload checksum is using `hash/crc64` for basic file integrity, @@ -34,6 +34,6 @@ type Entry struct { Type Type `json:"type"` Name string `json:"name",omitempty` Size int64 `json:"size",omitempty` - Payload []byte `json:"payload"` // SegmentType store payload here; FileType store crc64 checksum here; + Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } diff --git a/tar/storage/getter.go b/tar/storage/getter.go index c44b15e..ae110c6 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -10,9 +10,9 @@ import ( "path/filepath" ) -// FileGetter is the interface for getting a stream of a file payload, address -// by name/filename. Presumably, the names will be scoped to relative file -// paths. +// FileGetter is the interface for getting a stream of a file payload, +// addressed by name/filename. Presumably, the names will be scoped to relative +// file paths. type FileGetter interface { // Get returns a stream for the provided file path Get(filename string) (output io.ReadCloser, err error) @@ -77,7 +77,7 @@ type readCloserWrapper struct { func (w *readCloserWrapper) Close() error { return nil } -// NewBufferFileGetPutter is simple in memory FileGetPutter +// NewBufferFileGetPutter is a simple in-memory FileGetPutter // // Implication is this is memory intensive... // Probably best for testing or light weight cases. 
diff --git a/tar/storage/packer.go b/tar/storage/packer.go index c0070a6..584978e 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -8,8 +8,8 @@ import ( "path/filepath" ) -// ErrDuplicatePath is occured when a tar archive has more than one entry for -// the same file path +// ErrDuplicatePath occurs when a tar archive has more than one entry for the +// same file path var ErrDuplicatePath = errors.New("duplicates of file paths not supported") // Packer describes the methods to pack Entries to a storage destination @@ -117,7 +117,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) { return e.Position, nil } -// NewJSONPacker provides an Packer that writes each Entry (SegmentType and +// NewJSONPacker provides a Packer that writes each Entry (SegmentType and // FileType) as a json document. // // The Entries are delimited by new line. From caf6a872c9ac84f4c0e57d4170a6eaca6a07450e Mon Sep 17 00:00:00 2001 From: Jonathan Boulle Date: Tue, 23 Jun 2015 13:13:54 -0700 Subject: [PATCH 12/95] tar/storage: switch to map[string]struct{} for set Using an empty struct is more idiomatic/efficient for representing a set-like container. --- tar/storage/packer.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 584978e..a02a19a 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -65,7 +65,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } - jup.seen[cName] = emptyByte + jup.seen[cName] = struct{}{} } return &e, err @@ -90,11 +90,7 @@ type jsonPacker struct { seen seenNames } -type seenNames map[string]byte - -// used in the seenNames map. byte is a uint8, and we'll re-use the same one -// for minimalism. 
-const emptyByte byte = 0 +type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // check early for dup name @@ -103,7 +99,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) { if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } - jp.seen[cName] = emptyByte + jp.seen[cName] = struct{}{} } e.Position = jp.pos From de37d1755a80f132275a05bec7c5cd4b67f2bbc9 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 28 Jul 2015 15:45:24 -0400 Subject: [PATCH 13/95] travis: incorrect comment --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fc1571c..21d6684 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ go: # let us have pretty, fast Docker-based Travis workers! sudo: false -# we don't need "go get" here <3 install: go get -d ./... script: From f465e4720e45ad7c6f4849bd74af7e27fefabc18 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 28 Jul 2015 17:16:04 -0400 Subject: [PATCH 14/95] cmd/tar-split: adding to the README --- cmd/tar-split/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmd/tar-split/README.md b/cmd/tar-split/README.md index 5451be0..eae23c2 100644 --- a/cmd/tar-split/README.md +++ b/cmd/tar-split/README.md @@ -1,5 +1,8 @@ -## tar-split utility +# tar-split utility +## Installation + + go get -u github.com/vbatts/tar-split/cmd/tar-split ## Usage @@ -9,14 +12,14 @@ $ sha256sum archive.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar $ mkdir ./x -$ tar-split d --output tar-data.json.gz ./archive.tar | tar -C ./x -x +$ tar-split disasm --output tar-data.json.gz ./archive.tar | tar -C ./x -x time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" ``` ### Assembly ```bash -$ tar-split a --output new.tar --input ./tar-data.json.gz --path ./x/ +$ tar-split asm --output new.tar --input ./tar-data.json.gz --path ./x/ INFO[0000] 
created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) $ sha256sum new.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar From 779e824d762aa1d96a5845f86c0fe702c6bd29d0 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 15:24:51 -0400 Subject: [PATCH 15/95] README: formatting and cleanup --- DESIGN.md | 36 ----------------- README.md | 98 ++++++----------------------------------------- concept/DESIGN.md | 94 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 122 deletions(-) delete mode 100644 DESIGN.md create mode 100644 concept/DESIGN.md diff --git a/DESIGN.md b/DESIGN.md deleted file mode 100644 index 1ce3fd4..0000000 --- a/DESIGN.md +++ /dev/null @@ -1,36 +0,0 @@ -Flow of TAR stream -================== - -The underlying use of `github.com/vbatts/tar-split/archive/tar` is most similar -to stdlib. - - -Packer interface ----------------- - -For ease of storage and usage of the raw bytes, there will be a storage -interface, that accepts an io.Writer (This way you could pass it an in memory -buffer or a file handle). - -Having a Packer interface can allow configuration of hash.Hash for file payloads -and providing your own io.Writer. - -Instead of having a state directory to store all the header information for all -Readers, we will leave that up to user of Reader. Because we can not assume an -ID for each Reader, and keeping that information differentiated. - - - -State Directory ---------------- - -Perhaps we could deduplicate the header info, by hashing the rawbytes and -storing them in a directory tree like: - - ./ac/dc/beef - -Then reference the hash of the header info, in the positional records for the -tar stream. Though this could be a future feature, and not required for an -initial implementation. Also, this would imply an owned state directory, rather -than just writing storage info to an io.Writer. 
- diff --git a/README.md b/README.md index c5e9a71..fd78ad7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -tar-split -======== +# tar-split [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) @@ -9,17 +8,13 @@ bytes of the TAR, rather than just the marshalled headers and file stream. The goal being that by preserving the raw bytes of each header, padding bytes, and the raw file payload, one could reassemble the original archive. - -Docs ----- +## Docs * https://godoc.org/github.com/vbatts/tar-split/tar/asm * https://godoc.org/github.com/vbatts/tar-split/tar/storage * https://godoc.org/github.com/vbatts/tar-split/archive/tar - -Caveat ------- +## Caveat Eventually this should detect TARs that this is not possible with. @@ -37,85 +32,19 @@ same path, we will not support this feature. If there are more than one entries with the same path, expect an err (like `ErrDuplicatePath`) or a resulting tar stream that does not validate your original checksum/signature. +## Contract -Contract --------- +Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstream mergeable solution). -Do not break the API of stdlib `archive/tar` in our fork (ideally find an -upstream mergeable solution) - - -Std Version ------------ +## Std Version The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) +## Concept -Example -------- +See the [design](concept/DESIGN.md). -First we'll get an archive to work with. For repeatability, we'll make an -archive from what you've just cloned: - -``` -git archive --format=tar -o tar-split.tar HEAD . 
-``` - -Then build the example main.go: - -``` -go build ./main.go -``` - -Now run the example over the archive: - -``` -$ ./main tar-split.tar -2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" -pax_global_header pre: 512 read: 52 -.travis.yml pre: 972 read: 374 -DESIGN.md pre: 650 read: 1131 -LICENSE pre: 917 read: 1075 -README.md pre: 973 read: 4289 -archive/ pre: 831 read: 0 -archive/tar/ pre: 512 read: 0 -archive/tar/common.go pre: 512 read: 7790 -[...] -tar/storage/entry_test.go pre: 667 read: 1137 -tar/storage/getter.go pre: 911 read: 2741 -tar/storage/getter_test.go pre: 843 read: 1491 -tar/storage/packer.go pre: 557 read: 3141 -tar/storage/packer_test.go pre: 955 read: 3096 -EOF padding: 1512 -Remainder: 512 -Size: 215040; Sum: 215040 -``` - -*What are we seeing here?* - -* `pre` is the header of a file entry, and potentially the padding from the - end of the prior file's payload. Also with particular tar extensions and pax - attributes, the header can exceed 512 bytes. -* `read` is the size of the file payload from the entry -* `EOF padding` is the expected 1024 null bytes on the end of a tar archive, - plus potential padding from the end of the prior file entry's payload -* `Remainder` is the remaining bytes of an archive. This is typically deadspace - as most tar implmentations will return after having reached the end of the - 1024 null bytes. Though various implementations will include some amount of - bytes here, which will affect the checksum of the resulting tar archive, - therefore this must be accounted for as well. - -Ideally the input tar and output `*.out`, will match: - -``` -$ sha1sum tar-split.tar* -ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar -ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out -``` - - -Stored Metadata ---------------- +## Stored Metadata Since the raw bytes of the headers and padding are stored, you may be wondering what the size implications are. 
The headers are at least 512 bytes per @@ -163,8 +92,7 @@ bytes-per-file rate for the storage implications. | ~ 1kb per/file | 0.06kb per/file | -What's Next? ------------- +## What's Next? * More implementations of storage Packer and Unpacker - could be a redis or mongo backend @@ -173,9 +101,7 @@ What's Next? * cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` -License -------- - -See LICENSE +## License +See [LICENSE](LICENSE) diff --git a/concept/DESIGN.md b/concept/DESIGN.md new file mode 100644 index 0000000..4bfa82c --- /dev/null +++ b/concept/DESIGN.md @@ -0,0 +1,94 @@ +# Flow of TAR stream + +## `./archive/tar` + +The import path `github.com/vbatts/tar-split/archive/tar` is fork of upstream golang stdlib [`archive/tar`](http://golang.org/pkg/archive/tar/). +It adds plumbing to access raw bytes of the tar stream as the headers and payload are read. + +## Packer interface + +For ease of storage and usage of the raw bytes, there will be a storage +interface, that accepts an io.Writer (This way you could pass it an in memory +buffer or a file handle). + +Having a Packer interface can allow configuration of hash.Hash for file payloads +and providing your own io.Writer. + +Instead of having a state directory to store all the header information for all +Readers, we will leave that up to user of Reader. Because we can not assume an +ID for each Reader, and keeping that information differentiated. + +## State Directory + +Perhaps we could deduplicate the header info, by hashing the rawbytes and +storing them in a directory tree like: + + ./ac/dc/beef + +Then reference the hash of the header info, in the positional records for the +tar stream. Though this could be a future feature, and not required for an +initial implementation. Also, this would imply an owned state directory, rather +than just writing storage info to an io.Writer. 
+ +## Concept Example + +First we'll get an archive to work with. For repeatability, we'll make an +archive from what you've just cloned: + +``` +git archive --format=tar -o tar-split.tar HEAD . +``` + +Then build the example main.go: + +``` +go build ./main.go +``` + +Now run the example over the archive: + +``` +$ ./main tar-split.tar +2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" +pax_global_header pre: 512 read: 52 +.travis.yml pre: 972 read: 374 +DESIGN.md pre: 650 read: 1131 +LICENSE pre: 917 read: 1075 +README.md pre: 973 read: 4289 +archive/ pre: 831 read: 0 +archive/tar/ pre: 512 read: 0 +archive/tar/common.go pre: 512 read: 7790 +[...] +tar/storage/entry_test.go pre: 667 read: 1137 +tar/storage/getter.go pre: 911 read: 2741 +tar/storage/getter_test.go pre: 843 read: 1491 +tar/storage/packer.go pre: 557 read: 3141 +tar/storage/packer_test.go pre: 955 read: 3096 +EOF padding: 1512 +Remainder: 512 +Size: 215040; Sum: 215040 +``` + +*What are we seeing here?* + +* `pre` is the header of a file entry, and potentially the padding from the + end of the prior file's payload. Also with particular tar extensions and pax + attributes, the header can exceed 512 bytes. +* `read` is the size of the file payload from the entry +* `EOF padding` is the expected 1024 null bytes on the end of a tar archive, + plus potential padding from the end of the prior file entry's payload +* `Remainder` is the remaining bytes of an archive. This is typically deadspace + as most tar implmentations will return after having reached the end of the + 1024 null bytes. Though various implementations will include some amount of + bytes here, which will affect the checksum of the resulting tar archive, + therefore this must be accounted for as well. 
+ +Ideally the input tar and output `*.out`, will match: + +``` +$ sha1sum tar-split.tar* +ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar +ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out +``` + + From 5d0b967302380b6fb8b06a35070f2ddba5a296ff Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 15:29:08 -0400 Subject: [PATCH 16/95] README: cleanup --- README.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fd78ad7..a8694cf 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,20 @@ and the raw file payload, one could reassemble the original archive. ## Docs +Code API for libraries provided by `tar-split`: + * https://godoc.org/github.com/vbatts/tar-split/tar/asm * https://godoc.org/github.com/vbatts/tar-split/tar/storage * https://godoc.org/github.com/vbatts/tar-split/archive/tar +## Install + +The command line utilitiy is installable via: + +```bash +go get github.com/vbatts/tar-split/cmd/tar-split +``` + ## Caveat Eventually this should detect TARs that this is not possible with. @@ -40,7 +50,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) -## Concept +## Design See the [design](concept/DESIGN.md). @@ -52,10 +62,15 @@ file (sometimes more), at least 1024 null bytes on the end, and then various padding. This makes for a constant linear growth in the stored metadata, with a naive storage implementation. -Reusing our prior example's `tar-split.tar`, let's build the checksize.go example: +First we'll get an archive to work with. For repeatability, we'll make an +archive from what you've just cloned: ``` -go build ./checksize.go +git archive --format=tar -o tar-split.tar HEAD . 
+``` + +``` +go build ./checksize.go ``` ``` From 6c671d7267b5a9bd7d68dd1c460404fed794b696 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 16:20:22 -0400 Subject: [PATCH 17/95] cmd/tar-split: make `checksize` a sub-command Moving it from top-level to the `tar-split` command --- README.md | 14 ++- cmd/tar-split/asm.go | 64 ++++++++++++ checksize.go => cmd/tar-split/checksize.go | 24 ++--- cmd/tar-split/disasm.go | 56 +++++++++++ cmd/tar-split/main.go | 112 +++------------------ 5 files changed, 149 insertions(+), 121 deletions(-) create mode 100644 cmd/tar-split/asm.go rename checksize.go => cmd/tar-split/checksize.go (83%) create mode 100644 cmd/tar-split/disasm.go diff --git a/README.md b/README.md index a8694cf..6c3d032 100644 --- a/README.md +++ b/README.md @@ -65,16 +65,13 @@ naive storage implementation. First we'll get an archive to work with. For repeatability, we'll make an archive from what you've just cloned: -``` +```bash git archive --format=tar -o tar-split.tar HEAD . ``` -``` -go build ./checksize.go -``` - -``` -$ ./checksize ./tar-split.tar +```bash +$ go get github.com/vbatts/tar-split/cmd/tar-split +$ tar-split checksize ./tar-split.tar inspecting "tar-split.tar" (size 210k) -- number of files: 50 -- size of metadata uncompressed: 53k @@ -87,7 +84,7 @@ implications are as little as 3kb. But let's look at a larger archive, with many files. -``` +```bash $ ls -sh ./d.tar 1.4G ./d.tar $ ./checksize ~/d.tar @@ -116,6 +113,7 @@ bytes-per-file rate for the storage implications. 
* cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` + ## License See [LICENSE](LICENSE) diff --git a/cmd/tar-split/asm.go b/cmd/tar-split/asm.go new file mode 100644 index 0000000..312e54b --- /dev/null +++ b/cmd/tar-split/asm.go @@ -0,0 +1,64 @@ +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func CommandAsm(c *cli.Context) { + if len(c.Args()) > 0 { + logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) + } + if len(c.String("input")) == 0 { + logrus.Fatalf("--input filename must be set") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set ([FILENAME|-])") + } + if len(c.String("path")) == 0 { + logrus.Fatalf("--path must be set") + } + + var outputStream io.Writer + if c.String("output") == "-" { + outputStream = os.Stdout + } else { + fh, err := os.Create(c.String("output")) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + outputStream = fh + } + + // Get the tar metadata reader + mf, err := os.Open(c.String("input")) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz, err := gzip.NewReader(mf) + if err != nil { + logrus.Fatal(err) + } + defer mfz.Close() + + metaUnpacker := storage.NewJSONUnpacker(mfz) + // XXX maybe get the absolute path here + fileGetter := storage.NewPathFileGetter(c.String("path")) + + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + i, err := io.Copy(outputStream, ots) + if err != nil { + logrus.Fatal(err) + } + + logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) +} diff --git a/checksize.go b/cmd/tar-split/checksize.go similarity index 83% rename from checksize.go rename to cmd/tar-split/checksize.go index 
a6d3c08..38f830e 100644 --- a/checksize.go +++ b/cmd/tar-split/checksize.go @@ -1,29 +1,25 @@ -// +build ignore - package main import ( "archive/tar" "compress/gzip" - "flag" "fmt" "io" "io/ioutil" "log" "os" + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) -var ( - flCleanup = flag.Bool("cleanup", true, "cleanup tempfiles") -) - -func main() { - flag.Parse() - - for _, arg := range flag.Args() { +func CommandChecksize(c *cli.Context) { + if len(c.Args()) == 0 { + logrus.Fatalf("please specify tar archives to check ('-' will check stdin)") + } + for _, arg := range c.Args() { fh, err := os.Open(arg) if err != nil { log.Fatal(err) @@ -40,8 +36,10 @@ func main() { log.Fatal(err) } defer packFh.Close() - if *flCleanup { + if !c.Bool("work") { defer os.Remove(packFh.Name()) + } else { + fmt.Printf(" -- working file preserved: %s\n", packFh.Name()) } sp := storage.NewJSONPacker(packFh) @@ -83,7 +81,7 @@ func main() { log.Fatal(err) } defer gzPackFh.Close() - if *flCleanup { + if !c.Bool("work") { defer os.Remove(gzPackFh.Name()) } diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go new file mode 100644 index 0000000..b7b0dfe --- /dev/null +++ b/cmd/tar-split/disasm.go @@ -0,0 +1,56 @@ +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func CommandDisasm(c *cli.Context) { + if len(c.Args()) != 1 { + logrus.Fatalf("please specify tar to be disabled ") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set") + } + + // Set up the tar input stream + var inputStream io.Reader + if c.Args()[0] == "-" { + inputStream = os.Stdin + } else { + fh, err := os.Open(c.Args()[0]) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + inputStream = fh + } + + // Set up the metadata 
storage + mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz := gzip.NewWriter(mf) + defer mfz.Close() + metaPacker := storage.NewJSONPacker(mfz) + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) + if err != nil { + logrus.Fatal(err) + } + i, err := io.Copy(os.Stdout, its) + if err != nil { + logrus.Fatal(err) + } + logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) +} diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index 8c631b0..f24dce5 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -2,14 +2,10 @@ package main import ( - "compress/gzip" - "io" "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" - "github.com/vbatts/tar-split/tar/asm" - "github.com/vbatts/tar-split/tar/storage" ) func main() { @@ -71,105 +67,21 @@ func main() { }, }, }, + { + Name: "checksize", + Usage: "displays size estimates for metadata storage of a Tar archive", + Action: CommandChecksize, + Flags: []cli.Flag{ + cli.BoolFlag{ + Name: "work", + Usage: "do not delete the working directory", + // defaults to false + }, + }, + }, } if err := app.Run(os.Args); err != nil { logrus.Fatal(err) } } - -func CommandDisasm(c *cli.Context) { - if len(c.Args()) != 1 { - logrus.Fatalf("please specify tar to be disabled ") - } - if len(c.String("output")) == 0 { - logrus.Fatalf("--output filename must be set") - } - - // Set up the tar input stream - var inputStream io.Reader - if c.Args()[0] == "-" { - inputStream = os.Stdin - } else { - fh, err := os.Open(c.Args()[0]) - if err != nil { - logrus.Fatal(err) - } - defer fh.Close() - inputStream = fh - } - - // Set up the metadata storage - mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) - 
if err != nil { - logrus.Fatal(err) - } - defer mf.Close() - mfz := gzip.NewWriter(mf) - defer mfz.Close() - metaPacker := storage.NewJSONPacker(mfz) - - // we're passing nil here for the file putter, because the ApplyDiff will - // handle the extraction of the archive - its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) - if err != nil { - logrus.Fatal(err) - } - i, err := io.Copy(os.Stdout, its) - if err != nil { - logrus.Fatal(err) - } - logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) -} - -func CommandAsm(c *cli.Context) { - if len(c.Args()) > 0 { - logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) - } - if len(c.String("input")) == 0 { - logrus.Fatalf("--input filename must be set") - } - if len(c.String("output")) == 0 { - logrus.Fatalf("--output filename must be set ([FILENAME|-])") - } - if len(c.String("path")) == 0 { - logrus.Fatalf("--path must be set") - } - - var outputStream io.Writer - if c.String("output") == "-" { - outputStream = os.Stdout - } else { - fh, err := os.Create(c.String("output")) - if err != nil { - logrus.Fatal(err) - } - defer fh.Close() - outputStream = fh - } - - // Get the tar metadata reader - mf, err := os.Open(c.String("input")) - if err != nil { - logrus.Fatal(err) - } - defer mf.Close() - mfz, err := gzip.NewReader(mf) - if err != nil { - logrus.Fatal(err) - } - defer mfz.Close() - - metaUnpacker := storage.NewJSONUnpacker(mfz) - // XXX maybe get the absolute path here - fileGetter := storage.NewPathFileGetter(c.String("path")) - - ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) - defer ots.Close() - i, err := io.Copy(outputStream, ots) - if err != nil { - logrus.Fatal(err) - } - - logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) -} From c6be94f8a32be7147630a87b7ee39f41f82421c4 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 16:22:36 -0400 Subject: 
[PATCH 18/95] cmd/tar-split: README usage for checksize --- cmd/tar-split/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cmd/tar-split/README.md b/cmd/tar-split/README.md index eae23c2..02a2218 100644 --- a/cmd/tar-split/README.md +++ b/cmd/tar-split/README.md @@ -25,4 +25,15 @@ $ sha256sum new.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar ``` +### Estimating metadata size + +```bash +$ tar-split checksize ./archive.tar +inspecting "./archive.tar" (size 200k) + -- number of files: 28 + -- size of metadata uncompressed: 28k + -- size of gzip compressed metadata: 1k +``` + + From 0a79a3807ce50a84f8c0e0f567920ebce80c697f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 16:26:09 -0400 Subject: [PATCH 19/95] README: missed a checksize reference --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c3d032..e37d36b 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ But let's look at a larger archive, with many files. 
```bash $ ls -sh ./d.tar 1.4G ./d.tar -$ ./checksize ~/d.tar +$ tar-split checksize ~/d.tar inspecting "/home/vbatts/d.tar" (size 1420749k) -- number of files: 38718 -- size of metadata uncompressed: 43261k From 51b0481d4aecf1c051b1dfc942ab46986e776bef Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 3 Aug 2015 17:13:31 -0400 Subject: [PATCH 20/95] tar/asm: adding a failing test due to GNU LongLink --- tar/asm/assemble_test.go | 80 ++++++++++++++++++++++++++++--- tar/asm/testdata/longlink.tar.gz | Bin 0 -> 438 bytes 2 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 tar/asm/testdata/longlink.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 7cf44dc..e37d7f3 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -114,8 +114,8 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { func TestTarStream(t *testing.T) { var ( - expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" - expectedSize int64 = 10240 + expectedSHA1Sum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" + expectedSize int64 = 10240 ) fh, err := os.Open("./testdata/t.tar.gz") @@ -153,8 +153,8 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSum, h0.Sum(nil)) + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) } t.Logf("%s", w.String()) // if we fail, then show the packed info @@ -175,7 +175,75 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSum, h1.Sum(nil)) + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; 
got %x", expectedSHA1Sum, h1.Sum(nil)) + } +} + +func TestTarGNUTar(t *testing.T) { + var ( + expectedSHA1Sum = "d9f6babe107b7247953dff6b5b5ae31a3a880add" + expectedSize int64 = 20480 + ) + + fh, err := os.Open("./testdata/longlink.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } + + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) + } + + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. 
+ + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) } } diff --git a/tar/asm/testdata/longlink.tar.gz b/tar/asm/testdata/longlink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb21db5f382892fd9c2e529e7baeff37efee7dbf GIT binary patch literal 438 zcmV;n0ZINJiwFokufJ6Q18i?@XKZP1Yc6zQaschxYihzk6hPrD#T6tolX?8@0^CGe zv6O;>NlTaCq*bU`jW=d)k)97EwFcCl>%@VURnx9lRgshFr4)vvO~LGs_Ure1g>@mM zHRk*+171 z$otoI^7Pkm(m%uHK;GZi*3w_Y@BQap3FQ5QbCLcUF7%&m2;}`^zXH%-!&v`0hCtpw znP}**;i~>O4#9-}!|C)_aDD&jLm=+&OtS3%RS5e}8UlHL8y)+96;t|4hd|!n1<(Fp z#pM2iA&~d4ePsWy;lS22J8(;*P`4}JgIn8^CCqO8j5VY%HrFJCsVnV#d&_{`Bbb36;f|9^!3 z``p61hzuRuROXrO}ey1{h{+o}d)q418?Dp61|G(4dKYXZT^n&%jztdm;jm96~ gpRfM_0000000000000000O0<70{(f!Y5-6G0O>IHkpKVy literal 0 HcmV?d00001 From df8572a1eb56cd5f77ec10482756113cdf42a915 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 11 Aug 2015 15:51:19 -0400 Subject: [PATCH 21/95] tar/asm: check length before adding an entry --- tar/asm/disassemble.go | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 4a8ed94..7986890 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -55,13 +55,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. 
- _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }) - if err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } break // not return. We need the end of the reader. } @@ -69,12 +71,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io break // not return. We need the end of the reader. } - if _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }); err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } var csum []byte From e46a815cbcaa5270acfb2893b66791150f4d2a87 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 11 Aug 2015 15:51:52 -0400 Subject: [PATCH 22/95] archive/tar: fix carry-over of bytes for GNU types Archives produced with GNU tar can have types of TypeGNULongName and TypeGNULongLink. These fields effectively appear like two file entries in the tar archive. While golang's `archive/tar` transparently provide the file name and headers and file payload, the access to the raw bytes is still needed. This fixes the access to the longlink header, it's payload (of the long file path name), and the following file header and actual file payload. 
--- archive/tar/reader.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index a89957e..f817956 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -154,44 +154,60 @@ func (tr *Reader) Next() (*Header, error) { } return hdr, nil case TypeGNULongName: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long name header. Its contents are the real file name. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long link header. 
realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } From e6df23162ed7cea021fffb41f186ab2a382294a0 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 16:46:04 -0700 Subject: [PATCH 23/95] Remove redundant TeeReader Signed-off-by: Alexander Morozov --- tar/storage/getter.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tar/storage/getter.go b/tar/storage/getter.go index ae110c6..70fd378 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -5,7 +5,6 @@ import ( "errors" "hash/crc64" "io" - "io/ioutil" "os" "path/filepath" ) @@ -97,8 +96,7 @@ type bitBucketFilePutter struct { func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) { c := crc64.New(CRCTable) - tRdr := io.TeeReader(r, c) - i, err := io.Copy(ioutil.Discard, tRdr) + i, err := io.Copy(c, r) return i, c.Sum(nil), err } From b1783bc86d720d5bcd2497fbc0e72ea50f74b826 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 22:41:28 -0700 Subject: [PATCH 24/95] storage: Fix syntax of json tags Signed-off-by: Alexander Morozov --- tar/storage/entry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index 57a0256..38fe7ba 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -32,8 +32,8 @@ const ( // collisions in a sample of 18.2 million, CRC64 had none. 
type Entry struct { Type Type `json:"type"` - Name string `json:"name",omitempty` - Size int64 `json:"size",omitempty` + Name string `json:"name,omitempty"` + Size int64 `json:"size,omitempty"` Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } From 93c0a320a8d62789bf2ebe32bee12be2644d625c Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 22:45:39 -0700 Subject: [PATCH 25/95] asm: Remove unreachable code Signed-off-by: Alexander Morozov --- tar/asm/assemble.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index b421db0..74317cb 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -62,7 +62,6 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose fh.Close() } } - pw.Close() }() return pr } From fa881b2347d337cf6c3cff04eecfea52ef8c8f09 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 22:49:38 -0700 Subject: [PATCH 26/95] Add vet check to travis Signed-off-by: Alexander Morozov --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 21d6684..ee1645d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,10 @@ go: # let us have pretty, fast Docker-based Travis workers! sudo: false -install: go get -d ./... +install: + - go get -d ./... + - go get golang.org/x/tools/cmd/vet script: - go test -v ./... + - go vet ./... 
From ea73dc6f6fa236134d68544a93700a459358aee2 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 13 Aug 2015 11:42:14 -0700 Subject: [PATCH 27/95] tar/storage: Benchmark for bufferFileGetPutter.Put Signed-off-by: Alexander Morozov --- tar/storage/getter_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tar/storage/getter_test.go b/tar/storage/getter_test.go index 5a6fcc7..c06cff0 100644 --- a/tar/storage/getter_test.go +++ b/tar/storage/getter_test.go @@ -2,7 +2,9 @@ package storage import ( "bytes" + "fmt" "io/ioutil" + "strings" "testing" ) @@ -39,6 +41,7 @@ func TestGetter(t *testing.T) { } } } + func TestPutter(t *testing.T) { fp := NewDiscardFilePutter() // map[filename]map[body]crc64sum @@ -60,3 +63,22 @@ func TestPutter(t *testing.T) { } } } + +func BenchmarkPutter(b *testing.B) { + files := []string{ + strings.Repeat("foo", 1000), + strings.Repeat("bar", 1000), + strings.Repeat("baz", 1000), + strings.Repeat("fooz", 1000), + strings.Repeat("vbatts", 1000), + strings.Repeat("systemd", 1000), + } + for i := 0; i < b.N; i++ { + fgp := NewBufferFileGetPutter() + for n, body := range files { + if _, _, err := fgp.Put(fmt.Sprintf("%d", n), bytes.NewBufferString(body)); err != nil { + b.Fatal(err) + } + } + } +} From 45399711c2466973d96d650eb2c9971fbf3816d7 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 13 Aug 2015 11:42:43 -0700 Subject: [PATCH 28/95] tar/storage: Replace TeeReader with MultiWriter It uses slightly less memory and more understandable. 
Benchmar results: benchmark old ns/op new ns/op delta BenchmarkPutter-4 57272 52375 -8.55% benchmark old allocs new allocs delta BenchmarkPutter-4 21 19 -9.52% benchmark old bytes new bytes delta BenchmarkPutter-4 19416 13336 -31.31% Signed-off-by: Alexander Morozov --- tar/storage/getter.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tar/storage/getter.go b/tar/storage/getter.go index 70fd378..ae11f8f 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -59,15 +59,15 @@ func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) { } func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) { - c := crc64.New(CRCTable) - tRdr := io.TeeReader(r, c) - b := bytes.NewBuffer([]byte{}) - i, err := io.Copy(b, tRdr) + crc := crc64.New(CRCTable) + buf := bytes.NewBuffer(nil) + cw := io.MultiWriter(crc, buf) + i, err := io.Copy(cw, r) if err != nil { return 0, nil, err } - bfgp.files[name] = b.Bytes() - return i, c.Sum(nil), nil + bfgp.files[name] = buf.Bytes() + return i, crc.Sum(nil), nil } type readCloserWrapper struct { From 4d66163297403e1f4a85fa9601886eae31f551ac Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 13 Aug 2015 15:32:17 -0400 Subject: [PATCH 29/95] archive/tar: a []byte copy needed for GNU LongLink --- archive/tar/reader.go | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index f817956..c72e002 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -154,60 +154,48 @@ func (tr *Reader) Next() (*Header, error) { } return hdr, nil case TypeGNULongName: - var b *bytes.Buffer - if tr.RawAccounting { - b = bytes.NewBuffer(tr.RawBytes()) - } // We have a GNU long name header. Its contents are the real file name. 
realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } + var buf []byte if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { - return nil, err - } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b.Reset() - b.Write(tr.RawBytes()) + buf = make([]byte, tr.rawBytes.Len()) + copy(buf[:], tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - b.Write(tr.RawBytes()) - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + buf = append(buf, tr.RawBytes()...) + if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: - var b *bytes.Buffer - if tr.RawAccounting { - b = bytes.NewBuffer(tr.RawBytes()) - } // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } + var buf []byte if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { - return nil, err - } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b.Reset() - b.Write(tr.RawBytes()) + buf = make([]byte, tr.rawBytes.Len()) + copy(buf[:], tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - b.Write(tr.RawBytes()) - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + buf = append(buf, tr.RawBytes()...) + if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } From c76e42010eb78200c826024cff1d7bba76082715 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 14 Aug 2015 07:55:18 -0400 Subject: [PATCH 30/95] tar/asm: additional GNU LongLink testcase Adding a minimal test case for GNU @LongLink. Tested that it fails on v0.9.5, but now passes on v0.9.6 and master. 
--- tar/asm/assemble_test.go | 187 ++++++++++------------------ tar/asm/testdata/fatlonglink.tar.gz | Bin 0 -> 26402 bytes 2 files changed, 63 insertions(+), 124 deletions(-) create mode 100644 tar/asm/testdata/fatlonglink.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index e37d7f3..da515f2 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -113,137 +113,76 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } func TestTarStream(t *testing.T) { - var ( - expectedSHA1Sum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" - expectedSize int64 = 10240 - ) - - fh, err := os.Open("./testdata/t.tar.gz") - if err != nil { - t.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - t.Fatal(err) - } - defer gzRdr.Close() - - // Setup where we'll store the metadata - w := bytes.NewBuffer([]byte{}) - sp := storage.NewJSONPacker(w) - fgp := storage.NewBufferFileGetPutter() - - // wrap the disassembly stream - tarStream, err := NewInputTarStream(gzRdr, sp, fgp) - if err != nil { - t.Fatal(err) + testCases := []struct { + path string + expectedSHA1Sum string + expectedSize int64 + }{ + {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, + {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, + {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, } - // get a sum of the stream after it has passed through to ensure it's the same. 
- h0 := sha1.New() - tRdr0 := io.TeeReader(tarStream, h0) + for _, tc := range testCases { + fh, err := os.Open(tc.path) + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() - // read it all to the bit bucket - i, err := io.Copy(ioutil.Discard, tRdr0) - if err != nil { - t.Fatal(err) - } + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() - if i != expectedSize { - t.Errorf("size of tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) - } + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } - t.Logf("%s", w.String()) // if we fail, then show the packed info + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) - // If we've made it this far, then we'll turn it around and create a tar - // stream from the packed metadata and buffered file contents. - r := bytes.NewBuffer(w.Bytes()) - sup := storage.NewJSONUnpacker(r) - // and reuse the fgp that we Put the payloads to. 
+ // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } - rc := NewOutputTarStream(fgp, sup) - h1 := sha1.New() - i, err = io.Copy(h1, rc) - if err != nil { - t.Fatal(err) - } + if i != tc.expectedSize { + t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) + } - if i != expectedSize { - t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) - } -} - -func TestTarGNUTar(t *testing.T) { - var ( - expectedSHA1Sum = "d9f6babe107b7247953dff6b5b5ae31a3a880add" - expectedSize int64 = 20480 - ) - - fh, err := os.Open("./testdata/longlink.tar.gz") - if err != nil { - t.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - t.Fatal(err) - } - defer gzRdr.Close() - - // Setup where we'll store the metadata - w := bytes.NewBuffer([]byte{}) - sp := storage.NewJSONPacker(w) - fgp := storage.NewBufferFileGetPutter() - - // wrap the disassembly stream - tarStream, err := NewInputTarStream(gzRdr, sp, fgp) - if err != nil { - t.Fatal(err) - } - - // get a sum of the stream after it has passed through to ensure it's the same. 
- h0 := sha1.New() - tRdr0 := io.TeeReader(tarStream, h0) - - // read it all to the bit bucket - i, err := io.Copy(ioutil.Discard, tRdr0) - if err != nil { - t.Fatal(err) - } - - if i != expectedSize { - t.Errorf("size of tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) - } - - t.Logf("%s", w.String()) // if we fail, then show the packed info - - // If we've made it this far, then we'll turn it around and create a tar - // stream from the packed metadata and buffered file contents. - r := bytes.NewBuffer(w.Bytes()) - sup := storage.NewJSONUnpacker(r) - // and reuse the fgp that we Put the payloads to. - - rc := NewOutputTarStream(fgp, sup) - h1 := sha1.New() - i, err = io.Copy(h1, rc) - if err != nil { - t.Fatal(err) - } - - if i != expectedSize { - t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. 
+ + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != tc.expectedSize { + t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil)) + } } } diff --git a/tar/asm/testdata/fatlonglink.tar.gz b/tar/asm/testdata/fatlonglink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d8ed148ff12cfeb30fab84c35735c31e6a4c326 GIT binary patch literal 26402 zcmeI(e=yr;90zdL?a&$RZavm@akc7})DSDZq3GFmn?tp##VBgpBclw>tR&8q>AGgU zIbl)EE$Ny}P(_6%epPN9j!1`ylvHRF`O!2YiR4+f+wI@>U+d@l=kvVp`~5tx`~LO5 zd)&Uax4%9$7W>k+{Pethul&@nY!tJEsHJGvl0`52fs(vO8Hi)!f6T)*&Aqa*cGit2Cva|LHaVy&7|rW{A{LbCfl>{jH=^ge$p_iqmm8#ulUL%?O9pzh% znKnkXBnwfy%DO>~PB8Osqj#P91C7L_`$%pgkMFpml3c*AlHJWALDmQ{3ukONjwp`( zs}^k4umwFe^78D5^_0jq6f$X%R-_c6sz4KpQHEq%mnS})Lk&f!%c+0E5*faZRwmJF zVLn`PrxdoOL63<5d6dg`Q3;mRxdw<0BVC-N?cx!QBgsWm%;X8$xmOtLPD0orfkvKIBDB~)Qd$4+MBa%Lrpj>uM#W6J)B-n6fV}t z(SKblW~*_*ydVGqAOHd&00JNY0w4ea{|J;cCpv zsKe2hHt->9{F>vmWw9UTLGEc|TKzeGMgo^93P5yY;hqr^W>~KjJh>>=uKBRrn@{d@ z*fb$tIVX$eC(We}6b7YpGskNwv6=FKdpL&Yw2&PYk73uM8^t_N?J}o1?}avp!-D__ zfB*=900@8p2!H?xfB*=900@8p2!OyA6|m5oJV$8R0rLg=iV8Q*JH6cp%W*>#Lq^DO z)tsk_t+AnKr`)tbfl-1dvEus%Z{dVAdR^HB-`vT1-1}&q%{I2!O! 
Date: Fri, 14 Aug 2015 10:02:46 -0400 Subject: [PATCH 31/95] *: adding some version magic --- cmd/tar-split/main.go | 4 ++-- version/gen.go | 4 ++++ version/version.go | 7 +++++++ 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 version/gen.go create mode 100644 version/version.go diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index f24dce5..b417120 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -1,4 +1,3 @@ -// go:generate git tag | tail -1 package main import ( @@ -6,13 +5,14 @@ import ( "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/version" ) func main() { app := cli.NewApp() app.Name = "tar-split" app.Usage = "tar assembly and disassembly utility" - app.Version = "0.9.2" + app.Version = version.VERSION app.Author = "Vincent Batts" app.Email = "vbatts@hashbangbash.com" app.Action = cli.ShowAppHelp diff --git a/version/gen.go b/version/gen.go new file mode 100644 index 0000000..d290d83 --- /dev/null +++ b/version/gen.go @@ -0,0 +1,4 @@ +package version + +// from `go get github.com/vbatts/go-get-version` +//go:generate go-get-version -package version -variable VERSION -output version.go diff --git a/version/version.go b/version/version.go new file mode 100644 index 0000000..0b86fbf --- /dev/null +++ b/version/version.go @@ -0,0 +1,7 @@ +package version +// AUTO-GENEREATED. 
DO NOT EDIT +// 2015-08-14 09:56:50.742727493 -0400 EDT + +// VERSION is the generated version from /home/vbatts/src/vb/tar-split/version +var VERSION = "v0.9.6-1-gc76e420" + \ No newline at end of file From 3a88af2866a599063c0e94cb141013c9ffd69032 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 14 Aug 2015 10:15:26 -0400 Subject: [PATCH 32/95] travis: adding older and newer golang --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index ee1645d..783781b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ language: go go: + - tip - 1.4.2 - 1.3.3 + - 1.2.2 # let us have pretty, fast Docker-based Travis workers! sudo: false From bf82db1f0de52d664d0acb1a92163532995370b0 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 18 Aug 2015 14:54:32 -0400 Subject: [PATCH 33/95] README: updates --- README.md | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e37d36b..0a1b2fc 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,7 @@ [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) -Extend the upstream golang stdlib `archive/tar` library, to expose the raw -bytes of the TAR, rather than just the marshalled headers and file stream. - -The goal being that by preserving the raw bytes of each header, padding bytes, -and the raw file payload, one could reassemble the original archive. +Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. 
## Docs @@ -48,7 +44,9 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) +The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f). +It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. + ## Design @@ -107,10 +105,7 @@ bytes-per-file rate for the storage implications. ## What's Next? * More implementations of storage Packer and Unpacker - - could be a redis or mongo backend * More implementations of FileGetter and FilePutter - - could be a redis or mongo backend -* cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` From 6e38573de2ab9ae03937762754dcde175ee2d9b6 Mon Sep 17 00:00:00 2001 From: David du Colombier <0intro@gmail.com> Date: Fri, 24 Apr 2015 15:37:53 +0200 Subject: [PATCH 34/95] archive/tar: fix error message Write should return ErrWriteAfterClose instead of ErrWriteTooLong when called after Close. Change-Id: If5ec4ef924e4c56489e0d426976f7e5fad79be9b Reviewed-on: https://go-review.googlesource.com/9259 Reviewed-by: Brad Fitzpatrick Signed-off-by: Vincent Batts --- archive/tar/writer.go | 2 +- archive/tar/writer_test.go | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index dafb2ca..9dbc01a 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -355,7 +355,7 @@ func paxHeader(msg string) string { // hdr.Size bytes are written after WriteHeader. 
func (tw *Writer) Write(b []byte) (n int, err error) { if tw.closed { - err = ErrWriteTooLong + err = ErrWriteAfterClose return } overwrite := false diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 5e42e32..650899a 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -489,3 +489,20 @@ func TestValidTypeflagWithPAXHeader(t *testing.T) { } } } + +func TestWriteAfterClose(t *testing.T) { + var buffer bytes.Buffer + tw := NewWriter(&buffer) + + hdr := &Header{ + Name: "small.txt", + Size: 5, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("Failed to write header: %s", err) + } + tw.Close() + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { + t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) + } +} From 576b2737620ba2ca0fb6c27552c2dfa8eadb0072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Haugen?= Date: Wed, 27 May 2015 10:44:44 +0200 Subject: [PATCH 35/95] archive/tar: don't panic on negative file size Fixes #10959. Fixes #10960. 
Change-Id: I9a81a0e2b8275338d0d1c3f7f7265e0fd91f3de2 Reviewed-on: https://go-review.googlesource.com/10402 TryBot-Result: Gobot Gobot Reviewed-by: David Symonds Signed-off-by: Vincent Batts --- archive/tar/reader.go | 4 ++++ archive/tar/reader_test.go | 16 ++++++++++++++++ archive/tar/testdata/neg-size.tar | Bin 0 -> 512 bytes 3 files changed, 20 insertions(+) create mode 100644 archive/tar/testdata/neg-size.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index c72e002..0b0c3b1 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -553,6 +553,10 @@ func (tr *Reader) readHeader() *Header { hdr.Uid = int(tr.octal(s.next(8))) hdr.Gid = int(tr.octal(s.next(8))) hdr.Size = tr.octal(s.next(12)) + if hdr.Size < 0 { + tr.err = ErrHeader + return nil + } hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 9601ffe..ab1e844 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -741,3 +741,19 @@ func TestUninitializedRead(t *testing.T) { } } + +// Negative header size should not cause panic. +// Issues 10959 and 10960. 
+func TestNegativeHdrSize(t *testing.T) { + f, err := os.Open("testdata/neg-size.tar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err != ErrHeader { + t.Error("want ErrHeader, got", err) + } + io.Copy(ioutil.Discard, r) +} diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar new file mode 100644 index 0000000000000000000000000000000000000000..5deea3d05c4da5a4ddda34ef7ad781088464e71b GIT binary patch literal 512 zcma)(!3}~i7=@d#07(~c0h9N)Na`Hy;GL8N4j!7YfmcUy4Hj^R-s|6LkswAdq{%Dq zeeYEkfGqY#(eVIvtUD=3dmgO>+WvmJu?!(Z2_k7dfq;C)3dnfX`l0#IeAe0qQ-^2= z|8jB*JI{8kO)-jdf^$wdJ_vEWkPIQXm^d{2NhUoLEEza^mVV&QNE^63LaKtd9rtDs zE>me;9Em{+eZ^Y>snQ&s!17bkhjcz=H=J(=e>|P(sS8Gxj+6N@Z7t^ E15rerKmY&$ literal 0 HcmV?d00001 From 55dceefe42a7ad9268aea544a8b6826f9d9a5c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Haugen?= Date: Thu, 28 May 2015 13:48:47 +0200 Subject: [PATCH 36/95] archive/tar: terminate when reading malformed sparse files Fixes #10968. 
Change-Id: I027bc571a71629ac49c2a0ff101b2950af6e7531 Reviewed-on: https://go-review.googlesource.com/10482 Reviewed-by: David Symonds Run-TryBot: David Symonds TryBot-Result: Gobot Gobot Signed-off-by: Vincent Batts --- archive/tar/reader.go | 3 +++ archive/tar/reader_test.go | 19 +++++++++++++++++++ archive/tar/testdata/issue10968.tar | Bin 0 -> 512 bytes 3 files changed, 22 insertions(+) create mode 100644 archive/tar/testdata/issue10968.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 0b0c3b1..dbc5698 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -899,6 +899,9 @@ func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { // Otherwise, we're at the end of the file return 0, io.EOF } + if sfr.tot < sfr.sp[0].offset { + return 0, io.ErrUnexpectedEOF + } if sfr.pos < sfr.sp[0].offset { // We're in a hole n = sfr.readHole(b, sfr.sp[0].offset) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index ab1e844..6ffb383 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -757,3 +757,22 @@ func TestNegativeHdrSize(t *testing.T) { } io.Copy(ioutil.Discard, r) } + +// This used to hang in (*sparseFileReader).readHole due to missing +// verification of sparse offsets against file size. 
+func TestIssue10968(t *testing.T) { + f, err := os.Open("testdata/issue10968.tar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err != nil { + t.Fatal(err) + } + _, err = io.Copy(ioutil.Discard, r) + if err != io.ErrUnexpectedEOF { + t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) + } +} diff --git a/archive/tar/testdata/issue10968.tar b/archive/tar/testdata/issue10968.tar new file mode 100644 index 0000000000000000000000000000000000000000..1cc837bcff14cd822a26e43034955c82e852ab29 GIT binary patch literal 512 zcmbVI!41MN47Ah*kg@;^fX)>lI!AWsgI^V-_Q4}k$6}2x&>iv*cG6Oc`at9n#lG|1 zIi>(iak!RTol#boyD`0c^v(cHJJuvHh-e39;{t(!nc@gWsV;O@FkUc{-h`pC817Ix zgh|QIatu;A!G^JZ7UC1V_vGb4bURuTWAy6SS-Fx(D=wcI#QP1Y#wzX?HAf0_+~lp> yN?iGbw2JFgJjd0vnp9WIo>K3V$tfee6;KE|`1A3J$tp?9B&Y7`+Gwrtzls-lP-;g2 literal 0 HcmV?d00001 From 69de764807dae1f3b43badebbb958f7fcb3d70c8 Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Fri, 12 Jun 2015 22:49:42 +0200 Subject: [PATCH 37/95] archive/tar: fix slice bounds out of range Sanity check the pax-header size field before using it. Fixes #11167. Change-Id: I9d5d0210c3990e6fb9434c3fe333be0d507d5962 Reviewed-on: https://go-review.googlesource.com/10954 Reviewed-by: David Symonds Signed-off-by: Vincent Batts --- archive/tar/reader.go | 2 +- archive/tar/reader_test.go | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index dbc5698..6f219da 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -397,7 +397,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { } // Parse the first token as a decimal integer. 
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil { + if err != nil || n < 5 || int64(len(buf)) < n { return nil, ErrHeader } // Extract everything between the decimal and the n -1 on the diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 6ffb383..311db77 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -462,9 +462,14 @@ func TestParsePAXHeader(t *testing.T) { t.Error("Buffer wasn't consumed") } } - badHeader := bytes.NewReader([]byte("3 somelongkey=")) - if _, err := parsePAX(badHeader); err != ErrHeader { - t.Fatal("Unexpected success when parsing bad header") + badHeaderTests := [][]byte{ + []byte("3 somelongkey=\n"), + []byte("50 tooshort=\n"), + } + for _, test := range badHeaderTests { + if _, err := parsePAX(bytes.NewReader(test)); err != ErrHeader { + t.Fatal("Unexpected success when parsing bad header") + } } } From 2e5698249c892bebc0326a4307410b205783ad22 Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Sat, 13 Jun 2015 10:53:06 +0200 Subject: [PATCH 38/95] archive/tar: add missing error checks Check for errors when reading the headers following the pax headers. Fixes #11169. 
Change-Id: Ifec4a949ec8df8b49fa7cb7a67eb826fe2282ad8 Reviewed-on: https://go-review.googlesource.com/11031 Reviewed-by: Russ Cox Signed-off-by: Vincent Batts --- archive/tar/reader.go | 6 ++++++ archive/tar/reader_test.go | 15 +++++++++++++++ archive/tar/testdata/issue11169.tar | Bin 0 -> 602 bytes 3 files changed, 21 insertions(+) create mode 100644 archive/tar/testdata/issue11169.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6f219da..4168ea2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -138,7 +138,13 @@ func (tr *Reader) Next() (*Header, error) { // We actually read the whole file, // but this skips alignment padding tr.skipUnread() + if tr.err != nil { + return nil, tr.err + } hdr = tr.readHeader() + if hdr == nil { + return nil, tr.err + } mergePAX(hdr, headers) // Check for a PAX format sparse file diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 311db77..da01f26 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -781,3 +781,18 @@ func TestIssue10968(t *testing.T) { t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) } } + +// Do not panic if there are errors in header blocks after the pax header. +// Issue 11169 +func TestIssue11169(t *testing.T) { + f, err := os.Open("testdata/issue11169.tar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err == nil { + t.Fatal("Unexpected success") + } +} diff --git a/archive/tar/testdata/issue11169.tar b/archive/tar/testdata/issue11169.tar new file mode 100644 index 0000000000000000000000000000000000000000..4d71fa15260609ecee0c8c751cfebf49be8763ac GIT binary patch literal 602 zcmdPX4@j)=NKH&hEh^SCG%+zV)=x}KWS}ZA00J`;69y0s1n9JZp|KHzp^>Svp`nSX svAH3G0gzz?R8~P%SKu(Lw74X(2 Date: Wed, 4 Mar 2015 12:29:16 -0500 Subject: [PATCH 39/95] archive/tar: fix round-trip attributes The issue was identified while working with round trip FileInfo of the headers of hardlinks. 
Also, additional test cases for hard link handling. (review carried over from http://golang.org/cl/165860043) Fixes #9027 Change-Id: I9e3a724c8de72eb1b0fbe0751a7b488894911b76 Reviewed-on: https://go-review.googlesource.com/6790 Reviewed-by: Russ Cox Signed-off-by: Vincent Batts --- archive/tar/common.go | 4 +- archive/tar/stat_unix.go | 46 ++++++++++++++++------ archive/tar/tar_test.go | 63 ++++++++++++++++++++++++------ archive/tar/testdata/hardlink.tar | Bin 0 -> 2560 bytes archive/tar/writer_test.go | 38 ++++++++++++++++++ 5 files changed, 127 insertions(+), 24 deletions(-) create mode 100644 archive/tar/testdata/hardlink.tar diff --git a/archive/tar/common.go b/archive/tar/common.go index e363aa7..855e5fc 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -139,8 +139,8 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { } switch fi.h.Typeflag { - case TypeLink, TypeSymlink: - // hard link, symbolic link + case TypeSymlink: + // symbolic link mode |= os.ModeSymlink case TypeChar: // character device node diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index cb843db..24b9311 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -16,17 +16,41 @@ func init() { } func statUnix(fi os.FileInfo, h *Header) error { - sys, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return nil + switch sys := fi.Sys().(type) { + case *syscall.Stat_t: + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + if fi.Mode().IsRegular() && sys.Nlink > 1 { + h.Typeflag = TypeLink + h.Size = 0 + // TODO(vbatts): Linkname? 
+ } + case *Header: + // for the roundtrip logic + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } } - h.Uid = int(sys.Uid) - h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. - h.AccessTime = statAtime(sys) - h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? return nil } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index ed333f3..d63c072 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -147,17 +147,6 @@ func TestHeaderRoundTrip(t *testing.T) { }, fm: 0644, }, - // hard link. - { - h: &Header{ - Name: "hard.txt", - Mode: 0644 | c_ISLNK, - Size: 0, - ModTime: time.Unix(1360600916, 0), - Typeflag: TypeLink, - }, - fm: 0644 | os.ModeSymlink, - }, // symbolic link. { h: &Header{ @@ -246,6 +235,33 @@ func TestHeaderRoundTrip(t *testing.T) { }, fm: 0600 | os.ModeSticky, }, + // hard link. + { + h: &Header{ + Name: "hard.txt", + Mode: 0644 | c_ISREG, + Size: 0, + Linkname: "file.txt", + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, + }, + fm: 0644, + }, + // More information. 
+ { + h: &Header{ + Name: "info.txt", + Mode: 0600 | c_ISREG, + Size: 0, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1360602540, 0), + Uname: "slartibartfast", + Gname: "users", + Typeflag: TypeReg, + }, + fm: 0600, + }, } for i, g := range golden { @@ -268,12 +284,37 @@ func TestHeaderRoundTrip(t *testing.T) { if got, want := h2.Size, g.h.Size; got != want { t.Errorf("i=%d: Size: got %v, want %v", i, got, want) } + if got, want := h2.Uid, g.h.Uid; got != want { + t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) + } + if got, want := h2.Gid, g.h.Gid; got != want { + t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) + } + if got, want := h2.Uname, g.h.Uname; got != want { + t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) + } + if got, want := h2.Gname, g.h.Gname; got != want { + t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) + } + if got, want := h2.Linkname, g.h.Linkname; got != want { + t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) + } + if got, want := h2.Typeflag, g.h.Typeflag; got != want { + t.Logf("%#v %#v", g.h, fi.Sys()) + t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) + } if got, want := h2.Mode, g.h.Mode; got != want { t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) } if got, want := fi.Mode(), g.fm; got != want { t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) } + if got, want := h2.AccessTime, g.h.AccessTime; got != want { + t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) + } + if got, want := h2.ChangeTime, g.h.ChangeTime; got != want { + t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) + } if got, want := h2.ModTime, g.h.ModTime; got != want { t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) } diff --git a/archive/tar/testdata/hardlink.tar b/archive/tar/testdata/hardlink.tar new file mode 100644 index 0000000000000000000000000000000000000000..9cd1a26572e44150ded8a628fefb28fa089645d1 GIT binary patch literal 2560 
zcmYex%t_TNsVHHfAus>}GZPaAAZ2K7Y5<}Q3?Y0F6C}!DXk=n;YGz~#VjCD58=09i zC>YStO>m=2i%SxVfKDn)N-QZUh6`gbN{dsA@JNF_1@sD>#xP)T3IyjQ7L{Zs0g1H4 z;u5aG>Bv!6(JTZq5{ps>JpTi;4Ql>3F*i14P%uoRL*X>S^FPfJ)~LawAut*OgFXZR DcLg^L literal 0 HcmV?d00001 diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 650899a..fe46a67 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -147,6 +147,44 @@ var writerTests = []*writerTest{ }, }, }, + // This file was produced using gnu tar 1.26 + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + { + file: "testdata/hardlink.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: "file.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Size: 15, + ModTime: time.Unix(1425484303, 0), + Typeflag: '0', + Uname: "vbatts", + Gname: "users", + }, + contents: "Slartibartfast\n", + }, + { + header: &Header{ + Name: "hard.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Size: 0, + ModTime: time.Unix(1425484303, 0), + Typeflag: '1', + Linkname: "file.txt", + Uname: "vbatts", + Gname: "users", + }, + // no contents + }, + }, + }, } // Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. 
From 8eee43d0df37ee91baff4b12af1821845080d0df Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 26 Jun 2015 14:31:35 -0700 Subject: [PATCH 40/95] archive/tar: disable new failing test on windows and plan9 Update #11426 Change-Id: If406d2efcc81965825a63c76f5448d544ba2a740 Reviewed-on: https://go-review.googlesource.com/11590 Reviewed-by: Austin Clements Signed-off-by: Vincent Batts --- archive/tar/tar_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index d63c072..715884a 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -10,6 +10,7 @@ import ( "os" "path" "reflect" + "runtime" "strings" "testing" "time" @@ -135,6 +136,9 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { + if runtime.GOOS == "windows" || runtime.GOOS == "plan9" { + t.Skipf("skipping on %s; issue 11426", runtime.GOOS) + } golden := []headerRoundTripTest{ // regular file. { From 27e18409b9d3df7bfa99336f0669b649c4384581 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 26 Jun 2015 15:13:52 -0700 Subject: [PATCH 41/95] archive/tar: also skip header roundtrip test on nacl Update #11426 Change-Id: I7abc4ed2241a7a3af6d57c934786f36de4f97b77 Reviewed-on: https://go-review.googlesource.com/11592 Run-TryBot: Brad Fitzpatrick Reviewed-by: Brad Fitzpatrick Signed-off-by: Vincent Batts --- archive/tar/tar_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 715884a..3fdd83d 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -136,7 +136,7 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { - if runtime.GOOS == "windows" || runtime.GOOS == "plan9" { + if runtime.GOOS == "windows" || runtime.GOOS == "plan9" || runtime.GOOS == "nacl" { t.Skipf("skipping on %s; issue 11426", runtime.GOOS) } golden := []headerRoundTripTest{ From 3b34dbd368ec2fd76f6d552714ae954056cc58ec 
Mon Sep 17 00:00:00 2001 From: Alex Brainman Date: Mon, 29 Jun 2015 16:42:28 +1000 Subject: [PATCH 42/95] archive/tar: move round-trip reading into common os file Fixes #11426 Change-Id: I77368b0e852149ed4533e139cc43887508ac7f78 Reviewed-on: https://go-review.googlesource.com/11662 Reviewed-by: Austin Clements Reviewed-by: Russ Cox Signed-off-by: Vincent Batts --- archive/tar/common.go | 24 +++++++++++++++++++ archive/tar/stat_unix.go | 51 +++++++++++++--------------------------- archive/tar/tar_test.go | 4 ---- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/archive/tar/common.go b/archive/tar/common.go index 855e5fc..c31df06 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -249,6 +249,30 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fm&os.ModeSticky != 0 { h.Mode |= c_ISVTX } + // If possible, populate additional fields from OS-specific + // FileInfo fields. + if sys, ok := fi.Sys().(*Header); ok { + // This FileInfo came from a Header (not the OS). Use the + // original Header to populate all remaining fields. + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } + } if sysStat != nil { return h, sysStat(fi, h) } diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index 24b9311..27d112f 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -16,41 +16,22 @@ func init() { } func statUnix(fi os.FileInfo, h *Header) error { - switch sys := fi.Sys().(type) { - case *syscall.Stat_t: - h.Uid = int(sys.Uid) - h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. 
- h.AccessTime = statAtime(sys) - h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? - if fi.Mode().IsRegular() && sys.Nlink > 1 { - h.Typeflag = TypeLink - h.Size = 0 - // TODO(vbatts): Linkname? - } - case *Header: - // for the roundtrip logic - h.Uid = sys.Uid - h.Gid = sys.Gid - h.Uname = sys.Uname - h.Gname = sys.Gname - h.AccessTime = sys.AccessTime - h.ChangeTime = sys.ChangeTime - if sys.Xattrs != nil { - h.Xattrs = make(map[string]string) - for k, v := range sys.Xattrs { - h.Xattrs[k] = v - } - } - if sys.Typeflag == TypeLink { - // hard link - h.Typeflag = TypeLink - h.Size = 0 - h.Linkname = sys.Linkname - } + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + if fi.Mode().IsRegular() && sys.Nlink > 1 { + h.Typeflag = TypeLink + h.Size = 0 + // TODO(vbatts): Linkname? } return nil } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 3fdd83d..d63c072 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -10,7 +10,6 @@ import ( "os" "path" "reflect" - "runtime" "strings" "testing" "time" @@ -136,9 +135,6 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { - if runtime.GOOS == "windows" || runtime.GOOS == "plan9" || runtime.GOOS == "nacl" { - t.Skipf("skipping on %s; issue 11426", runtime.GOOS) - } golden := []headerRoundTripTest{ // regular file. 
{ From 4d4b53c78ba7d13a7971e493b8913295c4575f70 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 3 Aug 2015 12:26:38 -0400 Subject: [PATCH 43/95] archive/tar: don't treat multiple file system links as a tar hardlink Do not assume that if stat shows multiple links that we should mark the file as a hardlink in the tar format. If the hardlink link was not referenced, this caused a link to "/". On an overlay file system, all files have multiple links. The caller must keep the inode references and set TypeLink, Size = 0, and LinkName themselves. Change-Id: I873b8a235bc8f8fbb271db74ee54232da36ca013 Reviewed-on: https://go-review.googlesource.com/13045 Reviewed-by: Ian Lance Taylor Signed-off-by: Vincent Batts --- archive/tar/stat_unix.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index 27d112f..cb843db 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -28,10 +28,5 @@ func statUnix(fi os.FileInfo, h *Header) error { h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) // TODO(bradfitz): major/minor device numbers? - if fi.Mode().IsRegular() && sys.Nlink > 1 { - h.Typeflag = TypeLink - h.Size = 0 - // TODO(vbatts): Linkname? - } return nil } From 414a687f83431ceb46d908ddc38a9e690e95c8f2 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 3 Sep 2015 15:01:25 -0400 Subject: [PATCH 44/95] README: usage --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 0a1b2fc..b89afe5 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,11 @@ The command line utilitiy is installable via: go get github.com/vbatts/tar-split/cmd/tar-split ``` +## Usage + +For cli usage, see its [README.md](cmd/tar-split/README.md). +For the library see the [docs](#docs) + ## Caveat Eventually this should detect TARs that this is not possible with. 
From 1148e7ee3b91e235af1c9172aa85810d6c0dd73e Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Fri, 11 Sep 2015 08:48:57 -0700 Subject: [PATCH 45/95] Add go 1.5.1 to CI Signed-off-by: Alexander Morozov --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 783781b..7b2d094 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: go go: - tip + - 1.5.1 - 1.4.2 - 1.3.3 - 1.2.2 From 286535320029f669a2b4b96723a2a1ba313214a1 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 13:30:00 -0400 Subject: [PATCH 46/95] common: add a UTF-8 check helper --- tar/common/utf8.go | 21 +++++++++++++++++++++ tar/common/utf8_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tar/common/utf8.go create mode 100644 tar/common/utf8_test.go diff --git a/tar/common/utf8.go b/tar/common/utf8.go new file mode 100644 index 0000000..ffb1646 --- /dev/null +++ b/tar/common/utf8.go @@ -0,0 +1,21 @@ +package common + +// IsValidUtf8String checks for in valid UTF-8 characters +func IsValidUtf8String(s string) bool { + for _, r := range s { + if int(r) == 0xfffd { + return false + } + } + return true +} + +// IsValidUtf8Btyes checks for in valid UTF-8 characters +func IsValidUtf8Btyes(b []byte) bool { + for _, r := range string(b) { + if int(r) == 0xfffd { + return false + } + } + return true +} diff --git a/tar/common/utf8_test.go b/tar/common/utf8_test.go new file mode 100644 index 0000000..e546f55 --- /dev/null +++ b/tar/common/utf8_test.go @@ -0,0 +1,34 @@ +package common + +import "testing" + +func TestStringValidation(t *testing.T) { + cases := []struct { + value string + result bool + }{ + {"aä\uFFFD本☺", false}, + {"aä本☺", true}, + } + + for _, c := range cases { + if got := IsValidUtf8String(c.value); got != c.result { + t.Errorf("string %q - expected %v, got %v", c.value, c.result, got) + } + } +} +func TestBytesValidation(t *testing.T) { + cases := []struct { + 
value []byte + result bool + }{ + {[]byte{0xE4}, false}, + {[]byte("aä本☺"), true}, + } + + for _, c := range cases { + if got := IsValidUtf8Btyes(c.value); got != c.result { + t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got) + } + } +} From 39d06b9dc4eaf75c34407e8fd8c161d54e4c6b4d Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:13:54 -0400 Subject: [PATCH 47/95] tar/common: get index of first invalid utf-8 char --- tar/common/utf8.go | 19 ++++++++++--------- tar/common/utf8_test.go | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tar/common/utf8.go b/tar/common/utf8.go index ffb1646..568e929 100644 --- a/tar/common/utf8.go +++ b/tar/common/utf8.go @@ -2,20 +2,21 @@ package common // IsValidUtf8String checks for in valid UTF-8 characters func IsValidUtf8String(s string) bool { - for _, r := range s { - if int(r) == 0xfffd { - return false - } - } - return true + return InvalidUtf8Index([]byte(s)) == -1 } // IsValidUtf8Btyes checks for in valid UTF-8 characters func IsValidUtf8Btyes(b []byte) bool { - for _, r := range string(b) { + return InvalidUtf8Index(b) == -1 +} + +// InvalidUtf8Index returns the offset of the first invalid UTF-8 character. +// Default is to return -1 for a wholly valid sequence. 
+func InvalidUtf8Index(b []byte) int { + for i, r := range string(b) { if int(r) == 0xfffd { - return false + return i } } - return true + return -1 } diff --git a/tar/common/utf8_test.go b/tar/common/utf8_test.go index e546f55..3cf81df 100644 --- a/tar/common/utf8_test.go +++ b/tar/common/utf8_test.go @@ -6,27 +6,36 @@ func TestStringValidation(t *testing.T) { cases := []struct { value string result bool + offset int }{ - {"aä\uFFFD本☺", false}, - {"aä本☺", true}, + {"aä\uFFFD本☺", false, 3}, + {"aä本☺", true, -1}, } for _, c := range cases { + if i := InvalidUtf8Index([]byte(c.value)); i != c.offset { + t.Errorf("string %q - offset expected %d, got %d", c.value, c.offset, i) + } if got := IsValidUtf8String(c.value); got != c.result { t.Errorf("string %q - expected %v, got %v", c.value, c.result, got) } } } + func TestBytesValidation(t *testing.T) { cases := []struct { value []byte result bool + offset int }{ - {[]byte{0xE4}, false}, - {[]byte("aä本☺"), true}, + {[]byte{0xE4}, false, 0}, + {[]byte("aä本☺"), true, -1}, } for _, c := range cases { + if i := InvalidUtf8Index(c.value); i != c.offset { + t.Errorf("bytes %q - offset expected %d, got %d", c.value, c.offset, i) + } if got := IsValidUtf8Btyes(c.value); got != c.result { t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got) } From 032efafc29636d38ea45b9a57fe0bad7dd90d124 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:20:09 -0400 Subject: [PATCH 48/95] tar/storage: work with raw (invalid utf8) names When the entry name is not UTF-8, for example ISO-8859-1, then store the raw bytes. To accommodate this, we will have getters and setters for the entry's name now. Since this most heavily affects the json marshalling, we'll double check the sanity of the name before storing it in the JSONPacker. 
--- tar/storage/entry.go | 43 +++++++++++++++++++++++++++++++++++++++ tar/storage/entry_test.go | 35 ++++++++++++++++++++++++++++--- tar/storage/packer.go | 14 +++++++++++-- 3 files changed, 87 insertions(+), 5 deletions(-) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index 38fe7ba..a152ac2 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -1,5 +1,11 @@ package storage +import ( + "fmt" + + "github.com/vbatts/tar-split/tar/common" +) + // Entries is for sorting by Position type Entries []Entry @@ -33,7 +39,44 @@ const ( type Entry struct { Type Type `json:"type"` Name string `json:"name,omitempty"` + NameRaw []byte `json:"name_raw,omitempty"` Size int64 `json:"size,omitempty"` Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } + +// SetName will check name for valid UTF-8 string, and set the appropriate +// field. See https://github.com/vbatts/tar-split/issues/17 +func (e *Entry) SetName(name string) { + if common.IsValidUtf8String(name) { + e.Name = name + } else { + e.NameRaw = []byte(name) + } +} + +// SetNameBytes will check name for valid UTF-8 string, and set the appropriate +// field +func (e *Entry) SetNameBytes(name []byte) { + if !common.IsValidUtf8Btyes(name) { + e.NameRaw = name + } else { + e.Name = string(name) + } +} + +// GetName returns the string for the entry's name, regardless of the field stored in +func (e *Entry) GetName() string { + if len(e.NameRaw) > 0 { + return fmt.Sprintf("%s", e.NameRaw) + } + return e.Name +} + +// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in +func (e *Entry) GetNameBytes() []byte { + if len(e.NameRaw) > 0 { + return e.NameRaw + } + return []byte(e.Name) +} diff --git a/tar/storage/entry_test.go b/tar/storage/entry_test.go index c797bca..90d103e 100644 --- a/tar/storage/entry_test.go +++ b/tar/storage/entry_test.go @@ -39,10 +39,10 @@ func TestEntries(t *testing.T) 
{ func TestFile(t *testing.T) { f := Entry{ Type: FileType, - Name: "./hello.txt", Size: 100, Position: 2, } + f.SetName("./hello.txt") buf, err := json.Marshal(f) if err != nil { @@ -54,8 +54,37 @@ func TestFile(t *testing.T) { t.Fatal(err) } - if f.Name != f1.Name { - t.Errorf("expected Name %q, got %q", f.Name, f1.Name) + if f.GetName() != f1.GetName() { + t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) + } + if f.Size != f1.Size { + t.Errorf("expected Size %q, got %q", f.Size, f1.Size) + } + if f.Position != f1.Position { + t.Errorf("expected Position %q, got %q", f.Position, f1.Position) + } +} + +func TestFileRaw(t *testing.T) { + f := Entry{ + Type: FileType, + Size: 100, + Position: 2, + } + f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74}) + + buf, err := json.Marshal(f) + if err != nil { + t.Fatal(err) + } + + f1 := Entry{} + if err = json.Unmarshal(buf, &f1); err != nil { + t.Fatal(err) + } + + if f.GetName() != f1.GetName() { + t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index a02a19a..1ea8208 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -6,6 +6,8 @@ import ( "errors" "io" "path/filepath" + + "github.com/vbatts/tar-split/tar/common" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the @@ -61,7 +63,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // check for dup name if e.Type == FileType { - cName := filepath.Clean(e.Name) + cName := filepath.Clean(e.GetName()) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } @@ -93,9 +95,17 @@ type jsonPacker struct { type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { + // if Name is not valid utf8, switch it to raw first. 
+ if e.Name != "" { + if !common.IsValidUtf8String(e.Name) { + e.NameRaw = []byte(e.Name) + e.Name = "" + } + } + // check early for dup name if e.Type == FileType { - cName := filepath.Clean(e.Name) + cName := filepath.Clean(e.GetName()) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } From cde639172fb276d8fbc3e0bbee73791315e30f04 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:24:15 -0400 Subject: [PATCH 49/95] tar/asm: work with non-utf8 entry names --- tar/asm/assemble.go | 4 +-- tar/asm/assemble_test.go | 60 +++++++++++++++++++++++++++---- tar/asm/disassemble.go | 11 +++--- tar/asm/testdata/iso-8859.tar.gz | Bin 0 -> 187 bytes 4 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 tar/asm/testdata/iso-8859.tar.gz diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 74317cb..83d6426 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -39,7 +39,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose if entry.Size == 0 { continue } - fh, err := fg.Get(entry.Name) + fh, err := fg.Get(entry.GetName()) if err != nil { pw.CloseWithError(err) return @@ -56,7 +56,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // but since it's coming through the PipeReader, the context of // _which_ file would be lost... 
fh.Close() - pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) + pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) return } fh.Close() diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index da515f2..e7609c0 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -11,9 +11,38 @@ import ( "os" "testing" + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/common" "github.com/vbatts/tar-split/tar/storage" ) +func TestISO8859(t *testing.T) { + fh, err := os.Open("./testdata/iso-8859.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + tr := tar.NewReader(gzRdr) + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + t.Error(err) + } + break + } + fmt.Println(hdr.Name) + if !common.IsValidUtf8String(hdr.Name) { + fmt.Println([]byte(hdr.Name)) + } + } +} + var entries = []struct { Entry storage.Entry Body []byte @@ -36,6 +65,15 @@ var entries = []struct { }, Body: []byte("café con leche, por favor"), }, + { + Entry: storage.Entry{ + Type: storage.FileType, + NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip. 
+ Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, } var entriesMangled = []struct { Entry storage.Entry @@ -61,6 +99,15 @@ var entriesMangled = []struct { // san not con Body: []byte("café sans leche, por favor"), }, + { + Entry: storage.Entry{ + Type: storage.FileType, + NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, + Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, } func TestTarStreamMangledGetterPutter(t *testing.T) { @@ -69,19 +116,19 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { // first lets prep a GetPutter and Packer for i := range entries { if entries[i].Entry.Type == storage.FileType { - j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body)) + j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body)) if err != nil { t.Error(err) } if j != entries[i].Entry.Size { t.Errorf("size %q: expected %d; got %d", - entries[i].Entry.Name, + entries[i].Entry.GetName(), entries[i].Entry.Size, j) } if !bytes.Equal(csum, entries[i].Entry.Payload) { t.Errorf("checksum %q: expected %v; got %v", - entries[i].Entry.Name, + entries[i].Entry.GetName(), entries[i].Entry.Payload, csum) } @@ -90,7 +137,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { for _, e := range entriesMangled { if e.Entry.Type == storage.FileType { - rdr, err := fgp.Get(e.Entry.Name) + rdr, err := fgp.Get(e.Entry.GetName()) if err != nil { t.Error(err) } @@ -105,7 +152,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { if bytes.Equal(csum, e.Entry.Payload) { t.Errorf("wrote %d bytes. checksum for %q should not have matched! 
%v", i, - e.Entry.Name, + e.Entry.GetName(), csum) } } @@ -121,6 +168,7 @@ func TestTarStream(t *testing.T) { {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, + {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, } for _, tc := range testCases { @@ -163,7 +211,7 @@ func TestTarStream(t *testing.T) { t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) } - t.Logf("%s", w.String()) // if we fail, then show the packed info + //t.Logf("%s", w.String()) // if we fail, then show the packed info // If we've made it this far, then we'll turn it around and create a tar // stream from the packed metadata and buffered file contents. diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 7986890..54ef23a 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -92,13 +92,16 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } } - // File entries added, regardless of size - _, err = p.AddEntry(storage.Entry{ + entry := storage.Entry{ Type: storage.FileType, - Name: hdr.Name, Size: hdr.Size, Payload: csum, - }) + } + // For proper marshalling of non-utf8 characters + entry.SetName(hdr.Name) + + // File entries added, regardless of size + _, err = p.AddEntry(entry) if err != nil { pW.CloseWithError(err) return diff --git a/tar/asm/testdata/iso-8859.tar.gz b/tar/asm/testdata/iso-8859.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e87f30a45f5dbf742a51c5c8252688452aeb2d0 GIT binary patch literal 187 zcmb2|=HU3ek133aIkPxl*TTZoQm-Vjh~e!eN3KH#0uC3~*t+To%(z?)O8{fJKT(T_prp5-2ZE8zMWtG k&#}fJ?8SbI-rs7jz2?@jhmJ8IgNS~1mB~df7&I6d0D{m~djJ3c literal 0 HcmV?d00001 From 8a361ef0d867413199594d9f564d0acd1053244b Mon Sep 17 00:00:00 2001 From: Vincent Batts 
Date: Thu, 24 Sep 2015 09:51:58 -0400 Subject: [PATCH 50/95] tar/storage: Sprintf is unnecessary fmt.Sprintf() vs string() for this []byte conversion is too much and does not provide any further safety. https://gist.github.com/vbatts/ab17181086aed558dd3a --- tar/storage/entry.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index a152ac2..b61758e 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -1,10 +1,6 @@ package storage -import ( - "fmt" - - "github.com/vbatts/tar-split/tar/common" -) +import "github.com/vbatts/tar-split/tar/common" // Entries is for sorting by Position type Entries []Entry @@ -68,7 +64,7 @@ func (e *Entry) SetNameBytes(name []byte) { // GetName returns the string for the entry's name, regardless of the field stored in func (e *Entry) GetName() string { if len(e.NameRaw) > 0 { - return fmt.Sprintf("%s", e.NameRaw) + return string(e.NameRaw) } return e.Name } From 27876e49c230ff7b95baafe59483332dce8a4e1b Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 24 Sep 2015 12:24:31 -0700 Subject: [PATCH 51/95] Update travis to go1.4.3 Signed-off-by: Alexander Morozov --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7b2d094..a053d3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: go go: - tip - 1.5.1 - - 1.4.2 + - 1.4.3 - 1.3.3 - 1.2.2 From 7e38cefd4bf1a3ee9fbd1f8ee72dafb55889a5b6 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 25 Sep 2015 14:33:24 -0400 Subject: [PATCH 52/95] common: remove in favor of stdlib `unicode/utf8` --- tar/asm/assemble_test.go | 4 ++-- tar/common/utf8.go | 22 -------------------- tar/common/utf8_test.go | 43 ---------------------------------------- tar/storage/entry.go | 10 +++++----- tar/storage/packer.go | 5 ++--- 5 files changed, 9 insertions(+), 75 deletions(-) delete mode 100644 tar/common/utf8.go delete mode 100644 
tar/common/utf8_test.go diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index e7609c0..29b7a17 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -10,9 +10,9 @@ import ( "io/ioutil" "os" "testing" + "unicode/utf8" "github.com/vbatts/tar-split/archive/tar" - "github.com/vbatts/tar-split/tar/common" "github.com/vbatts/tar-split/tar/storage" ) @@ -37,7 +37,7 @@ func TestISO8859(t *testing.T) { break } fmt.Println(hdr.Name) - if !common.IsValidUtf8String(hdr.Name) { + if !utf8.ValidString(hdr.Name) { fmt.Println([]byte(hdr.Name)) } } diff --git a/tar/common/utf8.go b/tar/common/utf8.go deleted file mode 100644 index 568e929..0000000 --- a/tar/common/utf8.go +++ /dev/null @@ -1,22 +0,0 @@ -package common - -// IsValidUtf8String checks for in valid UTF-8 characters -func IsValidUtf8String(s string) bool { - return InvalidUtf8Index([]byte(s)) == -1 -} - -// IsValidUtf8Btyes checks for in valid UTF-8 characters -func IsValidUtf8Btyes(b []byte) bool { - return InvalidUtf8Index(b) == -1 -} - -// InvalidUtf8Index returns the offset of the first invalid UTF-8 character. -// Default is to return -1 for a wholly valid sequence. 
-func InvalidUtf8Index(b []byte) int { - for i, r := range string(b) { - if int(r) == 0xfffd { - return i - } - } - return -1 -} diff --git a/tar/common/utf8_test.go b/tar/common/utf8_test.go deleted file mode 100644 index 3cf81df..0000000 --- a/tar/common/utf8_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package common - -import "testing" - -func TestStringValidation(t *testing.T) { - cases := []struct { - value string - result bool - offset int - }{ - {"aä\uFFFD本☺", false, 3}, - {"aä本☺", true, -1}, - } - - for _, c := range cases { - if i := InvalidUtf8Index([]byte(c.value)); i != c.offset { - t.Errorf("string %q - offset expected %d, got %d", c.value, c.offset, i) - } - if got := IsValidUtf8String(c.value); got != c.result { - t.Errorf("string %q - expected %v, got %v", c.value, c.result, got) - } - } -} - -func TestBytesValidation(t *testing.T) { - cases := []struct { - value []byte - result bool - offset int - }{ - {[]byte{0xE4}, false, 0}, - {[]byte("aä本☺"), true, -1}, - } - - for _, c := range cases { - if i := InvalidUtf8Index(c.value); i != c.offset { - t.Errorf("bytes %q - offset expected %d, got %d", c.value, c.offset, i) - } - if got := IsValidUtf8Btyes(c.value); got != c.result { - t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got) - } - } -} diff --git a/tar/storage/entry.go b/tar/storage/entry.go index b61758e..c91e7ea 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -1,6 +1,6 @@ package storage -import "github.com/vbatts/tar-split/tar/common" +import "unicode/utf8" // Entries is for sorting by Position type Entries []Entry @@ -44,7 +44,7 @@ type Entry struct { // SetName will check name for valid UTF-8 string, and set the appropriate // field. 
See https://github.com/vbatts/tar-split/issues/17 func (e *Entry) SetName(name string) { - if common.IsValidUtf8String(name) { + if utf8.ValidString(name) { e.Name = name } else { e.NameRaw = []byte(name) @@ -54,10 +54,10 @@ func (e *Entry) SetName(name string) { // SetNameBytes will check name for valid UTF-8 string, and set the appropriate // field func (e *Entry) SetNameBytes(name []byte) { - if !common.IsValidUtf8Btyes(name) { - e.NameRaw = name - } else { + if utf8.Valid(name) { e.Name = string(name) + } else { + e.NameRaw = name } } diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 1ea8208..0c9d99b 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -6,8 +6,7 @@ import ( "errors" "io" "path/filepath" - - "github.com/vbatts/tar-split/tar/common" + "unicode/utf8" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the @@ -97,7 +96,7 @@ type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // if Name is not valid utf8, switch it to raw first. if e.Name != "" { - if !common.IsValidUtf8String(e.Name) { + if !utf8.ValidString(e.Name) { e.NameRaw = []byte(e.Name) e.Name = "" } From 10250c25e0cb4b64f89280d0dde72feff25ef7ab Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 25 Sep 2015 14:35:12 -0400 Subject: [PATCH 53/95] tar/asm: remove useless test The iso-8859-1 archive is already tested round trip, and this test did not do anything really. 
--- tar/asm/assemble_test.go | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 29b7a17..3d0c99c 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -10,39 +10,10 @@ import ( "io/ioutil" "os" "testing" - "unicode/utf8" - "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) -func TestISO8859(t *testing.T) { - fh, err := os.Open("./testdata/iso-8859.tar.gz") - if err != nil { - t.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - t.Fatal(err) - } - defer gzRdr.Close() - tr := tar.NewReader(gzRdr) - for { - hdr, err := tr.Next() - if err != nil { - if err != io.EOF { - t.Error(err) - } - break - } - fmt.Println(hdr.Name) - if !utf8.ValidString(hdr.Name) { - fmt.Println([]byte(hdr.Name)) - } - } -} - var entries = []struct { Entry storage.Entry Body []byte From 7ea74e1c31d45d604073ed3a4a3d1ca8e7692a83 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 16 Oct 2015 16:41:09 -0400 Subject: [PATCH 54/95] demo: basic command Signed-off-by: Vincent Batts --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index b89afe5..260ff84 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,14 @@ go get github.com/vbatts/tar-split/cmd/tar-split For cli usage, see its [README.md](cmd/tar-split/README.md). For the library see the [docs](#docs) +## Demo + +### Basic disassembly and assembly + +![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805) +[youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc) + + ## Caveat Eventually this should detect TARs that this is not possible with. 
From bece0c70095443be20deb1c7db2643ff25996044 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 16 Oct 2015 17:05:18 -0400 Subject: [PATCH 55/95] demo: docker layer checksums Signed-off-by: Vincent Batts --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 260ff84..90a8edf 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,18 @@ For the library see the [docs](#docs) ### Basic disassembly and assembly +This demonstrates the `tar-split` command and how to assemble a tar archive from the `tar-data.json.gz` + + ![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805) [youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc) +### Docker layer preservation + +This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content. + +![docker tar-split demo](https://www.youtube.com/upload_thumbnail?v=tV_Dia8E8xw&t=2&ts=1445028436275) +[youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw) ## Caveat From 8b20f9161d2cd89438fd90a228464d545647a237 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Mon, 30 Nov 2015 09:52:44 -0800 Subject: [PATCH 56/95] Optimize JSON decoding This allows to avoid extra allocations on `ReadBytes` and decoding buffers. 
Signed-off-by: Tonis Tiigi --- tar/storage/packer.go | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 0c9d99b..aba6948 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -1,7 +1,6 @@ package storage import ( - "bufio" "encoding/json" "errors" "io" @@ -33,31 +32,15 @@ type PackUnpacker interface { */ type jsonUnpacker struct { - r io.Reader - b *bufio.Reader - isEOF bool - seen seenNames + seen seenNames + dec *json.Decoder } func (jup *jsonUnpacker) Next() (*Entry, error) { var e Entry - if jup.isEOF { - // since ReadBytes() will return read bytes AND an EOF, we handle it this - // round-a-bout way so we can Unmarshal the tail with relevant errors, but - // still get an io.EOF when the stream is ended. - return nil, io.EOF - } - line, err := jup.b.ReadBytes('\n') - if err != nil && err != io.EOF { + err := jup.dec.Decode(&e) + if err != nil { return nil, err - } else if err == io.EOF { - jup.isEOF = true - } - - err = json.Unmarshal(line, &e) - if err != nil && jup.isEOF { - // if the remainder actually _wasn't_ a remaining json structure, then just EOF - return nil, io.EOF } // check for dup name @@ -78,8 +61,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // Each Entry read are expected to be delimited by new line. func NewJSONUnpacker(r io.Reader) Unpacker { return &jsonUnpacker{ - r: r, - b: bufio.NewReader(r), + dec: json.NewDecoder(r), seen: seenNames{}, } } From d80c6b3bb1ab559917e144804ad682bf5cdb82d9 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 1 Dec 2015 15:26:30 -0500 Subject: [PATCH 57/95] travis: drop go1.2 seems overly reasonable to support go1.3 and greater. 
:-) Signed-off-by: Vincent Batts --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a053d3b..c0a17c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ go: - 1.5.1 - 1.4.3 - 1.3.3 - - 1.2.2 # let us have pretty, fast Docker-based Travis workers! sudo: false From 11281e8c0930c0ed1d8829bfa005ac96d38386aa Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 1 Dec 2015 15:06:57 -0500 Subject: [PATCH 58/95] tar/storage: adding Getter Putter benchmark Signed-off-by: Vincent Batts --- tar/storage/packer_test.go | 57 +++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/tar/storage/packer_test.go b/tar/storage/packer_test.go index 1c6101f..7d93371 100644 --- a/tar/storage/packer_test.go +++ b/tar/storage/packer_test.go @@ -4,6 +4,8 @@ import ( "bytes" "compress/gzip" "io" + "io/ioutil" + "os" "testing" ) @@ -159,5 +161,58 @@ func TestGzip(t *testing.T) { if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } - +} + +func BenchmarkGetPut(b *testing.B) { + e := []Entry{ + Entry{ + Type: SegmentType, + Payload: []byte("how"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("y'all"), + }, + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("deadbeef"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("doin"), + }, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + func() { + fh, err := ioutil.TempFile("", "tar-split.") + if err != nil { + b.Fatal(err) + } + defer os.Remove(fh.Name()) + defer fh.Close() + + jp := NewJSONPacker(fh) + for i := range e { + if _, err := jp.AddEntry(e[i]); err != nil { + b.Fatal(err) + } + } + fh.Sync() + + up := NewJSONUnpacker(fh) + for { + _, err := up.Next() + if err != nil { + if err == io.EOF { + break + } + b.Fatal(err) + } + } + + }() + } + }) } From 23b6435e6bb902fe67a20272fead5d73269373ab Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Mon, 30 Nov 2015 09:57:07 
-0800 Subject: [PATCH 59/95] Optimize tar stream generation - New writeTo method allows to avoid creating extra pipe. - Copy with a pooled buffer instead of allocating new buffer for each file. - Avoid extra object allocations inside the loop. Signed-off-by: Tonis Tiigi --- tar/asm/assemble.go | 139 ++++++++++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 38 deletions(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 83d6426..d624450 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -3,8 +3,10 @@ package asm import ( "bytes" "fmt" + "hash" "hash/crc64" "io" + "sync" "github.com/vbatts/tar-split/tar/storage" ) @@ -23,45 +25,106 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose } pr, pw := io.Pipe() go func() { - for { - entry, err := up.Next() - if err != nil { - pw.CloseWithError(err) - return - } - switch entry.Type { - case storage.SegmentType: - if _, err := pw.Write(entry.Payload); err != nil { - pw.CloseWithError(err) - return - } - case storage.FileType: - if entry.Size == 0 { - continue - } - fh, err := fg.Get(entry.GetName()) - if err != nil { - pw.CloseWithError(err) - return - } - c := crc64.New(storage.CRCTable) - tRdr := io.TeeReader(fh, c) - if _, err := io.Copy(pw, tRdr); err != nil { - fh.Close() - pw.CloseWithError(err) - return - } - if !bytes.Equal(c.Sum(nil), entry.Payload) { - // I would rather this be a comparable ErrInvalidChecksum or such, - // but since it's coming through the PipeReader, the context of - // _which_ file would be lost... - fh.Close() - pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) - return - } - fh.Close() - } + err := WriteOutputTarStream(fg, up, pw) + if err != nil { + pw.CloseWithError(err) + } else { + pw.Close() } }() return pr } + +// WriteOutputTarStream writes assembled tar archive to a writer. +func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error { + // ... 
Since these are interfaces, this is possible, so let's not have a nil pointer + if fg == nil || up == nil { + return nil + } + var copyBuffer []byte + var crcHash hash.Hash + var crcSum []byte + var multiWriter io.Writer + for { + entry, err := up.Next() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + switch entry.Type { + case storage.SegmentType: + if _, err := w.Write(entry.Payload); err != nil { + return err + } + case storage.FileType: + if entry.Size == 0 { + continue + } + fh, err := fg.Get(entry.GetName()) + if err != nil { + return err + } + if crcHash == nil { + crcHash = crc64.New(storage.CRCTable) + crcSum = make([]byte, 8) + multiWriter = io.MultiWriter(w, crcHash) + copyBuffer = byteBufferPool.Get().([]byte) + defer byteBufferPool.Put(copyBuffer) + } else { + crcHash.Reset() + } + + if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { + fh.Close() + return err + } + + if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { + // I would rather this be a comparable ErrInvalidChecksum or such, + // but since it's coming through the PipeReader, the context of + // _which_ file would be lost... 
+ fh.Close() + return fmt.Errorf("file integrity checksum failed for %q", entry.GetName()) + } + fh.Close() + } + } +} + +var byteBufferPool = &sync.Pool{ + New: func() interface{} { + return make([]byte, 32*1024) + }, +} + +// copyWithBuffer is taken from stdlib io.Copy implementation +// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367 +func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { + for { + nr, er := src.Read(buf) + if nr > 0 { + nw, ew := dst.Write(buf[0:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er == io.EOF { + break + } + if er != nil { + err = er + break + } + } + return written, err +} From 2efe34695acfa872b8c5ba17ab958de0ef9cfdb3 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 2 Dec 2015 12:56:52 -0500 Subject: [PATCH 60/95] tar/asm: remove unneeded Tee Signed-off-by: Vincent Batts --- tar/asm/assemble_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 3d0c99c..cb16eed 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -7,7 +7,6 @@ import ( "fmt" "hash/crc64" "io" - "io/ioutil" "os" "testing" @@ -167,10 +166,7 @@ func TestTarStream(t *testing.T) { // get a sum of the stream after it has passed through to ensure it's the same. 
h0 := sha1.New() - tRdr0 := io.TeeReader(tarStream, h0) - - // read it all to the bit bucket - i, err := io.Copy(ioutil.Discard, tRdr0) + i, err := io.Copy(h0, tarStream) if err != nil { t.Fatal(err) } From 19b7e22058e0b57f031f3021bbdf0aa1881e099b Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 2 Dec 2015 14:36:02 -0500 Subject: [PATCH 61/95] tar/asm: basic benchmark on disasm/asm of testdata ``` PASS BenchmarkAsm-4 5 238968475 ns/op 66841059 B/op 2449 allocs/op ok _/home/vbatts/src/vb/tar-split/tar/asm 2.267s ``` Signed-off-by: Vincent Batts --- tar/asm/assemble_test.go | 71 ++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index cb16eed..c0c7f17 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -7,6 +7,7 @@ import ( "fmt" "hash/crc64" "io" + "io/ioutil" "os" "testing" @@ -129,17 +130,18 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } } +var testCases = []struct { + path string + expectedSHA1Sum string + expectedSize int64 +}{ + {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, + {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, + {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, + {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, +} + func TestTarStream(t *testing.T) { - testCases := []struct { - path string - expectedSHA1Sum string - expectedSize int64 - }{ - {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, - {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, - {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, - {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, - } for _, tc := range testCases { fh, err := os.Open(tc.path) @@ -201,3 +203,52 @@ func 
TestTarStream(t *testing.T) { } } } + +func BenchmarkAsm(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, tc := range testCases { + func() { + fh, err := os.Open(tc.path) + if err != nil { + b.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + b.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + b.Fatal(err) + } + // read it all to the bit bucket + i1, err := io.Copy(ioutil.Discard, tarStream) + if err != nil { + b.Fatal(err) + } + + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. + + rc := NewOutputTarStream(fgp, sup) + + i2, err := io.Copy(ioutil.Discard, rc) + if err != nil { + b.Fatal(err) + } + if i1 != i2 { + b.Errorf("%s: input(%d) and ouput(%d) byte count didn't match", tc.path, i1, i2) + } + }() + } + } +} From d50e5c9283da469398d84078519de569f617be6f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 3 Dec 2015 15:45:57 -0500 Subject: [PATCH 62/95] LICENSE: update LICENSE to BSD 3-clause Signed-off-by: Vincent Batts --- LICENSE | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/LICENSE b/LICENSE index 8ba5491..ca03685 100644 --- a/LICENSE +++ b/LICENSE @@ -1,19 +1,28 @@ Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following 
conditions: +All rights reserved. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. From b87f81631a2b1cb185737b5bea76a7e9e8c29723 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sun, 31 Jan 2016 01:39:10 -0500 Subject: [PATCH 63/95] version: mark 0.9.12 --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index 0b86fbf..c41a8f2 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version // AUTO-GENEREATED. DO NOT EDIT -// 2015-08-14 09:56:50.742727493 -0400 EDT +// 2016-01-31 01:39:06.012784413 -0500 EST // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.9.6-1-gc76e420" +var VERSION = "v0.9.12" \ No newline at end of file From 440ba9e519d0481f35a916c60be51d3f58f1a6a1 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 17 Sep 2015 16:07:38 -0700 Subject: [PATCH 64/95] archive/tar: remove dead code with USTAR path splitting Convert splitUSTARPath to return a bool rather than an error since the caller never ever uses the error other than to check if it is nil. Thus, we can remove errNameTooLong as well. Also, fold the checking of the length <= fileNameSize and whether the string is ASCII into the split function itself. Lastly, remove logic to set the MAGIC since that's already done on L200. Thus, setting the magic is redundant. There is no overall logic change. 
Updates #12638 Change-Id: I26b6992578199abad723c2a2af7f4fc078af9c17 Reviewed-on: https://go-review.googlesource.com/14723 Reviewed-by: David Symonds Run-TryBot: David Symonds --- archive/tar/writer.go | 52 +++++++++++++------------------------- archive/tar/writer_test.go | 34 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 9dbc01a..3547c17 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -23,7 +23,6 @@ var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") - errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) @@ -215,26 +214,14 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - suffix := hdr.Name - prefix := "" - if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { - var err error - prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) - if err == nil { - // ok we can use a ustar long name instead of pax, now correct the fields + prefix, suffix, ok := splitUSTARPath(hdr.Name) + if ok { + // Since we can encode in USTAR format, disable PAX header. + delete(paxHeaders, paxPath) - // remove the path field from the pax header. this will suppress the pax header - delete(paxHeaders, paxPath) - - // update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) - - // Use the ustar magic if we used ustar long names. 
- if len(prefix) > 0 && !tw.usedBinary { - copy(header[257:265], []byte("ustar\x00")) - } - } + // Update the path fields + tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) + tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) } } @@ -270,28 +257,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { return tw.err } -// writeUSTARLongName splits a USTAR long name hdr.Name. -// name must be < 256 characters. errNameTooLong is returned -// if hdr.Name can't be split. The splitting heuristic -// is compatible with gnu tar. -func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length > fileNamePrefixSize+1 { + if length <= fileNameSize || !isASCII(name) { + return "", "", false + } else if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } + i := strings.LastIndex(name[:length], "/") - // nlen contains the resulting length in the name field. - // plen contains the resulting length in the prefix field. 
- nlen := len(name) - i - 1 - plen := i + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - err = errNameTooLong - return + return "", "", false } - prefix, suffix = name[:i], name[i+1:] - return + return name[:i], name[i+1:], true } // writePaxHeader writes an extended pax header to the diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index fe46a67..caf40a8 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -544,3 +544,37 @@ func TestWriteAfterClose(t *testing.T) { t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) } } + +func TestSplitUSTARPath(t *testing.T) { + var sr = strings.Repeat + + var vectors = []struct { + input string // Input path + prefix string // Expected output prefix + suffix string // Expected output suffix + ok bool // Split success? + }{ + {"", "", "", false}, + {"abc", "", "", false}, + {"用戶名", "", "", false}, + {sr("a", fileNameSize), "", "", false}, + {sr("a", fileNameSize) + "/", "", "", false}, + {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, + {sr("a", fileNamePrefixSize) + "/", "", "", false}, + {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, + {sr("a", fileNameSize+1), "", "", false}, + {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, + {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), + sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, + {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, + {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + } + + for _, v := range vectors { + prefix, suffix, ok := splitUSTARPath(v.input) + if prefix != v.prefix || suffix != v.suffix || ok != v.ok { + t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", + v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) + } + } +} From 
af15385a0daa2a76ac99546a89e1dc38ec289b8f Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 28 Sep 2015 16:38:16 -0700 Subject: [PATCH 65/95] archive/tar: fix bugs with sparseFileReader The sparseFileReader is prone to two different forms of denial-of-service attacks: * A malicious tar file can cause an infinite loop * A malicious tar file can cause arbitrary panics This results because of poor error checking/handling, which this CL fixes. While we are at it, add a plethora of unit tests to test for possible malicious inputs. Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929 Reviewed-on: https://go-review.googlesource.com/15115 Reviewed-by: Andrew Gerrand Run-TryBot: Andrew Gerrand TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 149 +++++++++++++++-------- archive/tar/reader_test.go | 236 ++++++++++++++++++++++++------------- 2 files changed, 258 insertions(+), 127 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 4168ea2..1f57508 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -12,6 +12,7 @@ import ( "errors" "io" "io/ioutil" + "math" "os" "strconv" "strings" @@ -70,12 +71,36 @@ type regFileReader struct { nb int64 // number of unread bytes for current file entry } -// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. +// A sparseFileReader is a numBytesReader for reading sparse file data from a +// tar archive. type sparseFileReader struct { - rfr *regFileReader // reads the sparse-encoded file data - sp []sparseEntry // the sparse map for the file - pos int64 // keeps track of file position - tot int64 // total size of the file + rfr numBytesReader // Reads the sparse-encoded file data + sp []sparseEntry // The sparse map for the file + pos int64 // Keeps track of file position + total int64 // Total size of the file +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// +// Sparse files are represented using a series of sparseEntrys. 
+// Despite the name, a sparseEntry represents an actual data fragment that +// references data found in the underlying archive stream. All regions not +// covered by a sparseEntry are logically filled with zeros. +// +// For example, if the underlying raw file contains the 10-byte data: +// var compactData = "abcdefgh" +// +// And the sparse map has the following entries: +// var sp = []sparseEntry{ +// {offset: 2, numBytes: 5} // Data fragment for [2..7] +// {offset: 18, numBytes: 3} // Data fragment for [18..21] +// } +// +// Then the content of the resulting sparse file with a "real" size of 25 is: +// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type sparseEntry struct { + offset int64 // Starting position of the fragment + numBytes int64 // Length of the fragment } // Keywords for GNU sparse files in a PAX extended header @@ -156,7 +181,10 @@ func (tr *Reader) Next() (*Header, error) { if sp != nil { // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } return hdr, nil case TypeGNULongName: @@ -631,21 +659,17 @@ func (tr *Reader) readHeader() *Header { if tr.err != nil { return nil } + // Current file is a GNU sparse file. Update the current file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil + } } return hdr } -// A sparseEntry holds a single entry in a sparse file's sparse map. -// A sparse entry indicates the offset and size in a sparse file of a -// block of data. -type sparseEntry struct { - offset int64 - numBytes int64 -} - // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. 
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. @@ -879,9 +903,33 @@ func (rfr *regFileReader) numBytes() int64 { return rfr.nb } -// readHole reads a sparse file hole ending at offset toOffset -func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { - n64 := toOffset - sfr.pos +// newSparseFileReader creates a new sparseFileReader, but validates all of the +// sparse entries before doing so. +func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { + if total < 0 { + return nil, ErrHeader // Total size cannot be negative + } + + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + for i, s := range sp { + switch { + case s.offset < 0 || s.numBytes < 0: + return nil, ErrHeader // Negative values are never okay + case s.offset > math.MaxInt64-s.numBytes: + return nil, ErrHeader // Integer overflow with large length + case s.offset+s.numBytes > total: + return nil, ErrHeader // Region extends beyond the "real" size + case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: + return nil, ErrHeader // Regions can't overlap and must be in order + } + } + return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil +} + +// readHole reads a sparse hole ending at endOffset. +func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { + n64 := endOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } @@ -895,49 +943,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - if len(sfr.sp) == 0 { - // No more data fragments to read from. 
- if sfr.pos < sfr.tot { - // We're in the last hole - n = sfr.readHole(b, sfr.tot) - return - } - // Otherwise, we're at the end of the file - return 0, io.EOF - } - if sfr.tot < sfr.sp[0].offset { - return 0, io.ErrUnexpectedEOF - } - if sfr.pos < sfr.sp[0].offset { - // We're in a hole - n = sfr.readHole(b, sfr.sp[0].offset) - return + // Skip past all empty fragments. + for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { + sfr.sp = sfr.sp[1:] } - // We're not in a hole, so we'll read from the next data fragment - posInFragment := sfr.pos - sfr.sp[0].offset - bytesLeft := sfr.sp[0].numBytes - posInFragment + // If there are no more fragments, then it is possible that there + // is one last sparse hole. + if len(sfr.sp) == 0 { + // This behavior matches the BSD tar utility. + // However, GNU tar stops returning data even if sfr.total is unmet. + if sfr.pos < sfr.total { + return sfr.readHole(b, sfr.total), nil + } + return 0, io.EOF + } + + // In front of a data fragment, so read a hole. + if sfr.pos < sfr.sp[0].offset { + return sfr.readHole(b, sfr.sp[0].offset), nil + } + + // In a data fragment, so read from it. + // This math is overflow free since we verify that offset and numBytes can + // be safely added when creating the sparseFileReader. 
+ endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment + bytesLeft := endPos - sfr.pos // Bytes left in fragment if int64(len(b)) > bytesLeft { - b = b[0:bytesLeft] + b = b[:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) - - if int64(n) == bytesLeft { - // We're done with this fragment - sfr.sp = sfr.sp[1:] + if err == io.EOF { + if sfr.pos < endPos { + err = io.ErrUnexpectedEOF // There was supposed to be more data + } else if sfr.pos < sfr.total { + err = nil // There is still an implicit sparse hole at the end + } } - if err == io.EOF && sfr.pos < sfr.tot { - // We reached the end of the last fragment's data, but there's a final hole - err = nil + if sfr.pos == endPos { + sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it } - return + return n, err } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.nb + return sfr.rfr.numBytes() } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index da01f26..bca0c05 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "strings" @@ -560,80 +561,155 @@ func TestSparseEndToEnd(t *testing.T) { } } -type sparseFileReadTest struct { - sparseData []byte - sparseMap []sparseEntry - realSize int64 - expected []byte -} - -var sparseFileReadTests = []sparseFileReadTest{ - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 8, - expected: []byte("ab\x00\x00\x00cde"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 10, - expected: []byte("ab\x00\x00\x00cde\x00\x00"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, 
numBytes: 2}, - }, - realSize: 8, - expected: []byte("\x00abc\x00\x00de"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - realSize: 10, - expected: []byte("\x00abc\x00\x00de\x00\x00"), - }, - { - sparseData: []byte(""), - sparseMap: nil, - realSize: 2, - expected: []byte("\x00\x00"), - }, -} - func TestSparseFileReader(t *testing.T) { - for i, test := range sparseFileReadTests { - r := bytes.NewReader(test.sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: test.sparseMap, - pos: 0, - tot: test.realSize, - } - if sfr.numBytes() != nb { - t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) - } - buf, err := ioutil.ReadAll(sfr) + var vectors = []struct { + realSize int64 // Real size of the output file + sparseMap []sparseEntry // Input sparse map + sparseData string // Input compact data + expected string // Expected output data + err error // Expected error outcome + }{{ + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde\x00\x00", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 10, + sparseMap: 
[]sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 2, + sparseMap: []sparseEntry{}, + sparseData: "", + expected: "\x00\x00", + }, { + realSize: -2, + sparseMap: []sparseEntry{}, + err: ErrHeader, + }, { + realSize: -10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: io.ErrUnexpectedEOF, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: math.MaxInt64, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 2, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }} + + for i, v := range vectors { + r := bytes.NewReader([]byte(v.sparseData)) + rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} + + var sfr *sparseFileReader + var err error + var buf []byte + + sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + goto fail } - if e := test.expected; !bytes.Equal(buf, e) { - t.Errorf("test %d: Contents = %v, want %v", i, buf, e) + if sfr.numBytes() != int64(len(v.sparseData)) { + t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) + } + buf, err = 
ioutil.ReadAll(sfr) + if err != nil { + goto fail + } + if string(buf) != v.expected { + t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) } if sfr.numBytes() != 0 { - t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) + t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) + } + + fail: + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) } } } @@ -646,10 +722,10 @@ func TestSparseIncrementalRead(t *testing.T) { r := bytes.NewReader(sparseData) nb := int64(r.Len()) sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - tot: int64(len(expected)), + rfr: ®FileReader{r: r, nb: nb}, + sp: sparseMap, + pos: 0, + total: int64(len(expected)), } // We'll read the data 6 bytes at a time, with a hole of size 10 at @@ -747,6 +823,11 @@ func TestUninitializedRead(t *testing.T) { } +// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests +// that Reader properly handles corrupted tar files. Given the increasing number +// of invalid/malicious that can crash Reader, we should modify TestReader to +// be able to test that intentionally corrupt tar files don't succeed or crash. + // Negative header size should not cause panic. // Issues 10959 and 10960. 
func TestNegativeHdrSize(t *testing.T) { @@ -771,14 +852,11 @@ func TestIssue10968(t *testing.T) { t.Fatal(err) } defer f.Close() + r := NewReader(f) _, err = r.Next() - if err != nil { - t.Fatal(err) - } - _, err = io.Copy(ioutil.Discard, r) - if err != io.ErrUnexpectedEOF { - t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) + if err == nil { + t.Fatal("Unexpected success") } } From f0fc67b3a8643a174215d1e514d25414feb83dcf Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 03:08:18 -0700 Subject: [PATCH 66/95] archive/tar: make Reader.Read errors persistent If the stream is in an inconsistent state, it does not make sense that Reader.Read can be called and possibly succeed. Change-Id: I9d1c5a1300b2c2b45232188aa7999e350809dcf2 Reviewed-on: https://go-review.googlesource.com/15177 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick --- archive/tar/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 1f57508..7d05d7d 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -871,9 +871,13 @@ func (tr *Reader) numBytes() int64 { // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. func (tr *Reader) Read(b []byte) (n int, err error) { + if tr.err != nil { + return 0, tr.err + } if tr.curr == nil { return 0, io.EOF } + n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err From 4ad443d1668a7ac6cfe49b02265247bb6fb636fa Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:59:49 -0700 Subject: [PATCH 67/95] archive/tar: expand abilities of TestReader Motivation: * There are an increasing number of "one-off" corrupt files added to make sure that package does not succeed or crash on them. Instead, allow for the test to specify the error that is expected to occur (if any). * Also, fold in the logic to check the MD5 checksum into this function. 
The following tests are being removed: * TestIncrementalRead: Done by TestReader by using io.CopyBuffer with a buffer of 8. This achieves the same behavior as this test. * TestSparseEndToEnd: Since TestReader checks the MD5 checksums if the input corpus provides them, then this is redundant. * TestSparseIncrementalRead: Redundant for the same reasons that TestIncrementalRead is now redundant * TestNegativeHdrSize: Added to TestReader corpus * TestIssue10968: Added to TestReader corpus * TestIssue11169: Added to TestReader corpus With this change, code coverage did not change: 85.3% Change-Id: I8550d48657d4dbb8f47dfc3dc280758ef73b47ec Reviewed-on: https://go-review.googlesource.com/15176 Reviewed-by: Andrew Gerrand --- archive/tar/reader_test.go | 296 ++++++++++--------------------------- 1 file changed, 81 insertions(+), 215 deletions(-) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index bca0c05..4d065a9 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -19,9 +19,10 @@ import ( ) type untarTest struct { - file string - headers []*Header - cksums []string + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur } var gnuTarTest = &untarTest{ @@ -50,7 +51,7 @@ var gnuTarTest = &untarTest{ Gname: "eng", }, }, - cksums: []string{ + chksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, @@ -130,7 +131,7 @@ var sparseTarTest = &untarTest{ Devminor: 0, }, }, - cksums: []string{ + chksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", @@ -287,37 +288,93 @@ var untarTests = []*untarTest{ }, }, }, + { + file: "testdata/neg-size.tar", + err: ErrHeader, + }, + { + file: "testdata/issue10968.tar", + err: ErrHeader, + }, + { + file: "testdata/issue11169.tar", + // TODO(dsnet): Currently 
the library does not detect that this file is + // malformed. Instead it incorrectly believes that file just ends. + // err: ErrHeader, + }, } func TestReader(t *testing.T) { -testLoop: - for i, test := range untarTests { - f, err := os.Open(test.file) + for i, v := range untarTests { + f, err := os.Open(v.file) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) continue } defer f.Close() - tr := NewReader(f) - for j, header := range test.headers { - hdr, err := tr.Next() - if err != nil || hdr == nil { - t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) - f.Close() - continue testLoop + + // Capture all headers and checksums. + var ( + tr = NewReader(f) + hdrs []*Header + chksums []string + rdbuf = make([]byte, 8) + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break } - if !reflect.DeepEqual(*hdr, *header) { - t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", - i, j, *hdr, *header) + hdrs = append(hdrs, hdr) + + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) + } + + for j, hdr := range hdrs { + if j >= len(v.headers) { + t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v", + v.file, i, j, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[j]) { + t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot %+v\nwant %+v", + v.file, i, j, *hdr, *v.headers[j]) } } - hdr, err := tr.Next() - if err == io.EOF { - continue testLoop + if len(hdrs) != len(v.headers) { + t.Errorf("file %s, test %d: got %d headers, want %d headers", + v.file, i, len(hdrs), len(v.headers)) } - if hdr != nil || err != nil { - t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) + + 
for j, sum := range chksums { + if j >= len(v.chksums) { + t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s", + v.file, i, j, sum) + continue + } + if sum != v.chksums[j] { + t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s", + v.file, i, j, sum, v.chksums[j]) + } } + + if err != v.err { + t.Errorf("file %s, test %d: unexpected error: got %v, want %v", + v.file, i, err, v.err) + } + f.Close() } } @@ -357,60 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestIncrementalRead(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read file contents in little chunks EOF, - // checksumming all the way - h := md5.New() - rdbuf := make([]uint8, 8) - for { - nr, err := tr.Read(rdbuf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - break - } - h.Write(rdbuf[0:nr]) - } - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - func TestNonSeekable(t *testing.T) { test := gnuTarTest f, err := os.Open(test.file) @@ -515,52 +518,6 @@ func TestMergePAX(t *testing.T) { } } -func TestSparseEndToEnd(t *testing.T) { - test := sparseTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - 
headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read and checksum the file data - h := md5.New() - _, err = io.Copy(h, tr) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - func TestSparseFileReader(t *testing.T) { var vectors = []struct { realSize int64 // Real size of the output file @@ -714,45 +671,6 @@ func TestSparseFileReader(t *testing.T) { } } -func TestSparseIncrementalRead(t *testing.T) { - sparseMap := []sparseEntry{{10, 2}} - sparseData := []byte("Go") - expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" - - r := bytes.NewReader(sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - total: int64(len(expected)), - } - - // We'll read the data 6 bytes at a time, with a hole of size 10 at - // the beginning and one of size 8 at the end. 
- var outputBuf bytes.Buffer - buf := make([]byte, 6) - for { - n, err := sfr.Read(buf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - } - if n > 0 { - _, err := outputBuf.Write(buf[:n]) - if err != nil { - t.Errorf("Write: unexpected error %v\n", err) - } - } - } - got := outputBuf.String() - if got != expected { - t.Errorf("Contents = %v, want %v", got, expected) - } -} - func TestReadGNUSparseMap0x1(t *testing.T) { headers := map[string]string{ paxGNUSparseNumBlocks: "4", @@ -822,55 +740,3 @@ func TestUninitializedRead(t *testing.T) { } } - -// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests -// that Reader properly handles corrupted tar files. Given the increasing number -// of invalid/malicious that can crash Reader, we should modify TestReader to -// be able to test that intentionally corrupt tar files don't succeed or crash. - -// Negative header size should not cause panic. -// Issues 10959 and 10960. -func TestNegativeHdrSize(t *testing.T) { - f, err := os.Open("testdata/neg-size.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err != ErrHeader { - t.Error("want ErrHeader, got", err) - } - io.Copy(ioutil.Discard, r) -} - -// This used to hang in (*sparseFileReader).readHole due to missing -// verification of sparse offsets against file size. -func TestIssue10968(t *testing.T) { - f, err := os.Open("testdata/issue10968.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") - } -} - -// Do not panic if there are errors in header blocks after the pax header. 
-// Issue 11169 -func TestIssue11169(t *testing.T) { - f, err := os.Open("testdata/issue11169.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") - } -} From cb423795ebbea7ab1f8570fa6811ffbd43c04c96 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 6 Oct 2015 01:04:18 -0700 Subject: [PATCH 68/95] archive/tar: add missing error checks to Reader.Next A recursive call to Reader.Next did not check the error before trying to use the result, leading to a nil pointer panic. This specific CL addresses the immediate issue, which is the panic, but does not solve the root issue, which is due to an integer overflow in the base-256 parser. Updates #12435 Change-Id: Ia908671f0f411a409a35e24f2ebf740d46734072 Reviewed-on: https://go-review.googlesource.com/15437 Run-TryBot: Brad Fitzpatrick Reviewed-by: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 31 ++++++-------- archive/tar/reader_test.go | 87 +++++++++++++++++++++++++++++++------- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 7d05d7d..dc23085 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -820,40 +820,37 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { return sp, nil } -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. -// The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { - // Get number of entries - numEntriesStr, ok := headers[paxGNUSparseNumBlocks] - if !ok { - return nil, ErrHeader - } - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) - if err != nil { +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. 
+func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } - sparseMap := strings.Split(headers[paxGNUSparseMap], ",") - - // There should be two numbers in sparseMap for each entry + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } - // Loop through the entries in the sparse map + // Loop through the entries in the sparse map. + // numEntries is trusted now. sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) if err != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 4d065a9..d9d089b 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -672,23 +672,78 @@ func TestSparseFileReader(t *testing.T) { } func TestReadGNUSparseMap0x1(t *testing.T) { - headers := map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - } - expected := []sparseEntry{ - {offset: 0, numBytes: 5}, - {offset: 10, numBytes: 5}, - {offset: 20, numBytes: 5}, - {offset: 30, numBytes: 5}, - } + const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) + ) + var ( + big1 = fmt.Sprintf("%d", int64(maxInt)) + big2 = 
fmt.Sprintf("%d", (int64(maxInt)/2)+1) + big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) + ) - sp, err := readGNUSparseMap0x1(headers) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + var vectors = []struct { + extHdrs map[string]string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + err error // Expected errors that may be raised + }{{ + extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, + err: ErrHeader, + }, { + extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big1, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big2, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big3, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }} + + for i, v := range vectors { + sp, err := readGNUSparseMap0x1(v.extHdrs) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } } } 
From cf83c95de838674ba781bb4d0684a3e77c1bfc87 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 01:04:24 -0700 Subject: [PATCH 69/95] archive/tar: fix numeric overflow issues in readGNUSparseMap0x1 Motivation: * The logic to verify the numEntries can overflow and incorrectly pass, allowing a malicious file to allocate arbitrary memory. * The use of strconv.ParseInt does not set the integer precision to 64bit, causing this code to work incorrectly on 32bit machines. Change-Id: I1b1571a750a84f2dde97cc329ed04fe2342aaa60 Reviewed-on: https://go-review.googlesource.com/15173 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 57 +++++++++++--- archive/tar/reader_test.go | 156 ++++++++++++++++++++++++++++++------- 2 files changed, 173 insertions(+), 40 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index dc23085..cce9d23 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -504,20 +504,48 @@ func (tr *Reader) octal(b []byte) int64 { return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. 
+func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) - return + _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) + return tr.err } - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -530,6 +558,13 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return given == unsigned || given == signed } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. 
+// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) @@ -541,7 +576,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -558,7 +593,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index d9d089b..90b8b46 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -414,35 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestNonSeekable(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - type readerOnly struct { - io.Reader - } - tr := NewReader(readerOnly{f}) - nread := 0 - - for ; ; nread++ { - _, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - if nread != len(test.headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) - } -} - func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -795,3 +766,130 @@ func TestUninitializedRead(t *testing.T) { } } + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various 
truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := ioutil.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + var vectors = []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX 
header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + // TODO(dsnet): This should pass, but currently fails. + // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = io.Copy(ioutil.Discard, tr); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +} From bffda594f770add2c260a42feaf0e1e3c0651a56 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:30:29 -0700 Subject: [PATCH 70/95] archive/tar: detect truncated files Motivation: * Reader.skipUnread 
never reports io.ErrUnexpectedEOF. This is strange given that io.ErrUnexpectedEOF is given through Reader.Read if the user manually reads the file. * Reader.skipUnread fails to detect truncated files since io.Seeker is lazy about reporting errors. Thus, the behavior of Reader differs whether the input io.Reader also satisfies io.Seeker or not. To solve this, we seek to one before the end of the data section and always rely on at least one call to io.CopyN. If the tr.r satisfies io.Seeker, this is guaranteed to never read more than blockSize. Fixes #12557 Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279 Reviewed-on: https://go-review.googlesource.com/15175 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/writer.go | 21 ++++++++++----- archive/tar/writer_test.go | 53 +++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 3547c17..0165b22 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" "io" - "os" "path" + "sort" "strconv" "strings" "time" @@ -288,11 +288,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files - // with the current pid. - pid := os.Getpid() + // with the current pid. However, this results in differing outputs + // for identical inputs. As such, the constant 0 is now used instead. 
+ // golang.org/issue/12358 dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, - fmt.Sprintf("PaxHeaders.%d", pid), file) + fullName := path.Join(dir, "PaxHeaders.0", file) ascii := toASCII(fullName) if len(ascii) > 100 { @@ -302,8 +302,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // Construct the body var buf bytes.Buffer - for k, v := range paxHeaders { - fmt.Fprint(&buf, paxHeader(k+"="+v)) + // Keys are sorted before writing to body to allow deterministic output. + var keys []string + for k := range paxHeaders { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index caf40a8..25d88dc 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -11,6 +11,7 @@ import ( "io/ioutil" "os" "reflect" + "sort" "strings" "testing" "testing/iotest" @@ -291,7 +292,7 @@ func TestPax(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -330,7 +331,7 @@ func TestPaxSymlink(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. 
@@ -380,7 +381,7 @@ func TestPaxNonAscii(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -439,6 +440,52 @@ func TestPaxXattrs(t *testing.T) { } } +func TestPaxHeadersSorted(t *testing.T) { + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := strings.Repeat(" ", int(hdr.Size)) + + hdr.Xattrs = map[string]string{ + "foo": "foo", + "bar": "bar", + "baz": "baz", + "qux": "qux", + } + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + + // xattr bar should always appear before others + indices := []int{ + bytes.Index(buf.Bytes(), []byte("bar=bar")), + bytes.Index(buf.Bytes(), []byte("baz=baz")), + bytes.Index(buf.Bytes(), []byte("foo=foo")), + bytes.Index(buf.Bytes(), []byte("qux=qux")), + } + if !sort.IntsAreSorted(indices) { + t.Fatal("PAX headers are not sorted") + } +} + func TestPAXHeader(t *testing.T) { medName := strings.Repeat("CD", 50) longName := strings.Repeat("AB", 100) From 2424f4e36723fbc7a4e06fff5878a151ae270952 Mon Sep 17 00:00:00 2001 From: Matt Layher Date: Thu, 27 Aug 2015 14:52:06 -0400 Subject: [PATCH 71/95] archive/tar: make output deterministic Replaces PID in PaxHeaders with 0. Sorts PAX header keys before writing them to the archive. 
Fixes #12358 Change-Id: If239f89c85f1c9d9895a253fb06a47ad44960124 Reviewed-on: https://go-review.googlesource.com/13975 Reviewed-by: Russ Cox Reviewed-by: Joe Tsai --- archive/tar/common.go | 11 ++++++++ archive/tar/reader.go | 24 +++++++++++------ archive/tar/reader_test.go | 43 ++++++++++++++++++++++++++++++ archive/tar/testdata/hdr-only.tar | Bin 0 -> 10240 bytes archive/tar/testdata/neg-size.tar | Bin 512 -> 512 bytes 5 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 archive/tar/testdata/hdr-only.tar diff --git a/archive/tar/common.go b/archive/tar/common.go index c31df06..36f4e23 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -327,3 +327,14 @@ func toASCII(s string) string { } return buf.String() } + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} diff --git a/archive/tar/reader.go b/archive/tar/reader.go index cce9d23..6360b4e 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -179,6 +179,13 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } if sp != nil { + // Sparse files do not make sense when applied to the special header + // types that never have a data section. + if isHeaderOnlyType(hdr.Typeflag) { + tr.err = ErrHeader + return nil, tr.err + } + // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. 
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) @@ -622,10 +629,6 @@ func (tr *Reader) readHeader() *Header { hdr.Uid = int(tr.octal(s.next(8))) hdr.Gid = int(tr.octal(s.next(8))) hdr.Size = tr.octal(s.next(12)) - if hdr.Size < 0 { - tr.err = ErrHeader - return nil - } hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] @@ -676,12 +679,17 @@ func (tr *Reader) readHeader() *Header { return nil } - // Maximum value of hdr.Size is 64 GB (12 octal digits), - // so there's no risk of int64 overflowing. - nb := int64(hdr.Size) - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + tr.err = ErrHeader + return nil + } // Set the current file reader. + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 90b8b46..3c98f4d 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -893,3 +893,46 @@ func TestReadTruncation(t *testing.T) { } } } + +// TestReadHeaderOnly tests that Reader does not attempt to read special +// header-only files. +func TestReadHeaderOnly(t *testing.T) { + f, err := os.Open("testdata/hdr-only.tar") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + var hdrs []*Header + tr := NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Next(): got %v, want %v", err, nil) + continue + } + hdrs = append(hdrs, hdr) + + // If a special flag, we should read nothing. + cnt, _ := io.ReadFull(tr, []byte{0}) + if cnt > 0 && hdr.Typeflag != TypeReg { + t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) + } + } + + // File is crafted with 16 entries. The later 8 are identical to the first + // 8 except that the size is set. 
+ if len(hdrs) != 16 { + t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) + } + for i := 0; i < 8; i++ { + var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] + hdr1.Size, hdr2.Size = 0, 0 + if !reflect.DeepEqual(*hdr1, *hdr2) { + t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) + } + } +} diff --git a/archive/tar/testdata/hdr-only.tar b/archive/tar/testdata/hdr-only.tar new file mode 100644 index 0000000000000000000000000000000000000000..f25034083de6e0176e429f939875def6eb78cc73 GIT binary patch literal 10240 zcmeI2ZE}J@42Ji2Pq95gv)>o#1+ajk2rWokd-`UnK%I_CXNW`V?jLp5$;Lb+o4jM3 zRS%4K0WN2N31Pu$!vOG|0DSEi6VfBdZFVz=+!#Geo7WlKr zRl;AI>}kUnRryx%w0!65X8WAPynIb6zQg@I`q=ZhT;AVZ14uaInh{tzn(ux&A2{mb)wBl`42&RQXR%ispcL7W$7F z`oDwzV^q+8Xow$MornI@^B?pdod1LVbIgk3(>1Qxw*Nb){{{Vr0_`Z9LH`*QrhogT zdFVfV{nxJ3f3W@s{fGXsn}`0>@$ct<6aa(%Lr=2_XU@0=FB1PuCY>1poj5 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar index 5deea3d05c4da5a4ddda34ef7ad781088464e71b..21edf38cc3c3d98c834d07b6d31e8325898ec492 100644 GIT binary patch delta 20 bcmZo*X<(T!h11Z`)Xd0`LBU|-++;=oIaUQ| delta 20 ZcmZo*X<(T!g|mSH1ZGb%+&DLx5db<)1;zjX From 7500c932c7210168610e6ee8ff136f9fb0329a04 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 3 Nov 2015 18:12:31 -0800 Subject: [PATCH 72/95] archive/tar: properly handle header-only "files" in Reader Certain special type-flags, specifically 1, 2, 3, 4, 5, 6, do not have a data section. Thus, regardless of what the size field says, we should not attempt to read any data for these special types. The relevant PAX and USTAR specification says: <<< If the typeflag field is set to specify a file to be of type 1 (a link) or 2 (a symbolic link), the size field shall be specified as zero. If the typeflag field is set to specify a file of type 5 (directory), the size field shall be interpreted as described under the definition of that record type. No data logical records are stored for types 1, 2, or 5. 
If the typeflag field is set to 3 (character special file), 4 (block special file), or 6 (FIFO), the meaning of the size field is unspecified by this volume of POSIX.1-2008, and no data logical records shall be stored on the medium. Additionally, for type 6, the size field shall be ignored when reading. If the typeflag field is set to any other value, the number of logical records written following the header shall be (size+511)/512, ignoring any fraction in the result of the division. >>> Contrary to the specification, we do not assert that the size field is zero for type 1 and 2 since we liberally accept non-conforming formats. Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed Reviewed-on: https://go-review.googlesource.com/16614 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 118 +++++++++++++++---------------------- archive/tar/reader_test.go | 99 +++++++++++++++++++++++-------- 2 files changed, 122 insertions(+), 95 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6360b4e..6948471 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -769,97 +769,77 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { return sp } -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. -// The sparse map is stored just before the file data and padded out to the nearest block boundary. +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, numBytes). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. 
However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - buf := make([]byte, 2*blockSize) - sparseHeader := buf[:blockSize] + var cntNewline int64 + var buf bytes.Buffer + var blk = make([]byte, blockSize) - // readDecimal is a helper function to read a decimal integer from the sparse map - // while making sure to read from the file in blocks of size blockSize - readDecimal := func() (int64, error) { - // Look for newline - nl := bytes.IndexByte(sparseHeader, '\n') - if nl == -1 { - if len(sparseHeader) >= blockSize { - // This is an error - return 0, ErrHeader + // feedTokens copies data in numBlock chunks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + var feedTokens = func(cnt int64) error { + for cntNewline < cnt { + if _, err := io.ReadFull(r, blk); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err } - oldLen := len(sparseHeader) - newLen := oldLen + blockSize - if cap(sparseHeader) < newLen { - // There's more header, but we need to make room for the next block - copy(buf, sparseHeader) - sparseHeader = buf[:newLen] - } else { - // There's more header, and we can just reslice - sparseHeader = sparseHeader[:newLen] - } - - // Now that sparseHeader is large enough, read next block - if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { - return 0, err - } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { - return 0, err + buf.Write(blk) + for _, c := range blk { + if c == '\n' { + cntNewline++ } } - - // Look for a newline in the new data - nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') - if nl == -1 { - // This is an error - return 0, ErrHeader - } - nl += 
oldLen // We want the position from the beginning } - // Now that we've found a newline, read a number - n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) - if err != nil { - return 0, ErrHeader - } - - // Update sparseHeader to consume this number - sparseHeader = sparseHeader[nl+1:] - return n, nil + return nil } - // Read the first block - if _, err := io.ReadFull(r, sparseHeader); err != nil { + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + var nextToken = func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return tok[:len(tok)-1] // Cut off newline + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { return nil, err } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader); err != nil { - return nil, err - } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader } - // The first line contains the number of entries - numEntries, err := readDecimal() - if err != nil { + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. 
+ if err := feedTokens(2 * numEntries); err != nil { return nil, err } - - // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - // Read the offset - offset, err := readDecimal() + offset, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - // Read numBytes - numBytes, err := readDecimal() + numBytes, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 3c98f4d..5166403 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -719,35 +719,82 @@ func TestReadGNUSparseMap0x1(t *testing.T) { } func TestReadGNUSparseMap1x0(t *testing.T) { - // This test uses lots of holes so the sparse header takes up more than two blocks - numEntries := 100 - expected := make([]sparseEntry, 0, numEntries) - sparseMap := new(bytes.Buffer) - - fmt.Fprintf(sparseMap, "%d\n", numEntries) - for i := 0; i < numEntries; i++ { - offset := int64(2048 * i) - numBytes := int64(1024) - expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) - fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) + var sp = []sparseEntry{{1, 2}, {3, 4}} + for i := 0; i < 98; i++ { + sp = append(sp, sparseEntry{54321, 12345}) } - // Make the header the smallest multiple of blockSize that fits the sparseMap - headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize - bufLen := blockSize * headerBlocks - buf := make([]byte, bufLen) - copy(buf, sparseMap.Bytes()) + var vectors = []struct { + input string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + cnt int // Expected number of bytes read + err error // Expected errors that may be raised + }{{ + input: "", + cnt: 0, + err: io.ErrUnexpectedEOF, + }, { + input: "ab", + 
cnt: 2, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 512), + cnt: 512, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 511) + "\n", + cnt: 512, + err: ErrHeader, + }, { + input: strings.Repeat("\n", 512), + cnt: 512, + err: ErrHeader, + }, { + input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), + sparseMap: []sparseEntry{}, + cnt: 512, + }, { + input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), + sparseMap: []sparseEntry{}, + cnt: 1024, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), + sparseMap: []sparseEntry{{2, 3}}, + cnt: 1536, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), + cnt: 1536, + err: ErrHeader, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), + cnt: 1536, + err: io.ErrUnexpectedEOF, + }, { + input: "-1\n2\n\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "1\nk\n2\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), + cnt: 2560, + sparseMap: sp, + }} - // Get an reader to read the sparse map - r := bytes.NewReader(buf) - - // Read the sparse map - sp, err := readGNUSparseMap1x0(r) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + for i, v := range vectors { + r := strings.NewReader(v.input) + sp, err := readGNUSparseMap1x0(r) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) + } + if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { + t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) + } + if err != v.err { + t.Errorf("test %d, unexpected 
error: got %v, want %v", i, err, v.err) + } } } From b598ba3ee75317907dec365b25d0ba2b6f3d32fe Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 01:35:15 -0700 Subject: [PATCH 73/95] archive/tar: fix issues with readGNUSparseMap1x0 Motivations: * Use of strconv.ParseInt does not properly treat integers as 64bit, preventing this function from working properly on 32bit machines. * Use of io.ReadFull does not properly detect truncated streams when the file suddenly ends on a block boundary. * The function blindly trusts user input for numEntries and allocates memory accordingly. * The function does not validate that numEntries is not negative, allowing a malicious sparse file to cause a panic during make. In general, this function was overly complicated for what it was accomplishing and it was hard to reason that it was free from bounds errors. Instead, it has been rewritten and relies on bytes.Buffer.ReadString to do the main work. So long as invariants about the number of '\n' in the buffer are maintained, it is much easier to see why this approach is correct. Change-Id: Ibb12c4126c26e0ea460ea063cd17af68e3cf609e Reviewed-on: https://go-review.googlesource.com/15174 Reviewed-by: Russ Cox Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 160 +++++++++++++++++++++++-------------- archive/tar/reader_test.go | 56 ++++++++++++- archive/tar/writer.go | 133 ++++++++++++++++-------------- archive/tar/writer_test.go | 48 ++++++----- 4 files changed, 254 insertions(+), 143 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6948471..02df550 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -40,6 +40,10 @@ type Reader struct { rawBytes *bytes.Buffer // last raw bits } +type parser struct { + err error // Last error seen +} + // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. 
// @@ -134,6 +138,7 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { + var p parser var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { @@ -216,8 +221,11 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } } - hdr.Name = cString(realname) - return hdr, err + hdr.Name = p.parseString(realname) + if p.err != nil { + return nil, p.err + } + return hdr, nil case TypeGNULongLink: // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) @@ -240,8 +248,11 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } } - hdr.Linkname = cString(realname) - return hdr, err + hdr.Name = p.parseString(realname) + if p.err != nil { + return nil, p.err + } + return hdr, nil } return hdr, tr.err } @@ -420,6 +431,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { return nil, err } } + sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. @@ -428,35 +440,17 @@ func parsePAX(r io.Reader) (map[string]string, error) { headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value - for len(buf) > 0 { - // or the header was empty to start with. - var sp int - // The size field ends at the first space. - sp = bytes.IndexByte(buf, ' ') - if sp == -1 { + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { return nil, ErrHeader } - // Parse the first token as a decimal integer. - n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil || n < 5 || int64(len(buf)) < n { - return nil, ErrHeader - } - // Extract everything between the decimal and the n -1 on the - // beginning to eat the ' ', -1 on the end to skip the newline. - var record []byte - record, buf = buf[sp+1:n-1], buf[n:] - // The first equals is guaranteed to mark the end of the key. 
- // Everything else is value. - eq := bytes.IndexByte(record, '=') - if eq == -1 { - return nil, ErrHeader - } - key, value := record[:eq], record[eq+1:] + sbuf = residual keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.Write(value) + sparseMap.WriteString(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. @@ -471,9 +465,42 @@ func parsePAX(r io.Reader) (map[string]string, error) { return headers, nil } -// cString parses bytes as a NUL-terminated C-style string. +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. 
-func cString(b []byte) string { +func (*parser) parseString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ @@ -481,7 +508,7 @@ func cString(b []byte) string { return string(b[0:n]) } -func (tr *Reader) octal(b []byte) int64 { +func (p *parser) parseNumeric(b []byte) int64 { // Check for binary format first. if len(b) > 0 && b[0]&0x80 != 0 { var x int64 @@ -494,6 +521,10 @@ func (tr *Reader) octal(b []byte) int64 { return x } + return p.parseOctal(b) +} + +func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. @@ -504,9 +535,9 @@ func (tr *Reader) octal(b []byte) int64 { if len(b) == 0 { return 0 } - x, err := strconv.ParseUint(cString(b), 8, 64) - if err != nil { - tr.err = err + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader } return int64(x) } @@ -560,9 +591,10 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return false } - given := tr.octal(header[148:156]) + var p parser + given := p.parseOctal(header[148:156]) unsigned, signed := checksum(header) - return given == unsigned || given == signed + return p.err == nil && (given == unsigned || given == signed) } // readHeader reads the next block header and assumes that the underlying reader @@ -621,18 +653,19 @@ func (tr *Reader) readHeader() *Header { } // Unpack + var p parser hdr := new(Header) s := slicer(header) - hdr.Name = cString(s.next(100)) - hdr.Mode = tr.octal(s.next(8)) - hdr.Uid = int(tr.octal(s.next(8))) - hdr.Gid = int(tr.octal(s.next(8))) - hdr.Size = tr.octal(s.next(12)) - hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.Name = p.parseString(s.next(100)) + hdr.Mode = p.parseNumeric(s.next(8)) + hdr.Uid = int(p.parseNumeric(s.next(8))) + hdr.Gid = int(p.parseNumeric(s.next(8))) + hdr.Size = p.parseNumeric(s.next(12)) + hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) s.next(8) // chksum 
hdr.Typeflag = s.next(1)[0] - hdr.Linkname = cString(s.next(100)) + hdr.Linkname = p.parseString(s.next(100)) // The remainder of the header depends on the value of magic. // The original (v7) version of tar had no explicit magic field, @@ -652,30 +685,30 @@ func (tr *Reader) readHeader() *Header { switch format { case "posix", "gnu", "star": - hdr.Uname = cString(s.next(32)) - hdr.Gname = cString(s.next(32)) + hdr.Uname = p.parseString(s.next(32)) + hdr.Gname = p.parseString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = tr.octal(devmajor) - hdr.Devminor = tr.octal(devminor) + hdr.Devmajor = p.parseNumeric(devmajor) + hdr.Devminor = p.parseNumeric(devminor) } var prefix string switch format { case "posix", "gnu": - prefix = cString(s.next(155)) + prefix = p.parseString(s.next(155)) case "star": - prefix = cString(s.next(131)) - hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) - hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) + prefix = p.parseString(s.next(131)) + hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if tr.err != nil { - tr.err = ErrHeader + if p.err != nil { + tr.err = p.err return nil } @@ -695,7 +728,11 @@ func (tr *Reader) readHeader() *Header { // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = tr.octal(header[483:495]) + hdr.Size = p.parseNumeric(header[483:495]) + if p.err != nil { + tr.err = p.err + return nil + } // Read the sparse map. sp := tr.readOldGNUSparseMap(header) @@ -717,6 +754,7 @@ func (tr *Reader) readHeader() *Header { // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. 
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { + var p parser isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { @@ -727,10 +765,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -754,10 +792,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 5166403..f0dbd94 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -298,9 +298,7 @@ var untarTests = []*untarTest{ }, { file: "testdata/issue11169.tar", - // TODO(dsnet): Currently the library does not detect that this file is - // malformed. Instead it incorrectly believes that file just ends. 
- // err: ErrHeader, + err: ErrHeader, }, } @@ -983,3 +981,55 @@ func TestReadHeaderOnly(t *testing.T) { } } } + +func TestParsePAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + input string + residual string + outputKey string + outputVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.input) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) + } + } + if ok && (key != v.outputKey || val != v.outputVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.input, key, val, v.outputKey, v.outputVal) + } + if res != v.residual { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.input, res, v.residual) + } + } +} diff --git a/archive/tar/writer.go 
b/archive/tar/writer.go index 0165b22..688455d 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -42,6 +42,10 @@ type Writer struct { paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } +type formatter struct { + err error // Last error seen +} + // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -68,17 +72,9 @@ func (tw *Writer) Flush() error { } // Write s into b, terminating it with a NUL if there is room. -// If the value is too long for the field and allowPax is true add a paxheader record instead -func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } +func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { - if tw.err == nil { - tw.err = ErrFieldTooLong - } + f.err = ErrFieldTooLong return } ascii := toASCII(s) @@ -89,35 +85,17 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, } // Encode x as an octal ASCII string and write it into b with leading zeros. -func (tw *Writer) octal(b []byte, x int64) { +func (f *formatter) formatOctal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL. for len(s)+1 < len(b) { s = "0" + s } - tw.cString(b, s, false, paxNone, nil) + f.formatString(b, s) } -// Write x into b, either as octal or as binary (GNUtar/star extension). -// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead -func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - // Try octal first. 
- s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - tw.octal(b, x) - return - } - - // If it is too long for octal, and pax is preferred, use a pax header - if allowPax && tw.preferPax { - tw.octal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - // Too big: use binary (big-endian). - tw.usedBinary = true +// Write x into b, as binary (GNUtar/star extension). +func (f *formatter) formatNumeric(b []byte, x int64) { for i := len(b) - 1; x > 0 && i >= 0; i-- { b[i] = byte(x) x >>= 8 @@ -161,6 +139,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters + var f formatter var header []byte // We need to select which scratch buffer to use carefully, @@ -175,10 +154,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { copy(header, zeroBlock) s := slicer(header) + // Wrappers around formatter that automatically sets paxHeaders if the + // argument extends beyond the capacity of the input byte slice. + var formatString = func(b []byte, s string, paxKeyword string) { + needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + f.formatString(b, s) + } + var formatNumeric = func(b []byte, x int64, paxKeyword string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + f.formatOctal(b, x) + return + } + + // If it is too long for octal, and PAX is preferred, use a PAX header. 
+ if paxKeyword != paxNone && tw.preferPax { + f.formatOctal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + tw.usedBinary = true + f.formatNumeric(b, x) + } + // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) - tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) + formatString(pathHeaderBytes, hdr.Name, paxPath) // Handle out of range ModTime carefully. var modTime int64 @@ -186,25 +195,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { modTime = hdr.ModTime.Unix() } - tw.octal(s.next(8), hdr.Mode) // 100:108 - tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 - tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 - tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 - tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + f.formatOctal(s.next(8), hdr.Mode) // 100:108 + formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 + formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 + formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 + formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 - tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) + formatString(s.next(100), hdr.Linkname, paxLinkpath) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 - tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 - tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 - tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 + 
copy(s.next(8), []byte("ustar\x0000")) // 257:265 + formatString(s.next(32), hdr.Uname, paxUname) // 265:297 + formatString(s.next(32), hdr.Gname, paxGname) // 297:329 + formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 + formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) - tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix + formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { @@ -220,19 +229,20 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { delete(paxHeaders, paxPath) // Update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) + formatString(pathHeaderBytes, suffix, paxNone) + formatString(prefixHeaderBytes, prefix, paxNone) } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. chksum, _ := checksum(header) - tw.octal(header[148:155], chksum) + f.formatOctal(header[148:155], chksum) // Never fails header[155] = ' ' - if tw.err != nil { - // problem with header; probably integer too big for a field. + // Check if there were any formatting errors. 
+ if f.err != nil { + tw.err = f.err return tw.err } @@ -310,7 +320,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro sort.Strings(keys) for _, k := range keys { - fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k])) + fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) @@ -326,17 +336,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// paxHeader formats a single pax record, prefixing it with the appropriate length -func paxHeader(msg string) string { - const padding = 2 // Extra padding for space and newline - size := len(msg) + padding +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s\n", size, msg) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. 
if len(record) != size { - // Final adjustment if adding size increased - // the number of digits in size size = len(record) - record = fmt.Sprintf("%d %s\n", size, msg) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) } return record } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 25d88dc..69a44a6 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -486,24 +486,6 @@ func TestPaxHeadersSorted(t *testing.T) { } } -func TestPAXHeader(t *testing.T) { - medName := strings.Repeat("CD", 50) - longName := strings.Repeat("AB", 100) - paxTests := [][2]string{ - {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, - {"a=b", "6 a=b\n"}, // Single digit length - {"a=names", "11 a=names\n"}, // Test case involving carries - {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, - {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} - - for _, test := range paxTests { - key, expected := test[0], test[1] - if result := paxHeader(key); result != expected { - t.Fatalf("paxHeader: got %s, expected %s", result, expected) - } - } -} - func TestUSTARLongName(t *testing.T) { // Create an archive with a path that failed to split with USTAR extension in previous versions. 
fileinfo, err := os.Stat("testdata/small.txt") @@ -625,3 +607,33 @@ func TestSplitUSTARPath(t *testing.T) { } } } + +func TestFormatPAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + inputKey string + inputVal string + output string + }{ + {"k", "v", "6 k=v\n"}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, + {"path", longName, "210 path=" + longName + "\n"}, + {"path", medName, "110 path=" + medName + "\n"}, + {"foo", "ba", "9 foo=ba\n"}, + {"foo", "bar", "11 foo=bar\n"}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, + {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, + } + + for _, v := range vectors { + output := formatPAXRecord(v.inputKey, v.inputVal) + if output != v.output { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inputKey, v.inputVal, output, v.output) + } + } +} From 64935a5f0f25d74240cd2e7174a2a1aa7652a032 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 28 Sep 2015 13:49:35 -0700 Subject: [PATCH 74/95] archive/tar: move parse/format methods to standalone receiver Motivations for this change: * It allows these functions to be used outside of Reader/Writer. * It allows these functions to be more easily unit tested. 
Change-Id: Iebe2b70bdb8744371c9ffa87c24316cbbf025b59 Reviewed-on: https://go-review.googlesource.com/15113 Reviewed-by: Russ Cox Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/testdata/pax-path-hdr.tar | Bin 0 -> 1024 bytes archive/tar/testdata/ustar-file-reg.tar | Bin 0 -> 1536 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 archive/tar/testdata/pax-path-hdr.tar create mode 100644 archive/tar/testdata/ustar-file-reg.tar diff --git a/archive/tar/testdata/pax-path-hdr.tar b/archive/tar/testdata/pax-path-hdr.tar new file mode 100644 index 0000000000000000000000000000000000000000..ab8fc325b26159f4fed6bfb59fe5f616d35fec74 GIT binary patch literal 1024 zcmXR&EXmL>$=5GRO-#v6r43~O0Sq{30|OI7m>ft6gMqP;fsrYLLIndIKxuJFViC}K xO07co9Hr*bNx!kNLIE%d*akR880v$Gocz3WU67b=USe)47oFTOYR$le004hRKE?n5 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/ustar-file-reg.tar b/archive/tar/testdata/ustar-file-reg.tar new file mode 100644 index 0000000000000000000000000000000000000000..c84fa27ffb8613d20f7ac9690cd59827b04028f6 GIT binary patch literal 1536 zcmdUuJ(Hq95QhCJmlMG7O>hA!%P7hKg3OW)iVh<3F#{;S{=CgM+^V}b=Pyosep=F7x+*OI&?Q6F7LxR?fb`B^0 zttmJo<+m$~$Msw9KQ_wfqfW1sDJ#zsWq25)8&TsKn~+7*oF6~$0+nD?L2C$TBQM>$ zcQqo7Eo8w%PL1H9D9KY4`f7Ku6*oaxOv|7S1ZpTT=%sbGS#L2%*Uxm5P}U`mG$%9I zIRuF>849Ugh}k{mmgLJGtca9XnA{r2KBJ`fRXOnPqEZjWtz4z5Mq_`uZWX)VuFVko z#$DNd>@Saj1w+|ef@dPC=g!5Kb4c+mQ$bcu#R_I=;>D)V&agmv_`vpStP-sQh!z(| z5{9v2$DGKS|DrLqZ8y7?ox@|a-R^30zMeK388O@+r+cK=9NdZ)ow#}n{kwf`tD4=t zR9Oz?v}2QNv&rDR7u$%4%_>%5(k#={r!rZ(5WA5+U_Rz+w6{_kV7C!KK2e+5VuS-5 zWLRhpI#>Iq%|mhyZxDfRHCi7gC+R&sD4|k;~pXMNUD{^NPC5(KX~`*cXkwAurz(+XVNg z1P`yNv%FZy)9`uTGCWUJ^@>e2&T3O`W@re{ZN S9gn%XW0CXwKYp`+7X1&(X!Xnh literal 0 HcmV?d00001 From be9ac88117e8f7c1666e9f3c241b03c505dc52f3 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Sep 2015 00:58:56 -0700 Subject: [PATCH 75/95] archive/tar: convert Reader.Next to be loop based Motivation for change: * Recursive 
logic is hard to follow, since it tends to apply things in reverse. On the other hand, the tar formats tend to describe meta headers as affecting the next entry. * Recursion also applies changes in the wrong order. Two test files are attached that use multiple headers. The previous Go behavior differs from what GNU and BSD tar do. Change-Id: Ic1557256fc1363c5cb26570e5d0b9f65a9e57341 Reviewed-on: https://go-review.googlesource.com/14624 Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/reader.go | 160 ++++++++++-------------- archive/tar/reader_test.go | 24 ++++ archive/tar/testdata/gnu-multi-hdrs.tar | Bin 0 -> 4608 bytes archive/tar/testdata/pax-multi-hdrs.tar | Bin 0 -> 4608 bytes 4 files changed, 90 insertions(+), 94 deletions(-) create mode 100644 archive/tar/testdata/gnu-multi-hdrs.tar create mode 100644 archive/tar/testdata/pax-multi-hdrs.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 02df550..ba34ed7 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -138,8 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - var p parser - var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -147,114 +145,88 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - if tr.err == nil { - tr.skipUnread() - } + if tr.err != nil { - return hdr, tr.err + return nil, tr.err } - hdr = tr.readHeader() - if hdr == nil { - return hdr, tr.err - } - // Check for PAX/GNU header. 
- switch hdr.Typeflag { - case TypeXHeader: - // PAX extended header - headers, err := parsePAX(tr) - if err != nil { - return nil, err - } - // We actually read the whole file, - // but this skips alignment padding - tr.skipUnread() + + var hdr *Header + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + tr.err = tr.skipUnread() if tr.err != nil { return nil, tr.err } + hdr = tr.readHeader() - if hdr == nil { + if tr.err != nil { return nil, tr.err } - mergePAX(hdr, headers) - - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Sparse files do not make sense when applied to the special header - // types that never have a data section. - if isHeaderOnlyType(hdr.Typeflag) { - tr.err = ErrHeader - return nil, tr.err - } - - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, tr.err = parsePAX(tr) if tr.err != nil { return nil, tr.err } - } - return hdr, nil - case TypeGNULongName: - // We have a GNU long name header. Its contents are the real file name. 
- realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + var realname []byte + realname, tr.err = ioutil.ReadAll(tr) + if tr.err != nil { + return nil, tr.err + } + + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { + return nil, tr.err + } + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + tr.err = p.err + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + default: + mergePAX(hdr, extHdrs) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + tr.err = err return nil, err } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } + break loop // This is a file, so stop } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil - case TypeGNULongLink: - // We have a GNU long link header. 
- realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { - return nil, err - } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil } - return hdr, tr.err + return hdr, nil } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index f0dbd94..861d1a5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -288,6 +288,30 @@ var untarTests = []*untarTest{ }, }, }, + { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{ + { + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, + { + // Matches the behavior of GNU and BSD tar utilities. 
+ file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{ + { + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, { file: "testdata/neg-size.tar", err: ErrHeader, diff --git a/archive/tar/testdata/gnu-multi-hdrs.tar b/archive/tar/testdata/gnu-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..8bcad55d06e8f9fde3641d2a8df370503a582ce6 GIT binary patch literal 4608 zcmdPX*VA|K$%Afaj^QI J<`xZ33jpEZfW-g+ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-multi-hdrs.tar b/archive/tar/testdata/pax-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..14bc7597808020d7bc37e6610482fd9662814a24 GIT binary patch literal 4608 zcmeH~OAf*y5QbTM3NFy_tgc*m1D9?w)<^3;ld-=Ii;m6ILFVT3VXy>=Udb`;P z_AF}Ko(kwITGLdEM1G)5o<9H!jr=439(@V?ONRXCAu*5YPw-#9x&N1V|EJgyTG0B^ zUZ)s9!5N^&Ghph+I3UGBWYR$X|2zH<_}9R{M*cI=m|k}8li%1Drp7@Vny>jkUkM=z Sl}Br1`$oQ%|3`N;j=%%-SrC5! literal 0 HcmV?d00001 From ce5aac17f91d978a37dd742761cf57cd5bdb8ef2 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 2 Dec 2015 15:48:06 -0800 Subject: [PATCH 76/95] archive/tar: properly format GNU base-256 encoding Motivation: * Previous implementation silently failed when an integer overflow occurred. Now, we report an ErrFieldTooLong. * Previous implementation did not encode in two's complement format and was unable to encode negative numbers. The relevant GNU specification says: <<< GNU format uses two's-complement base-256 notation to store values that do not fit into standard ustar range. 
>>> Fixes #12436 Change-Id: I09c20602eabf8ae3a7e0db35b79440a64bfaf807 Reviewed-on: https://go-review.googlesource.com/17425 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/writer.go | 26 ++++++++++-- archive/tar/writer_test.go | 83 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 4 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 688455d..0426381 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -94,13 +94,31 @@ func (f *formatter) formatOctal(b []byte, x int64) { f.formatString(b, s) } +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. 
+func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1< 0 && i >= 0; i-- { - b[i] = byte(x) - x >>= 8 + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return } - b[0] |= 0x80 // highest bit indicates binary format + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong } var ( diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 69a44a6..6e91d90 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "sort" @@ -637,3 +638,85 @@ func TestFormatPAXRecord(t *testing.T) { } } } + +func TestFitsInBase256(t *testing.T) { + var vectors = []struct { + input int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.input) + if ok != v.ok { + t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestFormatNumeric(t *testing.T) { + var vectors = []struct { + input int64 + output string + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + output := make([]byte, len(v.output)) + f.formatNumeric(output, v.input) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) + } + } + if string(output) != v.output { + t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) + } + } +} From a04b4ddba428a52a96f3c046d284916313dc6d2e Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 2 Dec 2015 15:41:44 -0800 Subject: [PATCH 77/95] archive/tar: properly parse GNU base-256 encoding Motivation: * Previous implementation did not detect integer overflow when parsing a base-256 encoded field. * Previous implementation did not treat the integer as a two's complement value as specified by GNU. 
The relevant GNU specification says: <<< GNU format uses two's-complement base-256 notation to store values that do not fit into standard ustar range. >>> Fixes #12435 Change-Id: I4639bcffac8d12e1cb040b76bd05c9d7bc6c23a8 Reviewed-on: https://go-review.googlesource.com/17424 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 44 +++++++++++++++---- archive/tar/reader_test.go | 66 ++++++++++++++++++++++++++++ archive/tar/testdata/issue12435.tar | Bin 0 -> 512 bytes 3 files changed, 102 insertions(+), 8 deletions(-) create mode 100644 archive/tar/testdata/issue12435.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index ba34ed7..6e77cbe 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -480,19 +480,47 @@ func (*parser) parseString(b []byte) string { return string(b[0:n]) } +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. func (p *parser) parseNumeric(b []byte) int64 { - // Check for binary format first. + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. if len(b) > 0 && b[0]&0x80 != 0 { - var x int64 - for i, c := range b { - if i == 0 { - c &= 0x7f // ignore signal bit in first byte - } - x = x<<8 | int64(c) + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. 
+ var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff } - return x + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) } + // Normal case is base-8 (octal) format. return p.parseOctal(b) } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 861d1a5..7b148b5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -324,6 +324,10 @@ var untarTests = []*untarTest{ file: "testdata/issue11169.tar", err: ErrHeader, }, + { + file: "testdata/issue12435.tar", + err: ErrHeader, + }, } func TestReader(t *testing.T) { @@ -1057,3 +1061,65 @@ func TestParsePAXRecord(t *testing.T) { } } } + +func TestParseNumeric(t *testing.T) { + var vectors = []struct { + input string + output int64 + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. 
+ {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + num := p.parseNumeric([]byte(v.input)) + ok := (p.err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) + } + } + if ok && num != v.output { + t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) + } + } +} diff --git a/archive/tar/testdata/issue12435.tar b/archive/tar/testdata/issue12435.tar new file mode 100644 index 0000000000000000000000000000000000000000..3542dd8efd5d486b99ae03f39a56860af1c09af0 GIT binary patch literal 512 lcmZQzpgs7{2(bf|=G;FWht&p9;C+0@6=oc2Mun*p0sxa^2!Q|q literal 0 HcmV?d00001 From 962540fec3dc41e7256a85182b22926921231518 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Dec 2015 11:26:26 -0800 Subject: [PATCH 78/95] archive/tar: spell license correctly in example Change-Id: Ice85d161f026a991953bd63ecc6ec80f8d06dfbd Reviewed-on: https://go-review.googlesource.com/17901 Run-TryBot: Joe Tsai Reviewed-by: Brad Fitzpatrick --- archive/tar/example_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archive/tar/example_test.go b/archive/tar/example_test.go index 2317f44..5f0ce2f 100644 --- a/archive/tar/example_test.go +++ b/archive/tar/example_test.go @@ -26,7 +26,7 @@ func Example() { }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, - {"todo.txt", "Get animal handling licence."}, + {"todo.txt", "Get animal handling license."}, 
} for _, file := range files { hdr := &tar.Header{ @@ -76,5 +76,5 @@ func Example() { // Geoffrey // Gonzo // Contents of todo.txt: - // Get animal handling licence. + // Get animal handling license. } From 10db8408f660956a312a7fd9a5b8d0f74175e8ab Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Dec 2015 23:10:14 -0800 Subject: [PATCH 79/95] archive/tar: document how Reader.Read handles header-only files Commit dd5e14a7511465d20c6e95bf54c9b8f999abbbf6 ensured that no data could be read for header-only files regardless of what the Header.Size said. We should document this fact in Reader.Read. Updates #13647 Change-Id: I4df9a2892bc66b49e0279693d08454bf696cfa31 Reviewed-on: https://go-review.googlesource.com/17913 Reviewed-by: Russ Cox --- archive/tar/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6e77cbe..a8b63a2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -928,6 +928,10 @@ func (tr *Reader) numBytes() int64 { // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. +// +// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// the Header.Size claims. 
func (tr *Reader) Read(b []byte) (n int, err error) { if tr.err != nil { return 0, tr.err From c32966b9e8c3b429d6c7999ab2037bd537d60420 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Feb 2016 09:38:46 -0500 Subject: [PATCH 80/95] archive/tar: go1.3 and go1.4 compatibility Signed-off-by: Vincent Batts --- archive/tar/reader_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 7b148b5..821b4f0 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -344,7 +344,6 @@ func TestReader(t *testing.T) { tr = NewReader(f) hdrs []*Header chksums []string - rdbuf = make([]byte, 8) ) for { var hdr *Header @@ -361,7 +360,7 @@ func TestReader(t *testing.T) { continue } h := md5.New() - _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + _, err = io.Copy(h, tr) // Effectively an incremental read if err != nil { break } From 24fe0a94fe8e2f2c42cb1d49277379e8f2dbb06e Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Feb 2016 09:44:28 -0500 Subject: [PATCH 81/95] version: bump to v0.9.13 Signed-off-by: Vincent Batts --- version/version.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/version/version.go b/version/version.go index c41a8f2..345274d 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version + // AUTO-GENEREATED. 
DO NOT EDIT -// 2016-01-31 01:39:06.012784413 -0500 EST +// 2016-02-15 09:43:15.919197079 -0500 EST // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.9.12" - \ No newline at end of file +var VERSION = "v0.9.13-19-862ccd05b" From e2a62d6b0d98fd7f1a57646812c74564fda999b4 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 29 Feb 2016 11:40:38 -0500 Subject: [PATCH 82/95] README.md: fix thumbnail Signed-off-by: Vincent Batts --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 90a8edf..02bbcd0 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ This demonstrates the `tar-split` command and how to assemble a tar archive from This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content. -![docker tar-split demo](https://www.youtube.com/upload_thumbnail?v=tV_Dia8E8xw&t=2&ts=1445028436275) +![docker tar-split demo](https://i.ytimg.com/vi_webp/vh5wyjIOBtc/default.webp) [youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw) ## Caveat From 226f7c74905f1fcc08ac128b517a1d65a1948eb9 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 30 Mar 2016 16:38:51 -0400 Subject: [PATCH 83/95] README: update `archive/tar` version reference Signed-off-by: Vincent Batts --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 02bbcd0..4c544d8 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f). +The version of golang stdlib `archive/tar` is from go1.6 It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. 
From 354fd6cf34bfa908f2d9796de5b1ff955b060ca6 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sun, 26 Jun 2016 10:15:12 -0400 Subject: [PATCH 84/95] cmd: add a `disasm --no-stdout` flag Since sometimes you just need to > /dev/null Signed-off-by: Vincent Batts --- cmd/tar-split/disasm.go | 9 ++++++++- cmd/tar-split/main.go | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go index b7b0dfe..7f8a056 100644 --- a/cmd/tar-split/disasm.go +++ b/cmd/tar-split/disasm.go @@ -3,6 +3,7 @@ package main import ( "compress/gzip" "io" + "io/ioutil" "os" "github.com/Sirupsen/logrus" @@ -48,7 +49,13 @@ func CommandDisasm(c *cli.Context) { if err != nil { logrus.Fatal(err) } - i, err := io.Copy(os.Stdout, its) + var out io.Writer + if c.Bool("no-stdout") { + out = ioutil.Discard + } else { + out = os.Stdout + } + i, err := io.Copy(out, its) if err != nil { logrus.Fatal(err) } diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index b417120..c584352 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -42,6 +42,10 @@ func main() { Value: "tar-data.json.gz", Usage: "output of disassembled tar stream", }, + cli.BoolFlag{ + Name: "no-stdout", + Usage: "do not throughput the stream to STDOUT", + }, }, }, { From beaeceb06f0a81f66e6687a2d830ed3a1e6d0875 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sun, 26 Jun 2016 14:56:04 -0400 Subject: [PATCH 85/95] travis: update golang versions This is not saying that tar-split no longer works on go1.3 or go1.4, but rather that the headache of `go vet` having a version dependent ability to install it, makes it a headache in travis. 
Signed-off-by: Vincent Batts --- .travis.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index c0a17c7..2d9a842 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,14 @@ language: go go: - tip - - 1.5.1 - - 1.4.3 - - 1.3.3 + - 1.6.2 + - 1.5.4 # let us have pretty, fast Docker-based Travis workers! sudo: false install: - go get -d ./... - - go get golang.org/x/tools/cmd/vet script: - go test -v ./... From 6810cedb21b2c3d0b9bb8f9af12ff2dc7a2f14df Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 26 Jul 2016 09:50:08 -0400 Subject: [PATCH 86/95] benchmark: add a comparison of 'archive/tar' Since this project has forked logic of upstream 'archive/tar', this does a brief comparison including the RawBytes usage. ```bash $ go test -run="XXX" -bench=. testing: warning: no tests to run BenchmarkUpstreamTar-4 2000 700809 ns/op BenchmarkOurTarNoAccounting-4 2000 692055 ns/op BenchmarkOurTarYesAccounting-4 2000 723184 ns/op PASS ok vb/tar-split 4.461s ``` From this, the difference is negligible. 
Signed-off-by: Vincent Batts --- tar_benchmark_test.go | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tar_benchmark_test.go diff --git a/tar_benchmark_test.go b/tar_benchmark_test.go new file mode 100644 index 0000000..d946f2a --- /dev/null +++ b/tar_benchmark_test.go @@ -0,0 +1,84 @@ +package tartest + +import ( + "io" + "io/ioutil" + "os" + "testing" + + upTar "archive/tar" + + ourTar "github.com/vbatts/tar-split/archive/tar" +) + +var testfile = "./archive/tar/testdata/sparse-formats.tar" + +func BenchmarkUpstreamTar(b *testing.B) { + for n := 0; n < b.N; n++ { + fh, err := os.Open(testfile) + if err != nil { + b.Fatal(err) + } + tr := upTar.NewReader(fh) + for { + _, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + fh.Close() + b.Fatal(err) + } + io.Copy(ioutil.Discard, tr) + } + fh.Close() + } +} + +func BenchmarkOurTarNoAccounting(b *testing.B) { + for n := 0; n < b.N; n++ { + fh, err := os.Open(testfile) + if err != nil { + b.Fatal(err) + } + tr := ourTar.NewReader(fh) + tr.RawAccounting = false // this is default, but explicit here + for { + _, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + fh.Close() + b.Fatal(err) + } + io.Copy(ioutil.Discard, tr) + } + fh.Close() + } +} +func BenchmarkOurTarYesAccounting(b *testing.B) { + for n := 0; n < b.N; n++ { + fh, err := os.Open(testfile) + if err != nil { + b.Fatal(err) + } + tr := ourTar.NewReader(fh) + tr.RawAccounting = true // This enables mechanics for collecting raw bytes + for { + _ = tr.RawBytes() + _, err := tr.Next() + _ = tr.RawBytes() + if err != nil { + if err == io.EOF { + break + } + fh.Close() + b.Fatal(err) + } + io.Copy(ioutil.Discard, tr) + _ = tr.RawBytes() + } + fh.Close() + } +} From e527e70d2599f4294397c46bffe9806f404ce1eb Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Thu, 22 Sep 2016 17:38:18 -0700 Subject: [PATCH 87/95] Fix panic in Next readHeader should never return nil with a tr.err 
also nil. To correct this, ensure tr.err never gets reset to nil followed by a nil return. --- archive/tar/reader.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index a8b63a2..fdc8ae3 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -611,7 +611,8 @@ func (tr *Reader) readHeader() *Header { if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + if _, err := tr.rawBytes.Write(header); err != nil { + tr.err = err return nil } } From eb3808673d60f519722fc86f095f90e4edfbb32a Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 23 Sep 2016 11:01:58 -0400 Subject: [PATCH 88/95] version: bump to v0.10.0 Signed-off-by: Vincent Batts --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index 345274d..b39b671 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version // AUTO-GENEREATED. 
DO NOT EDIT -// 2016-02-15 09:43:15.919197079 -0500 EST +// 2016-09-23 11:00:18.92191222 -0400 EDT // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.9.13-19-862ccd05b" +var VERSION = "v0.10.0-9-gae8540d" From 7410961e758c155389fab3f0391e65320854acbc Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 26 Sep 2016 14:51:07 -0400 Subject: [PATCH 89/95] tar/asm: failing test for lack of EOF nils Reported-by: Derek McGowan Signed-off-by: Vincent Batts --- tar/asm/assemble_test.go | 2 ++ tar/asm/testdata/extranils.tar.gz | Bin 0 -> 127 bytes tar/asm/testdata/notenoughnils.tar.gz | Bin 0 -> 91 bytes 3 files changed, 2 insertions(+) create mode 100644 tar/asm/testdata/extranils.tar.gz create mode 100644 tar/asm/testdata/notenoughnils.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index c0c7f17..afdce9d 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -139,6 +139,8 @@ var testCases = []struct { {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, + {"./testdata/extranils.tar.gz", "e187b4b3e739deaccc257342f4940f34403dc588", 10648}, + {"./testdata/notenoughnils.tar.gz", "72f93f41efd95290baa5c174c234f5d4c22ce601", 512}, } func TestTarStream(t *testing.T) { diff --git a/tar/asm/testdata/extranils.tar.gz b/tar/asm/testdata/extranils.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70caf6e6e7641340e6bbe7fa1c36bcf7b19b9533 GIT binary patch literal 127 zcmb2|=HTE?e;LlioLW&*l$e*9Q><5#Sj6!5lr7gG2a(o`+owbx+GCh@O<1uhVXDjZ z_wv(jWtRFC|9l|l|J<@}&Ar~kA8x$){p(H5*XWejd$$%|FY1{!)44b3F~8nxGu@>} cz8d7-{`J;eKbrv#YD$=2`88gep~=7i0Ka=WQvd(} literal 0 HcmV?d00001 diff --git a/tar/asm/testdata/notenoughnils.tar.gz b/tar/asm/testdata/notenoughnils.tar.gz new file mode 100644 
index 0000000000000000000000000000000000000000..146bb008bbeb2ff6e5fe1232dfc29894f85ed57b GIT binary patch literal 91 zcmV-h0HpsPiwFShZ0T15153-yNoAlhFaQEG6B7_^Xl`l%<{LuzFu-7FWNvO`WM*t+ xXvSb*XlQ6+z@T8jKnGA-T#{I%pukX;lvq+yj89!^Q8B}?0{|>t!}0(E001l6BE0|r literal 0 HcmV?d00001 From 6b59e6942e2c9a03fd4506102adfa082a96b5ace Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Mon, 26 Sep 2016 14:01:48 -0700 Subject: [PATCH 90/95] archive/tar: fix writing too many raw bytes When an EOF is read, only the part of the header buffer which was read should be accounted for. Signed-off-by: Derek McGowan --- archive/tar/reader.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index fdc8ae3..adf3212 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -608,12 +608,12 @@ func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if n, err := io.ReadFull(tr.r, header); err != nil { + tr.err = err // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header); err != nil { + if _, err := tr.rawBytes.Write(header[:n]); err != nil { tr.err = err - return nil } } return nil // io.EOF is okay here @@ -626,11 +626,12 @@ func (tr *Reader) readHeader() *Header { // Two blocks of zero bytes marks the end of the archive. 
if bytes.Equal(header, zeroBlock[0:blockSize]) { - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if n, err := io.ReadFull(tr.r, header); err != nil { + tr.err = err // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil + if _, err := tr.rawBytes.Write(header[:n]); err != nil { + tr.err = err } } return nil // io.EOF is okay here From d3f1b54304d656376e58f9406a9cb4775799a357 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 26 Sep 2016 19:53:52 -0400 Subject: [PATCH 91/95] version: bump to v0.10.1 Signed-off-by: Vincent Batts --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index b39b671..f317010 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version // AUTO-GENEREATED. DO NOT EDIT -// 2016-09-23 11:00:18.92191222 -0400 EDT +// 2016-09-26 19:53:30.825879 -0400 EDT // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.10.0-9-gae8540d" +var VERSION = "v0.10.1-4-gf280282" From bd4c5d64c3e9297f410025a3b1bd0c58f659e721 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 27 Sep 2016 02:54:18 +0000 Subject: [PATCH 92/95] main: switch import paths to urfave Signed-off-by: Vincent Batts --- cmd/tar-split/asm.go | 2 +- cmd/tar-split/checksize.go | 2 +- cmd/tar-split/disasm.go | 2 +- cmd/tar-split/main.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/tar-split/asm.go b/cmd/tar-split/asm.go index 312e54b..e188ce1 100644 --- a/cmd/tar-split/asm.go +++ b/cmd/tar-split/asm.go @@ -6,7 +6,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/checksize.go b/cmd/tar-split/checksize.go index 
38f830e..1e5eed7 100644 --- a/cmd/tar-split/checksize.go +++ b/cmd/tar-split/checksize.go @@ -10,7 +10,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go index 7f8a056..5472894 100644 --- a/cmd/tar-split/disasm.go +++ b/cmd/tar-split/disasm.go @@ -7,7 +7,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index c584352..8b4035f 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -4,7 +4,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/version" ) From 7560005f21d918cc59698527424fc6759877152f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 13 Mar 2017 18:28:54 -0400 Subject: [PATCH 93/95] README: adding a golang report card Signed-off-by: Vincent Batts --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4c544d8..c2e7f48 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # tar-split [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) +[![Go Report Card](https://goreportcard.com/badge/github.com/vbatts/tar-split)](https://goreportcard.com/report/github.com/vbatts/tar-split) Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. 
From 245403c324d6ea47167227ee70aaef1c4c87ff43 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 13 Mar 2017 18:33:27 -0400 Subject: [PATCH 94/95] travis: test more go versions Thanks to @tianon, for pointing to https://github.com/travis-ci/travis-build/blob/5e3ef60b0d43b8ef56fb6fa77ba3269fbf945fa6/lib/travis/build/config.rb#L54-L70 Signed-off-by: Vincent Batts --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2d9a842..dcce57a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,11 @@ language: go go: - tip - - 1.6.2 - - 1.5.4 + - 1.x + - 1.8.x + - 1.7.x + - 1.6.x + - 1.5.x # let us have pretty, fast Docker-based Travis workers! sudo: false From c6dd42815acecceec10987978f48488c37ead18f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 14 Mar 2017 11:04:10 -0400 Subject: [PATCH 95/95] archive/tar: monotonic clock adjustment commit 0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb upstream Signed-off-by: Vincent Batts --- archive/tar/tar_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index d63c072..9ef319a 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -94,13 +94,12 @@ func TestRoundTrip(t *testing.T) { var b bytes.Buffer tw := NewWriter(&b) hdr := &Header{ - Name: "file.txt", - Uid: 1 << 21, // too big for 8 octal digits - Size: int64(len(data)), - ModTime: time.Now(), + Name: "file.txt", + Uid: 1 << 21, // too big for 8 octal digits + Size: int64(len(data)), + // https://github.com/golang/go/commit/0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb#diff-d7bf2a98d7b57b6ff754ca406f1b7581R105 + ModTime: time.Now().AddDate(0, 0, 0).Round(1 * time.Second), } - // tar only supports second precision. - hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond) if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("tw.WriteHeader: %v", err) }