From e33913bf758ae6e960d3802ccbc25201b6a245f8 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 15 Jul 2015 13:43:48 -0400 Subject: [PATCH 01/95] tar/asm: don't defer file closing this `for {}` can read many files. deferring the file handle close can cause an EMFILE (too many open files). --- tar/asm/assemble.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index ec15612..d18bfc5 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -44,10 +44,10 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose pw.CloseWithError(err) break } - defer fh.Close() c := crc64.New(storage.CRCTable) tRdr := io.TeeReader(fh, c) if _, err := io.Copy(pw, tRdr); err != nil { + fh.Close() pw.CloseWithError(err) break } @@ -55,9 +55,11 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // I would rather this be a comparable ErrInvalidChecksum or such, // but since it's coming through the PipeReader, the context of // _which_ file would be lost... 
+ fh.Close() pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) break } + fh.Close() } } pw.Close() From 6094dcaecab45e4ce00583d1ae52f777896b4f69 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 15:47:10 -0400 Subject: [PATCH 02/95] concept: move the PoC out of the root directory --- main.go => concept/main.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename main.go => concept/main.go (100%) diff --git a/main.go b/concept/main.go similarity index 100% rename from main.go rename to concept/main.go From fd84b2fdfd88435e133e4aba59a3facded3cc01d Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 15:51:20 -0400 Subject: [PATCH 03/95] cmd/tar-split: adding a cli tool for asm/disasm --- cmd/tar-split/README.md | 25 ++++++ cmd/tar-split/main.go | 175 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 cmd/tar-split/README.md create mode 100644 cmd/tar-split/main.go diff --git a/cmd/tar-split/README.md b/cmd/tar-split/README.md new file mode 100644 index 0000000..5451be0 --- /dev/null +++ b/cmd/tar-split/README.md @@ -0,0 +1,25 @@ +## tar-split utility + + +## Usage + +### Disassembly + +```bash +$ sha256sum archive.tar +d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar +$ mkdir ./x +$ tar-split d --output tar-data.json.gz ./archive.tar | tar -C ./x -x +time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" +``` + +### Assembly + +```bash +$ tar-split a --output new.tar --input ./tar-data.json.gz --path ./x/ +INFO[0000] created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) +$ sha256sum new.tar +d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar +``` + + diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go new file mode 100644 index 0000000..8c631b0 --- /dev/null +++ b/cmd/tar-split/main.go @@ -0,0 +1,175 @@ +// go:generate git 
tag | tail -1 +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func main() { + app := cli.NewApp() + app.Name = "tar-split" + app.Usage = "tar assembly and disassembly utility" + app.Version = "0.9.2" + app.Author = "Vincent Batts" + app.Email = "vbatts@hashbangbash.com" + app.Action = cli.ShowAppHelp + app.Before = func(c *cli.Context) error { + logrus.SetOutput(os.Stderr) + if c.Bool("debug") { + logrus.SetLevel(logrus.DebugLevel) + } + return nil + } + app.Flags = []cli.Flag{ + cli.BoolFlag{ + Name: "debug, D", + Usage: "debug output", + // defaults to false + }, + } + app.Commands = []cli.Command{ + { + Name: "disasm", + Aliases: []string{"d"}, + Usage: "disassemble the input tar stream", + Action: CommandDisasm, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "output", + Value: "tar-data.json.gz", + Usage: "output of disassembled tar stream", + }, + }, + }, + { + Name: "asm", + Aliases: []string{"a"}, + Usage: "assemble tar stream", + Action: CommandAsm, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "input", + Value: "tar-data.json.gz", + Usage: "input of disassembled tar stream", + }, + cli.StringFlag{ + Name: "output", + Value: "-", + Usage: "reassembled tar archive", + }, + cli.StringFlag{ + Name: "path", + Value: "", + Usage: "relative path of extracted tar", + }, + }, + }, + } + + if err := app.Run(os.Args); err != nil { + logrus.Fatal(err) + } +} + +func CommandDisasm(c *cli.Context) { + if len(c.Args()) != 1 { + logrus.Fatalf("please specify tar to be disabled ") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set") + } + + // Set up the tar input stream + var inputStream io.Reader + if c.Args()[0] == "-" { + inputStream = os.Stdin + } else { + fh, err := os.Open(c.Args()[0]) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + inputStream = fh + } 
+ + // Set up the metadata storage + mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz := gzip.NewWriter(mf) + defer mfz.Close() + metaPacker := storage.NewJSONPacker(mfz) + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) + if err != nil { + logrus.Fatal(err) + } + i, err := io.Copy(os.Stdout, its) + if err != nil { + logrus.Fatal(err) + } + logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) +} + +func CommandAsm(c *cli.Context) { + if len(c.Args()) > 0 { + logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) + } + if len(c.String("input")) == 0 { + logrus.Fatalf("--input filename must be set") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set ([FILENAME|-])") + } + if len(c.String("path")) == 0 { + logrus.Fatalf("--path must be set") + } + + var outputStream io.Writer + if c.String("output") == "-" { + outputStream = os.Stdout + } else { + fh, err := os.Create(c.String("output")) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + outputStream = fh + } + + // Get the tar metadata reader + mf, err := os.Open(c.String("input")) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz, err := gzip.NewReader(mf) + if err != nil { + logrus.Fatal(err) + } + defer mfz.Close() + + metaUnpacker := storage.NewJSONUnpacker(mfz) + // XXX maybe get the absolute path here + fileGetter := storage.NewPathFileGetter(c.String("path")) + + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + i, err := io.Copy(outputStream, ots) + if err != nil { + logrus.Fatal(err) + } + + logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) +} From 
04172717dedf3cb868310a286ab87a62c02a08f1 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 16:46:22 -0400 Subject: [PATCH 04/95] tar/asm: test for failure when mangling --- tar/asm/assemble_test.go | 57 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 203e716..b8a70ef 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -5,6 +5,7 @@ import ( "compress/gzip" "crypto/sha1" "fmt" + "hash/crc64" "io" "io/ioutil" "os" @@ -33,12 +34,36 @@ var entries = []struct { Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, - Body: []byte("café con leche, por favor"), }, } +var entriesMangled = []struct { + Entry storage.Entry + Body []byte +}{ + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./hurr.txt", + Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78}, + Size: 20, + }, + // switch + Body: []byte("imma derp til I hurr"), + }, + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./ermahgerd.txt", + Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + // san not con + Body: []byte("café sans leche, por favor"), + }, +} -func TestTarStreamOld(t *testing.T) { +func TestTarStreamMangledGetterPutter(t *testing.T) { fgp := storage.NewBufferFileGetPutter() // first lets prep a GetPutter and Packer @@ -63,9 +88,33 @@ func TestTarStreamOld(t *testing.T) { } } + for _, e := range entriesMangled { + if e.Entry.Type == storage.FileType { + rdr, err := fgp.Get(e.Entry.Name) + if err != nil { + t.Error(err) + } + c := crc64.New(storage.CRCTable) + i, err := io.Copy(c, rdr) + if err != nil { + t.Fatal(err) + } + rdr.Close() + + csum := c.Sum(nil) + if !bytes.Equal(csum, e.Entry.Payload) { + t.Errorf("wrote %d bytes. 
checksum %q: expected %v; got %v", + i, + e.Entry.Name, + e.Entry.Payload, + csum) + } + } + } + + // TODO test a mangled relative path assembly // next we'll use these to produce a tar stream. - _ = NewOutputTarStream(fgp, nil) - // TODO finish this + //_ = NewOutputTarStream(fgp, nil) } func TestTarStream(t *testing.T) { From 97acaa9e83a16a2f816754fac034d9796fbc4a35 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 17:22:10 -0400 Subject: [PATCH 05/95] travis: needing to fetch the cmd dependencies --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f700909..bdf0ea7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ go: sudo: false # we don't need "go get" here <3 -install: true +install: go get -d ./... script: - go test -v ./... From c74af0bae74b4ac842b75ac969a3b86502c55d0d Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 17:26:16 -0400 Subject: [PATCH 06/95] tar/asm: test was flipped --- tar/asm/assemble_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index b8a70ef..c3bda7a 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -102,11 +102,10 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { rdr.Close() csum := c.Sum(nil) - if !bytes.Equal(csum, e.Entry.Payload) { - t.Errorf("wrote %d bytes. checksum %q: expected %v; got %v", + if bytes.Equal(csum, e.Entry.Payload) { + t.Errorf("wrote %d bytes. checksum for %q should not have matched! 
%v", i, e.Entry.Name, - e.Entry.Payload, csum) } } From d3556a05511eb4e0bbc6ca66ea0b2083e1992c04 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 20 Jul 2015 20:16:42 -0400 Subject: [PATCH 07/95] travis: go1.4.1 -> go1.4.2 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bdf0ea7..fc1571c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: go go: - - 1.4.1 + - 1.4.2 - 1.3.3 # let us have pretty, fast Docker-based Travis workers! From 6d59e7bc76156496c4c7b30ddcd9364b592100a0 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 21 Jul 2015 12:08:57 -0400 Subject: [PATCH 08/95] tar/asm: clean up return on errors This closure on error message needs returns so that the error message is bubbled up to the reader. --- tar/asm/disassemble.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index de25db0..785e194 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -61,6 +61,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) + return } break // not return. We need the end of the reader. 
} @@ -73,6 +74,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io Payload: tr.RawBytes(), }); err != nil { pW.CloseWithError(err) + return } var csum []byte @@ -81,6 +83,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io _, csum, err = fp.Put(hdr.Name, tr) if err != nil { pW.CloseWithError(err) + return } } @@ -93,6 +96,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) + return } if b := tr.RawBytes(); len(b) > 0 { @@ -102,6 +106,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) + return } } } @@ -111,6 +116,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io remainder, err := ioutil.ReadAll(outputRdr) if err != nil && err != io.EOF { pW.CloseWithError(err) + return } _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, @@ -118,9 +124,9 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io }) if err != nil { pW.CloseWithError(err) - } else { - pW.Close() + return } + pW.Close() }() return pR, nil From c2c2dde4cbcb1db413c244c22ea189a60722ae2f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 22 Jul 2015 10:27:53 -0400 Subject: [PATCH 09/95] tar/storage: use `filepath` instead of `path` --- tar/storage/getter.go | 13 ++++++------- tar/storage/packer.go | 6 +++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tar/storage/getter.go b/tar/storage/getter.go index 5d46e6a..c44b15e 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -7,23 +7,23 @@ import ( "io" "io/ioutil" "os" - "path" + "path/filepath" ) // FileGetter is the interface for getting a stream of a file payload, address -// by name/filepath. Presumably, the names will be scoped to relative file +// by name/filename. Presumably, the names will be scoped to relative file // paths. 
type FileGetter interface { // Get returns a stream for the provided file path - Get(filepath string) (output io.ReadCloser, err error) + Get(filename string) (output io.ReadCloser, err error) } // FilePutter is the interface for storing a stream of a file payload, -// addressed by name/filepath. +// addressed by name/filename. type FilePutter interface { // Put returns the size of the stream received, and the crc64 checksum for // the provided stream - Put(filepath string, input io.Reader) (size int64, checksum []byte, err error) + Put(filename string, input io.Reader) (size int64, checksum []byte, err error) } // FileGetPutter is the interface that groups both Getting and Putting file @@ -44,8 +44,7 @@ type pathFileGetter struct { } func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) { - // FIXME might should have a check for '../../../../etc/passwd' attempts? - return os.Open(path.Join(pfg.root, filename)) + return os.Open(filepath.Join(pfg.root, filename)) } type bufferFileGetPutter struct { diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 6c4364b..c0070a6 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" "io" - "path" + "path/filepath" ) // ErrDuplicatePath is occured when a tar archive has more than one entry for @@ -61,7 +61,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // check for dup name if e.Type == FileType { - cName := path.Clean(e.Name) + cName := filepath.Clean(e.Name) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } @@ -99,7 +99,7 @@ const emptyByte byte = 0 func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // check early for dup name if e.Type == FileType { - cName := path.Clean(e.Name) + cName := filepath.Clean(e.Name) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } From e0e9886972e2ab8ae9190428d1d3030c5c80c483 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 22 Jul 2015 11:32:18 -0400 Subject: 
[PATCH 10/95] tar/asm: return instead of break https://github.com/vbatts/docker/commit/5ddec2ae4a74552cb358bae90e272398a957cfc4#commitcomment-12290378 Reported-by: Tibor Vass --- tar/asm/assemble.go | 10 +++++----- tar/asm/assemble_test.go | 9 +-------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index d18bfc5..1bef97b 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -27,13 +27,13 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose entry, err := up.Next() if err != nil { pw.CloseWithError(err) - break + return } switch entry.Type { case storage.SegmentType: if _, err := pw.Write(entry.Payload); err != nil { pw.CloseWithError(err) - break + return } case storage.FileType: if entry.Size == 0 { @@ -42,14 +42,14 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose fh, err := fg.Get(entry.Name) if err != nil { pw.CloseWithError(err) - break + return } c := crc64.New(storage.CRCTable) tRdr := io.TeeReader(fh, c) if _, err := io.Copy(pw, tRdr); err != nil { fh.Close() pw.CloseWithError(err) - break + return } if !bytes.Equal(c.Sum(nil), entry.Payload) { // I would rather this be a comparable ErrInvalidChecksum or such, @@ -57,7 +57,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // _which_ file would be lost... fh.Close() pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) - break + return } fh.Close() } diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index c3bda7a..7cf44dc 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -110,10 +110,6 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } } } - - // TODO test a mangled relative path assembly - // next we'll use these to produce a tar stream. 
- //_ = NewOutputTarStream(fgp, nil) } func TestTarStream(t *testing.T) { @@ -171,10 +167,7 @@ func TestTarStream(t *testing.T) { rc := NewOutputTarStream(fgp, sup) h1 := sha1.New() - tRdr1 := io.TeeReader(rc, h1) - - // read it all to the bit bucket - i, err = io.Copy(ioutil.Discard, tRdr1) + i, err = io.Copy(h1, rc) if err != nil { t.Fatal(err) } From 002d19f0b0eaac11e4e811097dae981f3cd64331 Mon Sep 17 00:00:00 2001 From: Jonathan Boulle Date: Tue, 23 Jun 2015 13:13:29 -0700 Subject: [PATCH 11/95] *: clean up assorted spelling/grammar issues Various minor fixes noticed on walking through --- tar/asm/assemble.go | 2 +- tar/asm/disassemble.go | 6 +++--- tar/storage/doc.go | 2 +- tar/storage/entry.go | 6 +++--- tar/storage/getter.go | 8 ++++---- tar/storage/packer.go | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 1bef97b..b421db0 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -9,7 +9,7 @@ import ( "github.com/vbatts/tar-split/tar/storage" ) -// NewOutputTarStream returns an io.ReadCloser that is an assemble tar archive +// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive // stream. // // It takes a storage.FileGetter, for mapping the file payloads that are to be read in, diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 785e194..4a8ed94 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -22,8 +22,8 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io // What to do here... folks will want their own access to the Reader that is // their tar archive stream, but we'll need that same stream to use our // forked 'archive/tar'. - // Perhaps do an io.TeeReader that hand back an io.Reader for them to read - // from, and we'll mitm the stream to store metadata. + // Perhaps do an io.TeeReader that hands back an io.Reader for them to read + // from, and we'll MITM the stream to store metadata. 
// We'll need a storage.FilePutter too ... // Another concern, whether to do any storage.FilePutter operations, such that we @@ -32,7 +32,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io // Perhaps we have a DiscardFilePutter that is a bit bucket. // we'll return the pipe reader, since TeeReader does not buffer and will - // only read what the outputRdr Read's. Since Tar archive's have padding on + // only read what the outputRdr Read's. Since Tar archives have padding on // the end, we want to be the one reading the padding, even if the user's // `archive/tar` doesn't care. pR, pW := io.Pipe() diff --git a/tar/storage/doc.go b/tar/storage/doc.go index 57b61bc..83f7089 100644 --- a/tar/storage/doc.go +++ b/tar/storage/doc.go @@ -5,7 +5,7 @@ Packing and unpacking the Entries of the stream. The types of streams are either segments of raw bytes (for the raw headers and various padding) and for an entry marking a file payload. -The raw bytes are stored precisely in the packed (marshalled) Entry. Where as +The raw bytes are stored precisely in the packed (marshalled) Entry, whereas the file payload marker include the name of the file, size, and crc64 checksum (for basic file integrity). */ diff --git a/tar/storage/entry.go b/tar/storage/entry.go index 961af49..57a0256 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -19,11 +19,11 @@ const ( // SegmentType represents a raw bytes segment from the archive stream. These raw // byte segments consist of the raw headers and various padding. // - // It's payload is to be marshalled base64 encoded. + // Its payload is to be marshalled base64 encoded. SegmentType ) -// Entry is a the structure for packing and unpacking the information read from +// Entry is the structure for packing and unpacking the information read from // the Tar archive. 
// // FileType Payload checksum is using `hash/crc64` for basic file integrity, @@ -34,6 +34,6 @@ type Entry struct { Type Type `json:"type"` Name string `json:"name",omitempty` Size int64 `json:"size",omitempty` - Payload []byte `json:"payload"` // SegmentType store payload here; FileType store crc64 checksum here; + Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } diff --git a/tar/storage/getter.go b/tar/storage/getter.go index c44b15e..ae110c6 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -10,9 +10,9 @@ import ( "path/filepath" ) -// FileGetter is the interface for getting a stream of a file payload, address -// by name/filename. Presumably, the names will be scoped to relative file -// paths. +// FileGetter is the interface for getting a stream of a file payload, +// addressed by name/filename. Presumably, the names will be scoped to relative +// file paths. type FileGetter interface { // Get returns a stream for the provided file path Get(filename string) (output io.ReadCloser, err error) @@ -77,7 +77,7 @@ type readCloserWrapper struct { func (w *readCloserWrapper) Close() error { return nil } -// NewBufferFileGetPutter is simple in memory FileGetPutter +// NewBufferFileGetPutter is a simple in-memory FileGetPutter // // Implication is this is memory intensive... // Probably best for testing or light weight cases. 
diff --git a/tar/storage/packer.go b/tar/storage/packer.go index c0070a6..584978e 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -8,8 +8,8 @@ import ( "path/filepath" ) -// ErrDuplicatePath is occured when a tar archive has more than one entry for -// the same file path +// ErrDuplicatePath occurs when a tar archive has more than one entry for the +// same file path var ErrDuplicatePath = errors.New("duplicates of file paths not supported") // Packer describes the methods to pack Entries to a storage destination @@ -117,7 +117,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) { return e.Position, nil } -// NewJSONPacker provides an Packer that writes each Entry (SegmentType and +// NewJSONPacker provides a Packer that writes each Entry (SegmentType and // FileType) as a json document. // // The Entries are delimited by new line. From caf6a872c9ac84f4c0e57d4170a6eaca6a07450e Mon Sep 17 00:00:00 2001 From: Jonathan Boulle Date: Tue, 23 Jun 2015 13:13:54 -0700 Subject: [PATCH 12/95] tar/storage: switch to map[string]struct{} for set Using an empty struct is more idiomatic/efficient for representing a set-like container. --- tar/storage/packer.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 584978e..a02a19a 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -65,7 +65,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } - jup.seen[cName] = emptyByte + jup.seen[cName] = struct{}{} } return &e, err @@ -90,11 +90,7 @@ type jsonPacker struct { seen seenNames } -type seenNames map[string]byte - -// used in the seenNames map. byte is a uint8, and we'll re-use the same one -// for minimalism. 
-const emptyByte byte = 0 +type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // check early for dup name @@ -103,7 +99,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) { if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } - jp.seen[cName] = emptyByte + jp.seen[cName] = struct{}{} } e.Position = jp.pos From de37d1755a80f132275a05bec7c5cd4b67f2bbc9 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 28 Jul 2015 15:45:24 -0400 Subject: [PATCH 13/95] travis: incorrect comment --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fc1571c..21d6684 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ go: # let us have pretty, fast Docker-based Travis workers! sudo: false -# we don't need "go get" here <3 install: go get -d ./... script: From f465e4720e45ad7c6f4849bd74af7e27fefabc18 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 28 Jul 2015 17:16:04 -0400 Subject: [PATCH 14/95] cmd/tar-split: adding to the README --- cmd/tar-split/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmd/tar-split/README.md b/cmd/tar-split/README.md index 5451be0..eae23c2 100644 --- a/cmd/tar-split/README.md +++ b/cmd/tar-split/README.md @@ -1,5 +1,8 @@ -## tar-split utility +# tar-split utility +## Installation + + go get -u github.com/vbatts/tar-split/cmd/tar-split ## Usage @@ -9,14 +12,14 @@ $ sha256sum archive.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar $ mkdir ./x -$ tar-split d --output tar-data.json.gz ./archive.tar | tar -C ./x -x +$ tar-split disasm --output tar-data.json.gz ./archive.tar | tar -C ./x -x time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" ``` ### Assembly ```bash -$ tar-split a --output new.tar --input ./tar-data.json.gz --path ./x/ +$ tar-split asm --output new.tar --input ./tar-data.json.gz --path ./x/ INFO[0000] 
created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) $ sha256sum new.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar From 779e824d762aa1d96a5845f86c0fe702c6bd29d0 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 15:24:51 -0400 Subject: [PATCH 15/95] README: formatting and cleanup --- DESIGN.md | 36 ----------------- README.md | 98 ++++++----------------------------------------- concept/DESIGN.md | 94 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 122 deletions(-) delete mode 100644 DESIGN.md create mode 100644 concept/DESIGN.md diff --git a/DESIGN.md b/DESIGN.md deleted file mode 100644 index 1ce3fd4..0000000 --- a/DESIGN.md +++ /dev/null @@ -1,36 +0,0 @@ -Flow of TAR stream -================== - -The underlying use of `github.com/vbatts/tar-split/archive/tar` is most similar -to stdlib. - - -Packer interface ----------------- - -For ease of storage and usage of the raw bytes, there will be a storage -interface, that accepts an io.Writer (This way you could pass it an in memory -buffer or a file handle). - -Having a Packer interface can allow configuration of hash.Hash for file payloads -and providing your own io.Writer. - -Instead of having a state directory to store all the header information for all -Readers, we will leave that up to user of Reader. Because we can not assume an -ID for each Reader, and keeping that information differentiated. - - - -State Directory ---------------- - -Perhaps we could deduplicate the header info, by hashing the rawbytes and -storing them in a directory tree like: - - ./ac/dc/beef - -Then reference the hash of the header info, in the positional records for the -tar stream. Though this could be a future feature, and not required for an -initial implementation. Also, this would imply an owned state directory, rather -than just writing storage info to an io.Writer. 
- diff --git a/README.md b/README.md index c5e9a71..fd78ad7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -tar-split -======== +# tar-split [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) @@ -9,17 +8,13 @@ bytes of the TAR, rather than just the marshalled headers and file stream. The goal being that by preserving the raw bytes of each header, padding bytes, and the raw file payload, one could reassemble the original archive. - -Docs ----- +## Docs * https://godoc.org/github.com/vbatts/tar-split/tar/asm * https://godoc.org/github.com/vbatts/tar-split/tar/storage * https://godoc.org/github.com/vbatts/tar-split/archive/tar - -Caveat ------- +## Caveat Eventually this should detect TARs that this is not possible with. @@ -37,85 +32,19 @@ same path, we will not support this feature. If there are more than one entries with the same path, expect an err (like `ErrDuplicatePath`) or a resulting tar stream that does not validate your original checksum/signature. +## Contract -Contract --------- +Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstream mergeable solution). -Do not break the API of stdlib `archive/tar` in our fork (ideally find an -upstream mergeable solution) - - -Std Version ------------ +## Std Version The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) +## Concept -Example -------- +See the [design](concept/DESIGN.md). -First we'll get an archive to work with. For repeatability, we'll make an -archive from what you've just cloned: - -``` -git archive --format=tar -o tar-split.tar HEAD . 
-``` - -Then build the example main.go: - -``` -go build ./main.go -``` - -Now run the example over the archive: - -``` -$ ./main tar-split.tar -2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" -pax_global_header pre: 512 read: 52 -.travis.yml pre: 972 read: 374 -DESIGN.md pre: 650 read: 1131 -LICENSE pre: 917 read: 1075 -README.md pre: 973 read: 4289 -archive/ pre: 831 read: 0 -archive/tar/ pre: 512 read: 0 -archive/tar/common.go pre: 512 read: 7790 -[...] -tar/storage/entry_test.go pre: 667 read: 1137 -tar/storage/getter.go pre: 911 read: 2741 -tar/storage/getter_test.go pre: 843 read: 1491 -tar/storage/packer.go pre: 557 read: 3141 -tar/storage/packer_test.go pre: 955 read: 3096 -EOF padding: 1512 -Remainder: 512 -Size: 215040; Sum: 215040 -``` - -*What are we seeing here?* - -* `pre` is the header of a file entry, and potentially the padding from the - end of the prior file's payload. Also with particular tar extensions and pax - attributes, the header can exceed 512 bytes. -* `read` is the size of the file payload from the entry -* `EOF padding` is the expected 1024 null bytes on the end of a tar archive, - plus potential padding from the end of the prior file entry's payload -* `Remainder` is the remaining bytes of an archive. This is typically deadspace - as most tar implmentations will return after having reached the end of the - 1024 null bytes. Though various implementations will include some amount of - bytes here, which will affect the checksum of the resulting tar archive, - therefore this must be accounted for as well. - -Ideally the input tar and output `*.out`, will match: - -``` -$ sha1sum tar-split.tar* -ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar -ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out -``` - - -Stored Metadata ---------------- +## Stored Metadata Since the raw bytes of the headers and padding are stored, you may be wondering what the size implications are. 
The headers are at least 512 bytes per @@ -163,8 +92,7 @@ bytes-per-file rate for the storage implications. | ~ 1kb per/file | 0.06kb per/file | -What's Next? ------------- +## What's Next? * More implementations of storage Packer and Unpacker - could be a redis or mongo backend @@ -173,9 +101,7 @@ What's Next? * cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` -License -------- - -See LICENSE +## License +See [LICENSE](LICENSE) diff --git a/concept/DESIGN.md b/concept/DESIGN.md new file mode 100644 index 0000000..4bfa82c --- /dev/null +++ b/concept/DESIGN.md @@ -0,0 +1,94 @@ +# Flow of TAR stream + +## `./archive/tar` + +The import path `github.com/vbatts/tar-split/archive/tar` is fork of upstream golang stdlib [`archive/tar`](http://golang.org/pkg/archive/tar/). +It adds plumbing to access raw bytes of the tar stream as the headers and payload are read. + +## Packer interface + +For ease of storage and usage of the raw bytes, there will be a storage +interface, that accepts an io.Writer (This way you could pass it an in memory +buffer or a file handle). + +Having a Packer interface can allow configuration of hash.Hash for file payloads +and providing your own io.Writer. + +Instead of having a state directory to store all the header information for all +Readers, we will leave that up to user of Reader. Because we can not assume an +ID for each Reader, and keeping that information differentiated. + +## State Directory + +Perhaps we could deduplicate the header info, by hashing the rawbytes and +storing them in a directory tree like: + + ./ac/dc/beef + +Then reference the hash of the header info, in the positional records for the +tar stream. Though this could be a future feature, and not required for an +initial implementation. Also, this would imply an owned state directory, rather +than just writing storage info to an io.Writer. 
+ +## Concept Example + +First we'll get an archive to work with. For repeatability, we'll make an +archive from what you've just cloned: + +``` +git archive --format=tar -o tar-split.tar HEAD . +``` + +Then build the example main.go: + +``` +go build ./main.go +``` + +Now run the example over the archive: + +``` +$ ./main tar-split.tar +2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" +pax_global_header pre: 512 read: 52 +.travis.yml pre: 972 read: 374 +DESIGN.md pre: 650 read: 1131 +LICENSE pre: 917 read: 1075 +README.md pre: 973 read: 4289 +archive/ pre: 831 read: 0 +archive/tar/ pre: 512 read: 0 +archive/tar/common.go pre: 512 read: 7790 +[...] +tar/storage/entry_test.go pre: 667 read: 1137 +tar/storage/getter.go pre: 911 read: 2741 +tar/storage/getter_test.go pre: 843 read: 1491 +tar/storage/packer.go pre: 557 read: 3141 +tar/storage/packer_test.go pre: 955 read: 3096 +EOF padding: 1512 +Remainder: 512 +Size: 215040; Sum: 215040 +``` + +*What are we seeing here?* + +* `pre` is the header of a file entry, and potentially the padding from the + end of the prior file's payload. Also with particular tar extensions and pax + attributes, the header can exceed 512 bytes. +* `read` is the size of the file payload from the entry +* `EOF padding` is the expected 1024 null bytes on the end of a tar archive, + plus potential padding from the end of the prior file entry's payload +* `Remainder` is the remaining bytes of an archive. This is typically deadspace + as most tar implmentations will return after having reached the end of the + 1024 null bytes. Though various implementations will include some amount of + bytes here, which will affect the checksum of the resulting tar archive, + therefore this must be accounted for as well. 
+ +Ideally the input tar and output `*.out`, will match: + +``` +$ sha1sum tar-split.tar* +ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar +ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out +``` + + From 5d0b967302380b6fb8b06a35070f2ddba5a296ff Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 15:29:08 -0400 Subject: [PATCH 16/95] README: cleanup --- README.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fd78ad7..a8694cf 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,20 @@ and the raw file payload, one could reassemble the original archive. ## Docs +Code API for libraries provided by `tar-split`: + * https://godoc.org/github.com/vbatts/tar-split/tar/asm * https://godoc.org/github.com/vbatts/tar-split/tar/storage * https://godoc.org/github.com/vbatts/tar-split/archive/tar +## Install + +The command line utilitiy is installable via: + +```bash +go get github.com/vbatts/tar-split/cmd/tar-split +``` + ## Caveat Eventually this should detect TARs that this is not possible with. @@ -40,7 +50,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) -## Concept +## Design See the [design](concept/DESIGN.md). @@ -52,10 +62,15 @@ file (sometimes more), at least 1024 null bytes on the end, and then various padding. This makes for a constant linear growth in the stored metadata, with a naive storage implementation. -Reusing our prior example's `tar-split.tar`, let's build the checksize.go example: +First we'll get an archive to work with. For repeatability, we'll make an +archive from what you've just cloned: ``` -go build ./checksize.go +git archive --format=tar -o tar-split.tar HEAD . 
+``` + +``` +go build ./checksize.go ``` ``` From 6c671d7267b5a9bd7d68dd1c460404fed794b696 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 16:20:22 -0400 Subject: [PATCH 17/95] cmd/tar-split: make `checksize` a sub-command Moving it from top-level to the `tar-split` command --- README.md | 14 ++- cmd/tar-split/asm.go | 64 ++++++++++++ checksize.go => cmd/tar-split/checksize.go | 24 ++--- cmd/tar-split/disasm.go | 56 +++++++++++ cmd/tar-split/main.go | 112 +++------------------ 5 files changed, 149 insertions(+), 121 deletions(-) create mode 100644 cmd/tar-split/asm.go rename checksize.go => cmd/tar-split/checksize.go (83%) create mode 100644 cmd/tar-split/disasm.go diff --git a/README.md b/README.md index a8694cf..6c3d032 100644 --- a/README.md +++ b/README.md @@ -65,16 +65,13 @@ naive storage implementation. First we'll get an archive to work with. For repeatability, we'll make an archive from what you've just cloned: -``` +```bash git archive --format=tar -o tar-split.tar HEAD . ``` -``` -go build ./checksize.go -``` - -``` -$ ./checksize ./tar-split.tar +```bash +$ go get github.com/vbatts/tar-split/cmd/tar-split +$ tar-split checksize ./tar-split.tar inspecting "tar-split.tar" (size 210k) -- number of files: 50 -- size of metadata uncompressed: 53k @@ -87,7 +84,7 @@ implications are as little as 3kb. But let's look at a larger archive, with many files. -``` +```bash $ ls -sh ./d.tar 1.4G ./d.tar $ ./checksize ~/d.tar @@ -116,6 +113,7 @@ bytes-per-file rate for the storage implications. 
* cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` + ## License See [LICENSE](LICENSE) diff --git a/cmd/tar-split/asm.go b/cmd/tar-split/asm.go new file mode 100644 index 0000000..312e54b --- /dev/null +++ b/cmd/tar-split/asm.go @@ -0,0 +1,64 @@ +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func CommandAsm(c *cli.Context) { + if len(c.Args()) > 0 { + logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) + } + if len(c.String("input")) == 0 { + logrus.Fatalf("--input filename must be set") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set ([FILENAME|-])") + } + if len(c.String("path")) == 0 { + logrus.Fatalf("--path must be set") + } + + var outputStream io.Writer + if c.String("output") == "-" { + outputStream = os.Stdout + } else { + fh, err := os.Create(c.String("output")) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + outputStream = fh + } + + // Get the tar metadata reader + mf, err := os.Open(c.String("input")) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz, err := gzip.NewReader(mf) + if err != nil { + logrus.Fatal(err) + } + defer mfz.Close() + + metaUnpacker := storage.NewJSONUnpacker(mfz) + // XXX maybe get the absolute path here + fileGetter := storage.NewPathFileGetter(c.String("path")) + + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + i, err := io.Copy(outputStream, ots) + if err != nil { + logrus.Fatal(err) + } + + logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) +} diff --git a/checksize.go b/cmd/tar-split/checksize.go similarity index 83% rename from checksize.go rename to cmd/tar-split/checksize.go index 
a6d3c08..38f830e 100644 --- a/checksize.go +++ b/cmd/tar-split/checksize.go @@ -1,29 +1,25 @@ -// +build ignore - package main import ( "archive/tar" "compress/gzip" - "flag" "fmt" "io" "io/ioutil" "log" "os" + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) -var ( - flCleanup = flag.Bool("cleanup", true, "cleanup tempfiles") -) - -func main() { - flag.Parse() - - for _, arg := range flag.Args() { +func CommandChecksize(c *cli.Context) { + if len(c.Args()) == 0 { + logrus.Fatalf("please specify tar archives to check ('-' will check stdin)") + } + for _, arg := range c.Args() { fh, err := os.Open(arg) if err != nil { log.Fatal(err) @@ -40,8 +36,10 @@ func main() { log.Fatal(err) } defer packFh.Close() - if *flCleanup { + if !c.Bool("work") { defer os.Remove(packFh.Name()) + } else { + fmt.Printf(" -- working file preserved: %s\n", packFh.Name()) } sp := storage.NewJSONPacker(packFh) @@ -83,7 +81,7 @@ func main() { log.Fatal(err) } defer gzPackFh.Close() - if *flCleanup { + if !c.Bool("work") { defer os.Remove(gzPackFh.Name()) } diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go new file mode 100644 index 0000000..b7b0dfe --- /dev/null +++ b/cmd/tar-split/disasm.go @@ -0,0 +1,56 @@ +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func CommandDisasm(c *cli.Context) { + if len(c.Args()) != 1 { + logrus.Fatalf("please specify tar to be disabled ") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set") + } + + // Set up the tar input stream + var inputStream io.Reader + if c.Args()[0] == "-" { + inputStream = os.Stdin + } else { + fh, err := os.Open(c.Args()[0]) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + inputStream = fh + } + + // Set up the metadata 
storage + mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz := gzip.NewWriter(mf) + defer mfz.Close() + metaPacker := storage.NewJSONPacker(mfz) + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) + if err != nil { + logrus.Fatal(err) + } + i, err := io.Copy(os.Stdout, its) + if err != nil { + logrus.Fatal(err) + } + logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) +} diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index 8c631b0..f24dce5 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -2,14 +2,10 @@ package main import ( - "compress/gzip" - "io" "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" - "github.com/vbatts/tar-split/tar/asm" - "github.com/vbatts/tar-split/tar/storage" ) func main() { @@ -71,105 +67,21 @@ func main() { }, }, }, + { + Name: "checksize", + Usage: "displays size estimates for metadata storage of a Tar archive", + Action: CommandChecksize, + Flags: []cli.Flag{ + cli.BoolFlag{ + Name: "work", + Usage: "do not delete the working directory", + // defaults to false + }, + }, + }, } if err := app.Run(os.Args); err != nil { logrus.Fatal(err) } } - -func CommandDisasm(c *cli.Context) { - if len(c.Args()) != 1 { - logrus.Fatalf("please specify tar to be disabled ") - } - if len(c.String("output")) == 0 { - logrus.Fatalf("--output filename must be set") - } - - // Set up the tar input stream - var inputStream io.Reader - if c.Args()[0] == "-" { - inputStream = os.Stdin - } else { - fh, err := os.Open(c.Args()[0]) - if err != nil { - logrus.Fatal(err) - } - defer fh.Close() - inputStream = fh - } - - // Set up the metadata storage - mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) - 
if err != nil { - logrus.Fatal(err) - } - defer mf.Close() - mfz := gzip.NewWriter(mf) - defer mfz.Close() - metaPacker := storage.NewJSONPacker(mfz) - - // we're passing nil here for the file putter, because the ApplyDiff will - // handle the extraction of the archive - its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) - if err != nil { - logrus.Fatal(err) - } - i, err := io.Copy(os.Stdout, its) - if err != nil { - logrus.Fatal(err) - } - logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) -} - -func CommandAsm(c *cli.Context) { - if len(c.Args()) > 0 { - logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) - } - if len(c.String("input")) == 0 { - logrus.Fatalf("--input filename must be set") - } - if len(c.String("output")) == 0 { - logrus.Fatalf("--output filename must be set ([FILENAME|-])") - } - if len(c.String("path")) == 0 { - logrus.Fatalf("--path must be set") - } - - var outputStream io.Writer - if c.String("output") == "-" { - outputStream = os.Stdout - } else { - fh, err := os.Create(c.String("output")) - if err != nil { - logrus.Fatal(err) - } - defer fh.Close() - outputStream = fh - } - - // Get the tar metadata reader - mf, err := os.Open(c.String("input")) - if err != nil { - logrus.Fatal(err) - } - defer mf.Close() - mfz, err := gzip.NewReader(mf) - if err != nil { - logrus.Fatal(err) - } - defer mfz.Close() - - metaUnpacker := storage.NewJSONUnpacker(mfz) - // XXX maybe get the absolute path here - fileGetter := storage.NewPathFileGetter(c.String("path")) - - ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) - defer ots.Close() - i, err := io.Copy(outputStream, ots) - if err != nil { - logrus.Fatal(err) - } - - logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) -} From c6be94f8a32be7147630a87b7ee39f41f82421c4 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 16:22:36 -0400 Subject: 
[PATCH 18/95] cmd/tar-split: README usage for checksize --- cmd/tar-split/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cmd/tar-split/README.md b/cmd/tar-split/README.md index eae23c2..02a2218 100644 --- a/cmd/tar-split/README.md +++ b/cmd/tar-split/README.md @@ -25,4 +25,15 @@ $ sha256sum new.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar ``` +### Estimating metadata size + +```bash +$ tar-split checksize ./archive.tar +inspecting "./archive.tar" (size 200k) + -- number of files: 28 + -- size of metadata uncompressed: 28k + -- size of gzip compressed metadata: 1k +``` + + From 0a79a3807ce50a84f8c0e0f567920ebce80c697f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 10 Aug 2015 16:26:09 -0400 Subject: [PATCH 19/95] README: missed a checksize reference --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c3d032..e37d36b 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ But let's look at a larger archive, with many files. 
```bash $ ls -sh ./d.tar 1.4G ./d.tar -$ ./checksize ~/d.tar +$ tar-split checksize ~/d.tar inspecting "/home/vbatts/d.tar" (size 1420749k) -- number of files: 38718 -- size of metadata uncompressed: 43261k From 51b0481d4aecf1c051b1dfc942ab46986e776bef Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 3 Aug 2015 17:13:31 -0400 Subject: [PATCH 20/95] tar/asm: adding a failing test due to GNU LongLink --- tar/asm/assemble_test.go | 80 ++++++++++++++++++++++++++++--- tar/asm/testdata/longlink.tar.gz | Bin 0 -> 438 bytes 2 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 tar/asm/testdata/longlink.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 7cf44dc..e37d7f3 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -114,8 +114,8 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { func TestTarStream(t *testing.T) { var ( - expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" - expectedSize int64 = 10240 + expectedSHA1Sum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" + expectedSize int64 = 10240 ) fh, err := os.Open("./testdata/t.tar.gz") @@ -153,8 +153,8 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSum, h0.Sum(nil)) + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) } t.Logf("%s", w.String()) // if we fail, then show the packed info @@ -175,7 +175,75 @@ func TestTarStream(t *testing.T) { if i != expectedSize { t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSum, h1.Sum(nil)) + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; 
got %x", expectedSHA1Sum, h1.Sum(nil)) + } +} + +func TestTarGNUTar(t *testing.T) { + var ( + expectedSHA1Sum = "d9f6babe107b7247953dff6b5b5ae31a3a880add" + expectedSize int64 = 20480 + ) + + fh, err := os.Open("./testdata/longlink.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } + + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) + } + + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. 
+ + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) } } diff --git a/tar/asm/testdata/longlink.tar.gz b/tar/asm/testdata/longlink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cb21db5f382892fd9c2e529e7baeff37efee7dbf GIT binary patch literal 438 zcmV;n0ZINJiwFokufJ6Q18i?@XKZP1Yc6zQaschxYihzk6hPrD#T6tolX?8@0^CGe zv6O;>NlTaCq*bU`jW=d)k)97EwFcCl>%@VURnx9lRgshFr4)vvO~LGs_Ure1g>@mM zHRk*+171 z$otoI^7Pkm(m%uHK;GZi*3w_Y@BQap3FQ5QbCLcUF7%&m2;}`^zXH%-!&v`0hCtpw znP}**;i~>O4#9-}!|C)_aDD&jLm=+&OtS3%RS5e}8UlHL8y)+96;t|4hd|!n1<(Fp z#pM2iA&~d4ePsWy;lS22J8(;*P`4}JgIn8^CCqO8j5VY%HrFJCsVnV#d&_{`Bbb36;f|9^!3 z``p61hzuRuROXrO}ey1{h{+o}d)q418?Dp61|G(4dKYXZT^n&%jztdm;jm96~ gpRfM_0000000000000000O0<70{(f!Y5-6G0O>IHkpKVy literal 0 HcmV?d00001 From df8572a1eb56cd5f77ec10482756113cdf42a915 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 11 Aug 2015 15:51:19 -0400 Subject: [PATCH 21/95] tar/asm: check length before adding an entry --- tar/asm/disassemble.go | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 4a8ed94..7986890 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -55,13 +55,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. 
- _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }) - if err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } break // not return. We need the end of the reader. } @@ -69,12 +71,15 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io break // not return. We need the end of the reader. } - if _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: tr.RawBytes(), - }); err != nil { - pW.CloseWithError(err) - return + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } } var csum []byte From e46a815cbcaa5270acfb2893b66791150f4d2a87 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 11 Aug 2015 15:51:52 -0400 Subject: [PATCH 22/95] archive/tar: fix carry-over of bytes for GNU types Archives produced with GNU tar can have types of TypeGNULongName and TypeGNULongLink. These fields effectively appear like two file entries in the tar archive. While golang's `archive/tar` transparently provide the file name and headers and file payload, the access to the raw bytes is still needed. This fixes the access to the longlink header, it's payload (of the long file path name), and the following file header and actual file payload. 
--- archive/tar/reader.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index a89957e..f817956 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -154,44 +154,60 @@ func (tr *Reader) Next() (*Header, error) { } return hdr, nil case TypeGNULongName: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long name header. Its contents are the real file name. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: + var b *bytes.Buffer + if tr.RawAccounting { + b = bytes.NewBuffer(tr.RawBytes()) + } // We have a GNU long link header. 
realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - var b []byte if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + return nil, err + } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b = tr.RawBytes() + b.Reset() + b.Write(tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b); err != nil { + b.Write(tr.RawBytes()) + if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { return nil, err } } From e6df23162ed7cea021fffb41f186ab2a382294a0 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 16:46:04 -0700 Subject: [PATCH 23/95] Remove redundant TeeReader Signed-off-by: Alexander Morozov --- tar/storage/getter.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tar/storage/getter.go b/tar/storage/getter.go index ae110c6..70fd378 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -5,7 +5,6 @@ import ( "errors" "hash/crc64" "io" - "io/ioutil" "os" "path/filepath" ) @@ -97,8 +96,7 @@ type bitBucketFilePutter struct { func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) { c := crc64.New(CRCTable) - tRdr := io.TeeReader(r, c) - i, err := io.Copy(ioutil.Discard, tRdr) + i, err := io.Copy(c, r) return i, c.Sum(nil), err } From b1783bc86d720d5bcd2497fbc0e72ea50f74b826 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 22:41:28 -0700 Subject: [PATCH 24/95] storage: Fix syntax of json tags Signed-off-by: Alexander Morozov --- tar/storage/entry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index 57a0256..38fe7ba 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -32,8 +32,8 @@ const ( // collisions in a sample of 18.2 million, CRC64 had none. 
type Entry struct { Type Type `json:"type"` - Name string `json:"name",omitempty` - Size int64 `json:"size",omitempty` + Name string `json:"name,omitempty"` + Size int64 `json:"size,omitempty"` Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } From 93c0a320a8d62789bf2ebe32bee12be2644d625c Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 22:45:39 -0700 Subject: [PATCH 25/95] asm: Remove unreachable code Signed-off-by: Alexander Morozov --- tar/asm/assemble.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index b421db0..74317cb 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -62,7 +62,6 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose fh.Close() } } - pw.Close() }() return pr } From fa881b2347d337cf6c3cff04eecfea52ef8c8f09 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Wed, 12 Aug 2015 22:49:38 -0700 Subject: [PATCH 26/95] Add vet check to travis Signed-off-by: Alexander Morozov --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 21d6684..ee1645d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,10 @@ go: # let us have pretty, fast Docker-based Travis workers! sudo: false -install: go get -d ./... +install: + - go get -d ./... + - go get golang.org/x/tools/cmd/vet script: - go test -v ./... + - go vet ./... 
From ea73dc6f6fa236134d68544a93700a459358aee2 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 13 Aug 2015 11:42:14 -0700 Subject: [PATCH 27/95] tar/storage: Benchmark for bufferFileGetPutter.Put Signed-off-by: Alexander Morozov --- tar/storage/getter_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tar/storage/getter_test.go b/tar/storage/getter_test.go index 5a6fcc7..c06cff0 100644 --- a/tar/storage/getter_test.go +++ b/tar/storage/getter_test.go @@ -2,7 +2,9 @@ package storage import ( "bytes" + "fmt" "io/ioutil" + "strings" "testing" ) @@ -39,6 +41,7 @@ func TestGetter(t *testing.T) { } } } + func TestPutter(t *testing.T) { fp := NewDiscardFilePutter() // map[filename]map[body]crc64sum @@ -60,3 +63,22 @@ func TestPutter(t *testing.T) { } } } + +func BenchmarkPutter(b *testing.B) { + files := []string{ + strings.Repeat("foo", 1000), + strings.Repeat("bar", 1000), + strings.Repeat("baz", 1000), + strings.Repeat("fooz", 1000), + strings.Repeat("vbatts", 1000), + strings.Repeat("systemd", 1000), + } + for i := 0; i < b.N; i++ { + fgp := NewBufferFileGetPutter() + for n, body := range files { + if _, _, err := fgp.Put(fmt.Sprintf("%d", n), bytes.NewBufferString(body)); err != nil { + b.Fatal(err) + } + } + } +} From 45399711c2466973d96d650eb2c9971fbf3816d7 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 13 Aug 2015 11:42:43 -0700 Subject: [PATCH 28/95] tar/storage: Replace TeeReader with MultiWriter It uses slightly less memory and more understandable. 
Benchmar results: benchmark old ns/op new ns/op delta BenchmarkPutter-4 57272 52375 -8.55% benchmark old allocs new allocs delta BenchmarkPutter-4 21 19 -9.52% benchmark old bytes new bytes delta BenchmarkPutter-4 19416 13336 -31.31% Signed-off-by: Alexander Morozov --- tar/storage/getter.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tar/storage/getter.go b/tar/storage/getter.go index 70fd378..ae11f8f 100644 --- a/tar/storage/getter.go +++ b/tar/storage/getter.go @@ -59,15 +59,15 @@ func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) { } func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) { - c := crc64.New(CRCTable) - tRdr := io.TeeReader(r, c) - b := bytes.NewBuffer([]byte{}) - i, err := io.Copy(b, tRdr) + crc := crc64.New(CRCTable) + buf := bytes.NewBuffer(nil) + cw := io.MultiWriter(crc, buf) + i, err := io.Copy(cw, r) if err != nil { return 0, nil, err } - bfgp.files[name] = b.Bytes() - return i, c.Sum(nil), nil + bfgp.files[name] = buf.Bytes() + return i, crc.Sum(nil), nil } type readCloserWrapper struct { From 4d66163297403e1f4a85fa9601886eae31f551ac Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 13 Aug 2015 15:32:17 -0400 Subject: [PATCH 29/95] archive/tar: a []byte copy needed for GNU LongLink --- archive/tar/reader.go | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index f817956..c72e002 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -154,60 +154,48 @@ func (tr *Reader) Next() (*Header, error) { } return hdr, nil case TypeGNULongName: - var b *bytes.Buffer - if tr.RawAccounting { - b = bytes.NewBuffer(tr.RawBytes()) - } // We have a GNU long name header. Its contents are the real file name. 
realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } + var buf []byte if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { - return nil, err - } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b.Reset() - b.Write(tr.RawBytes()) + buf = make([]byte, tr.rawBytes.Len()) + copy(buf[:], tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - b.Write(tr.RawBytes()) - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + buf = append(buf, tr.RawBytes()...) + if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: - var b *bytes.Buffer - if tr.RawAccounting { - b = bytes.NewBuffer(tr.RawBytes()) - } // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } + var buf []byte if tr.RawAccounting { - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { - return nil, err - } if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } - b.Reset() - b.Write(tr.RawBytes()) + buf = make([]byte, tr.rawBytes.Len()) + copy(buf[:], tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { - b.Write(tr.RawBytes()) - if _, err = tr.rawBytes.Write(b.Bytes()); err != nil { + buf = append(buf, tr.RawBytes()...) + if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } From c76e42010eb78200c826024cff1d7bba76082715 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 14 Aug 2015 07:55:18 -0400 Subject: [PATCH 30/95] tar/asm: additional GNU LongLink testcase Adding a minimal test case for GNU @LongLink. Tested that it fails on v0.9.5, but now passes on v0.9.6 and master. 
--- tar/asm/assemble_test.go | 187 ++++++++++------------------ tar/asm/testdata/fatlonglink.tar.gz | Bin 0 -> 26402 bytes 2 files changed, 63 insertions(+), 124 deletions(-) create mode 100644 tar/asm/testdata/fatlonglink.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index e37d7f3..da515f2 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -113,137 +113,76 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } func TestTarStream(t *testing.T) { - var ( - expectedSHA1Sum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" - expectedSize int64 = 10240 - ) - - fh, err := os.Open("./testdata/t.tar.gz") - if err != nil { - t.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - t.Fatal(err) - } - defer gzRdr.Close() - - // Setup where we'll store the metadata - w := bytes.NewBuffer([]byte{}) - sp := storage.NewJSONPacker(w) - fgp := storage.NewBufferFileGetPutter() - - // wrap the disassembly stream - tarStream, err := NewInputTarStream(gzRdr, sp, fgp) - if err != nil { - t.Fatal(err) + testCases := []struct { + path string + expectedSHA1Sum string + expectedSize int64 + }{ + {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, + {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, + {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, } - // get a sum of the stream after it has passed through to ensure it's the same. 
- h0 := sha1.New() - tRdr0 := io.TeeReader(tarStream, h0) + for _, tc := range testCases { + fh, err := os.Open(tc.path) + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() - // read it all to the bit bucket - i, err := io.Copy(ioutil.Discard, tRdr0) - if err != nil { - t.Fatal(err) - } + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() - if i != expectedSize { - t.Errorf("size of tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) - } + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } - t.Logf("%s", w.String()) // if we fail, then show the packed info + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) - // If we've made it this far, then we'll turn it around and create a tar - // stream from the packed metadata and buffered file contents. - r := bytes.NewBuffer(w.Bytes()) - sup := storage.NewJSONUnpacker(r) - // and reuse the fgp that we Put the payloads to. 
+ // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } - rc := NewOutputTarStream(fgp, sup) - h1 := sha1.New() - i, err = io.Copy(h1, rc) - if err != nil { - t.Fatal(err) - } + if i != tc.expectedSize { + t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum { + t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) + } - if i != expectedSize { - t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) - } -} - -func TestTarGNUTar(t *testing.T) { - var ( - expectedSHA1Sum = "d9f6babe107b7247953dff6b5b5ae31a3a880add" - expectedSize int64 = 20480 - ) - - fh, err := os.Open("./testdata/longlink.tar.gz") - if err != nil { - t.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - t.Fatal(err) - } - defer gzRdr.Close() - - // Setup where we'll store the metadata - w := bytes.NewBuffer([]byte{}) - sp := storage.NewJSONPacker(w) - fgp := storage.NewBufferFileGetPutter() - - // wrap the disassembly stream - tarStream, err := NewInputTarStream(gzRdr, sp, fgp) - if err != nil { - t.Fatal(err) - } - - // get a sum of the stream after it has passed through to ensure it's the same. 
- h0 := sha1.New() - tRdr0 := io.TeeReader(tarStream, h0) - - // read it all to the bit bucket - i, err := io.Copy(ioutil.Discard, tRdr0) - if err != nil { - t.Fatal(err) - } - - if i != expectedSize { - t.Errorf("size of tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of tar: expected %s; got %x", expectedSHA1Sum, h0.Sum(nil)) - } - - t.Logf("%s", w.String()) // if we fail, then show the packed info - - // If we've made it this far, then we'll turn it around and create a tar - // stream from the packed metadata and buffered file contents. - r := bytes.NewBuffer(w.Bytes()) - sup := storage.NewJSONUnpacker(r) - // and reuse the fgp that we Put the payloads to. - - rc := NewOutputTarStream(fgp, sup) - h1 := sha1.New() - i, err = io.Copy(h1, rc) - if err != nil { - t.Fatal(err) - } - - if i != expectedSize { - t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) - } - if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSHA1Sum { - t.Fatalf("checksum of output tar: expected %s; got %x", expectedSHA1Sum, h1.Sum(nil)) + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. 
+ + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != tc.expectedSize { + t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum { + t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil)) + } } } diff --git a/tar/asm/testdata/fatlonglink.tar.gz b/tar/asm/testdata/fatlonglink.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0d8ed148ff12cfeb30fab84c35735c31e6a4c326 GIT binary patch literal 26402 zcmeI(e=yr;90zdL?a&$RZavm@akc7})DSDZq3GFmn?tp##VBgpBclw>tR&8q>AGgU zIbl)EE$Ny}P(_6%epPN9j!1`ylvHRF`O!2YiR4+f+wI@>U+d@l=kvVp`~5tx`~LO5 zd)&Uax4%9$7W>k+{Pethul&@nY!tJEsHJGvl0`52fs(vO8Hi)!f6T)*&Aqa*cGit2Cva|LHaVy&7|rW{A{LbCfl>{jH=^ge$p_iqmm8#ulUL%?O9pzh% znKnkXBnwfy%DO>~PB8Osqj#P91C7L_`$%pgkMFpml3c*AlHJWALDmQ{3ukONjwp`( zs}^k4umwFe^78D5^_0jq6f$X%R-_c6sz4KpQHEq%mnS})Lk&f!%c+0E5*faZRwmJF zVLn`PrxdoOL63<5d6dg`Q3;mRxdw<0BVC-N?cx!QBgsWm%;X8$xmOtLPD0orfkvKIBDB~)Qd$4+MBa%Lrpj>uM#W6J)B-n6fV}t z(SKblW~*_*ydVGqAOHd&00JNY0w4ea{|J;cCpv zsKe2hHt->9{F>vmWw9UTLGEc|TKzeGMgo^93P5yY;hqr^W>~KjJh>>=uKBRrn@{d@ z*fb$tIVX$eC(We}6b7YpGskNwv6=FKdpL&Yw2&PYk73uM8^t_N?J}o1?}avp!-D__ zfB*=900@8p2!H?xfB*=900@8p2!OyA6|m5oJV$8R0rLg=iV8Q*JH6cp%W*>#Lq^DO z)tsk_t+AnKr`)tbfl-1dvEus%Z{dVAdR^HB-`vT1-1}&q%{I2!O! 
Date: Fri, 14 Aug 2015 10:02:46 -0400 Subject: [PATCH 31/95] *: adding some version magic --- cmd/tar-split/main.go | 4 ++-- version/gen.go | 4 ++++ version/version.go | 7 +++++++ 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 version/gen.go create mode 100644 version/version.go diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index f24dce5..b417120 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -1,4 +1,3 @@ -// go:generate git tag | tail -1 package main import ( @@ -6,13 +5,14 @@ import ( "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/version" ) func main() { app := cli.NewApp() app.Name = "tar-split" app.Usage = "tar assembly and disassembly utility" - app.Version = "0.9.2" + app.Version = version.VERSION app.Author = "Vincent Batts" app.Email = "vbatts@hashbangbash.com" app.Action = cli.ShowAppHelp diff --git a/version/gen.go b/version/gen.go new file mode 100644 index 0000000..d290d83 --- /dev/null +++ b/version/gen.go @@ -0,0 +1,4 @@ +package version + +// from `go get github.com/vbatts/go-get-version` +//go:generate go-get-version -package version -variable VERSION -output version.go diff --git a/version/version.go b/version/version.go new file mode 100644 index 0000000..0b86fbf --- /dev/null +++ b/version/version.go @@ -0,0 +1,7 @@ +package version +// AUTO-GENEREATED. 
DO NOT EDIT +// 2015-08-14 09:56:50.742727493 -0400 EDT + +// VERSION is the generated version from /home/vbatts/src/vb/tar-split/version +var VERSION = "v0.9.6-1-gc76e420" + \ No newline at end of file From 3a88af2866a599063c0e94cb141013c9ffd69032 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 14 Aug 2015 10:15:26 -0400 Subject: [PATCH 32/95] travis: adding older and newer golang --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index ee1645d..783781b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ language: go go: + - tip - 1.4.2 - 1.3.3 + - 1.2.2 # let us have pretty, fast Docker-based Travis workers! sudo: false From bf82db1f0de52d664d0acb1a92163532995370b0 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 18 Aug 2015 14:54:32 -0400 Subject: [PATCH 33/95] README: updates --- README.md | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e37d36b..0a1b2fc 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,7 @@ [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) -Extend the upstream golang stdlib `archive/tar` library, to expose the raw -bytes of the TAR, rather than just the marshalled headers and file stream. - -The goal being that by preserving the raw bytes of each header, padding bytes, -and the raw file payload, one could reassemble the original archive. +Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. 
## Docs @@ -48,7 +44,9 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) +The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f). +It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. + ## Design @@ -107,10 +105,7 @@ bytes-per-file rate for the storage implications. ## What's Next? * More implementations of storage Packer and Unpacker - - could be a redis or mongo backend * More implementations of FileGetter and FilePutter - - could be a redis or mongo backend -* cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` From 6e38573de2ab9ae03937762754dcde175ee2d9b6 Mon Sep 17 00:00:00 2001 From: David du Colombier <0intro@gmail.com> Date: Fri, 24 Apr 2015 15:37:53 +0200 Subject: [PATCH 34/95] archive/tar: fix error message Write should return ErrWriteAfterClose instead of ErrWriteTooLong when called after Close. Change-Id: If5ec4ef924e4c56489e0d426976f7e5fad79be9b Reviewed-on: https://go-review.googlesource.com/9259 Reviewed-by: Brad Fitzpatrick Signed-off-by: Vincent Batts --- archive/tar/writer.go | 2 +- archive/tar/writer_test.go | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index dafb2ca..9dbc01a 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -355,7 +355,7 @@ func paxHeader(msg string) string { // hdr.Size bytes are written after WriteHeader. 
func (tw *Writer) Write(b []byte) (n int, err error) { if tw.closed { - err = ErrWriteTooLong + err = ErrWriteAfterClose return } overwrite := false diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 5e42e32..650899a 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -489,3 +489,20 @@ func TestValidTypeflagWithPAXHeader(t *testing.T) { } } } + +func TestWriteAfterClose(t *testing.T) { + var buffer bytes.Buffer + tw := NewWriter(&buffer) + + hdr := &Header{ + Name: "small.txt", + Size: 5, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("Failed to write header: %s", err) + } + tw.Close() + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { + t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) + } +} From 576b2737620ba2ca0fb6c27552c2dfa8eadb0072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Haugen?= Date: Wed, 27 May 2015 10:44:44 +0200 Subject: [PATCH 35/95] archive/tar: don't panic on negative file size Fixes #10959. Fixes #10960. 
Change-Id: I9a81a0e2b8275338d0d1c3f7f7265e0fd91f3de2 Reviewed-on: https://go-review.googlesource.com/10402 TryBot-Result: Gobot Gobot Reviewed-by: David Symonds Signed-off-by: Vincent Batts --- archive/tar/reader.go | 4 ++++ archive/tar/reader_test.go | 16 ++++++++++++++++ archive/tar/testdata/neg-size.tar | Bin 0 -> 512 bytes 3 files changed, 20 insertions(+) create mode 100644 archive/tar/testdata/neg-size.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index c72e002..0b0c3b1 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -553,6 +553,10 @@ func (tr *Reader) readHeader() *Header { hdr.Uid = int(tr.octal(s.next(8))) hdr.Gid = int(tr.octal(s.next(8))) hdr.Size = tr.octal(s.next(12)) + if hdr.Size < 0 { + tr.err = ErrHeader + return nil + } hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 9601ffe..ab1e844 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -741,3 +741,19 @@ func TestUninitializedRead(t *testing.T) { } } + +// Negative header size should not cause panic. +// Issues 10959 and 10960. 
+func TestNegativeHdrSize(t *testing.T) { + f, err := os.Open("testdata/neg-size.tar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err != ErrHeader { + t.Error("want ErrHeader, got", err) + } + io.Copy(ioutil.Discard, r) +} diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar new file mode 100644 index 0000000000000000000000000000000000000000..5deea3d05c4da5a4ddda34ef7ad781088464e71b GIT binary patch literal 512 zcma)(!3}~i7=@d#07(~c0h9N)Na`Hy;GL8N4j!7YfmcUy4Hj^R-s|6LkswAdq{%Dq zeeYEkfGqY#(eVIvtUD=3dmgO>+WvmJu?!(Z2_k7dfq;C)3dnfX`l0#IeAe0qQ-^2= z|8jB*JI{8kO)-jdf^$wdJ_vEWkPIQXm^d{2NhUoLEEza^mVV&QNE^63LaKtd9rtDs zE>me;9Em{+eZ^Y>snQ&s!17bkhjcz=H=J(=e>|P(sS8Gxj+6N@Z7t^ E15rerKmY&$ literal 0 HcmV?d00001 From 55dceefe42a7ad9268aea544a8b6826f9d9a5c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Haugen?= Date: Thu, 28 May 2015 13:48:47 +0200 Subject: [PATCH 36/95] archive/tar: terminate when reading malformed sparse files Fixes #10968. 
Change-Id: I027bc571a71629ac49c2a0ff101b2950af6e7531 Reviewed-on: https://go-review.googlesource.com/10482 Reviewed-by: David Symonds Run-TryBot: David Symonds TryBot-Result: Gobot Gobot Signed-off-by: Vincent Batts --- archive/tar/reader.go | 3 +++ archive/tar/reader_test.go | 19 +++++++++++++++++++ archive/tar/testdata/issue10968.tar | Bin 0 -> 512 bytes 3 files changed, 22 insertions(+) create mode 100644 archive/tar/testdata/issue10968.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 0b0c3b1..dbc5698 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -899,6 +899,9 @@ func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { // Otherwise, we're at the end of the file return 0, io.EOF } + if sfr.tot < sfr.sp[0].offset { + return 0, io.ErrUnexpectedEOF + } if sfr.pos < sfr.sp[0].offset { // We're in a hole n = sfr.readHole(b, sfr.sp[0].offset) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index ab1e844..6ffb383 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -757,3 +757,22 @@ func TestNegativeHdrSize(t *testing.T) { } io.Copy(ioutil.Discard, r) } + +// This used to hang in (*sparseFileReader).readHole due to missing +// verification of sparse offsets against file size. 
+func TestIssue10968(t *testing.T) { + f, err := os.Open("testdata/issue10968.tar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err != nil { + t.Fatal(err) + } + _, err = io.Copy(ioutil.Discard, r) + if err != io.ErrUnexpectedEOF { + t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) + } +} diff --git a/archive/tar/testdata/issue10968.tar b/archive/tar/testdata/issue10968.tar new file mode 100644 index 0000000000000000000000000000000000000000..1cc837bcff14cd822a26e43034955c82e852ab29 GIT binary patch literal 512 zcmbVI!41MN47Ah*kg@;^fX)>lI!AWsgI^V-_Q4}k$6}2x&>iv*cG6Oc`at9n#lG|1 zIi>(iak!RTol#boyD`0c^v(cHJJuvHh-e39;{t(!nc@gWsV;O@FkUc{-h`pC817Ix zgh|QIatu;A!G^JZ7UC1V_vGb4bURuTWAy6SS-Fx(D=wcI#QP1Y#wzX?HAf0_+~lp> yN?iGbw2JFgJjd0vnp9WIo>K3V$tfee6;KE|`1A3J$tp?9B&Y7`+Gwrtzls-lP-;g2 literal 0 HcmV?d00001 From 69de764807dae1f3b43badebbb958f7fcb3d70c8 Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Fri, 12 Jun 2015 22:49:42 +0200 Subject: [PATCH 37/95] archive/tar: fix slice bounds out of range Sanity check the pax-header size field before using it. Fixes #11167. Change-Id: I9d5d0210c3990e6fb9434c3fe333be0d507d5962 Reviewed-on: https://go-review.googlesource.com/10954 Reviewed-by: David Symonds Signed-off-by: Vincent Batts --- archive/tar/reader.go | 2 +- archive/tar/reader_test.go | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index dbc5698..6f219da 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -397,7 +397,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { } // Parse the first token as a decimal integer. 
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil { + if err != nil || n < 5 || int64(len(buf)) < n { return nil, ErrHeader } // Extract everything between the decimal and the n -1 on the diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 6ffb383..311db77 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -462,9 +462,14 @@ func TestParsePAXHeader(t *testing.T) { t.Error("Buffer wasn't consumed") } } - badHeader := bytes.NewReader([]byte("3 somelongkey=")) - if _, err := parsePAX(badHeader); err != ErrHeader { - t.Fatal("Unexpected success when parsing bad header") + badHeaderTests := [][]byte{ + []byte("3 somelongkey=\n"), + []byte("50 tooshort=\n"), + } + for _, test := range badHeaderTests { + if _, err := parsePAX(bytes.NewReader(test)); err != ErrHeader { + t.Fatal("Unexpected success when parsing bad header") + } } } From 2e5698249c892bebc0326a4307410b205783ad22 Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Sat, 13 Jun 2015 10:53:06 +0200 Subject: [PATCH 38/95] archive/tar: add missing error checks Check for errors when reading the headers following the pax headers. Fixes #11169. 
Change-Id: Ifec4a949ec8df8b49fa7cb7a67eb826fe2282ad8 Reviewed-on: https://go-review.googlesource.com/11031 Reviewed-by: Russ Cox Signed-off-by: Vincent Batts --- archive/tar/reader.go | 6 ++++++ archive/tar/reader_test.go | 15 +++++++++++++++ archive/tar/testdata/issue11169.tar | Bin 0 -> 602 bytes 3 files changed, 21 insertions(+) create mode 100644 archive/tar/testdata/issue11169.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6f219da..4168ea2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -138,7 +138,13 @@ func (tr *Reader) Next() (*Header, error) { // We actually read the whole file, // but this skips alignment padding tr.skipUnread() + if tr.err != nil { + return nil, tr.err + } hdr = tr.readHeader() + if hdr == nil { + return nil, tr.err + } mergePAX(hdr, headers) // Check for a PAX format sparse file diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 311db77..da01f26 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -781,3 +781,18 @@ func TestIssue10968(t *testing.T) { t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) } } + +// Do not panic if there are errors in header blocks after the pax header. +// Issue 11169 +func TestIssue11169(t *testing.T) { + f, err := os.Open("testdata/issue11169.tar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := NewReader(f) + _, err = r.Next() + if err == nil { + t.Fatal("Unexpected success") + } +} diff --git a/archive/tar/testdata/issue11169.tar b/archive/tar/testdata/issue11169.tar new file mode 100644 index 0000000000000000000000000000000000000000..4d71fa15260609ecee0c8c751cfebf49be8763ac GIT binary patch literal 602 zcmdPX4@j)=NKH&hEh^SCG%+zV)=x}KWS}ZA00J`;69y0s1n9JZp|KHzp^>Svp`nSX svAH3G0gzz?R8~P%SKu(Lw74X(2 Date: Wed, 4 Mar 2015 12:29:16 -0500 Subject: [PATCH 39/95] archive/tar: fix round-trip attributes The issue was identified while working with round trip FileInfo of the headers of hardlinks. 
Also, additional test cases for hard link handling. (review carried over from http://golang.org/cl/165860043) Fixes #9027 Change-Id: I9e3a724c8de72eb1b0fbe0751a7b488894911b76 Reviewed-on: https://go-review.googlesource.com/6790 Reviewed-by: Russ Cox Signed-off-by: Vincent Batts --- archive/tar/common.go | 4 +- archive/tar/stat_unix.go | 46 ++++++++++++++++------ archive/tar/tar_test.go | 63 ++++++++++++++++++++++++------ archive/tar/testdata/hardlink.tar | Bin 0 -> 2560 bytes archive/tar/writer_test.go | 38 ++++++++++++++++++ 5 files changed, 127 insertions(+), 24 deletions(-) create mode 100644 archive/tar/testdata/hardlink.tar diff --git a/archive/tar/common.go b/archive/tar/common.go index e363aa7..855e5fc 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -139,8 +139,8 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { } switch fi.h.Typeflag { - case TypeLink, TypeSymlink: - // hard link, symbolic link + case TypeSymlink: + // symbolic link mode |= os.ModeSymlink case TypeChar: // character device node diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index cb843db..24b9311 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -16,17 +16,41 @@ func init() { } func statUnix(fi os.FileInfo, h *Header) error { - sys, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return nil + switch sys := fi.Sys().(type) { + case *syscall.Stat_t: + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + if fi.Mode().IsRegular() && sys.Nlink > 1 { + h.Typeflag = TypeLink + h.Size = 0 + // TODO(vbatts): Linkname? 
+ } + case *Header: + // for the roundtrip logic + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } } - h.Uid = int(sys.Uid) - h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. - h.AccessTime = statAtime(sys) - h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? return nil } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index ed333f3..d63c072 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -147,17 +147,6 @@ func TestHeaderRoundTrip(t *testing.T) { }, fm: 0644, }, - // hard link. - { - h: &Header{ - Name: "hard.txt", - Mode: 0644 | c_ISLNK, - Size: 0, - ModTime: time.Unix(1360600916, 0), - Typeflag: TypeLink, - }, - fm: 0644 | os.ModeSymlink, - }, // symbolic link. { h: &Header{ @@ -246,6 +235,33 @@ func TestHeaderRoundTrip(t *testing.T) { }, fm: 0600 | os.ModeSticky, }, + // hard link. + { + h: &Header{ + Name: "hard.txt", + Mode: 0644 | c_ISREG, + Size: 0, + Linkname: "file.txt", + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, + }, + fm: 0644, + }, + // More information. 
+ { + h: &Header{ + Name: "info.txt", + Mode: 0600 | c_ISREG, + Size: 0, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1360602540, 0), + Uname: "slartibartfast", + Gname: "users", + Typeflag: TypeReg, + }, + fm: 0600, + }, } for i, g := range golden { @@ -268,12 +284,37 @@ func TestHeaderRoundTrip(t *testing.T) { if got, want := h2.Size, g.h.Size; got != want { t.Errorf("i=%d: Size: got %v, want %v", i, got, want) } + if got, want := h2.Uid, g.h.Uid; got != want { + t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) + } + if got, want := h2.Gid, g.h.Gid; got != want { + t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) + } + if got, want := h2.Uname, g.h.Uname; got != want { + t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) + } + if got, want := h2.Gname, g.h.Gname; got != want { + t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) + } + if got, want := h2.Linkname, g.h.Linkname; got != want { + t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) + } + if got, want := h2.Typeflag, g.h.Typeflag; got != want { + t.Logf("%#v %#v", g.h, fi.Sys()) + t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) + } if got, want := h2.Mode, g.h.Mode; got != want { t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) } if got, want := fi.Mode(), g.fm; got != want { t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) } + if got, want := h2.AccessTime, g.h.AccessTime; got != want { + t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) + } + if got, want := h2.ChangeTime, g.h.ChangeTime; got != want { + t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) + } if got, want := h2.ModTime, g.h.ModTime; got != want { t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) } diff --git a/archive/tar/testdata/hardlink.tar b/archive/tar/testdata/hardlink.tar new file mode 100644 index 0000000000000000000000000000000000000000..9cd1a26572e44150ded8a628fefb28fa089645d1 GIT binary patch literal 2560 
zcmYex%t_TNsVHHfAus>}GZPaAAZ2K7Y5<}Q3?Y0F6C}!DXk=n;YGz~#VjCD58=09i zC>YStO>m=2i%SxVfKDn)N-QZUh6`gbN{dsA@JNF_1@sD>#xP)T3IyjQ7L{Zs0g1H4 z;u5aG>Bv!6(JTZq5{ps>JpTi;4Ql>3F*i14P%uoRL*X>S^FPfJ)~LawAut*OgFXZR DcLg^L literal 0 HcmV?d00001 diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 650899a..fe46a67 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -147,6 +147,44 @@ var writerTests = []*writerTest{ }, }, }, + // This file was produced using gnu tar 1.26 + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + { + file: "testdata/hardlink.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: "file.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Size: 15, + ModTime: time.Unix(1425484303, 0), + Typeflag: '0', + Uname: "vbatts", + Gname: "users", + }, + contents: "Slartibartfast\n", + }, + { + header: &Header{ + Name: "hard.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Size: 0, + ModTime: time.Unix(1425484303, 0), + Typeflag: '1', + Linkname: "file.txt", + Uname: "vbatts", + Gname: "users", + }, + // no contents + }, + }, + }, } // Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. 
From 8eee43d0df37ee91baff4b12af1821845080d0df Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 26 Jun 2015 14:31:35 -0700 Subject: [PATCH 40/95] archive/tar: disable new failing test on windows and plan9 Update #11426 Change-Id: If406d2efcc81965825a63c76f5448d544ba2a740 Reviewed-on: https://go-review.googlesource.com/11590 Reviewed-by: Austin Clements Signed-off-by: Vincent Batts --- archive/tar/tar_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index d63c072..715884a 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -10,6 +10,7 @@ import ( "os" "path" "reflect" + "runtime" "strings" "testing" "time" @@ -135,6 +136,9 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { + if runtime.GOOS == "windows" || runtime.GOOS == "plan9" { + t.Skipf("skipping on %s; issue 11426", runtime.GOOS) + } golden := []headerRoundTripTest{ // regular file. { From 27e18409b9d3df7bfa99336f0669b649c4384581 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 26 Jun 2015 15:13:52 -0700 Subject: [PATCH 41/95] archive/tar: also skip header roundtrip test on nacl Update #11426 Change-Id: I7abc4ed2241a7a3af6d57c934786f36de4f97b77 Reviewed-on: https://go-review.googlesource.com/11592 Run-TryBot: Brad Fitzpatrick Reviewed-by: Brad Fitzpatrick Signed-off-by: Vincent Batts --- archive/tar/tar_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 715884a..3fdd83d 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -136,7 +136,7 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { - if runtime.GOOS == "windows" || runtime.GOOS == "plan9" { + if runtime.GOOS == "windows" || runtime.GOOS == "plan9" || runtime.GOOS == "nacl" { t.Skipf("skipping on %s; issue 11426", runtime.GOOS) } golden := []headerRoundTripTest{ From 3b34dbd368ec2fd76f6d552714ae954056cc58ec 
Mon Sep 17 00:00:00 2001 From: Alex Brainman Date: Mon, 29 Jun 2015 16:42:28 +1000 Subject: [PATCH 42/95] archive/tar: move round-trip reading into common os file Fixes #11426 Change-Id: I77368b0e852149ed4533e139cc43887508ac7f78 Reviewed-on: https://go-review.googlesource.com/11662 Reviewed-by: Austin Clements Reviewed-by: Russ Cox Signed-off-by: Vincent Batts --- archive/tar/common.go | 24 +++++++++++++++++++ archive/tar/stat_unix.go | 51 +++++++++++++--------------------------- archive/tar/tar_test.go | 4 ---- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/archive/tar/common.go b/archive/tar/common.go index 855e5fc..c31df06 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -249,6 +249,30 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fm&os.ModeSticky != 0 { h.Mode |= c_ISVTX } + // If possible, populate additional fields from OS-specific + // FileInfo fields. + if sys, ok := fi.Sys().(*Header); ok { + // This FileInfo came from a Header (not the OS). Use the + // original Header to populate all remaining fields. + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } + } if sysStat != nil { return h, sysStat(fi, h) } diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index 24b9311..27d112f 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -16,41 +16,22 @@ func init() { } func statUnix(fi os.FileInfo, h *Header) error { - switch sys := fi.Sys().(type) { - case *syscall.Stat_t: - h.Uid = int(sys.Uid) - h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. 
- h.AccessTime = statAtime(sys) - h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? - if fi.Mode().IsRegular() && sys.Nlink > 1 { - h.Typeflag = TypeLink - h.Size = 0 - // TODO(vbatts): Linkname? - } - case *Header: - // for the roundtrip logic - h.Uid = sys.Uid - h.Gid = sys.Gid - h.Uname = sys.Uname - h.Gname = sys.Gname - h.AccessTime = sys.AccessTime - h.ChangeTime = sys.ChangeTime - if sys.Xattrs != nil { - h.Xattrs = make(map[string]string) - for k, v := range sys.Xattrs { - h.Xattrs[k] = v - } - } - if sys.Typeflag == TypeLink { - // hard link - h.Typeflag = TypeLink - h.Size = 0 - h.Linkname = sys.Linkname - } + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + if fi.Mode().IsRegular() && sys.Nlink > 1 { + h.Typeflag = TypeLink + h.Size = 0 + // TODO(vbatts): Linkname? } return nil } diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index 3fdd83d..d63c072 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -10,7 +10,6 @@ import ( "os" "path" "reflect" - "runtime" "strings" "testing" "time" @@ -136,9 +135,6 @@ type headerRoundTripTest struct { } func TestHeaderRoundTrip(t *testing.T) { - if runtime.GOOS == "windows" || runtime.GOOS == "plan9" || runtime.GOOS == "nacl" { - t.Skipf("skipping on %s; issue 11426", runtime.GOOS) - } golden := []headerRoundTripTest{ // regular file. 
{ From 4d4b53c78ba7d13a7971e493b8913295c4575f70 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 3 Aug 2015 12:26:38 -0400 Subject: [PATCH 43/95] archive/tar: don't treat multiple file system links as a tar hardlink Do not assume that if stat shows multiple links that we should mark the file as a hardlink in the tar format. If the hardlink link was not referenced, this caused a link to "/". On an overlay file system, all files have multiple links. The caller must keep the inode references and set TypeLink, Size = 0, and LinkName themselves. Change-Id: I873b8a235bc8f8fbb271db74ee54232da36ca013 Reviewed-on: https://go-review.googlesource.com/13045 Reviewed-by: Ian Lance Taylor Signed-off-by: Vincent Batts --- archive/tar/stat_unix.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/archive/tar/stat_unix.go b/archive/tar/stat_unix.go index 27d112f..cb843db 100644 --- a/archive/tar/stat_unix.go +++ b/archive/tar/stat_unix.go @@ -28,10 +28,5 @@ func statUnix(fi os.FileInfo, h *Header) error { h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) // TODO(bradfitz): major/minor device numbers? - if fi.Mode().IsRegular() && sys.Nlink > 1 { - h.Typeflag = TypeLink - h.Size = 0 - // TODO(vbatts): Linkname? - } return nil } From 414a687f83431ceb46d908ddc38a9e690e95c8f2 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 3 Sep 2015 15:01:25 -0400 Subject: [PATCH 44/95] README: usage --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 0a1b2fc..b89afe5 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,11 @@ The command line utilitiy is installable via: go get github.com/vbatts/tar-split/cmd/tar-split ``` +## Usage + +For cli usage, see its [README.md](cmd/tar-split/README.md). +For the library see the [docs](#docs) + ## Caveat Eventually this should detect TARs that this is not possible with. 
From 1148e7ee3b91e235af1c9172aa85810d6c0dd73e Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Fri, 11 Sep 2015 08:48:57 -0700 Subject: [PATCH 45/95] Add go 1.5.1 to CI Signed-off-by: Alexander Morozov --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 783781b..7b2d094 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: go go: - tip + - 1.5.1 - 1.4.2 - 1.3.3 - 1.2.2 From 286535320029f669a2b4b96723a2a1ba313214a1 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 13:30:00 -0400 Subject: [PATCH 46/95] common: add a UTF-8 check helper --- tar/common/utf8.go | 21 +++++++++++++++++++++ tar/common/utf8_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tar/common/utf8.go create mode 100644 tar/common/utf8_test.go diff --git a/tar/common/utf8.go b/tar/common/utf8.go new file mode 100644 index 0000000..ffb1646 --- /dev/null +++ b/tar/common/utf8.go @@ -0,0 +1,21 @@ +package common + +// IsValidUtf8String checks for in valid UTF-8 characters +func IsValidUtf8String(s string) bool { + for _, r := range s { + if int(r) == 0xfffd { + return false + } + } + return true +} + +// IsValidUtf8Btyes checks for in valid UTF-8 characters +func IsValidUtf8Btyes(b []byte) bool { + for _, r := range string(b) { + if int(r) == 0xfffd { + return false + } + } + return true +} diff --git a/tar/common/utf8_test.go b/tar/common/utf8_test.go new file mode 100644 index 0000000..e546f55 --- /dev/null +++ b/tar/common/utf8_test.go @@ -0,0 +1,34 @@ +package common + +import "testing" + +func TestStringValidation(t *testing.T) { + cases := []struct { + value string + result bool + }{ + {"aä\uFFFD本☺", false}, + {"aä本☺", true}, + } + + for _, c := range cases { + if got := IsValidUtf8String(c.value); got != c.result { + t.Errorf("string %q - expected %v, got %v", c.value, c.result, got) + } + } +} +func TestBytesValidation(t *testing.T) { + cases := []struct { + 
value []byte + result bool + }{ + {[]byte{0xE4}, false}, + {[]byte("aä本☺"), true}, + } + + for _, c := range cases { + if got := IsValidUtf8Btyes(c.value); got != c.result { + t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got) + } + } +} From 39d06b9dc4eaf75c34407e8fd8c161d54e4c6b4d Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:13:54 -0400 Subject: [PATCH 47/95] tar/common: get index of first invalid utf-8 char --- tar/common/utf8.go | 19 ++++++++++--------- tar/common/utf8_test.go | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tar/common/utf8.go b/tar/common/utf8.go index ffb1646..568e929 100644 --- a/tar/common/utf8.go +++ b/tar/common/utf8.go @@ -2,20 +2,21 @@ package common // IsValidUtf8String checks for in valid UTF-8 characters func IsValidUtf8String(s string) bool { - for _, r := range s { - if int(r) == 0xfffd { - return false - } - } - return true + return InvalidUtf8Index([]byte(s)) == -1 } // IsValidUtf8Btyes checks for in valid UTF-8 characters func IsValidUtf8Btyes(b []byte) bool { - for _, r := range string(b) { + return InvalidUtf8Index(b) == -1 +} + +// InvalidUtf8Index returns the offset of the first invalid UTF-8 character. +// Default is to return -1 for a wholly valid sequence. 
+func InvalidUtf8Index(b []byte) int { + for i, r := range string(b) { if int(r) == 0xfffd { - return false + return i } } - return true + return -1 } diff --git a/tar/common/utf8_test.go b/tar/common/utf8_test.go index e546f55..3cf81df 100644 --- a/tar/common/utf8_test.go +++ b/tar/common/utf8_test.go @@ -6,27 +6,36 @@ func TestStringValidation(t *testing.T) { cases := []struct { value string result bool + offset int }{ - {"aä\uFFFD本☺", false}, - {"aä本☺", true}, + {"aä\uFFFD本☺", false, 3}, + {"aä本☺", true, -1}, } for _, c := range cases { + if i := InvalidUtf8Index([]byte(c.value)); i != c.offset { + t.Errorf("string %q - offset expected %d, got %d", c.value, c.offset, i) + } if got := IsValidUtf8String(c.value); got != c.result { t.Errorf("string %q - expected %v, got %v", c.value, c.result, got) } } } + func TestBytesValidation(t *testing.T) { cases := []struct { value []byte result bool + offset int }{ - {[]byte{0xE4}, false}, - {[]byte("aä本☺"), true}, + {[]byte{0xE4}, false, 0}, + {[]byte("aä本☺"), true, -1}, } for _, c := range cases { + if i := InvalidUtf8Index(c.value); i != c.offset { + t.Errorf("bytes %q - offset expected %d, got %d", c.value, c.offset, i) + } if got := IsValidUtf8Btyes(c.value); got != c.result { t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got) } From 032efafc29636d38ea45b9a57fe0bad7dd90d124 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:20:09 -0400 Subject: [PATCH 48/95] tar/storage: work with raw (invalid utf8) names When the entry name is not UTF-8, for example ISO-8859-1, then store the raw bytes. To accommodate this, we will have getters and setters for the entry's name now. Since this most heavily affects the json marshalling, we'll double check the sanity of the name before storing it in the JSONPacker. 
--- tar/storage/entry.go | 43 +++++++++++++++++++++++++++++++++++++++ tar/storage/entry_test.go | 35 ++++++++++++++++++++++++++++--- tar/storage/packer.go | 14 +++++++++++-- 3 files changed, 87 insertions(+), 5 deletions(-) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index 38fe7ba..a152ac2 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -1,5 +1,11 @@ package storage +import ( + "fmt" + + "github.com/vbatts/tar-split/tar/common" +) + // Entries is for sorting by Position type Entries []Entry @@ -33,7 +39,44 @@ const ( type Entry struct { Type Type `json:"type"` Name string `json:"name,omitempty"` + NameRaw []byte `json:"name_raw,omitempty"` Size int64 `json:"size,omitempty"` Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } + +// SetName will check name for valid UTF-8 string, and set the appropriate +// field. See https://github.com/vbatts/tar-split/issues/17 +func (e *Entry) SetName(name string) { + if common.IsValidUtf8String(name) { + e.Name = name + } else { + e.NameRaw = []byte(name) + } +} + +// SetNameBytes will check name for valid UTF-8 string, and set the appropriate +// field +func (e *Entry) SetNameBytes(name []byte) { + if !common.IsValidUtf8Btyes(name) { + e.NameRaw = name + } else { + e.Name = string(name) + } +} + +// GetName returns the string for the entry's name, regardless of the field stored in +func (e *Entry) GetName() string { + if len(e.NameRaw) > 0 { + return fmt.Sprintf("%s", e.NameRaw) + } + return e.Name +} + +// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in +func (e *Entry) GetNameBytes() []byte { + if len(e.NameRaw) > 0 { + return e.NameRaw + } + return []byte(e.Name) +} diff --git a/tar/storage/entry_test.go b/tar/storage/entry_test.go index c797bca..90d103e 100644 --- a/tar/storage/entry_test.go +++ b/tar/storage/entry_test.go @@ -39,10 +39,10 @@ func TestEntries(t *testing.T) 
{ func TestFile(t *testing.T) { f := Entry{ Type: FileType, - Name: "./hello.txt", Size: 100, Position: 2, } + f.SetName("./hello.txt") buf, err := json.Marshal(f) if err != nil { @@ -54,8 +54,37 @@ func TestFile(t *testing.T) { t.Fatal(err) } - if f.Name != f1.Name { - t.Errorf("expected Name %q, got %q", f.Name, f1.Name) + if f.GetName() != f1.GetName() { + t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) + } + if f.Size != f1.Size { + t.Errorf("expected Size %q, got %q", f.Size, f1.Size) + } + if f.Position != f1.Position { + t.Errorf("expected Position %q, got %q", f.Position, f1.Position) + } +} + +func TestFileRaw(t *testing.T) { + f := Entry{ + Type: FileType, + Size: 100, + Position: 2, + } + f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74}) + + buf, err := json.Marshal(f) + if err != nil { + t.Fatal(err) + } + + f1 := Entry{} + if err = json.Unmarshal(buf, &f1); err != nil { + t.Fatal(err) + } + + if f.GetName() != f1.GetName() { + t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index a02a19a..1ea8208 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -6,6 +6,8 @@ import ( "errors" "io" "path/filepath" + + "github.com/vbatts/tar-split/tar/common" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the @@ -61,7 +63,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // check for dup name if e.Type == FileType { - cName := filepath.Clean(e.Name) + cName := filepath.Clean(e.GetName()) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } @@ -93,9 +95,17 @@ type jsonPacker struct { type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { + // if Name is not valid utf8, switch it to raw first. 
+ if e.Name != "" { + if !common.IsValidUtf8String(e.Name) { + e.NameRaw = []byte(e.Name) + e.Name = "" + } + } + // check early for dup name if e.Type == FileType { - cName := filepath.Clean(e.Name) + cName := filepath.Clean(e.GetName()) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } From cde639172fb276d8fbc3e0bbee73791315e30f04 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 23 Sep 2015 15:24:15 -0400 Subject: [PATCH 49/95] tar/asm: work with non-utf8 entry names --- tar/asm/assemble.go | 4 +-- tar/asm/assemble_test.go | 60 +++++++++++++++++++++++++++---- tar/asm/disassemble.go | 11 +++--- tar/asm/testdata/iso-8859.tar.gz | Bin 0 -> 187 bytes 4 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 tar/asm/testdata/iso-8859.tar.gz diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 74317cb..83d6426 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -39,7 +39,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose if entry.Size == 0 { continue } - fh, err := fg.Get(entry.Name) + fh, err := fg.Get(entry.GetName()) if err != nil { pw.CloseWithError(err) return @@ -56,7 +56,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose // but since it's coming through the PipeReader, the context of // _which_ file would be lost... 
fh.Close() - pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) + pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) return } fh.Close() diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index da515f2..e7609c0 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -11,9 +11,38 @@ import ( "os" "testing" + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/common" "github.com/vbatts/tar-split/tar/storage" ) +func TestISO8859(t *testing.T) { + fh, err := os.Open("./testdata/iso-8859.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + tr := tar.NewReader(gzRdr) + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + t.Error(err) + } + break + } + fmt.Println(hdr.Name) + if !common.IsValidUtf8String(hdr.Name) { + fmt.Println([]byte(hdr.Name)) + } + } +} + var entries = []struct { Entry storage.Entry Body []byte @@ -36,6 +65,15 @@ var entries = []struct { }, Body: []byte("café con leche, por favor"), }, + { + Entry: storage.Entry{ + Type: storage.FileType, + NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip. 
+ Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, } var entriesMangled = []struct { Entry storage.Entry @@ -61,6 +99,15 @@ var entriesMangled = []struct { // san not con Body: []byte("café sans leche, por favor"), }, + { + Entry: storage.Entry{ + Type: storage.FileType, + NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, + Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, } func TestTarStreamMangledGetterPutter(t *testing.T) { @@ -69,19 +116,19 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { // first lets prep a GetPutter and Packer for i := range entries { if entries[i].Entry.Type == storage.FileType { - j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body)) + j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body)) if err != nil { t.Error(err) } if j != entries[i].Entry.Size { t.Errorf("size %q: expected %d; got %d", - entries[i].Entry.Name, + entries[i].Entry.GetName(), entries[i].Entry.Size, j) } if !bytes.Equal(csum, entries[i].Entry.Payload) { t.Errorf("checksum %q: expected %v; got %v", - entries[i].Entry.Name, + entries[i].Entry.GetName(), entries[i].Entry.Payload, csum) } @@ -90,7 +137,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { for _, e := range entriesMangled { if e.Entry.Type == storage.FileType { - rdr, err := fgp.Get(e.Entry.Name) + rdr, err := fgp.Get(e.Entry.GetName()) if err != nil { t.Error(err) } @@ -105,7 +152,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { if bytes.Equal(csum, e.Entry.Payload) { t.Errorf("wrote %d bytes. checksum for %q should not have matched! 
%v", i, - e.Entry.Name, + e.Entry.GetName(), csum) } } @@ -121,6 +168,7 @@ func TestTarStream(t *testing.T) { {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, + {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, } for _, tc := range testCases { @@ -163,7 +211,7 @@ func TestTarStream(t *testing.T) { t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) } - t.Logf("%s", w.String()) // if we fail, then show the packed info + //t.Logf("%s", w.String()) // if we fail, then show the packed info // If we've made it this far, then we'll turn it around and create a tar // stream from the packed metadata and buffered file contents. diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 7986890..54ef23a 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -92,13 +92,16 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } } - // File entries added, regardless of size - _, err = p.AddEntry(storage.Entry{ + entry := storage.Entry{ Type: storage.FileType, - Name: hdr.Name, Size: hdr.Size, Payload: csum, - }) + } + // For proper marshalling of non-utf8 characters + entry.SetName(hdr.Name) + + // File entries added, regardless of size + _, err = p.AddEntry(entry) if err != nil { pW.CloseWithError(err) return diff --git a/tar/asm/testdata/iso-8859.tar.gz b/tar/asm/testdata/iso-8859.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3e87f30a45f5dbf742a51c5c8252688452aeb2d0 GIT binary patch literal 187 zcmb2|=HU3ek133aIkPxl*TTZoQm-Vjh~e!eN3KH#0uC3~*t+To%(z?)O8{fJKT(T_prp5-2ZE8zMWtG k&#}fJ?8SbI-rs7jz2?@jhmJ8IgNS~1mB~df7&I6d0D{m~djJ3c literal 0 HcmV?d00001 From 8a361ef0d867413199594d9f564d0acd1053244b Mon Sep 17 00:00:00 2001 From: Vincent Batts 
Date: Thu, 24 Sep 2015 09:51:58 -0400 Subject: [PATCH 50/95] tar/storage: Sprintf is unnecessary fmt.Sprintf() vs string() for this []byte conversion is too much and does not provide any further safety. https://gist.github.com/vbatts/ab17181086aed558dd3a --- tar/storage/entry.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index a152ac2..b61758e 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -1,10 +1,6 @@ package storage -import ( - "fmt" - - "github.com/vbatts/tar-split/tar/common" -) +import "github.com/vbatts/tar-split/tar/common" // Entries is for sorting by Position type Entries []Entry @@ -68,7 +64,7 @@ func (e *Entry) SetNameBytes(name []byte) { // GetName returns the string for the entry's name, regardless of the field stored in func (e *Entry) GetName() string { if len(e.NameRaw) > 0 { - return fmt.Sprintf("%s", e.NameRaw) + return string(e.NameRaw) } return e.Name } From 27876e49c230ff7b95baafe59483332dce8a4e1b Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 24 Sep 2015 12:24:31 -0700 Subject: [PATCH 51/95] Update travis to go1.4.3 Signed-off-by: Alexander Morozov --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7b2d094..a053d3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: go go: - tip - 1.5.1 - - 1.4.2 + - 1.4.3 - 1.3.3 - 1.2.2 From 7e38cefd4bf1a3ee9fbd1f8ee72dafb55889a5b6 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 25 Sep 2015 14:33:24 -0400 Subject: [PATCH 52/95] common: remove in favor of stdlib `unicode/utf8` --- tar/asm/assemble_test.go | 4 ++-- tar/common/utf8.go | 22 -------------------- tar/common/utf8_test.go | 43 ---------------------------------------- tar/storage/entry.go | 10 +++++----- tar/storage/packer.go | 5 ++--- 5 files changed, 9 insertions(+), 75 deletions(-) delete mode 100644 tar/common/utf8.go delete mode 100644 
tar/common/utf8_test.go diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index e7609c0..29b7a17 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -10,9 +10,9 @@ import ( "io/ioutil" "os" "testing" + "unicode/utf8" "github.com/vbatts/tar-split/archive/tar" - "github.com/vbatts/tar-split/tar/common" "github.com/vbatts/tar-split/tar/storage" ) @@ -37,7 +37,7 @@ func TestISO8859(t *testing.T) { break } fmt.Println(hdr.Name) - if !common.IsValidUtf8String(hdr.Name) { + if !utf8.ValidString(hdr.Name) { fmt.Println([]byte(hdr.Name)) } } diff --git a/tar/common/utf8.go b/tar/common/utf8.go deleted file mode 100644 index 568e929..0000000 --- a/tar/common/utf8.go +++ /dev/null @@ -1,22 +0,0 @@ -package common - -// IsValidUtf8String checks for in valid UTF-8 characters -func IsValidUtf8String(s string) bool { - return InvalidUtf8Index([]byte(s)) == -1 -} - -// IsValidUtf8Btyes checks for in valid UTF-8 characters -func IsValidUtf8Btyes(b []byte) bool { - return InvalidUtf8Index(b) == -1 -} - -// InvalidUtf8Index returns the offset of the first invalid UTF-8 character. -// Default is to return -1 for a wholly valid sequence. 
-func InvalidUtf8Index(b []byte) int { - for i, r := range string(b) { - if int(r) == 0xfffd { - return i - } - } - return -1 -} diff --git a/tar/common/utf8_test.go b/tar/common/utf8_test.go deleted file mode 100644 index 3cf81df..0000000 --- a/tar/common/utf8_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package common - -import "testing" - -func TestStringValidation(t *testing.T) { - cases := []struct { - value string - result bool - offset int - }{ - {"aä\uFFFD本☺", false, 3}, - {"aä本☺", true, -1}, - } - - for _, c := range cases { - if i := InvalidUtf8Index([]byte(c.value)); i != c.offset { - t.Errorf("string %q - offset expected %d, got %d", c.value, c.offset, i) - } - if got := IsValidUtf8String(c.value); got != c.result { - t.Errorf("string %q - expected %v, got %v", c.value, c.result, got) - } - } -} - -func TestBytesValidation(t *testing.T) { - cases := []struct { - value []byte - result bool - offset int - }{ - {[]byte{0xE4}, false, 0}, - {[]byte("aä本☺"), true, -1}, - } - - for _, c := range cases { - if i := InvalidUtf8Index(c.value); i != c.offset { - t.Errorf("bytes %q - offset expected %d, got %d", c.value, c.offset, i) - } - if got := IsValidUtf8Btyes(c.value); got != c.result { - t.Errorf("bytes %q - expected %v, got %v", c.value, c.result, got) - } - } -} diff --git a/tar/storage/entry.go b/tar/storage/entry.go index b61758e..c91e7ea 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -1,6 +1,6 @@ package storage -import "github.com/vbatts/tar-split/tar/common" +import "unicode/utf8" // Entries is for sorting by Position type Entries []Entry @@ -44,7 +44,7 @@ type Entry struct { // SetName will check name for valid UTF-8 string, and set the appropriate // field. 
See https://github.com/vbatts/tar-split/issues/17 func (e *Entry) SetName(name string) { - if common.IsValidUtf8String(name) { + if utf8.ValidString(name) { e.Name = name } else { e.NameRaw = []byte(name) @@ -54,10 +54,10 @@ func (e *Entry) SetName(name string) { // SetNameBytes will check name for valid UTF-8 string, and set the appropriate // field func (e *Entry) SetNameBytes(name []byte) { - if !common.IsValidUtf8Btyes(name) { - e.NameRaw = name - } else { + if utf8.Valid(name) { e.Name = string(name) + } else { + e.NameRaw = name } } diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 1ea8208..0c9d99b 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -6,8 +6,7 @@ import ( "errors" "io" "path/filepath" - - "github.com/vbatts/tar-split/tar/common" + "unicode/utf8" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the @@ -97,7 +96,7 @@ type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // if Name is not valid utf8, switch it to raw first. if e.Name != "" { - if !common.IsValidUtf8String(e.Name) { + if !utf8.ValidString(e.Name) { e.NameRaw = []byte(e.Name) e.Name = "" } From 10250c25e0cb4b64f89280d0dde72feff25ef7ab Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 25 Sep 2015 14:35:12 -0400 Subject: [PATCH 53/95] tar/asm: remove useless test The iso-8859-1 archive is already tested round trip, and this test did not do anything really. 
--- tar/asm/assemble_test.go | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 29b7a17..3d0c99c 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -10,39 +10,10 @@ import ( "io/ioutil" "os" "testing" - "unicode/utf8" - "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) -func TestISO8859(t *testing.T) { - fh, err := os.Open("./testdata/iso-8859.tar.gz") - if err != nil { - t.Fatal(err) - } - defer fh.Close() - gzRdr, err := gzip.NewReader(fh) - if err != nil { - t.Fatal(err) - } - defer gzRdr.Close() - tr := tar.NewReader(gzRdr) - for { - hdr, err := tr.Next() - if err != nil { - if err != io.EOF { - t.Error(err) - } - break - } - fmt.Println(hdr.Name) - if !utf8.ValidString(hdr.Name) { - fmt.Println([]byte(hdr.Name)) - } - } -} - var entries = []struct { Entry storage.Entry Body []byte From 7ea74e1c31d45d604073ed3a4a3d1ca8e7692a83 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 16 Oct 2015 16:41:09 -0400 Subject: [PATCH 54/95] demo: basic command Signed-off-by: Vincent Batts --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index b89afe5..260ff84 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,14 @@ go get github.com/vbatts/tar-split/cmd/tar-split For cli usage, see its [README.md](cmd/tar-split/README.md). For the library see the [docs](#docs) +## Demo + +### Basic disassembly and assembly + +![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805) +[youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc) + + ## Caveat Eventually this should detect TARs that this is not possible with. 
From bece0c70095443be20deb1c7db2643ff25996044 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 16 Oct 2015 17:05:18 -0400 Subject: [PATCH 55/95] demo: docker layer checksums Signed-off-by: Vincent Batts --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 260ff84..90a8edf 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,18 @@ For the library see the [docs](#docs) ### Basic disassembly and assembly +This demonstrates the `tar-split` command and how to assemble a tar archive from the `tar-data.json.gz` + + ![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805) [youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc) +### Docker layer preservation + +This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content. + +![docker tar-split demo](https://www.youtube.com/upload_thumbnail?v=tV_Dia8E8xw&t=2&ts=1445028436275) +[youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw) ## Caveat From 8b20f9161d2cd89438fd90a228464d545647a237 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Mon, 30 Nov 2015 09:52:44 -0800 Subject: [PATCH 56/95] Optimize JSON decoding This allows to avoid extra allocations on `ReadBytes` and decoding buffers. 
Signed-off-by: Tonis Tiigi --- tar/storage/packer.go | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/tar/storage/packer.go b/tar/storage/packer.go index 0c9d99b..aba6948 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -1,7 +1,6 @@ package storage import ( - "bufio" "encoding/json" "errors" "io" @@ -33,31 +32,15 @@ type PackUnpacker interface { */ type jsonUnpacker struct { - r io.Reader - b *bufio.Reader - isEOF bool - seen seenNames + seen seenNames + dec *json.Decoder } func (jup *jsonUnpacker) Next() (*Entry, error) { var e Entry - if jup.isEOF { - // since ReadBytes() will return read bytes AND an EOF, we handle it this - // round-a-bout way so we can Unmarshal the tail with relevant errors, but - // still get an io.EOF when the stream is ended. - return nil, io.EOF - } - line, err := jup.b.ReadBytes('\n') - if err != nil && err != io.EOF { + err := jup.dec.Decode(&e) + if err != nil { return nil, err - } else if err == io.EOF { - jup.isEOF = true - } - - err = json.Unmarshal(line, &e) - if err != nil && jup.isEOF { - // if the remainder actually _wasn't_ a remaining json structure, then just EOF - return nil, io.EOF } // check for dup name @@ -78,8 +61,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { // Each Entry read are expected to be delimited by new line. func NewJSONUnpacker(r io.Reader) Unpacker { return &jsonUnpacker{ - r: r, - b: bufio.NewReader(r), + dec: json.NewDecoder(r), seen: seenNames{}, } } From d80c6b3bb1ab559917e144804ad682bf5cdb82d9 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 1 Dec 2015 15:26:30 -0500 Subject: [PATCH 57/95] travis: drop go1.2 seems overly reasonable to support go1.3 and greater. 
:-) Signed-off-by: Vincent Batts --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a053d3b..c0a17c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ go: - 1.5.1 - 1.4.3 - 1.3.3 - - 1.2.2 # let us have pretty, fast Docker-based Travis workers! sudo: false From 11281e8c0930c0ed1d8829bfa005ac96d38386aa Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 1 Dec 2015 15:06:57 -0500 Subject: [PATCH 58/95] tar/storage: adding Getter Putter benchmark Signed-off-by: Vincent Batts --- tar/storage/packer_test.go | 57 +++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/tar/storage/packer_test.go b/tar/storage/packer_test.go index 1c6101f..7d93371 100644 --- a/tar/storage/packer_test.go +++ b/tar/storage/packer_test.go @@ -4,6 +4,8 @@ import ( "bytes" "compress/gzip" "io" + "io/ioutil" + "os" "testing" ) @@ -159,5 +161,58 @@ func TestGzip(t *testing.T) { if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } - +} + +func BenchmarkGetPut(b *testing.B) { + e := []Entry{ + Entry{ + Type: SegmentType, + Payload: []byte("how"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("y'all"), + }, + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("deadbeef"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("doin"), + }, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + func() { + fh, err := ioutil.TempFile("", "tar-split.") + if err != nil { + b.Fatal(err) + } + defer os.Remove(fh.Name()) + defer fh.Close() + + jp := NewJSONPacker(fh) + for i := range e { + if _, err := jp.AddEntry(e[i]); err != nil { + b.Fatal(err) + } + } + fh.Sync() + + up := NewJSONUnpacker(fh) + for { + _, err := up.Next() + if err != nil { + if err == io.EOF { + break + } + b.Fatal(err) + } + } + + }() + } + }) } From 23b6435e6bb902fe67a20272fead5d73269373ab Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Mon, 30 Nov 2015 09:57:07 
-0800 Subject: [PATCH 59/95] Optimize tar stream generation - New writeTo method allows to avoid creating extra pipe. - Copy with a pooled buffer instead of allocating new buffer for each file. - Avoid extra object allocations inside the loop. Signed-off-by: Tonis Tiigi --- tar/asm/assemble.go | 139 ++++++++++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 38 deletions(-) diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index 83d6426..d624450 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -3,8 +3,10 @@ package asm import ( "bytes" "fmt" + "hash" "hash/crc64" "io" + "sync" "github.com/vbatts/tar-split/tar/storage" ) @@ -23,45 +25,106 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose } pr, pw := io.Pipe() go func() { - for { - entry, err := up.Next() - if err != nil { - pw.CloseWithError(err) - return - } - switch entry.Type { - case storage.SegmentType: - if _, err := pw.Write(entry.Payload); err != nil { - pw.CloseWithError(err) - return - } - case storage.FileType: - if entry.Size == 0 { - continue - } - fh, err := fg.Get(entry.GetName()) - if err != nil { - pw.CloseWithError(err) - return - } - c := crc64.New(storage.CRCTable) - tRdr := io.TeeReader(fh, c) - if _, err := io.Copy(pw, tRdr); err != nil { - fh.Close() - pw.CloseWithError(err) - return - } - if !bytes.Equal(c.Sum(nil), entry.Payload) { - // I would rather this be a comparable ErrInvalidChecksum or such, - // but since it's coming through the PipeReader, the context of - // _which_ file would be lost... - fh.Close() - pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) - return - } - fh.Close() - } + err := WriteOutputTarStream(fg, up, pw) + if err != nil { + pw.CloseWithError(err) + } else { + pw.Close() } }() return pr } + +// WriteOutputTarStream writes assembled tar archive to a writer. +func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error { + // ... 
Since these are interfaces, this is possible, so let's not have a nil pointer + if fg == nil || up == nil { + return nil + } + var copyBuffer []byte + var crcHash hash.Hash + var crcSum []byte + var multiWriter io.Writer + for { + entry, err := up.Next() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + switch entry.Type { + case storage.SegmentType: + if _, err := w.Write(entry.Payload); err != nil { + return err + } + case storage.FileType: + if entry.Size == 0 { + continue + } + fh, err := fg.Get(entry.GetName()) + if err != nil { + return err + } + if crcHash == nil { + crcHash = crc64.New(storage.CRCTable) + crcSum = make([]byte, 8) + multiWriter = io.MultiWriter(w, crcHash) + copyBuffer = byteBufferPool.Get().([]byte) + defer byteBufferPool.Put(copyBuffer) + } else { + crcHash.Reset() + } + + if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { + fh.Close() + return err + } + + if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { + // I would rather this be a comparable ErrInvalidChecksum or such, + // but since it's coming through the PipeReader, the context of + // _which_ file would be lost... 
+ fh.Close() + return fmt.Errorf("file integrity checksum failed for %q", entry.GetName()) + } + fh.Close() + } + } +} + +var byteBufferPool = &sync.Pool{ + New: func() interface{} { + return make([]byte, 32*1024) + }, +} + +// copyWithBuffer is taken from stdlib io.Copy implementation +// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367 +func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { + for { + nr, er := src.Read(buf) + if nr > 0 { + nw, ew := dst.Write(buf[0:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er == io.EOF { + break + } + if er != nil { + err = er + break + } + } + return written, err +} From 2efe34695acfa872b8c5ba17ab958de0ef9cfdb3 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 2 Dec 2015 12:56:52 -0500 Subject: [PATCH 60/95] tar/asm: remove unneeded Tee Signed-off-by: Vincent Batts --- tar/asm/assemble_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 3d0c99c..cb16eed 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -7,7 +7,6 @@ import ( "fmt" "hash/crc64" "io" - "io/ioutil" "os" "testing" @@ -167,10 +166,7 @@ func TestTarStream(t *testing.T) { // get a sum of the stream after it has passed through to ensure it's the same. 
h0 := sha1.New() - tRdr0 := io.TeeReader(tarStream, h0) - - // read it all to the bit bucket - i, err := io.Copy(ioutil.Discard, tRdr0) + i, err := io.Copy(h0, tarStream) if err != nil { t.Fatal(err) } From 19b7e22058e0b57f031f3021bbdf0aa1881e099b Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 2 Dec 2015 14:36:02 -0500 Subject: [PATCH 61/95] tar/asm: basic benchmark on disasm/asm of testdata ``` PASS BenchmarkAsm-4 5 238968475 ns/op 66841059 B/op 2449 allocs/op ok _/home/vbatts/src/vb/tar-split/tar/asm 2.267s ``` Signed-off-by: Vincent Batts --- tar/asm/assemble_test.go | 71 ++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index cb16eed..c0c7f17 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -7,6 +7,7 @@ import ( "fmt" "hash/crc64" "io" + "io/ioutil" "os" "testing" @@ -129,17 +130,18 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } } +var testCases = []struct { + path string + expectedSHA1Sum string + expectedSize int64 +}{ + {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, + {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, + {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, + {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, +} + func TestTarStream(t *testing.T) { - testCases := []struct { - path string - expectedSHA1Sum string - expectedSize int64 - }{ - {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, - {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, - {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, - {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, - } for _, tc := range testCases { fh, err := os.Open(tc.path) @@ -201,3 +203,52 @@ func 
TestTarStream(t *testing.T) { } } } + +func BenchmarkAsm(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, tc := range testCases { + func() { + fh, err := os.Open(tc.path) + if err != nil { + b.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + b.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + b.Fatal(err) + } + // read it all to the bit bucket + i1, err := io.Copy(ioutil.Discard, tarStream) + if err != nil { + b.Fatal(err) + } + + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. + + rc := NewOutputTarStream(fgp, sup) + + i2, err := io.Copy(ioutil.Discard, rc) + if err != nil { + b.Fatal(err) + } + if i1 != i2 { + b.Errorf("%s: input(%d) and ouput(%d) byte count didn't match", tc.path, i1, i2) + } + }() + } + } +} From d50e5c9283da469398d84078519de569f617be6f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 3 Dec 2015 15:45:57 -0500 Subject: [PATCH 62/95] LICENSE: update LICENSE to BSD 3-clause Signed-off-by: Vincent Batts --- LICENSE | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/LICENSE b/LICENSE index 8ba5491..ca03685 100644 --- a/LICENSE +++ b/LICENSE @@ -1,19 +1,28 @@ Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following 
conditions: +All rights reserved. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. From b87f81631a2b1cb185737b5bea76a7e9e8c29723 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sun, 31 Jan 2016 01:39:10 -0500 Subject: [PATCH 63/95] version: mark 0.9.12 --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index 0b86fbf..c41a8f2 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version // AUTO-GENEREATED. DO NOT EDIT -// 2015-08-14 09:56:50.742727493 -0400 EDT +// 2016-01-31 01:39:06.012784413 -0500 EST // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.9.6-1-gc76e420" +var VERSION = "v0.9.12" \ No newline at end of file From 440ba9e519d0481f35a916c60be51d3f58f1a6a1 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 17 Sep 2015 16:07:38 -0700 Subject: [PATCH 64/95] archive/tar: remove dead code with USTAR path splitting Convert splitUSTARPath to return a bool rather than an error since the caller never ever uses the error other than to check if it is nil. Thus, we can remove errNameTooLong as well. Also, fold the checking of the length <= fileNameSize and whether the string is ASCII into the split function itself. Lastly, remove logic to set the MAGIC since that's already done on L200. Thus, setting the magic is redundant. There is no overall logic change. 
Updates #12638 Change-Id: I26b6992578199abad723c2a2af7f4fc078af9c17 Reviewed-on: https://go-review.googlesource.com/14723 Reviewed-by: David Symonds Run-TryBot: David Symonds --- archive/tar/writer.go | 52 +++++++++++++------------------------- archive/tar/writer_test.go | 34 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 9dbc01a..3547c17 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -23,7 +23,6 @@ var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") - errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) @@ -215,26 +214,14 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - suffix := hdr.Name - prefix := "" - if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { - var err error - prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) - if err == nil { - // ok we can use a ustar long name instead of pax, now correct the fields + prefix, suffix, ok := splitUSTARPath(hdr.Name) + if ok { + // Since we can encode in USTAR format, disable PAX header. + delete(paxHeaders, paxPath) - // remove the path field from the pax header. this will suppress the pax header - delete(paxHeaders, paxPath) - - // update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) - - // Use the ustar magic if we used ustar long names. 
- if len(prefix) > 0 && !tw.usedBinary { - copy(header[257:265], []byte("ustar\x00")) - } - } + // Update the path fields + tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) + tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) } } @@ -270,28 +257,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { return tw.err } -// writeUSTARLongName splits a USTAR long name hdr.Name. -// name must be < 256 characters. errNameTooLong is returned -// if hdr.Name can't be split. The splitting heuristic -// is compatible with gnu tar. -func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length > fileNamePrefixSize+1 { + if length <= fileNameSize || !isASCII(name) { + return "", "", false + } else if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } + i := strings.LastIndex(name[:length], "/") - // nlen contains the resulting length in the name field. - // plen contains the resulting length in the prefix field. 
- nlen := len(name) - i - 1 - plen := i + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - err = errNameTooLong - return + return "", "", false } - prefix, suffix = name[:i], name[i+1:] - return + return name[:i], name[i+1:], true } // writePaxHeader writes an extended pax header to the diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index fe46a67..caf40a8 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -544,3 +544,37 @@ func TestWriteAfterClose(t *testing.T) { t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) } } + +func TestSplitUSTARPath(t *testing.T) { + var sr = strings.Repeat + + var vectors = []struct { + input string // Input path + prefix string // Expected output prefix + suffix string // Expected output suffix + ok bool // Split success? + }{ + {"", "", "", false}, + {"abc", "", "", false}, + {"用戶名", "", "", false}, + {sr("a", fileNameSize), "", "", false}, + {sr("a", fileNameSize) + "/", "", "", false}, + {sr("a", fileNameSize) + "/a", sr("a", fileNameSize), "a", true}, + {sr("a", fileNamePrefixSize) + "/", "", "", false}, + {sr("a", fileNamePrefixSize) + "/a", sr("a", fileNamePrefixSize), "a", true}, + {sr("a", fileNameSize+1), "", "", false}, + {sr("/", fileNameSize+1), sr("/", fileNameSize-1), "/", true}, + {sr("a", fileNamePrefixSize) + "/" + sr("b", fileNameSize), + sr("a", fileNamePrefixSize), sr("b", fileNameSize), true}, + {sr("a", fileNamePrefixSize) + "//" + sr("b", fileNameSize), "", "", false}, + {sr("a/", fileNameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + } + + for _, v := range vectors { + prefix, suffix, ok := splitUSTARPath(v.input) + if prefix != v.prefix || suffix != v.suffix || ok != v.ok { + t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", + v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) + } + } +} From 
af15385a0daa2a76ac99546a89e1dc38ec289b8f Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 28 Sep 2015 16:38:16 -0700 Subject: [PATCH 65/95] archive/tar: fix bugs with sparseFileReader The sparseFileReader is prone to two different forms of denial-of-service attacks: * A malicious tar file can cause an infinite loop * A malicious tar file can cause arbitrary panics This results because of poor error checking/handling, which this CL fixes. While we are at it, add a plethora of unit tests to test for possible malicious inputs. Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929 Reviewed-on: https://go-review.googlesource.com/15115 Reviewed-by: Andrew Gerrand Run-TryBot: Andrew Gerrand TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 149 +++++++++++++++-------- archive/tar/reader_test.go | 236 ++++++++++++++++++++++++------------- 2 files changed, 258 insertions(+), 127 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 4168ea2..1f57508 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -12,6 +12,7 @@ import ( "errors" "io" "io/ioutil" + "math" "os" "strconv" "strings" @@ -70,12 +71,36 @@ type regFileReader struct { nb int64 // number of unread bytes for current file entry } -// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. +// A sparseFileReader is a numBytesReader for reading sparse file data from a +// tar archive. type sparseFileReader struct { - rfr *regFileReader // reads the sparse-encoded file data - sp []sparseEntry // the sparse map for the file - pos int64 // keeps track of file position - tot int64 // total size of the file + rfr numBytesReader // Reads the sparse-encoded file data + sp []sparseEntry // The sparse map for the file + pos int64 // Keeps track of file position + total int64 // Total size of the file +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// +// Sparse files are represented using a series of sparseEntrys. 
+// Despite the name, a sparseEntry represents an actual data fragment that +// references data found in the underlying archive stream. All regions not +// covered by a sparseEntry are logically filled with zeros. +// +// For example, if the underlying raw file contains the 10-byte data: +// var compactData = "abcdefgh" +// +// And the sparse map has the following entries: +// var sp = []sparseEntry{ +// {offset: 2, numBytes: 5} // Data fragment for [2..7] +// {offset: 18, numBytes: 3} // Data fragment for [18..21] +// } +// +// Then the content of the resulting sparse file with a "real" size of 25 is: +// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type sparseEntry struct { + offset int64 // Starting position of the fragment + numBytes int64 // Length of the fragment } // Keywords for GNU sparse files in a PAX extended header @@ -156,7 +181,10 @@ func (tr *Reader) Next() (*Header, error) { if sp != nil { // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } return hdr, nil case TypeGNULongName: @@ -631,21 +659,17 @@ func (tr *Reader) readHeader() *Header { if tr.err != nil { return nil } + // Current file is a GNU sparse file. Update the current file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil + } } return hdr } -// A sparseEntry holds a single entry in a sparse file's sparse map. -// A sparse entry indicates the offset and size in a sparse file of a -// block of data. -type sparseEntry struct { - offset int64 - numBytes int64 -} - // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. 
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. @@ -879,9 +903,33 @@ func (rfr *regFileReader) numBytes() int64 { return rfr.nb } -// readHole reads a sparse file hole ending at offset toOffset -func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { - n64 := toOffset - sfr.pos +// newSparseFileReader creates a new sparseFileReader, but validates all of the +// sparse entries before doing so. +func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { + if total < 0 { + return nil, ErrHeader // Total size cannot be negative + } + + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + for i, s := range sp { + switch { + case s.offset < 0 || s.numBytes < 0: + return nil, ErrHeader // Negative values are never okay + case s.offset > math.MaxInt64-s.numBytes: + return nil, ErrHeader // Integer overflow with large length + case s.offset+s.numBytes > total: + return nil, ErrHeader // Region extends beyond the "real" size + case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: + return nil, ErrHeader // Regions can't overlap and must be in order + } + } + return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil +} + +// readHole reads a sparse hole ending at endOffset. +func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { + n64 := endOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } @@ -895,49 +943,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - if len(sfr.sp) == 0 { - // No more data fragments to read from. 
- if sfr.pos < sfr.tot { - // We're in the last hole - n = sfr.readHole(b, sfr.tot) - return - } - // Otherwise, we're at the end of the file - return 0, io.EOF - } - if sfr.tot < sfr.sp[0].offset { - return 0, io.ErrUnexpectedEOF - } - if sfr.pos < sfr.sp[0].offset { - // We're in a hole - n = sfr.readHole(b, sfr.sp[0].offset) - return + // Skip past all empty fragments. + for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { + sfr.sp = sfr.sp[1:] } - // We're not in a hole, so we'll read from the next data fragment - posInFragment := sfr.pos - sfr.sp[0].offset - bytesLeft := sfr.sp[0].numBytes - posInFragment + // If there are no more fragments, then it is possible that there + // is one last sparse hole. + if len(sfr.sp) == 0 { + // This behavior matches the BSD tar utility. + // However, GNU tar stops returning data even if sfr.total is unmet. + if sfr.pos < sfr.total { + return sfr.readHole(b, sfr.total), nil + } + return 0, io.EOF + } + + // In front of a data fragment, so read a hole. + if sfr.pos < sfr.sp[0].offset { + return sfr.readHole(b, sfr.sp[0].offset), nil + } + + // In a data fragment, so read from it. + // This math is overflow free since we verify that offset and numBytes can + // be safely added when creating the sparseFileReader. 
+ endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment + bytesLeft := endPos - sfr.pos // Bytes left in fragment if int64(len(b)) > bytesLeft { - b = b[0:bytesLeft] + b = b[:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) - - if int64(n) == bytesLeft { - // We're done with this fragment - sfr.sp = sfr.sp[1:] + if err == io.EOF { + if sfr.pos < endPos { + err = io.ErrUnexpectedEOF // There was supposed to be more data + } else if sfr.pos < sfr.total { + err = nil // There is still an implicit sparse hole at the end + } } - if err == io.EOF && sfr.pos < sfr.tot { - // We reached the end of the last fragment's data, but there's a final hole - err = nil + if sfr.pos == endPos { + sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it } - return + return n, err } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.nb + return sfr.rfr.numBytes() } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index da01f26..bca0c05 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "strings" @@ -560,80 +561,155 @@ func TestSparseEndToEnd(t *testing.T) { } } -type sparseFileReadTest struct { - sparseData []byte - sparseMap []sparseEntry - realSize int64 - expected []byte -} - -var sparseFileReadTests = []sparseFileReadTest{ - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 8, - expected: []byte("ab\x00\x00\x00cde"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - realSize: 10, - expected: []byte("ab\x00\x00\x00cde\x00\x00"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, 
numBytes: 2}, - }, - realSize: 8, - expected: []byte("\x00abc\x00\x00de"), - }, - { - sparseData: []byte("abcde"), - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - realSize: 10, - expected: []byte("\x00abc\x00\x00de\x00\x00"), - }, - { - sparseData: []byte(""), - sparseMap: nil, - realSize: 2, - expected: []byte("\x00\x00"), - }, -} - func TestSparseFileReader(t *testing.T) { - for i, test := range sparseFileReadTests { - r := bytes.NewReader(test.sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: test.sparseMap, - pos: 0, - tot: test.realSize, - } - if sfr.numBytes() != nb { - t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) - } - buf, err := ioutil.ReadAll(sfr) + var vectors = []struct { + realSize int64 // Real size of the output file + sparseMap []sparseEntry // Input sparse map + sparseData string // Input compact data + expected string // Expected output data + err error // Expected error outcome + }{{ + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + sparseData: "abcde", + expected: "ab\x00\x00\x00cde\x00\x00", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 8, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 0}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de", + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 10, + sparseMap: 
[]sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + {offset: 8, numBytes: 0}, + }, + sparseData: "abcde", + expected: "\x00abc\x00\x00de\x00\x00", + }, { + realSize: 2, + sparseMap: []sparseEntry{}, + sparseData: "", + expected: "\x00\x00", + }, { + realSize: -2, + sparseMap: []sparseEntry{}, + err: ErrHeader, + }, { + realSize: -10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 5}, + }, + sparseData: "abcde", + err: io.ErrUnexpectedEOF, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 35, + sparseMap: []sparseEntry{ + {offset: math.MaxInt64, numBytes: 3}, + {offset: 6, numBytes: -5}, + }, + sparseData: "abcde", + err: ErrHeader, + }, { + realSize: 10, + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 2, numBytes: 2}, + }, + sparseData: "abcde", + err: ErrHeader, + }} + + for i, v := range vectors { + r := bytes.NewReader([]byte(v.sparseData)) + rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} + + var sfr *sparseFileReader + var err error + var buf []byte + + sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + goto fail } - if e := test.expected; !bytes.Equal(buf, e) { - t.Errorf("test %d: Contents = %v, want %v", i, buf, e) + if sfr.numBytes() != int64(len(v.sparseData)) { + t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) + } + buf, err = 
ioutil.ReadAll(sfr) + if err != nil { + goto fail + } + if string(buf) != v.expected { + t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) } if sfr.numBytes() != 0 { - t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) + t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) + } + + fail: + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) } } } @@ -646,10 +722,10 @@ func TestSparseIncrementalRead(t *testing.T) { r := bytes.NewReader(sparseData) nb := int64(r.Len()) sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - tot: int64(len(expected)), + rfr: ®FileReader{r: r, nb: nb}, + sp: sparseMap, + pos: 0, + total: int64(len(expected)), } // We'll read the data 6 bytes at a time, with a hole of size 10 at @@ -747,6 +823,11 @@ func TestUninitializedRead(t *testing.T) { } +// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests +// that Reader properly handles corrupted tar files. Given the increasing number +// of invalid/malicious that can crash Reader, we should modify TestReader to +// be able to test that intentionally corrupt tar files don't succeed or crash. + // Negative header size should not cause panic. // Issues 10959 and 10960. 
func TestNegativeHdrSize(t *testing.T) { @@ -771,14 +852,11 @@ func TestIssue10968(t *testing.T) { t.Fatal(err) } defer f.Close() + r := NewReader(f) _, err = r.Next() - if err != nil { - t.Fatal(err) - } - _, err = io.Copy(ioutil.Discard, r) - if err != io.ErrUnexpectedEOF { - t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) + if err == nil { + t.Fatal("Unexpected success") } } From f0fc67b3a8643a174215d1e514d25414feb83dcf Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 03:08:18 -0700 Subject: [PATCH 66/95] archive/tar: make Reader.Read errors persistent If the stream is in an inconsistent state, it does not make sense that Reader.Read can be called and possibly succeed. Change-Id: I9d1c5a1300b2c2b45232188aa7999e350809dcf2 Reviewed-on: https://go-review.googlesource.com/15177 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick --- archive/tar/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 1f57508..7d05d7d 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -871,9 +871,13 @@ func (tr *Reader) numBytes() int64 { // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. func (tr *Reader) Read(b []byte) (n int, err error) { + if tr.err != nil { + return 0, tr.err + } if tr.curr == nil { return 0, io.EOF } + n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err From 4ad443d1668a7ac6cfe49b02265247bb6fb636fa Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:59:49 -0700 Subject: [PATCH 67/95] archive/tar: expand abilities of TestReader Motivation: * There are an increasing number of "one-off" corrupt files added to make sure that package does not succeed or crash on them. Instead, allow for the test to specify the error that is expected to occur (if any). * Also, fold in the logic to check the MD5 checksum into this function. 
The following tests are being removed: * TestIncrementalRead: Done by TestReader by using io.CopyBuffer with a buffer of 8. This achieves the same behavior as this test. * TestSparseEndToEnd: Since TestReader checks the MD5 checksums if the input corpus provides them, then this is redundant. * TestSparseIncrementalRead: Redundant for the same reasons that TestIncrementalRead is now redundant * TestNegativeHdrSize: Added to TestReader corpus * TestIssue10968: Added to TestReader corpus * TestIssue11169: Added to TestReader corpus With this change, code coverage did not change: 85.3% Change-Id: I8550d48657d4dbb8f47dfc3dc280758ef73b47ec Reviewed-on: https://go-review.googlesource.com/15176 Reviewed-by: Andrew Gerrand --- archive/tar/reader_test.go | 296 ++++++++++--------------------------- 1 file changed, 81 insertions(+), 215 deletions(-) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index bca0c05..4d065a9 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -19,9 +19,10 @@ import ( ) type untarTest struct { - file string - headers []*Header - cksums []string + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur } var gnuTarTest = &untarTest{ @@ -50,7 +51,7 @@ var gnuTarTest = &untarTest{ Gname: "eng", }, }, - cksums: []string{ + chksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, @@ -130,7 +131,7 @@ var sparseTarTest = &untarTest{ Devminor: 0, }, }, - cksums: []string{ + chksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", @@ -287,37 +288,93 @@ var untarTests = []*untarTest{ }, }, }, + { + file: "testdata/neg-size.tar", + err: ErrHeader, + }, + { + file: "testdata/issue10968.tar", + err: ErrHeader, + }, + { + file: "testdata/issue11169.tar", + // TODO(dsnet): Currently 
the library does not detect that this file is + // malformed. Instead it incorrectly believes that file just ends. + // err: ErrHeader, + }, } func TestReader(t *testing.T) { -testLoop: - for i, test := range untarTests { - f, err := os.Open(test.file) + for i, v := range untarTests { + f, err := os.Open(v.file) if err != nil { - t.Errorf("test %d: Unexpected error: %v", i, err) + t.Errorf("file %s, test %d: unexpected error: %v", v.file, i, err) continue } defer f.Close() - tr := NewReader(f) - for j, header := range test.headers { - hdr, err := tr.Next() - if err != nil || hdr == nil { - t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) - f.Close() - continue testLoop + + // Capture all headers and checksums. + var ( + tr = NewReader(f) + hdrs []*Header + chksums []string + rdbuf = make([]byte, 8) + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break } - if !reflect.DeepEqual(*hdr, *header) { - t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", - i, j, *hdr, *header) + hdrs = append(hdrs, hdr) + + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) + } + + for j, hdr := range hdrs { + if j >= len(v.headers) { + t.Errorf("file %s, test %d, entry %d: unexpected header:\ngot %+v", + v.file, i, j, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[j]) { + t.Errorf("file %s, test %d, entry %d: incorrect header:\ngot %+v\nwant %+v", + v.file, i, j, *hdr, *v.headers[j]) } } - hdr, err := tr.Next() - if err == io.EOF { - continue testLoop + if len(hdrs) != len(v.headers) { + t.Errorf("file %s, test %d: got %d headers, want %d headers", + v.file, i, len(hdrs), len(v.headers)) } - if hdr != nil || err != nil { - t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) + + 
for j, sum := range chksums { + if j >= len(v.chksums) { + t.Errorf("file %s, test %d, entry %d: unexpected sum: got %s", + v.file, i, j, sum) + continue + } + if sum != v.chksums[j] { + t.Errorf("file %s, test %d, entry %d: incorrect checksum: got %s, want %s", + v.file, i, j, sum, v.chksums[j]) + } } + + if err != v.err { + t.Errorf("file %s, test %d: unexpected error: got %v, want %v", + v.file, i, err, v.err) + } + f.Close() } } @@ -357,60 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestIncrementalRead(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read file contents in little chunks EOF, - // checksumming all the way - h := md5.New() - rdbuf := make([]uint8, 8) - for { - nr, err := tr.Read(rdbuf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - break - } - h.Write(rdbuf[0:nr]) - } - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - func TestNonSeekable(t *testing.T) { test := gnuTarTest f, err := os.Open(test.file) @@ -515,52 +518,6 @@ func TestMergePAX(t *testing.T) { } } -func TestSparseEndToEnd(t *testing.T) { - test := sparseTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - tr := NewReader(f) - - 
headers := test.headers - cksums := test.cksums - nread := 0 - - // loop over all files - for ; ; nread++ { - hdr, err := tr.Next() - if hdr == nil || err == io.EOF { - break - } - - // check the header - if !reflect.DeepEqual(*hdr, *headers[nread]) { - t.Errorf("Incorrect header:\nhave %+v\nwant %+v", - *hdr, headers[nread]) - } - - // read and checksum the file data - h := md5.New() - _, err = io.Copy(h, tr) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - - // verify checksum - have := fmt.Sprintf("%x", h.Sum(nil)) - want := cksums[nread] - if want != have { - t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) - } - } - if nread != len(headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) - } -} - func TestSparseFileReader(t *testing.T) { var vectors = []struct { realSize int64 // Real size of the output file @@ -714,45 +671,6 @@ func TestSparseFileReader(t *testing.T) { } } -func TestSparseIncrementalRead(t *testing.T) { - sparseMap := []sparseEntry{{10, 2}} - sparseData := []byte("Go") - expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" - - r := bytes.NewReader(sparseData) - nb := int64(r.Len()) - sfr := &sparseFileReader{ - rfr: ®FileReader{r: r, nb: nb}, - sp: sparseMap, - pos: 0, - total: int64(len(expected)), - } - - // We'll read the data 6 bytes at a time, with a hole of size 10 at - // the beginning and one of size 8 at the end. 
- var outputBuf bytes.Buffer - buf := make([]byte, 6) - for { - n, err := sfr.Read(buf) - if err == io.EOF { - break - } - if err != nil { - t.Errorf("Read: unexpected error %v\n", err) - } - if n > 0 { - _, err := outputBuf.Write(buf[:n]) - if err != nil { - t.Errorf("Write: unexpected error %v\n", err) - } - } - } - got := outputBuf.String() - if got != expected { - t.Errorf("Contents = %v, want %v", got, expected) - } -} - func TestReadGNUSparseMap0x1(t *testing.T) { headers := map[string]string{ paxGNUSparseNumBlocks: "4", @@ -822,55 +740,3 @@ func TestUninitializedRead(t *testing.T) { } } - -// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests -// that Reader properly handles corrupted tar files. Given the increasing number -// of invalid/malicious that can crash Reader, we should modify TestReader to -// be able to test that intentionally corrupt tar files don't succeed or crash. - -// Negative header size should not cause panic. -// Issues 10959 and 10960. -func TestNegativeHdrSize(t *testing.T) { - f, err := os.Open("testdata/neg-size.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err != ErrHeader { - t.Error("want ErrHeader, got", err) - } - io.Copy(ioutil.Discard, r) -} - -// This used to hang in (*sparseFileReader).readHole due to missing -// verification of sparse offsets against file size. -func TestIssue10968(t *testing.T) { - f, err := os.Open("testdata/issue10968.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") - } -} - -// Do not panic if there are errors in header blocks after the pax header. 
-// Issue 11169 -func TestIssue11169(t *testing.T) { - f, err := os.Open("testdata/issue11169.tar") - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := NewReader(f) - _, err = r.Next() - if err == nil { - t.Fatal("Unexpected success") - } -} From cb423795ebbea7ab1f8570fa6811ffbd43c04c96 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 6 Oct 2015 01:04:18 -0700 Subject: [PATCH 68/95] archive/tar: add missing error checks to Reader.Next A recursive call to Reader.Next did not check the error before trying to use the result, leading to a nil pointer panic. This specific CL addresses the immediate issue, which is the panic, but does not solve the root issue, which is due to an integer overflow in the base-256 parser. Updates #12435 Change-Id: Ia908671f0f411a409a35e24f2ebf740d46734072 Reviewed-on: https://go-review.googlesource.com/15437 Run-TryBot: Brad Fitzpatrick Reviewed-by: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 31 ++++++-------- archive/tar/reader_test.go | 87 +++++++++++++++++++++++++++++++------- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 7d05d7d..dc23085 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -820,40 +820,37 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { return sp, nil } -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. -// The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { - // Get number of entries - numEntriesStr, ok := headers[paxGNUSparseNumBlocks] - if !ok { - return nil, ErrHeader - } - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) - if err != nil { +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. 
+func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } - sparseMap := strings.Split(headers[paxGNUSparseMap], ",") - - // There should be two numbers in sparseMap for each entry + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } - // Loop through the entries in the sparse map + // Loop through the entries in the sparse map. + // numEntries is trusted now. sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) if err != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 4d065a9..d9d089b 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -672,23 +672,78 @@ func TestSparseFileReader(t *testing.T) { } func TestReadGNUSparseMap0x1(t *testing.T) { - headers := map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - } - expected := []sparseEntry{ - {offset: 0, numBytes: 5}, - {offset: 10, numBytes: 5}, - {offset: 20, numBytes: 5}, - {offset: 30, numBytes: 5}, - } + const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) + ) + var ( + big1 = fmt.Sprintf("%d", int64(maxInt)) + big2 = 
fmt.Sprintf("%d", (int64(maxInt)/2)+1) + big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) + ) - sp, err := readGNUSparseMap0x1(headers) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + var vectors = []struct { + extHdrs map[string]string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + err error // Expected errors that may be raised + }{{ + extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, + err: ErrHeader, + }, { + extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big1, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big2, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: big3, + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", + }, + err: ErrHeader, + }, { + extHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }} + + for i, v := range vectors { + sp, err := readGNUSparseMap0x1(v.extHdrs) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) + } + if err != v.err { + t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) + } } } 
From cf83c95de838674ba781bb4d0684a3e77c1bfc87 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 01:04:24 -0700 Subject: [PATCH 69/95] archive/tar: fix numeric overflow issues in readGNUSparseMap0x1 Motivation: * The logic to verify the numEntries can overflow and incorrectly pass, allowing a malicious file to allocate arbitrary memory. * The use of strconv.ParseInt does not set the integer precision to 64bit, causing this code to work incorrectly on 32bit machines. Change-Id: I1b1571a750a84f2dde97cc329ed04fe2342aaa60 Reviewed-on: https://go-review.googlesource.com/15173 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 57 +++++++++++--- archive/tar/reader_test.go | 156 ++++++++++++++++++++++++++++++------- 2 files changed, 173 insertions(+), 40 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index dc23085..cce9d23 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -504,20 +504,48 @@ func (tr *Reader) octal(b []byte) int64 { return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. 
+func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) - return + _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) + return tr.err } - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -530,6 +558,13 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return given == unsigned || given == signed } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. 
+// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) @@ -541,7 +576,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -558,7 +593,7 @@ func (tr *Reader) readHeader() *Header { return nil } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index d9d089b..90b8b46 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -414,35 +414,6 @@ func TestPartialRead(t *testing.T) { } } -func TestNonSeekable(t *testing.T) { - test := gnuTarTest - f, err := os.Open(test.file) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - defer f.Close() - - type readerOnly struct { - io.Reader - } - tr := NewReader(readerOnly{f}) - nread := 0 - - for ; ; nread++ { - _, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - } - - if nread != len(test.headers) { - t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) - } -} - func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths @@ -795,3 +766,130 @@ func TestUninitializedRead(t *testing.T) { } } + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various 
truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := ioutil.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + var vectors = []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX 
header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + // TODO(dsnet): This should pass, but currently fails. + // {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = io.Copy(ioutil.Discard, tr); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +} From bffda594f770add2c260a42feaf0e1e3c0651a56 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 02:30:29 -0700 Subject: [PATCH 70/95] archive/tar: detect truncated files Motivation: * Reader.skipUnread 
never reports io.ErrUnexpectedEOF. This is strange given that io.ErrUnexpectedEOF is given through Reader.Read if the user manually reads the file. * Reader.skipUnread fails to detect truncated files since io.Seeker is lazy about reporting errors. Thus, the behavior of Reader differs whether the input io.Reader also satisfies io.Seeker or not. To solve this, we seek to one before the end of the data section and always rely on at least one call to io.CopyN. If the tr.r satisfies io.Seeker, this is guaranteed to never read more than blockSize. Fixes #12557 Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279 Reviewed-on: https://go-review.googlesource.com/15175 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/writer.go | 21 ++++++++++----- archive/tar/writer_test.go | 53 +++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 3547c17..0165b22 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" "io" - "os" "path" + "sort" "strconv" "strings" "time" @@ -288,11 +288,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files - // with the current pid. - pid := os.Getpid() + // with the current pid. However, this results in differing outputs + // for identical inputs. As such, the constant 0 is now used instead. 
+ // golang.org/issue/12358 dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, - fmt.Sprintf("PaxHeaders.%d", pid), file) + fullName := path.Join(dir, "PaxHeaders.0", file) ascii := toASCII(fullName) if len(ascii) > 100 { @@ -302,8 +302,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // Construct the body var buf bytes.Buffer - for k, v := range paxHeaders { - fmt.Fprint(&buf, paxHeader(k+"="+v)) + // Keys are sorted before writing to body to allow deterministic output. + var keys []string + for k := range paxHeaders { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index caf40a8..25d88dc 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -11,6 +11,7 @@ import ( "io/ioutil" "os" "reflect" + "sort" "strings" "testing" "testing/iotest" @@ -291,7 +292,7 @@ func TestPax(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -330,7 +331,7 @@ func TestPaxSymlink(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. 
@@ -380,7 +381,7 @@ func TestPaxNonAscii(t *testing.T) { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect - if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. @@ -439,6 +440,52 @@ func TestPaxXattrs(t *testing.T) { } } +func TestPaxHeadersSorted(t *testing.T) { + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := strings.Repeat(" ", int(hdr.Size)) + + hdr.Xattrs = map[string]string{ + "foo": "foo", + "bar": "bar", + "baz": "baz", + "qux": "qux", + } + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + + // xattr bar should always appear before others + indices := []int{ + bytes.Index(buf.Bytes(), []byte("bar=bar")), + bytes.Index(buf.Bytes(), []byte("baz=baz")), + bytes.Index(buf.Bytes(), []byte("foo=foo")), + bytes.Index(buf.Bytes(), []byte("qux=qux")), + } + if !sort.IntsAreSorted(indices) { + t.Fatal("PAX headers are not sorted") + } +} + func TestPAXHeader(t *testing.T) { medName := strings.Repeat("CD", 50) longName := strings.Repeat("AB", 100) From 2424f4e36723fbc7a4e06fff5878a151ae270952 Mon Sep 17 00:00:00 2001 From: Matt Layher Date: Thu, 27 Aug 2015 14:52:06 -0400 Subject: [PATCH 71/95] archive/tar: make output deterministic Replaces PID in PaxHeaders with 0. Sorts PAX header keys before writing them to the archive. 
Fixes #12358 Change-Id: If239f89c85f1c9d9895a253fb06a47ad44960124 Reviewed-on: https://go-review.googlesource.com/13975 Reviewed-by: Russ Cox Reviewed-by: Joe Tsai --- archive/tar/common.go | 11 ++++++++ archive/tar/reader.go | 24 +++++++++++------ archive/tar/reader_test.go | 43 ++++++++++++++++++++++++++++++ archive/tar/testdata/hdr-only.tar | Bin 0 -> 10240 bytes archive/tar/testdata/neg-size.tar | Bin 512 -> 512 bytes 5 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 archive/tar/testdata/hdr-only.tar diff --git a/archive/tar/common.go b/archive/tar/common.go index c31df06..36f4e23 100644 --- a/archive/tar/common.go +++ b/archive/tar/common.go @@ -327,3 +327,14 @@ func toASCII(s string) string { } return buf.String() } + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} diff --git a/archive/tar/reader.go b/archive/tar/reader.go index cce9d23..6360b4e 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -179,6 +179,13 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } if sp != nil { + // Sparse files do not make sense when applied to the special header + // types that never have a data section. + if isHeaderOnlyType(hdr.Typeflag) { + tr.err = ErrHeader + return nil, tr.err + } + // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. 
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) @@ -622,10 +629,6 @@ func (tr *Reader) readHeader() *Header { hdr.Uid = int(tr.octal(s.next(8))) hdr.Gid = int(tr.octal(s.next(8))) hdr.Size = tr.octal(s.next(12)) - if hdr.Size < 0 { - tr.err = ErrHeader - return nil - } hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] @@ -676,12 +679,17 @@ func (tr *Reader) readHeader() *Header { return nil } - // Maximum value of hdr.Size is 64 GB (12 octal digits), - // so there's no risk of int64 overflowing. - nb := int64(hdr.Size) - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + tr.err = ErrHeader + return nil + } // Set the current file reader. + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 90b8b46..3c98f4d 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -893,3 +893,46 @@ func TestReadTruncation(t *testing.T) { } } } + +// TestReadHeaderOnly tests that Reader does not attempt to read special +// header-only files. +func TestReadHeaderOnly(t *testing.T) { + f, err := os.Open("testdata/hdr-only.tar") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + var hdrs []*Header + tr := NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Next(): got %v, want %v", err, nil) + continue + } + hdrs = append(hdrs, hdr) + + // If a special flag, we should read nothing. + cnt, _ := io.ReadFull(tr, []byte{0}) + if cnt > 0 && hdr.Typeflag != TypeReg { + t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) + } + } + + // File is crafted with 16 entries. The later 8 are identical to the first + // 8 except that the size is set. 
+ if len(hdrs) != 16 { + t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) + } + for i := 0; i < 8; i++ { + var hdr1, hdr2 = hdrs[i+0], hdrs[i+8] + hdr1.Size, hdr2.Size = 0, 0 + if !reflect.DeepEqual(*hdr1, *hdr2) { + t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) + } + } +} diff --git a/archive/tar/testdata/hdr-only.tar b/archive/tar/testdata/hdr-only.tar new file mode 100644 index 0000000000000000000000000000000000000000..f25034083de6e0176e429f939875def6eb78cc73 GIT binary patch literal 10240 zcmeI2ZE}J@42Ji2Pq95gv)>o#1+ajk2rWokd-`UnK%I_CXNW`V?jLp5$;Lb+o4jM3 zRS%4K0WN2N31Pu$!vOG|0DSEi6VfBdZFVz=+!#Geo7WlKr zRl;AI>}kUnRryx%w0!65X8WAPynIb6zQg@I`q=ZhT;AVZ14uaInh{tzn(ux&A2{mb)wBl`42&RQXR%ispcL7W$7F z`oDwzV^q+8Xow$MornI@^B?pdod1LVbIgk3(>1Qxw*Nb){{{Vr0_`Z9LH`*QrhogT zdFVfV{nxJ3f3W@s{fGXsn}`0>@$ct<6aa(%Lr=2_XU@0=FB1PuCY>1poj5 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/neg-size.tar b/archive/tar/testdata/neg-size.tar index 5deea3d05c4da5a4ddda34ef7ad781088464e71b..21edf38cc3c3d98c834d07b6d31e8325898ec492 100644 GIT binary patch delta 20 bcmZo*X<(T!h11Z`)Xd0`LBU|-++;=oIaUQ| delta 20 ZcmZo*X<(T!g|mSH1ZGb%+&DLx5db<)1;zjX From 7500c932c7210168610e6ee8ff136f9fb0329a04 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Tue, 3 Nov 2015 18:12:31 -0800 Subject: [PATCH 72/95] archive/tar: properly handle header-only "files" in Reader Certain special type-flags, specifically 1, 2, 3, 4, 5, 6, do not have a data section. Thus, regardless of what the size field says, we should not attempt to read any data for these special types. The relevant PAX and USTAR specification says: <<< If the typeflag field is set to specify a file to be of type 1 (a link) or 2 (a symbolic link), the size field shall be specified as zero. If the typeflag field is set to specify a file of type 5 (directory), the size field shall be interpreted as described under the definition of that record type. No data logical records are stored for types 1, 2, or 5. 
If the typeflag field is set to 3 (character special file), 4 (block special file), or 6 (FIFO), the meaning of the size field is unspecified by this volume of POSIX.1-2008, and no data logical records shall be stored on the medium. Additionally, for type 6, the size field shall be ignored when reading. If the typeflag field is set to any other value, the number of logical records written following the header shall be (size+511)/512, ignoring any fraction in the result of the division. >>> Contrary to the specification, we do not assert that the size field is zero for type 1 and 2 since we liberally accept non-conforming formats. Change-Id: I666b601597cb9d7a50caa081813d90ca9cfc52ed Reviewed-on: https://go-review.googlesource.com/16614 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 118 +++++++++++++++---------------------- archive/tar/reader_test.go | 99 +++++++++++++++++++++++-------- 2 files changed, 122 insertions(+), 95 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6360b4e..6948471 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -769,97 +769,77 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { return sp } -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. -// The sparse map is stored just before the file data and padded out to the nearest block boundary. +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, numBytes). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. 
However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - buf := make([]byte, 2*blockSize) - sparseHeader := buf[:blockSize] + var cntNewline int64 + var buf bytes.Buffer + var blk = make([]byte, blockSize) - // readDecimal is a helper function to read a decimal integer from the sparse map - // while making sure to read from the file in blocks of size blockSize - readDecimal := func() (int64, error) { - // Look for newline - nl := bytes.IndexByte(sparseHeader, '\n') - if nl == -1 { - if len(sparseHeader) >= blockSize { - // This is an error - return 0, ErrHeader + // feedTokens copies data in numBlock chunks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + var feedTokens = func(cnt int64) error { + for cntNewline < cnt { + if _, err := io.ReadFull(r, blk); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err } - oldLen := len(sparseHeader) - newLen := oldLen + blockSize - if cap(sparseHeader) < newLen { - // There's more header, but we need to make room for the next block - copy(buf, sparseHeader) - sparseHeader = buf[:newLen] - } else { - // There's more header, and we can just reslice - sparseHeader = sparseHeader[:newLen] - } - - // Now that sparseHeader is large enough, read next block - if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { - return 0, err - } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { - return 0, err + buf.Write(blk) + for _, c := range blk { + if c == '\n' { + cntNewline++ } } - - // Look for a newline in the new data - nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') - if nl == -1 { - // This is an error - return 0, ErrHeader - } - nl += 
oldLen // We want the position from the beginning } - // Now that we've found a newline, read a number - n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) - if err != nil { - return 0, ErrHeader - } - - // Update sparseHeader to consume this number - sparseHeader = sparseHeader[nl+1:] - return n, nil + return nil } - // Read the first block - if _, err := io.ReadFull(r, sparseHeader); err != nil { + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + var nextToken = func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return tok[:len(tok)-1] // Cut off newline + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { return nil, err } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader); err != nil { - return nil, err - } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader } - // The first line contains the number of entries - numEntries, err := readDecimal() - if err != nil { + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. 
+ if err := feedTokens(2 * numEntries); err != nil { return nil, err } - - // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - // Read the offset - offset, err := readDecimal() + offset, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - // Read numBytes - numBytes, err := readDecimal() + numBytes, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 3c98f4d..5166403 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -719,35 +719,82 @@ func TestReadGNUSparseMap0x1(t *testing.T) { } func TestReadGNUSparseMap1x0(t *testing.T) { - // This test uses lots of holes so the sparse header takes up more than two blocks - numEntries := 100 - expected := make([]sparseEntry, 0, numEntries) - sparseMap := new(bytes.Buffer) - - fmt.Fprintf(sparseMap, "%d\n", numEntries) - for i := 0; i < numEntries; i++ { - offset := int64(2048 * i) - numBytes := int64(1024) - expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) - fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) + var sp = []sparseEntry{{1, 2}, {3, 4}} + for i := 0; i < 98; i++ { + sp = append(sp, sparseEntry{54321, 12345}) } - // Make the header the smallest multiple of blockSize that fits the sparseMap - headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize - bufLen := blockSize * headerBlocks - buf := make([]byte, bufLen) - copy(buf, sparseMap.Bytes()) + var vectors = []struct { + input string // Input data + sparseMap []sparseEntry // Expected sparse entries to be outputted + cnt int // Expected number of bytes read + err error // Expected errors that may be raised + }{{ + input: "", + cnt: 0, + err: io.ErrUnexpectedEOF, + }, { + input: "ab", + 
cnt: 2, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 512), + cnt: 512, + err: io.ErrUnexpectedEOF, + }, { + input: strings.Repeat("\x00", 511) + "\n", + cnt: 512, + err: ErrHeader, + }, { + input: strings.Repeat("\n", 512), + cnt: 512, + err: ErrHeader, + }, { + input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), + sparseMap: []sparseEntry{}, + cnt: 512, + }, { + input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), + sparseMap: []sparseEntry{}, + cnt: 1024, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), + sparseMap: []sparseEntry{{2, 3}}, + cnt: 1536, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), + cnt: 1536, + err: ErrHeader, + }, { + input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), + cnt: 1536, + err: io.ErrUnexpectedEOF, + }, { + input: "-1\n2\n\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "1\nk\n2\n" + strings.Repeat("\x00", 506), + cnt: 512, + err: ErrHeader, + }, { + input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), + cnt: 2560, + sparseMap: sp, + }} - // Get an reader to read the sparse map - r := bytes.NewReader(buf) - - // Read the sparse map - sp, err := readGNUSparseMap1x0(r) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(sp, expected) { - t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + for i, v := range vectors { + r := strings.NewReader(v.input) + sp, err := readGNUSparseMap1x0(r) + if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { + t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) + } + if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { + t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) + } + if err != v.err { + t.Errorf("test %d, unexpected 
error: got %v, want %v", i, err, v.err) + } } } From b598ba3ee75317907dec365b25d0ba2b6f3d32fe Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Oct 2015 01:35:15 -0700 Subject: [PATCH 73/95] archive/tar: fix issues with readGNUSparseMap1x0 Motivations: * Use of strconv.ParseInt does not properly treat integers as 64bit, preventing this function from working properly on 32bit machines. * Use of io.ReadFull does not properly detect truncated streams when the file suddenly ends on a block boundary. * The function blindly trusts user input for numEntries and allocates memory accordingly. * The function does not validate that numEntries is not negative, allowing a malicious sparse file to cause a panic during make. In general, this function was overly complicated for what it was accomplishing and it was hard to reason that it was free from bounds errors. Instead, it has been rewritten and relies on bytes.Buffer.ReadString to do the main work. So long as invariants about the number of '\n' in the buffer are maintained, it is much easier to see why this approach is correct. Change-Id: Ibb12c4126c26e0ea460ea063cd17af68e3cf609e Reviewed-on: https://go-review.googlesource.com/15174 Reviewed-by: Russ Cox Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 160 +++++++++++++++++++++++-------------- archive/tar/reader_test.go | 56 ++++++++++++- archive/tar/writer.go | 133 ++++++++++++++++-------------- archive/tar/writer_test.go | 48 ++++++----- 4 files changed, 254 insertions(+), 143 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6948471..02df550 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -40,6 +40,10 @@ type Reader struct { rawBytes *bytes.Buffer // last raw bits } +type parser struct { + err error // Last error seen +} + // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. 
// @@ -134,6 +138,7 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { + var p parser var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { @@ -216,8 +221,11 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } } - hdr.Name = cString(realname) - return hdr, err + hdr.Name = p.parseString(realname) + if p.err != nil { + return nil, p.err + } + return hdr, nil case TypeGNULongLink: // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) @@ -240,8 +248,11 @@ func (tr *Reader) Next() (*Header, error) { return nil, err } } - hdr.Linkname = cString(realname) - return hdr, err + hdr.Name = p.parseString(realname) + if p.err != nil { + return nil, p.err + } + return hdr, nil } return hdr, tr.err } @@ -420,6 +431,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { return nil, err } } + sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. @@ -428,35 +440,17 @@ func parsePAX(r io.Reader) (map[string]string, error) { headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value - for len(buf) > 0 { - // or the header was empty to start with. - var sp int - // The size field ends at the first space. - sp = bytes.IndexByte(buf, ' ') - if sp == -1 { + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { return nil, ErrHeader } - // Parse the first token as a decimal integer. - n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil || n < 5 || int64(len(buf)) < n { - return nil, ErrHeader - } - // Extract everything between the decimal and the n -1 on the - // beginning to eat the ' ', -1 on the end to skip the newline. - var record []byte - record, buf = buf[sp+1:n-1], buf[n:] - // The first equals is guaranteed to mark the end of the key. 
- // Everything else is value. - eq := bytes.IndexByte(record, '=') - if eq == -1 { - return nil, ErrHeader - } - key, value := record[:eq], record[eq+1:] + sbuf = residual keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.Write(value) + sparseMap.WriteString(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. @@ -471,9 +465,42 @@ func parsePAX(r io.Reader) (map[string]string, error) { return headers, nil } -// cString parses bytes as a NUL-terminated C-style string. +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. 
-func cString(b []byte) string { +func (*parser) parseString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ @@ -481,7 +508,7 @@ func cString(b []byte) string { return string(b[0:n]) } -func (tr *Reader) octal(b []byte) int64 { +func (p *parser) parseNumeric(b []byte) int64 { // Check for binary format first. if len(b) > 0 && b[0]&0x80 != 0 { var x int64 @@ -494,6 +521,10 @@ func (tr *Reader) octal(b []byte) int64 { return x } + return p.parseOctal(b) +} + +func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. @@ -504,9 +535,9 @@ func (tr *Reader) octal(b []byte) int64 { if len(b) == 0 { return 0 } - x, err := strconv.ParseUint(cString(b), 8, 64) - if err != nil { - tr.err = err + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader } return int64(x) } @@ -560,9 +591,10 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return false } - given := tr.octal(header[148:156]) + var p parser + given := p.parseOctal(header[148:156]) unsigned, signed := checksum(header) - return given == unsigned || given == signed + return p.err == nil && (given == unsigned || given == signed) } // readHeader reads the next block header and assumes that the underlying reader @@ -621,18 +653,19 @@ func (tr *Reader) readHeader() *Header { } // Unpack + var p parser hdr := new(Header) s := slicer(header) - hdr.Name = cString(s.next(100)) - hdr.Mode = tr.octal(s.next(8)) - hdr.Uid = int(tr.octal(s.next(8))) - hdr.Gid = int(tr.octal(s.next(8))) - hdr.Size = tr.octal(s.next(12)) - hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.Name = p.parseString(s.next(100)) + hdr.Mode = p.parseNumeric(s.next(8)) + hdr.Uid = int(p.parseNumeric(s.next(8))) + hdr.Gid = int(p.parseNumeric(s.next(8))) + hdr.Size = p.parseNumeric(s.next(12)) + hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) s.next(8) // chksum 
hdr.Typeflag = s.next(1)[0] - hdr.Linkname = cString(s.next(100)) + hdr.Linkname = p.parseString(s.next(100)) // The remainder of the header depends on the value of magic. // The original (v7) version of tar had no explicit magic field, @@ -652,30 +685,30 @@ func (tr *Reader) readHeader() *Header { switch format { case "posix", "gnu", "star": - hdr.Uname = cString(s.next(32)) - hdr.Gname = cString(s.next(32)) + hdr.Uname = p.parseString(s.next(32)) + hdr.Gname = p.parseString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = tr.octal(devmajor) - hdr.Devminor = tr.octal(devminor) + hdr.Devmajor = p.parseNumeric(devmajor) + hdr.Devminor = p.parseNumeric(devminor) } var prefix string switch format { case "posix", "gnu": - prefix = cString(s.next(155)) + prefix = p.parseString(s.next(155)) case "star": - prefix = cString(s.next(131)) - hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) - hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) + prefix = p.parseString(s.next(131)) + hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if tr.err != nil { - tr.err = ErrHeader + if p.err != nil { + tr.err = p.err return nil } @@ -695,7 +728,11 @@ func (tr *Reader) readHeader() *Header { // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = tr.octal(header[483:495]) + hdr.Size = p.parseNumeric(header[483:495]) + if p.err != nil { + tr.err = p.err + return nil + } // Read the sparse map. sp := tr.readOldGNUSparseMap(header) @@ -717,6 +754,7 @@ func (tr *Reader) readHeader() *Header { // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. 
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { + var p parser isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { @@ -727,10 +765,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -754,10 +792,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 5166403..f0dbd94 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -298,9 +298,7 @@ var untarTests = []*untarTest{ }, { file: "testdata/issue11169.tar", - // TODO(dsnet): Currently the library does not detect that this file is - // malformed. Instead it incorrectly believes that file just ends. 
- // err: ErrHeader, + err: ErrHeader, }, } @@ -983,3 +981,55 @@ func TestReadHeaderOnly(t *testing.T) { } } } + +func TestParsePAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + input string + residual string + outputKey string + outputVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "", "\x00hello", "\x00world", true}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.input) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.input) + } + } + if ok && (key != v.outputKey || val != v.outputVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.input, key, val, v.outputKey, v.outputVal) + } + if res != v.residual { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.input, res, v.residual) + } + } +} diff --git a/archive/tar/writer.go 
b/archive/tar/writer.go index 0165b22..688455d 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -42,6 +42,10 @@ type Writer struct { paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } +type formatter struct { + err error // Last error seen +} + // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -68,17 +72,9 @@ func (tw *Writer) Flush() error { } // Write s into b, terminating it with a NUL if there is room. -// If the value is too long for the field and allowPax is true add a paxheader record instead -func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } +func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { - if tw.err == nil { - tw.err = ErrFieldTooLong - } + f.err = ErrFieldTooLong return } ascii := toASCII(s) @@ -89,35 +85,17 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, } // Encode x as an octal ASCII string and write it into b with leading zeros. -func (tw *Writer) octal(b []byte, x int64) { +func (f *formatter) formatOctal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL. for len(s)+1 < len(b) { s = "0" + s } - tw.cString(b, s, false, paxNone, nil) + f.formatString(b, s) } -// Write x into b, either as octal or as binary (GNUtar/star extension). -// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead -func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - // Try octal first. 
- s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - tw.octal(b, x) - return - } - - // If it is too long for octal, and pax is preferred, use a pax header - if allowPax && tw.preferPax { - tw.octal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - // Too big: use binary (big-endian). - tw.usedBinary = true +// Write x into b, as binary (GNUtar/star extension). +func (f *formatter) formatNumeric(b []byte, x int64) { for i := len(b) - 1; x > 0 && i >= 0; i-- { b[i] = byte(x) x >>= 8 @@ -161,6 +139,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters + var f formatter var header []byte // We need to select which scratch buffer to use carefully, @@ -175,10 +154,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { copy(header, zeroBlock) s := slicer(header) + // Wrappers around formatter that automatically sets paxHeaders if the + // argument extends beyond the capacity of the input byte slice. + var formatString = func(b []byte, s string, paxKeyword string) { + needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + f.formatString(b, s) + } + var formatNumeric = func(b []byte, x int64, paxKeyword string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + f.formatOctal(b, x) + return + } + + // If it is too long for octal, and PAX is preferred, use a PAX header. 
+ if paxKeyword != paxNone && tw.preferPax { + f.formatOctal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + tw.usedBinary = true + f.formatNumeric(b, x) + } + // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) - tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) + formatString(pathHeaderBytes, hdr.Name, paxPath) // Handle out of range ModTime carefully. var modTime int64 @@ -186,25 +195,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { modTime = hdr.ModTime.Unix() } - tw.octal(s.next(8), hdr.Mode) // 100:108 - tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 - tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 - tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 - tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + f.formatOctal(s.next(8), hdr.Mode) // 100:108 + formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 + formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 + formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 + formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 - tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) + formatString(s.next(100), hdr.Linkname, paxLinkpath) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 - tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 - tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 - tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 + 
copy(s.next(8), []byte("ustar\x0000")) // 257:265 + formatString(s.next(32), hdr.Uname, paxUname) // 265:297 + formatString(s.next(32), hdr.Gname, paxGname) // 297:329 + formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 + formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) - tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix + formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { @@ -220,19 +229,20 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { delete(paxHeaders, paxPath) // Update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) + formatString(pathHeaderBytes, suffix, paxNone) + formatString(prefixHeaderBytes, prefix, paxNone) } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. chksum, _ := checksum(header) - tw.octal(header[148:155], chksum) + f.formatOctal(header[148:155], chksum) // Never fails header[155] = ' ' - if tw.err != nil { - // problem with header; probably integer too big for a field. + // Check if there were any formatting errors. 
+ if f.err != nil { + tw.err = f.err return tw.err } @@ -310,7 +320,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro sort.Strings(keys) for _, k := range keys { - fmt.Fprint(&buf, paxHeader(k+"="+paxHeaders[k])) + fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) @@ -326,17 +336,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// paxHeader formats a single pax record, prefixing it with the appropriate length -func paxHeader(msg string) string { - const padding = 2 // Extra padding for space and newline - size := len(msg) + padding +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s\n", size, msg) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. 
if len(record) != size { - // Final adjustment if adding size increased - // the number of digits in size size = len(record) - record = fmt.Sprintf("%d %s\n", size, msg) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) } return record } diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 25d88dc..69a44a6 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -486,24 +486,6 @@ func TestPaxHeadersSorted(t *testing.T) { } } -func TestPAXHeader(t *testing.T) { - medName := strings.Repeat("CD", 50) - longName := strings.Repeat("AB", 100) - paxTests := [][2]string{ - {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, - {"a=b", "6 a=b\n"}, // Single digit length - {"a=names", "11 a=names\n"}, // Test case involving carries - {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, - {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} - - for _, test := range paxTests { - key, expected := test[0], test[1] - if result := paxHeader(key); result != expected { - t.Fatalf("paxHeader: got %s, expected %s", result, expected) - } - } -} - func TestUSTARLongName(t *testing.T) { // Create an archive with a path that failed to split with USTAR extension in previous versions. 
fileinfo, err := os.Stat("testdata/small.txt") @@ -625,3 +607,33 @@ func TestSplitUSTARPath(t *testing.T) { } } } + +func TestFormatPAXRecord(t *testing.T) { + var medName = strings.Repeat("CD", 50) + var longName = strings.Repeat("AB", 100) + + var vectors = []struct { + inputKey string + inputVal string + output string + }{ + {"k", "v", "6 k=v\n"}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n"}, + {"path", longName, "210 path=" + longName + "\n"}, + {"path", medName, "110 path=" + medName + "\n"}, + {"foo", "ba", "9 foo=ba\n"}, + {"foo", "bar", "11 foo=bar\n"}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n"}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n"}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n"}, + {"\x00hello", "\x00world", "17 \x00hello=\x00world\n"}, + } + + for _, v := range vectors { + output := formatPAXRecord(v.inputKey, v.inputVal) + if output != v.output { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inputKey, v.inputVal, output, v.output) + } + } +} From 64935a5f0f25d74240cd2e7174a2a1aa7652a032 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 28 Sep 2015 13:49:35 -0700 Subject: [PATCH 74/95] archive/tar: move parse/format methods to standalone receiver Motivations for this change: * It allows these functions to be used outside of Reader/Writer. * It allows these functions to be more easily unit tested. 
Change-Id: Iebe2b70bdb8744371c9ffa87c24316cbbf025b59 Reviewed-on: https://go-review.googlesource.com/15113 Reviewed-by: Russ Cox Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/testdata/pax-path-hdr.tar | Bin 0 -> 1024 bytes archive/tar/testdata/ustar-file-reg.tar | Bin 0 -> 1536 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 archive/tar/testdata/pax-path-hdr.tar create mode 100644 archive/tar/testdata/ustar-file-reg.tar diff --git a/archive/tar/testdata/pax-path-hdr.tar b/archive/tar/testdata/pax-path-hdr.tar new file mode 100644 index 0000000000000000000000000000000000000000..ab8fc325b26159f4fed6bfb59fe5f616d35fec74 GIT binary patch literal 1024 zcmXR&EXmL>$=5GRO-#v6r43~O0Sq{30|OI7m>ft6gMqP;fsrYLLIndIKxuJFViC}K xO07co9Hr*bNx!kNLIE%d*akR880v$Gocz3WU67b=USe)47oFTOYR$le004hRKE?n5 literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/ustar-file-reg.tar b/archive/tar/testdata/ustar-file-reg.tar new file mode 100644 index 0000000000000000000000000000000000000000..c84fa27ffb8613d20f7ac9690cd59827b04028f6 GIT binary patch literal 1536 zcmdUuJ(Hq95QhCJmlMG7O>hA!%P7hKg3OW)iVh<3F#{;S{=CgM+^V}b=Pyosep=F7x+*OI&?Q6F7LxR?fb`B^0 zttmJo<+m$~$Msw9KQ_wfqfW1sDJ#zsWq25)8&TsKn~+7*oF6~$0+nD?L2C$TBQM>$ zcQqo7Eo8w%PL1H9D9KY4`f7Ku6*oaxOv|7S1ZpTT=%sbGS#L2%*Uxm5P}U`mG$%9I zIRuF>849Ugh}k{mmgLJGtca9XnA{r2KBJ`fRXOnPqEZjWtz4z5Mq_`uZWX)VuFVko z#$DNd>@Saj1w+|ef@dPC=g!5Kb4c+mQ$bcu#R_I=;>D)V&agmv_`vpStP-sQh!z(| z5{9v2$DGKS|DrLqZ8y7?ox@|a-R^30zMeK388O@+r+cK=9NdZ)ow#}n{kwf`tD4=t zR9Oz?v}2QNv&rDR7u$%4%_>%5(k#={r!rZ(5WA5+U_Rz+w6{_kV7C!KK2e+5VuS-5 zWLRhpI#>Iq%|mhyZxDfRHCi7gC+R&sD4|k;~pXMNUD{^NPC5(KX~`*cXkwAurz(+XVNg z1P`yNv%FZy)9`uTGCWUJ^@>e2&T3O`W@re{ZN S9gn%XW0CXwKYp`+7X1&(X!Xnh literal 0 HcmV?d00001 From be9ac88117e8f7c1666e9f3c241b03c505dc52f3 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Sep 2015 00:58:56 -0700 Subject: [PATCH 75/95] archive/tar: convert Reader.Next to be loop based Motivation for change: * Recursive 
logic is hard to follow, since it tends to apply things in reverse. On the other hand, the tar formats tend to describe meta headers as affecting the next entry. * Recursion also applies changes in the wrong order. Two test files are attached that use multiple headers. The previous Go behavior differs from what GNU and BSD tar do. Change-Id: Ic1557256fc1363c5cb26570e5d0b9f65a9e57341 Reviewed-on: https://go-review.googlesource.com/14624 Run-TryBot: Joe Tsai TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- archive/tar/reader.go | 160 ++++++++++-------------- archive/tar/reader_test.go | 24 ++++ archive/tar/testdata/gnu-multi-hdrs.tar | Bin 0 -> 4608 bytes archive/tar/testdata/pax-multi-hdrs.tar | Bin 0 -> 4608 bytes 4 files changed, 90 insertions(+), 94 deletions(-) create mode 100644 archive/tar/testdata/gnu-multi-hdrs.tar create mode 100644 archive/tar/testdata/pax-multi-hdrs.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 02df550..ba34ed7 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -138,8 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - var p parser - var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -147,114 +145,88 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - if tr.err == nil { - tr.skipUnread() - } + if tr.err != nil { - return hdr, tr.err + return nil, tr.err } - hdr = tr.readHeader() - if hdr == nil { - return hdr, tr.err - } - // Check for PAX/GNU header. 
- switch hdr.Typeflag { - case TypeXHeader: - // PAX extended header - headers, err := parsePAX(tr) - if err != nil { - return nil, err - } - // We actually read the whole file, - // but this skips alignment padding - tr.skipUnread() + + var hdr *Header + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + tr.err = tr.skipUnread() if tr.err != nil { return nil, tr.err } + hdr = tr.readHeader() - if hdr == nil { + if tr.err != nil { return nil, tr.err } - mergePAX(hdr, headers) - - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Sparse files do not make sense when applied to the special header - // types that never have a data section. - if isHeaderOnlyType(hdr.Typeflag) { - tr.err = ErrHeader - return nil, tr.err - } - - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, tr.err = parsePAX(tr) if tr.err != nil { return nil, tr.err } - } - return hdr, nil - case TypeGNULongName: - // We have a GNU long name header. Its contents are the real file name. 
- realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + var realname []byte + realname, tr.err = ioutil.ReadAll(tr) + if tr.err != nil { + return nil, tr.err + } + + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { + return nil, tr.err + } + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + tr.err = p.err + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + default: + mergePAX(hdr, extHdrs) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + tr.err = err return nil, err } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } + break loop // This is a file, so stop } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil - case TypeGNULongLink: - // We have a GNU long link header. 
- realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { - return nil, err - } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } - hdr.Name = p.parseString(realname) - if p.err != nil { - return nil, p.err - } - return hdr, nil } - return hdr, tr.err + return hdr, nil } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index f0dbd94..861d1a5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -288,6 +288,30 @@ var untarTests = []*untarTest{ }, }, }, + { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{ + { + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, + { + // Matches the behavior of GNU and BSD tar utilities. 
+ file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{ + { + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + }, + }, + }, { file: "testdata/neg-size.tar", err: ErrHeader, diff --git a/archive/tar/testdata/gnu-multi-hdrs.tar b/archive/tar/testdata/gnu-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..8bcad55d06e8f9fde3641d2a8df370503a582ce6 GIT binary patch literal 4608 zcmdPX*VA|K$%Afaj^QI J<`xZ33jpEZfW-g+ literal 0 HcmV?d00001 diff --git a/archive/tar/testdata/pax-multi-hdrs.tar b/archive/tar/testdata/pax-multi-hdrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..14bc7597808020d7bc37e6610482fd9662814a24 GIT binary patch literal 4608 zcmeH~OAf*y5QbTM3NFy_tgc*m1D9?w)<^3;ld-=Ii;m6ILFVT3VXy>=Udb`;P z_AF}Ko(kwITGLdEM1G)5o<9H!jr=439(@V?ONRXCAu*5YPw-#9x&N1V|EJgyTG0B^ zUZ)s9!5N^&Ghph+I3UGBWYR$X|2zH<_}9R{M*cI=m|k}8li%1Drp7@Vny>jkUkM=z Sl}Br1`$oQ%|3`N;j=%%-SrC5! literal 0 HcmV?d00001 From ce5aac17f91d978a37dd742761cf57cd5bdb8ef2 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 2 Dec 2015 15:48:06 -0800 Subject: [PATCH 76/95] archive/tar: properly format GNU base-256 encoding Motivation: * Previous implementation silently failed when an integer overflow occurred. Now, we report an ErrFieldTooLong. * Previous implementation did not encode in two's complement format and was unable to encode negative numbers. The relevant GNU specification says: <<< GNU format uses two's-complement base-256 notation to store values that do not fit into standard ustar range. 
>>> Fixes #12436 Change-Id: I09c20602eabf8ae3a7e0db35b79440a64bfaf807 Reviewed-on: https://go-review.googlesource.com/17425 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/writer.go | 26 ++++++++++-- archive/tar/writer_test.go | 83 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 4 deletions(-) diff --git a/archive/tar/writer.go b/archive/tar/writer.go index 688455d..0426381 100644 --- a/archive/tar/writer.go +++ b/archive/tar/writer.go @@ -94,13 +94,31 @@ func (f *formatter) formatOctal(b []byte, x int64) { f.formatString(b, s) } +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. 
+func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1< 0 && i >= 0; i-- { - b[i] = byte(x) - x >>= 8 + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return } - b[0] |= 0x80 // highest bit indicates binary format + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong } var ( diff --git a/archive/tar/writer_test.go b/archive/tar/writer_test.go index 69a44a6..6e91d90 100644 --- a/archive/tar/writer_test.go +++ b/archive/tar/writer_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "reflect" "sort" @@ -637,3 +638,85 @@ func TestFormatPAXRecord(t *testing.T) { } } } + +func TestFitsInBase256(t *testing.T) { + var vectors = []struct { + input int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.input) + if ok != v.ok { + t.Errorf("checkNumeric(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestFormatNumeric(t *testing.T) { + var vectors = []struct { + input int64 + output string + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + output := make([]byte, len(v.output)) + f.formatNumeric(output, v.input) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.input) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.input) + } + } + if string(output) != v.output { + t.Errorf("formatNumeric(%d): got %q, want %q", v.input, output, v.output) + } + } +} From a04b4ddba428a52a96f3c046d284916313dc6d2e Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 2 Dec 2015 15:41:44 -0800 Subject: [PATCH 77/95] archive/tar: properly parse GNU base-256 encoding Motivation: * Previous implementation did not detect integer overflow when parsing a base-256 encoded field. * Previous implementation did not treat the integer as a two's complement value as specified by GNU. 
The relevant GNU specification says: <<< GNU format uses two's-complement base-256 notation to store values that do not fit into standard ustar range. >>> Fixes #12435 Change-Id: I4639bcffac8d12e1cb040b76bd05c9d7bc6c23a8 Reviewed-on: https://go-review.googlesource.com/17424 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- archive/tar/reader.go | 44 +++++++++++++++---- archive/tar/reader_test.go | 66 ++++++++++++++++++++++++++++ archive/tar/testdata/issue12435.tar | Bin 0 -> 512 bytes 3 files changed, 102 insertions(+), 8 deletions(-) create mode 100644 archive/tar/testdata/issue12435.tar diff --git a/archive/tar/reader.go b/archive/tar/reader.go index ba34ed7..6e77cbe 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -480,19 +480,47 @@ func (*parser) parseString(b []byte) string { return string(b[0:n]) } +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. func (p *parser) parseNumeric(b []byte) int64 { - // Check for binary format first. + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. if len(b) > 0 && b[0]&0x80 != 0 { - var x int64 - for i, c := range b { - if i == 0 { - c &= 0x7f // ignore signal bit in first byte - } - x = x<<8 | int64(c) + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. 
+ var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff } - return x + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) } + // Normal case is base-8 (octal) format. return p.parseOctal(b) } diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 861d1a5..7b148b5 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -324,6 +324,10 @@ var untarTests = []*untarTest{ file: "testdata/issue11169.tar", err: ErrHeader, }, + { + file: "testdata/issue12435.tar", + err: ErrHeader, + }, } func TestReader(t *testing.T) { @@ -1057,3 +1061,65 @@ func TestParsePAXRecord(t *testing.T) { } } } + +func TestParseNumeric(t *testing.T) { + var vectors = []struct { + input string + output int64 + ok bool + }{ + // Test base-256 (binary) encoded values. 
+ {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. 
+ {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + num := p.parseNumeric([]byte(v.input)) + ok := (p.err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.input) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.input) + } + } + if ok && num != v.output { + t.Errorf("parseNumeric(%q): got %d, want %d", v.input, num, v.output) + } + } +} diff --git a/archive/tar/testdata/issue12435.tar b/archive/tar/testdata/issue12435.tar new file mode 100644 index 0000000000000000000000000000000000000000..3542dd8efd5d486b99ae03f39a56860af1c09af0 GIT binary patch literal 512 lcmZQzpgs7{2(bf|=G;FWht&p9;C+0@6=oc2Mun*p0sxa^2!Q|q literal 0 HcmV?d00001 From 962540fec3dc41e7256a85182b22926921231518 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Dec 2015 11:26:26 -0800 Subject: [PATCH 78/95] archive/tar: spell license correctly in example Change-Id: Ice85d161f026a991953bd63ecc6ec80f8d06dfbd Reviewed-on: https://go-review.googlesource.com/17901 Run-TryBot: Joe Tsai Reviewed-by: Brad Fitzpatrick --- archive/tar/example_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archive/tar/example_test.go b/archive/tar/example_test.go index 2317f44..5f0ce2f 100644 --- a/archive/tar/example_test.go +++ b/archive/tar/example_test.go @@ -26,7 +26,7 @@ func Example() { }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, - {"todo.txt", "Get animal handling licence."}, + {"todo.txt", "Get animal handling license."}, 
} for _, file := range files { hdr := &tar.Header{ @@ -76,5 +76,5 @@ func Example() { // Geoffrey // Gonzo // Contents of todo.txt: - // Get animal handling licence. + // Get animal handling license. } From 10db8408f660956a312a7fd9a5b8d0f74175e8ab Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 16 Dec 2015 23:10:14 -0800 Subject: [PATCH 79/95] archive/tar: document how Reader.Read handles header-only files Commit dd5e14a7511465d20c6e95bf54c9b8f999abbbf6 ensured that no data could be read for header-only files regardless of what the Header.Size said. We should document this fact in Reader.Read. Updates #13647 Change-Id: I4df9a2892bc66b49e0279693d08454bf696cfa31 Reviewed-on: https://go-review.googlesource.com/17913 Reviewed-by: Russ Cox --- archive/tar/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index 6e77cbe..a8b63a2 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -928,6 +928,10 @@ func (tr *Reader) numBytes() int64 { // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. +// +// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// the Header.Size claims. 
func (tr *Reader) Read(b []byte) (n int, err error) { if tr.err != nil { return 0, tr.err From c32966b9e8c3b429d6c7999ab2037bd537d60420 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Feb 2016 09:38:46 -0500 Subject: [PATCH 80/95] archive/tar: go1.3 and go1.4 compatibility Signed-off-by: Vincent Batts --- archive/tar/reader_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/archive/tar/reader_test.go b/archive/tar/reader_test.go index 7b148b5..821b4f0 100644 --- a/archive/tar/reader_test.go +++ b/archive/tar/reader_test.go @@ -344,7 +344,6 @@ func TestReader(t *testing.T) { tr = NewReader(f) hdrs []*Header chksums []string - rdbuf = make([]byte, 8) ) for { var hdr *Header @@ -361,7 +360,7 @@ func TestReader(t *testing.T) { continue } h := md5.New() - _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + _, err = io.Copy(h, tr) // Effectively an incremental read if err != nil { break } From 24fe0a94fe8e2f2c42cb1d49277379e8f2dbb06e Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Feb 2016 09:44:28 -0500 Subject: [PATCH 81/95] version: bump to v0.9.13 Signed-off-by: Vincent Batts --- version/version.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/version/version.go b/version/version.go index c41a8f2..345274d 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version + // AUTO-GENEREATED. 
DO NOT EDIT -// 2016-01-31 01:39:06.012784413 -0500 EST +// 2016-02-15 09:43:15.919197079 -0500 EST // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.9.12" - \ No newline at end of file +var VERSION = "v0.9.13-19-862ccd05b" From e2a62d6b0d98fd7f1a57646812c74564fda999b4 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 29 Feb 2016 11:40:38 -0500 Subject: [PATCH 82/95] README.md: fix thumbnail Signed-off-by: Vincent Batts --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 90a8edf..02bbcd0 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ This demonstrates the `tar-split` command and how to assemble a tar archive from This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content. -![docker tar-split demo](https://www.youtube.com/upload_thumbnail?v=tV_Dia8E8xw&t=2&ts=1445028436275) +![docker tar-split demo](https://i.ytimg.com/vi_webp/vh5wyjIOBtc/default.webp) [youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw) ## Caveat From 226f7c74905f1fcc08ac128b517a1d65a1948eb9 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Wed, 30 Mar 2016 16:38:51 -0400 Subject: [PATCH 83/95] README: update `archive/tar` version reference Signed-off-by: Vincent Batts --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 02bbcd0..4c544d8 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f). +The version of golang stdlib `archive/tar` is from go1.6 It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. 
From 354fd6cf34bfa908f2d9796de5b1ff955b060ca6 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sun, 26 Jun 2016 10:15:12 -0400 Subject: [PATCH 84/95] cmd: add a `disasm --no-stdout` flag Since sometimes you just need to > /dev/null Signed-off-by: Vincent Batts --- cmd/tar-split/disasm.go | 9 ++++++++- cmd/tar-split/main.go | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go index b7b0dfe..7f8a056 100644 --- a/cmd/tar-split/disasm.go +++ b/cmd/tar-split/disasm.go @@ -3,6 +3,7 @@ package main import ( "compress/gzip" "io" + "io/ioutil" "os" "github.com/Sirupsen/logrus" @@ -48,7 +49,13 @@ func CommandDisasm(c *cli.Context) { if err != nil { logrus.Fatal(err) } - i, err := io.Copy(os.Stdout, its) + var out io.Writer + if c.Bool("no-stdout") { + out = ioutil.Discard + } else { + out = os.Stdout + } + i, err := io.Copy(out, its) if err != nil { logrus.Fatal(err) } diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index b417120..c584352 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -42,6 +42,10 @@ func main() { Value: "tar-data.json.gz", Usage: "output of disassembled tar stream", }, + cli.BoolFlag{ + Name: "no-stdout", + Usage: "do not throughput the stream to STDOUT", + }, }, }, { From beaeceb06f0a81f66e6687a2d830ed3a1e6d0875 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sun, 26 Jun 2016 14:56:04 -0400 Subject: [PATCH 85/95] travis: update golang versions This is not saying that tar-split no longer works on go1.3 or go1.4, but rather that the headache of `go vet` having a version dependent ability to install it, makes it a headache in travis. 
Signed-off-by: Vincent Batts --- .travis.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index c0a17c7..2d9a842 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,14 @@ language: go go: - tip - - 1.5.1 - - 1.4.3 - - 1.3.3 + - 1.6.2 + - 1.5.4 # let us have pretty, fast Docker-based Travis workers! sudo: false install: - go get -d ./... - - go get golang.org/x/tools/cmd/vet script: - go test -v ./... From 6810cedb21b2c3d0b9bb8f9af12ff2dc7a2f14df Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 26 Jul 2016 09:50:08 -0400 Subject: [PATCH 86/95] benchmark: add a comparison of 'archive/tar' Since this project has forked logic of upstream 'archive/tar', this does a brief comparison including the RawBytes usage. ```bash $ go test -run="XXX" -bench=. testing: warning: no tests to run BenchmarkUpstreamTar-4 2000 700809 ns/op BenchmarkOurTarNoAccounting-4 2000 692055 ns/op BenchmarkOurTarYesAccounting-4 2000 723184 ns/op PASS ok vb/tar-split 4.461s ``` From this, the difference is negligible. 
Signed-off-by: Vincent Batts --- tar_benchmark_test.go | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tar_benchmark_test.go diff --git a/tar_benchmark_test.go b/tar_benchmark_test.go new file mode 100644 index 0000000..d946f2a --- /dev/null +++ b/tar_benchmark_test.go @@ -0,0 +1,84 @@ +package tartest + +import ( + "io" + "io/ioutil" + "os" + "testing" + + upTar "archive/tar" + + ourTar "github.com/vbatts/tar-split/archive/tar" +) + +var testfile = "./archive/tar/testdata/sparse-formats.tar" + +func BenchmarkUpstreamTar(b *testing.B) { + for n := 0; n < b.N; n++ { + fh, err := os.Open(testfile) + if err != nil { + b.Fatal(err) + } + tr := upTar.NewReader(fh) + for { + _, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + fh.Close() + b.Fatal(err) + } + io.Copy(ioutil.Discard, tr) + } + fh.Close() + } +} + +func BenchmarkOurTarNoAccounting(b *testing.B) { + for n := 0; n < b.N; n++ { + fh, err := os.Open(testfile) + if err != nil { + b.Fatal(err) + } + tr := ourTar.NewReader(fh) + tr.RawAccounting = false // this is default, but explicit here + for { + _, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + fh.Close() + b.Fatal(err) + } + io.Copy(ioutil.Discard, tr) + } + fh.Close() + } +} +func BenchmarkOurTarYesAccounting(b *testing.B) { + for n := 0; n < b.N; n++ { + fh, err := os.Open(testfile) + if err != nil { + b.Fatal(err) + } + tr := ourTar.NewReader(fh) + tr.RawAccounting = true // This enables mechanics for collecting raw bytes + for { + _ = tr.RawBytes() + _, err := tr.Next() + _ = tr.RawBytes() + if err != nil { + if err == io.EOF { + break + } + fh.Close() + b.Fatal(err) + } + io.Copy(ioutil.Discard, tr) + _ = tr.RawBytes() + } + fh.Close() + } +} From e527e70d2599f4294397c46bffe9806f404ce1eb Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Thu, 22 Sep 2016 17:38:18 -0700 Subject: [PATCH 87/95] Fix panic in Next readHeader should never return nil with a tr.err 
also nil. To correct this, ensure tr.err never gets reset to nil followed by a nil return. --- archive/tar/reader.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index a8b63a2..fdc8ae3 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -611,7 +611,8 @@ func (tr *Reader) readHeader() *Header { if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + if _, err := tr.rawBytes.Write(header); err != nil { + tr.err = err return nil } } From eb3808673d60f519722fc86f095f90e4edfbb32a Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Fri, 23 Sep 2016 11:01:58 -0400 Subject: [PATCH 88/95] version: bump to v0.10.0 Signed-off-by: Vincent Batts --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index 345274d..b39b671 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version // AUTO-GENEREATED. 
DO NOT EDIT -// 2016-02-15 09:43:15.919197079 -0500 EST +// 2016-09-23 11:00:18.92191222 -0400 EDT // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.9.13-19-862ccd05b" +var VERSION = "v0.10.0-9-gae8540d" From 7410961e758c155389fab3f0391e65320854acbc Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 26 Sep 2016 14:51:07 -0400 Subject: [PATCH 89/95] tar/asm: failing test for lack of EOF nils Reported-by: Derek McGowan Signed-off-by: Vincent Batts --- tar/asm/assemble_test.go | 2 ++ tar/asm/testdata/extranils.tar.gz | Bin 0 -> 127 bytes tar/asm/testdata/notenoughnils.tar.gz | Bin 0 -> 91 bytes 3 files changed, 2 insertions(+) create mode 100644 tar/asm/testdata/extranils.tar.gz create mode 100644 tar/asm/testdata/notenoughnils.tar.gz diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index c0c7f17..afdce9d 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -139,6 +139,8 @@ var testCases = []struct { {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, + {"./testdata/extranils.tar.gz", "e187b4b3e739deaccc257342f4940f34403dc588", 10648}, + {"./testdata/notenoughnils.tar.gz", "72f93f41efd95290baa5c174c234f5d4c22ce601", 512}, } func TestTarStream(t *testing.T) { diff --git a/tar/asm/testdata/extranils.tar.gz b/tar/asm/testdata/extranils.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..70caf6e6e7641340e6bbe7fa1c36bcf7b19b9533 GIT binary patch literal 127 zcmb2|=HTE?e;LlioLW&*l$e*9Q><5#Sj6!5lr7gG2a(o`+owbx+GCh@O<1uhVXDjZ z_wv(jWtRFC|9l|l|J<@}&Ar~kA8x$){p(H5*XWejd$$%|FY1{!)44b3F~8nxGu@>} cz8d7-{`J;eKbrv#YD$=2`88gep~=7i0Ka=WQvd(} literal 0 HcmV?d00001 diff --git a/tar/asm/testdata/notenoughnils.tar.gz b/tar/asm/testdata/notenoughnils.tar.gz new file mode 100644 
index 0000000000000000000000000000000000000000..146bb008bbeb2ff6e5fe1232dfc29894f85ed57b GIT binary patch literal 91 zcmV-h0HpsPiwFShZ0T15153-yNoAlhFaQEG6B7_^Xl`l%<{LuzFu-7FWNvO`WM*t+ xXvSb*XlQ6+z@T8jKnGA-T#{I%pukX;lvq+yj89!^Q8B}?0{|>t!}0(E001l6BE0|r literal 0 HcmV?d00001 From 6b59e6942e2c9a03fd4506102adfa082a96b5ace Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Mon, 26 Sep 2016 14:01:48 -0700 Subject: [PATCH 90/95] archive/tar: fix writing too many raw bytes When an EOF is read, only the part of the header buffer which was read should be accounted for. Signed-off-by: Derek McGowan --- archive/tar/reader.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/archive/tar/reader.go b/archive/tar/reader.go index fdc8ae3..adf3212 100644 --- a/archive/tar/reader.go +++ b/archive/tar/reader.go @@ -608,12 +608,12 @@ func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if n, err := io.ReadFull(tr.r, header); err != nil { + tr.err = err // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header); err != nil { + if _, err := tr.rawBytes.Write(header[:n]); err != nil { tr.err = err - return nil } } return nil // io.EOF is okay here @@ -626,11 +626,12 @@ func (tr *Reader) readHeader() *Header { // Two blocks of zero bytes marks the end of the archive. 
if bytes.Equal(header, zeroBlock[0:blockSize]) { - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if n, err := io.ReadFull(tr.r, header); err != nil { + tr.err = err // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil + if _, err := tr.rawBytes.Write(header[:n]); err != nil { + tr.err = err } } return nil // io.EOF is okay here From d3f1b54304d656376e58f9406a9cb4775799a357 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 26 Sep 2016 19:53:52 -0400 Subject: [PATCH 91/95] version: bump to v0.10.1 Signed-off-by: Vincent Batts --- version/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version/version.go b/version/version.go index b39b671..f317010 100644 --- a/version/version.go +++ b/version/version.go @@ -1,7 +1,7 @@ package version // AUTO-GENEREATED. DO NOT EDIT -// 2016-09-23 11:00:18.92191222 -0400 EDT +// 2016-09-26 19:53:30.825879 -0400 EDT // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version -var VERSION = "v0.10.0-9-gae8540d" +var VERSION = "v0.10.1-4-gf280282" From bd4c5d64c3e9297f410025a3b1bd0c58f659e721 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 27 Sep 2016 02:54:18 +0000 Subject: [PATCH 92/95] main: switch import paths to urfave Signed-off-by: Vincent Batts --- cmd/tar-split/asm.go | 2 +- cmd/tar-split/checksize.go | 2 +- cmd/tar-split/disasm.go | 2 +- cmd/tar-split/main.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/tar-split/asm.go b/cmd/tar-split/asm.go index 312e54b..e188ce1 100644 --- a/cmd/tar-split/asm.go +++ b/cmd/tar-split/asm.go @@ -6,7 +6,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/checksize.go b/cmd/tar-split/checksize.go index 
38f830e..1e5eed7 100644 --- a/cmd/tar-split/checksize.go +++ b/cmd/tar-split/checksize.go @@ -10,7 +10,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go index 7f8a056..5472894 100644 --- a/cmd/tar-split/disasm.go +++ b/cmd/tar-split/disasm.go @@ -7,7 +7,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index c584352..8b4035f 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -4,7 +4,7 @@ import ( "os" "github.com/Sirupsen/logrus" - "github.com/codegangsta/cli" + "github.com/urfave/cli" "github.com/vbatts/tar-split/version" ) From 7560005f21d918cc59698527424fc6759877152f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 13 Mar 2017 18:28:54 -0400 Subject: [PATCH 93/95] README: adding a golang report card Signed-off-by: Vincent Batts --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4c544d8..c2e7f48 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # tar-split [![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) +[![Go Report Card](https://goreportcard.com/badge/github.com/vbatts/tar-split)](https://goreportcard.com/report/github.com/vbatts/tar-split) Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. 
From 245403c324d6ea47167227ee70aaef1c4c87ff43 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 13 Mar 2017 18:33:27 -0400 Subject: [PATCH 94/95] travis: test more go versions Thanks to @tianon, for pointing to https://github.com/travis-ci/travis-build/blob/5e3ef60b0d43b8ef56fb6fa77ba3269fbf945fa6/lib/travis/build/config.rb#L54-L70 Signed-off-by: Vincent Batts --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2d9a842..dcce57a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,11 @@ language: go go: - tip - - 1.6.2 - - 1.5.4 + - 1.x + - 1.8.x + - 1.7.x + - 1.6.x + - 1.5.x # let us have pretty, fast Docker-based Travis workers! sudo: false From c6dd42815acecceec10987978f48488c37ead18f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 14 Mar 2017 11:04:10 -0400 Subject: [PATCH 95/95] archive/tar: monotonic clock adjustment commit 0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb upstream Signed-off-by: Vincent Batts --- archive/tar/tar_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/archive/tar/tar_test.go b/archive/tar/tar_test.go index d63c072..9ef319a 100644 --- a/archive/tar/tar_test.go +++ b/archive/tar/tar_test.go @@ -94,13 +94,12 @@ func TestRoundTrip(t *testing.T) { var b bytes.Buffer tw := NewWriter(&b) hdr := &Header{ - Name: "file.txt", - Uid: 1 << 21, // too big for 8 octal digits - Size: int64(len(data)), - ModTime: time.Now(), + Name: "file.txt", + Uid: 1 << 21, // too big for 8 octal digits + Size: int64(len(data)), + // https://github.com/golang/go/commit/0e3355903d2ebcf5ee9e76096f51ac9a116a9dbb#diff-d7bf2a98d7b57b6ff754ca406f1b7581R105 + ModTime: time.Now().AddDate(0, 0, 0).Round(1 * time.Second), } - // tar only supports second precision. - hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond) if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("tw.WriteHeader: %v", err) }