diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index 04afcfe..dc5c1db 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -1,48 +1,197 @@ package asm import ( + "bytes" + "compress/gzip" + "crypto/sha1" + "fmt" + "io" + "io/ioutil" + "os" "testing" "github.com/vbatts/tar-split/tar/storage" ) -var entries = storage.Entries{ - storage.Entry{ - Type: storage.SegmentType, - Payload: []byte("how"), - Position: 0, +var entries = []struct { + Entry storage.Entry + Body []byte +}{ + { + Entry: storage.Entry{ + Type: storage.SegmentType, + Payload: []byte("y'all"), // FIXME need real header here + }, }, - storage.Entry{ - Type: storage.SegmentType, - Payload: []byte("y'all"), - Position: 1, + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./hurr.txt", + Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78}, + Size: 20, + }, + Body: []byte("imma hurr til I derp"), }, - storage.Entry{ - Type: storage.FileType, - Name: "./hurr.txt", - Payload: []byte("deadbeef"), - Size: 8, - Position: 2, + { + Entry: storage.Entry{ + Type: storage.SegmentType, + Payload: []byte("doin"), // FIXME need real header here + }, }, - storage.Entry{ - Type: storage.SegmentType, - Payload: []byte("doin"), - Position: 3, + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./ermahgerd.txt", + Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + + Body: []byte("café con leche, por favor"), }, - storage.Entry{ - Type: storage.FileType, - Name: "./ermahgerd.txt", - Payload: []byte("cafebabe"), - Size: 8, - Position: 4, + { + Entry: storage.Entry{ + Type: storage.SegmentType, + Payload: []byte{00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00}, + }, }, } -func TestNewOutputTarStream(t *testing.T) { - // TODO disassembly +func TestTarStream(t *testing.T) { fgp := NewBufferFileGetPutter() + + // first lets prep a GetPutter and Packer + for i := range entries { + if entries[i].Entry.Type == storage.FileType { + j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body)) + if err != nil { + t.Error(err) + } + if j != entries[i].Entry.Size { + t.Errorf("size %q: expected %d; got %d", + entries[i].Entry.Name, + entries[i].Entry.Size, + j) + } + if !bytes.Equal(csum, entries[i].Entry.Payload) { + t.Errorf("checksum %q: expected %v; got %v", + entries[i].Entry.Name, + entries[i].Entry.Payload, + csum) + } + } + } + + // next we'll use these to produce a tar stream. _ = NewOutputTarStream(fgp, nil) + // TODO finish this } -func TestNewInputTarStream(t *testing.T) { +func TestInputTarStream(t *testing.T) { + var ( + expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" + expectedSize int64 = 10240 + ) + + fh, err := os.Open("./testdata/t.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJsonPacker(w) + fgp := NewBufferFileGetPutter() + + // check the tar on the front end too + h0 := sha1.New() + tRdr0 := io.TeeReader(gzRdr, h0) + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(tRdr0, sp, fgp) + if err != nil { + t.Fatal(err) + } + + // get a sum of the stream after it has passed through to ensure it's the same. + h1 := sha1.New() + tRdr1 := io.TeeReader(tarStream, h1) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr1) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of tar: expected %d; got %d", expectedSize, i) + } + + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum { + t.Logf("h0 was %x", h0.Sum(nil)) + t.Errorf("checksum of tar: expected %q; got %x", expectedSum, h1.Sum(nil)) + } + } diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 1219c89..91176e6 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -36,6 +36,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp FilePutter) (io.Reader, pR, pW := io.Pipe() outputRdr := io.TeeReader(r, pW) + // we need a putter that will generate the crc64 sums of file payloads if fp == nil { fp = NewDiscardFilePutter() } @@ -58,10 +59,8 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp FilePutter) (io.Reader, }) if err != nil { pW.CloseWithError(err) - } else { - pW.Close() } - return + break // not return. We need the end of the reader. } if _, err := p.AddEntry(storage.Entry{ @@ -77,7 +76,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp FilePutter) (io.Reader, fileRdr, fileWrtr := io.Pipe() go func() { var err error - csum, err = fp.Put(hdr.Name, fileRdr) + _, csum, err = fp.Put(hdr.Name, fileRdr) if err != nil { pW.CloseWithError(err) } @@ -86,27 +85,30 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp FilePutter) (io.Reader, pW.CloseWithError(err) return } + fileWrtr.Close() } // File entries added, regardless of size - if _, err := p.AddEntry(storage.Entry{ + _, err = p.AddEntry(storage.Entry{ Type: storage.FileType, Name: hdr.Name, Size: hdr.Size, Payload: csum, - }); err != nil { + }) + if err != nil { pW.CloseWithError(err) } - if _, err := p.AddEntry(storage.Entry{ + _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: tr.RawBytes(), - }); err != nil { + }) + if err != nil { pW.CloseWithError(err) } } // it is allowable, and not uncommon that there is further padding on the - // end of an archive, apart from the expected 1024 null bytes + // end of an archive, apart from the expected 1024 null bytes. remainder, err := ioutil.ReadAll(outputRdr) if err != nil && err != io.EOF { pW.CloseWithError(err) diff --git a/tar/asm/getter.go b/tar/asm/getter.go index b11ff7b..a275dca 100644 --- a/tar/asm/getter.go +++ b/tar/asm/getter.go @@ -16,8 +16,8 @@ type FileGetter interface { } type FilePutter interface { - // Put returns a stream for the provided file path - Put(string, io.Reader) ([]byte, error) + // Put returns the crc64 checksum for the provided file + Put(string, io.Reader) (int64, []byte, error) } type FileGetPutter interface { @@ -51,15 +51,16 @@ func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) { return &readCloserWrapper{b}, nil } -func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) ([]byte, error) { +func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) { c := crc64.New(crcTable) tRdr := io.TeeReader(r, c) b := bytes.NewBuffer([]byte{}) - if _, err := io.Copy(b, tRdr); err != nil { - return nil, err + i, err := io.Copy(b, tRdr) + if err != nil { + return 0, nil, err } bfgp.files[name] = b.Bytes() - return c.Sum(nil), nil + return i, c.Sum(nil), nil } type readCloserWrapper struct { @@ -86,11 +87,11 @@ func NewDiscardFilePutter() FilePutter { type bitBucketFilePutter struct { } -func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) ([]byte, error) { +func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) { c := crc64.New(crcTable) tRdr := io.TeeReader(r, c) - _, err := io.Copy(ioutil.Discard, tRdr) - return c.Sum(nil), err + i, err := io.Copy(ioutil.Discard, tRdr) + return i, c.Sum(nil), err } var crcTable = crc64.MakeTable(crc64.ISO) diff --git a/tar/asm/getter_test.go b/tar/asm/getter_test.go index f8d25be..47e5e14 100644 --- a/tar/asm/getter_test.go +++ b/tar/asm/getter_test.go @@ -14,7 +14,7 @@ func TestGetter(t *testing.T) { } for n, b := range files { for body, sum := range b { - csum, err := fgp.Put(n, bytes.NewBufferString(body)) + _, csum, err := fgp.Put(n, bytes.NewBufferString(body)) if err != nil { t.Error(err) } @@ -50,7 +50,7 @@ func TestPutter(t *testing.T) { } for n, b := range files { for body, sum := range b { - csum, err := fp.Put(n, bytes.NewBufferString(body)) + _, csum, err := fp.Put(n, bytes.NewBufferString(body)) if err != nil { t.Error(err) } diff --git a/tar/asm/testdata/t.tar.gz b/tar/asm/testdata/t.tar.gz new file mode 100644 index 0000000..d33bf96 Binary files /dev/null and b/tar/asm/testdata/t.tar.gz differ