diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go
index 3d0c99c..817f740 100644
--- a/tar/asm/assemble_test.go
+++ b/tar/asm/assemble_test.go
@@ -130,6 +130,76 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
 	}
 }
 
+// TestTarSparseFiles round-trips the testdata/sparse.tar.gz fixture: the
+// archive is disassembled through NewInputTarStream and then reassembled with
+// NewOutputTarStream, and both streams must match the expected size and SHA1.
+func TestTarSparseFiles(t *testing.T) {
+	testCases := []struct {
+		path            string
+		expectedSHA1Sum string
+		expectedSize    int64
+	}{
+		{"./testdata/sparse.tar.gz", "562541bfd9b1ac8c8cc6cedcf80c2eeeaa6bb9d7", 5253120},
+	}
+	for _, tc := range testCases {
+		fh, err := os.Open(tc.path)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer fh.Close()
+		gzRdr, err := gzip.NewReader(fh)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer gzRdr.Close()
+
+		// Setup where we'll store the metadata
+		w := bytes.NewBuffer([]byte{})
+		sp := storage.NewJSONPacker(w)
+		fgp := storage.NewBufferFileGetPutter()
+
+		// wrap the disassembly stream
+		tarStream, err := NewInputTarStream(gzRdr, sp, fgp)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// get a sum of the stream after it has passed through to ensure it's the same.
+		h0 := sha1.New()
+		tRdr0 := io.TeeReader(tarStream, h0)
+
+		// read it all to the bit bucket
+		i, err := io.Copy(ioutil.Discard, tRdr0)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if i != tc.expectedSize {
+			t.Errorf("size of tar %q: expected %d; got %d", tc.path, tc.expectedSize, i)
+		}
+		if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum {
+			t.Fatalf("checksum of tar %q: expected %s; got %x", tc.path, tc.expectedSHA1Sum, h0.Sum(nil))
+		}
+		r := bytes.NewBuffer(w.Bytes())
+		sup := storage.NewJSONUnpacker(r)
+		// and reuse the fgp that we Put the payloads to.
+
+		rc := NewOutputTarStream(fgp, sup)
+		h1 := sha1.New()
+		i, err = io.Copy(h1, rc)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if i != tc.expectedSize {
+			t.Errorf("size of output tar %q: expected %d; got %d", tc.path, tc.expectedSize, i)
+		}
+		if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum {
+			t.Fatalf("checksum of output tar %q: expected %s; got %x", tc.path, tc.expectedSHA1Sum, h1.Sum(nil))
+		}
+	}
+}
+
 func TestTarStream(t *testing.T) {
 	testCases := []struct {
 		path            string
@@ -176,10 +246,10 @@ func TestTarStream(t *testing.T) {
 		}
 
 		if i != tc.expectedSize {
-			t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i)
+			t.Errorf("size of tar %q: expected %d; got %d", tc.path, tc.expectedSize, i)
 		}
 		if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum {
-			t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil))
+			t.Fatalf("checksum of tar %q: expected %s; got %x", tc.path, tc.expectedSHA1Sum, h0.Sum(nil))
 		}
 
 		//t.Logf("%s", w.String()) // if we fail, then show the packed info
@@ -198,10 +268,10 @@ func TestTarStream(t *testing.T) {
 		}
 
 		if i != tc.expectedSize {
-			t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i)
+			t.Errorf("size of output tar %q: expected %d; got %d", tc.path, tc.expectedSize, i)
 		}
 		if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum {
-			t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil))
+			t.Fatalf("checksum of output tar %q: expected %s; got %x", tc.path, tc.expectedSHA1Sum, h1.Sum(nil))
 		}
 	}
 }
diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go
index 54ef23a..b4d5a6b 100644
--- a/tar/asm/disassemble.go
+++ b/tar/asm/disassemble.go
@@ -92,6 +92,18 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
 				}
 			}
 
+			// Record a SparseFileType entry for GNU sparse headers before the
+			// FileType entry built below.
+			if hdr.Typeflag == tar.TypeGNUSparse {
+				e := storage.Entry{
+					Type: storage.SparseFileType,
+				}
+				e.SetName(hdr.Name)
+				// NOTE(review): the error from AddEntry is not checked here;
+				// confirm how a failure should be reported to the caller.
+				_, err = p.AddEntry(e)
+			}
+
 			entry := storage.Entry{
 				Type:    storage.FileType,
 				Size:    hdr.Size,
diff --git a/tar/asm/testdata/sparse.tar.gz b/tar/asm/testdata/sparse.tar.gz
new file mode 100644
index 0000000..6d1ac68
Binary files /dev/null and b/tar/asm/testdata/sparse.tar.gz differ
diff --git a/tar/storage/entry.go b/tar/storage/entry.go
index c91e7ea..6a8bae3 100644
--- a/tar/storage/entry.go
+++ b/tar/storage/entry.go
@@ -23,6 +23,9 @@ const (
 	//
 	// Its payload is to be marshalled base64 encoded.
 	SegmentType
+	// SparseFileType marks an entry recorded for a tar header whose Typeflag
+	// is tar.TypeGNUSparse.
+	SparseFileType
 )
 
 // Entry is the structure for packing and unpacking the information read from