diff --git a/README.md b/README.md index a8694cf..6c3d032 100644 --- a/README.md +++ b/README.md @@ -65,16 +65,13 @@ naive storage implementation. First we'll get an archive to work with. For repeatability, we'll make an archive from what you've just cloned: -``` +```bash git archive --format=tar -o tar-split.tar HEAD . ``` -``` -go build ./checksize.go -``` - -``` -$ ./checksize ./tar-split.tar +```bash +$ go get github.com/vbatts/tar-split/cmd/tar-split +$ tar-split checksize ./tar-split.tar inspecting "tar-split.tar" (size 210k) -- number of files: 50 -- size of metadata uncompressed: 53k @@ -87,7 +84,7 @@ implications are as little as 3kb. But let's look at a larger archive, with many files. -``` +```bash $ ls -sh ./d.tar 1.4G ./d.tar $ ./checksize ~/d.tar @@ -116,6 +113,7 @@ bytes-per-file rate for the storage implications. * cli tooling to assemble/disassemble a provided tar archive * would be interesting to have an assembler stream that implements `io.Seeker` + ## License See [LICENSE](LICENSE) diff --git a/cmd/tar-split/asm.go b/cmd/tar-split/asm.go new file mode 100644 index 0000000..312e54b --- /dev/null +++ b/cmd/tar-split/asm.go @@ -0,0 +1,64 @@ +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func CommandAsm(c *cli.Context) { + if len(c.Args()) > 0 { + logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) + } + if len(c.String("input")) == 0 { + logrus.Fatalf("--input filename must be set") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set ([FILENAME|-])") + } + if len(c.String("path")) == 0 { + logrus.Fatalf("--path must be set") + } + + var outputStream io.Writer + if c.String("output") == "-" { + outputStream = os.Stdout + } else { + fh, err := os.Create(c.String("output")) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + outputStream = fh + } + + // Get the tar metadata reader + mf, err := os.Open(c.String("input")) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz, err := gzip.NewReader(mf) + if err != nil { + logrus.Fatal(err) + } + defer mfz.Close() + + metaUnpacker := storage.NewJSONUnpacker(mfz) + // XXX maybe get the absolute path here + fileGetter := storage.NewPathFileGetter(c.String("path")) + + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + i, err := io.Copy(outputStream, ots) + if err != nil { + logrus.Fatal(err) + } + + logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) +} diff --git a/checksize.go b/cmd/tar-split/checksize.go similarity index 83% rename from checksize.go rename to cmd/tar-split/checksize.go index a6d3c08..38f830e 100644 --- a/checksize.go +++ b/cmd/tar-split/checksize.go @@ -1,29 +1,25 @@ -// +build ignore - package main import ( "archive/tar" "compress/gzip" - "flag" "fmt" "io" "io/ioutil" "log" "os" + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) -var ( - flCleanup = flag.Bool("cleanup", true, "cleanup tempfiles") -) - -func main() { - flag.Parse() - - for _, arg := range flag.Args() { +func CommandChecksize(c *cli.Context) { + if len(c.Args()) == 0 { + logrus.Fatalf("please specify tar archives to check ('-' will check stdin)") + } + for _, arg := range c.Args() { fh, err := os.Open(arg) if err != nil { log.Fatal(err) @@ -40,8 +36,10 @@ func main() { log.Fatal(err) } defer packFh.Close() - if *flCleanup { + if !c.Bool("work") { defer os.Remove(packFh.Name()) + } else { + fmt.Printf(" -- working file preserved: %s\n", packFh.Name()) } sp := storage.NewJSONPacker(packFh) @@ -83,7 +81,7 @@ func main() { log.Fatal(err) } defer gzPackFh.Close() - if *flCleanup { + if !c.Bool("work") { defer os.Remove(gzPackFh.Name()) } diff --git a/cmd/tar-split/disasm.go b/cmd/tar-split/disasm.go new file mode 100644 index 0000000..b7b0dfe --- /dev/null +++ b/cmd/tar-split/disasm.go @@ -0,0 +1,56 @@ +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func CommandDisasm(c *cli.Context) { + if len(c.Args()) != 1 { + logrus.Fatalf("please specify tar to be disabled ") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set") + } + + // Set up the tar input stream + var inputStream io.Reader + if c.Args()[0] == "-" { + inputStream = os.Stdin + } else { + fh, err := os.Open(c.Args()[0]) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + inputStream = fh + } + + // Set up the metadata storage + mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz := gzip.NewWriter(mf) + defer mfz.Close() + metaPacker := storage.NewJSONPacker(mfz) + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) + if err != nil { + logrus.Fatal(err) + } + i, err := io.Copy(os.Stdout, its) + if err != nil { + logrus.Fatal(err) + } + logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) +} diff --git a/cmd/tar-split/main.go b/cmd/tar-split/main.go index 8c631b0..f24dce5 100644 --- a/cmd/tar-split/main.go +++ b/cmd/tar-split/main.go @@ -2,14 +2,10 @@ package main import ( - "compress/gzip" - "io" "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" - "github.com/vbatts/tar-split/tar/asm" - "github.com/vbatts/tar-split/tar/storage" ) func main() { @@ -71,105 +67,21 @@ func main() { }, }, }, + { + Name: "checksize", + Usage: "displays size estimates for metadata storage of a Tar archive", + Action: CommandChecksize, + Flags: []cli.Flag{ + cli.BoolFlag{ + Name: "work", + Usage: "do not delete the working directory", + // defaults to false + }, + }, + }, } if err := app.Run(os.Args); err != nil { logrus.Fatal(err) } } - -func CommandDisasm(c *cli.Context) { - if len(c.Args()) != 1 { - logrus.Fatalf("please specify tar to be disabled ") - } - if len(c.String("output")) == 0 { - logrus.Fatalf("--output filename must be set") - } - - // Set up the tar input stream - var inputStream io.Reader - if c.Args()[0] == "-" { - inputStream = os.Stdin - } else { - fh, err := os.Open(c.Args()[0]) - if err != nil { - logrus.Fatal(err) - } - defer fh.Close() - inputStream = fh - } - - // Set up the metadata storage - mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) - if err != nil { - logrus.Fatal(err) - } - defer mf.Close() - mfz := gzip.NewWriter(mf) - defer mfz.Close() - metaPacker := storage.NewJSONPacker(mfz) - - // we're passing nil here for the file putter, because the ApplyDiff will - // handle the extraction of the archive - its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) - if err != nil { - logrus.Fatal(err) - } - i, err := io.Copy(os.Stdout, its) - if err != nil { - logrus.Fatal(err) - } - logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) -} - -func CommandAsm(c *cli.Context) { - if len(c.Args()) > 0 { - logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) - } - if len(c.String("input")) == 0 { - logrus.Fatalf("--input filename must be set") - } - if len(c.String("output")) == 0 { - logrus.Fatalf("--output filename must be set ([FILENAME|-])") - } - if len(c.String("path")) == 0 { - logrus.Fatalf("--path must be set") - } - - var outputStream io.Writer - if c.String("output") == "-" { - outputStream = os.Stdout - } else { - fh, err := os.Create(c.String("output")) - if err != nil { - logrus.Fatal(err) - } - defer fh.Close() - outputStream = fh - } - - // Get the tar metadata reader - mf, err := os.Open(c.String("input")) - if err != nil { - logrus.Fatal(err) - } - defer mf.Close() - mfz, err := gzip.NewReader(mf) - if err != nil { - logrus.Fatal(err) - } - defer mfz.Close() - - metaUnpacker := storage.NewJSONUnpacker(mfz) - // XXX maybe get the absolute path here - fileGetter := storage.NewPathFileGetter(c.String("path")) - - ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) - defer ots.Close() - i, err := io.Copy(outputStream, ots) - if err != nil { - logrus.Fatal(err) - } - - logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) -}