utils/tarsum* -> pkg/tarsum

Docker-DCO-1.1-Signed-off-by: Erik Hollensbe <github@hollensbe.org> (github: erikh)
This commit is contained in:
Erik Hollensbe 2014-07-30 06:42:12 -07:00
parent c91872166b
commit 7c58e704bc
2 changed files with 408 additions and 0 deletions

183
tarsum/tarsum.go Normal file
View file

@ -0,0 +1,183 @@
package tarsum
import (
"bytes"
"compress/gzip"
"crypto/sha256"
"encoding/hex"
"hash"
"io"
"sort"
"strconv"
"strings"
"github.com/docker/docker/utils"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
)
type TarSum struct {
io.Reader
tarR *tar.Reader
tarW *tar.Writer
gz writeCloseFlusher
bufTar *bytes.Buffer
bufGz *bytes.Buffer
h hash.Hash
sums map[string]string
currentFile string
finished bool
first bool
DisableCompression bool
}
type writeCloseFlusher interface {
io.WriteCloser
Flush() error
}
type nopCloseFlusher struct {
io.Writer
}
func (n *nopCloseFlusher) Close() error {
return nil
}
func (n *nopCloseFlusher) Flush() error {
return nil
}
func (ts *TarSum) encodeHeader(h *tar.Header) error {
for _, elem := range [][2]string{
{"name", h.Name},
{"mode", strconv.Itoa(int(h.Mode))},
{"uid", strconv.Itoa(h.Uid)},
{"gid", strconv.Itoa(h.Gid)},
{"size", strconv.Itoa(int(h.Size))},
{"mtime", strconv.Itoa(int(h.ModTime.UTC().Unix()))},
{"typeflag", string([]byte{h.Typeflag})},
{"linkname", h.Linkname},
{"uname", h.Uname},
{"gname", h.Gname},
{"devmajor", strconv.Itoa(int(h.Devmajor))},
{"devminor", strconv.Itoa(int(h.Devminor))},
// {"atime", strconv.Itoa(int(h.AccessTime.UTC().Unix()))},
// {"ctime", strconv.Itoa(int(h.ChangeTime.UTC().Unix()))},
} {
if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
return err
}
}
return nil
}
func (ts *TarSum) Read(buf []byte) (int, error) {
if ts.gz == nil {
ts.bufTar = bytes.NewBuffer([]byte{})
ts.bufGz = bytes.NewBuffer([]byte{})
ts.tarR = tar.NewReader(ts.Reader)
ts.tarW = tar.NewWriter(ts.bufTar)
if !ts.DisableCompression {
ts.gz = gzip.NewWriter(ts.bufGz)
} else {
ts.gz = &nopCloseFlusher{Writer: ts.bufGz}
}
ts.h = sha256.New()
ts.h.Reset()
ts.first = true
ts.sums = make(map[string]string)
}
if ts.finished {
return ts.bufGz.Read(buf)
}
buf2 := make([]byte, len(buf), cap(buf))
n, err := ts.tarR.Read(buf2)
if err != nil {
if err == io.EOF {
if _, err := ts.h.Write(buf2[:n]); err != nil {
return 0, err
}
if !ts.first {
ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
ts.h.Reset()
} else {
ts.first = false
}
currentHeader, err := ts.tarR.Next()
if err != nil {
if err == io.EOF {
if err := ts.gz.Close(); err != nil {
return 0, err
}
ts.finished = true
return n, nil
}
return n, err
}
ts.currentFile = strings.TrimSuffix(strings.TrimPrefix(currentHeader.Name, "./"), "/")
if err := ts.encodeHeader(currentHeader); err != nil {
return 0, err
}
if err := ts.tarW.WriteHeader(currentHeader); err != nil {
return 0, err
}
if _, err := ts.tarW.Write(buf2[:n]); err != nil {
return 0, err
}
ts.tarW.Flush()
if _, err := io.Copy(ts.gz, ts.bufTar); err != nil {
return 0, err
}
ts.gz.Flush()
return ts.bufGz.Read(buf)
}
return n, err
}
// Filling the hash buffer
if _, err = ts.h.Write(buf2[:n]); err != nil {
return 0, err
}
// Filling the tar writter
if _, err = ts.tarW.Write(buf2[:n]); err != nil {
return 0, err
}
ts.tarW.Flush()
// Filling the gz writter
if _, err = io.Copy(ts.gz, ts.bufTar); err != nil {
return 0, err
}
ts.gz.Flush()
return ts.bufGz.Read(buf)
}
func (ts *TarSum) Sum(extra []byte) string {
var sums []string
for _, sum := range ts.sums {
sums = append(sums, sum)
}
sort.Strings(sums)
h := sha256.New()
if extra != nil {
h.Write(extra)
}
for _, sum := range sums {
utils.Debugf("-->%s<--", sum)
h.Write([]byte(sum))
}
checksum := "tarsum+sha256:" + hex.EncodeToString(h.Sum(nil))
utils.Debugf("checksum processed: %s", checksum)
return checksum
}
func (ts *TarSum) GetSums() map[string]string {
return ts.sums
}

225
tarsum/tarsum_test.go Normal file
View file

@ -0,0 +1,225 @@
package tarsum
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"io/ioutil"
"os"
"testing"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
)
type testLayer struct {
filename string
options *sizedOptions
jsonfile string
gzip bool
tarsum string
}
var testLayers = []testLayer{
{
filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
tarsum: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"},
{
filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
gzip: true,
tarsum: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"},
{
filename: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/layer.tar",
jsonfile: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/json",
tarsum: "tarsum+sha256:ac672ee85da9ab7f9667ae3c32841d3e42f33cc52c273c23341dabba1c8b0c8b"},
{
options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
}
type sizedOptions struct {
num int64
size int64
isRand bool
realFile bool
}
// make a tar:
// * num is the number of files the tar should have
// * size is the bytes per file
// * isRand is whether the contents of the files should be a random chunk (otherwise it's all zeros)
// * realFile will write to a TempFile, instead of an in memory buffer
func sizedTar(opts sizedOptions) io.Reader {
var (
fh io.ReadWriter
err error
)
if opts.realFile {
fh, err = ioutil.TempFile("", "tarsum")
if err != nil {
return nil
}
} else {
fh = bytes.NewBuffer([]byte{})
}
tarW := tar.NewWriter(fh)
for i := int64(0); i < opts.num; i++ {
err := tarW.WriteHeader(&tar.Header{
Name: fmt.Sprintf("/testdata%d", i),
Mode: 0755,
Uid: 0,
Gid: 0,
Size: opts.size,
})
if err != nil {
return nil
}
var rBuf []byte
if opts.isRand {
rBuf = make([]byte, 8)
_, err = rand.Read(rBuf)
if err != nil {
return nil
}
} else {
rBuf = []byte{0, 0, 0, 0, 0, 0, 0, 0}
}
for i := int64(0); i < opts.size/int64(8); i++ {
tarW.Write(rBuf)
}
}
return fh
}
func TestTarSums(t *testing.T) {
for _, layer := range testLayers {
var (
fh io.Reader
err error
)
if len(layer.filename) > 0 {
fh, err = os.Open(layer.filename)
if err != nil {
t.Errorf("failed to open %s: %s", layer.filename, err)
continue
}
} else if layer.options != nil {
fh = sizedTar(*layer.options)
} else {
// What else is there to test?
t.Errorf("what to do with %#v", layer)
continue
}
if file, ok := fh.(*os.File); ok {
defer file.Close()
}
// double negatives!
ts := &TarSum{Reader: fh, DisableCompression: !layer.gzip}
_, err = io.Copy(ioutil.Discard, ts)
if err != nil {
t.Errorf("failed to copy from %s: %s", layer.filename, err)
continue
}
var gotSum string
if len(layer.jsonfile) > 0 {
jfh, err := os.Open(layer.jsonfile)
if err != nil {
t.Errorf("failed to open %s: %s", layer.jsonfile, err)
continue
}
buf, err := ioutil.ReadAll(jfh)
if err != nil {
t.Errorf("failed to readAll %s: %s", layer.jsonfile, err)
continue
}
gotSum = ts.Sum(buf)
} else {
gotSum = ts.Sum(nil)
}
if layer.tarsum != gotSum {
t.Errorf("expecting [%s], but got [%s]", layer.tarsum, gotSum)
}
}
}
func Benchmark9kTar(b *testing.B) {
buf := bytes.NewBuffer([]byte{})
fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar")
if err != nil {
b.Error(err)
return
}
n, err := io.Copy(buf, fh)
fh.Close()
b.SetBytes(n)
b.ResetTimer()
for i := 0; i < b.N; i++ {
ts := &TarSum{Reader: buf, DisableCompression: true}
io.Copy(ioutil.Discard, ts)
ts.Sum(nil)
}
}
func Benchmark9kTarGzip(b *testing.B) {
buf := bytes.NewBuffer([]byte{})
fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar")
if err != nil {
b.Error(err)
return
}
n, err := io.Copy(buf, fh)
fh.Close()
b.SetBytes(n)
b.ResetTimer()
for i := 0; i < b.N; i++ {
ts := &TarSum{Reader: buf, DisableCompression: false}
io.Copy(ioutil.Discard, ts)
ts.Sum(nil)
}
}
// this is a single big file in the tar archive
func Benchmark1mbSingleFileTar(b *testing.B) {
benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, false)
}
// this is a single big file in the tar archive
func Benchmark1mbSingleFileTarGzip(b *testing.B) {
benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, true)
}
// this is 1024 1k files in the tar archive
func Benchmark1kFilesTar(b *testing.B) {
benchmarkTar(b, sizedOptions{1024, 1024, true, true}, false)
}
// this is 1024 1k files in the tar archive
func Benchmark1kFilesTarGzip(b *testing.B) {
benchmarkTar(b, sizedOptions{1024, 1024, true, true}, true)
}
func benchmarkTar(b *testing.B, opts sizedOptions, isGzip bool) {
var fh *os.File
tarReader := sizedTar(opts)
if br, ok := tarReader.(*os.File); ok {
fh = br
}
defer os.Remove(fh.Name())
defer fh.Close()
b.SetBytes(opts.size * opts.num)
b.ResetTimer()
for i := 0; i < b.N; i++ {
ts := &TarSum{Reader: fh, DisableCompression: !isGzip}
io.Copy(ioutil.Discard, ts)
ts.Sum(nil)
fh.Seek(0, 0)
}
}