TarSum: versioning

This introduces Versions for TarSum checksums.
Fixes: https://github.com/docker/docker/issues/7526

It preserves current functionality and abstracts the interface for
future flexibility of hashing algorithms. As a POC, the VersionDev
Tarsum does not include the mtime in the checksum calculation, and would
solve https://github.com/docker/docker/issues/7387, though this is not a
settled Version and is subject to change until a version number is assigned.

Signed-off-by: Vincent Batts <vbatts@redhat.com>
This commit is contained in:
Vincent Batts 2014-08-21 16:12:52 -04:00
parent 47dabd55c0
commit 14689ec238
5 changed files with 193 additions and 24 deletions

View file

@ -22,6 +22,29 @@ const (
buf32K = 32 * 1024 buf32K = 32 * 1024
) )
// NewTarSum wraps r in a TarSum that calculates a fixed time checksum of the
// tar archive read through it.
//
// Such checksums are used for layers of an image (in some cases including the
// byte payload of the image's json metadata as well) and for buildcache.
//
// dc is stored as the DisableCompression setting and v selects the checksum
// Version. If v has no entry in tarSumVersions, NewTarSum returns a nil
// interface and ErrVersionNotImplemented.
func NewTarSum(r io.Reader, dc bool, v Version) (TarSumInterface, error) {
	_, known := tarSumVersions[v]
	if !known {
		return nil, ErrVersionNotImplemented
	}
	sum := &TarSum{
		Reader:             r,
		DisableCompression: dc,
		tarSumVersion:      v,
	}
	return sum, nil
}
// TarSumInterface is the generic interface for calculating fixed time
// checksums of a tar archive.
//
// It is the type NewTarSum hands back to callers.
type TarSumInterface interface {
// The archive is consumed through Read.
// NOTE(review): presumably the stream must be fully read before Sum is
// meaningful (the tests drain it with io.Copy first) — confirm.
io.Reader
// GetSums returns a string-to-string map.
// NOTE(review): presumably per-entry checksums keyed by file name —
// confirm against the implementation.
GetSums() map[string]string
// Sum returns the checksum string. The TarSum implementation formats it
// as "<version prefix>+sha256:<hex digest>".
Sum([]byte) string
// Version reports the checksum Version in use.
Version() Version
}
// TarSum struct is the structure for a Version0 checksum calculation
type TarSum struct { type TarSum struct {
io.Reader io.Reader
tarR *tar.Reader tarR *tar.Reader
@ -35,27 +58,15 @@ type TarSum struct {
currentFile string currentFile string
finished bool finished bool
first bool first bool
DisableCompression bool DisableCompression bool // false by default. When false, the output gzip compressed.
tarSumVersion Version // this field is not exported so it can not be mutated during use
} }
type writeCloseFlusher interface { func (ts TarSum) Version() Version {
io.WriteCloser return ts.tarSumVersion
Flush() error
} }
type nopCloseFlusher struct { func (ts TarSum) selectHeaders(h *tar.Header, v Version) (set [][2]string) {
io.Writer
}
func (n *nopCloseFlusher) Close() error {
return nil
}
func (n *nopCloseFlusher) Flush() error {
return nil
}
func (ts *TarSum) encodeHeader(h *tar.Header) error {
for _, elem := range [][2]string{ for _, elem := range [][2]string{
{"name", h.Name}, {"name", h.Name},
{"mode", strconv.Itoa(int(h.Mode))}, {"mode", strconv.Itoa(int(h.Mode))},
@ -69,9 +80,17 @@ func (ts *TarSum) encodeHeader(h *tar.Header) error {
{"gname", h.Gname}, {"gname", h.Gname},
{"devmajor", strconv.Itoa(int(h.Devmajor))}, {"devmajor", strconv.Itoa(int(h.Devmajor))},
{"devminor", strconv.Itoa(int(h.Devminor))}, {"devminor", strconv.Itoa(int(h.Devminor))},
// {"atime", strconv.Itoa(int(h.AccessTime.UTC().Unix()))},
// {"ctime", strconv.Itoa(int(h.ChangeTime.UTC().Unix()))},
} { } {
if v == VersionDev && elem[0] == "mtime" {
continue
}
set = append(set, elem)
}
return
}
func (ts *TarSum) encodeHeader(h *tar.Header) error {
for _, elem := range ts.selectHeaders(h, ts.Version()) {
if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil { if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
return err return err
} }
@ -193,7 +212,7 @@ func (ts *TarSum) Sum(extra []byte) string {
log.Debugf("-->%s<--", sum) log.Debugf("-->%s<--", sum)
h.Write([]byte(sum)) h.Write([]byte(sum))
} }
checksum := "tarsum+sha256:" + hex.EncodeToString(h.Sum(nil)) checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
log.Debugf("checksum processed: %s", checksum) log.Debugf("checksum processed: %s", checksum)
return checksum return checksum
} }

View file

@ -18,13 +18,20 @@ type testLayer struct {
jsonfile string jsonfile string
gzip bool gzip bool
tarsum string tarsum string
version Version
} }
var testLayers = []testLayer{ var testLayers = []testLayer{
{ {
filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
version: Version0,
tarsum: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"}, tarsum: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"},
{
filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
version: VersionDev,
tarsum: "tarsum.dev+sha256:486b86e25c4db4551228154848bc4663b15dd95784b1588980f4ba1cb42e83e9"},
{ {
filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
@ -118,7 +125,11 @@ func TestTarSums(t *testing.T) {
} }
// double negatives! // double negatives!
ts := &TarSum{Reader: fh, DisableCompression: !layer.gzip} ts, err := NewTarSum(fh, !layer.gzip, layer.version)
if err != nil {
t.Errorf("%q :: %q", err, layer.filename)
continue
}
_, err = io.Copy(ioutil.Discard, ts) _, err = io.Copy(ioutil.Discard, ts)
if err != nil { if err != nil {
t.Errorf("failed to copy from %s: %s", layer.filename, err) t.Errorf("failed to copy from %s: %s", layer.filename, err)
@ -160,7 +171,11 @@ func Benchmark9kTar(b *testing.B) {
b.SetBytes(n) b.SetBytes(n)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
ts := &TarSum{Reader: buf, DisableCompression: true} ts, err := NewTarSum(buf, true, Version0)
if err != nil {
b.Error(err)
return
}
io.Copy(ioutil.Discard, ts) io.Copy(ioutil.Discard, ts)
ts.Sum(nil) ts.Sum(nil)
} }
@ -179,7 +194,11 @@ func Benchmark9kTarGzip(b *testing.B) {
b.SetBytes(n) b.SetBytes(n)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
ts := &TarSum{Reader: buf, DisableCompression: false} ts, err := NewTarSum(buf, false, Version0)
if err != nil {
b.Error(err)
return
}
io.Copy(ioutil.Discard, ts) io.Copy(ioutil.Discard, ts)
ts.Sum(nil) ts.Sum(nil)
} }
@ -217,7 +236,11 @@ func benchmarkTar(b *testing.B, opts sizedOptions, isGzip bool) {
b.SetBytes(opts.size * opts.num) b.SetBytes(opts.size * opts.num)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
ts := &TarSum{Reader: fh, DisableCompression: !isGzip} ts, err := NewTarSum(fh, !isGzip, Version0)
if err != nil {
b.Error(err)
return
}
io.Copy(ioutil.Discard, ts) io.Copy(ioutil.Discard, ts)
ts.Sum(nil) ts.Sum(nil)
fh.Seek(0, 0) fh.Seek(0, 0)

56
tarsum/versioning.go Normal file
View file

@ -0,0 +1,56 @@
package tarsum
import (
"errors"
"strings"
)
// Version identifies a revision of the TarSum algorithm. Each Version is
// associated with the prefix of the checksum string it produces, e.g. the
// "tarsum" in
// "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b".
type Version int
// Known TarSum versions. The numeric values (0 and 1, assigned by iota) are
// the keys used in the tarSumVersions table, so the order here must stay in
// step with that table.
const (
// Version0 has the checksum prefix "tarsum".
Version0 Version = iota
// VersionDev has the checksum prefix "tarsum.dev".
// NOTE: this variable will be of an unsettled next-version of the TarSum calculation
VersionDev
)
// GetVersions returns every Version that has an entry in tarSumVersions.
// The result is built by ranging over a map, so its order is unspecified.
func GetVersions() []Version {
	known := make([]Version, 0, len(tarSumVersions))
	for version := range tarSumVersions {
		known = append(known, version)
	}
	return known
}
// tarSumVersions maps each known Version to the prefix used in its checksum
// strings. Key 0 is Version0 and key 1 is VersionDev. A Version is treated as
// known only if it has an entry here.
var tarSumVersions = map[Version]string{
0: "tarsum",
1: "tarsum.dev",
}
// String returns the checksum prefix registered for tsv in tarSumVersions,
// or the empty string when tsv has no entry there.
func (tsv Version) String() string {
	if prefix, ok := tarSumVersions[tsv]; ok {
		return prefix
	}
	return ""
}
// GetVersionFromTarsum returns the Version whose prefix matches the given
// tarsum string.
//
// Only the text before the first "+" is compared; a string without "+" is
// compared as a whole. If no known prefix matches, it returns -1 and
// ErrNotVersion.
func GetVersionFromTarsum(tarsum string) (Version, error) {
	prefix := tarsum
	if plus := strings.Index(tarsum, "+"); plus >= 0 {
		prefix = tarsum[:plus]
	}
	for version, name := range tarSumVersions {
		if name == prefix {
			return version, nil
		}
	}
	return -1, ErrNotVersion
}
var (
// ErrNotVersion is returned by GetVersionFromTarsum when the string's
// prefix does not match any known TarSum Version.
ErrNotVersion = errors.New("string does not include a TarSum Version")
// ErrVersionNotImplemented is returned by NewTarSum when the requested
// Version has no entry in tarSumVersions.
ErrVersionNotImplemented = errors.New("TarSum Version is not yet implemented")
)

49
tarsum/versioning_test.go Normal file
View file

@ -0,0 +1,49 @@
package tarsum
import (
"testing"
)
// TestVersion checks the prefix strings reported by Version.String for the
// zero value and for the value 1.
func TestVersion(t *testing.T) {
	var zero Version // the zero value must report the "tarsum" prefix

	checks := []struct {
		version  Version
		expected string
	}{
		{zero, "tarsum"},
		{1, "tarsum.dev"},
	}
	for _, c := range checks {
		if got := c.version.String(); got != c.expected {
			t.Errorf("expected %q, got %q", c.expected, got)
		}
	}
}
// TestGetVersion checks that GetVersionFromTarsum resolves known prefixes,
// with or without a "+hash" suffix, and rejects an unknown prefix with
// ErrNotVersion.
func TestGetVersion(t *testing.T) {
	type versionCase struct {
		Str      string
		Expected Version
	}
	cases := []versionCase{
		{"tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", Version0},
		{"tarsum+sha256", Version0},
		{"tarsum", Version0},
		{"tarsum.dev", VersionDev},
		{"tarsum.dev+sha256:deadbeef", VersionDev},
	}

	for i := range cases {
		tc := cases[i]
		got, err := GetVersionFromTarsum(tc.Str)
		if err != nil {
			t.Fatalf("%q : %s", err, tc.Str)
		}
		if got == tc.Expected {
			continue
		}
		t.Errorf("expected %d (%q), got %d (%q)", tc.Expected, tc.Expected, got, got)
	}

	// A prefix that is not registered must produce ErrNotVersion.
	const unknown = "weak+md5:abcdeabcde"
	if _, err := GetVersionFromTarsum(unknown); err != ErrNotVersion {
		t.Fatalf("%q : %s", err, unknown)
	}
}

22
tarsum/writercloser.go Normal file
View file

@ -0,0 +1,22 @@
package tarsum
import (
"io"
)
// writeCloseFlusher is an io.WriteCloser that additionally has a Flush
// method.
// NOTE(review): presumably this lets a compressing writer and a plain
// pass-through writer be used interchangeably — confirm against the callers.
type writeCloseFlusher interface {
io.WriteCloser
Flush() error
}
// nopCloseFlusher gives a plain io.Writer the Close and Flush methods that
// writeCloseFlusher requires. Write is forwarded to the embedded Writer;
// Close and Flush do nothing.
type nopCloseFlusher struct {
	io.Writer
}

// Close is a no-op; it always returns nil and leaves the Writer untouched.
func (*nopCloseFlusher) Close() error { return nil }

// Flush is a no-op; it always returns nil and leaves the Writer untouched.
func (*nopCloseFlusher) Flush() error { return nil }