Merge pull request #7889 from vbatts/vbatts-tarsum_name_collision

tarsum: name collision fix
This commit is contained in:
unclejack 2014-09-17 21:59:39 +03:00
commit bd8be73fc6
8 changed files with 197 additions and 14 deletions

125
tarsum/fileinfosums.go Normal file
View file

@ -0,0 +1,125 @@
package tarsum
import "sort"
// FileInfoSumInterface exposes the information recorded about one tar entry.
// The information is only reachable through these accessors, so the actual
// name and sum cannot be meddled with.
type FileInfoSumInterface interface {
// Name returns the file name.
Name() string
// Sum returns the checksum of this particular file and its headers.
Sum() string
// Pos returns the position of the file in the tar.
Pos() int64
}
// fileInfoSum is the unexported value type that carries the name, checksum
// and position of a single entry. Its fields are read through the accessor
// methods below.
type fileInfoSum struct {
	name, sum string
	pos       int64
}

// Name returns the stored file name.
func (f fileInfoSum) Name() string { return f.name }

// Sum returns the stored checksum string.
func (f fileInfoSum) Sum() string { return f.sum }

// Pos returns the stored position.
func (f fileInfoSum) Pos() int64 { return f.pos }
// FileInfoSums is an ordered list of FileInfoSumInterface values. Its methods
// provide lookups by name, duplicate detection and in-place sorting.
type FileInfoSums []FileInfoSumInterface
// GetFile walks the list in order and returns the first entry whose Name
// equals name. It returns nil when no entry matches.
func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
	for _, entry := range fis {
		if entry.Name() != name {
			continue
		}
		return entry
	}
	return nil
}
// GetAllFile collects every entry whose Name equals name, keeping the order
// in which the entries appear in fis. The result is an empty, non-nil
// FileInfoSums when nothing matches.
func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
	matches := FileInfoSums{}
	for _, entry := range fis {
		if entry.Name() != name {
			continue
		}
		matches = append(matches, entry)
	}
	return matches
}
// contains reports whether e is equal to at least one element of s.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
// GetDuplicatePaths returns, in list order, every entry whose Name was
// already used by an earlier entry of fis. The first entry carrying a given
// name is never part of the result. The result is nil when all names are
// distinct.
func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) {
	// Allocate early with the final size; the set never needs to grow.
	seen := make(map[string]struct{}, len(fis))
	for _, entry := range fis {
		name := entry.Name()
		if _, repeated := seen[name]; repeated {
			dups = append(dups, entry)
			continue
		}
		seen[name] = struct{}{}
	}
	return dups
}
// Len returns the number of entries in fis. The sort helpers in this file
// embed FileInfoSums and rely on this method when passed to sort.Sort.
func (fis FileInfoSums) Len() int {
	return len(fis)
}
// Swap exchanges the entries at indexes i and j in place. The sort helpers in
// this file embed FileInfoSums and rely on this method when passed to
// sort.Sort.
func (fis FileInfoSums) Swap(i, j int) {
	held := fis[i]
	fis[i] = fis[j]
	fis[j] = held
}
// SortByPos sorts fis in place using the byPos ordering, i.e. by the value
// each entry reports from Pos.
func (fis FileInfoSums) SortByPos() {
	ordering := byPos{FileInfoSums: fis}
	sort.Sort(ordering)
}
// SortByNames sorts fis in place using the byName ordering: by Name, with
// entries sharing a name ordered by Pos.
func (fis FileInfoSums) SortByNames() {
	ordering := byName{FileInfoSums: fis}
	sort.Sort(ordering)
}
// SortBySums sorts fis in place using the bySum ordering. The duplicates
// reported by GetDuplicatePaths are handed to bySum only when there is at
// least one; otherwise bySum receives nil.
func (fis FileInfoSums) SortBySums() {
	var dups FileInfoSums
	if found := fis.GetDuplicatePaths(); len(found) > 0 {
		dups = found
	}
	sort.Sort(bySum{FileInfoSums: fis, dups: dups})
}
// byName is a sort.Sort helper that orders entries by file name. Entries
// that share a name are ordered by their position in the tar archive.
type byName struct{ FileInfoSums }

// Less reports whether entry i sorts before entry j: by Name first, and by
// Pos when both names are equal.
func (bn byName) Less(i, j int) bool {
	left, right := bn.FileInfoSums[i], bn.FileInfoSums[j]
	if left.Name() != right.Name() {
		return left.Name() < right.Name()
	}
	return left.Pos() < right.Pos()
}
// bySum is a sort.Sort helper that orders entries by their sums. When dups
// is non-nil, entries sharing a name are ordered by position instead.
//
// NOTE(review): Less only checks dups for nil-ness; its contents are never
// read. Mixing position-based and sum-based comparisons may not give a
// transitive ordering when duplicate names exist. Confirm before changing,
// since the resulting order feeds the archive checksum.
type bySum struct {
	FileInfoSums
	dups FileInfoSums
}

// Less reports whether entry i sorts before entry j. Sums are compared unless
// dups is non-nil and both entries have the same name, in which case Pos is
// compared.
func (bs bySum) Less(i, j int) bool {
	left, right := bs.FileInfoSums[i], bs.FileInfoSums[j]
	if bs.dups == nil || left.Name() != right.Name() {
		return left.Sum() < right.Sum()
	}
	return left.Pos() < right.Pos()
}
// byPos is a sort.Sort helper that orders entries by their position, i.e.
// their original order in the tar archive.
type byPos struct{ FileInfoSums }

// Less reports whether entry i has a smaller Pos than entry j.
func (bp byPos) Less(i, j int) bool {
	left, right := bp.FileInfoSums[i].Pos(), bp.FileInfoSums[j].Pos()
	return left < right
}

View file

@ -0,0 +1,45 @@
package tarsum
import "testing"
// newFileInfoSums builds a fresh, deliberately unsorted fixture. It contains
// two entries named "dup1" (positions 5 and 4) so that duplicate handling can
// be exercised.
func newFileInfoSums() FileInfoSums {
	entries := []fileInfoSum{
		{name: "file3", sum: "2abcdef1234567890", pos: 2},
		{name: "dup1", sum: "deadbeef1", pos: 5},
		{name: "file1", sum: "0abcdef1234567890", pos: 0},
		{name: "file4", sum: "3abcdef1234567890", pos: 3},
		{name: "dup1", sum: "deadbeef0", pos: 4},
		{name: "file2", sum: "1abcdef1234567890", pos: 1},
	}
	sums := make(FileInfoSums, len(entries))
	for i, entry := range entries {
		sums[i] = entry
	}
	return sums
}
// TestSortFileInfoSums checks GetAllFile and the SortByNames, SortBySums and
// SortByPos orderings against the fixture from newFileInfoSums.
func TestSortFileInfoSums(t *testing.T) {
	dups := newFileInfoSums().GetAllFile("dup1")
	if len(dups) != 2 {
		// Fatalf, not Errorf: dups[0] below would panic on an empty result
		// and hide the real failure.
		t.Fatalf("expected length 2, got %d", len(dups))
	}
	dups.SortByNames()
	if dups[0].Pos() != 4 {
		t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
	}

	fis := newFileInfoSums()
	expected := "0abcdef1234567890"
	fis.SortBySums()
	got := fis[0].Sum()
	if got != expected {
		t.Errorf("Expected %q, got %q", expected, got)
	}

	fis = newFileInfoSums()
	expected = "dup1"
	fis.SortByNames()
	gotFis := fis[0]
	if gotFis.Name() != expected {
		t.Errorf("Expected %q, got %q", expected, gotFis.Name())
	}
	// since a duplicate is first, ensure it is ordered first by position too
	if gotFis.Pos() != 4 {
		t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
	}

	// The fixture positions are 0 through 5, so sorting by position must put
	// position 0 first.
	fis = newFileInfoSums()
	fis.SortByPos()
	if fis[0].Pos() != 0 {
		t.Errorf("Expected %d, got %d", 0, fis[0].Pos())
	}
}

View file

@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
// checksums of a tar archive // checksums of a tar archive
type TarSum interface { type TarSum interface {
io.Reader io.Reader
GetSums() map[string]string GetSums() FileInfoSums
Sum([]byte) string Sum([]byte) string
Version() Version Version() Version
} }
@ -54,7 +54,8 @@ type tarSum struct {
bufGz *bytes.Buffer bufGz *bytes.Buffer
bufData []byte bufData []byte
h hash.Hash h hash.Hash
sums map[string]string sums FileInfoSums
fileCounter int64
currentFile string currentFile string
finished bool finished bool
first bool first bool
@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
ts.h = sha256.New() ts.h = sha256.New()
ts.h.Reset() ts.h.Reset()
ts.first = true ts.first = true
ts.sums = make(map[string]string) ts.sums = FileInfoSums{}
} }
if ts.finished { if ts.finished {
@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
return 0, err return 0, err
} }
if !ts.first { if !ts.first {
ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil)) ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
ts.fileCounter++
ts.h.Reset() ts.h.Reset()
} else { } else {
ts.first = false ts.first = false
@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
} }
func (ts *tarSum) Sum(extra []byte) string { func (ts *tarSum) Sum(extra []byte) string {
var sums []string ts.sums.SortBySums()
for _, sum := range ts.sums {
sums = append(sums, sum)
}
sort.Strings(sums)
h := sha256.New() h := sha256.New()
if extra != nil { if extra != nil {
h.Write(extra) h.Write(extra)
} }
for _, sum := range sums { for _, fis := range ts.sums {
log.Debugf("-->%s<--", sum) log.Debugf("-->%s<--", fis.Sum())
h.Write([]byte(sum)) h.Write([]byte(fis.Sum()))
} }
checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil)) checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
log.Debugf("checksum processed: %s", checksum) log.Debugf("checksum processed: %s", checksum)
return checksum return checksum
} }
func (ts *tarSum) GetSums() map[string]string { func (ts *tarSum) GetSums() FileInfoSums {
return ts.sums return ts.sums
} }

View file

@ -59,6 +59,22 @@ var testLayers = []testLayer{
{ {
options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"}, tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
{
// this tar has two files with the same path
filename: "testdata/collision/collision-0.tar",
tarsum: "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
{
// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
filename: "testdata/collision/collision-1.tar",
tarsum: "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
{
// this tar is a newer variant of collision-0.tar, ensuring it has a different hash
filename: "testdata/collision/collision-2.tar",
tarsum: "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
{
// this tar is a newer variant of collision-1.tar, ensuring it has a different hash
filename: "testdata/collision/collision-3.tar",
tarsum: "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
} }
type sizedOptions struct { type sizedOptions struct {

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.