Merge pull request #7889 from vbatts/vbatts-tarsum_name_collision
tarsum: name collision fix
This commit is contained in:
commit
bd8be73fc6
8 changed files with 197 additions and 14 deletions
125
tarsum/fileinfosums.go
Normal file
125
tarsum/fileinfosums.go
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
package tarsum
|
||||||
|
|
||||||
|
import "sort"
|
||||||
|
|
||||||
|
// FileInfoSumInterface gives read-only access to the information recorded
// for a single tar entry. The data is exposed through an interface so that
// the underlying name and sum cannot be meddled with.
type FileInfoSumInterface interface {
	// Name returns the file name of the entry.
	Name() string

	// Sum returns the checksum of this particular file and its headers.
	Sum() string

	// Pos returns the position of the file in the tar.
	Pos() int64
}
|
||||||
|
|
||||||
|
// fileInfoSum is the concrete record behind the Name, Sum and Pos accessors.
// Its fields are unexported, so outside the package the values can only be
// read through those methods.
type fileInfoSum struct {
	name string // file name of the entry
	sum  string // checksum recorded for the entry
	pos  int64  // position of the entry
}

// Name returns the stored file name.
func (f fileInfoSum) Name() string { return f.name }

// Sum returns the stored checksum.
func (f fileInfoSum) Sum() string { return f.sum }

// Pos returns the stored position.
func (f fileInfoSum) Pos() int64 { return f.pos }
|
||||||
|
|
||||||
|
// FileInfoSums is a list of FileInfoSumInterface entries.
type FileInfoSums []FileInfoSumInterface
|
||||||
|
|
||||||
|
// GetFile returns the first FileInfoSumInterface with a matching name
|
||||||
|
func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
|
||||||
|
for i := range fis {
|
||||||
|
if fis[i].Name() == name {
|
||||||
|
return fis[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAllFile returns a FileInfoSums with all matching names
|
||||||
|
func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
|
||||||
|
f := FileInfoSums{}
|
||||||
|
for i := range fis {
|
||||||
|
if fis[i].Name() == name {
|
||||||
|
f = append(f, fis[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
// contains reports whether e is one of the elements of s.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) {
|
||||||
|
seen := make(map[string]int, len(fis)) // allocate earl. no need to grow this map.
|
||||||
|
for i := range fis {
|
||||||
|
f := fis[i]
|
||||||
|
if _, ok := seen[f.Name()]; ok {
|
||||||
|
dups = append(dups, f)
|
||||||
|
} else {
|
||||||
|
seen[f.Name()] = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dups
|
||||||
|
}
|
||||||
|
|
||||||
|
// Len returns the number of entries in the list.
func (fis FileInfoSums) Len() int {
	return len(fis)
}
|
||||||
|
// Swap exchanges the entries at indexes i and j.
func (fis FileInfoSums) Swap(i, j int) {
	fis[j], fis[i] = fis[i], fis[j]
}
|
||||||
|
|
||||||
|
func (fis FileInfoSums) SortByPos() {
|
||||||
|
sort.Sort(byPos{fis})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fis FileInfoSums) SortByNames() {
|
||||||
|
sort.Sort(byName{fis})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fis FileInfoSums) SortBySums() {
|
||||||
|
dups := fis.GetDuplicatePaths()
|
||||||
|
if len(dups) > 0 {
|
||||||
|
sort.Sort(bySum{fis, dups})
|
||||||
|
} else {
|
||||||
|
sort.Sort(bySum{fis, nil})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// byName is a sort.Sort helper for sorting by file names.
|
||||||
|
// If names are the same, order them by their appearance in the tar archive
|
||||||
|
type byName struct{ FileInfoSums }
|
||||||
|
|
||||||
|
func (bn byName) Less(i, j int) bool {
|
||||||
|
if bn.FileInfoSums[i].Name() == bn.FileInfoSums[j].Name() {
|
||||||
|
return bn.FileInfoSums[i].Pos() < bn.FileInfoSums[j].Pos()
|
||||||
|
}
|
||||||
|
return bn.FileInfoSums[i].Name() < bn.FileInfoSums[j].Name()
|
||||||
|
}
|
||||||
|
|
||||||
|
// bySum is a sort.Sort helper for sorting by the sums of all the fileinfos in the tar archive
|
||||||
|
type bySum struct {
|
||||||
|
FileInfoSums
|
||||||
|
dups FileInfoSums
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bs bySum) Less(i, j int) bool {
|
||||||
|
if bs.dups != nil && bs.FileInfoSums[i].Name() == bs.FileInfoSums[j].Name() {
|
||||||
|
return bs.FileInfoSums[i].Pos() < bs.FileInfoSums[j].Pos()
|
||||||
|
}
|
||||||
|
return bs.FileInfoSums[i].Sum() < bs.FileInfoSums[j].Sum()
|
||||||
|
}
|
||||||
|
|
||||||
|
// byPos is a sort.Sort helper for sorting by the sums of all the fileinfos by their original order
|
||||||
|
type byPos struct{ FileInfoSums }
|
||||||
|
|
||||||
|
func (bp byPos) Less(i, j int) bool {
|
||||||
|
return bp.FileInfoSums[i].Pos() < bp.FileInfoSums[j].Pos()
|
||||||
|
}
|
45
tarsum/fileinfosums_test.go
Normal file
45
tarsum/fileinfosums_test.go
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
package tarsum
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func newFileInfoSums() FileInfoSums {
|
||||||
|
return FileInfoSums{
|
||||||
|
fileInfoSum{name: "file3", sum: "2abcdef1234567890", pos: 2},
|
||||||
|
fileInfoSum{name: "dup1", sum: "deadbeef1", pos: 5},
|
||||||
|
fileInfoSum{name: "file1", sum: "0abcdef1234567890", pos: 0},
|
||||||
|
fileInfoSum{name: "file4", sum: "3abcdef1234567890", pos: 3},
|
||||||
|
fileInfoSum{name: "dup1", sum: "deadbeef0", pos: 4},
|
||||||
|
fileInfoSum{name: "file2", sum: "1abcdef1234567890", pos: 1},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSortFileInfoSums(t *testing.T) {
|
||||||
|
dups := newFileInfoSums().GetAllFile("dup1")
|
||||||
|
if len(dups) != 2 {
|
||||||
|
t.Errorf("expected length 2, got %d", len(dups))
|
||||||
|
}
|
||||||
|
dups.SortByNames()
|
||||||
|
if dups[0].Pos() != 4 {
|
||||||
|
t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
|
||||||
|
}
|
||||||
|
|
||||||
|
fis := newFileInfoSums()
|
||||||
|
expected := "0abcdef1234567890"
|
||||||
|
fis.SortBySums()
|
||||||
|
got := fis[0].Sum()
|
||||||
|
if got != expected {
|
||||||
|
t.Errorf("Expected %q, got %q", expected, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
fis = newFileInfoSums()
|
||||||
|
expected = "dup1"
|
||||||
|
fis.SortByNames()
|
||||||
|
gotFis := fis[0]
|
||||||
|
if gotFis.Name() != expected {
|
||||||
|
t.Errorf("Expected %q, got %q", expected, gotFis.Name())
|
||||||
|
}
|
||||||
|
// since a duplicate is first, ensure it is ordered first by position too
|
||||||
|
if gotFis.Pos() != 4 {
|
||||||
|
t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
|
||||||
|
}
|
||||||
|
}
|
|
@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
|
||||||
// checksums of a tar archive
|
// checksums of a tar archive
|
||||||
type TarSum interface {
|
type TarSum interface {
|
||||||
io.Reader
|
io.Reader
|
||||||
GetSums() map[string]string
|
GetSums() FileInfoSums
|
||||||
Sum([]byte) string
|
Sum([]byte) string
|
||||||
Version() Version
|
Version() Version
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,8 @@ type tarSum struct {
|
||||||
bufGz *bytes.Buffer
|
bufGz *bytes.Buffer
|
||||||
bufData []byte
|
bufData []byte
|
||||||
h hash.Hash
|
h hash.Hash
|
||||||
sums map[string]string
|
sums FileInfoSums
|
||||||
|
fileCounter int64
|
||||||
currentFile string
|
currentFile string
|
||||||
finished bool
|
finished bool
|
||||||
first bool
|
first bool
|
||||||
|
@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
||||||
ts.h = sha256.New()
|
ts.h = sha256.New()
|
||||||
ts.h.Reset()
|
ts.h.Reset()
|
||||||
ts.first = true
|
ts.first = true
|
||||||
ts.sums = make(map[string]string)
|
ts.sums = FileInfoSums{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ts.finished {
|
if ts.finished {
|
||||||
|
@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
if !ts.first {
|
if !ts.first {
|
||||||
ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
|
ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
|
||||||
|
ts.fileCounter++
|
||||||
ts.h.Reset()
|
ts.h.Reset()
|
||||||
} else {
|
} else {
|
||||||
ts.first = false
|
ts.first = false
|
||||||
|
@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ts *tarSum) Sum(extra []byte) string {
|
func (ts *tarSum) Sum(extra []byte) string {
|
||||||
var sums []string
|
ts.sums.SortBySums()
|
||||||
|
|
||||||
for _, sum := range ts.sums {
|
|
||||||
sums = append(sums, sum)
|
|
||||||
}
|
|
||||||
sort.Strings(sums)
|
|
||||||
h := sha256.New()
|
h := sha256.New()
|
||||||
if extra != nil {
|
if extra != nil {
|
||||||
h.Write(extra)
|
h.Write(extra)
|
||||||
}
|
}
|
||||||
for _, sum := range sums {
|
for _, fis := range ts.sums {
|
||||||
log.Debugf("-->%s<--", sum)
|
log.Debugf("-->%s<--", fis.Sum())
|
||||||
h.Write([]byte(sum))
|
h.Write([]byte(fis.Sum()))
|
||||||
}
|
}
|
||||||
checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
|
checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
|
||||||
log.Debugf("checksum processed: %s", checksum)
|
log.Debugf("checksum processed: %s", checksum)
|
||||||
return checksum
|
return checksum
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ts *tarSum) GetSums() map[string]string {
|
func (ts *tarSum) GetSums() FileInfoSums {
|
||||||
return ts.sums
|
return ts.sums
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,6 +59,22 @@ var testLayers = []testLayer{
|
||||||
{
|
{
|
||||||
options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
|
options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
|
||||||
tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
|
tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
|
||||||
|
{
|
||||||
|
// this tar has two files with the same path
|
||||||
|
filename: "testdata/collision/collision-0.tar",
|
||||||
|
tarsum: "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
|
||||||
|
{
|
||||||
|
// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
|
||||||
|
filename: "testdata/collision/collision-1.tar",
|
||||||
|
tarsum: "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
|
||||||
|
{
|
||||||
|
// this tar is a newer version of collision-0.tar, ensuring it has a different hash
|
||||||
|
filename: "testdata/collision/collision-2.tar",
|
||||||
|
tarsum: "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
|
||||||
|
{
|
||||||
|
// this tar is a newer version of collision-1.tar, ensuring it has a different hash
|
||||||
|
filename: "testdata/collision/collision-3.tar",
|
||||||
|
tarsum: "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
|
||||||
}
|
}
|
||||||
|
|
||||||
type sizedOptions struct {
|
type sizedOptions struct {
|
||||||
|
|
BIN
tarsum/testdata/collision/collision-0.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-0.tar
vendored
Normal file
Binary file not shown.
BIN
tarsum/testdata/collision/collision-1.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-1.tar
vendored
Normal file
Binary file not shown.
BIN
tarsum/testdata/collision/collision-2.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-2.tar
vendored
Normal file
Binary file not shown.
BIN
tarsum/testdata/collision/collision-3.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-3.tar
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue