tarsum: name collision fix
If a tar were constructed with duplicate file names, then depending on the order of the entries, it could result in the same tarsum. Signed-off-by: Vincent Batts <vbatts@redhat.com>
This commit is contained in:
parent
e32fdc563a
commit
a705b08336
8 changed files with 197 additions and 14 deletions
125
tarsum/fileinfosums.go
Normal file
125
tarsum/fileinfosums.go
Normal file
|
@ -0,0 +1,125 @@
|
|||
package tarsum
|
||||
|
||||
import "sort"
|
||||
|
||||
// FileInfoSumInterface is the read-only view of a single entry's checksum
// record. The record is only reachable through this interface so that the
// actual name and sum cannot be meddled with.
type FileInfoSumInterface interface {
	// Name returns the file name of the entry.
	Name() string
	// Sum returns the checksum of this particular file and its headers.
	Sum() string
	// Pos returns the position of the file in the tar.
	Pos() int64
}
|
||||
|
||||
// fileInfoSum is the unexported record behind the Name/Sum/Pos accessors.
type fileInfoSum struct {
	name string // file name of the entry
	sum  string // checksum string of the entry
	pos  int64  // position of the entry in the tar
}

// Name returns the stored file name.
func (f fileInfoSum) Name() string { return f.name }

// Sum returns the stored checksum string.
func (f fileInfoSum) Sum() string { return f.sum }

// Pos returns the stored position.
func (f fileInfoSum) Pos() int64 { return f.pos }
|
||||
|
||||
// FileInfoSums is a list of FileInfoSumInterface entries. Its methods look up
// entries by name and sort the list in place by position, name or sum.
type FileInfoSums []FileInfoSumInterface
|
||||
|
||||
// GetFile returns the first FileInfoSumInterface with a matching name
|
||||
func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
|
||||
for i := range fis {
|
||||
if fis[i].Name() == name {
|
||||
return fis[i]
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllFile returns a FileInfoSums with all matching names
|
||||
func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
|
||||
f := FileInfoSums{}
|
||||
for i := range fis {
|
||||
if fis[i].Name() == name {
|
||||
f = append(f, fis[i])
|
||||
}
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
// contains reports whether e is an element of s.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
|
||||
|
||||
func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) {
|
||||
seen := make(map[string]int, len(fis)) // allocate earl. no need to grow this map.
|
||||
for i := range fis {
|
||||
f := fis[i]
|
||||
if _, ok := seen[f.Name()]; ok {
|
||||
dups = append(dups, f)
|
||||
} else {
|
||||
seen[f.Name()] = 0
|
||||
}
|
||||
}
|
||||
return dups
|
||||
}
|
||||
|
||||
// Len returns the number of entries. Together with Swap it supplies the
// sort.Interface methods shared by the byPos, byName and bySum helpers.
func (fis FileInfoSums) Len() int {
	return len(fis)
}
|
||||
// Swap exchanges the entries at indexes i and j.
func (fis FileInfoSums) Swap(i, j int) {
	fis[j], fis[i] = fis[i], fis[j]
}
|
||||
|
||||
func (fis FileInfoSums) SortByPos() {
|
||||
sort.Sort(byPos{fis})
|
||||
}
|
||||
|
||||
func (fis FileInfoSums) SortByNames() {
|
||||
sort.Sort(byName{fis})
|
||||
}
|
||||
|
||||
func (fis FileInfoSums) SortBySums() {
|
||||
dups := fis.GetDuplicatePaths()
|
||||
if len(dups) > 0 {
|
||||
sort.Sort(bySum{fis, dups})
|
||||
} else {
|
||||
sort.Sort(bySum{fis, nil})
|
||||
}
|
||||
}
|
||||
|
||||
// byName is a sort.Sort helper for sorting by file names.
|
||||
// If names are the same, order them by their appearance in the tar archive
|
||||
type byName struct{ FileInfoSums }
|
||||
|
||||
func (bn byName) Less(i, j int) bool {
|
||||
if bn.FileInfoSums[i].Name() == bn.FileInfoSums[j].Name() {
|
||||
return bn.FileInfoSums[i].Pos() < bn.FileInfoSums[j].Pos()
|
||||
}
|
||||
return bn.FileInfoSums[i].Name() < bn.FileInfoSums[j].Name()
|
||||
}
|
||||
|
||||
// bySum is a sort.Sort helper for sorting by the sums of all the fileinfos in the tar archive
|
||||
type bySum struct {
|
||||
FileInfoSums
|
||||
dups FileInfoSums
|
||||
}
|
||||
|
||||
func (bs bySum) Less(i, j int) bool {
|
||||
if bs.dups != nil && bs.FileInfoSums[i].Name() == bs.FileInfoSums[j].Name() {
|
||||
return bs.FileInfoSums[i].Pos() < bs.FileInfoSums[j].Pos()
|
||||
}
|
||||
return bs.FileInfoSums[i].Sum() < bs.FileInfoSums[j].Sum()
|
||||
}
|
||||
|
||||
// byPos is a sort.Sort helper for sorting by the sums of all the fileinfos by their original order
|
||||
type byPos struct{ FileInfoSums }
|
||||
|
||||
func (bp byPos) Less(i, j int) bool {
|
||||
return bp.FileInfoSums[i].Pos() < bp.FileInfoSums[j].Pos()
|
||||
}
|
45
tarsum/fileinfosums_test.go
Normal file
45
tarsum/fileinfosums_test.go
Normal file
|
@ -0,0 +1,45 @@
|
|||
package tarsum
|
||||
|
||||
import "testing"
|
||||
|
||||
func newFileInfoSums() FileInfoSums {
|
||||
return FileInfoSums{
|
||||
fileInfoSum{name: "file3", sum: "2abcdef1234567890", pos: 2},
|
||||
fileInfoSum{name: "dup1", sum: "deadbeef1", pos: 5},
|
||||
fileInfoSum{name: "file1", sum: "0abcdef1234567890", pos: 0},
|
||||
fileInfoSum{name: "file4", sum: "3abcdef1234567890", pos: 3},
|
||||
fileInfoSum{name: "dup1", sum: "deadbeef0", pos: 4},
|
||||
fileInfoSum{name: "file2", sum: "1abcdef1234567890", pos: 1},
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortFileInfoSums(t *testing.T) {
|
||||
dups := newFileInfoSums().GetAllFile("dup1")
|
||||
if len(dups) != 2 {
|
||||
t.Errorf("expected length 2, got %d", len(dups))
|
||||
}
|
||||
dups.SortByNames()
|
||||
if dups[0].Pos() != 4 {
|
||||
t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
|
||||
}
|
||||
|
||||
fis := newFileInfoSums()
|
||||
expected := "0abcdef1234567890"
|
||||
fis.SortBySums()
|
||||
got := fis[0].Sum()
|
||||
if got != expected {
|
||||
t.Errorf("Expected %q, got %q", expected, got)
|
||||
}
|
||||
|
||||
fis = newFileInfoSums()
|
||||
expected = "dup1"
|
||||
fis.SortByNames()
|
||||
gotFis := fis[0]
|
||||
if gotFis.Name() != expected {
|
||||
t.Errorf("Expected %q, got %q", expected, gotFis.Name())
|
||||
}
|
||||
// since a duplicate is first, ensure it is ordered first by position too
|
||||
if gotFis.Pos() != 4 {
|
||||
t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
|
||||
}
|
||||
}
|
|
@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
|
|||
// checksums of a tar archive
|
||||
type TarSum interface {
|
||||
io.Reader
|
||||
GetSums() map[string]string
|
||||
GetSums() FileInfoSums
|
||||
Sum([]byte) string
|
||||
Version() Version
|
||||
}
|
||||
|
@ -54,7 +54,8 @@ type tarSum struct {
|
|||
bufGz *bytes.Buffer
|
||||
bufData []byte
|
||||
h hash.Hash
|
||||
sums map[string]string
|
||||
sums FileInfoSums
|
||||
fileCounter int64
|
||||
currentFile string
|
||||
finished bool
|
||||
first bool
|
||||
|
@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
|||
ts.h = sha256.New()
|
||||
ts.h.Reset()
|
||||
ts.first = true
|
||||
ts.sums = make(map[string]string)
|
||||
ts.sums = FileInfoSums{}
|
||||
}
|
||||
|
||||
if ts.finished {
|
||||
|
@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
|||
return 0, err
|
||||
}
|
||||
if !ts.first {
|
||||
ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
|
||||
ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
|
||||
ts.fileCounter++
|
||||
ts.h.Reset()
|
||||
} else {
|
||||
ts.first = false
|
||||
|
@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
|||
}
|
||||
|
||||
func (ts *tarSum) Sum(extra []byte) string {
|
||||
var sums []string
|
||||
|
||||
for _, sum := range ts.sums {
|
||||
sums = append(sums, sum)
|
||||
}
|
||||
sort.Strings(sums)
|
||||
ts.sums.SortBySums()
|
||||
h := sha256.New()
|
||||
if extra != nil {
|
||||
h.Write(extra)
|
||||
}
|
||||
for _, sum := range sums {
|
||||
log.Debugf("-->%s<--", sum)
|
||||
h.Write([]byte(sum))
|
||||
for _, fis := range ts.sums {
|
||||
log.Debugf("-->%s<--", fis.Sum())
|
||||
h.Write([]byte(fis.Sum()))
|
||||
}
|
||||
checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
|
||||
log.Debugf("checksum processed: %s", checksum)
|
||||
return checksum
|
||||
}
|
||||
|
||||
func (ts *tarSum) GetSums() map[string]string {
|
||||
func (ts *tarSum) GetSums() FileInfoSums {
|
||||
return ts.sums
|
||||
}
|
||||
|
|
|
@ -59,6 +59,22 @@ var testLayers = []testLayer{
|
|||
{
|
||||
options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
|
||||
tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
|
||||
{
|
||||
// this tar has two files with the same path
|
||||
filename: "testdata/collision/collision-0.tar",
|
||||
tarsum: "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
|
||||
{
|
||||
// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
|
||||
filename: "testdata/collision/collision-1.tar",
|
||||
tarsum: "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
|
||||
{
|
||||
// this tar has a newer version of collision-0.tar, ensuring it has a different hash
|
||||
filename: "testdata/collision/collision-2.tar",
|
||||
tarsum: "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
|
||||
{
|
||||
// this tar has a newer version of collision-1.tar, ensuring it has a different hash
|
||||
filename: "testdata/collision/collision-3.tar",
|
||||
tarsum: "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
|
||||
}
|
||||
|
||||
type sizedOptions struct {
|
||||
|
|
BIN
tarsum/testdata/collision/collision-0.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-0.tar
vendored
Normal file
Binary file not shown.
BIN
tarsum/testdata/collision/collision-1.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-1.tar
vendored
Normal file
Binary file not shown.
BIN
tarsum/testdata/collision/collision-2.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-2.tar
vendored
Normal file
Binary file not shown.
BIN
tarsum/testdata/collision/collision-3.tar
vendored
Normal file
BIN
tarsum/testdata/collision/collision-3.tar
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue