compare: implement mtree.DirectoryHierarchy comparisons

This is part of a patchset that refactors all of the checking logic into
comparison operations. Essentially, provide a Compare(...) function that
allows for two different manifests to be compared. Extra and missing
entries are supported in addition to the standard modified entry, and by
implementing as a manifest comparison there is no double-scanning of the
manifest source.

The main annoyance is that we have to also include tar_time handling,
which has not been abstracted inside keywords.go. This is a bit ugly
right now, but works fine for the moment.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
Aleksa Sarai 2016-07-27 22:05:15 +10:00
parent 36372dd3c8
commit 26ff922da6
No known key found for this signature in database
GPG Key ID: 9E18AA267DDB8DB4
3 changed files with 776 additions and 0 deletions

429
compare.go Normal file
View File

@ -0,0 +1,429 @@
package mtree
import (
"encoding/json"
"fmt"
"strconv"
)
// XXX: Do we need a Difference interface to make it so people can do var x
// Difference = <something>? The main problem is that keys and inodes need to
// have different interfaces, so it's just a pain.
// DifferenceType represents the type of a discrepancy encountered for
// an object. This is also used to represent discrepancies between keys
// for objects.
type DifferenceType string
const (
// Missing represents a discrepancy where the object is present in
// the @old manifest but is not present in the @new manifest.
Missing DifferenceType = "missing"
// Extra represents a discrepancy where the object is not present in
// the @old manifest but is present in the @new manifest.
Extra DifferenceType = "extra"
// Modified represents a discrepancy where the object is present in
// both the @old and @new manifests, but one or more of the keys
// have different values (or have not been set in one of the
// manifests).
Modified DifferenceType = "modified"
)
// These functions return *type from the parameter. It's just shorthand, to
// ensure that we don't accidentally expose pointers to the caller that are
// internal data.
func ePtr(e Entry) *Entry { return &e }
func sPtr(s string) *string { return &s }
// InodeDelta Represents a discrepancy in a filesystem object between two
// DirectoryHierarchy manifests. Discrepancies are caused by entries only
// present in one manifest [Missing, Extra], keys only present in one of the
// manifests [Modified] or a difference between the keys of the same object in
// both manifests [Modified].
type InodeDelta struct {
diff DifferenceType
path string
new Entry
old Entry
keys []KeyDelta
}
// Type returns the type of discrepancy encountered when comparing this inode
// between the two DirectoryHierarchy manifests.
func (i InodeDelta) Type() DifferenceType {
return i.diff
}
// Path returns the path to the inode (relative to the root of the
// DirectoryHierarchy manifests).
func (i InodeDelta) Path() string {
return i.path
}
// Diff returns the set of key discrepancies between the two manifests for the
// specific inode. If the DifferenceType of the inode is not Modified, then
// Diff returns nil.
func (i InodeDelta) Diff() []KeyDelta {
return i.keys
}
// Old returns the value of the inode Entry in the "old" DirectoryHierarchy (as
// determined by the ordering of parameters to Compare).
func (i InodeDelta) Old() *Entry {
if i.diff == Modified || i.diff == Missing {
return ePtr(i.old)
}
return nil
}
// New returns the value of the inode Entry in the "new" DirectoryHierarchy (as
// determined by the ordering of parameters to Compare).
func (i InodeDelta) New() *Entry {
if i.diff == Modified || i.diff == Extra {
return ePtr(i.new)
}
return nil
}
// MarshalJSON creates a JSON-encoded version of InodeDelta.
func (i InodeDelta) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
Type DifferenceType `json:"type"`
Path string `json:"path"`
Keys []KeyDelta `json:"keys"`
}{
Type: i.diff,
Path: i.path,
Keys: i.keys,
})
}
// String returns a "pretty" formatting for InodeDelta.
func (i InodeDelta) String() string {
switch i.diff {
case Modified:
// Output the first failure.
f := i.keys[0]
return fmt.Sprintf("%q: keyword %q: expected %s; got %s", i.path, f.name, f.old, f.new)
case Extra:
return fmt.Sprintf("%q: unexpected path", i.path)
case Missing:
return fmt.Sprintf("%q: missing path", i.path)
default:
panic("programming error")
}
}
// KeyDelta Represents a discrepancy in a key for a particular filesystem
// object between two DirectoryHierarchy manifests. Discrepancies are caused by
// keys only present in one manifest [Missing, Extra] or a difference between
// the keys of the same object in both manifests [Modified]. A set of these is
// returned with InodeDelta.Diff().
type KeyDelta struct {
diff DifferenceType
name string
old string
new string
}
// Type returns the type of discrepancy encountered when comparing this key
// between the two DirectoryHierarchy manifests' relevant inode entry.
func (k KeyDelta) Type() DifferenceType {
return k.diff
}
// Name returns the name (the key) of the KeyDeltaVal entry in the
// DirectoryHierarchy.
func (k KeyDelta) Name() string {
return k.name
}
// Old returns the value of the KeyDeltaVal entry in the "old" DirectoryHierarchy
// (as determined by the ordering of parameters to Compare). Returns nil if
// there was no entry in the "old" DirectoryHierarchy.
func (k KeyDelta) Old() *string {
if k.diff == Modified || k.diff == Missing {
return sPtr(k.old)
}
return nil
}
// New returns the value of the KeyDeltaVal entry in the "new" DirectoryHierarchy
// (as determined by the ordering of parameters to Compare). Returns nil if
// there was no entry in the "old" DirectoryHierarchy.
func (k KeyDelta) New() *string {
if k.diff == Modified || k.diff == Extra {
return sPtr(k.old)
}
return nil
}
// MarshalJSON creates a JSON-encoded version of KeyDelta.
func (k KeyDelta) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
Type DifferenceType `json:"type"`
Name string `json:"name"`
Old string `json:"old"`
New string `json:"new"`
}{
Type: k.diff,
Name: k.name,
Old: k.old,
New: k.new,
})
}
// Like Compare, but for single inode entries only. Used to compute the
// cached version of inode.keys.
func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
// Represents the new and old states for an entry's keys.
type stateT struct {
Old *KeyVal
New *KeyVal
}
diffs := map[string]*stateT{}
// Fill the map with the old keys first.
for _, kv := range oldEntry.AllKeys() {
key := kv.Keyword()
// Cannot take &kv because it's the iterator.
copy := new(KeyVal)
*copy = kv
_, ok := diffs[key]
if !ok {
diffs[key] = new(stateT)
}
diffs[key].Old = copy
}
// Then fill the new keys.
for _, kv := range newEntry.AllKeys() {
key := kv.Keyword()
// Cannot take &kv because it's the iterator.
copy := new(KeyVal)
*copy = kv
_, ok := diffs[key]
if !ok {
diffs[key] = new(stateT)
}
diffs[key].New = copy
}
// We need a full list of the keys so we can deal with different keyvalue
// orderings.
var kws []string
for kw := range diffs {
kws = append(kws, kw)
}
// If both tar_time and time were specified in the set of keys, we have to
// mess with the diffs. This is an unfortunate side-effect of tar archives.
// TODO(cyphar): This really should be abstracted inside keywords.go
if inSlice("tar_time", kws) && inSlice("time", kws) {
// Delete "time".
timeStateT := diffs["time"]
delete(diffs, "time")
// Make a new tar_time.
if diffs["tar_time"].Old == nil {
time, err := strconv.ParseFloat(timeStateT.Old.Value(), 64)
if err != nil {
return nil, fmt.Errorf("failed to parse old time: %s", err)
}
newTime := new(KeyVal)
*newTime = KeyVal(fmt.Sprintf("tar_time=%d.000000000", int64(time)))
diffs["tar_time"].Old = newTime
} else if diffs["tar_time"].New == nil {
time, err := strconv.ParseFloat(timeStateT.New.Value(), 64)
if err != nil {
return nil, fmt.Errorf("failed to parse new time: %s", err)
}
newTime := new(KeyVal)
*newTime = KeyVal(fmt.Sprintf("tar_time=%d.000000000", int64(time)))
diffs["tar_time"].New = newTime
} else {
return nil, fmt.Errorf("time and tar_time set in the same manifest")
}
}
// Are there any differences?
var results []KeyDelta
for name, diff := range diffs {
// Invalid
if diff.Old == nil && diff.New == nil {
return nil, fmt.Errorf("invalid state: both old and new are nil: key=%s", name)
}
switch {
// Missing
case diff.New == nil:
results = append(results, KeyDelta{
diff: Missing,
name: name,
old: diff.Old.Value(),
})
// Extra
case diff.Old == nil:
results = append(results, KeyDelta{
diff: Extra,
name: name,
new: diff.New.Value(),
})
// Modified
default:
if !KeyValEqual(*diff.Old, *diff.New) {
results = append(results, KeyDelta{
diff: Modified,
name: name,
old: diff.Old.Value(),
new: diff.New.Value(),
})
}
}
}
return results, nil
}
// Compare compares two directory hierarchy manifests, and returns the
// list of discrepancies between the two. All of the entries in the
// manifest are considered, with differences being generated for
// RelativeType and FullType entries. Differences in structure (such as
// the way /set and /unset are written) are not considered to be
// discrepancies. The list of differences are all filesystem objects.
//
// keys controls which keys will be compared, but if keys is nil then all
// possible keys will be compared between the two manifests (allowing for
// missing entries and the like). A missing or extra key is treated as a
// Modified type.
//
// NB: The order of the parameters matters (old, new) because Extra and
// Missing are considered as different discrepancy types.
func Compare(oldDh, newDh *DirectoryHierarchy, keys []string) ([]InodeDelta, error) {
// Represents the new and old states for an entry.
type stateT struct {
Old *Entry
New *Entry
}
// Make dealing with the keys mapping easier.
keySet := map[string]struct{}{}
for _, key := range keys {
keySet[key] = struct{}{}
}
// To deal with different orderings of the entries, use a path-keyed
// map to make sure we don't start comparing unrelated entries.
diffs := map[string]*stateT{}
// First, iterate over the old hierarchy.
for _, e := range oldDh.Entries {
if e.Type == RelativeType || e.Type == FullType {
path, err := e.Path()
if err != nil {
return nil, err
}
// Cannot take &kv because it's the iterator.
copy := new(Entry)
*copy = e
_, ok := diffs[path]
if !ok {
diffs[path] = &stateT{}
}
diffs[path].Old = copy
}
}
// Then, iterate over the new hierarchy.
for _, e := range newDh.Entries {
if e.Type == RelativeType || e.Type == FullType {
path, err := e.Path()
if err != nil {
return nil, err
}
// Cannot take &kv because it's the iterator.
copy := new(Entry)
*copy = e
_, ok := diffs[path]
if !ok {
diffs[path] = &stateT{}
}
diffs[path].New = copy
}
}
// Now we compute the diff.
var results []InodeDelta
for path, diff := range diffs {
// Invalid
if diff.Old == nil && diff.New == nil {
return nil, fmt.Errorf("invalid state: both old and new are nil: path=%s", path)
}
switch {
// Missing
case diff.New == nil:
results = append(results, InodeDelta{
diff: Missing,
path: path,
old: *diff.Old,
})
// Extra
case diff.Old == nil:
results = append(results, InodeDelta{
diff: Extra,
path: path,
new: *diff.New,
})
// Modified
default:
changed, err := compareEntry(*diff.Old, *diff.New)
if err != nil {
return nil, fmt.Errorf("comparison failed %s: %s", path, err)
}
// Now remove "changed" entries that don't match the keys.
if keys != nil {
var filterChanged []KeyDelta
for _, keyDiff := range changed {
if _, ok := keySet[keyDiff.name]; ok {
filterChanged = append(filterChanged, keyDiff)
}
}
changed = filterChanged
}
// Check if there were any actual changes.
if len(changed) > 0 {
results = append(results, InodeDelta{
diff: Modified,
path: path,
old: *diff.Old,
new: *diff.New,
keys: changed,
})
}
}
}
return results, nil
}

338
compare_test.go Normal file
View File

@ -0,0 +1,338 @@
package mtree
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
)
// simple walk of current directory, and imediately check it.
// may not be parallelizable.
func TestCompare(t *testing.T) {
old, err := Walk(".", nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
new, err := Walk(".", nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
diffs, err := Compare(old, new, nil)
if err != nil {
t.Fatal(err)
}
if len(diffs) > 0 {
t.Errorf("%#v", diffs)
}
}
func TestCompareModified(t *testing.T) {
dir, err := ioutil.TempDir("", "test-compare-modified")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
// Create a bunch of objects.
tmpfile := filepath.Join(dir, "tmpfile")
if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
t.Fatal(err)
}
tmpdir := filepath.Join(dir, "testdir")
if err := os.Mkdir(tmpdir, 0755); err != nil {
t.Fatal(err)
}
tmpsubfile := filepath.Join(tmpdir, "anotherfile")
if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil {
t.Fatal(err)
}
// Walk the current state.
old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Overwrite the content in one of the files.
if err := ioutil.WriteFile(tmpsubfile, []byte("modified content"), 0666); err != nil {
t.Fatal(err)
}
// Walk the new state.
new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Compare.
diffs, err := Compare(old, new, nil)
if err != nil {
t.Fatal(err)
}
// 1 object
if len(diffs) != 1 {
t.Errorf("expected the diff length to be 1, got %d", len(diffs))
for i, diff := range diffs {
t.Logf("diff[%d] = %#v", i, diff)
}
}
// These cannot fail.
tmpfile, _ = filepath.Rel(dir, tmpfile)
tmpdir, _ = filepath.Rel(dir, tmpdir)
tmpsubfile, _ = filepath.Rel(dir, tmpsubfile)
for _, diff := range diffs {
switch diff.Path() {
case tmpsubfile:
if diff.Type() != Modified {
t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type())
}
if diff.Diff() == nil {
t.Errorf("expect to not get nil for .Diff()")
}
old := diff.Old()
new := diff.New()
if old == nil || new == nil {
t.Errorf("expected to get (!nil, !nil) for (.Old, .New), got (%#v, %#v)", old, new)
}
default:
t.Errorf("unexpected diff found: %#v", diff)
}
}
}
func TestCompareMissing(t *testing.T) {
dir, err := ioutil.TempDir("", "test-compare-missing")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
// Create a bunch of objects.
tmpfile := filepath.Join(dir, "tmpfile")
if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
t.Fatal(err)
}
tmpdir := filepath.Join(dir, "testdir")
if err := os.Mkdir(tmpdir, 0755); err != nil {
t.Fatal(err)
}
tmpsubfile := filepath.Join(tmpdir, "anotherfile")
if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil {
t.Fatal(err)
}
// Walk the current state.
old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Delete the objects.
if err := os.RemoveAll(tmpfile); err != nil {
t.Fatal(err)
}
if err := os.RemoveAll(tmpsubfile); err != nil {
t.Fatal(err)
}
if err := os.RemoveAll(tmpdir); err != nil {
t.Fatal(err)
}
// Walk the new state.
new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Compare.
diffs, err := Compare(old, new, nil)
if err != nil {
t.Fatal(err)
}
// 3 objects + the changes to '.'
if len(diffs) != 4 {
t.Errorf("expected the diff length to be 4, got %d", len(diffs))
for i, diff := range diffs {
t.Logf("diff[%d] = %#v", i, diff)
}
}
// These cannot fail.
tmpfile, _ = filepath.Rel(dir, tmpfile)
tmpdir, _ = filepath.Rel(dir, tmpdir)
tmpsubfile, _ = filepath.Rel(dir, tmpsubfile)
for _, diff := range diffs {
switch diff.Path() {
case ".":
// ignore these changes
case tmpfile, tmpdir, tmpsubfile:
if diff.Type() != Missing {
t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type())
}
if diff.Diff() != nil {
t.Errorf("expect to get nil for .Diff(), got %#v", diff.Diff())
}
old := diff.Old()
new := diff.New()
if old == nil || new != nil {
t.Errorf("expected to get (!nil, nil) for (.Old, .New), got (%#v, %#v)", old, new)
}
default:
t.Errorf("unexpected diff found: %#v", diff)
}
}
}
func TestCompareExtra(t *testing.T) {
dir, err := ioutil.TempDir("", "test-compare-extra")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
// Walk the current state.
old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Create a bunch of objects.
tmpfile := filepath.Join(dir, "tmpfile")
if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
t.Fatal(err)
}
tmpdir := filepath.Join(dir, "testdir")
if err := os.Mkdir(tmpdir, 0755); err != nil {
t.Fatal(err)
}
tmpsubfile := filepath.Join(tmpdir, "anotherfile")
if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil {
t.Fatal(err)
}
// Walk the new state.
new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Compare.
diffs, err := Compare(old, new, nil)
if err != nil {
t.Fatal(err)
}
// 3 objects + the changes to '.'
if len(diffs) != 4 {
t.Errorf("expected the diff length to be 4, got %d", len(diffs))
for i, diff := range diffs {
t.Logf("diff[%d] = %#v", i, diff)
}
}
// These cannot fail.
tmpfile, _ = filepath.Rel(dir, tmpfile)
tmpdir, _ = filepath.Rel(dir, tmpdir)
tmpsubfile, _ = filepath.Rel(dir, tmpsubfile)
for _, diff := range diffs {
switch diff.Path() {
case ".":
// ignore these changes
case tmpfile, tmpdir, tmpsubfile:
if diff.Type() != Extra {
t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type())
}
if diff.Diff() != nil {
t.Errorf("expect to get nil for .Diff(), got %#v", diff.Diff())
}
old := diff.Old()
new := diff.New()
if old != nil || new == nil {
t.Errorf("expected to get (!nil, nil) for (.Old, .New), got (%#v, %#v)", old, new)
}
default:
t.Errorf("unexpected diff found: %#v", diff)
}
}
}
func TestCompareKeys(t *testing.T) {
dir, err := ioutil.TempDir("", "test-compare-keys")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
// Create a bunch of objects.
tmpfile := filepath.Join(dir, "tmpfile")
if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
t.Fatal(err)
}
tmpdir := filepath.Join(dir, "testdir")
if err := os.Mkdir(tmpdir, 0755); err != nil {
t.Fatal(err)
}
tmpsubfile := filepath.Join(tmpdir, "anotherfile")
if err := ioutil.WriteFile(tmpsubfile, []byte("aaa"), 0666); err != nil {
t.Fatal(err)
}
// Walk the current state.
old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Overwrite the content in one of the files, but without changing the size.
if err := ioutil.WriteFile(tmpsubfile, []byte("bbb"), 0666); err != nil {
t.Fatal(err)
}
// Walk the new state.
new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
if err != nil {
t.Fatal(err)
}
// Compare.
diffs, err := Compare(old, new, []string{"size"})
if err != nil {
t.Fatal(err)
}
// 0 objects
if len(diffs) != 0 {
t.Errorf("expected the diff length to be 0, got %d", len(diffs))
for i, diff := range diffs {
t.Logf("diff[%d] = %#v", i, diff)
}
}
}
// TODO: Add test for Compare(...) between a tar and a regular dh (to make sure that tar_time is handled correctly).

View File

@ -78,6 +78,15 @@ func (kv KeyVal) ChangeValue(newval string) string {
return fmt.Sprintf("%s=%s", kv.Keyword(), newval)
}
// KeyValEqual returns whether two KeyVals are equivalent. This takes
// care of certain odd cases such as tar_mtime, and should be used over
// using == comparisons directly unless you really know what you're
// doing.
func KeyValEqual(a, b KeyVal) bool {
// TODO: Implement handling of tar_mtime.
return a.Keyword() == b.Keyword() && a.Value() == b.Value()
}
// keywordSelector takes an array of "keyword=value" and filters out that only the set of words
func keywordSelector(keyval, words []string) []string {
retList := []string{}