diff --git a/compare.go b/compare.go new file mode 100644 index 0000000..1f7b9d2 --- /dev/null +++ b/compare.go @@ -0,0 +1,429 @@ +package mtree + +import ( + "encoding/json" + "fmt" + "strconv" +) + +// XXX: Do we need a Difference interface to make it so people can do var x +// Difference = ? The main problem is that keys and inodes need to +// have different interfaces, so it's just a pain. + +// DifferenceType represents the type of a discrepancy encountered for +// an object. This is also used to represent discrepancies between keys +// for objects. +type DifferenceType string + +const ( + // Missing represents a discrepancy where the object is present in + // the @old manifest but is not present in the @new manifest. + Missing DifferenceType = "missing" + + // Extra represents a discrepancy where the object is not present in + // the @old manifest but is present in the @new manifest. + Extra DifferenceType = "extra" + + // Modified represents a discrepancy where the object is present in + // both the @old and @new manifests, but one or more of the keys + // have different values (or have not been set in one of the + // manifests). + Modified DifferenceType = "modified" +) + +// These functions return *type from the parameter. It's just shorthand, to +// ensure that we don't accidentally expose pointers to the caller that are +// internal data. +func ePtr(e Entry) *Entry { return &e } +func sPtr(s string) *string { return &s } + +// InodeDelta Represents a discrepancy in a filesystem object between two +// DirectoryHierarchy manifests. Discrepancies are caused by entries only +// present in one manifest [Missing, Extra], keys only present in one of the +// manifests [Modified] or a difference between the keys of the same object in +// both manifests [Modified]. +type InodeDelta struct { + diff DifferenceType + path string + new Entry + old Entry + keys []KeyDelta +} + +// Type returns the type of discrepancy encountered when comparing this inode +// between the two DirectoryHierarchy manifests. +func (i InodeDelta) Type() DifferenceType { + return i.diff +} + +// Path returns the path to the inode (relative to the root of the +// DirectoryHierarchy manifests). +func (i InodeDelta) Path() string { + return i.path +} + +// Diff returns the set of key discrepancies between the two manifests for the +// specific inode. If the DifferenceType of the inode is not Modified, then +// Diff returns nil. +func (i InodeDelta) Diff() []KeyDelta { + return i.keys +} + +// Old returns the value of the inode Entry in the "old" DirectoryHierarchy (as +// determined by the ordering of parameters to Compare). +func (i InodeDelta) Old() *Entry { + if i.diff == Modified || i.diff == Missing { + return ePtr(i.old) + } + return nil +} + +// New returns the value of the inode Entry in the "new" DirectoryHierarchy (as +// determined by the ordering of parameters to Compare). +func (i InodeDelta) New() *Entry { + if i.diff == Modified || i.diff == Extra { + return ePtr(i.new) + } + return nil +} + +// MarshalJSON creates a JSON-encoded version of InodeDelta. +func (i InodeDelta) MarshalJSON() ([]byte, error) { + return json.Marshal(struct { + Type DifferenceType `json:"type"` + Path string `json:"path"` + Keys []KeyDelta `json:"keys"` + }{ + Type: i.diff, + Path: i.path, + Keys: i.keys, + }) +} + +// String returns a "pretty" formatting for InodeDelta. +func (i InodeDelta) String() string { + switch i.diff { + case Modified: + // Output the first failure. + f := i.keys[0] + return fmt.Sprintf("%q: keyword %q: expected %s; got %s", i.path, f.name, f.old, f.new) + case Extra: + return fmt.Sprintf("%q: unexpected path", i.path) + case Missing: + return fmt.Sprintf("%q: missing path", i.path) + default: + panic("programming error") + } +} + +// KeyDelta Represents a discrepancy in a key for a particular filesystem +// object between two DirectoryHierarchy manifests. Discrepancies are caused by +// keys only present in one manifest [Missing, Extra] or a difference between +// the keys of the same object in both manifests [Modified]. A set of these is +// returned with InodeDelta.Diff(). +type KeyDelta struct { + diff DifferenceType + name string + old string + new string +} + +// Type returns the type of discrepancy encountered when comparing this key +// between the two DirectoryHierarchy manifests' relevant inode entry. +func (k KeyDelta) Type() DifferenceType { + return k.diff +} + +// Name returns the name (the key) of the KeyDeltaVal entry in the +// DirectoryHierarchy. +func (k KeyDelta) Name() string { + return k.name +} + +// Old returns the value of the KeyDeltaVal entry in the "old" DirectoryHierarchy +// (as determined by the ordering of parameters to Compare). Returns nil if +// there was no entry in the "old" DirectoryHierarchy. +func (k KeyDelta) Old() *string { + if k.diff == Modified || k.diff == Missing { + return sPtr(k.old) + } + return nil +} + +// New returns the value of the KeyDeltaVal entry in the "new" DirectoryHierarchy +// (as determined by the ordering of parameters to Compare). Returns nil if +// there was no entry in the "old" DirectoryHierarchy. +func (k KeyDelta) New() *string { + if k.diff == Modified || k.diff == Extra { + return sPtr(k.old) + } + return nil +} + +// MarshalJSON creates a JSON-encoded version of KeyDelta. +func (k KeyDelta) MarshalJSON() ([]byte, error) { + return json.Marshal(struct { + Type DifferenceType `json:"type"` + Name string `json:"name"` + Old string `json:"old"` + New string `json:"new"` + }{ + Type: k.diff, + Name: k.name, + Old: k.old, + New: k.new, + }) +} + +// Like Compare, but for single inode entries only. Used to compute the +// cached version of inode.keys. +func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) { + // Represents the new and old states for an entry's keys. + type stateT struct { + Old *KeyVal + New *KeyVal + } + + diffs := map[string]*stateT{} + + // Fill the map with the old keys first. + for _, kv := range oldEntry.AllKeys() { + key := kv.Keyword() + + // Cannot take &kv because it's the iterator. + copy := new(KeyVal) + *copy = kv + + _, ok := diffs[key] + if !ok { + diffs[key] = new(stateT) + } + diffs[key].Old = copy + } + + // Then fill the new keys. + for _, kv := range newEntry.AllKeys() { + key := kv.Keyword() + + // Cannot take &kv because it's the iterator. + copy := new(KeyVal) + *copy = kv + + _, ok := diffs[key] + if !ok { + diffs[key] = new(stateT) + } + diffs[key].New = copy + } + + // We need a full list of the keys so we can deal with different keyvalue + // orderings. + var kws []string + for kw := range diffs { + kws = append(kws, kw) + } + + // If both tar_time and time were specified in the set of keys, we have to + // mess with the diffs. This is an unfortunate side-effect of tar archives. + // TODO(cyphar): This really should be abstracted inside keywords.go + if inSlice("tar_time", kws) && inSlice("time", kws) { + // Delete "time". + timeStateT := diffs["time"] + delete(diffs, "time") + + // Make a new tar_time. + if diffs["tar_time"].Old == nil { + time, err := strconv.ParseFloat(timeStateT.Old.Value(), 64) + if err != nil { + return nil, fmt.Errorf("failed to parse old time: %s", err) + } + + newTime := new(KeyVal) + *newTime = KeyVal(fmt.Sprintf("tar_time=%d.000000000", int64(time))) + + diffs["tar_time"].Old = newTime + } else if diffs["tar_time"].New == nil { + time, err := strconv.ParseFloat(timeStateT.New.Value(), 64) + if err != nil { + return nil, fmt.Errorf("failed to parse new time: %s", err) + } + + newTime := new(KeyVal) + *newTime = KeyVal(fmt.Sprintf("tar_time=%d.000000000", int64(time))) + + diffs["tar_time"].New = newTime + } else { + return nil, fmt.Errorf("time and tar_time set in the same manifest") + } + } + + // Are there any differences? + var results []KeyDelta + for name, diff := range diffs { + // Invalid + if diff.Old == nil && diff.New == nil { + return nil, fmt.Errorf("invalid state: both old and new are nil: key=%s", name) + } + + switch { + // Missing + case diff.New == nil: + results = append(results, KeyDelta{ + diff: Missing, + name: name, + old: diff.Old.Value(), + }) + + // Extra + case diff.Old == nil: + results = append(results, KeyDelta{ + diff: Extra, + name: name, + new: diff.New.Value(), + }) + + // Modified + default: + if !KeyValEqual(*diff.Old, *diff.New) { + results = append(results, KeyDelta{ + diff: Modified, + name: name, + old: diff.Old.Value(), + new: diff.New.Value(), + }) + } + } + } + + return results, nil +} + +// Compare compares two directory hierarchy manifests, and returns the +// list of discrepancies between the two. All of the entries in the +// manifest are considered, with differences being generated for +// RelativeType and FullType entries. Differences in structure (such as +// the way /set and /unset are written) are not considered to be +// discrepancies. The list of differences are all filesystem objects. +// +// keys controls which keys will be compared, but if keys is nil then all +// possible keys will be compared between the two manifests (allowing for +// missing entries and the like). A missing or extra key is treated as a +// Modified type. +// +// NB: The order of the parameters matters (old, new) because Extra and +// Missing are considered as different discrepancy types. +func Compare(oldDh, newDh *DirectoryHierarchy, keys []string) ([]InodeDelta, error) { + // Represents the new and old states for an entry. + type stateT struct { + Old *Entry + New *Entry + } + + // Make dealing with the keys mapping easier. + keySet := map[string]struct{}{} + for _, key := range keys { + keySet[key] = struct{}{} + } + + // To deal with different orderings of the entries, use a path-keyed + // map to make sure we don't start comparing unrelated entries. + diffs := map[string]*stateT{} + + // First, iterate over the old hierarchy. + for _, e := range oldDh.Entries { + if e.Type == RelativeType || e.Type == FullType { + path, err := e.Path() + if err != nil { + return nil, err + } + + // Cannot take &kv because it's the iterator. + copy := new(Entry) + *copy = e + + _, ok := diffs[path] + if !ok { + diffs[path] = &stateT{} + } + diffs[path].Old = copy + } + } + + // Then, iterate over the new hierarchy. + for _, e := range newDh.Entries { + if e.Type == RelativeType || e.Type == FullType { + path, err := e.Path() + if err != nil { + return nil, err + } + + // Cannot take &kv because it's the iterator. + copy := new(Entry) + *copy = e + + _, ok := diffs[path] + if !ok { + diffs[path] = &stateT{} + } + diffs[path].New = copy + } + } + + // Now we compute the diff. + var results []InodeDelta + for path, diff := range diffs { + // Invalid + if diff.Old == nil && diff.New == nil { + return nil, fmt.Errorf("invalid state: both old and new are nil: path=%s", path) + } + + switch { + // Missing + case diff.New == nil: + results = append(results, InodeDelta{ + diff: Missing, + path: path, + old: *diff.Old, + }) + + // Extra + case diff.Old == nil: + results = append(results, InodeDelta{ + diff: Extra, + path: path, + new: *diff.New, + }) + + // Modified + default: + changed, err := compareEntry(*diff.Old, *diff.New) + if err != nil { + return nil, fmt.Errorf("comparison failed %s: %s", path, err) + } + + // Now remove "changed" entries that don't match the keys. + if keys != nil { + var filterChanged []KeyDelta + for _, keyDiff := range changed { + if _, ok := keySet[keyDiff.name]; ok { + filterChanged = append(filterChanged, keyDiff) + } + } + changed = filterChanged + } + + // Check if there were any actual changes. + if len(changed) > 0 { + results = append(results, InodeDelta{ + diff: Modified, + path: path, + old: *diff.Old, + new: *diff.New, + keys: changed, + }) + } + } + } + + return results, nil +} diff --git a/compare_test.go b/compare_test.go new file mode 100644 index 0000000..99422eb --- /dev/null +++ b/compare_test.go @@ -0,0 +1,338 @@ +package mtree + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +// simple walk of current directory, and imediately check it. +// may not be parallelizable. +func TestCompare(t *testing.T) { + old, err := Walk(".", nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + new, err := Walk(".", nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + diffs, err := Compare(old, new, nil) + if err != nil { + t.Fatal(err) + } + + if len(diffs) > 0 { + t.Errorf("%#v", diffs) + } +} + +func TestCompareModified(t *testing.T) { + dir, err := ioutil.TempDir("", "test-compare-modified") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + // Create a bunch of objects. + tmpfile := filepath.Join(dir, "tmpfile") + if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil { + t.Fatal(err) + } + + tmpdir := filepath.Join(dir, "testdir") + if err := os.Mkdir(tmpdir, 0755); err != nil { + t.Fatal(err) + } + + tmpsubfile := filepath.Join(tmpdir, "anotherfile") + if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil { + t.Fatal(err) + } + + // Walk the current state. + old, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Overwrite the content in one of the files. + if err := ioutil.WriteFile(tmpsubfile, []byte("modified content"), 0666); err != nil { + t.Fatal(err) + } + + // Walk the new state. + new, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Compare. + diffs, err := Compare(old, new, nil) + if err != nil { + t.Fatal(err) + } + + // 1 object + if len(diffs) != 1 { + t.Errorf("expected the diff length to be 1, got %d", len(diffs)) + for i, diff := range diffs { + t.Logf("diff[%d] = %#v", i, diff) + } + } + + // These cannot fail. + tmpfile, _ = filepath.Rel(dir, tmpfile) + tmpdir, _ = filepath.Rel(dir, tmpdir) + tmpsubfile, _ = filepath.Rel(dir, tmpsubfile) + + for _, diff := range diffs { + switch diff.Path() { + case tmpsubfile: + if diff.Type() != Modified { + t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type()) + } + + if diff.Diff() == nil { + t.Errorf("expect to not get nil for .Diff()") + } + + old := diff.Old() + new := diff.New() + if old == nil || new == nil { + t.Errorf("expected to get (!nil, !nil) for (.Old, .New), got (%#v, %#v)", old, new) + } + default: + t.Errorf("unexpected diff found: %#v", diff) + } + } +} + +func TestCompareMissing(t *testing.T) { + dir, err := ioutil.TempDir("", "test-compare-missing") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + // Create a bunch of objects. + tmpfile := filepath.Join(dir, "tmpfile") + if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil { + t.Fatal(err) + } + + tmpdir := filepath.Join(dir, "testdir") + if err := os.Mkdir(tmpdir, 0755); err != nil { + t.Fatal(err) + } + + tmpsubfile := filepath.Join(tmpdir, "anotherfile") + if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil { + t.Fatal(err) + } + + // Walk the current state. + old, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Delete the objects. + if err := os.RemoveAll(tmpfile); err != nil { + t.Fatal(err) + } + + if err := os.RemoveAll(tmpsubfile); err != nil { + t.Fatal(err) + } + + if err := os.RemoveAll(tmpdir); err != nil { + t.Fatal(err) + } + + // Walk the new state. + new, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Compare. + diffs, err := Compare(old, new, nil) + if err != nil { + t.Fatal(err) + } + + // 3 objects + the changes to '.' + if len(diffs) != 4 { + t.Errorf("expected the diff length to be 4, got %d", len(diffs)) + for i, diff := range diffs { + t.Logf("diff[%d] = %#v", i, diff) + } + } + + // These cannot fail. + tmpfile, _ = filepath.Rel(dir, tmpfile) + tmpdir, _ = filepath.Rel(dir, tmpdir) + tmpsubfile, _ = filepath.Rel(dir, tmpsubfile) + + for _, diff := range diffs { + switch diff.Path() { + case ".": + // ignore these changes + case tmpfile, tmpdir, tmpsubfile: + if diff.Type() != Missing { + t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type()) + } + + if diff.Diff() != nil { + t.Errorf("expect to get nil for .Diff(), got %#v", diff.Diff()) + } + + old := diff.Old() + new := diff.New() + if old == nil || new != nil { + t.Errorf("expected to get (!nil, nil) for (.Old, .New), got (%#v, %#v)", old, new) + } + default: + t.Errorf("unexpected diff found: %#v", diff) + } + } +} + +func TestCompareExtra(t *testing.T) { + dir, err := ioutil.TempDir("", "test-compare-extra") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + // Walk the current state. + old, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Create a bunch of objects. + tmpfile := filepath.Join(dir, "tmpfile") + if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil { + t.Fatal(err) + } + + tmpdir := filepath.Join(dir, "testdir") + if err := os.Mkdir(tmpdir, 0755); err != nil { + t.Fatal(err) + } + + tmpsubfile := filepath.Join(tmpdir, "anotherfile") + if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil { + t.Fatal(err) + } + + // Walk the new state. + new, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Compare. + diffs, err := Compare(old, new, nil) + if err != nil { + t.Fatal(err) + } + + // 3 objects + the changes to '.' + if len(diffs) != 4 { + t.Errorf("expected the diff length to be 4, got %d", len(diffs)) + for i, diff := range diffs { + t.Logf("diff[%d] = %#v", i, diff) + } + } + + // These cannot fail. + tmpfile, _ = filepath.Rel(dir, tmpfile) + tmpdir, _ = filepath.Rel(dir, tmpdir) + tmpsubfile, _ = filepath.Rel(dir, tmpsubfile) + + for _, diff := range diffs { + switch diff.Path() { + case ".": + // ignore these changes + case tmpfile, tmpdir, tmpsubfile: + if diff.Type() != Extra { + t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type()) + } + + if diff.Diff() != nil { + t.Errorf("expect to get nil for .Diff(), got %#v", diff.Diff()) + } + + old := diff.Old() + new := diff.New() + if old != nil || new == nil { + t.Errorf("expected to get (!nil, nil) for (.Old, .New), got (%#v, %#v)", old, new) + } + default: + t.Errorf("unexpected diff found: %#v", diff) + } + } +} + +func TestCompareKeys(t *testing.T) { + dir, err := ioutil.TempDir("", "test-compare-keys") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + // Create a bunch of objects. + tmpfile := filepath.Join(dir, "tmpfile") + if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil { + t.Fatal(err) + } + + tmpdir := filepath.Join(dir, "testdir") + if err := os.Mkdir(tmpdir, 0755); err != nil { + t.Fatal(err) + } + + tmpsubfile := filepath.Join(tmpdir, "anotherfile") + if err := ioutil.WriteFile(tmpsubfile, []byte("aaa"), 0666); err != nil { + t.Fatal(err) + } + + // Walk the current state. + old, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Overwrite the content in one of the files, but without changing the size. + if err := ioutil.WriteFile(tmpsubfile, []byte("bbb"), 0666); err != nil { + t.Fatal(err) + } + + // Walk the new state. + new, err := Walk(dir, nil, append(DefaultKeywords, "sha1")) + if err != nil { + t.Fatal(err) + } + + // Compare. + diffs, err := Compare(old, new, []string{"size"}) + if err != nil { + t.Fatal(err) + } + + // 0 objects + if len(diffs) != 0 { + t.Errorf("expected the diff length to be 0, got %d", len(diffs)) + for i, diff := range diffs { + t.Logf("diff[%d] = %#v", i, diff) + } + } +} + +// TODO: Add test for Compare(...) between a tar and a regular dh (to make sure that tar_time is handled correctly). diff --git a/keywords.go b/keywords.go index 3faad1d..dfccfb2 100644 --- a/keywords.go +++ b/keywords.go @@ -78,6 +78,15 @@ func (kv KeyVal) ChangeValue(newval string) string { return fmt.Sprintf("%s=%s", kv.Keyword(), newval) } +// KeyValEqual returns whether two KeyVals are equivalent. This takes +// care of certain odd cases such as tar_mtime, and should be used over +// using == comparisons directly unless you really know what you're +// doing. +func KeyValEqual(a, b KeyVal) bool { + // TODO: Implement handling of tar_mtime. + return a.Keyword() == b.Keyword() && a.Value() == b.Value() +} + // keywordSelector takes an array of "keyword=value" and filters out that only the set of words func keywordSelector(keyval, words []string) []string { retList := []string{}