From 36372dd3c8dc8d6333d751fb7d1939e25cc39a78 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Wed, 27 Jul 2016 22:03:43 +1000
Subject: [PATCH 1/6] mtree: remove use of dhCreator for iterators

Fix a bug in the parser that caused all iterators to have to handle the
/set and /unset semantics separately. In addition, provide a helper
function to correctly generate the merged set of keywords for a
particular entry.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 check.go | 45 +++++++++------------------------------------
 entry.go | 13 +++++++++++++
 parse.go |  1 +
 3 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/check.go b/check.go
index 5586973..6a20848 100644
--- a/check.go
+++ b/check.go
@@ -32,7 +32,6 @@ func (f Failure) String() string {
 // the available keywords from the list and each entry in the hierarchy.
 // If keywords is nil, the check all present in the DirectoryHierarchy
 func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, error) {
-	creator := dhCreator{DH: dh}
 	curDir, err := os.Getwd()
 	if err == nil {
 		defer os.Chdir(curDir)
@@ -41,16 +40,11 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
 	if err := os.Chdir(root); err != nil {
 		return nil, err
 	}
-	sort.Sort(byPos(creator.DH.Entries))
+	sort.Sort(byPos(dh.Entries))
+
 	var result Result
-	for i, e := range creator.DH.Entries {
+	for _, e := range dh.Entries {
 		switch e.Type {
-		case SpecialType:
-			if e.Name == "/set" {
-				creator.curSet = &creator.DH.Entries[i]
-			} else if e.Name == "/unset" {
-				creator.curSet = nil
-			}
 		case RelativeType, FullType:
 			pathname, err := e.Path()
 			if err != nil {
@@ -61,12 +55,8 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
 				return nil, err
 			}
 
-			var kvs KeyVals
-			if creator.curSet != nil {
-				kvs = MergeSet(creator.curSet.Keywords, e.Keywords)
-			} else {
-				kvs = NewKeyVals(e.Keywords)
-			}
+			kvs := e.AllKeys()
+
 			for _, kv := range kvs {
 				kw := kv.Keyword()
 				// 'tar_time' keyword evaluation wins against 'time' keyword evaluation
@@ -133,18 +123,11 @@ func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error)
 		Type: CommentType,
 	}
 	curDir := tarRoot
-	creator := dhCreator{DH: dh}
-	sort.Sort(byPos(creator.DH.Entries))
+	sort.Sort(byPos(dh.Entries))
 
 	var outOfTree bool
-	for i, e := range creator.DH.Entries {
+	for _, e := range dh.Entries {
 		switch e.Type {
-		case SpecialType:
-			if e.Name == "/set" {
-				creator.curSet = &creator.DH.Entries[i]
-			} else if e.Name == "/unset" {
-				creator.curSet = nil
-			}
 		case RelativeType, FullType:
 			pathname, err := e.Path()
 			if err != nil {
@@ -166,20 +149,10 @@ func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error)
 			}
 
 			// expected values from file hierarchy spec
-			var kvs KeyVals
-			if creator.curSet != nil {
-				kvs = MergeSet(creator.curSet.Keywords, e.Keywords)
-			} else {
-				kvs = NewKeyVals(e.Keywords)
-			}
+			kvs := e.AllKeys()
 
 			// actual
-			var tarkvs KeyVals
-			if tarEntry.Set != nil {
-				tarkvs = MergeSet(tarEntry.Set.Keywords, tarEntry.Keywords)
-			} else {
-				tarkvs = NewKeyVals(tarEntry.Keywords)
-			}
+			tarkvs := tarEntry.AllKeys()
 
 			for _, kv := range kvs {
 				// TODO: keep track of symlinks
diff --git a/entry.go b/entry.go
index 8273e8d..055e9a0 100644
--- a/entry.go
+++ b/entry.go
@@ -100,6 +100,19 @@ func (e Entry) String() string {
 	return fmt.Sprintf("    %s %s", e.Name, strings.Join(e.Keywords, " "))
 }
 
+// AllKeys returns the full set of KeyVals for the given entry, based on the
+// /set keys as well as the entry-local keys. Entry-local keys always take
+// precedence.
+func (e Entry) AllKeys() KeyVals {
+	var kv KeyVals
+	if e.Set != nil {
+		kv = MergeSet(e.Set.Keywords, e.Keywords)
+	} else {
+		kv = NewKeyVals(e.Keywords)
+	}
+	return kv
+}
+
 // EntryType are the formats of lines in an mtree spec file
 type EntryType int
 
diff --git a/parse.go b/parse.go
index 77987c6..9e1f9bb 100644
--- a/parse.go
+++ b/parse.go
@@ -93,6 +93,7 @@ func ParseSpec(r io.Reader) (*DirectoryHierarchy, error) {
 					}
 				}
 			}
+			e.Set = creator.curSet
 		default:
 			// TODO(vbatts) log a warning?
 			continue

From 26ff922da64a57a7579ddf84ac817967ff134844 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Wed, 27 Jul 2016 22:05:15 +1000
Subject: [PATCH 2/6] compare: implement mtree.DirectoryHierarchy comparisons

This is part of a patchset that refactors all of the checking logic into
comparison operations. Essentially, provide a Compare(...) function that
allows for two different manifests to be compared. Extra and missing
entries are supported in addition to the standard modified entry, and by
implementing as a manifest comparison there is no double-scanning of the
manifest source.

The main annoyance is that we have to also include tar_time handling,
which has not been abstracted inside keywords.go. This is a bit ugly
right now, but works fine for the moment.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 compare.go      | 429 ++++++++++++++++++++++++++++++++++++++++++++++++
 compare_test.go | 338 ++++++++++++++++++++++++++++++++++++++
 keywords.go     |   9 +
 3 files changed, 776 insertions(+)
 create mode 100644 compare.go
 create mode 100644 compare_test.go

diff --git a/compare.go b/compare.go
new file mode 100644
index 0000000..1f7b9d2
--- /dev/null
+++ b/compare.go
@@ -0,0 +1,429 @@
+package mtree
+
+import (
+	"encoding/json"
+	"fmt"
+	"strconv"
+)
+
+// XXX: Do we need a Difference interface to make it so people can do var x
+// Difference = <something>? The main problem is that keys and inodes need to
+// have different interfaces, so it's just a pain.
+
+// DifferenceType represents the type of a discrepancy encountered for
+// an object. This is also used to represent discrepancies between keys
+// for objects.
+type DifferenceType string
+
+const (
+	// Missing represents a discrepancy where the object is present in
+	// the @old manifest but is not present in the @new manifest.
+	Missing DifferenceType = "missing"
+
+	// Extra represents a discrepancy where the object is not present in
+	// the @old manifest but is present in the @new manifest.
+	Extra DifferenceType = "extra"
+
+	// Modified represents a discrepancy where the object is present in
+	// both the @old and @new manifests, but one or more of the keys
+	// have different values (or have not been set in one of the
+	// manifests).
+	Modified DifferenceType = "modified"
+)
+
+// These functions return *type from the parameter. It's just shorthand, to
+// ensure that we don't accidentally expose pointers to the caller that are
+// internal data.
+func ePtr(e Entry) *Entry   { return &e }
+func sPtr(s string) *string { return &s }
+
+// InodeDelta Represents a discrepancy in a filesystem object between two
+// DirectoryHierarchy manifests. Discrepancies are caused by entries only
+// present in one manifest [Missing, Extra], keys only present in one of the
+// manifests [Modified] or a difference between the keys of the same object in
+// both manifests [Modified].
+type InodeDelta struct {
+	diff DifferenceType
+	path string
+	new  Entry
+	old  Entry
+	keys []KeyDelta
+}
+
+// Type returns the type of discrepancy encountered when comparing this inode
+// between the two DirectoryHierarchy manifests.
+func (i InodeDelta) Type() DifferenceType {
+	return i.diff
+}
+
+// Path returns the path to the inode (relative to the root of the
+// DirectoryHierarchy manifests).
+func (i InodeDelta) Path() string {
+	return i.path
+}
+
+// Diff returns the set of key discrepancies between the two manifests for the
+// specific inode. If the DifferenceType of the inode is not Modified, then
+// Diff returns nil.
+func (i InodeDelta) Diff() []KeyDelta {
+	return i.keys
+}
+
+// Old returns the value of the inode Entry in the "old" DirectoryHierarchy (as
+// determined by the ordering of parameters to Compare).
+func (i InodeDelta) Old() *Entry {
+	if i.diff == Modified || i.diff == Missing {
+		return ePtr(i.old)
+	}
+	return nil
+}
+
+// New returns the value of the inode Entry in the "new" DirectoryHierarchy (as
+// determined by the ordering of parameters to Compare).
+func (i InodeDelta) New() *Entry {
+	if i.diff == Modified || i.diff == Extra {
+		return ePtr(i.new)
+	}
+	return nil
+}
+
+// MarshalJSON creates a JSON-encoded version of InodeDelta.
+func (i InodeDelta) MarshalJSON() ([]byte, error) {
+	return json.Marshal(struct {
+		Type DifferenceType `json:"type"`
+		Path string         `json:"path"`
+		Keys []KeyDelta     `json:"keys"`
+	}{
+		Type: i.diff,
+		Path: i.path,
+		Keys: i.keys,
+	})
+}
+
+// String returns a "pretty" formatting for InodeDelta.
+func (i InodeDelta) String() string {
+	switch i.diff {
+	case Modified:
+		// Output the first failure.
+		f := i.keys[0]
+		return fmt.Sprintf("%q: keyword %q: expected %s; got %s", i.path, f.name, f.old, f.new)
+	case Extra:
+		return fmt.Sprintf("%q: unexpected path", i.path)
+	case Missing:
+		return fmt.Sprintf("%q: missing path", i.path)
+	default:
+		panic("programming error")
+	}
+}
+
+// KeyDelta Represents a discrepancy in a key for a particular filesystem
+// object between two DirectoryHierarchy manifests. Discrepancies are caused by
+// keys only present in one manifest [Missing, Extra] or a difference between
+// the keys of the same object in both manifests [Modified]. A set of these is
+// returned with InodeDelta.Diff().
+type KeyDelta struct {
+	diff DifferenceType
+	name string
+	old  string
+	new  string
+}
+
+// Type returns the type of discrepancy encountered when comparing this key
+// between the two DirectoryHierarchy manifests' relevant inode entry.
+func (k KeyDelta) Type() DifferenceType {
+	return k.diff
+}
+
+// Name returns the name (the key) of the KeyDeltaVal entry in the
+// DirectoryHierarchy.
+func (k KeyDelta) Name() string {
+	return k.name
+}
+
+// Old returns the value of the KeyDeltaVal entry in the "old" DirectoryHierarchy
+// (as determined by the ordering of parameters to Compare). Returns nil if
+// there was no entry in the "old" DirectoryHierarchy.
+func (k KeyDelta) Old() *string {
+	if k.diff == Modified || k.diff == Missing {
+		return sPtr(k.old)
+	}
+	return nil
+}
+
+// New returns the value of the KeyDeltaVal entry in the "new" DirectoryHierarchy
+// (as determined by the ordering of parameters to Compare). Returns nil if
+// there was no entry in the "old" DirectoryHierarchy.
+func (k KeyDelta) New() *string {
+	if k.diff == Modified || k.diff == Extra {
+		return sPtr(k.old)
+	}
+	return nil
+}
+
+// MarshalJSON creates a JSON-encoded version of KeyDelta.
+func (k KeyDelta) MarshalJSON() ([]byte, error) {
+	return json.Marshal(struct {
+		Type DifferenceType `json:"type"`
+		Name string         `json:"name"`
+		Old  string         `json:"old"`
+		New  string         `json:"new"`
+	}{
+		Type: k.diff,
+		Name: k.name,
+		Old:  k.old,
+		New:  k.new,
+	})
+}
+
+// Like Compare, but for single inode entries only. Used to compute the
+// cached version of inode.keys.
+func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
+	// Represents the new and old states for an entry's keys.
+	type stateT struct {
+		Old *KeyVal
+		New *KeyVal
+	}
+
+	diffs := map[string]*stateT{}
+
+	// Fill the map with the old keys first.
+	for _, kv := range oldEntry.AllKeys() {
+		key := kv.Keyword()
+
+		// Cannot take &kv because it's the iterator.
+		copy := new(KeyVal)
+		*copy = kv
+
+		_, ok := diffs[key]
+		if !ok {
+			diffs[key] = new(stateT)
+		}
+		diffs[key].Old = copy
+	}
+
+	// Then fill the new keys.
+	for _, kv := range newEntry.AllKeys() {
+		key := kv.Keyword()
+
+		// Cannot take &kv because it's the iterator.
+		copy := new(KeyVal)
+		*copy = kv
+
+		_, ok := diffs[key]
+		if !ok {
+			diffs[key] = new(stateT)
+		}
+		diffs[key].New = copy
+	}
+
+	// We need a full list of the keys so we can deal with different keyvalue
+	// orderings.
+	var kws []string
+	for kw := range diffs {
+		kws = append(kws, kw)
+	}
+
+	// If both tar_time and time were specified in the set of keys, we have to
+	// mess with the diffs. This is an unfortunate side-effect of tar archives.
+	// TODO(cyphar): This really should be abstracted inside keywords.go
+	if inSlice("tar_time", kws) && inSlice("time", kws) {
+		// Delete "time".
+		timeStateT := diffs["time"]
+		delete(diffs, "time")
+
+		// Make a new tar_time.
+		if diffs["tar_time"].Old == nil {
+			time, err := strconv.ParseFloat(timeStateT.Old.Value(), 64)
+			if err != nil {
+				return nil, fmt.Errorf("failed to parse old time: %s", err)
+			}
+
+			newTime := new(KeyVal)
+			*newTime = KeyVal(fmt.Sprintf("tar_time=%d.000000000", int64(time)))
+
+			diffs["tar_time"].Old = newTime
+		} else if diffs["tar_time"].New == nil {
+			time, err := strconv.ParseFloat(timeStateT.New.Value(), 64)
+			if err != nil {
+				return nil, fmt.Errorf("failed to parse new time: %s", err)
+			}
+
+			newTime := new(KeyVal)
+			*newTime = KeyVal(fmt.Sprintf("tar_time=%d.000000000", int64(time)))
+
+			diffs["tar_time"].New = newTime
+		} else {
+			return nil, fmt.Errorf("time and tar_time set in the same manifest")
+		}
+	}
+
+	// Are there any differences?
+	var results []KeyDelta
+	for name, diff := range diffs {
+		// Invalid
+		if diff.Old == nil && diff.New == nil {
+			return nil, fmt.Errorf("invalid state: both old and new are nil: key=%s", name)
+		}
+
+		switch {
+		// Missing
+		case diff.New == nil:
+			results = append(results, KeyDelta{
+				diff: Missing,
+				name: name,
+				old:  diff.Old.Value(),
+			})
+
+		// Extra
+		case diff.Old == nil:
+			results = append(results, KeyDelta{
+				diff: Extra,
+				name: name,
+				new:  diff.New.Value(),
+			})
+
+		// Modified
+		default:
+			if !KeyValEqual(*diff.Old, *diff.New) {
+				results = append(results, KeyDelta{
+					diff: Modified,
+					name: name,
+					old:  diff.Old.Value(),
+					new:  diff.New.Value(),
+				})
+			}
+		}
+	}
+
+	return results, nil
+}
+
+// Compare compares two directory hierarchy manifests, and returns the
+// list of discrepancies between the two. All of the entries in the
+// manifest are considered, with differences being generated for
+// RelativeType and FullType entries. Differences in structure (such as
+// the way /set and /unset are written) are not considered to be
+// discrepancies. The list of differences are all filesystem objects.
+//
+// keys controls which keys will be compared, but if keys is nil then all
+// possible keys will be compared between the two manifests (allowing for
+// missing entries and the like). A missing or extra key is treated as a
+// Modified type.
+//
+// NB: The order of the parameters matters (old, new) because Extra and
+//     Missing are considered as different discrepancy types.
+func Compare(oldDh, newDh *DirectoryHierarchy, keys []string) ([]InodeDelta, error) {
+	// Represents the new and old states for an entry.
+	type stateT struct {
+		Old *Entry
+		New *Entry
+	}
+
+	// Make dealing with the keys mapping easier.
+	keySet := map[string]struct{}{}
+	for _, key := range keys {
+		keySet[key] = struct{}{}
+	}
+
+	// To deal with different orderings of the entries, use a path-keyed
+	// map to make sure we don't start comparing unrelated entries.
+	diffs := map[string]*stateT{}
+
+	// First, iterate over the old hierarchy.
+	for _, e := range oldDh.Entries {
+		if e.Type == RelativeType || e.Type == FullType {
+			path, err := e.Path()
+			if err != nil {
+				return nil, err
+			}
+
+			// Cannot take &kv because it's the iterator.
+			copy := new(Entry)
+			*copy = e
+
+			_, ok := diffs[path]
+			if !ok {
+				diffs[path] = &stateT{}
+			}
+			diffs[path].Old = copy
+		}
+	}
+
+	// Then, iterate over the new hierarchy.
+	for _, e := range newDh.Entries {
+		if e.Type == RelativeType || e.Type == FullType {
+			path, err := e.Path()
+			if err != nil {
+				return nil, err
+			}
+
+			// Cannot take &kv because it's the iterator.
+			copy := new(Entry)
+			*copy = e
+
+			_, ok := diffs[path]
+			if !ok {
+				diffs[path] = &stateT{}
+			}
+			diffs[path].New = copy
+		}
+	}
+
+	// Now we compute the diff.
+	var results []InodeDelta
+	for path, diff := range diffs {
+		// Invalid
+		if diff.Old == nil && diff.New == nil {
+			return nil, fmt.Errorf("invalid state: both old and new are nil: path=%s", path)
+		}
+
+		switch {
+		// Missing
+		case diff.New == nil:
+			results = append(results, InodeDelta{
+				diff: Missing,
+				path: path,
+				old:  *diff.Old,
+			})
+
+		// Extra
+		case diff.Old == nil:
+			results = append(results, InodeDelta{
+				diff: Extra,
+				path: path,
+				new:  *diff.New,
+			})
+
+		// Modified
+		default:
+			changed, err := compareEntry(*diff.Old, *diff.New)
+			if err != nil {
+				return nil, fmt.Errorf("comparison failed %s: %s", path, err)
+			}
+
+			// Now remove "changed" entries that don't match the keys.
+			if keys != nil {
+				var filterChanged []KeyDelta
+				for _, keyDiff := range changed {
+					if _, ok := keySet[keyDiff.name]; ok {
+						filterChanged = append(filterChanged, keyDiff)
+					}
+				}
+				changed = filterChanged
+			}
+
+			// Check if there were any actual changes.
+			if len(changed) > 0 {
+				results = append(results, InodeDelta{
+					diff: Modified,
+					path: path,
+					old:  *diff.Old,
+					new:  *diff.New,
+					keys: changed,
+				})
+			}
+		}
+	}
+
+	return results, nil
+}
diff --git a/compare_test.go b/compare_test.go
new file mode 100644
index 0000000..99422eb
--- /dev/null
+++ b/compare_test.go
@@ -0,0 +1,338 @@
+package mtree
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// simple walk of current directory, and imediately check it.
+// may not be parallelizable.
+func TestCompare(t *testing.T) {
+	old, err := Walk(".", nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	new, err := Walk(".", nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	diffs, err := Compare(old, new, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(diffs) > 0 {
+		t.Errorf("%#v", diffs)
+	}
+}
+
+func TestCompareModified(t *testing.T) {
+	dir, err := ioutil.TempDir("", "test-compare-modified")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+
+	// Create a bunch of objects.
+	tmpfile := filepath.Join(dir, "tmpfile")
+	if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpdir := filepath.Join(dir, "testdir")
+	if err := os.Mkdir(tmpdir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpsubfile := filepath.Join(tmpdir, "anotherfile")
+	if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the current state.
+	old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Overwrite the content in one of the files.
+	if err := ioutil.WriteFile(tmpsubfile, []byte("modified content"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the new state.
+	new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Compare.
+	diffs, err := Compare(old, new, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 1 object
+	if len(diffs) != 1 {
+		t.Errorf("expected the diff length to be 1, got %d", len(diffs))
+		for i, diff := range diffs {
+			t.Logf("diff[%d] = %#v", i, diff)
+		}
+	}
+
+	// These cannot fail.
+	tmpfile, _ = filepath.Rel(dir, tmpfile)
+	tmpdir, _ = filepath.Rel(dir, tmpdir)
+	tmpsubfile, _ = filepath.Rel(dir, tmpsubfile)
+
+	for _, diff := range diffs {
+		switch diff.Path() {
+		case tmpsubfile:
+			if diff.Type() != Modified {
+				t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type())
+			}
+
+			if diff.Diff() == nil {
+				t.Errorf("expect to not get nil for .Diff()")
+			}
+
+			old := diff.Old()
+			new := diff.New()
+			if old == nil || new == nil {
+				t.Errorf("expected to get (!nil, !nil) for (.Old, .New), got (%#v, %#v)", old, new)
+			}
+		default:
+			t.Errorf("unexpected diff found: %#v", diff)
+		}
+	}
+}
+
+func TestCompareMissing(t *testing.T) {
+	dir, err := ioutil.TempDir("", "test-compare-missing")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+
+	// Create a bunch of objects.
+	tmpfile := filepath.Join(dir, "tmpfile")
+	if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpdir := filepath.Join(dir, "testdir")
+	if err := os.Mkdir(tmpdir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpsubfile := filepath.Join(tmpdir, "anotherfile")
+	if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the current state.
+	old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Delete the objects.
+	if err := os.RemoveAll(tmpfile); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := os.RemoveAll(tmpsubfile); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := os.RemoveAll(tmpdir); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the new state.
+	new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Compare.
+	diffs, err := Compare(old, new, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 3 objects + the changes to '.'
+	if len(diffs) != 4 {
+		t.Errorf("expected the diff length to be 4, got %d", len(diffs))
+		for i, diff := range diffs {
+			t.Logf("diff[%d] = %#v", i, diff)
+		}
+	}
+
+	// These cannot fail.
+	tmpfile, _ = filepath.Rel(dir, tmpfile)
+	tmpdir, _ = filepath.Rel(dir, tmpdir)
+	tmpsubfile, _ = filepath.Rel(dir, tmpsubfile)
+
+	for _, diff := range diffs {
+		switch diff.Path() {
+		case ".":
+			// ignore these changes
+		case tmpfile, tmpdir, tmpsubfile:
+			if diff.Type() != Missing {
+				t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type())
+			}
+
+			if diff.Diff() != nil {
+				t.Errorf("expect to get nil for .Diff(), got %#v", diff.Diff())
+			}
+
+			old := diff.Old()
+			new := diff.New()
+			if old == nil || new != nil {
+				t.Errorf("expected to get (!nil, nil) for (.Old, .New), got (%#v, %#v)", old, new)
+			}
+		default:
+			t.Errorf("unexpected diff found: %#v", diff)
+		}
+	}
+}
+
+func TestCompareExtra(t *testing.T) {
+	dir, err := ioutil.TempDir("", "test-compare-extra")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+
+	// Walk the current state.
+	old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a bunch of objects.
+	tmpfile := filepath.Join(dir, "tmpfile")
+	if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpdir := filepath.Join(dir, "testdir")
+	if err := os.Mkdir(tmpdir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpsubfile := filepath.Join(tmpdir, "anotherfile")
+	if err := ioutil.WriteFile(tmpsubfile, []byte("some different content"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the new state.
+	new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Compare.
+	diffs, err := Compare(old, new, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 3 objects + the changes to '.'
+	if len(diffs) != 4 {
+		t.Errorf("expected the diff length to be 4, got %d", len(diffs))
+		for i, diff := range diffs {
+			t.Logf("diff[%d] = %#v", i, diff)
+		}
+	}
+
+	// These cannot fail.
+	tmpfile, _ = filepath.Rel(dir, tmpfile)
+	tmpdir, _ = filepath.Rel(dir, tmpdir)
+	tmpsubfile, _ = filepath.Rel(dir, tmpsubfile)
+
+	for _, diff := range diffs {
+		switch diff.Path() {
+		case ".":
+			// ignore these changes
+		case tmpfile, tmpdir, tmpsubfile:
+			if diff.Type() != Extra {
+				t.Errorf("unexpected diff type for %s: %s", diff.Path(), diff.Type())
+			}
+
+			if diff.Diff() != nil {
+				t.Errorf("expect to get nil for .Diff(), got %#v", diff.Diff())
+			}
+
+			old := diff.Old()
+			new := diff.New()
+			if old != nil || new == nil {
+				t.Errorf("expected to get (!nil, nil) for (.Old, .New), got (%#v, %#v)", old, new)
+			}
+		default:
+			t.Errorf("unexpected diff found: %#v", diff)
+		}
+	}
+}
+
+func TestCompareKeys(t *testing.T) {
+	dir, err := ioutil.TempDir("", "test-compare-keys")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+
+	// Create a bunch of objects.
+	tmpfile := filepath.Join(dir, "tmpfile")
+	if err := ioutil.WriteFile(tmpfile, []byte("some content here"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpdir := filepath.Join(dir, "testdir")
+	if err := os.Mkdir(tmpdir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpsubfile := filepath.Join(tmpdir, "anotherfile")
+	if err := ioutil.WriteFile(tmpsubfile, []byte("aaa"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the current state.
+	old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Overwrite the content in one of the files, but without changing the size.
+	if err := ioutil.WriteFile(tmpsubfile, []byte("bbb"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk the new state.
+	new, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Compare.
+	diffs, err := Compare(old, new, []string{"size"})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 0 objects
+	if len(diffs) != 0 {
+		t.Errorf("expected the diff length to be 0, got %d", len(diffs))
+		for i, diff := range diffs {
+			t.Logf("diff[%d] = %#v", i, diff)
+		}
+	}
+}
+
+// TODO: Add test for Compare(...) between a tar and a regular dh (to make sure that tar_time is handled correctly).
diff --git a/keywords.go b/keywords.go
index 3faad1d..dfccfb2 100644
--- a/keywords.go
+++ b/keywords.go
@@ -78,6 +78,15 @@ func (kv KeyVal) ChangeValue(newval string) string {
 	return fmt.Sprintf("%s=%s", kv.Keyword(), newval)
 }
 
+// KeyValEqual returns whether two KeyVals are equivalent. This takes
+// care of certain odd cases such as tar_mtime, and should be used over
+// using == comparisons directly unless you really know what you're
+// doing.
+func KeyValEqual(a, b KeyVal) bool {
+	// TODO: Implement handling of tar_mtime.
+	return a.Keyword() == b.Keyword() && a.Value() == b.Value()
+}
+
 // keywordSelector takes an array of "keyword=value" and filters out that only the set of words
 func keywordSelector(keyval, words []string) []string {
 	retList := []string{}

From d7f49531f84122ba759ab93d264be442ab8741a4 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Mon, 31 Oct 2016 00:48:49 +1100
Subject: [PATCH 3/6] gomtree: switch to using compare

Switch the commandline to use the .Compare() API when checking
specification files against the state of a tar archive or other archive.
The main purpose is to completely remove the check.go code from being
necessary (outside of a wrapper).

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 cmd/gomtree/main.go | 163 +++++++++++++++++++++++++++-----------------
 1 file changed, 99 insertions(+), 64 deletions(-)

diff --git a/cmd/gomtree/main.go b/cmd/gomtree/main.go
index f69374d..8e7e941 100644
--- a/cmd/gomtree/main.go
+++ b/cmd/gomtree/main.go
@@ -29,30 +29,34 @@ var (
 	flVersion          = flag.Bool("version", false, "display the version of this tool")
 )
 
-var formats = map[string]func(*mtree.Result) string{
+var formats = map[string]func([]mtree.InodeDelta) string{
 	// Outputs the errors in the BSD format.
-	"bsd": func(r *mtree.Result) string {
+	"bsd": func(d []mtree.InodeDelta) string {
 		var buffer bytes.Buffer
-		for _, fail := range r.Failures {
-			fmt.Fprintln(&buffer, fail)
+		for _, delta := range d {
+			if delta.Type() == mtree.Modified {
+				fmt.Fprintln(&buffer, delta)
+			}
 		}
 		return buffer.String()
 	},
 
 	// Outputs the full result struct in JSON.
-	"json": func(r *mtree.Result) string {
+	"json": func(d []mtree.InodeDelta) string {
 		var buffer bytes.Buffer
-		if err := json.NewEncoder(&buffer).Encode(r); err != nil {
+		if err := json.NewEncoder(&buffer).Encode(d); err != nil {
 			panic(err)
 		}
 		return buffer.String()
 	},
 
 	// Outputs only the paths which failed to validate.
-	"path": func(r *mtree.Result) string {
+	"path": func(d []mtree.InodeDelta) string {
 		var buffer bytes.Buffer
-		for _, fail := range r.Failures {
-			fmt.Fprintln(&buffer, fail.Path)
+		for _, delta := range d {
+			if delta.Type() == mtree.Modified {
+				fmt.Fprintln(&buffer, delta.Path())
+			}
 		}
 		return buffer.String()
 	},
@@ -66,6 +70,7 @@ func main() {
 	}
 
 	// so that defers cleanly exec
+	// TODO: Switch everything to being inside a function, to remove the need for isErr.
 	var isErr bool
 	defer func() {
 		if isErr {
@@ -104,9 +109,11 @@ func main() {
 	}
 
 	var (
+		err             error
 		tmpKeywords     []string
 		currentKeywords []string
 	)
+
 	// -k <keywords>
 	if *flUseKeywords != "" {
 		tmpKeywords = splitKeywordsArg(*flUseKeywords)
@@ -143,8 +150,23 @@ func main() {
 		currentKeywords = tmpKeywords
 	}
 
+	// Check mutual exclusivity of keywords.
+	// TODO(cyphar): Abstract this inside keywords.go.
+	if inSlice("tar_time", currentKeywords) && inSlice("time", currentKeywords) {
+		log.Printf("tar_time and time are mutually exclusive keywords")
+		isErr = true
+		return
+	}
+
+	// If we're doing a comparison, we always are comparing between a spec and
+	// state DH. If specDh is nil, we are generating a new one.
+	var (
+		specDh       *mtree.DirectoryHierarchy
+		stateDh      *mtree.DirectoryHierarchy
+		specKeywords []string
+	)
+
 	// -f <file>
-	var dh *mtree.DirectoryHierarchy
 	if *flFile != "" && !*flCreate {
 		// load the hierarchy, if we're not creating a new spec
 		fh, err := os.Open(*flFile)
@@ -153,27 +175,30 @@ func main() {
 			isErr = true
 			return
 		}
-		dh, err = mtree.ParseSpec(fh)
+		specDh, err = mtree.ParseSpec(fh)
 		fh.Close()
 		if err != nil {
 			log.Println(err)
 			isErr = true
 			return
 		}
+
+		// We can't check against more fields than in the specKeywords list, so
+		// currentKeywords can only have a subset of specKeywords.
+		specKeywords = mtree.CollectUsedKeywords(specDh)
 	}
 
 	// -list-used
 	if *flListUsedKeywords {
-		if *flFile == "" {
+		if specDh == nil {
 			log.Println("no specification provided. please provide a validation manifest")
-			defer os.Exit(1)
 			isErr = true
 			return
 		}
-		usedKeywords := mtree.CollectUsedKeywords(dh)
+
 		if *flResultFormat == "json" {
 			// if they're asking for json, give it to them
-			data := map[string][]string{*flFile: usedKeywords}
+			data := map[string][]string{*flFile: specKeywords}
 			buf, err := json.MarshalIndent(data, "", "  ")
 			if err != nil {
 				defer os.Exit(1)
@@ -183,7 +208,7 @@ func main() {
 			fmt.Println(string(buf))
 		} else {
 			fmt.Printf("Keywords used in [%s]:\n", *flFile)
-			for _, kw := range usedKeywords {
+			for _, kw := range specKeywords {
 				fmt.Printf(" %s", kw)
 				if _, ok := mtree.KeywordFuncs[kw]; !ok {
 					fmt.Print(" (unsupported)")
@@ -194,6 +219,36 @@ func main() {
 		return
 	}
 
+	if specKeywords != nil {
+		// If we didn't actually change the set of keywords, we can just use specKeywords.
+		if *flUseKeywords == "" && *flAddKeywords == "" {
+			currentKeywords = specKeywords
+		}
+
+		for _, keyword := range currentKeywords {
+			// As always, time is a special case.
+			// TODO: Fix that.
+			if (keyword == "time" && inSlice("tar_time", specKeywords)) || (keyword == "tar_time" && inSlice("time", specKeywords)) {
+				continue
+			}
+
+			if !inSlice(keyword, specKeywords) {
+				log.Printf("cannot verify keywords not in mtree specification: %s\n", keyword)
+				isErr = true
+			}
+			if isErr {
+				return
+			}
+		}
+	}
+
+	// -p and -T are mutually exclusive
+	if *flPath != "" && *flTar != "" {
+		log.Println("options -T and -p are mutually exclusive")
+		isErr = true
+		return
+	}
+
 	// -p <path>
 	var rootPath = "."
 	if *flPath != "" {
@@ -201,7 +256,6 @@ func main() {
 	}
 
 	// -T <tar file>
-	var tdh *mtree.DirectoryHierarchy
 	if *flTar != "" {
 		var input io.Reader
 		if *flTar == "-" {
@@ -229,7 +283,15 @@ func main() {
 			return
 		}
 		var err error
-		tdh, err = ts.Hierarchy()
+		stateDh, err = ts.Hierarchy()
+		if err != nil {
+			log.Println(err)
+			isErr = true
+			return
+		}
+	} else {
+		// with a root directory
+		stateDh, err = mtree.Walk(rootPath, nil, currentKeywords)
 		if err != nil {
 			log.Println(err)
 			isErr = true
@@ -239,36 +301,36 @@ func main() {
 
 	// -c
 	if *flCreate {
-		// create a directory hierarchy
-		// with a tar stream
-		if tdh != nil {
-			tdh.WriteTo(os.Stdout)
-		} else {
-			// with a root directory
-			dh, err := mtree.Walk(rootPath, nil, currentKeywords)
+		fh := os.Stdout
+		if *flFile != "" {
+			fh, err = os.Create(*flFile)
 			if err != nil {
 				log.Println(err)
 				isErr = true
 				return
 			}
-			dh.WriteTo(os.Stdout)
-		}
-	} else if tdh != nil || dh != nil {
-		var res *mtree.Result
-		var err error
-		// else this is a validation
-		if *flTar != "" {
-			res, err = mtree.TarCheck(tdh, dh, currentKeywords)
-		} else {
-			res, err = mtree.Check(rootPath, dh, currentKeywords)
 		}
+
+		// output stateDh
+		stateDh.WriteTo(fh)
+		return
+	}
+
+	// This is a validation.
+	if specDh != nil && stateDh != nil {
+		var res []mtree.InodeDelta
+
+		res, err = mtree.Compare(specDh, stateDh, currentKeywords)
 		if err != nil {
 			log.Println(err)
 			isErr = true
 			return
 		}
-		if res != nil && len(res.Failures) > 0 {
-			defer os.Exit(1)
+		if res != nil {
+			if len(res) > 0 {
+				defer os.Exit(1)
+			}
+
 			out := formatFunc(res)
 			if _, err := os.Stdout.Write([]byte(out)); err != nil {
 				log.Println(err)
@@ -276,36 +338,9 @@ func main() {
 				return
 			}
 		}
-		if res != nil {
-			if len(res.Extra) > 0 {
-				defer os.Exit(1)
-				for _, extra := range res.Extra {
-					extrapath, err := extra.Path()
-					if err != nil {
-						log.Println(err)
-						isErr = true
-						return
-					}
-					fmt.Printf("%s extra\n", extrapath)
-				}
-			}
-			if len(res.Missing) > 0 {
-				defer os.Exit(1)
-				for _, missing := range res.Missing {
-					missingpath, err := missing.Path()
-					if err != nil {
-						log.Println(err)
-						isErr = true
-						return
-					}
-					fmt.Printf("%s missing\n", missingpath)
-				}
-			}
-		}
 	} else {
 		log.Println("neither validating or creating a manifest. Please provide additional arguments")
 		isErr = true
-		defer os.Exit(1)
 		return
 	}
 }

From c4be8dfe325d1d53eb230b4a7a62fc7ac60db13d Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Mon, 31 Oct 2016 03:39:13 +1100
Subject: [PATCH 4/6] compare: implement proper testing with tar

While the full testing is broken due to bugs in the tar DH generator, we
ignore known bugs in the tar generator to at least allow us to test some
of the other semantics of Compare.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 compare_test.go | 117 +++++++++++++++++++++++++++++++++++++++++++++++-
 tar_test.go     |  44 +++++++++++-------
 2 files changed, 144 insertions(+), 17 deletions(-)

diff --git a/compare_test.go b/compare_test.go
index 99422eb..a1e9cde 100644
--- a/compare_test.go
+++ b/compare_test.go
@@ -1,10 +1,14 @@
 package mtree
 
 import (
+	"archive/tar"
+	"bytes"
+	"io"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"testing"
+	"time"
 )
 
 // simple walk of current directory, and imediately check it.
@@ -335,4 +339,115 @@ func TestCompareKeys(t *testing.T) {
 	}
 }
 
-// TODO: Add test for Compare(...) between a tar and a regular dh (to make sure that tar_time is handled correctly).
+func TestTarCompare(t *testing.T) {
+	dir, err := ioutil.TempDir("", "test-compare-tar")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+
+	// Create a bunch of objects.
+	tmpfile := filepath.Join(dir, "tmpfile")
+	if err := ioutil.WriteFile(tmpfile, []byte("some content"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpdir := filepath.Join(dir, "testdir")
+	if err := os.Mkdir(tmpdir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	tmpsubfile := filepath.Join(tmpdir, "anotherfile")
+	if err := ioutil.WriteFile(tmpsubfile, []byte("aaa"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a tar-like archive.
+	compareFiles := []fakeFile{
+		{"./", "", 0700, tar.TypeDir, 100, 0, nil},
+		{"tmpfile", "some content", 0644, tar.TypeReg, 100, 0, nil},
+		{"testdir/", "", 0755, tar.TypeDir, 100, 0, nil},
+		{"testdir/anotherfile", "aaa", 0644, tar.TypeReg, 100, 0, nil},
+	}
+
+	for _, file := range compareFiles {
+		path := filepath.Join(dir, file.Name)
+
+		// Change the time to something known with nanosec != 0.
+		chtime := time.Unix(file.Sec, 987654321)
+		if err := os.Chtimes(path, chtime, chtime); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Walk the current state.
+	old, err := Walk(dir, nil, append(DefaultKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	ts, err := makeTarStream(compareFiles)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	str := NewTarStreamer(bytes.NewBuffer(ts), append(DefaultTarKeywords, "sha1"))
+	if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
+		t.Fatal(err)
+	}
+	if err = str.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	new, err := str.Hierarchy()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Compare.
+	diffs, err := Compare(old, new, append(DefaultTarKeywords, "sha1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 0 objects
+	if len(diffs) != 0 {
+		actualFailure := false
+		for i, delta := range diffs {
+			// XXX: Tar generation is slightly broken, so we need to ignore some bugs.
+			if delta.Path() == "." && delta.Type() == Modified {
+				// FIXME: This is a known bug.
+				t.Logf("'.' is different in the tar -- this is a bug in the tar generation")
+
+				// The tar generation bug means that '.' is missing a bunch of keys.
+				allMissing := true
+				for _, keyDelta := range delta.Diff() {
+					if keyDelta.Type() != Missing {
+						allMissing = false
+					}
+				}
+				if !allMissing {
+					t.Errorf("'.' has changed in a way not consistent with known bugs")
+				}
+
+				continue
+			}
+
+			// XXX: Another bug.
+			keys := delta.Diff()
+			if len(keys) == 1 && keys[0].Name() == "size" && keys[0].Type() == Missing {
+				// FIXME: Also a known bug with tar generation dropping size=.
+				t.Logf("'%s' is missing a size= keyword -- a bug in tar generation", delta.Path())
+
+				continue
+			}
+
+			actualFailure = true
+			t.Logf("FAILURE: diff[%d] = %#v", i, delta)
+		}
+
+		if actualFailure {
+			t.Errorf("expected the diff length to be 0, got %d", len(diffs))
+		}
+	}
+}
diff --git a/tar_test.go b/tar_test.go
index 3b5588e..643dfd4 100644
--- a/tar_test.go
+++ b/tar_test.go
@@ -6,7 +6,9 @@ import (
 	"io"
 	"io/ioutil"
 	"os"
+	"syscall"
 	"testing"
+	"time"
 )
 
 func ExampleStreamer() {
@@ -346,29 +348,39 @@ func TestHardlinks(t *testing.T) {
 	}
 }
 
-// minimal tar archive stream that mimics what is in ./testdata/test.tar
-func makeTarStream() ([]byte, error) {
+type fakeFile struct {
+	Name, Body string
+	Mode       int64
+	Type       byte
+	Sec, Nsec  int64
+	Xattrs     map[string]string
+}
+
+// minimal tar archive that mimics what is in ./testdata/test.tar
+var minimalFiles = []fakeFile{
+	{"x/", "", 0755, '5', 0, 0, nil},
+	{"x/files", "howdy\n", 0644, '0', 0, 0, nil},
+}
+
+func makeTarStream(ff []fakeFile) ([]byte, error) {
 	buf := new(bytes.Buffer)
 
 	// Create a new tar archive.
 	tw := tar.NewWriter(buf)
 
 	// Add some files to the archive.
-	var files = []struct {
-		Name, Body string
-		Mode       int64
-		Type       byte
-		Xattrs     map[string]string
-	}{
-		{"x/", "", 0755, '5', nil},
-		{"x/files", "howdy\n", 0644, '0', nil},
-	}
-	for _, file := range files {
+	for _, file := range ff {
 		hdr := &tar.Header{
-			Name:   file.Name,
-			Mode:   file.Mode,
-			Size:   int64(len(file.Body)),
-			Xattrs: file.Xattrs,
+			Name:       file.Name,
+			Uid:        syscall.Getuid(),
+			Gid:        syscall.Getgid(),
+			Mode:       file.Mode,
+			Typeflag:   file.Type,
+			Size:       int64(len(file.Body)),
+			ModTime:    time.Unix(file.Sec, file.Nsec),
+			AccessTime: time.Unix(file.Sec, file.Nsec),
+			ChangeTime: time.Unix(file.Sec, file.Nsec),
+			Xattrs:     file.Xattrs,
 		}
 		if err := tw.WriteHeader(hdr); err != nil {
 			return nil, err

From d214ab47e8a5560b554d61eff361fe100742f7a0 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Mon, 31 Oct 2016 21:42:53 +1100
Subject: [PATCH 5/6] check: re-implement *Check() using Compare()

This removes all of the special handling code for both TarCheck() and
Check() so that everything now uses the new (generic) Compare() code. In
addition, the tests had to be modified to reflect the new classes of
errors.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 check.go      | 197 +++++---------------------------------------------
 check_test.go |  47 ++++++------
 tar_test.go   | 118 ++++++++++++------------------
 3 files changed, 88 insertions(+), 274 deletions(-)

diff --git a/check.go b/check.go
index 6a20848..5ca345d 100644
--- a/check.go
+++ b/check.go
@@ -1,194 +1,31 @@
 package mtree
 
-import (
-	"fmt"
-	"os"
-	"sort"
-	"strings"
-)
-
-// Result of a Check
-type Result struct {
-	// list of any failures in the Check
-	Failures []Failure `json:"failures"`
-	Missing  []Entry   `json:"missing,omitempty"`
-	Extra    []Entry   `json:"extra,omitempty"`
-}
-
-// Failure of a particular keyword for a path
-type Failure struct {
-	Path     string `json:"path"`
-	Keyword  string `json:"keyword"`
-	Expected string `json:"expected"`
-	Got      string `json:"got"`
-}
-
-// String returns a "pretty" formatting for a Failure
-func (f Failure) String() string {
-	return fmt.Sprintf("%q: keyword %q: expected %s; got %s", f.Path, f.Keyword, f.Expected, f.Got)
-}
-
 // Check a root directory path against the DirectoryHierarchy, regarding only
 // the available keywords from the list and each entry in the hierarchy.
 // If keywords is nil, the check all present in the DirectoryHierarchy
-func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, error) {
-	curDir, err := os.Getwd()
-	if err == nil {
-		defer os.Chdir(curDir)
+//
+// This is equivalent to creating a new DirectoryHierarchy with Walk(root, nil,
+// keywords) and then doing a Compare(dh, newDh, keywords).
+func Check(root string, dh *DirectoryHierarchy, keywords []string) ([]InodeDelta, error) {
+	if keywords == nil {
+		keywords = CollectUsedKeywords(dh)
 	}
 
-	if err := os.Chdir(root); err != nil {
+	newDh, err := Walk(root, nil, keywords)
+	if err != nil {
 		return nil, err
 	}
-	sort.Sort(byPos(dh.Entries))
 
-	var result Result
-	for _, e := range dh.Entries {
-		switch e.Type {
-		case RelativeType, FullType:
-			pathname, err := e.Path()
-			if err != nil {
-				return nil, err
-			}
-			info, err := os.Lstat(pathname)
-			if err != nil {
-				return nil, err
-			}
-
-			kvs := e.AllKeys()
-
-			for _, kv := range kvs {
-				kw := kv.Keyword()
-				// 'tar_time' keyword evaluation wins against 'time' keyword evaluation
-				if kv.Keyword() == "time" && inSlice("tar_time", keywords) {
-					kw = "tar_time"
-					tartime := fmt.Sprintf("%s.%s", (strings.Split(kv.Value(), ".")[0]), "000000000")
-					kv = KeyVal(KeyVal(kw).ChangeValue(tartime))
-				}
-
-				keywordFunc, ok := KeywordFuncs[kw]
-				if !ok {
-					return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), pathname)
-				}
-				if keywords != nil && !inSlice(kv.Keyword(), keywords) {
-					continue
-				}
-
-				var curKeyVal string
-				if info.Mode().IsRegular() {
-					fh, err := os.Open(pathname)
-					if err != nil {
-						return nil, err
-					}
-					curKeyVal, err = keywordFunc(pathname, info, fh)
-					if err != nil {
-						fh.Close()
-						return nil, err
-					}
-					fh.Close()
-				} else {
-					curKeyVal, err = keywordFunc(pathname, info, nil)
-					if err != nil {
-						return nil, err
-					}
-				}
-				if string(kv) != curKeyVal {
-					failure := Failure{Path: pathname, Keyword: kv.Keyword(), Expected: kv.Value(), Got: KeyVal(curKeyVal).Value()}
-					result.Failures = append(result.Failures, failure)
-				}
-			}
-		}
-	}
-	return &result, nil
+	// TODO: Handle tar_time, if necessary.
+	return Compare(dh, newDh, keywords)
 }
 
-// TarCheck is the tar equivalent of checking a file hierarchy spec against a tar stream to
-// determine if files have been changed.
-func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) {
-	var result Result
-	var err error
-	var tarRoot *Entry
-
-	for _, e := range tarDH.Entries {
-		if e.Name == "." {
-			tarRoot = &e
-			break
-		}
+// TarCheck is the tar equivalent of checking a file hierarchy spec against a
+// tar stream to determine if files have been changed. This is precisely
+// equivalent to Compare(dh, tarDH, keywords).
+func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) ([]InodeDelta, error) {
+	if keywords == nil {
+		keywords = CollectUsedKeywords(dh)
 	}
-	if tarRoot == nil {
-		return nil, fmt.Errorf("root of archive could not be found")
-	}
-	tarRoot.Next = &Entry{
-		Name: "seen",
-		Type: CommentType,
-	}
-	curDir := tarRoot
-	sort.Sort(byPos(dh.Entries))
-
-	var outOfTree bool
-	for _, e := range dh.Entries {
-		switch e.Type {
-		case RelativeType, FullType:
-			pathname, err := e.Path()
-			if err != nil {
-				return nil, err
-			}
-			if outOfTree {
-				return &result, fmt.Errorf("No parent node from %s", pathname)
-			}
-			// TODO: handle the case where "." is not the first Entry to be found
-			tarEntry := curDir.Descend(e.Name)
-			if tarEntry == nil {
-				result.Missing = append(result.Missing, e)
-				continue
-			}
-
-			tarEntry.Next = &Entry{
-				Type: CommentType,
-				Name: "seen",
-			}
-
-			// expected values from file hierarchy spec
-			kvs := e.AllKeys()
-
-			// actual
-			tarkvs := tarEntry.AllKeys()
-
-			for _, kv := range kvs {
-				// TODO: keep track of symlinks
-				if _, ok := KeywordFuncs[kv.Keyword()]; !ok {
-					return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), pathname)
-				}
-				if keywords != nil && !inSlice(kv.Keyword(), keywords) {
-					continue
-				}
-				tarpath, err := tarEntry.Path()
-				if err != nil {
-					return nil, err
-				}
-				if tarkv := tarkvs.Has(kv.Keyword()); tarkv != emptyKV {
-					if string(tarkv) != string(kv) {
-						failure := Failure{Path: tarpath, Keyword: kv.Keyword(), Expected: kv.Value(), Got: tarkv.Value()}
-						result.Failures = append(result.Failures, failure)
-					}
-				}
-			}
-			// Step into a directory
-			if tarEntry.Prev != nil {
-				curDir = tarEntry
-			}
-		case DotDotType:
-			if outOfTree {
-				return &result, fmt.Errorf("No parent node.")
-			}
-			curDir = curDir.Ascend()
-			if curDir == nil {
-				outOfTree = true
-			}
-		}
-	}
-	result.Extra = filter(tarRoot, func(e *Entry) bool {
-		return e.Next == nil
-	})
-	return &result, err
+	return Compare(dh, tarDH, keywords)
 }
diff --git a/check_test.go b/check_test.go
index 9c2b409..08e9211 100644
--- a/check_test.go
+++ b/check_test.go
@@ -22,7 +22,7 @@ func TestCheck(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if len(res.Failures) > 0 {
+	if len(res) > 0 {
 		t.Errorf("%#v", res)
 	}
 }
@@ -53,7 +53,7 @@ func TestCheckKeywords(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if len(res.Failures) > 0 {
+	if len(res) > 0 {
 		t.Errorf("%#v", res)
 	}
 
@@ -68,8 +68,11 @@ func TestCheckKeywords(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if len(res.Failures) == 0 {
-		t.Errorf("expected to fail on changed mtimes, but did not")
+	if len(res) != 1 {
+		t.Errorf("expected to get 1 delta on changed mtimes, but did not")
+	}
+	if res[0].Type() != Modified {
+		t.Errorf("expected to get modified delta on changed mtimes, but did not")
 	}
 
 	// Check again, but only sha1 and mode. This ought to pass.
@@ -77,7 +80,7 @@ func TestCheckKeywords(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if len(res.Failures) > 0 {
+	if len(res) > 0 {
 		t.Errorf("%#v", res)
 	}
 }
@@ -92,7 +95,7 @@ func ExampleCheck() {
 	if err != nil {
 		// handle error ...
 	}
-	if len(res.Failures) > 0 {
+	if len(res) > 0 {
 		// handle failed validity ...
 	}
 }
@@ -108,9 +111,9 @@ func TestDefaultBrokenLink(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if res != nil && len(res.Failures) > 0 {
-		for _, f := range res.Failures {
-			t.Error(f)
+	if len(res) > 0 {
+		for _, delta := range res {
+			t.Error(delta)
 		}
 	}
 }
@@ -156,8 +159,8 @@ func TestTimeComparison(t *testing.T) {
 	if err != nil {
 		t.Error(err)
 	}
-	if len(res.Failures) > 0 {
-		t.Fatal(res.Failures)
+	if len(res) > 0 {
+		t.Fatal(res)
 	}
 }
 
@@ -197,19 +200,21 @@ func TestTarTime(t *testing.T) {
 		t.Fatal(err)
 	}
 
+	keywords := CollectUsedKeywords(dh)
+
 	// make sure "time" keyword works
-	_, err = Check(dir, dh, DefaultKeywords)
+	_, err = Check(dir, dh, keywords)
 	if err != nil {
 		t.Error(err)
 	}
 
 	// make sure tar_time wins
-	res, err := Check(dir, dh, append(DefaultKeywords, "tar_time"))
+	res, err := Check(dir, dh, append(keywords, "tar_time"))
 	if err != nil {
 		t.Error(err)
 	}
-	if len(res.Failures) > 0 {
-		t.Fatal(res.Failures)
+	if len(res) > 0 {
+		t.Fatal(res)
 	}
 }
 
@@ -254,8 +259,8 @@ func TestIgnoreComments(t *testing.T) {
 		t.Error(err)
 	}
 
-	if len(res.Failures) > 0 {
-		t.Fatal(res.Failures)
+	if len(res) > 0 {
+		t.Fatal(res)
 	}
 
 	// now change the spec to a comment that looks like an actual Entry but has
@@ -274,8 +279,8 @@ func TestIgnoreComments(t *testing.T) {
 		t.Error(err)
 	}
 
-	if len(res.Failures) > 0 {
-		t.Fatal(res.Failures)
+	if len(res) > 0 {
+		t.Fatal(res)
 	}
 }
 
@@ -309,7 +314,7 @@ func TestCheckNeedsEncoding(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if len(res.Failures) > 0 {
-		t.Fatal(res.Failures)
+	if len(res) > 0 {
+		t.Fatal(res)
 	}
 }
diff --git a/tar_test.go b/tar_test.go
index 643dfd4..f11763f 100644
--- a/tar_test.go
+++ b/tar_test.go
@@ -30,7 +30,7 @@ func ExampleStreamer() {
 	if err != nil {
 		// handle error ...
 	}
-	if len(res.Failures) > 0 {
+	if len(res) > 0 {
 		// handle validation issue ...
 	}
 }
@@ -104,43 +104,17 @@ func TestTar(t *testing.T) {
 	}
 
 	res, err := TarCheck(tdh, dh, append(DefaultKeywords, "sha1"))
-
 	if err != nil {
 		t.Fatal(err)
 	}
 
 	// print any failures, and then call t.Fatal once all failures/extra/missing
 	// are outputted
-	if res != nil {
-		errors := ""
-		switch {
-		case len(res.Failures) > 0:
-			for _, f := range res.Failures {
-				t.Errorf("%s\n", f)
-			}
-			errors += "Keyword validation errors\n"
-		case len(res.Missing) > 0:
-			for _, m := range res.Missing {
-				missingpath, err := m.Path()
-				if err != nil {
-					t.Fatal(err)
-				}
-				t.Errorf("Missing file: %s\n", missingpath)
-			}
-			errors += "Missing files not expected for this test\n"
-		case len(res.Extra) > 0:
-			for _, e := range res.Extra {
-				extrapath, err := e.Path()
-				if err != nil {
-					t.Fatal(err)
-				}
-				t.Errorf("Extra file: %s\n", extrapath)
-			}
-			errors += "Extra files not expected for this test\n"
-		}
-		if errors != "" {
-			t.Fatal(errors)
+	if len(res) > 0 {
+		for _, delta := range res {
+			t.Error(delta)
 		}
+		t.Fatal("unexpected errors")
 	}
 }
 
@@ -176,16 +150,11 @@ func TestArchiveCreation(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if res != nil {
-		for _, f := range res.Failures {
-			t.Errorf(f.String())
-		}
-		for _, e := range res.Extra {
-			t.Errorf("%s extra not expected", e.Name)
-		}
-		for _, m := range res.Missing {
-			t.Errorf("%s missing not expected", m.Name)
+	if len(res) > 0 {
+		for _, delta := range res {
+			t.Error(delta)
 		}
+		t.Fatal("unexpected errors")
 	}
 
 	// Test the tar manifest against itself
@@ -193,16 +162,11 @@ func TestArchiveCreation(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if res != nil {
-		for _, f := range res.Failures {
-			t.Errorf(f.String())
-		}
-		for _, e := range res.Extra {
-			t.Errorf("%s extra not expected", e.Name)
-		}
-		for _, m := range res.Missing {
-			t.Errorf("%s missing not expected", m.Name)
+	if len(res) > 0 {
+		for _, delta := range res {
+			t.Error(delta)
 		}
+		t.Fatal("unexpected errors")
 	}
 
 	// Validate the directory manifest against the archive
@@ -214,16 +178,11 @@ func TestArchiveCreation(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if res != nil {
-		for _, f := range res.Failures {
-			t.Errorf(f.String())
-		}
-		for _, e := range res.Extra {
-			t.Errorf("%s extra not expected", e.Name)
-		}
-		for _, m := range res.Missing {
-			t.Errorf("%s missing not expected", m.Name)
+	if len(res) > 0 {
+		for _, delta := range res {
+			t.Error(delta)
 		}
+		t.Fatal("unexpected errors")
 	}
 }
 
@@ -257,16 +216,11 @@ func TestTreeTraversal(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if res != nil {
-		for _, f := range res.Failures {
-			t.Errorf(f.String())
-		}
-		for _, e := range res.Extra {
-			t.Errorf("%s extra not expected", e.Name)
-		}
-		for _, m := range res.Missing {
-			t.Errorf("%s missing not expected", m.Name)
+	if len(res) > 0 {
+		for _, delta := range res {
+			t.Error(delta)
 		}
+		t.Fatal("unexpected errors")
 	}
 
 	// top-level "." directory will contain contents of traversal.tar
@@ -274,9 +228,18 @@ func TestTreeTraversal(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if res != nil {
-		for _, f := range res.Failures {
-			t.Errorf(f.String())
+	if len(res) > 0 {
+		var failed bool
+		for _, delta := range res {
+			// We only care about missing or modified files.
+			// The original test was written using the old check code.
+			if delta.Type() != Extra {
+				failed = true
+				t.Error(delta)
+			}
+		}
+		if failed {
+			t.Fatal("unexpected errors")
 		}
 	}
 
@@ -303,9 +266,18 @@ func TestTreeTraversal(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if res != nil {
-		for _, f := range res.Failures {
-			t.Errorf(f.String())
+	if len(res) > 0 {
+		var failed bool
+		for _, delta := range res {
+			// We only care about missing or modified files.
+			// The original test was written using the old check code.
+			if delta.Type() != Extra {
+				failed = true
+				t.Error(delta)
+			}
+		}
+		if failed {
+			t.Fatal("unexpected errors")
 		}
 	}
 }

From 3bfdecf46734a6fc90d2d185bd334f42e31e809d Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Thu, 10 Nov 2016 12:41:20 +1100
Subject: [PATCH 6/6] gomtree: add special cases for tar generation checking

Due to several unsolveable problems in tar generation, such as the
size=... keyword, we have to special case quite a few things in the
checking code. We might want to move this to mtree properly (but I'm
hesitant about ignoring errors that only happen for tar DHes).

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 cmd/gomtree/main.go | 87 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/cmd/gomtree/main.go b/cmd/gomtree/main.go
index 8e7e941..dfac662 100644
--- a/cmd/gomtree/main.go
+++ b/cmd/gomtree/main.go
@@ -62,6 +62,90 @@ var formats = map[string]func([]mtree.InodeDelta) string{
 	},
 }
 
+// isDirEntry returns wheter an mtree.Entry describes a directory.
+func isDirEntry(e mtree.Entry) bool {
+	for _, kw := range e.Keywords {
+		kv := mtree.KeyVal(kw)
+		if kv.Keyword() == "type" {
+			return kv.Value() == "dir"
+		}
+	}
+	// Shouldn't be reached.
+	return false
+}
+
+// filterMissingKeywords is a fairly annoying hack to get around the fact that
+// tar archive manifest generation has certain unsolveable problems regarding
+// certain keywords. For example, the size=... keyword cannot be implemented
+// for directories in a tar archive (which causes Missing errors for that
+// keyword).
+//
+// This function just removes all instances of Missing errors for keywords.
+// This makes certain assumptions about the type of issues tar archives have.
+// Only call this on tar archive manifest comparisons.
+func filterMissingKeywords(diffs []mtree.InodeDelta) []mtree.InodeDelta {
+	newDiffs := []mtree.InodeDelta{}
+loop:
+	for _, diff := range diffs {
+		if diff.Type() == mtree.Modified {
+			// We only apply this filtering to directories.
+			// NOTE: This will probably break if someone drops the size keyword.
+			if isDirEntry(*diff.Old()) || isDirEntry(*diff.New()) {
+				// If this applies to '.' then we just filter everything
+				// (meaning we remove this entry). This is because note all tar
+				// archives include a '.' entry. Which makes checking this not
+				// practical.
+				if diff.Path() == "." {
+					continue
+				}
+
+				// Only filter out the size keyword.
+				// NOTE: This currently takes advantage of the fact the
+				//       diff.Diff() returns the actual slice to diff.keys.
+				keys := diff.Diff()
+				for idx, k := range keys {
+					// Delete the key if it's "size". Unfortunately in Go you
+					// can't delete from a slice without reassigning it. So we
+					// just overwrite it with the last value.
+					if k.Name() == "size" {
+						if len(keys) < 2 {
+							continue loop
+						}
+						keys[idx] = keys[len(keys)-1]
+					}
+				}
+			}
+		}
+
+		// If we got here, append to the new set.
+		newDiffs = append(newDiffs, diff)
+	}
+	return newDiffs
+}
+
+// isTarSpec returns whether the spec provided came from the tar generator.
+// This takes advantage of an unsolveable problem in tar generation.
+func isTarSpec(spec *mtree.DirectoryHierarchy) bool {
+	// Find a directory and check whether it's missing size=...
+	// NOTE: This will definitely break if someone drops the size=... keyword.
+	for _, e := range spec.Entries {
+		if !isDirEntry(e) {
+			continue
+		}
+
+		for _, kw := range e.Keywords {
+			kv := mtree.KeyVal(kw)
+			if kv.Keyword() == "size" {
+				return false
+			}
+		}
+		return true
+	}
+
+	// Should never be reached.
+	return false
+}
+
 func main() {
 	flag.Parse()
 
@@ -327,6 +411,9 @@ func main() {
 			return
 		}
 		if res != nil {
+			if isTarSpec(specDh) || *flTar != "" {
+				res = filterMissingKeywords(res)
+			}
 			if len(res) > 0 {
 				defer os.Exit(1)
 			}