21 changed files with 820 additions and 648 deletions
@ -3,10 +3,10 @@ BUILD := gomtree
CWD := $(shell pwd)
SOURCE_FILES := $(shell find . -type f -name "*.go")
default: validation build
default: build validation
.PHONY: validation
validation: test lint vet .cli.test
validation: .test .lint .vet .cli.test
.PHONY: test
test: .test
@ -6,16 +6,20 @@ package mtree
// This is equivalent to creating a new DirectoryHierarchy with Walk(root, nil,
// keywords) and then doing a Compare(dh, newDh, keywords).
func Check(root string, dh *DirectoryHierarchy, keywords []string) ([]InodeDelta, error) {
func Check(root string, dh *DirectoryHierarchy, keywords []Keyword) ([]InodeDelta, error) {
if keywords == nil {
keywords = CollectUsedKeywords(dh)
used := dh.UsedKeywords()
newDh, err := Walk(root, nil, used)
if err != nil {
return nil, err
return Compare(dh, newDh, used)
newDh, err := Walk(root, nil, keywords)
if err != nil {
return nil, err
// TODO: Handle tar_time, if necessary.
return Compare(dh, newDh, keywords)
@ -23,9 +27,9 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) ([]InodeDelta
// TarCheck is the tar equivalent of checking a file hierarchy spec against a
// tar stream to determine if files have been changed. This is precisely
// equivalent to Compare(dh, tarDH, keywords).
func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) ([]InodeDelta, error) {
func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []Keyword) ([]InodeDelta, error) {
if keywords == nil {
keywords = CollectUsedKeywords(dh)
return Compare(dh, tarDH, dh.UsedKeywords())
return Compare(dh, tarDH, keywords)
@ -69,14 +69,14 @@ func TestCheckKeywords(t *testing.T) {
if len(res) != 1 {
t.Errorf("expected to get 1 delta on changed mtimes, but did not")
t.Fatal("expected to get 1 delta on changed mtimes, but did not")
if res[0].Type() != Modified {
t.Errorf("expected to get modified delta on changed mtimes, but did not")
// Check again, but only sha1 and mode. This ought to pass.
res, err = Check(dir, dh, []string{"sha1", "mode"})
res, err = Check(dir, dh, []Keyword{"sha1", "mode"})
if err != nil {
@ -200,7 +200,7 @@ func TestTarTime(t *testing.T) {
keywords := CollectUsedKeywords(dh)
keywords := dh.UsedKeywords()
// make sure "time" keyword works
_, err = Check(dir, dh, keywords)
@ -29,6 +29,251 @@ var (
flVersion = flag.Bool("version", false, "display the version of this tool")
func main() {
// so that defers cleanly exec
if err := app(); err != nil {
func app() error {
if *flDebug {
os.Setenv("DEBUG", "1")
if *flVersion {
fmt.Printf("%s :: %s\n", mtree.AppName, mtree.Version)
return nil
// -list-keywords
if *flListKeywords {
fmt.Println("Available keywords:")
for k := range mtree.KeywordFuncs {
fmt.Print(" ")
if mtree.Keyword(k).Default() {
fmt.Print(" (default)")
if !mtree.Keyword(k).Bsd() {
fmt.Print(" (not upstream)")
return nil
// --result-format
formatFunc, ok := formats[*flResultFormat]
if !ok {
return fmt.Errorf("invalid output format: %s", *flResultFormat)
var (
err error
tmpKeywords []mtree.Keyword
currentKeywords []mtree.Keyword
// -k <keywords>
if *flUseKeywords != "" {
tmpKeywords = splitKeywordsArg(*flUseKeywords)
if !mtree.InKeywordSlice("type", tmpKeywords) {
tmpKeywords = append([]mtree.Keyword{"type"}, tmpKeywords...)
} else {
if *flTar != "" {
tmpKeywords = mtree.DefaultTarKeywords[:]
} else {
tmpKeywords = mtree.DefaultKeywords[:]
// -K <keywords>
if *flAddKeywords != "" {
for _, kw := range splitKeywordsArg(*flAddKeywords) {
if !mtree.InKeywordSlice(kw, tmpKeywords) {
tmpKeywords = append(tmpKeywords, kw)
// -bsd-keywords
if *flBsdKeywords {
for _, k := range tmpKeywords {
if mtree.Keyword(k).Bsd() {
currentKeywords = append(currentKeywords, k)
} else {
fmt.Fprintf(os.Stderr, "INFO: ignoring %q as it is not an upstream keyword\n", k)
} else {
currentKeywords = tmpKeywords
// Check mutual exclusivity of keywords.
// TODO(cyphar): Abstract this inside keywords.go.
if mtree.InKeywordSlice("tar_time", currentKeywords) && mtree.InKeywordSlice("time", currentKeywords) {
return fmt.Errorf("tar_time and time are mutually exclusive keywords")
// If we're doing a comparison, we always are comparing between a spec and
// state DH. If specDh is nil, we are generating a new one.
var (
specDh *mtree.DirectoryHierarchy
stateDh *mtree.DirectoryHierarchy
specKeywords []mtree.Keyword
// -f <file>
if *flFile != "" && !*flCreate {
// load the hierarchy, if we're not creating a new spec
fh, err := os.Open(*flFile)
if err != nil {
return err
specDh, err = mtree.ParseSpec(fh)
if err != nil {
return err
// We can't check against more fields than in the specKeywords list, so
// currentKeywords can only have a subset of specKeywords.
specKeywords = specDh.UsedKeywords()
// -list-used
if *flListUsedKeywords {
if specDh == nil {
return fmt.Errorf("no specification provided. please provide a validation manifest")
if *flResultFormat == "json" {
// if they're asking for json, give it to them
data := map[string][]mtree.Keyword{*flFile: specKeywords}
buf, err := json.MarshalIndent(data, "", " ")
if err != nil {
return err
} else {
fmt.Printf("Keywords used in [%s]:\n", *flFile)
for _, kw := range specKeywords {
fmt.Printf(" %s", kw)
if _, ok := mtree.KeywordFuncs[kw]; !ok {
fmt.Print(" (unsupported)")
return nil
if specKeywords != nil {
// If we didn't actually change the set of keywords, we can just use specKeywords.
if *flUseKeywords == "" && *flAddKeywords == "" {
currentKeywords = specKeywords
for _, keyword := range currentKeywords {
// As always, time is a special case.
// TODO: Fix that.
if (keyword == "time" && mtree.InKeywordSlice("tar_time", specKeywords)) || (keyword == "tar_time" && mtree.InKeywordSlice("time", specKeywords)) {
// -p and -T are mutually exclusive
if *flPath != "" && *flTar != "" {
return fmt.Errorf("options -T and -p are mutually exclusive")
// -p <path>
var rootPath = "."
if *flPath != "" {
rootPath = *flPath
// -T <tar file>
if *flTar != "" {
var input io.Reader
if *flTar == "-" {
input = os.Stdin
} else {
fh, err := os.Open(*flTar)
if err != nil {
return err
defer fh.Close()
input = fh
ts := mtree.NewTarStreamer(input, currentKeywords)
if _, err := io.Copy(ioutil.Discard, ts); err != nil && err != io.EOF {
return err
if err := ts.Close(); err != nil {
return err
var err error
stateDh, err = ts.Hierarchy()
if err != nil {
return err
} else {
// with a root directory
stateDh, err = mtree.Walk(rootPath, nil, currentKeywords)
if err != nil {
return err
// -c
if *flCreate {
fh := os.Stdout
if *flFile != "" {
fh, err = os.Create(*flFile)
if err != nil {
return err
// output stateDh
return nil
// This is a validation.
if specDh != nil && stateDh != nil {
var res []mtree.InodeDelta
res, err = mtree.Compare(specDh, stateDh, currentKeywords)
if err != nil {
return err
if res != nil {
if isTarSpec(specDh) || *flTar != "" {
res = filterMissingKeywords(res)
//if len(res) > 0 {
//return fmt.Errorf("unexpected missing keywords: %d", len(res))
out := formatFunc(res)
if _, err := os.Stdout.Write([]byte(out)); err != nil {
return err
} else {
return fmt.Errorf("neither validating or creating a manifest. Please provide additional arguments")
return nil
var formats = map[string]func([]mtree.InodeDelta) string{
// Outputs the errors in the BSD format.
"bsd": func(d []mtree.InodeDelta) string {
@ -146,301 +391,10 @@ func isTarSpec(spec *mtree.DirectoryHierarchy) bool {
return false
func main() {
if *flDebug {
os.Setenv("DEBUG", "1")
// so that defers cleanly exec
// TODO: Switch everything to being inside a function, to remove the need for isErr.
var isErr bool
defer func() {
if isErr {
if *flVersion {
fmt.Printf("%s :: %s\n", os.Args[0], mtree.Version)
// -list-keywords
if *flListKeywords {
fmt.Println("Available keywords:")
for k := range mtree.KeywordFuncs {
fmt.Print(" ")
if mtree.Keyword(k).Default() {
fmt.Print(" (default)")
if !mtree.Keyword(k).Bsd() {
fmt.Print(" (not upstream)")
// --result-format
formatFunc, ok := formats[*flResultFormat]
if !ok {
log.Printf("invalid output format: %s", *flResultFormat)
isErr = true
var (
err error
tmpKeywords []string
currentKeywords []string
// -k <keywords>
if *flUseKeywords != "" {
tmpKeywords = splitKeywordsArg(*flUseKeywords)
if !inSlice("type", tmpKeywords) {
tmpKeywords = append([]string{"type"}, tmpKeywords...)
} else {
if *flTar != "" {
tmpKeywords = mtree.DefaultTarKeywords[:]
} else {
tmpKeywords = mtree.DefaultKeywords[:]
// -K <keywords>
if *flAddKeywords != "" {
for _, kw := range splitKeywordsArg(*flAddKeywords) {
if !inSlice(kw, tmpKeywords) {
tmpKeywords = append(tmpKeywords, kw)
// -bsd-keywords
if *flBsdKeywords {
for _, k := range tmpKeywords {
if mtree.Keyword(k).Bsd() {
currentKeywords = append(currentKeywords, k)
} else {
fmt.Fprintf(os.Stderr, "INFO: ignoring %q as it is not an upstream keyword\n", k)
} else {
currentKeywords = tmpKeywords
// Check mutual exclusivity of keywords.
// TODO(cyphar): Abstract this inside keywords.go.
if inSlice("tar_time", currentKeywords) && inSlice("time", currentKeywords) {
log.Printf("tar_time and time are mutually exclusive keywords")
isErr = true
// If we're doing a comparison, we always are comparing between a spec and
// state DH. If specDh is nil, we are generating a new one.
var (
specDh *mtree.DirectoryHierarchy
stateDh *mtree.DirectoryHierarchy
specKeywords []string
// -f <file>
if *flFile != "" && !*flCreate {
// load the hierarchy, if we're not creating a new spec
fh, err := os.Open(*flFile)
if err != nil {
isErr = true
specDh, err = mtree.ParseSpec(fh)
if err != nil {
isErr = true
// We can't check against more fields than in the specKeywords list, so
// currentKeywords can only have a subset of specKeywords.
specKeywords = mtree.CollectUsedKeywords(specDh)
// -list-used
if *flListUsedKeywords {
if specDh == nil {
log.Println("no specification provided. please provide a validation manifest")
isErr = true
if *flResultFormat == "json" {
// if they're asking for json, give it to them
data := map[string][]string{*flFile: specKeywords}
buf, err := json.MarshalIndent(data, "", " ")
if err != nil {
defer os.Exit(1)
isErr = true
} else {
fmt.Printf("Keywords used in [%s]:\n", *flFile)
for _, kw := range specKeywords {
fmt.Printf(" %s", kw)
if _, ok := mtree.KeywordFuncs[kw]; !ok {
fmt.Print(" (unsupported)")
if specKeywords != nil {
// If we didn't actually change the set of keywords, we can just use specKeywords.
if *flUseKeywords == "" && *flAddKeywords == "" {
currentKeywords = specKeywords
for _, keyword := range currentKeywords {
// As always, time is a special case.
// TODO: Fix that.
if (keyword == "time" && inSlice("tar_time", specKeywords)) || (keyword == "tar_time" && inSlice("time", specKeywords)) {
if !inSlice(keyword, specKeywords) {
log.Printf("cannot verify keywords not in mtree specification: %s\n", keyword)
isErr = true
if isErr {
// -p and -T are mutually exclusive
if *flPath != "" && *flTar != "" {
log.Println("options -T and -p are mutually exclusive")
isErr = true
// -p <path>
var rootPath = "."
if *flPath != "" {
rootPath = *flPath
// -T <tar file>
if *flTar != "" {
var input io.Reader
if *flTar == "-" {
input = os.Stdin
} else {
fh, err := os.Open(*flTar)
if err != nil {
isErr = true
defer fh.Close()
input = fh
ts := mtree.NewTarStreamer(input, currentKeywords)
if _, err := io.Copy(ioutil.Discard, ts); err != nil && err != io.EOF {
isErr = true
if err := ts.Close(); err != nil {
isErr = true
var err error
stateDh, err = ts.Hierarchy()
if err != nil {
isErr = true
} else {
// with a root directory
stateDh, err = mtree.Walk(rootPath, nil, currentKeywords)
if err != nil {
isErr = true
// -c
if *flCreate {
fh := os.Stdout
if *flFile != "" {
fh, err = os.Create(*flFile)
if err != nil {
isErr = true
// output stateDh
// This is a validation.
if specDh != nil && stateDh != nil {
var res []mtree.InodeDelta
res, err = mtree.Compare(specDh, stateDh, currentKeywords)
if err != nil {
isErr = true
if res != nil {
if isTarSpec(specDh) || *flTar != "" {
res = filterMissingKeywords(res)
if len(res) > 0 {
defer os.Exit(1)
out := formatFunc(res)
if _, err := os.Stdout.Write([]byte(out)); err != nil {
isErr = true
} else {
log.Println("neither validating or creating a manifest. Please provide additional arguments")
isErr = true
func splitKeywordsArg(str string) []mtree.Keyword {
keywords := []mtree.Keyword{}
for _, kw := range strings.Fields(strings.Replace(str, ",", " ", -1)) {
keywords = append(keywords, mtree.KeywordSynonym(kw))
func splitKeywordsArg(str string) []string {
return strings.Fields(strings.Replace(str, ",", " ", -1))
func inSlice(a string, list []string) bool {
for _, b := range list {
if b == a {
return true
return false
return keywords
@ -123,7 +123,7 @@ func (i InodeDelta) String() string {
// returned with InodeDelta.Diff().
type KeyDelta struct {
diff DifferenceType
name string
name Keyword
old string
new string
@ -136,7 +136,7 @@ func (k KeyDelta) Type() DifferenceType {
// Name returns the name (the key) of the KeyDeltaVal entry in the
// DirectoryHierarchy.
func (k KeyDelta) Name() string {
func (k KeyDelta) Name() Keyword {
@ -164,7 +164,7 @@ func (k KeyDelta) New() *string {
func (k KeyDelta) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
Type DifferenceType `json:"type"`
Name string `json:"name"`
Name Keyword `json:"name"`
Old string `json:"old"`
New string `json:"new"`
@ -184,11 +184,17 @@ func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
New *KeyVal
diffs := map[string]*stateT{}
diffs := map[Keyword]*stateT{}
oldKeys := oldEntry.AllKeys()
newKeys := newEntry.AllKeys()
// Fill the map with the old keys first.
for _, kv := range oldEntry.AllKeys() {
for _, kv := range oldKeys {
key := kv.Keyword()
// only add this diff if the new keys has this keyword
if key != "tar_time" && key != "time" && HasKeyword(newKeys, key) == emptyKV {
// Cannot take &kv because it's the iterator.
copy := new(KeyVal)
@ -202,8 +208,12 @@ func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
// Then fill the new keys.
for _, kv := range newEntry.AllKeys() {
for _, kv := range newKeys {
key := kv.Keyword()
// only add this diff if the old keys has this keyword
if key != "tar_time" && key != "time" && HasKeyword(oldKeys, key) == emptyKV {
// Cannot take &kv because it's the iterator.
copy := new(KeyVal)
@ -218,7 +228,7 @@ func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
// We need a full list of the keys so we can deal with different keyvalue
// orderings.
var kws []string
var kws []Keyword
for kw := range diffs {
kws = append(kws, kw)
@ -226,7 +236,7 @@ func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
// If both tar_time and time were specified in the set of keys, we have to
// mess with the diffs. This is an unfortunate side-effect of tar archives.
// TODO(cyphar): This really should be abstracted inside keywords.go
if inSlice("tar_time", kws) && inSlice("time", kws) {
if InKeywordSlice("tar_time", kws) && InKeywordSlice("time", kws) {
// Delete "time".
timeStateT := diffs["time"]
delete(diffs, "time")
@ -312,19 +322,13 @@ func compareEntry(oldEntry, newEntry Entry) ([]KeyDelta, error) {
// NB: The order of the parameters matters (old, new) because Extra and
// Missing are considered as different discrepancy types.
func Compare(oldDh, newDh *DirectoryHierarchy, keys []string) ([]InodeDelta, error) {
func Compare(oldDh, newDh *DirectoryHierarchy, keys []Keyword) ([]InodeDelta, error) {
// Represents the new and old states for an entry.
type stateT struct {
Old *Entry
New *Entry
// Make dealing with the keys mapping easier.
keySet := map[string]struct{}{}
for _, key := range keys {
keySet[key] = struct{}{}
// To deal with different orderings of the entries, use a path-keyed
// map to make sure we don't start comparing unrelated entries.
diffs := map[string]*stateT{}
@ -405,7 +409,7 @@ func Compare(oldDh, newDh *DirectoryHierarchy, keys []string) ([]InodeDelta, err
if keys != nil {
var filterChanged []KeyDelta
for _, keyDiff := range changed {
if _, ok := keySet[]; ok {
if InKeywordSlice(, keys) {
filterChanged = append(filterChanged, keyDiff)
@ -325,7 +325,7 @@ func TestCompareKeys(t *testing.T) {
// Compare.
diffs, err := Compare(old, new, []string{"size"})
diffs, err := Compare(old, new, []Keyword{"size"})
if err != nil {
@ -21,7 +21,7 @@ type Entry struct {
Pos int // order in the spec
Raw string // file or directory name
Name string // file or directory name
Keywords []string // TODO(vbatts) maybe a keyword typed set of values?
Keywords []KeyVal // TODO(vbatts) maybe a keyword typed set of values?
Type EntryType
@ -94,23 +94,20 @@ func (e Entry) String() string {
if e.Type == DotDotType {
return e.Name
if e.Type == SpecialType || e.Type == FullType || inSlice("type=dir", e.Keywords) {
return fmt.Sprintf("%s %s", e.Name, strings.Join(e.Keywords, " "))
if e.Type == SpecialType || e.Type == FullType || inKeyValSlice("type=dir", e.Keywords) {
return fmt.Sprintf("%s %s", e.Name, strings.Join(KeyValToString(e.Keywords), " "))
return fmt.Sprintf(" %s %s", e.Name, strings.Join(e.Keywords, " "))
return fmt.Sprintf(" %s %s", e.Name, strings.Join(KeyValToString(e.Keywords), " "))
// AllKeys returns the full set of KeyVals for the given entry, based on the
// AllKeys returns the full set of KeyVal for the given entry, based on the
// /set keys as well as the entry-local keys. Entry-local keys always take
// precedence.
func (e Entry) AllKeys() KeyVals {
var kv KeyVals
func (e Entry) AllKeys() []KeyVal {
if e.Set != nil {
kv = MergeSet(e.Set.Keywords, e.Keywords)
} else {
kv = NewKeyVals(e.Keywords)
return MergeKeyValSet(e.Set.Keywords, e.Keywords)
return kv
return e.Keywords
// EntryType are the formats of lines in an mtree spec file
@ -26,26 +26,23 @@ func (dh DirectoryHierarchy) WriteTo(w io.Writer) (n int64, err error) {
return sum, nil
// CollectUsedKeywords collects and returns all the keywords used in a
// UsedKeywords collects and returns all the keywords used in a
// DirectoryHierarchy
func CollectUsedKeywords(dh *DirectoryHierarchy) []string {
if dh != nil {
usedkeywords := []string{}
for _, e := range dh.Entries {
switch e.Type {
case FullType, RelativeType, SpecialType:
if e.Type != SpecialType || e.Name == "/set" {
kvs := e.Keywords
for _, kv := range kvs {
kw := KeyVal(kv).Keyword()
if !inSlice(kw, usedkeywords) {
usedkeywords = append(usedkeywords, kw)
func (dh DirectoryHierarchy) UsedKeywords() []Keyword {
usedkeywords := []Keyword{}
for _, e := range dh.Entries {
switch e.Type {
case FullType, RelativeType, SpecialType:
if e.Type != SpecialType || e.Name == "/set" {
kvs := e.Keywords
for _, kv := range kvs {
kw := KeyVal(kv).Keyword()
if !InKeywordSlice(kw, usedkeywords) {
usedkeywords = append(usedkeywords, KeywordSynonym(string(kw)))
return usedkeywords
return nil
return usedkeywords
Normal file
Normal file
@ -0,0 +1,38 @@
package mtree
import (
var checklist = []struct {
blob string
set []Keyword
{blob: `
# machine: bananaboat
# tree: .git
# date: Wed Nov 16 14:54:17 2016
# .
/set type=file nlink=1 mode=0664 uid=1000 gid=100
. size=4096 type=dir mode=0755 nlink=8 time=1479326055.423853146
.COMMIT_EDITMSG.un~ size=1006 mode=0644 time=1479325423.450468662 sha1digest=dead0face
.TAG_EDITMSG.un~ size=1069 mode=0600 time=1471362316.801317529 sha256digest=dead0face
`, set: []Keyword{"size", "mode", "time", "sha256digest"}},
func TestUsedKeywords(t *testing.T) {
for i, item := range checklist {
dh, err := ParseSpec(strings.NewReader(item.blob))
if err != nil {
used := dh.UsedKeywords()
for _, k := range item.set {
if !InKeywordSlice(k, used) {
t.Errorf("%d: expected to find %q in %q", i, k, used)
Normal file
Normal file
@ -0,0 +1,168 @@
package mtree
import (
// KeywordFunc is the type of a function called on each file to be included in
// a DirectoryHierarchy, that will produce the string output of the keyword to
// be included for the file entry. Otherwise, empty string.
// io.Reader `r` is to the file stream for the file payload. While this
// function takes an io.Reader, the caller needs to reset it to the beginning
// for each new KeywordFunc
type KeywordFunc func(path string, info os.FileInfo, r io.Reader) (KeyVal, error)
var (
// KeywordFuncs is the map of all keywords (and the functions to produce them)
KeywordFuncs = map[Keyword]KeywordFunc{
"size": sizeKeywordFunc, // The size, in bytes, of the file
"type": typeKeywordFunc, // The type of the file
"time": timeKeywordFunc, // The last modification time of the file
"link": linkKeywordFunc, // The target of the symbolic link when type=link
"uid": uidKeywordFunc, // The file owner as a numeric value
"gid": gidKeywordFunc, // The file group as a numeric value
"nlink": nlinkKeywordFunc, // The number of hard links the file is expected to have
"uname": unameKeywordFunc, // The file owner as a symbolic name
"mode": modeKeywordFunc, // The current file's permissions as a numeric (octal) or symbolic value
"cksum": cksumKeywordFunc, // The checksum of the file using the default algorithm specified by the cksum(1) utility
"md5": hasherKeywordFunc("md5digest", md5.New), // The MD5 message digest of the file
"md5digest": hasherKeywordFunc("md5digest", md5.New), // A synonym for `md5`
"rmd160": hasherKeywordFunc("ripemd160digest", ripemd160.New), // The RIPEMD160 message digest of the file
"rmd160digest": hasherKeywordFunc("ripemd160digest", ripemd160.New), // A synonym for `rmd160`
"ripemd160digest": hasherKeywordFunc("ripemd160digest", ripemd160.New), // A synonym for `rmd160`
"sha1": hasherKeywordFunc("sha1digest", sha1.New), // The SHA1 message digest of the file
"sha1digest": hasherKeywordFunc("sha1digest", sha1.New), // A synonym for `sha1`
"sha256": hasherKeywordFunc("sha256digest", sha256.New), // The SHA256 message digest of the file
"sha256digest": hasherKeywordFunc("sha256digest", sha256.New), // A synonym for `sha256`
"sha384": hasherKeywordFunc("sha384digest", sha512.New384), // The SHA384 message digest of the file
"sha384digest": hasherKeywordFunc("sha384digest", sha512.New384), // A synonym for `sha384`
"sha512": hasherKeywordFunc("sha512digest", sha512.New), // The SHA512 message digest of the file
"sha512digest": hasherKeywordFunc("sha512digest", sha512.New), // A synonym for `sha512`
"flags": flagsKeywordFunc, // NOTE: this is a noop, but here to support the presence of the "flags" keyword.
// This is not an upstreamed keyword, but used to vary from "time", as tar
// archives do not store nanosecond precision. So comparing on "time" will
// be only seconds level accurate.
"tar_time": tartimeKeywordFunc, // The last modification time of the file, from a tar archive mtime
// This is not an upstreamed keyword, but a needed attribute for file validation.
// The pattern for this keyword key is prefixed by "xattr." followed by the extended attribute "namespace.key".
// The keyword value is the SHA1 digest of the extended attribute's value.
// In this way, the order of the keys does not matter, and the contents of the value is not revealed.
"xattr": xattrKeywordFunc,
"xattrs": xattrKeywordFunc,
var (
modeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
permissions := info.Mode().Perm()
if os.ModeSetuid&info.Mode() > 0 {
permissions |= (1 << 11)
if os.ModeSetgid&info.Mode() > 0 {
permissions |= (1 << 10)
if os.ModeSticky&info.Mode() > 0 {
permissions |= (1 << 9)
return KeyVal(fmt.Sprintf("mode=%#o", permissions)), nil
sizeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if sys, ok := info.Sys().(*tar.Header); ok {
if sys.Typeflag == tar.TypeSymlink {
return KeyVal(fmt.Sprintf("size=%d", len(sys.Linkname))), nil
return KeyVal(fmt.Sprintf("size=%d", info.Size())), nil
cksumKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if !info.Mode().IsRegular() {
return emptyKV, nil
sum, _, err := cksum(r)
if err != nil {
return emptyKV, err
return KeyVal(fmt.Sprintf("cksum=%d", sum)), nil
hasherKeywordFunc = func(name string, newHash func() hash.Hash) KeywordFunc {
return func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if !info.Mode().IsRegular() {
return emptyKV, nil
h := newHash()
if _, err := io.Copy(h, r); err != nil {
return emptyKV, err
return KeyVal(fmt.Sprintf("%s=%x", KeywordSynonym(name), h.Sum(nil))), nil
tartimeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
return KeyVal(fmt.Sprintf("tar_time=%d.%9.9d", info.ModTime().Unix(), 0)), nil
timeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
tSec := info.ModTime().Unix()
tNano := info.ModTime().Nanosecond()
return KeyVal(fmt.Sprintf("time=%d.%9.9d", tSec, tNano)), nil
linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if sys, ok := info.Sys().(*tar.Header); ok {
if sys.Linkname != "" {
linkname, err := Vis(sys.Linkname)
if err != nil {
return emptyKV, err
return KeyVal(fmt.Sprintf("link=%s", linkname)), nil
return emptyKV, nil
if info.Mode()&os.ModeSymlink != 0 {
str, err := os.Readlink(path)
if err != nil {
return emptyKV, err
linkname, err := Vis(str)
if err != nil {
return emptyKV, err
return KeyVal(fmt.Sprintf("link=%s", linkname)), nil
return emptyKV, nil
typeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if info.Mode().IsDir() {
return "type=dir", nil
if info.Mode().IsRegular() {
return "type=file", nil
if info.Mode()&os.ModeSocket != 0 {
return "type=socket", nil
if info.Mode()&os.ModeSymlink != 0 {
return "type=link", nil
if info.Mode()&os.ModeNamedPipe != 0 {
return "type=fifo", nil
if info.Mode()&os.ModeDevice != 0 {
if info.Mode()&os.ModeCharDevice != 0 {
return "type=char", nil
return "type=device", nil
return emptyKV, nil
@ -1,55 +1,97 @@
package mtree
import (
// KeywordFunc is the type of a function called on each file to be included in
// a DirectoryHierarchy, that will produce the string output of the keyword to
// be included for the file entry. Otherwise, empty string.
// io.Reader `r` is to the file stream for the file payload. While this
// function takes an io.Reader, the caller needs to reset it to the beginning
// for each new KeywordFunc
type KeywordFunc func(path string, info os.FileInfo, r io.Reader) (string, error)
// Keyword is the string name of a keyword, with some convenience functions for
// determining whether it is a default or bsd standard keyword.
type Keyword string
// Default returns whether this keyword is in the default set of keywords
func (k Keyword) Default() bool {
return inSlice(string(k), DefaultKeywords)
return InKeywordSlice(k, DefaultKeywords)
// Bsd returns whether this keyword is in the upstream FreeBSD mtree(8)
func (k Keyword) Bsd() bool {
return inSlice(string(k), BsdKeywords)
return InKeywordSlice(k, BsdKeywords)
// Synonym returns the canonical name for this keyword. This provides the
// same functionality as KeywordSynonym()
func (k Keyword) Synonym() Keyword {
return KeywordSynonym(string(k))
// InKeywordSlice checks for the presence of `a` in `list`
func InKeywordSlice(a Keyword, list []Keyword) bool {
for _, b := range list {
if b == a {
return true
return false
func inKeyValSlice(a KeyVal, list []KeyVal) bool {
for _, b := range list {
if b == a {
return true
return false
// ToKeywords makes a list of Keyword from a list of string
func ToKeywords(list []string) []Keyword {
ret := make([]Keyword, len(list))
for i := range list {
ret[i] = Keyword(list[i])
return ret
// FromKeywords makes a list of string from a list of Keyword
func FromKeywords(list []Keyword) []string {
ret := make([]string, len(list))
for i := range list {
ret[i] = string(list[i])
return ret
// KeyValToString constructs a list of string from the list of KeyVal
func KeyValToString(list []KeyVal) []string {
ret := make([]string, len(list))
for i := range list {
ret[i] = string(list[i])
return ret
// StringToKeyVals constructs a list of KeyVal from the list of strings, like "keyword=value"
func StringToKeyVals(list []string) []KeyVal {
ret := make([]KeyVal, len(list))
for i := range list {
ret[i] = KeyVal(list[i])
return ret
// KeyVal is a "keyword=value"
type KeyVal string
// Keyword is the mapping to the available keywords
func (kv KeyVal) Keyword() string {
func (kv KeyVal) Keyword() Keyword {
if !strings.Contains(string(kv), "=") {
return ""
return Keyword("")
chunks := strings.SplitN(strings.TrimSpace(string(kv)), "=", 2)[0]
if !strings.Contains(chunks, ".") {
return chunks
return Keyword(chunks)
return strings.SplitN(chunks, ".", 2)[0]
return Keyword(strings.SplitN(chunks, ".", 2)[0])
// KeywordSuffix is really only used for xattr, as the keyword is a prefix to
@ -78,7 +120,7 @@ func (kv KeyVal) ChangeValue(newval string) string {
return fmt.Sprintf("%s=%s", kv.Keyword(), newval)
// KeyValEqual returns whether two KeyVals are equivalent. This takes
// KeyValEqual returns whether two KeyVal are equivalent. This takes
// care of certain odd cases such as tar_time, and should be used over
// using == comparisons directly unless you really know what you're
// doing.
@ -87,35 +129,49 @@ func KeyValEqual(a, b KeyVal) bool {
return a.Keyword() == b.Keyword() && a.Value() == b.Value()
// keywordSelector takes an array of "keyword=value" and filters out that only the set of words
func keywordSelector(keyval, words []string) []string {
retList := []string{}
// keyvalSelector takes an array of KeyVal ("keyword=value") and returns only those whose keyword is in keyset
func keyvalSelector(keyval []KeyVal, keyset []Keyword) []KeyVal {
retList := []KeyVal{}
for _, kv := range keyval {
if inSlice(KeyVal(kv).Keyword(), words) {
if InKeywordSlice(kv.Keyword(), keyset) {
retList = append(retList, kv)
return retList
// NewKeyVals constructs a list of KeyVal from the list of strings, like "keyword=value"
func NewKeyVals(keyvals []string) KeyVals {
kvs := make(KeyVals, len(keyvals))
for i := range keyvals {
kvs[i] = KeyVal(keyvals[i])
func keyValDifference(this, that []KeyVal) []KeyVal {
if len(this) == 0 {
return that
return kvs
diff := []KeyVal{}
for _, kv := range this {
if !inKeyValSlice(kv, that) {
diff = append(diff, kv)
return diff
func keyValCopy(set []KeyVal) []KeyVal {
ret := make([]KeyVal, len(set))
for i := range set {
ret[i] = set[i]
return ret
// KeyVals is a list of KeyVal
type KeyVals []KeyVal
// Has looks for "keyword" in the list of KeyVal, and returns the
// corresponding KeyVal, else an empty string.
func (kvs KeyVals) Has(keyword string) KeyVal {
for i := range kvs {
if kvs[i].Keyword() == keyword {
return kvs[i]
func Has(keyvals []KeyVal, keyword string) KeyVal {
return HasKeyword(keyvals, Keyword(keyword))
// HasKeyword looks for "keyword" in the list of KeyVal, and returns the
// corresponding KeyVal, else an empty string.
func HasKeyword(keyvals []KeyVal, keyword Keyword) KeyVal {
for i := range keyvals {
if keyvals[i].Keyword() == keyword {
return keyvals[i]
return emptyKV
@ -125,20 +181,27 @@ var emptyKV = KeyVal("")
// MergeSet takes the current setKeyVals, and then applies the entryKeyVals
// such that the entry's values win. The union is returned.
func MergeSet(setKeyVals, entryKeyVals []string) KeyVals {
retList := NewKeyVals(append([]string{}, setKeyVals...))
eKVs := NewKeyVals(entryKeyVals)
seenKeywords := []string{}
func MergeSet(setKeyVals, entryKeyVals []string) []KeyVal {
retList := StringToKeyVals(setKeyVals)
eKVs := StringToKeyVals(entryKeyVals)
return MergeKeyValSet(retList, eKVs)
// MergeKeyValSet does a merge of the two sets of KeyVal, and the KeyVal of
// entryKeyVals win when there is a duplicate Keyword.
func MergeKeyValSet(setKeyVals, entryKeyVals []KeyVal) []KeyVal {
retList := keyValCopy(setKeyVals)
seenKeywords := []Keyword{}
for i := range retList {
word := retList[i].Keyword()
if ekv := eKVs.Has(word); ekv != emptyKV {
if ekv := HasKeyword(entryKeyVals, word); ekv != emptyKV {
retList[i] = ekv
seenKeywords = append(seenKeywords, word)
for i := range eKVs {
if !inSlice(eKVs[i].Keyword(), seenKeywords) {
retList = append(retList, eKVs[i])
for i := range entryKeyVals {
if !InKeywordSlice(entryKeyVals[i].Keyword(), seenKeywords) {
retList = append(retList, entryKeyVals[i])
return retList
@ -147,7 +210,7 @@ func MergeSet(setKeyVals, entryKeyVals []string) KeyVals {
var (
// DefaultKeywords has the several default keyword producers (uid, gid,
// mode, nlink, type, size, mtime)
DefaultKeywords = []string{
DefaultKeywords = []Keyword{
@ -160,7 +223,7 @@ var (
// DefaultTarKeywords has keywords that should be used when creating a manifest from
// an archive. Currently, evaluating the number of hardlinks has not been implemented yet.
DefaultTarKeywords = []string{
DefaultTarKeywords = []Keyword{
@ -171,7 +234,7 @@ var (
// BsdKeywords is the set of keywords that is only in the upstream FreeBSD mtree
BsdKeywords = []string{
BsdKeywords = []Keyword{
"flags", // this one is really mostly BSD specific ...
@ -205,151 +268,36 @@ var (
// SetKeywords is the default set of keywords calculated for a `/set` SpecialType
SetKeywords = []string{
SetKeywords = []Keyword{
// KeywordFuncs is the map of all keywords (and the functions to produce them)
KeywordFuncs = map[string]KeywordFunc{
"size": sizeKeywordFunc, // The size, in bytes, of the file
"type": typeKeywordFunc, // The type of the file
"time": timeKeywordFunc, // The last modification time of the file
"link": linkKeywordFunc, // The target of the symbolic link when type=link
"uid": uidKeywordFunc, // The file owner as a numeric value
"gid": gidKeywordFunc, // The file group as a numeric value
"nlink": nlinkKeywordFunc, // The number of hard links the file is expected to have
"uname": unameKeywordFunc, // The file owner as a symbolic name
"mode": modeKeywordFunc, // The current file's permissions as a numeric (octal) or symbolic value
"cksum": cksumKeywordFunc, // The checksum of the file using the default algorithm specified by the cksum(1) utility
"md5": hasherKeywordFunc("md5digest", md5.New), // The MD5 message digest of the file
"md5digest": hasherKeywordFunc("md5digest", md5.New), // A synonym for `md5`
"rmd160": hasherKeywordFunc("ripemd160digest", ripemd160.New), // The RIPEMD160 message digest of the file
"rmd160digest": hasherKeywordFunc("ripemd160digest", ripemd160.New), // A synonym for `rmd160`
"ripemd160digest": hasherKeywordFunc("ripemd160digest", ripemd160.New), // A synonym for `rmd160`
"sha1": hasherKeywordFunc("sha1digest", sha1.New), // The SHA1 message digest of the file
"sha1digest": hasherKeywordFunc("sha1digest", sha1.New), // A synonym for `sha1`
"sha256": hasherKeywordFunc("sha256digest", sha256.New), // The SHA256 message digest of the file
"sha256digest": hasherKeywordFunc("sha256digest", sha256.New), // A synonym for `sha256`
"sha384": hasherKeywordFunc("sha384digest", sha512.New384), // The SHA384 message digest of the file
"sha384digest": hasherKeywordFunc("sha384digest", sha512.New384), // A synonym for `sha384`
"sha512": hasherKeywordFunc("sha512digest", sha512.New), // The SHA512 message digest of the file
"sha512digest": hasherKeywordFunc("sha512digest", sha512.New), // A synonym for `sha512`
"flags": flagsKeywordFunc, // NOTE: this is a noop, but here to support the presence of the "flags" keyword.
// This is not an upstreamed keyword, but used to vary from "time", as tar
// archives do not store nanosecond precision. So comparing on "time" will
// be only seconds level accurate.
"tar_time": tartimeKeywordFunc, // The last modification time of the file, from a tar archive mtime
// This is not an upstreamed keyword, but a needed attribute for file validation.
// The pattern for this keyword key is prefixed by "xattr." followed by the extended attribute "namespace.key".
// The keyword value is the SHA1 digest of the extended attribute's value.
// In this way, the order of the keys does not matter, and the contents of the value is not revealed.
"xattr": xattrKeywordFunc,
"xattrs": xattrKeywordFunc,
var (
modeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
permissions := info.Mode().Perm()
if os.ModeSetuid&info.Mode() > 0 {
permissions |= (1 << 11)
if os.ModeSetgid&info.Mode() > 0 {
permissions |= (1 << 10)
if os.ModeSticky&info.Mode() > 0 {
permissions |= (1 << 9)
return fmt.Sprintf("mode=%#o", permissions), nil
// KeywordSynonym returns the canonical name for keywords that have synonyms,
// and just returns the name provided if there is no synonym. In this way it
// ought to be safe to wrap any keyword name.
func KeywordSynonym(name string) Keyword {
var retname string
switch name {
case "md5":
retname = "md5digest"
case "rmd160":
retname = "ripemd160digest"
case "rmd160digest":
retname = "ripemd160digest"
case "sha1":
retname = "sha1digest"
case "sha256":
retname = "sha256digest"
case "sha384":
retname = "sha384digest"
case "sha512":
retname = "sha512digest"
case "xattrs":
retname = "xattr"
retname = name
sizeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if sys, ok := info.Sys().(*tar.Header); ok {
if sys.Typeflag == tar.TypeSymlink {
return fmt.Sprintf("size=%d", len(sys.Linkname)), nil
return fmt.Sprintf("size=%d", info.Size()), nil
cksumKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if !info.Mode().IsRegular() {
return "", nil
sum, _, err := cksum(r)
if err != nil {
return "", err
return fmt.Sprintf("cksum=%d", sum), nil
hasherKeywordFunc = func(name string, newHash func() hash.Hash) KeywordFunc {
return func(path string, info os.FileInfo, r io.Reader) (string, error) {
if !info.Mode().IsRegular() {
return "", nil
h := newHash()
if _, err := io.Copy(h, r); err != nil {
return "", err
return fmt.Sprintf("%s=%x", name, h.Sum(nil)), nil
tartimeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return fmt.Sprintf("tar_time=%d.%9.9d", info.ModTime().Unix(), 0), nil
timeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
tSec := info.ModTime().Unix()
tNano := info.ModTime().Nanosecond()
return fmt.Sprintf("time=%d.%9.9d", tSec, tNano), nil
linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if sys, ok := info.Sys().(*tar.Header); ok {
if sys.Linkname != "" {
linkname, err := Vis(sys.Linkname)
if err != nil {
return "", err
return fmt.Sprintf("link=%s", linkname), nil
return "", nil
if info.Mode()&os.ModeSymlink != 0 {
str, err := os.Readlink(path)
if err != nil {
return "", err
linkname, err := Vis(str)
if err != nil {
return "", err
return fmt.Sprintf("link=%s", linkname), nil
return "", nil
typeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if info.Mode().IsDir() {
return "type=dir", nil
if info.Mode().IsRegular() {
return "type=file", nil
if info.Mode()&os.ModeSocket != 0 {
return "type=socket", nil
if info.Mode()&os.ModeSymlink != 0 {
return "type=link", nil
if info.Mode()&os.ModeNamedPipe != 0 {
return "type=fifo", nil
if info.Mode()&os.ModeDevice != 0 {
if info.Mode()&os.ModeCharDevice != 0 {
return "type=char", nil
return "type=device", nil
return "", nil
return Keyword(retname)
@ -12,46 +12,46 @@ import (
var (
flagsKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
flagsKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
// ideally this will pull in from here
return "", nil
return emptyKV, nil
unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uname=%s", hdr.Uname), nil
return KeyVal(fmt.Sprintf("uname=%s", hdr.Uname)), nil
stat := info.Sys().(*syscall.Stat_t)
u, err := user.LookupId(fmt.Sprintf("%d", stat.Uid))
if err != nil {
return "", err
return emptyKV, err
return fmt.Sprintf("uname=%s", u.Username), nil
return KeyVal(fmt.Sprintf("uname=%s", u.Username)), nil
uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uid=%d", hdr.Uid), nil
return KeyVal(fmt.Sprintf("uid=%d", hdr.Uid)), nil
stat := info.Sys().(*syscall.Stat_t)
return fmt.Sprintf("uid=%d", stat.Uid), nil
return KeyVal(fmt.Sprintf("uid=%d", stat.Uid)), nil
gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("gid=%d", hdr.Gid), nil
return KeyVal(fmt.Sprintf("gid=%d", hdr.Gid)), nil
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
return fmt.Sprintf("gid=%d", stat.Gid), nil
return KeyVal(fmt.Sprintf("gid=%d", stat.Gid)), nil
return "", nil
return emptyKV, nil
nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
return fmt.Sprintf("nlink=%d", stat.Nlink), nil
return KeyVal(fmt.Sprintf("nlink=%d", stat.Nlink)), nil
return "", nil
return emptyKV, nil
xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return "", nil
xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
return emptyKV, nil
@ -17,71 +17,71 @@ import (
var (
// this is bsd specific
flagsKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return "", nil
flagsKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
return emptyKV, nil
unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uname=%s", hdr.Uname), nil
return KeyVal(fmt.Sprintf("uname=%s", hdr.Uname)), nil
stat := info.Sys().(*syscall.Stat_t)
u, err := user.LookupId(fmt.Sprintf("%d", stat.Uid))
if err != nil {
return "", err
return emptyKV, err
return fmt.Sprintf("uname=%s", u.Username), nil
return KeyVal(fmt.Sprintf("uname=%s", u.Username)), nil
uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uid=%d", hdr.Uid), nil
return KeyVal(fmt.Sprintf("uid=%d", hdr.Uid)), nil
stat := info.Sys().(*syscall.Stat_t)
return fmt.Sprintf("uid=%d", stat.Uid), nil
return KeyVal(fmt.Sprintf("uid=%d", stat.Uid)), nil
gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("gid=%d", hdr.Gid), nil
return KeyVal(fmt.Sprintf("gid=%d", hdr.Gid)), nil
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
return fmt.Sprintf("gid=%d", stat.Gid), nil
return KeyVal(fmt.Sprintf("gid=%d", stat.Gid)), nil
return "", nil
return emptyKV, nil
nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
return fmt.Sprintf("nlink=%d", stat.Nlink), nil
return KeyVal(fmt.Sprintf("nlink=%d", stat.Nlink)), nil
return "", nil
return emptyKV, nil
xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (KeyVal, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
if len(hdr.Xattrs) == 0 {
return "", nil
return emptyKV, nil
klist := []string{}
klist := []KeyVal{}
for k, v := range hdr.Xattrs {
klist = append(klist, fmt.Sprintf("xattr.%s=%s", k, base64.StdEncoding.EncodeToString([]byte(v))))
klist = append(klist, KeyVal(fmt.Sprintf("xattr.%s=%s", k, base64.StdEncoding.EncodeToString([]byte(v)))))
return strings.Join(klist, " "), nil
return KeyVal(strings.Join(KeyValToString(klist), " ")), nil
if !info.Mode().IsRegular() && !info.Mode().IsDir() {
return "", nil
return emptyKV, nil
xlist, err := xattr.List(path)
if err != nil {
return "", err
return emptyKV, err
klist := make([]string, len(xlist))
klist := make([]KeyVal, len(xlist))
for i := range xlist {
data, err := xattr.Get(path, xlist[i])
if err != nil {
return "", err
return emptyKV, err
klist[i] = fmt.Sprintf("xattr.%s=%s", xlist[i], base64.StdEncoding.EncodeToString(data))
klist[i] = KeyVal(fmt.Sprintf("xattr.%s=%s", xlist[i], base64.StdEncoding.EncodeToString(data)))
return strings.Join(klist, " "), nil
return KeyVal(strings.Join(KeyValToString(klist), " ")), nil
@ -54,7 +54,7 @@ func TestKeywordsTimeNano(t *testing.T) {
{857125628319, 0},
} {
mtime := time.Unix(test.sec, test.nsec)
expected := fmt.Sprintf("time=%d.%9.9d", test.sec, test.nsec)
expected := KeyVal(fmt.Sprintf("time=%d.%9.9d", test.sec, test.nsec))
got, err := timeKeywordFunc("", fakeFileInfo{
mtime: mtime,
}, nil)
@ -81,7 +81,7 @@ func TestKeywordsTimeTar(t *testing.T) {
{857125628319, 0},
} {
mtime := time.Unix(test.sec, test.nsec)
expected := fmt.Sprintf("tar_time=%d.%9.9d", test.sec, 0)
expected := KeyVal(fmt.Sprintf("tar_time=%d.%9.9d", test.sec, 0))
got, err := tartimeKeywordFunc("", fakeFileInfo{
mtime: mtime,
}, nil)
@ -93,3 +93,34 @@ func TestKeywordsTimeTar(t *testing.T) {
func TestKeywordSynonym(t *testing.T) {
checklist := []struct {
give string
expect Keyword
{give: "time", expect: "time"},
{give: "md5", expect: "md5digest"},
{give: "md5digest", expect: "md5digest"},
{give: "rmd160", expect: "ripemd160digest"},
{give: "rmd160digest", expect: "ripemd160digest"},
{give: "ripemd160digest", expect: "ripemd160digest"},
{give: "sha1", expect: "sha1digest"},
{give: "sha1digest", expect: "sha1digest"},
{give: "sha256", expect: "sha256digest"},
{give: "sha256digest", expect: "sha256digest"},
{give: "sha384", expect: "sha384digest"},
{give: "sha384digest", expect: "sha384digest"},
{give: "sha512", expect: "sha512digest"},
{give: "sha512digest", expect: "sha512digest"},
{give: "xattr", expect: "xattr"},
{give: "xattrs", expect: "xattr"},
for i, check := range checklist {
got := KeywordSynonym(check.give)
if got != check.expect {
t.Errorf("%d: expected %q; got %q", i, check.expect, got)
@ -48,7 +48,7 @@ func ParseSpec(r io.Reader) (*DirectoryHierarchy, error) {
// parse the options
f := strings.Fields(str)
e.Name = f[0]
e.Keywords = f[1:]
e.Keywords = StringToKeyVals(f[1:])
if e.Name == "/set" {
creator.curSet = &e
} else if e.Name == "/unset" {
@ -80,7 +80,7 @@ func ParseSpec(r io.Reader) (*DirectoryHierarchy, error) {
} else {
e.Type = RelativeType
e.Keywords = f[1:]
e.Keywords = StringToKeyVals(f[1:])
// TODO: gather keywords if using tar stream
e.Parent = creator.curDir
for i := range e.Keywords {
@ -17,11 +17,15 @@ type Streamer interface {
Hierarchy() (*DirectoryHierarchy, error)
var tarDefaultSetKeywords = []string{"type=file", "flags=none", "mode=0664"}
var tarDefaultSetKeywords = []KeyVal{
// NewTarStreamer streams a tar archive and creates a file hierarchy based off
// of the tar metadata headers
func NewTarStreamer(r io.Reader, keywords []string) Streamer {
func NewTarStreamer(r io.Reader, keywords []Keyword) Streamer {
pR, pW := io.Pipe()
ts := &tarStream{
pipeReader: pR,
@ -45,17 +49,17 @@ type tarStream struct {
pipeWriter *io.PipeWriter
teeReader io.Reader
tarReader *tar.Reader
keywords []string
keywords []Keyword
err error
func (ts *tarStream) readHeaders() {
// remove "time" keyword
notimekws := []string{}
notimekws := []Keyword{}
for _, kw := range ts.keywords {
if !inSlice(kw, notimekws) {
if !InKeywordSlice(kw, notimekws) {
if kw == "time" {
if !inSlice("tar_time", ts.keywords) {
if !InKeywordSlice("tar_time", ts.keywords) {
notimekws = append(notimekws, "tar_time")
} else {
@ -74,7 +78,7 @@ func (ts *tarStream) readHeaders() {
Type: CommentType,
Set: nil,
Keywords: []string{"type=dir"},
Keywords: []KeyVal{"type=dir"},
metadataEntries := signatureEntries("<user specified tar archive>")
for _, e := range metadataEntries {
@ -242,7 +246,7 @@ func populateTree(root, e *Entry, hdr *tar.Header) error {
Name: encoded,
Type: RelativeType,
Parent: parent,
Keywords: []string{"type=dir"}, // temp data
Keywords: []KeyVal{"type=dir"}, // temp data
Set: nil, // temp data
pathname, err := newEntry.Path()
@ -276,7 +280,7 @@ func populateTree(root, e *Entry, hdr *tar.Header) error {
// root: the "head" of the sub-tree to flatten
// creator: a dhCreator that helps with the '/set' keyword
// keywords: keywords specified by the user that should be evaluated
func flatten(root *Entry, creator *dhCreator, keywords []string) {
func flatten(root *Entry, creator *dhCreator, keywords []Keyword) {
if root == nil || creator == nil {
@ -292,18 +296,19 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
if root.Set != nil {
// Check if we need a new set
consolidatedKeys := keyvalSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords)
if creator.curSet == nil {
creator.curSet = &Entry{
Type: SpecialType,
Name: "/set",
Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords),
Keywords: consolidatedKeys,
Pos: len(creator.DH.Entries),
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
} else {
needNewSet := false
for _, k := range root.Set.Keywords {
if !inSlice(k, creator.curSet.Keywords) {
if !inKeyValSlice(k, creator.curSet.Keywords) {
needNewSet = true
@ -313,7 +318,7 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
Name: "/set",
Type: SpecialType,
Pos: len(creator.DH.Entries),
Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords),
Keywords: consolidatedKeys,
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
@ -331,7 +336,7 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
root.Set = creator.curSet
if creator.curSet != nil {
root.Keywords = setDifference(root.Keywords, creator.curSet.Keywords)
root.Keywords = keyValDifference(root.Keywords, creator.curSet.Keywords)
root.Pos = len(creator.DH.Entries)
creator.DH.Entries = append(creator.DH.Entries, *root)
@ -376,11 +381,11 @@ func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks boo
linkfile.Keywords = basefile.Keywords
if countlinks {
linkfile.Keywords = append(linkfile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1))
linkfile.Keywords = append(linkfile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
if countlinks {
basefile.Keywords = append(basefile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1))
basefile.Keywords = append(basefile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
@ -410,19 +415,6 @@ func filter(root *Entry, p func(*Entry) bool) []Entry {
return nil
func setDifference(this, that []string) []string {
if len(this) == 0 {
return that
diff := []string{}
for _, kv := range this {
if !inSlice(kv, that) {
diff = append(diff, kv)
return diff
func (ts *tarStream) setErr(err error) {
ts.err = err
@ -444,7 +436,7 @@ func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
if ts.root == nil {
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
resolveHardlinks(ts.root, ts.hardlinks, inSlice("nlink", ts.keywords))
resolveHardlinks(ts.root, ts.hardlinks, InKeywordSlice(Keyword("nlink"), ts.keywords))
flatten(ts.root, &ts.creator, ts.keywords)
return ts.creator.DH, nil
@ -128,7 +128,7 @@ func TestArchiveCreation(t *testing.T) {
if err != nil {
str := NewTarStreamer(fh, []string{"sha1"})
str := NewTarStreamer(fh, []Keyword{"sha1"})
if _, err := io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
@ -145,7 +145,7 @@ func TestArchiveCreation(t *testing.T) {
// Test the tar manifest against the actual directory
res, err := Check("./testdata/collection", tdh, []string{"sha1"})
res, err := Check("./testdata/collection", tdh, []Keyword{"sha1"})
if err != nil {
@ -158,7 +158,7 @@ func TestArchiveCreation(t *testing.T) {
// Test the tar manifest against itself
res, err = TarCheck(tdh, tdh, []string{"sha1"})
res, err = TarCheck(tdh, tdh, []Keyword{"sha1"})
if err != nil {
@ -170,11 +170,11 @@ func TestArchiveCreation(t *testing.T) {
// Validate the directory manifest against the archive
dh, err := Walk("./testdata/collection", nil, []string{"sha1"})
dh, err := Walk("./testdata/collection", nil, []Keyword{"sha1"})
if err != nil {
res, err = TarCheck(tdh, dh, []string{"sha1"})
res, err = TarCheck(tdh, dh, []Keyword{"sha1"})
if err != nil {
@ -212,7 +212,7 @@ func TestTreeTraversal(t *testing.T) {
res, err := TarCheck(tdh, tdh, []string{"sha1"})
res, err := TarCheck(tdh, tdh, []Keyword{"sha1"})
if err != nil {
@ -224,7 +224,7 @@ func TestTreeTraversal(t *testing.T) {
// top-level "." directory will contain contents of traversal.tar
res, err = Check("./testdata/.", tdh, []string{"sha1"})
res, err = Check("./testdata/.", tdh, []Keyword{"sha1"})
if err != nil {
@ -262,7 +262,7 @@ func TestTreeTraversal(t *testing.T) {
// Implied top-level "." directory will contain the contents of singlefile.tar
res, err = Check("./testdata/.", tdh, []string{"sha1"})
res, err = Check("./testdata/.", tdh, []Keyword{"sha1"})
if err != nil {
Normal file
Normal file
@ -0,0 +1,24 @@
set -e
#set -x

# NOTE(review): ${root} is used below but its assignment is not visible in
# this span; confirm it is set before these lines run.
name=$(basename "$0")
gomtree=$(readlink -f "${root}/gomtree")
t=$(mktemp -d /tmp/go-mtree.XXXXXX)
# Remove the scratch directory on every exit path. With `set -e`, a failing
# command aborts the script immediately, so a trailing `rm -rf` alone would
# leave the directory behind whenever a check fails.
trap 'rm -rf "${t}"' EXIT

echo "[${name}] Running in ${t}"

pushd "${root}"

# Snapshot the tree at HEAD into a tar archive and unpack it into a scratch
# directory that the checks below operate on.
git archive --format=tar HEAD^{tree} . > "${t}/${name}.tar"

mkdir -p "${t}/extract"
tar -C "${t}/extract/" -xf "${t}/${name}.tar"

## This checks that keyword synonyms are respected: a manifest is written
## using one spelling of the keyword, then gomtree is run against that
## manifest using the other spelling, in both directions.
"${gomtree}" -k sha1digest -c -p "${t}/extract/" > "${t}/${name}.mtree"
"${gomtree}" -k sha1 -f "${t}/${name}.mtree" -p "${t}/extract/"
"${gomtree}" -k sha1 -c -p "${t}/extract/" > "${t}/${name}.mtree"
"${gomtree}" -k sha1digest -f "${t}/${name}.mtree" -p "${t}/extract/"
Normal file
Normal file
@ -0,0 +1,19 @@
set -e
# Without pipefail only the last command of a pipeline decides its status,
# so a failing `git archive` below would be masked by a succeeding `tar`.
set -o pipefail

# NOTE(review): ${root} is used below but its assignment is not visible in
# this span; confirm it is set before these lines run.
name=$(basename "$0")
gomtree=$(readlink -f "${root}/gomtree")
t=$(mktemp -d /tmp/go-mtree.XXXXXX)
# Remove the scratch directory on every exit path. With `set -e`, a failing
# command aborts the script immediately, so a trailing `rm -rf` alone would
# leave the directory behind whenever a check fails.
trap 'rm -rf "${t}"' EXIT

echo "[${name}] Running in ${t}"

pushd "${root}"

# Unpack the tree at HEAD straight into a scratch directory.
mkdir -p "${t}/extract"
git archive --format=tar HEAD^{tree} . | tar -C "${t}/extract/" -x

# Write a manifest with the sha1digest keyword, then run gomtree against it
# with md5digest.
# NOTE(review): md5digest is not the keyword the manifest was written with;
# confirm the intended expectation of this check.
"${gomtree}" -k sha1digest -c -p "${t}/extract/" > "${t}/${name}.mtree"
"${gomtree}" -f "${t}/${name}.mtree" -k md5digest -p "${t}/extract/"
@ -2,6 +2,11 @@ package mtree
import "fmt"
const (
// AppName is the name of this library/application
AppName = "gomtree"
const (
// VersionMajor is for API-incompatible changes
VersionMajor = 0
@ -15,12 +15,12 @@ import (
// returns true, then the path is not included in the spec.
type ExcludeFunc func(path string, info os.FileInfo) bool
var defaultSetKeywords = []string{"type=file", "nlink=1", "flags=none", "mode=0664"}
var defaultSetKeywords = []KeyVal{"type=file", "nlink=1", "flags=none", "mode=0664"}
// Walk from root directory and assemble the DirectoryHierarchy. excludes
// provided are used to skip paths. keywords are the set to collect from the
// walked paths. The recommended default list is DefaultKeywords.
func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHierarchy, error) {
func Walk(root string, excludes []ExcludeFunc, keywords []Keyword) (*DirectoryHierarchy, error) {
creator := dhCreator{DH: &DirectoryHierarchy{}}
// insert signature and metadata comments first (user, machine, tree, date)
metadataEntries := signatureEntries(root)
@ -32,7 +32,7 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
if err != nil {
return err
for _, ex := range exlcudes {
for _, ex := range excludes {
if ex(path, info) {
return nil
@ -71,7 +71,7 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
Name: "/set",
Type: SpecialType,
Pos: len(creator.DH.Entries),
Keywords: keywordSelector(defaultSetKeywords, keywords),
Keywords: keyvalSelector(defaultSetKeywords, keywords),
for _, keyword := range SetKeywords {
err := func() error {
@ -103,7 +103,7 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
creator.DH.Entries = append(creator.DH.Entries, e)
} else if creator.curSet != nil {
// check the attributes of the /set keywords and re-set if changed
klist := []string{}
klist := []KeyVal{}
for _, keyword := range SetKeywords {
err := func() error {
var r io.Reader
@ -135,7 +135,7 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
needNewSet := false
for _, k := range klist {
if !inSlice(k, creator.curSet.Keywords) {
if !inKeyValSlice(k, creator.curSet.Keywords) {
needNewSet = true
@ -144,7 +144,7 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
Name: "/set",
Type: SpecialType,
Pos: len(creator.DH.Entries),
Keywords: keywordSelector(append(defaultSetKeywords, klist...), keywords),
Keywords: keyvalSelector(append(defaultSetKeywords, klist...), keywords),
creator.curSet = &e
creator.DH.Entries = append(creator.DH.Entries, e)
@ -181,7 +181,7 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
if err != nil {
return err
if str != "" && !inSlice(str, creator.curSet.Keywords) {
if str != "" && !inKeyValSlice(str, creator.curSet.Keywords) {
e.Keywords = append(e.Keywords, str)
return nil
@ -209,15 +209,6 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
return creator.DH, err
// inSlice reports whether the string a occurs anywhere in list. It returns
// false for a nil or empty list.
func inSlice(a string, list []string) bool {
	for i := range list {
		if list[i] == a {
			return true
		}
	}
	return false
}
// startWalk walks the file tree rooted at root, calling walkFn for each file or
// directory in the tree, including root. All errors that arise visiting files
// and directories are filtered by walkFn. The files are walked in lexical
Add table
Reference in a new issue