diff --git a/check.go b/check.go index 87daf54..a565251 100644 --- a/check.go +++ b/check.go @@ -3,14 +3,16 @@ package mtree import ( "fmt" "os" - "path/filepath" "sort" + "strings" ) // Result of a Check type Result struct { // list of any failures in the Check Failures []Failure `json:"failures"` + Missing []Entry + Extra []Entry } // Failure of a particular keyword for a path @@ -51,7 +53,8 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err creator.curSet = nil } case RelativeType, FullType: - info, err := os.Lstat(e.Path()) + filename := e.Path() + info, err := os.Lstat(filename) if err != nil { return nil, err } @@ -64,17 +67,31 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err } for _, kv := range kvs { - keywordFunc, ok := KeywordFuncs[kv.Keyword()] + kw := kv.Keyword() + // 'tar_time' keyword evaluation wins against 'time' keyword evaluation + if kv.Keyword() == "time" && inSlice("tar_time", keywords) { + kw = "tar_time" + tartime := fmt.Sprintf("%s.%s", (strings.Split(kv.Value(), ".")[0]), "000000000") + kv = KeyVal(KeyVal(kw).ChangeValue(tartime)) + } + + keywordFunc, ok := KeywordFuncs[kw] if !ok { return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path()) } if keywords != nil && !inSlice(kv.Keyword(), keywords) { continue } - curKeyVal, err := keywordFunc(filepath.Join(root, e.Path()), info) + fh, err := os.Open(filename) if err != nil { return nil, err } + curKeyVal, err := keywordFunc(filename, info, fh) + if err != nil { + fh.Close() + return nil, err + } + fh.Close() if string(kv) != curKeyVal { failure := Failure{Path: e.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: KeyVal(curKeyVal).Value()} result.Failures = append(result.Failures, failure) @@ -84,3 +101,99 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err } return &result, nil } + +// TarCheck is the tar equivalent of checking a file hierarchy spec against a tar stream to +// determine if files have been changed. +func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) { + var result Result + var err error + var tarRoot *Entry + + for _, e := range tarDH.Entries { + if e.Name == "." { + tarRoot = &e + break + } + } + tarRoot.Next = &Entry{ + Name: "seen", + Type: CommentType, + } + curDir := tarRoot + creator := dhCreator{DH: dh} + sort.Sort(byPos(creator.DH.Entries)) + + var outOfTree bool + for i, e := range creator.DH.Entries { + switch e.Type { + case SpecialType: + if e.Name == "/set" { + creator.curSet = &creator.DH.Entries[i] + } else if e.Name == "/unset" { + creator.curSet = nil + } + case RelativeType, FullType: + if outOfTree { + return &result, fmt.Errorf("No parent node from %s", e.Path()) + } + // TODO: handle the case where "." is not the first Entry to be found + tarEntry := curDir.Descend(e.Name) + if tarEntry == nil { + result.Missing = append(result.Missing, e) + continue + } + + tarEntry.Next = &Entry{ + Type: CommentType, + Name: "seen", + } + + // expected values from file hierarchy spec + var kvs KeyVals + if creator.curSet != nil { + kvs = MergeSet(creator.curSet.Keywords, e.Keywords) + } else { + kvs = NewKeyVals(e.Keywords) + } + + // actual + var tarkvs KeyVals + if tarEntry.Set != nil { + tarkvs = MergeSet(tarEntry.Set.Keywords, tarEntry.Keywords) + } else { + tarkvs = NewKeyVals(tarEntry.Keywords) + } + + for _, kv := range kvs { + if _, ok := KeywordFuncs[kv.Keyword()]; !ok { + return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path()) + } + if keywords != nil && !inSlice(kv.Keyword(), keywords) { + continue + } + if tarkv := tarkvs.Has(kv.Keyword()); tarkv != emptyKV { + if string(tarkv) != string(kv) { + failure := Failure{Path: tarEntry.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: tarkv.Value()} + result.Failures = append(result.Failures, failure) + } + } + } + // Step into a directory + if tarEntry.Prev != nil { + curDir = tarEntry + } + case DotDotType: + if outOfTree { + return &result, fmt.Errorf("No parent node.") + } + curDir = curDir.Ascend() + if curDir == nil { + outOfTree = true + } + } + } + result.Extra = filter(tarRoot, func(e *Entry) bool { + return e.Next == nil + }) + return &result, err +} diff --git a/check_test.go b/check_test.go index d1ad6de..fa0368a 100644 --- a/check_test.go +++ b/check_test.go @@ -143,6 +143,58 @@ func TestTimeComparison(t *testing.T) { } } +func TestTarTime(t *testing.T) { + dir, err := ioutil.TempDir("", "test-tar-time.") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + // This is the format of time from FreeBSD + spec := ` +/set type=file time=5.454353132 +. type=dir time=5.123456789 + file time=5.911134111 +.. +` + + fh, err := os.Create(filepath.Join(dir, "file")) + if err != nil { + t.Fatal(err) + } + // This is what mode we're checking for. Round integer of epoch seconds + epoch := time.Unix(5, 0) + if err := os.Chtimes(fh.Name(), epoch, epoch); err != nil { + t.Fatal(err) + } + if err := os.Chtimes(dir, epoch, epoch); err != nil { + t.Fatal(err) + } + if err := fh.Close(); err != nil { + t.Error(err) + } + + dh, err := ParseSpec(bytes.NewBufferString(spec)) + if err != nil { + t.Fatal(err) + } + + // make sure "time" keyword works + _, err = Check(dir, dh, DefaultKeywords) + if err != nil { + t.Error(err) + } + + // make sure tar_time wins + res, err := Check(dir, dh, append(DefaultKeywords, "tar_time")) + if err != nil { + t.Error(err) + } + if len(res.Failures) > 0 { + t.Fatal(res.Failures) + } +} + func TestIgnoreComments(t *testing.T) { dir, err := ioutil.TempDir("", "test-comments.") if err != nil { diff --git a/cmd/gomtree/main.go b/cmd/gomtree/main.go index 3352ba6..c7f90d8 100644 --- a/cmd/gomtree/main.go +++ b/cmd/gomtree/main.go @@ -5,6 +5,8 @@ import ( "encoding/json" "flag" "fmt" + "io" + "io/ioutil" "log" "os" "strings" @@ -20,6 +22,7 @@ var ( flUseKeywords = flag.String("k", "", "Use the specified (delimited by comma or space) keywords as the current set of keywords") flListKeywords = flag.Bool("list-keywords", false, "List the keywords available") flResultFormat = flag.String("result-format", "bsd", "output the validation results using the given format (bsd, json, path)") + flTar = flag.String("T", "", "use tar archive to create or validate a directory hierarchy spec") ) var formats = map[string]func(*mtree.Result) string{ @@ -91,7 +94,11 @@ func main() { currentKeywords = append([]string{"type"}, currentKeywords...) } } else { - currentKeywords = mtree.DefaultKeywords[:] + if *flTar != "" { + currentKeywords = mtree.DefaultTarKeywords[:] + } else { + currentKeywords = mtree.DefaultKeywords[:] + } } // -K if *flAddKeywords != "" { @@ -123,19 +130,60 @@ func main() { rootPath = *flPath } - // -c - if *flCreate { - // create a directory hierarchy - dh, err := mtree.Walk(rootPath, nil, currentKeywords) + // -T + var tdh *mtree.DirectoryHierarchy + if *flTar != "" { + fh, err := os.Open(*flTar) if err != nil { log.Println(err) isErr = true return } - dh.WriteTo(os.Stdout) - } else if dh != nil { + ts := mtree.NewTarStreamer(fh, currentKeywords) + + if _, err := io.Copy(ioutil.Discard, ts); err != nil && err != io.EOF { + log.Println(err) + isErr = true + return + } + if err := ts.Close(); err != nil { + log.Println(err) + isErr = true + return + } + defer fh.Close() + tdh, err = ts.Hierarchy() + if err != nil { + log.Println(err) + isErr = true + return + } + } + // -c + if *flCreate { + // create a directory hierarchy + // with a tar stream + if tdh != nil { + tdh.WriteTo(os.Stdout) + } else { + // with a root directory + dh, err := mtree.Walk(rootPath, nil, currentKeywords) + if err != nil { + log.Println(err) + isErr = true + return + } + dh.WriteTo(os.Stdout) + } + } else if tdh != nil || dh != nil { + var res *mtree.Result + var err error // else this is a validation - res, err := mtree.Check(rootPath, dh, currentKeywords) + if *flTar != "" { + res, err = mtree.TarCheck(tdh, dh, currentKeywords) + } else { + res, err = mtree.Check(rootPath, dh, currentKeywords) + } if err != nil { log.Println(err) isErr = true @@ -150,11 +198,25 @@ func main() { return } } - } else { - log.Println("neither validating or creating a manifest. Please provide additional arguments") - isErr = true - defer os.Exit(1) - return + if res != nil { + if len(res.Extra) > 0 { + defer os.Exit(1) + for _, extra := range res.Extra { + fmt.Printf("%s extra\n", extra.Path()) + } + } + if len(res.Missing) > 0 { + defer os.Exit(1) + for _, missing := range res.Missing { + fmt.Printf("%s missing\n", missing.Path()) + } + } + } else { + log.Println("neither validating or creating a manifest. Please provide additional arguments") + isErr = true + defer os.Exit(1) + return + } } } diff --git a/creator.go b/creator.go new file mode 100644 index 0000000..2a23a22 --- /dev/null +++ b/creator.go @@ -0,0 +1,9 @@ +package mtree + +// dhCreator is used in when building a DirectoryHierarchy +type dhCreator struct { + DH *DirectoryHierarchy + curSet *Entry + curDir *Entry + curEnt *Entry +} diff --git a/entry.go b/entry.go new file mode 100644 index 0000000..8fe5027 --- /dev/null +++ b/entry.go @@ -0,0 +1,102 @@ +package mtree + +import ( + "fmt" + "path/filepath" + "strings" +) + +type byPos []Entry + +func (bp byPos) Len() int { return len(bp) } +func (bp byPos) Less(i, j int) bool { return bp[i].Pos < bp[j].Pos } +func (bp byPos) Swap(i, j int) { bp[i], bp[j] = bp[j], bp[i] } + +// Entry is each component of content in the mtree spec file +type Entry struct { + Parent *Entry // up + Children []*Entry // down + Prev, Next *Entry // left, right + Set *Entry // current `/set` for additional keywords + Pos int // order in the spec + Raw string // file or directory name + Name string // file or directory name + Keywords []string // TODO(vbatts) maybe a keyword typed set of values? + Type EntryType +} + +// Descend searches thru an Entry's children to find the Entry associated with +// `filename`. Directories are stored at the end of an Entry's children so do a +// traverse backwards. If you descend to a "." +func (e Entry) Descend(filename string) *Entry { + if filename == "." || filename == "" { + return &e + } + numChildren := len(e.Children) + for i := range e.Children { + c := e.Children[numChildren-1-i] + if c.Name == filename { + return c + } + } + return nil +} + +// Ascend gets the parent of an Entry. Serves mainly to maintain readability +// when traversing up and down an Entry tree +func (e Entry) Ascend() *Entry { + return e.Parent +} + +// Path provides the full path of the file, despite RelativeType or FullType +func (e Entry) Path() string { + if e.Parent == nil || e.Type == FullType { + return filepath.Clean(e.Name) + } + return filepath.Clean(filepath.Join(e.Parent.Path(), e.Name)) +} + +func (e Entry) String() string { + if e.Raw != "" { + return e.Raw + } + if e.Type == BlankType { + return "" + } + if e.Type == DotDotType { + return e.Name + } + if e.Type == SpecialType || e.Type == FullType || inSlice("type=dir", e.Keywords) { + return fmt.Sprintf("%s %s", e.Name, strings.Join(e.Keywords, " ")) + } + return fmt.Sprintf(" %s %s", e.Name, strings.Join(e.Keywords, " ")) +} + +// EntryType are the formats of lines in an mtree spec file +type EntryType int + +// The types of lines to be found in an mtree spec file +const ( + SignatureType EntryType = iota // first line of the file, like `#mtree v2.0` + BlankType // blank lines are ignored + CommentType // Lines beginning with `#` are ignored + SpecialType // line that has `/` prefix issue a "special" command (currently only /set and /unset) + RelativeType // if the first white-space delimited word does not have a '/' in it. Options/keywords are applied. + DotDotType // .. - A relative path step. keywords/options are ignored + FullType // if the first word on the line has a `/` after the first character, it interpretted as a file pathname with options +) + +// String returns the name of the EntryType +func (et EntryType) String() string { + return typeNames[et] +} + +var typeNames = map[EntryType]string{ + SignatureType: "SignatureType", + BlankType: "BlankType", + CommentType: "CommentType", + SpecialType: "SpecialType", + RelativeType: "RelativeType", + DotDotType: "DotDotType", + FullType: "FullType", +} diff --git a/hierarchy.go b/hierarchy.go index b591773..9f66056 100644 --- a/hierarchy.go +++ b/hierarchy.go @@ -1,11 +1,8 @@ package mtree import ( - "fmt" "io" - "path/filepath" "sort" - "strings" ) // DirectoryHierarchy is the mapped structure for an mtree directory hierarchy @@ -27,75 +24,3 @@ func (dh DirectoryHierarchy) WriteTo(w io.Writer) (n int64, err error) { } return sum, nil } - -type byPos []Entry - -func (bp byPos) Len() int { return len(bp) } -func (bp byPos) Less(i, j int) bool { return bp[i].Pos < bp[j].Pos } -func (bp byPos) Swap(i, j int) { bp[i], bp[j] = bp[j], bp[i] } - -// Entry is each component of content in the mtree spec file -type Entry struct { - Parent, Child *Entry // up, down - Prev, Next *Entry // left, right - Set *Entry // current `/set` for additional keywords - Pos int // order in the spec - Raw string // file or directory name - Name string // file or directory name - Keywords []string // TODO(vbatts) maybe a keyword typed set of values? - Type EntryType -} - -// Path provides the full path of the file, despite RelativeType or FullType -func (e Entry) Path() string { - if e.Parent == nil || e.Type == FullType { - return filepath.Clean(e.Name) - } - return filepath.Clean(filepath.Join(e.Parent.Path(), e.Name)) -} - -func (e Entry) String() string { - if e.Raw != "" { - return e.Raw - } - if e.Type == BlankType { - return "" - } - if e.Type == DotDotType { - return e.Name - } - // TODO(vbatts) if type is RelativeType and a keyword of not type=dir - if e.Type == SpecialType || e.Type == FullType || inSlice("type=dir", e.Keywords) { - return fmt.Sprintf("%s %s", e.Name, strings.Join(e.Keywords, " ")) - } - return fmt.Sprintf(" %s %s", e.Name, strings.Join(e.Keywords, " ")) -} - -// EntryType are the formats of lines in an mtree spec file -type EntryType int - -// The types of lines to be found in an mtree spec file -const ( - SignatureType EntryType = iota // first line of the file, like `#mtree v2.0` - BlankType // blank lines are ignored - CommentType // Lines beginning with `#` are ignored - SpecialType // line that has `/` prefix issue a "special" command (currently only /set and /unset) - RelativeType // if the first white-space delimited word does not have a '/' in it. Options/keywords are applied. - DotDotType // .. - A relative path step. keywords/options are ignored - FullType // if the first word on the line has a `/` after the first character, it interpretted as a file pathname with options -) - -// String returns the name of the EntryType -func (et EntryType) String() string { - return typeNames[et] -} - -var typeNames = map[EntryType]string{ - SignatureType: "SignatureType", - BlankType: "BlankType", - CommentType: "CommentType", - SpecialType: "SpecialType", - RelativeType: "RelativeType", - DotDotType: "DotDotType", - FullType: "FullType", -} diff --git a/keywords.go b/keywords.go index c6607c9..4119fc0 100644 --- a/keywords.go +++ b/keywords.go @@ -1,6 +1,7 @@ package mtree import ( + "archive/tar" "crypto/md5" "crypto/sha1" "crypto/sha256" @@ -17,7 +18,10 @@ import ( // KeywordFunc is the type of a function called on each file to be included in // a DirectoryHierarchy, that will produce the string output of the keyword to // be included for the file entry. Otherwise, empty string. -type KeywordFunc func(path string, info os.FileInfo) (string, error) +// io.Reader `r` is to the file stream for the file payload. While this +// function takes an io.Reader, the caller needs to reset it to the beginning +// for each new KeywordFunc +type KeywordFunc func(path string, info os.FileInfo, r io.Reader) (string, error) // KeyVal is a "keyword=value" type KeyVal string @@ -55,6 +59,11 @@ func (kv KeyVal) Value() string { return strings.SplitN(strings.TrimSpace(string(kv)), "=", 2)[1] } +// ChangeValue changes the value of a KeyVal +func (kv KeyVal) ChangeValue(newval string) string { + return fmt.Sprintf("%s=%s", kv.Keyword(), newval) +} + // keywordSelector takes an array of "keyword=value" and filters out that only the set of words func keywordSelector(keyval, words []string) []string { retList := []string{} @@ -125,6 +134,17 @@ var ( "nlink", "time", } + // DefaultTarKeywords has keywords that should be used when creating a manifest from + // an archive. Currently, evaluating the # of hardlinks has not been implemented yet + DefaultTarKeywords = []string{ + "size", + "type", + "uid", + "gid", + "mode", + "link", + "tar_time", + } // SetKeywords is the default set of keywords calculated for a `/set` SpecialType SetKeywords = []string{ "uid", @@ -156,6 +176,11 @@ var ( "sha512": hasherKeywordFunc("sha512", sha512.New), // The SHA512 message digest of the file "sha512digest": hasherKeywordFunc("sha512digest", sha512.New), // A synonym for `sha512` + // This is not an upstreamed keyword, but used to vary from "time", as tar + // archives do not store nanosecond precision. So comparing on "time" will + // be only seconds level accurate. + "tar_time": tartimeKeywordFunc, // The last modification time of the file, from a tar archive mtime + // This is not an upstreamed keyword, but a needed attribute for file validation. // The pattern for this keyword key is prefixed by "xattr." followed by the extended attribute "namespace.key". // The keyword value is the SHA1 digest of the extended attribute's value. @@ -165,7 +190,7 @@ var ( ) var ( - modeKeywordFunc = func(path string, info os.FileInfo) (string, error) { + modeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { permissions := info.Mode().Perm() if os.ModeSetuid&info.Mode() > 0 { permissions |= (1 << 11) @@ -178,52 +203,49 @@ var ( } return fmt.Sprintf("mode=%#o", permissions), nil } - sizeKeywordFunc = func(path string, info os.FileInfo) (string, error) { + sizeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { return fmt.Sprintf("size=%d", info.Size()), nil } - cksumKeywordFunc = func(path string, info os.FileInfo) (string, error) { + cksumKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { if !info.Mode().IsRegular() { return "", nil } - - fh, err := os.Open(path) - if err != nil { - return "", err - } - defer fh.Close() - sum, _, err := cksum(fh) + sum, _, err := cksum(r) if err != nil { return "", err } return fmt.Sprintf("cksum=%d", sum), nil } hasherKeywordFunc = func(name string, newHash func() hash.Hash) KeywordFunc { - return func(path string, info os.FileInfo) (string, error) { + return func(path string, info os.FileInfo, r io.Reader) (string, error) { if !info.Mode().IsRegular() { return "", nil } - - fh, err := os.Open(path) - if err != nil { - return "", err - } - defer fh.Close() - h := newHash() - if _, err := io.Copy(h, fh); err != nil { + if _, err := io.Copy(h, r); err != nil { return "", err } return fmt.Sprintf("%s=%x", name, h.Sum(nil)), nil } } - timeKeywordFunc = func(path string, info os.FileInfo) (string, error) { + tartimeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + return fmt.Sprintf("tar_time=%d.000000000", info.ModTime().Unix()), nil + } + timeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { t := info.ModTime().UnixNano() if t == 0 { return "time=0.000000000", nil } return fmt.Sprintf("time=%d.%9.9d", (t / 1e9), (t % (t / 1e9))), nil } - linkKeywordFunc = func(path string, info os.FileInfo) (string, error) { + linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if sys, ok := info.Sys().(*tar.Header); ok { + if sys.Linkname != "" { + return fmt.Sprintf("link=%s", sys.Linkname), nil + } + return "", nil + } + if info.Mode()&os.ModeSymlink != 0 { str, err := os.Readlink(path) if err != nil { @@ -233,7 +255,7 @@ var ( } return "", nil } - typeKeywordFunc = func(path string, info os.FileInfo) (string, error) { + typeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { if info.Mode().IsDir() { return "type=dir", nil } diff --git a/keywords_linux.go b/keywords_linux.go index 5683dfd..26bfc40 100644 --- a/keywords_linux.go +++ b/keywords_linux.go @@ -3,8 +3,10 @@ package mtree import ( + "archive/tar" "crypto/sha1" "fmt" + "io" "os" "os/user" "strings" @@ -14,7 +16,11 @@ import ( ) var ( - unameKeywordFunc = func(path string, info os.FileInfo) (string, error) { + unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + return fmt.Sprintf("uname=%s", hdr.Uname), nil + } + stat := info.Sys().(*syscall.Stat_t) u, err := user.LookupId(fmt.Sprintf("%d", stat.Uid)) if err != nil { @@ -22,19 +28,40 @@ var ( } return fmt.Sprintf("uname=%s", u.Username), nil } - uidKeywordFunc = func(path string, info os.FileInfo) (string, error) { + uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + return fmt.Sprintf("uid=%d", hdr.Uid), nil + } stat := info.Sys().(*syscall.Stat_t) return fmt.Sprintf("uid=%d", stat.Uid), nil } - gidKeywordFunc = func(path string, info os.FileInfo) (string, error) { - stat := info.Sys().(*syscall.Stat_t) - return fmt.Sprintf("gid=%d", stat.Gid), nil + gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + return fmt.Sprintf("gid=%d", hdr.Gid), nil + } + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + return fmt.Sprintf("gid=%d", stat.Gid), nil + } + return "", nil } - nlinkKeywordFunc = func(path string, info os.FileInfo) (string, error) { - stat := info.Sys().(*syscall.Stat_t) - return fmt.Sprintf("nlink=%d", stat.Nlink), nil + nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + return fmt.Sprintf("nlink=%d", stat.Nlink), nil + } + return "", nil } - xattrKeywordFunc = func(path string, info os.FileInfo) (string, error) { + xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + if len(hdr.Xattrs) == 0 { + return "", nil + } + klist := []string{} + for k, v := range hdr.Xattrs { + klist = append(klist, fmt.Sprintf("xattr.%s=%x", k, sha1.Sum([]byte(v)))) + } + return strings.Join(klist, " "), nil + } + xlist, err := xattr.List(path) if err != nil { return "", err diff --git a/keywords_unsupported.go b/keywords_unsupported.go index ba61845..3234532 100644 --- a/keywords_unsupported.go +++ b/keywords_unsupported.go @@ -2,22 +2,36 @@ package mtree -import "os" +import ( + "archive/tar" + "fmt" + "io" + "os" +) var ( - unameKeywordFunc = func(path string, info os.FileInfo) (string, error) { + unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + return fmt.Sprintf("uname=%s", hdr.Uname), nil + } return "", nil } - uidKeywordFunc = func(path string, info os.FileInfo) (string, error) { + uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + return fmt.Sprintf("uid=%d", hdr.Uid), nil + } return "", nil } - gidKeywordFunc = func(path string, info os.FileInfo) (string, error) { + gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { + if hdr, ok := info.Sys().(*tar.Header); ok { + return fmt.Sprintf("gid=%d", hdr.Gid), nil + } return "", nil } - nlinkKeywordFunc = func(path string, info os.FileInfo) (string, error) { + nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { return "", nil } - xattrKeywordFunc = func(path string, info os.FileInfo) (string, error) { + xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) { return "", nil } ) diff --git a/parse.go b/parse.go index 5155f74..77987c6 100644 --- a/parse.go +++ b/parse.go @@ -81,6 +81,7 @@ func ParseSpec(r io.Reader) (*DirectoryHierarchy, error) { e.Type = RelativeType } e.Keywords = f[1:] + // TODO: gather keywords if using tar stream e.Parent = creator.curDir for i := range e.Keywords { kv := KeyVal(e.Keywords[i]) diff --git a/tar.go b/tar.go new file mode 100644 index 0000000..b221e24 --- /dev/null +++ b/tar.go @@ -0,0 +1,376 @@ +package mtree + +import ( + "archive/tar" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" +) + +// Streamer creates a file hierarchy out of a tar stream +type Streamer interface { + io.ReadCloser + Hierarchy() (*DirectoryHierarchy, error) +} + +var tarDefaultSetKeywords = []string{"type=file", "flags=none", "mode=0664"} + +// NewTarStreamer streams a tar archive and creates a file hierarchy based off +// of the tar metadata headers +func NewTarStreamer(r io.Reader, keywords []string) Streamer { + pR, pW := io.Pipe() + ts := &tarStream{ + pipeReader: pR, + pipeWriter: pW, + creator: dhCreator{DH: &DirectoryHierarchy{}}, + teeReader: io.TeeReader(r, pW), + tarReader: tar.NewReader(pR), + keywords: keywords, + } + go ts.readHeaders() // I don't like this + return ts +} + +type tarStream struct { + creator dhCreator + pipeReader *io.PipeReader + pipeWriter *io.PipeWriter + teeReader io.Reader + tarReader *tar.Reader + keywords []string + err error +} + +func (ts *tarStream) readHeaders() { + // We have to start with the directory we're in, and anything beyond these + // items is determined at the time a tar is extracted. + rootComment := Entry{ + Raw: "# .", + Type: CommentType, + } + root := Entry{ + Name: ".", + Type: RelativeType, + Prev: &rootComment, + Set: &Entry{ + Name: "meta-set", + Type: SpecialType, + }, + } + metadataEntries := signatureEntries("") + for _, e := range metadataEntries { + e.Pos = len(ts.creator.DH.Entries) + ts.creator.DH.Entries = append(ts.creator.DH.Entries, e) + } + for { + hdr, err := ts.tarReader.Next() + if err != nil { + flatten(&root, ts) + ts.pipeReader.CloseWithError(err) + return + } + + // Because the content of the file may need to be read by several + // KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from + // ts.tarReader is not enough. + tmpFile, err := ioutil.TempFile("", "ts.payload.") + if err != nil { + ts.pipeReader.CloseWithError(err) + return + } + // for good measure + if err := tmpFile.Chmod(0600); err != nil { + tmpFile.Close() + os.Remove(tmpFile.Name()) + ts.pipeReader.CloseWithError(err) + return + } + if _, err := io.Copy(tmpFile, ts.tarReader); err != nil { + tmpFile.Close() + os.Remove(tmpFile.Name()) + ts.pipeReader.CloseWithError(err) + return + } + defer tmpFile.Close() + defer os.Remove(tmpFile.Name()) + + // Alright, it's either file or directory + e := Entry{ + Name: filepath.Base(hdr.Name), + Type: RelativeType, + } + + // now collect keywords on the file + for _, keyword := range ts.keywords { + if keyword == "time" { + keyword = "tar_time" + } + if keyFunc, ok := KeywordFuncs[keyword]; ok { + // We can't extract directories on to disk, so "size" keyword + // is irrelevant for now + if hdr.FileInfo().IsDir() && keyword == "size" { + continue + } + + if string(hdr.Typeflag) == string('1') { + // TODO: get number of hardlinks for a file + } + val, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile) + if err != nil { + ts.setErr(err) + } + // for good measure, check that we actually get a value for a keyword + if val != "" { + e.Keywords = append(e.Keywords, val) + } + + // don't forget to reset the reader + if _, err := tmpFile.Seek(0, 0); err != nil { + tmpFile.Close() + os.Remove(tmpFile.Name()) + ts.pipeReader.CloseWithError(err) + return + } + } + } + // collect meta-set keywords for a directory so that we can build the + // actual sets in `flatten` + if hdr.FileInfo().IsDir() { + s := Entry{ + Name: "meta-set", + Type: SpecialType, + } + for _, setKW := range SetKeywords { + if setKW == "time" { + setKW = "tar_time" + } + if keyFunc, ok := KeywordFuncs[setKW]; ok { + val, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile) + if err != nil { + ts.setErr(err) + } + if val != "" { + s.Keywords = append(s.Keywords, val) + } + if _, err := tmpFile.Seek(0, 0); err != nil { + tmpFile.Close() + os.Remove(tmpFile.Name()) + ts.pipeReader.CloseWithError(err) + } + } + } + if filepath.Dir(filepath.Clean(hdr.Name)) == "." { + root.Set = &s + } else { + e.Set = &s + } + } + populateTree(&root, &e, hdr, ts) + } +} + +type relationship int + +const ( + unknownDir relationship = iota + sameDir + childDir + parentDir +) + +// populateTree creates a file tree hierarchy using an Entry's Parent and +// Children fields. When examining the Entry e to insert in the tree, we +// determine if the path to that Entry exists yet. If it does, insert it in the +// appropriate position in the tree. If not, create a path with "placeholder" +// directories, and then insert the Entry. populateTree does not consider +// symbolic links yet. +func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) { + isDir := hdr.FileInfo().IsDir() + wd := filepath.Clean(hdr.Name) + + if !isDir { + // If entry is a file, we only want the directory it's in. + wd = filepath.Dir(wd) + } + if filepath.Dir(wd) == "." { + if isDir { + root.Keywords = e.Keywords + } else { + root.Children = append([]*Entry{e}, root.Children...) + e.Parent = root + } + return + } + + dirNames := strings.Split(wd, "/") + parent := root + for _, name := range dirNames[1:] { + if node := parent.Descend(name); node == nil { + // Entry for directory doesn't exist in tree relative to root + var newEntry *Entry + if isDir { + newEntry = e + } else { + newEntry = &Entry{ + Name: name, + Type: RelativeType, + } + } + newEntry.Parent = parent + parent.Children = append(parent.Children, newEntry) + parent = newEntry + } else { + // Entry for directory exists in tree, just keep going + parent = node + } + } + if !isDir { + parent.Children = append([]*Entry{e}, parent.Children...) + e.Parent = parent + } else { + commentEntry := Entry{ + Raw: "# " + e.Path(), + Type: CommentType, + } + e.Prev = &commentEntry + } +} + +// After constructing the tree from the tar stream, we want to "flatten" this +// tree by appending Entry's into ts.creator.DH.Entries in an appropriate +// manner to simplify writing the output with ts.creator.DH.WriteTo +// root: the "head" of the sub-tree to flatten +// ts : tarStream to keep track of Entry's +func flatten(root *Entry, ts *tarStream) { + if root.Prev != nil { + // root.Prev != nil implies root is a directory + ts.creator.DH.Entries = append(ts.creator.DH.Entries, + Entry{ + Type: BlankType, + Pos: len(ts.creator.DH.Entries), + }) + root.Prev.Pos = len(ts.creator.DH.Entries) + ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev) + + // Check if we need a new set + if ts.creator.curSet == nil { + ts.creator.curSet = &Entry{ + Type: SpecialType, + Name: "/set", + Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords), + Pos: len(ts.creator.DH.Entries), + } + ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet) + } else { + needNewSet := false + for _, k := range root.Set.Keywords { + if !inSlice(k, ts.creator.curSet.Keywords) { + needNewSet = true + break + } + } + if needNewSet { + ts.creator.curSet = &Entry{ + Name: "/set", + Type: SpecialType, + Pos: len(ts.creator.DH.Entries), + Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords), + } + ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet) + } + } + } + root.Set = ts.creator.curSet + root.Keywords = setDifference(root.Keywords, ts.creator.curSet.Keywords) + root.Pos = len(ts.creator.DH.Entries) + ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root) + + for _, c := range root.Children { + flatten(c, ts) + } + + if root.Prev != nil { + // Show a comment when stepping out + root.Prev.Pos = len(ts.creator.DH.Entries) + ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev) + dotEntry := Entry{ + Type: DotDotType, + Name: "..", + Pos: len(ts.creator.DH.Entries), + } + ts.creator.DH.Entries = append(ts.creator.DH.Entries, dotEntry) + } +} + +// filter takes in a pointer to an Entry, and returns a slice of Entry's that +// satisfy the predicate p +func filter(root *Entry, p func(*Entry) bool) []Entry { + var validEntrys []Entry + if len(root.Children) > 0 || root.Prev != nil { + for _, c := range root.Children { + // if an Entry is a directory, filter the directory + if c.Prev != nil { + validEntrys = append(validEntrys, filter(c, p)...) + } + if p(c) { + if c.Prev == nil { + // prepend directories + validEntrys = append([]Entry{*c}, validEntrys...) + } else { + validEntrys = append(validEntrys, *c) + } + } + } + return validEntrys + } + return nil +} + +func setDifference(this, that []string) []string { + if len(this) == 0 { + return that + } + diff := []string{} + for _, kv := range this { + if !inSlice(kv, that) { + diff = append(diff, kv) + } + } + return diff +} + +func compareDir(curDir, prevDir string) relationship { + curDir = filepath.Clean(curDir) + prevDir = filepath.Clean(prevDir) + if curDir == prevDir { + return sameDir + } + if filepath.Dir(curDir) == prevDir { + return childDir + } + if curDir == filepath.Dir(prevDir) { + return parentDir + } + return unknownDir +} + +func (ts *tarStream) setErr(err error) { + ts.err = err +} + +func (ts *tarStream) Read(p []byte) (n int, err error) { + return ts.teeReader.Read(p) +} + +func (ts *tarStream) Close() error { + return ts.pipeReader.Close() +} + +func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) { + if ts.err != nil && ts.err != io.EOF { + return nil, ts.err + } + return ts.creator.DH, nil +} diff --git a/tar_test.go b/tar_test.go new file mode 100644 index 0000000..38b8dd5 --- /dev/null +++ b/tar_test.go @@ -0,0 +1,175 @@ +package mtree + +import ( + "archive/tar" + "bytes" + "io" + "io/ioutil" + "os" + "testing" +) + +func ExampleStreamer() { + fh, err := os.Open("./testdata/test.tar") + if err != nil { + // handle error ... + } + str := NewTarStreamer(fh, nil) + if err := extractTar("/tmp/dir", str); err != nil { + // handle error ... + } + + dh, err := str.Hierarchy() + if err != nil { + // handle error ... + } + + res, err := Check("/tmp/dir/", dh, nil) + if err != nil { + // handle error ... + } + if len(res.Failures) > 0 { + // handle validation issue ... + } +} +func extractTar(root string, tr io.Reader) error { + return nil +} + +func TestTar(t *testing.T) { + /* + data, err := makeTarStream() + if err != nil { + t.Fatal(err) + } + buf := bytes.NewBuffer(data) + str := NewTarStreamer(buf, append(DefaultKeywords, "sha1")) + */ + /* + // open empty folder and check size. + fh, err := os.Open("./testdata/empty") + if err != nil { + t.Fatal(err) + } + log.Println(fh.Stat()) + fh.Close() */ + fh, err := os.Open("./testdata/test.tar") + if err != nil { + t.Fatal(err) + } + str := NewTarStreamer(fh, append(DefaultKeywords, "sha1")) + + if _, err := io.Copy(ioutil.Discard, str); err != nil && err != io.EOF { + t.Fatal(err) + } + if err := str.Close(); err != nil { + t.Fatal(err) + } + defer fh.Close() + + // get DirectoryHierarcy struct from walking the tar archive + tdh, err := str.Hierarchy() + if err != nil { + t.Fatal(err) + } + if tdh == nil { + t.Fatal("expected a DirectoryHierarchy struct, but got nil") + } + + fh, err = os.Create("./testdata/test.mtree") + if err != nil { + t.Fatal(err) + } + defer os.Remove("./testdata/test.mtree") + + // put output of tar walk into test.mtree + _, err = tdh.WriteTo(fh) + if err != nil { + t.Fatal(err) + } + fh.Close() + + // now simulate gomtree -T testdata/test.tar -f testdata/test.mtree + fh, err = os.Open("./testdata/test.mtree") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + + dh, err := ParseSpec(fh) + if err != nil { + t.Fatal(err) + } + + res, err := TarCheck(tdh, dh, append(DefaultKeywords, "sha1")) + + if err != nil { + t.Fatal(err) + } + + // print any failures, and then call t.Fatal once all failures/extra/missing + // are outputted + if res != nil { + errors := "" + switch { + case len(res.Failures) > 0: + for _, f := range res.Failures { + t.Errorf("%s\n", f) + } + errors += "Keyword validation errors\n" + case len(res.Missing) > 0: + for _, m := range res.Missing { + t.Errorf("Missing file: %s\n", m.Path()) + } + errors += "Missing files not expected for this test\n" + case len(res.Extra) > 0: + for _, e := range res.Extra { + t.Errorf("Extra file: %s\n", e.Path()) + } + errors += "Extra files not expected for this test\n" + } + if errors != "" { + t.Fatal(errors) + } + } +} + +// minimal tar archive stream that mimics what is in ./testdata/test.tar +func makeTarStream() ([]byte, error) { + buf := new(bytes.Buffer) + + // Create a new tar archive. + tw := tar.NewWriter(buf) + + // Add some files to the archive. + var files = []struct { + Name, Body string + Mode int64 + Type byte + Xattrs map[string]string + }{ + {"x/", "", 0755, '5', nil}, + {"x/files", "howdy\n", 0644, '0', nil}, + } + for _, file := range files { + hdr := &tar.Header{ + Name: file.Name, + Mode: file.Mode, + Size: int64(len(file.Body)), + Xattrs: file.Xattrs, + } + if err := tw.WriteHeader(hdr); err != nil { + return nil, err + } + if len(file.Body) > 0 { + if _, err := tw.Write([]byte(file.Body)); err != nil { + return nil, err + } + } + } + // Make sure to check the error on Close. + if err := tw.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} diff --git a/testdata/test.tar b/testdata/test.tar new file mode 100644 index 0000000..6ae9b22 Binary files /dev/null and b/testdata/test.tar differ diff --git a/walk.go b/walk.go index 80ea4ea..ca1a706 100644 --- a/walk.go +++ b/walk.go @@ -2,6 +2,7 @@ package mtree import ( "fmt" + "io" "os" "os/user" "path/filepath" @@ -14,13 +15,6 @@ import ( // returns true, then the path is not included in the spec. type ExcludeFunc func(path string, info os.FileInfo) bool -type dhCreator struct { - DH *DirectoryHierarchy - curSet *Entry - curDir *Entry - curEnt *Entry -} - var defaultSetKeywords = []string{"type=file", "nlink=1", "flags=none", "mode=0664"} // Walk from root directory and assemble the DirectoryHierarchy. excludes @@ -76,9 +70,28 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie Keywords: keywordSelector(defaultSetKeywords, keywords), } for _, keyword := range SetKeywords { - if str, err := KeywordFuncs[keyword](path, info); err == nil && str != "" { - e.Keywords = append(e.Keywords, str) - } else if err != nil { + err := func() error { + var r io.Reader + if info.Mode().IsRegular() { + fh, err := os.Open(path) + if err != nil { + return err + } + defer fh.Close() + r = fh + } + keywordFunc, ok := KeywordFuncs[keyword] + if !ok { + return fmt.Errorf("Unknown keyword %q for file %q", keyword, path) + } + if str, err := keywordFunc(path, info, r); err == nil && str != "" { + e.Keywords = append(e.Keywords, str) + } else if err != nil { + return err + } + return nil + }() + if err != nil { return err } } @@ -88,9 +101,30 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie // check the attributes of the /set keywords and re-set if changed klist := []string{} for _, keyword := range SetKeywords { - if str, err := KeywordFuncs[keyword](path, info); err == nil && str != "" { - klist = append(klist, str) - } else if err != nil { + err := func() error { + var r io.Reader + if info.Mode().IsRegular() { + fh, err := os.Open(path) + if err != nil { + return err + } + defer fh.Close() + r = fh + } + keywordFunc, ok := KeywordFuncs[keyword] + if !ok { + return fmt.Errorf("Unknown keyword %q for file %q", keyword, path) + } + str, err := keywordFunc(path, info, r) + if err != nil { + return err + } + if str != "" { + klist = append(klist, str) + } + return nil + }() + if err != nil { return err } } @@ -122,11 +156,30 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie Parent: creator.curDir, } for _, keyword := range keywords { - if str, err := KeywordFuncs[keyword](path, info); err == nil && str != "" { - if !inSlice(str, creator.curSet.Keywords) { + err := func() error { + var r io.Reader + if info.Mode().IsRegular() { + fh, err := os.Open(path) + if err != nil { + return err + } + defer fh.Close() + r = fh + } + keywordFunc, ok := KeywordFuncs[keyword] + if !ok { + return fmt.Errorf("Unknown keyword %q for file %q", keyword, path) + } + str, err := keywordFunc(path, info, r) + if err != nil { + return err + } + if str != "" && !inSlice(str, creator.curSet.Keywords) { e.Keywords = append(e.Keywords, str) } - } else if err != nil { + return nil + }() + if err != nil { return err } } @@ -245,7 +298,7 @@ func readOrderedDirNames(dirname string) ([]string, error) { return append(names, dirnames...), nil } -// signatureEntries is helper function that returns a slice of Entry's +// signatureEntries is a simple helper function that returns a slice of Entry's // that describe the metadata signature about the host. Items like date, user, // machine, and tree (which is specified by argument `root`), are considered. // These Entry's construct comments in the mtree specification, so if there is