diff --git a/check.go b/check.go
index d902ad6..3f2a99a 100644
--- a/check.go
+++ b/check.go
@@ -10,6 +10,8 @@ import (
 type Result struct {
     // list of any failures in the Check
     Failures []Failure `json:"failures"`
+    Missing []Entry
+    Extra   []Entry
 }
 
 // Failure of a particular keyword for a path
@@ -90,3 +92,99 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
     }
     return &result, nil
 }
+
+// TarCheck is the tar equivalent of Check: it validates a tar stream's
+// hierarchy against a file hierarchy spec to determine whether files have been changed.
+func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) {
+    var result Result
+    var err error
+    var tarRoot *Entry
+
+    for _, e := range tarDH.Entries {
+        if e.Name == "." {
+            tarRoot = &e
+            break
+        }
+    }
+    tarRoot.Next = &Entry{
+        Name: "seen",
+        Type: CommentType,
+    }
+    curDir := tarRoot
+    creator := dhCreator{DH: dh}
+    sort.Sort(byPos(creator.DH.Entries))
+
+    var outOfTree bool
+    for i, e := range creator.DH.Entries {
+        switch e.Type {
+        case SpecialType:
+            if e.Name == "/set" {
+                creator.curSet = &creator.DH.Entries[i]
+            } else if e.Name == "/unset" {
+                creator.curSet = nil
+            }
+        case RelativeType, FullType:
+            if outOfTree {
+                return &result, fmt.Errorf("No parent node from %s", e.Path())
+            }
+            // TODO: handle the case where "." is not the first Entry to be found
+            tarEntry := curDir.Descend(e.Name)
+            if tarEntry == nil {
+                result.Missing = append(result.Missing, e)
+                continue
+            }
+
+            tarEntry.Next = &Entry{
+                Type: CommentType,
+                Name: "seen",
+            }
+
+            // expected values from file hierarchy spec
+            var kvs KeyVals
+            if creator.curSet != nil {
+                kvs = MergeSet(creator.curSet.Keywords, e.Keywords)
+            } else {
+                kvs = NewKeyVals(e.Keywords)
+            }
+
+            // actual
+            var tarkvs KeyVals
+            if tarEntry.Set != nil {
+                tarkvs = MergeSet(tarEntry.Set.Keywords, tarEntry.Keywords)
+            } else {
+                tarkvs = NewKeyVals(tarEntry.Keywords)
+            }
+
+            for _, kv := range kvs {
+                if _, ok := KeywordFuncs[kv.Keyword()]; !ok {
+                    return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path())
+                }
+                if keywords != nil && !inSlice(kv.Keyword(), keywords) {
+                    continue
+                }
+                if tarkv := tarkvs.Has(kv.Keyword()); tarkv != emptyKV {
+                    if string(tarkv) != string(kv) {
+                        failure := Failure{Path: tarEntry.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: tarkv.Value()}
+                        result.Failures = append(result.Failures, failure)
+                    }
+                }
+            }
+            // Step into a directory
+            if tarEntry.Prev != nil {
+                curDir = tarEntry
+            }
+        case DotDotType:
+            if outOfTree {
+                return &result, fmt.Errorf("No parent node.")
+            }
+            curDir = curDir.Ascend()
+            if curDir == nil {
+                outOfTree = true
+            }
+        }
+    }
+    result.Extra = filter(tarRoot, func(e *Entry) bool {
+        return e.Next == nil
+    })
+    return &result, err
+}
diff --git a/cmd/gomtree/main.go b/cmd/gomtree/main.go
index 3352ba6..913428b 100644
--- a/cmd/gomtree/main.go
+++ b/cmd/gomtree/main.go
@@ -5,6 +5,8 @@ import (
     "encoding/json"
     "flag"
     "fmt"
+    "io"
+    "io/ioutil"
     "log"
     "os"
     "strings"
@@ -20,6 +22,7 @@ var (
     flUseKeywords  = flag.String("k", "", "Use the specified (delimited by comma or space) keywords as the current set of keywords")
     flListKeywords = flag.Bool("list-keywords", false, "List the keywords available")
     flResultFormat = flag.String("result-format", "bsd", "output the validation results using the given format (bsd, json, path)")
+    flTar          = flag.String("T", "", "use tar archive to create or validate a directory hierarchy spec")
 )
 
 var formats = map[string]func(*mtree.Result) string{
@@ -123,19 +126,60 @@ func main() {
         rootPath = *flPath
     }
 
-    // -c
-    if *flCreate {
-        // create a directory hierarchy
-        dh, err := mtree.Walk(rootPath, nil, currentKeywords)
+    // -T
+    var tdh *mtree.DirectoryHierarchy
+    if *flTar != "" {
+        fh, err := os.Open(*flTar)
         if err != nil {
             log.Println(err)
             isErr = true
             return
         }
-        dh.WriteTo(os.Stdout)
-    } else if dh != nil {
+        ts := mtree.NewTarStreamer(fh, currentKeywords)
+
+        if _, err := io.Copy(ioutil.Discard, ts); err != nil && err != io.EOF {
+            log.Println(err)
+            isErr = true
+            return
+        }
+        if err := ts.Close(); err != nil {
+            log.Println(err)
+            isErr = true
+            return
+        }
+        defer fh.Close()
+        tdh, err = ts.Hierarchy()
+        if err != nil {
+            log.Println(err)
+            isErr = true
+            return
+        }
+    }
+    // -c
+    if *flCreate {
+        // create a directory hierarchy
+        // with a tar stream
+        if tdh != nil {
+            tdh.WriteTo(os.Stdout)
+        } else {
+            // with a root directory
+            dh, err := mtree.Walk(rootPath, nil, currentKeywords)
+            if err != nil {
+                log.Println(err)
+                isErr = true
+                return
+            }
+            dh.WriteTo(os.Stdout)
+        }
+    } else if tdh != nil || dh != nil {
+        var res *mtree.Result
+        var err error
         // else this is a validation
-        res, err := mtree.Check(rootPath, dh, currentKeywords)
+        if *flTar != "" {
+            res, err = mtree.TarCheck(tdh, dh, currentKeywords)
+        } else {
+            res, err = mtree.Check(rootPath, dh, currentKeywords)
+        }
         if err != nil {
             log.Println(err)
             isErr = true
@@ -148,13 +192,31 @@ func main() {
             log.Println(err)
             isErr = true
             return
+        if res != nil {
+            if len(res.Failures) > 0 {
+                defer os.Exit(1)
+                for _, failure := range res.Failures {
+                    fmt.Println(failure)
+                }
             }
+            if len(res.Extra) > 0 {
+                defer os.Exit(1)
+                for _, extra := range res.Extra {
+                    fmt.Printf("%s extra\n", extra.Path())
+                }
+            }
+            if len(res.Missing) > 0 {
+                defer os.Exit(1)
+                for _, missing := range res.Missing {
+                    fmt.Printf("%s missing\n", missing.Path())
+                }
+            }
+    } else {
+        log.Println("neither validating nor creating a manifest. Please provide additional arguments")
+        isErr = true
+        defer os.Exit(1)
+        return
     }
-    } else {
-        log.Println("neither validating or creating a manifest. Please provide additional arguments")
-        isErr = true
-        defer os.Exit(1)
-        return
     }
 }
diff --git a/entry.go b/entry.go
index 32b7678..8fe5027 100644
--- a/entry.go
+++ b/entry.go
@@ -25,6 +25,29 @@ type Entry struct {
     Type EntryType
 }
 
+// Descend searches through an Entry's children to find the Entry associated
+// with `filename`. Directories are stored at the end of an Entry's children,
+// so we traverse backwards. Descending to "." (or an empty filename) returns
+// the Entry itself.
+func (e Entry) Descend(filename string) *Entry {
+    if filename == "." || filename == "" {
+        return &e
+    }
+    numChildren := len(e.Children)
+    for i := range e.Children {
+        c := e.Children[numChildren-1-i]
+        if c.Name == filename {
+            return c
+        }
+    }
+    return nil
+}
+
+// Ascend gets the parent of an Entry. Serves mainly to maintain readability
+// when traversing up and down an Entry tree.
+func (e Entry) Ascend() *Entry {
+    return e.Parent
+}
+
 // Path provides the full path of the file, despite RelativeType or FullType
 func (e Entry) Path() string {
     if e.Parent == nil || e.Type == FullType {
@@ -43,7 +66,6 @@ func (e Entry) String() string {
     if e.Type == DotDotType {
         return e.Name
     }
-    // TODO(vbatts) if type is RelativeType and a keyword of not type=dir
     if e.Type == SpecialType || e.Type == FullType || inSlice("type=dir", e.Keywords) {
         return fmt.Sprintf("%s %s", e.Name, strings.Join(e.Keywords, " "))
     }
diff --git a/parse.go b/parse.go
index 5155f74..77987c6 100644
--- a/parse.go
+++ b/parse.go
@@ -81,6 +81,7 @@ func ParseSpec(r io.Reader) (*DirectoryHierarchy, error) {
                 e.Type = RelativeType
             }
             e.Keywords = f[1:]
+            // TODO: gather keywords if using tar stream
             e.Parent = creator.curDir
             for i := range e.Keywords {
                 kv := KeyVal(e.Keywords[i])
diff --git a/tar.go b/tar.go
index 0c83ef1..5be607a 100644
--- a/tar.go
+++ b/tar.go
@@ -4,19 +4,19 @@ import (
     "archive/tar"
     "io"
     "io/ioutil"
-    "log"
     "os"
     "path/filepath"
     "strings"
 )
 
-// Streamer interface that wraps an io.ReadCloser with a function that will
-// return it's Hierarchy
+// Streamer creates a file hierarchy out of a tar stream
 type Streamer interface {
     io.ReadCloser
     Hierarchy() (*DirectoryHierarchy, error)
 }
 
+var tarDefaultSetKeywords = []string{"type=file", "flags=none", "mode=0664"}
+
 // NewTarStreamer streams a tar archive and creates a file hierarchy based off
 // of the tar metadata headers
 func NewTarStreamer(r io.Reader, keywords []string) Streamer {
@@ -46,15 +46,28 @@ type tarStream struct {
 func (ts *tarStream) readHeaders() {
     // We have to start with the directory we're in, and anything beyond these
     // items is determined at the time a tar is extracted.
-    e := Entry{
-        Name:     ".",
-        Keywords: []string{"size=0", "type=dir"},
+    rootComment := Entry{
+        Raw:  "# .",
+        Type: CommentType,
+    }
+    root := Entry{
+        Name: ".",
+        Type: RelativeType,
+        Prev: &rootComment,
+        Set: &Entry{
+            Name: "meta-set",
+            Type: SpecialType,
+        },
+    }
+    metadataEntries := signatureEntries("")
+    for _, e := range metadataEntries {
+        e.Pos = len(ts.creator.DH.Entries)
+        ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
     }
-    ts.creator.curDir = &e
-    ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
     for {
         hdr, err := ts.tarReader.Next()
         if err != nil {
+            flatten(&root, ts)
             ts.pipeReader.CloseWithError(err)
             return
         }
@@ -84,17 +97,30 @@ func (ts *tarStream) readHeaders() {
         // Alright, it's either file or directory
         e := Entry{
             Name: filepath.Base(hdr.Name),
-            Pos:  len(ts.creator.DH.Entries),
             Type: RelativeType,
         }
+        // now collect keywords on the file
         for _, keyword := range ts.keywords {
             if keyFunc, ok := KeywordFuncs[keyword]; ok {
+                // We can't extract directories onto disk, so the "size" keyword
+                // is irrelevant for now
+                if hdr.FileInfo().IsDir() && keyword == "size" {
+                    continue
+                }
+
+                if string(hdr.Typeflag) == string('1') {
+                    // TODO: get number of hardlinks for a file
+                }
                 val, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
                 if err != nil {
                     ts.setErr(err)
                 }
-                e.Keywords = append(e.Keywords, val)
+
+                // for good measure, check that we actually get a value for a keyword
+                if val != "" {
+                    e.Keywords = append(e.Keywords, val)
+                }
 
                 // don't forget to reset the reader
                 if _, err := tmpFile.Seek(0, 0); err != nil {
@@ -105,39 +131,40 @@
                 }
             }
         }
+
+        // collect meta-set keywords for a directory so that we can build the
+        // actual sets in `flatten`
+        if hdr.FileInfo().IsDir() {
+            s := Entry{
+                Name: "meta-set",
+                Type: SpecialType,
+            }
+            for _, setKW := range SetKeywords {
+                if keyFunc, ok := KeywordFuncs[setKW]; ok {
+                    val, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
+                    if err != nil {
+                        ts.setErr(err)
+                    }
+                    if val != "" {
+                        s.Keywords = append(s.Keywords, val)
+                    }
+                    if _, err := tmpFile.Seek(0, 0); err != nil {
+                        tmpFile.Close()
+                        os.Remove(tmpFile.Name())
+                        ts.pipeReader.CloseWithError(err)
+                    }
+                }
+            }
+            if filepath.Dir(filepath.Clean(hdr.Name)) == "." {
+                root.Set = &s
+            } else {
+                e.Set = &s
+            }
+        }
+        populateTree(&root, &e, hdr, ts)
+
         tmpFile.Close()
         os.Remove(tmpFile.Name())
-
-        // compare directories, to determine parent of the current entry
-        cd := compareDir(filepath.Dir(hdr.Name), ts.creator.curDir.Path())
-        switch {
-        case cd == sameDir:
-            e.Parent = ts.creator.curDir
-            if e.Parent != nil {
-                e.Parent.Children = append(e.Parent.Children, &e)
-            }
-        case cd == parentDir:
-            e.Parent = ts.creator.curDir.Parent
-            if e.Parent != nil {
-                e.Parent.Children = append(e.Parent.Children, &e)
-            }
-        }
-
-        if hdr.FileInfo().IsDir() {
-            ts.creator.curDir = &e
-        }
-        // TODO getting the parent child relationship of these entries!
-        if hdr.FileInfo().IsDir() {
-            log.Println(strings.Split(hdr.Name, "/"), strings.Split(ts.creator.curDir.Path(), "/"))
-        }
-
-        ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
-
-        // Now is the wacky part of building out the entries. Since we can not
-        // control how the archive was assembled, can only take in the order given.
-        // Using `/set` will be tough. Hopefully i can do the directory stepping
-        // with relative paths, but even then I may get a new directory, and not
-        // the files first, but its directories first. :-\
     }
 }
@@ -150,6 +177,167 @@ const (
     parentDir
 )
 
+// populateTree creates a file tree hierarchy using an Entry's Parent and
+// Children fields. When examining the Entry e to insert in the tree, we
+// determine if the path to that Entry exists yet. If it does, insert it in the
+// appropriate position in the tree. If not, create a path with "placeholder"
+// directories, and then insert the Entry. populateTree does not consider
+// symbolic links yet.
+func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) {
+    isDir := hdr.FileInfo().IsDir()
+    wd := filepath.Clean(hdr.Name)
+
+    if !isDir {
+        // If entry is a file, we only want the directory it's in.
+        wd = filepath.Dir(wd)
+    }
+    if filepath.Dir(wd) == "." {
+        if isDir {
+            root.Keywords = e.Keywords
+        } else {
+            root.Children = append([]*Entry{e}, root.Children...)
+            e.Parent = root
+        }
+        return
+    }
+
+    dirNames := strings.Split(wd, "/")
+    parent := root
+    for _, name := range dirNames[1:] {
+        if node := parent.Descend(name); node == nil {
+            // Entry for directory doesn't exist in tree relative to root
+            var newEntry *Entry
+            if isDir {
+                newEntry = e
+            } else {
+                newEntry = &Entry{
+                    Name: name,
+                    Type: RelativeType,
+                }
+            }
+            newEntry.Parent = parent
+            parent.Children = append(parent.Children, newEntry)
+            parent = newEntry
+        } else {
+            // Entry for directory exists in tree, just keep going
+            parent = node
+        }
+    }
+    if !isDir {
+        parent.Children = append([]*Entry{e}, parent.Children...)
+        e.Parent = parent
+    } else {
+        commentEntry := Entry{
+            Raw:  "# " + e.Path(),
+            Type: CommentType,
+        }
+        e.Prev = &commentEntry
+    }
+}
+
+// flatten appends the Entries of the tree constructed from the tar stream
+// into ts.creator.DH.Entries in the appropriate order, to simplify writing
+// the output with ts.creator.DH.WriteTo.
+//   root: the "head" of the sub-tree to flatten
+//   ts:   the tarStream that keeps track of the Entries
+func flatten(root *Entry, ts *tarStream) {
+    if root.Prev != nil {
+        // root.Prev != nil implies root is a directory
+        ts.creator.DH.Entries = append(ts.creator.DH.Entries,
+            Entry{
+                Type: BlankType,
+                Pos:  len(ts.creator.DH.Entries),
+            })
+        root.Prev.Pos = len(ts.creator.DH.Entries)
+        ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev)
+
+        // Check if we need a new set
+        if ts.creator.curSet == nil {
+            ts.creator.curSet = &Entry{
+                Type:     SpecialType,
+                Name:     "/set",
+                Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords),
+                Pos:      len(ts.creator.DH.Entries),
+            }
+            ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet)
+        } else {
+            needNewSet := false
+            for _, k := range root.Set.Keywords {
+                if !inSlice(k, ts.creator.curSet.Keywords) {
+                    needNewSet = true
+                    break
+                }
+            }
+            if needNewSet {
+                ts.creator.curSet = &Entry{
+                    Name:     "/set",
+                    Type:     SpecialType,
+                    Pos:      len(ts.creator.DH.Entries),
+                    Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords),
+                }
+                ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet)
+            }
+        }
+    }
+    root.Set = ts.creator.curSet
+    root.Keywords = setDifference(root.Keywords, ts.creator.curSet.Keywords)
+    root.Pos = len(ts.creator.DH.Entries)
+    ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root)
+
+    for _, c := range root.Children {
+        flatten(c, ts)
+    }
+
+    if root.Prev != nil {
+        // Show a comment when stepping out
+        root.Prev.Pos = len(ts.creator.DH.Entries)
+        ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev)
+        dotEntry := Entry{
+            Type: DotDotType,
+            Name: "..",
+            Pos:  len(ts.creator.DH.Entries),
+        }
+        ts.creator.DH.Entries = append(ts.creator.DH.Entries, dotEntry)
+    }
+}
+
+// filter takes in a pointer to an Entry, and returns a slice of Entries that
+// satisfy the predicate p
+func filter(root *Entry, p func(*Entry) bool) []Entry {
+    var validEntries []Entry
+    if len(root.Children) > 0 || root.Prev != nil {
+        for _, c := range root.Children {
+            // if an Entry is a directory, filter the directory
+            if c.Prev != nil {
+                validEntries = append(validEntries, filter(c, p)...)
+            }
+            if p(c) {
+                if c.Prev == nil {
+                    // prepend directories
+                    validEntries = append([]Entry{*c}, validEntries...)
+                } else {
+                    validEntries = append(validEntries, *c)
+                }
+            }
+        }
+        return validEntries
+    }
+    return nil
+}
+
+func setDifference(this, that []string) []string {
+    if len(this) == 0 {
+        return that
+    }
+    diff := []string{}
+    for _, kv := range this {
+        if !inSlice(kv, that) {
+            diff = append(diff, kv)
+        }
+    }
+    return diff
+}
+
 func compareDir(curDir, prevDir string) relationship {
     curDir = filepath.Clean(curDir)
     prevDir = filepath.Clean(prevDir)
diff --git a/tar_test.go b/tar_test.go
index f1d011d..4e940b2 100644
--- a/tar_test.go
+++ b/tar_test.go
@@ -3,6 +3,7 @@ package mtree
 import (
     "archive/tar"
     "bytes"
+    "fmt"
     "io"
     "io/ioutil"
     "os"
@@ -45,6 +46,14 @@ func TestTar(t *testing.T) {
         buf := bytes.NewBuffer(data)
         str := NewTarStreamer(buf, append(DefaultKeywords, "sha1"))
     */
+    /*
+        // open empty folder and check size.
+        fh, err := os.Open("./testdata/empty")
+        if err != nil {
+            t.Fatal(err)
+        }
+        log.Println(fh.Stat())
+        fh.Close()
+    */
     fh, err := os.Open("./testdata/test.tar")
     if err != nil {
         t.Fatal(err)
     }
@@ -59,23 +68,71 @@ func TestTar(t *testing.T) {
     }
     defer fh.Close()
 
-    /*
-        fi, err := fh.Stat()
-        if err != nil {
-            t.Fatal(err)
-        }
-        if i != fi.Size() {
-            t.Errorf("expected length %d; got %d", fi.Size(), i)
-        }
-    */
-    dh, err := str.Hierarchy()
+    // get the DirectoryHierarchy struct from walking the tar archive
+    tdh, err := str.Hierarchy()
     if err != nil {
         t.Fatal(err)
     }
-    if dh == nil {
+    if tdh == nil {
         t.Fatal("expected a DirectoryHierarchy struct, but got nil")
     }
-    //dh.WriteTo(os.Stdout)
+
+    fh, err = os.Create("./testdata/test.mtree")
+    if err != nil {
+        t.Fatal(err)
+    }
+    defer os.Remove("./testdata/test.mtree")
+
+    // put output of tar walk into test.mtree
+    _, err = tdh.WriteTo(fh)
+    if err != nil {
+        t.Fatal(err)
+    }
+    fh.Close()
+
+    // now simulate gomtree -T testdata/test.tar -f testdata/test.mtree
+    fh, err = os.Open("./testdata/test.mtree")
+    if err != nil {
+        t.Fatal(err)
+    }
+    defer fh.Close()
+
+    dh, err := ParseSpec(fh)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    res, err := TarCheck(tdh, dh, append(DefaultKeywords, "sha1"))
+
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    // print any failures, and then call t.Fatal once all failures/extra/missing
+    // have been reported
+    if res != nil {
+        errors := ""
+        switch {
+        case len(res.Failures) > 0:
+            for _, f := range res.Failures {
+                fmt.Printf("%s\n", f)
+            }
+            errors += "Keyword validation errors\n"
+        case len(res.Missing) > 0:
+            for _, m := range res.Missing {
+                fmt.Printf("Missing file: %s\n", m.Path())
+            }
+            errors += "Missing files not expected for this test\n"
+        case len(res.Extra) > 0:
+            for _, e := range res.Extra {
+                fmt.Printf("Extra file: %s\n", e.Path())
+            }
+            errors += "Extra files not expected for this test\n"
+        }
+        if errors != "" {
+            t.Fatal(errors)
+        }
+    }
 }
 
 // minimal tar archive stream that mimics what is in ./testdata/test.tar
diff --git a/testdata/test.tar b/testdata/test.tar
index cc3de57..6ae9b22 100644
Binary files a/testdata/test.tar and b/testdata/test.tar differ
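
Reviewer note: the following is an illustrative, minimal sketch (not part of the patch) of how the new tar path can be driven through the library API introduced above — NewTarStreamer, Hierarchy, ParseSpec, and TarCheck — mirroring what cmd/gomtree now does for "-T <archive> -f <spec>". The import path and the testdata file names are assumptions for the example.

    package main

    import (
        "fmt"
        "io"
        "io/ioutil"
        "log"
        "os"

        mtree "github.com/vbatts/go-mtree" // assumed import path
    )

    func main() {
        // Build a DirectoryHierarchy from the tar archive (the -T path).
        tf, err := os.Open("testdata/test.tar") // hypothetical archive path
        if err != nil {
            log.Fatal(err)
        }
        defer tf.Close()

        ts := mtree.NewTarStreamer(tf, append(mtree.DefaultKeywords, "sha1"))
        if _, err := io.Copy(ioutil.Discard, ts); err != nil && err != io.EOF {
            log.Fatal(err)
        }
        if err := ts.Close(); err != nil {
            log.Fatal(err)
        }
        tdh, err := ts.Hierarchy()
        if err != nil {
            log.Fatal(err)
        }

        // Parse an existing spec (the -f path) and validate the tar against it.
        sf, err := os.Open("testdata/test.mtree") // hypothetical spec path
        if err != nil {
            log.Fatal(err)
        }
        defer sf.Close()
        dh, err := mtree.ParseSpec(sf)
        if err != nil {
            log.Fatal(err)
        }

        // nil keywords means every keyword present in the spec is checked
        res, err := mtree.TarCheck(tdh, dh, nil)
        if err != nil {
            log.Fatal(err)
        }
        for _, failure := range res.Failures {
            fmt.Println(failure)
        }
        for _, missing := range res.Missing {
            fmt.Printf("%s missing\n", missing.Path())
        }
        for _, extra := range res.Extra {
            fmt.Printf("%s extra\n", extra.Path())
        }
    }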