diff --git a/check.go b/check.go index 4adfe73..5586973 100644 --- a/check.go +++ b/check.go @@ -67,7 +67,6 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err } else { kvs = NewKeyVals(e.Keywords) } - for _, kv := range kvs { kw := kv.Keyword() // 'tar_time' keyword evaluation wins against 'time' keyword evaluation diff --git a/entry.go b/entry.go index c6f5bec..8273e8d 100644 --- a/entry.go +++ b/entry.go @@ -42,6 +42,23 @@ func (e Entry) Descend(filename string) *Entry { return nil } +// Find is a wrapper around Descend that takes in a whole string path and tries +// to find that Entry +func (e Entry) Find(filepath string) *Entry { + resultnode := &e + for _, path := range strings.Split(filepath, "/") { + encoded, err := Vis(path) + if err != nil { + return nil + } + resultnode = resultnode.Descend(encoded) + if resultnode == nil { + return nil + } + } + return resultnode +} + // Ascend gets the parent of an Entry. Serves mainly to maintain readability // when traversing up and down an Entry tree func (e Entry) Ascend() *Entry { diff --git a/tar.go b/tar.go index 6c4c74a..1d63dbe 100644 --- a/tar.go +++ b/tar.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "io/ioutil" + "log" "os" "path/filepath" "strings" @@ -29,6 +30,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer { teeReader: io.TeeReader(r, pW), tarReader: tar.NewReader(pR), keywords: keywords, + hardlinks: map[string][]string{}, } go ts.readHeaders() @@ -37,6 +39,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer { type tarStream struct { root *Entry + hardlinks map[string][]string creator dhCreator pipeReader *io.PipeReader pipeWriter *io.PipeWriter @@ -118,6 +121,22 @@ func (ts *tarStream) readHeaders() { Type: RelativeType, } + // Keep track of which files are hardlinks so we can resolve them later + if hdr.Typeflag == tar.TypeLink { + linkFunc := KeywordFuncs["link"] + kv, err := linkFunc(hdr.Name, hdr.FileInfo(), nil) + if err != nil { + log.Println(err) + break + } + linkname := KeyVal(kv).Value() + if _, ok := ts.hardlinks[linkname]; !ok { + ts.hardlinks[linkname] = []string{hdr.Name} + } else { + ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name) + } + } + // now collect keywords on the file for _, keyword := range ts.keywords { if keyFunc, ok := KeywordFuncs[keyword]; ok { @@ -332,6 +351,39 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) { return } +// resolveHardlinks goes through an Entry tree, and finds the Entry's associated +// with hardlinks and fills them in with the actual data from the base file. +func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) { + originals := make(map[string]*Entry) + for base, links := range hardlinks { + var basefile *Entry + if seen, ok := originals[base]; !ok { + basefile = root.Find(base) + if basefile == nil { + log.Printf("%s does not exist in this tree\n", base) + continue + } + originals[base] = basefile + } else { + basefile = seen + } + for _, link := range links { + linkfile := root.Find(link) + if linkfile == nil { + log.Printf("%s does not exist in this tree\n", link) + continue + } + linkfile.Keywords = basefile.Keywords + if countlinks { + linkfile.Keywords = append(linkfile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1)) + } + } + if countlinks { + basefile.Keywords = append(basefile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1)) + } + } +} + // filter takes in a pointer to an Entry, and returns a slice of Entry's that // satisfy the predicate p func filter(root *Entry, p func(*Entry) bool) []Entry { @@ -415,6 +467,7 @@ func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) { if ts.root == nil { return nil, fmt.Errorf("root Entry not found, nothing to flatten") } + resolveHardlinks(ts.root, ts.hardlinks, inSlice("nlink", ts.keywords)) flatten(ts.root, &ts.creator, ts.keywords) return ts.creator.DH, nil } diff --git a/tar_test.go b/tar_test.go index b9e5171..5f547a7 100644 --- a/tar_test.go +++ b/tar_test.go @@ -320,6 +320,44 @@ func TestTreeTraversal(t *testing.T) { } } +func TestHardlinks(t *testing.T) { + fh, err := os.Open("./testdata/hardlinks.tar") + if err != nil { + t.Fatal(err) + } + str := NewTarStreamer(fh, append(DefaultTarKeywords, "nlink")) + + if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF { + t.Fatal(err) + } + if err = str.Close(); err != nil { + t.Fatal(err) + } + + fh.Close() + tdh, err := str.Hierarchy() + + if err != nil { + t.Fatal(err) + } + foundnlink := false + for _, e := range tdh.Entries { + if e.Type == RelativeType { + for _, kv := range e.Keywords { + if KeyVal(kv).Keyword() == "nlink" { + foundnlink = true + if KeyVal(kv).Value() != "3" { + t.Errorf("expected to have 3 hardlinks for %s", e.Name) + } + } + } + } + } + if !foundnlink { + t.Errorf("nlink expected to be evaluated") + } +} + // minimal tar archive stream that mimics what is in ./testdata/test.tar func makeTarStream() ([]byte, error) { buf := new(bytes.Buffer) diff --git a/testdata/hardlinks.tar b/testdata/hardlinks.tar new file mode 100644 index 0000000..f341ac2 Binary files /dev/null and b/testdata/hardlinks.tar differ