tar: resolve hardlinks when streaming archive

For each hardlink in the archive, fill in its Entry with the keywords of
the file that the hardlink refers to.

Signed-off-by: Stephen Chung <schung@redhat.com>
This commit is contained in:
Stephen Chung 2016-08-10 11:40:47 -04:00
parent ea6c6eff1b
commit 5837d00b07
5 changed files with 108 additions and 1 deletions

View File

@ -67,7 +67,6 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
} else {
kvs = NewKeyVals(e.Keywords)
}
for _, kv := range kvs {
kw := kv.Keyword()
// 'tar_time' keyword evaluation wins against 'time' keyword evaluation

View File

@ -42,6 +42,23 @@ func (e Entry) Descend(filename string) *Entry {
return nil
}
// Find looks up a slash-separated path relative to e. The path is split on
// "/" and each component is passed through Vis before being handed to
// Descend, one level at a time. It returns nil as soon as a component cannot
// be encoded or Descend yields no Entry for it; otherwise it returns the Entry
// reached after the last component.
func (e Entry) Find(filepath string) *Entry {
	cur := &e
	components := strings.Split(filepath, "/")
	for i := 0; i < len(components); i++ {
		name, err := Vis(components[i])
		if err != nil {
			return nil
		}
		if cur = cur.Descend(name); cur == nil {
			return nil
		}
	}
	return cur
}
// Ascend gets the parent of an Entry. Serves mainly to maintain readability
// when traversing up and down an Entry tree
func (e Entry) Ascend() *Entry {

53
tar.go
View File

@ -5,6 +5,7 @@ import (
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
@ -29,6 +30,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer {
teeReader: io.TeeReader(r, pW),
tarReader: tar.NewReader(pR),
keywords: keywords,
hardlinks: map[string][]string{},
}
go ts.readHeaders()
@ -37,6 +39,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer {
type tarStream struct {
root *Entry
hardlinks map[string][]string
creator dhCreator
pipeReader *io.PipeReader
pipeWriter *io.PipeWriter
@ -118,6 +121,22 @@ func (ts *tarStream) readHeaders() {
Type: RelativeType,
}
// Keep track of which files are hardlinks so we can resolve them later
if hdr.Typeflag == tar.TypeLink {
linkFunc := KeywordFuncs["link"]
kv, err := linkFunc(hdr.Name, hdr.FileInfo(), nil)
if err != nil {
log.Println(err)
break
}
linkname := KeyVal(kv).Value()
if _, ok := ts.hardlinks[linkname]; !ok {
ts.hardlinks[linkname] = []string{hdr.Name}
} else {
ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
}
}
// now collect keywords on the file
for _, keyword := range ts.keywords {
if keyFunc, ok := KeywordFuncs[keyword]; ok {
@ -332,6 +351,39 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
return
}
// resolveHardlinks goes through an Entry tree, finds the Entries associated
// with hardlinks, and fills them in with the keywords of the base file.
//
// root is the Entry that every path is looked up from via Find. hardlinks maps
// the path of a base file to the paths of the hardlinks that refer to it. When
// countlinks is true, an "nlink=N" keyword is appended to the base file and to
// each resolved hardlink, where N is the number of recorded links plus one for
// the base file itself.
//
// Paths that Find cannot locate are logged and skipped; a missing base file
// skips its whole group of links.
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
	// Map keys are unique, so each base file is visited exactly once and no
	// cache of already-resolved base files is needed.
	for base, links := range hardlinks {
		basefile := root.Find(base)
		if basefile == nil {
			log.Printf("%s does not exist in this tree\n", base)
			continue
		}

		// Every member of the link group gets the same count.
		nlink := fmt.Sprintf("nlink=%d", len(links)+1)

		for _, link := range links {
			linkfile := root.Find(link)
			if linkfile == nil {
				log.Printf("%s does not exist in this tree\n", link)
				continue
			}
			// Copy instead of aliasing: a zero-capacity prefix forces append
			// to allocate, so later appends to one Entry's keywords can never
			// write into the backing array of another Entry.
			linkfile.Keywords = append(basefile.Keywords[:0:0], basefile.Keywords...)
			if countlinks {
				linkfile.Keywords = append(linkfile.Keywords, nlink)
			}
		}

		// Appended after the links are filled in so that the links receive
		// the base keywords without a duplicate nlink entry.
		if countlinks {
			basefile.Keywords = append(basefile.Keywords, nlink)
		}
	}
}
// filter takes in a pointer to an Entry, and returns a slice of Entries that
// satisfy the predicate p
func filter(root *Entry, p func(*Entry) bool) []Entry {
@ -415,6 +467,7 @@ func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
if ts.root == nil {
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
}
resolveHardlinks(ts.root, ts.hardlinks, inSlice("nlink", ts.keywords))
flatten(ts.root, &ts.creator, ts.keywords)
return ts.creator.DH, nil
}

View File

@ -320,6 +320,44 @@ func TestTreeTraversal(t *testing.T) {
}
}
func TestHardlinks(t *testing.T) {
fh, err := os.Open("./testdata/hardlinks.tar")
if err != nil {
t.Fatal(err)
}
str := NewTarStreamer(fh, append(DefaultTarKeywords, "nlink"))
if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
t.Fatal(err)
}
if err = str.Close(); err != nil {
t.Fatal(err)
}
fh.Close()
tdh, err := str.Hierarchy()
if err != nil {
t.Fatal(err)
}
foundnlink := false
for _, e := range tdh.Entries {
if e.Type == RelativeType {
for _, kv := range e.Keywords {
if KeyVal(kv).Keyword() == "nlink" {
foundnlink = true
if KeyVal(kv).Value() != "3" {
t.Errorf("expected to have 3 hardlinks for %s", e.Name)
}
}
}
}
}
if !foundnlink {
t.Errorf("nlink expected to be evaluated")
}
}
// minimal tar archive stream that mimics what is in ./testdata/test.tar
func makeTarStream() ([]byte, error) {
buf := new(bytes.Buffer)

BIN
testdata/hardlinks.tar vendored Normal file

Binary file not shown.