tar: resolve hardlinks when streaming archive
When streaming a tar archive, fill in the Entry of each hardlink with the data of the file that the hardlink actually refers to. Signed-off-by: Stephen Chung <schung@redhat.com>
This commit is contained in:
parent
ea6c6eff1b
commit
5837d00b07
5 changed files with 108 additions and 1 deletions
1
check.go
1
check.go
|
@ -67,7 +67,6 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
|
|||
} else {
|
||||
kvs = NewKeyVals(e.Keywords)
|
||||
}
|
||||
|
||||
for _, kv := range kvs {
|
||||
kw := kv.Keyword()
|
||||
// 'tar_time' keyword evaluation wins against 'time' keyword evaluation
|
||||
|
|
17
entry.go
17
entry.go
|
@ -42,6 +42,23 @@ func (e Entry) Descend(filename string) *Entry {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Find is a wrapper around Descend that takes in a whole string path and tries
|
||||
// to find that Entry
|
||||
func (e Entry) Find(filepath string) *Entry {
|
||||
resultnode := &e
|
||||
for _, path := range strings.Split(filepath, "/") {
|
||||
encoded, err := Vis(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
resultnode = resultnode.Descend(encoded)
|
||||
if resultnode == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return resultnode
|
||||
}
|
||||
|
||||
// Ascend gets the parent of an Entry. Serves mainly to maintain readability
|
||||
// when traversing up and down an Entry tree
|
||||
func (e Entry) Ascend() *Entry {
|
||||
|
|
53
tar.go
53
tar.go
|
@ -5,6 +5,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
@ -29,6 +30,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer {
|
|||
teeReader: io.TeeReader(r, pW),
|
||||
tarReader: tar.NewReader(pR),
|
||||
keywords: keywords,
|
||||
hardlinks: map[string][]string{},
|
||||
}
|
||||
|
||||
go ts.readHeaders()
|
||||
|
@ -37,6 +39,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer {
|
|||
|
||||
type tarStream struct {
|
||||
root *Entry
|
||||
hardlinks map[string][]string
|
||||
creator dhCreator
|
||||
pipeReader *io.PipeReader
|
||||
pipeWriter *io.PipeWriter
|
||||
|
@ -118,6 +121,22 @@ func (ts *tarStream) readHeaders() {
|
|||
Type: RelativeType,
|
||||
}
|
||||
|
||||
// Keep track of which files are hardlinks so we can resolve them later
|
||||
if hdr.Typeflag == tar.TypeLink {
|
||||
linkFunc := KeywordFuncs["link"]
|
||||
kv, err := linkFunc(hdr.Name, hdr.FileInfo(), nil)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
linkname := KeyVal(kv).Value()
|
||||
if _, ok := ts.hardlinks[linkname]; !ok {
|
||||
ts.hardlinks[linkname] = []string{hdr.Name}
|
||||
} else {
|
||||
ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// now collect keywords on the file
|
||||
for _, keyword := range ts.keywords {
|
||||
if keyFunc, ok := KeywordFuncs[keyword]; ok {
|
||||
|
@ -332,6 +351,39 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
|
|||
return
|
||||
}
|
||||
|
||||
// resolveHardlinks goes through an Entry tree, and finds the Entry's associated
|
||||
// with hardlinks and fills them in with the actual data from the base file.
|
||||
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
|
||||
originals := make(map[string]*Entry)
|
||||
for base, links := range hardlinks {
|
||||
var basefile *Entry
|
||||
if seen, ok := originals[base]; !ok {
|
||||
basefile = root.Find(base)
|
||||
if basefile == nil {
|
||||
log.Printf("%s does not exist in this tree\n", base)
|
||||
continue
|
||||
}
|
||||
originals[base] = basefile
|
||||
} else {
|
||||
basefile = seen
|
||||
}
|
||||
for _, link := range links {
|
||||
linkfile := root.Find(link)
|
||||
if linkfile == nil {
|
||||
log.Printf("%s does not exist in this tree\n", link)
|
||||
continue
|
||||
}
|
||||
linkfile.Keywords = basefile.Keywords
|
||||
if countlinks {
|
||||
linkfile.Keywords = append(linkfile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1))
|
||||
}
|
||||
}
|
||||
if countlinks {
|
||||
basefile.Keywords = append(basefile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// filter takes in a pointer to an Entry, and returns a slice of Entry's that
|
||||
// satisfy the predicate p
|
||||
func filter(root *Entry, p func(*Entry) bool) []Entry {
|
||||
|
@ -415,6 +467,7 @@ func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
|
|||
if ts.root == nil {
|
||||
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
|
||||
}
|
||||
resolveHardlinks(ts.root, ts.hardlinks, inSlice("nlink", ts.keywords))
|
||||
flatten(ts.root, &ts.creator, ts.keywords)
|
||||
return ts.creator.DH, nil
|
||||
}
|
||||
|
|
38
tar_test.go
38
tar_test.go
|
@ -320,6 +320,44 @@ func TestTreeTraversal(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHardlinks(t *testing.T) {
|
||||
fh, err := os.Open("./testdata/hardlinks.tar")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
str := NewTarStreamer(fh, append(DefaultTarKeywords, "nlink"))
|
||||
|
||||
if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = str.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fh.Close()
|
||||
tdh, err := str.Hierarchy()
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
foundnlink := false
|
||||
for _, e := range tdh.Entries {
|
||||
if e.Type == RelativeType {
|
||||
for _, kv := range e.Keywords {
|
||||
if KeyVal(kv).Keyword() == "nlink" {
|
||||
foundnlink = true
|
||||
if KeyVal(kv).Value() != "3" {
|
||||
t.Errorf("expected to have 3 hardlinks for %s", e.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !foundnlink {
|
||||
t.Errorf("nlink expected to be evaluated")
|
||||
}
|
||||
}
|
||||
|
||||
// minimal tar archive stream that mimics what is in ./testdata/test.tar
|
||||
func makeTarStream() ([]byte, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
|
|
BIN
testdata/hardlinks.tar
vendored
Normal file
BIN
testdata/hardlinks.tar
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue