tar: resolve hardlinks when streaming archive
Fill in the data of the Entry with the data of the file that a hardlink actually represents. Signed-off-by: Stephen Chung <schung@redhat.com>
This commit is contained in:
parent
ea6c6eff1b
commit
5837d00b07
5 changed files with 108 additions and 1 deletions
1
check.go
1
check.go
|
@ -67,7 +67,6 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
|
||||||
} else {
|
} else {
|
||||||
kvs = NewKeyVals(e.Keywords)
|
kvs = NewKeyVals(e.Keywords)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, kv := range kvs {
|
for _, kv := range kvs {
|
||||||
kw := kv.Keyword()
|
kw := kv.Keyword()
|
||||||
// 'tar_time' keyword evaluation wins against 'time' keyword evaluation
|
// 'tar_time' keyword evaluation wins against 'time' keyword evaluation
|
||||||
|
|
17
entry.go
17
entry.go
|
@ -42,6 +42,23 @@ func (e Entry) Descend(filename string) *Entry {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find is a wrapper around Descend that takes in a whole string path and tries
|
||||||
|
// to find that Entry
|
||||||
|
func (e Entry) Find(filepath string) *Entry {
|
||||||
|
resultnode := &e
|
||||||
|
for _, path := range strings.Split(filepath, "/") {
|
||||||
|
encoded, err := Vis(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
resultnode = resultnode.Descend(encoded)
|
||||||
|
if resultnode == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return resultnode
|
||||||
|
}
|
||||||
|
|
||||||
// Ascend gets the parent of an Entry. Serves mainly to maintain readability
|
// Ascend gets the parent of an Entry. Serves mainly to maintain readability
|
||||||
// when traversing up and down an Entry tree
|
// when traversing up and down an Entry tree
|
||||||
func (e Entry) Ascend() *Entry {
|
func (e Entry) Ascend() *Entry {
|
||||||
|
|
53
tar.go
53
tar.go
|
@ -5,6 +5,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -29,6 +30,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer {
|
||||||
teeReader: io.TeeReader(r, pW),
|
teeReader: io.TeeReader(r, pW),
|
||||||
tarReader: tar.NewReader(pR),
|
tarReader: tar.NewReader(pR),
|
||||||
keywords: keywords,
|
keywords: keywords,
|
||||||
|
hardlinks: map[string][]string{},
|
||||||
}
|
}
|
||||||
|
|
||||||
go ts.readHeaders()
|
go ts.readHeaders()
|
||||||
|
@ -37,6 +39,7 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer {
|
||||||
|
|
||||||
type tarStream struct {
|
type tarStream struct {
|
||||||
root *Entry
|
root *Entry
|
||||||
|
hardlinks map[string][]string
|
||||||
creator dhCreator
|
creator dhCreator
|
||||||
pipeReader *io.PipeReader
|
pipeReader *io.PipeReader
|
||||||
pipeWriter *io.PipeWriter
|
pipeWriter *io.PipeWriter
|
||||||
|
@ -118,6 +121,22 @@ func (ts *tarStream) readHeaders() {
|
||||||
Type: RelativeType,
|
Type: RelativeType,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep track of which files are hardlinks so we can resolve them later
|
||||||
|
if hdr.Typeflag == tar.TypeLink {
|
||||||
|
linkFunc := KeywordFuncs["link"]
|
||||||
|
kv, err := linkFunc(hdr.Name, hdr.FileInfo(), nil)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
linkname := KeyVal(kv).Value()
|
||||||
|
if _, ok := ts.hardlinks[linkname]; !ok {
|
||||||
|
ts.hardlinks[linkname] = []string{hdr.Name}
|
||||||
|
} else {
|
||||||
|
ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// now collect keywords on the file
|
// now collect keywords on the file
|
||||||
for _, keyword := range ts.keywords {
|
for _, keyword := range ts.keywords {
|
||||||
if keyFunc, ok := KeywordFuncs[keyword]; ok {
|
if keyFunc, ok := KeywordFuncs[keyword]; ok {
|
||||||
|
@ -332,6 +351,39 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resolveHardlinks goes through an Entry tree, and finds the Entry's associated
|
||||||
|
// with hardlinks and fills them in with the actual data from the base file.
|
||||||
|
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
|
||||||
|
originals := make(map[string]*Entry)
|
||||||
|
for base, links := range hardlinks {
|
||||||
|
var basefile *Entry
|
||||||
|
if seen, ok := originals[base]; !ok {
|
||||||
|
basefile = root.Find(base)
|
||||||
|
if basefile == nil {
|
||||||
|
log.Printf("%s does not exist in this tree\n", base)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
originals[base] = basefile
|
||||||
|
} else {
|
||||||
|
basefile = seen
|
||||||
|
}
|
||||||
|
for _, link := range links {
|
||||||
|
linkfile := root.Find(link)
|
||||||
|
if linkfile == nil {
|
||||||
|
log.Printf("%s does not exist in this tree\n", link)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
linkfile.Keywords = basefile.Keywords
|
||||||
|
if countlinks {
|
||||||
|
linkfile.Keywords = append(linkfile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if countlinks {
|
||||||
|
basefile.Keywords = append(basefile.Keywords, fmt.Sprintf("nlink=%d", len(links)+1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// filter takes in a pointer to an Entry, and returns a slice of Entry's that
|
// filter takes in a pointer to an Entry, and returns a slice of Entry's that
|
||||||
// satisfy the predicate p
|
// satisfy the predicate p
|
||||||
func filter(root *Entry, p func(*Entry) bool) []Entry {
|
func filter(root *Entry, p func(*Entry) bool) []Entry {
|
||||||
|
@ -415,6 +467,7 @@ func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
|
||||||
if ts.root == nil {
|
if ts.root == nil {
|
||||||
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
|
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
|
||||||
}
|
}
|
||||||
|
resolveHardlinks(ts.root, ts.hardlinks, inSlice("nlink", ts.keywords))
|
||||||
flatten(ts.root, &ts.creator, ts.keywords)
|
flatten(ts.root, &ts.creator, ts.keywords)
|
||||||
return ts.creator.DH, nil
|
return ts.creator.DH, nil
|
||||||
}
|
}
|
||||||
|
|
38
tar_test.go
38
tar_test.go
|
@ -320,6 +320,44 @@ func TestTreeTraversal(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHardlinks(t *testing.T) {
|
||||||
|
fh, err := os.Open("./testdata/hardlinks.tar")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
str := NewTarStreamer(fh, append(DefaultTarKeywords, "nlink"))
|
||||||
|
|
||||||
|
if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err = str.Close(); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fh.Close()
|
||||||
|
tdh, err := str.Hierarchy()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
foundnlink := false
|
||||||
|
for _, e := range tdh.Entries {
|
||||||
|
if e.Type == RelativeType {
|
||||||
|
for _, kv := range e.Keywords {
|
||||||
|
if KeyVal(kv).Keyword() == "nlink" {
|
||||||
|
foundnlink = true
|
||||||
|
if KeyVal(kv).Value() != "3" {
|
||||||
|
t.Errorf("expected to have 3 hardlinks for %s", e.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundnlink {
|
||||||
|
t.Errorf("nlink expected to be evaluated")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// minimal tar archive stream that mimics what is in ./testdata/test.tar
|
// minimal tar archive stream that mimics what is in ./testdata/test.tar
|
||||||
func makeTarStream() ([]byte, error) {
|
func makeTarStream() ([]byte, error) {
|
||||||
buf := new(bytes.Buffer)
|
buf := new(bytes.Buffer)
|
||||||
|
|
BIN
testdata/hardlinks.tar
vendored
Normal file
BIN
testdata/hardlinks.tar
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue