mirror of
https://github.com/vbatts/go-mtree.git
synced 2024-11-21 16:05:40 +00:00
tar: populate Entry tree under a common root
Resolves #56. Now, the Entry tree will be populated under a common root (if necessary), so that a manifest can be accurately generated from a tar file that has been created using multiple directories. Signed-off-by: Stephen Chung <schung@redhat.com>
This commit is contained in:
parent
6a37331074
commit
3d6b74d6f7
22 changed files with 287 additions and 91 deletions
4
check.go
4
check.go
|
@ -42,7 +42,6 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
|
|||
return nil, err
|
||||
}
|
||||
sort.Sort(byPos(creator.DH.Entries))
|
||||
|
||||
var result Result
|
||||
for i, e := range creator.DH.Entries {
|
||||
switch e.Type {
|
||||
|
@ -118,6 +117,9 @@ func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error)
|
|||
break
|
||||
}
|
||||
}
|
||||
if tarRoot == nil {
|
||||
return nil, fmt.Errorf("root of archive could not be found")
|
||||
}
|
||||
tarRoot.Next = &Entry{
|
||||
Name: "seen",
|
||||
Type: CommentType,
|
||||
|
|
112
tar.go
112
tar.go
|
@ -49,18 +49,15 @@ type tarStream struct {
|
|||
func (ts *tarStream) readHeaders() {
|
||||
// We have to start with the directory we're in, and anything beyond these
|
||||
// items is determined at the time a tar is extracted.
|
||||
rootComment := Entry{
|
||||
Raw: "# .",
|
||||
Type: CommentType,
|
||||
}
|
||||
ts.root = &Entry{
|
||||
Name: ".",
|
||||
Type: RelativeType,
|
||||
Prev: &rootComment,
|
||||
Set: &Entry{
|
||||
Name: "meta-set",
|
||||
Type: SpecialType,
|
||||
Prev: &Entry{
|
||||
Raw: "# .",
|
||||
Type: CommentType,
|
||||
},
|
||||
Set: nil,
|
||||
Keywords: []string{"type=dir"},
|
||||
}
|
||||
metadataEntries := signatureEntries("<user specified tar archive>")
|
||||
for _, e := range metadataEntries {
|
||||
|
@ -163,12 +160,8 @@ func (ts *tarStream) readHeaders() {
|
|||
}
|
||||
}
|
||||
}
|
||||
if filepath.Dir(filepath.Clean(hdr.Name)) == "." {
|
||||
ts.root.Set = &s
|
||||
} else {
|
||||
e.Set = &s
|
||||
}
|
||||
}
|
||||
err = populateTree(ts.root, &e, hdr)
|
||||
if err != nil {
|
||||
ts.setErr(err)
|
||||
|
@ -178,15 +171,6 @@ func (ts *tarStream) readHeaders() {
|
|||
}
|
||||
}
|
||||
|
||||
type relationship int
|
||||
|
||||
const (
|
||||
unknownDir relationship = iota
|
||||
sameDir
|
||||
childDir
|
||||
parentDir
|
||||
)
|
||||
|
||||
// populateTree creates a pseudo file tree hierarchy using an Entry's Parent and
|
||||
// Children fields. When examining the Entry e to insert in the tree, we
|
||||
// determine if the path to that Entry exists yet. If it does, insert it in the
|
||||
|
@ -196,35 +180,50 @@ const (
|
|||
// e: the Entry we are looking to insert
|
||||
// hdr: the tar header struct associated with e
|
||||
func populateTree(root, e *Entry, hdr *tar.Header) error {
|
||||
if root == nil || e == nil {
|
||||
return fmt.Errorf("cannot populate or insert nil Entry's")
|
||||
} else if root.Prev == nil {
|
||||
return fmt.Errorf("root needs to be an Entry associated with a directory")
|
||||
}
|
||||
isDir := hdr.FileInfo().IsDir()
|
||||
wd := filepath.Clean(hdr.Name)
|
||||
if !isDir {
|
||||
// If entry is a file, we only want the directory it's in.
|
||||
// directory up until the actual file
|
||||
wd = filepath.Dir(wd)
|
||||
}
|
||||
if filepath.Dir(wd) == "." {
|
||||
if isDir {
|
||||
root.Keywords = e.Keywords
|
||||
} else {
|
||||
if wd == "." {
|
||||
// If file in root directory, no need to traverse down, just append
|
||||
root.Children = append([]*Entry{e}, root.Children...)
|
||||
e.Parent = root
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
// TODO: what about directory/file names with "/" in it?
|
||||
dirNames := strings.Split(wd, "/")
|
||||
parent := root
|
||||
for _, name := range dirNames[1:] {
|
||||
for _, name := range dirNames[:] {
|
||||
encoded, err := Vis(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if node := parent.Descend(encoded); node == nil {
|
||||
// Entry for directory doesn't exist in tree relative to root
|
||||
// Entry for directory doesn't exist in tree relative to root.
|
||||
// We don't know if this directory is an actual tar header (because a
|
||||
// user could have just specified a path to a deep file), so we must
|
||||
// specify this placeholder directory as a "type=dir", and Set=nil.
|
||||
newEntry := Entry{
|
||||
Name: encoded,
|
||||
Type: RelativeType,
|
||||
Parent: parent,
|
||||
Keywords: []string{"type=dir"}, // temp data
|
||||
Set: nil, // temp data
|
||||
}
|
||||
pathname, err := newEntry.Path()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newEntry.Prev = &Entry{
|
||||
Type: CommentType,
|
||||
Raw: "# " + pathname,
|
||||
}
|
||||
parent.Children = append(parent.Children, &newEntry)
|
||||
parent = &newEntry
|
||||
|
@ -237,21 +236,9 @@ func populateTree(root, e *Entry, hdr *tar.Header) error {
|
|||
parent.Children = append([]*Entry{e}, parent.Children...)
|
||||
e.Parent = parent
|
||||
} else {
|
||||
// the "placeholder" directory already exists in the Entry "parent",
|
||||
// so now we have to replace its underlying data with that from e,
|
||||
// as well as set the Parent field. Note that we don't set parent = e
|
||||
// because parent is already in the pseudo tree, we just need to
|
||||
// complete its data.
|
||||
e.Parent = parent.Parent
|
||||
*parent = *e
|
||||
commentpath, err := parent.Path()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parent.Prev = &Entry{
|
||||
Raw: "# " + commentpath,
|
||||
Type: CommentType,
|
||||
}
|
||||
// fill in the actual data from e
|
||||
parent.Keywords = e.Keywords
|
||||
parent.Set = e.Set
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -262,7 +249,7 @@ func populateTree(root, e *Entry, hdr *tar.Header) error {
|
|||
// creator: a dhCreator that helps with the '/set' keyword
|
||||
// keywords: keywords specified by the user that should be evaluated
|
||||
func flatten(root *Entry, creator *dhCreator, keywords []string) {
|
||||
if root == nil {
|
||||
if root == nil || creator == nil {
|
||||
return
|
||||
}
|
||||
if root.Prev != nil {
|
||||
|
@ -275,6 +262,7 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
|
|||
root.Prev.Pos = len(creator.DH.Entries)
|
||||
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
|
||||
|
||||
if root.Set != nil {
|
||||
// Check if we need a new set
|
||||
if creator.curSet == nil {
|
||||
creator.curSet = &Entry{
|
||||
|
@ -302,16 +290,26 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
|
|||
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
|
||||
}
|
||||
}
|
||||
} else if creator.curSet != nil {
|
||||
// Getting into here implies that the Entry's set has not and
|
||||
// was not supposed to be evaluated, thus, we need to reset curSet
|
||||
creator.DH.Entries = append(creator.DH.Entries, Entry{
|
||||
Name: "/unset",
|
||||
Type: SpecialType,
|
||||
Pos: len(creator.DH.Entries),
|
||||
})
|
||||
creator.curSet = nil
|
||||
}
|
||||
}
|
||||
root.Set = creator.curSet
|
||||
if creator.curSet != nil {
|
||||
root.Keywords = setDifference(root.Keywords, creator.curSet.Keywords)
|
||||
}
|
||||
root.Pos = len(creator.DH.Entries)
|
||||
creator.DH.Entries = append(creator.DH.Entries, *root)
|
||||
|
||||
for _, c := range root.Children {
|
||||
flatten(c, creator, keywords)
|
||||
}
|
||||
|
||||
if root.Prev != nil {
|
||||
// Show a comment when stepping out
|
||||
root.Prev.Pos = len(creator.DH.Entries)
|
||||
|
@ -329,16 +327,16 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) {
|
|||
// filter takes in a pointer to an Entry, and returns a slice of Entry's that
|
||||
// satisfy the predicate p
|
||||
func filter(root *Entry, p func(*Entry) bool) []Entry {
|
||||
if root != nil {
|
||||
var validEntrys []Entry
|
||||
if len(root.Children) > 0 || root.Prev != nil {
|
||||
for _, c := range root.Children {
|
||||
// if an Entry is a directory, filter the directory
|
||||
// filter the sub-directory
|
||||
if c.Prev != nil {
|
||||
validEntrys = append(validEntrys, filter(c, p)...)
|
||||
}
|
||||
if p(c) {
|
||||
if c.Prev == nil {
|
||||
// prepend files
|
||||
validEntrys = append([]Entry{*c}, validEntrys...)
|
||||
} else {
|
||||
validEntrys = append(validEntrys, *c)
|
||||
|
@ -347,6 +345,7 @@ func filter(root *Entry, p func(*Entry) bool) []Entry {
|
|||
}
|
||||
return validEntrys
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -363,6 +362,15 @@ func setDifference(this, that []string) []string {
|
|||
return diff
|
||||
}
|
||||
|
||||
type relationship int
|
||||
|
||||
const (
|
||||
unknownDir relationship = iota
|
||||
sameDir
|
||||
childDir
|
||||
parentDir
|
||||
)
|
||||
|
||||
func compareDir(curDir, prevDir string) relationship {
|
||||
curDir = filepath.Clean(curDir)
|
||||
prevDir = filepath.Clean(prevDir)
|
||||
|
@ -390,12 +398,14 @@ func (ts *tarStream) Close() error {
|
|||
return ts.pipeReader.Close()
|
||||
}
|
||||
|
||||
// Hierarchy returns the DirectoryHierarchy of the archive. It flattens the
|
||||
// Entry tree before returning the DirectoryHierarchy
|
||||
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
|
||||
if ts.err != nil && ts.err != io.EOF {
|
||||
return nil, ts.err
|
||||
}
|
||||
if ts.root == nil {
|
||||
return nil, fmt.Errorf("root Entry not found. Nothing to flatten")
|
||||
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
|
||||
}
|
||||
flatten(ts.root, &ts.creator, ts.keywords)
|
||||
return ts.creator.DH, nil
|
||||
|
|
178
tar_test.go
178
tar_test.go
|
@ -142,6 +142,184 @@ func TestTar(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// This test checks how gomtree handles archives that were created
|
||||
// with multiple directories, i.e, archives created with something like:
|
||||
// `tar -cvf some.tar dir1 dir2 dir3 dir4/dir5 dir6` ... etc.
|
||||
// The testdata of collection.tar resembles such an archive. The `collection` folder
|
||||
// is the contents of `collection.tar` extracted
|
||||
func TestArchiveCreation(t *testing.T) {
|
||||
fh, err := os.Open("./testdata/collection.tar")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
str := NewTarStreamer(fh, []string{"sha1"})
|
||||
|
||||
if _, err := io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := str.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// get DirectoryHierarchy struct from walking the tar archive
|
||||
tdh, err := str.Hierarchy()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Test the tar manifest against the actual directory
|
||||
res, err := Check("./testdata/collection", tdh, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if res != nil {
|
||||
for _, f := range res.Failures {
|
||||
t.Errorf(f.String())
|
||||
}
|
||||
for _, e := range res.Extra {
|
||||
t.Errorf("%s extra not expected", e.Name)
|
||||
}
|
||||
for _, m := range res.Missing {
|
||||
t.Errorf("%s missing not expected", m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Test the tar manifest against itself
|
||||
res, err = TarCheck(tdh, tdh, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if res != nil {
|
||||
for _, f := range res.Failures {
|
||||
t.Errorf(f.String())
|
||||
}
|
||||
for _, e := range res.Extra {
|
||||
t.Errorf("%s extra not expected", e.Name)
|
||||
}
|
||||
for _, m := range res.Missing {
|
||||
t.Errorf("%s missing not expected", m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the directory manifest against the archive
|
||||
dh, err := Walk("./testdata/collection", nil, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
res, err = TarCheck(tdh, dh, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if res != nil {
|
||||
for _, f := range res.Failures {
|
||||
t.Errorf(f.String())
|
||||
}
|
||||
for _, e := range res.Extra {
|
||||
t.Errorf("%s extra not expected", e.Name)
|
||||
}
|
||||
for _, m := range res.Missing {
|
||||
t.Errorf("%s missing not expected", m.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now test a tar file that was created with just the path to a file. In this
|
||||
// test case, the traversal and creation of "placeholder" directories are
|
||||
// evaluated. Also, the fact that this archive contains a single entry, yet the
|
||||
// entry is associated with a file that has parent directories, means that the
|
||||
// "." directory should be the lowest sub-directory under which `file` is contained.
|
||||
func TestTreeTraversal(t *testing.T) {
|
||||
fh, err := os.Open("./testdata/traversal.tar")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
str := NewTarStreamer(fh, DefaultTarKeywords)
|
||||
|
||||
if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = str.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fh.Close()
|
||||
tdh, err := str.Hierarchy()
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
res, err := TarCheck(tdh, tdh, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if res != nil {
|
||||
for _, f := range res.Failures {
|
||||
t.Errorf(f.String())
|
||||
}
|
||||
for _, e := range res.Extra {
|
||||
t.Errorf("%s extra not expected", e.Name)
|
||||
}
|
||||
for _, m := range res.Missing {
|
||||
t.Errorf("%s missing not expected", m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// top-level "." directory will contain contents of traversal.tar
|
||||
res, err = Check("./testdata/.", tdh, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if res != nil {
|
||||
for _, f := range res.Failures {
|
||||
t.Errorf(f.String())
|
||||
}
|
||||
for _, e := range res.Extra {
|
||||
t.Errorf("%s extra not expected", e.Name)
|
||||
}
|
||||
for _, m := range res.Missing {
|
||||
t.Errorf("%s missing not expected", m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Now test an archive that requires placeholder directories, i.e, there are
|
||||
// no headers in the archive that are associated with the actual directory name
|
||||
fh, err = os.Open("./testdata/singlefile.tar")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
str = NewTarStreamer(fh, DefaultTarKeywords)
|
||||
if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = str.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tdh, err = str.Hierarchy()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Implied top-level "." directory will contain the contents of singlefile.tar
|
||||
res, err = Check("./testdata/.", tdh, []string{"sha1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if res != nil {
|
||||
for _, f := range res.Failures {
|
||||
t.Errorf(f.String())
|
||||
}
|
||||
for _, e := range res.Extra {
|
||||
t.Errorf("%s extra not expected", e.Name)
|
||||
}
|
||||
for _, m := range res.Missing {
|
||||
t.Errorf("%s missing not expected", m.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// minimal tar archive stream that mimics what is in ./testdata/test.tar
|
||||
func makeTarStream() ([]byte, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
|
|
BIN
testdata/collection.tar
vendored
Normal file
BIN
testdata/collection.tar
vendored
Normal file
Binary file not shown.
0
testdata/collection/dir1/file1
vendored
Normal file
0
testdata/collection/dir1/file1
vendored
Normal file
0
testdata/collection/dir2/file2
vendored
Normal file
0
testdata/collection/dir2/file2
vendored
Normal file
0
testdata/collection/dir3/file3
vendored
Normal file
0
testdata/collection/dir3/file3
vendored
Normal file
0
testdata/collection/dir4/file4
vendored
Normal file
0
testdata/collection/dir4/file4
vendored
Normal file
0
testdata/collection/dir5/dir6/dir7/lonelyfile
vendored
Normal file
0
testdata/collection/dir5/dir6/dir7/lonelyfile
vendored
Normal file
1
testdata/collection/file1
vendored
Normal file
1
testdata/collection/file1
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
im
|
1
testdata/collection/file2
vendored
Normal file
1
testdata/collection/file2
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
hello
|
1
testdata/collection/file3
vendored
Normal file
1
testdata/collection/file3
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
programming
|
BIN
testdata/singlefile.tar
vendored
Normal file
BIN
testdata/singlefile.tar
vendored
Normal file
Binary file not shown.
0
testdata/singlefile/dir2/dir3/file
vendored
Normal file
0
testdata/singlefile/dir2/dir3/file
vendored
Normal file
BIN
testdata/traversal.tar
vendored
Normal file
BIN
testdata/traversal.tar
vendored
Normal file
Binary file not shown.
0
testdata/traversal/dir2/dir3/actualdir1/file1
vendored
Normal file
0
testdata/traversal/dir2/dir3/actualdir1/file1
vendored
Normal file
0
testdata/traversal/dir2/dir3/actualdir1/file2
vendored
Normal file
0
testdata/traversal/dir2/dir3/actualdir1/file2
vendored
Normal file
1
testdata/traversal/dir2/dir3/actualdir2/a
vendored
Normal file
1
testdata/traversal/dir2/dir3/actualdir2/a
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
hello
|
1
testdata/traversal/dir2/dir3/actualdir2/b
vendored
Normal file
1
testdata/traversal/dir2/dir3/actualdir2/b
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
I'm
|
1
testdata/traversal/dir2/dir3/actualdir2/c
vendored
Normal file
1
testdata/traversal/dir2/dir3/actualdir2/c
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
programming
|
0
testdata/traversal/dir2/dir3/file
vendored
Normal file
0
testdata/traversal/dir2/dir3/file
vendored
Normal file
0
testdata/traversal/dir2/dir3/file3
vendored
Normal file
0
testdata/traversal/dir2/dir3/file3
vendored
Normal file
Loading…
Reference in a new issue