From 822f319224573a1e1670f727ef71c7b8999c4530 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 31 Jul 2016 21:49:32 -0400 Subject: [PATCH 1/4] tar: minor refactoring of populateTree and flatten Cleaned up some dead code, and made populateTree not take in a *tar.Streamer argument, as the ts argument was only used to set an error. The function now returns an error (if any). Also made flatten not have to take in a *tar.Streamer argument as well. Signed-off-by: Stephen Chung --- tar.go | 131 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 65 insertions(+), 66 deletions(-) diff --git a/tar.go b/tar.go index 9ea759d..4af3121 100644 --- a/tar.go +++ b/tar.go @@ -67,11 +67,10 @@ func (ts *tarStream) readHeaders() { for { hdr, err := ts.tarReader.Next() if err != nil { - flatten(&root, ts) + flatten(&root, &ts.creator, ts.keywords) ts.pipeReader.CloseWithError(err) return } - // Because the content of the file may need to be read by several // KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from // ts.tarReader is not enough. @@ -117,10 +116,7 @@ func (ts *tarStream) readHeaders() { if hdr.FileInfo().IsDir() && keyword == "size" { continue } - - if string(hdr.Typeflag) == string('1') { - // TODO: get number of hardlinks for a file - } + // TODO: handle hardlinks val, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile) if err != nil { ts.setErr(err) @@ -171,7 +167,10 @@ func (ts *tarStream) readHeaders() { e.Set = &s } } - populateTree(&root, &e, hdr, ts) + err = populateTree(&root, &e, hdr) + if err != nil { + ts.setErr(err) + } tmpFile.Close() os.Remove(tmpFile.Name()) } @@ -186,16 +185,17 @@ const ( parentDir ) -// populateTree creates a file tree hierarchy using an Entry's Parent and +// populateTree creates a pseudo file tree hierarchy using an Entry's Parent and // Children fields. When examining the Entry e to insert in the tree, we // determine if the path to that Entry exists yet. If it does, insert it in the -// appropriate position in the tree. If not, create a path with "placeholder" -// directories, and then insert the Entry. populateTree does not consider -// symbolic links yet. -func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) { +// appropriate position in the tree. If not, create a path up until the Entry's +// directory that it is contained in. Then, insert the Entry. +// root: the "." Entry +// e: the Entry we are looking to insert +// hdr: the tar header struct associated with e +func populateTree(root, e *Entry, hdr *tar.Header) error { isDir := hdr.FileInfo().IsDir() wd := filepath.Clean(hdr.Name) - if !isDir { // If entry is a file, we only want the directory it's in. wd = filepath.Dir(wd) @@ -207,31 +207,24 @@ func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) { root.Children = append([]*Entry{e}, root.Children...) e.Parent = root } - return + return nil } - dirNames := strings.Split(wd, "/") parent := root for _, name := range dirNames[1:] { - if node := parent.Descend(name); node == nil { + encoded, err := Vis(name) + if err != nil { + return err + } + if node := parent.Descend(encoded); node == nil { // Entry for directory doesn't exist in tree relative to root - var newEntry *Entry - if isDir { - newEntry = e - } else { - encodedName, err := Vis(name) - if err != nil { - ts.setErr(err) - return - } - newEntry = &Entry{ - Name: encodedName, - Type: RelativeType, - } + newEntry := Entry{ + Name: encoded, + Type: RelativeType, + Parent: parent, } - newEntry.Parent = parent - parent.Children = append(parent.Children, newEntry) - parent = newEntry + parent.Children = append(parent.Children, &newEntry) + parent = &newEntry } else { // Entry for directory exists in tree, just keep going parent = node @@ -241,82 +234,88 @@ func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) { parent.Children = append([]*Entry{e}, parent.Children...) e.Parent = parent } else { - commentpath, err := e.Path() + // the "placeholder" directory already exists in the Entry "parent", + // so now we have to replace it's underlying data with that from e, + // as well as set the Parent field. Note that we don't set parent = e + // because parent is already in the pseudo tree, we just need to + // complete it's data. + e.Parent = parent.Parent + *parent = *e + commentpath, err := parent.Path() if err != nil { - ts.setErr(err) - return + return err } - commentEntry := Entry{ + parent.Prev = &Entry{ Raw: "# " + commentpath, Type: CommentType, } - e.Prev = &commentEntry } + return nil } -// After constructing the tree from the tar stream, we want to "flatten" this -// tree by appending Entry's into ts.creator.DH.Entries in an appropriate -// manner to simplify writing the output with ts.creator.DH.WriteTo +// After constructing a pseudo file hierarchy tree, we want to "flatten" this +// tree by putting the Entries into a slice with appropriate positioning. // root: the "head" of the sub-tree to flatten -// ts : tarStream to keep track of Entry's -func flatten(root *Entry, ts *tarStream) { +// creator: a dhCreator that helps with the '/set' keyword +// keywords: keywords specified by the user that should be evaluated +func flatten(root *Entry, creator *dhCreator, keywords []string) { if root.Prev != nil { // root.Prev != nil implies root is a directory - ts.creator.DH.Entries = append(ts.creator.DH.Entries, + creator.DH.Entries = append(creator.DH.Entries, Entry{ Type: BlankType, - Pos: len(ts.creator.DH.Entries), + Pos: len(creator.DH.Entries), }) - root.Prev.Pos = len(ts.creator.DH.Entries) - ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev) + root.Prev.Pos = len(creator.DH.Entries) + creator.DH.Entries = append(creator.DH.Entries, *root.Prev) // Check if we need a new set - if ts.creator.curSet == nil { - ts.creator.curSet = &Entry{ + if creator.curSet == nil { + creator.curSet = &Entry{ Type: SpecialType, Name: "/set", - Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords), - Pos: len(ts.creator.DH.Entries), + Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords), + Pos: len(creator.DH.Entries), } - ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet) + creator.DH.Entries = append(creator.DH.Entries, *creator.curSet) } else { needNewSet := false for _, k := range root.Set.Keywords { - if !inSlice(k, ts.creator.curSet.Keywords) { + if !inSlice(k, creator.curSet.Keywords) { needNewSet = true break } } if needNewSet { - ts.creator.curSet = &Entry{ + creator.curSet = &Entry{ Name: "/set", Type: SpecialType, - Pos: len(ts.creator.DH.Entries), - Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords), + Pos: len(creator.DH.Entries), + Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords), } - ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet) + creator.DH.Entries = append(creator.DH.Entries, *creator.curSet) } } } - root.Set = ts.creator.curSet - root.Keywords = setDifference(root.Keywords, ts.creator.curSet.Keywords) - root.Pos = len(ts.creator.DH.Entries) - ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root) + root.Set = creator.curSet + root.Keywords = setDifference(root.Keywords, creator.curSet.Keywords) + root.Pos = len(creator.DH.Entries) + creator.DH.Entries = append(creator.DH.Entries, *root) for _, c := range root.Children { - flatten(c, ts) + flatten(c, creator, keywords) } if root.Prev != nil { // Show a comment when stepping out - root.Prev.Pos = len(ts.creator.DH.Entries) - ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev) + root.Prev.Pos = len(creator.DH.Entries) + creator.DH.Entries = append(creator.DH.Entries, *root.Prev) dotEntry := Entry{ Type: DotDotType, Name: "..", - Pos: len(ts.creator.DH.Entries), + Pos: len(creator.DH.Entries), } - ts.creator.DH.Entries = append(ts.creator.DH.Entries, dotEntry) + creator.DH.Entries = append(creator.DH.Entries, dotEntry) } } @@ -332,7 +331,7 @@ func filter(root *Entry, p func(*Entry) bool) []Entry { } if p(c) { if c.Prev == nil { - // prepend directories + // prepend files validEntrys = append([]Entry{*c}, validEntrys...) } else { validEntrys = append(validEntrys, *c) From 6a3733107455be330f571392ddc8ba927b4d9732 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Tue, 2 Aug 2016 11:42:02 -0400 Subject: [PATCH 2/4] tar: flatten the pseudo-tree only after readHeaders() is done Flattening within the readHeaders() function call was problematic because readHeaders() is called as a goroutine; thus, as the main program was calling `tdh.writeTo(os.Stdout)`, readHeaders() was still in the process of flattening the tree structure. To get around this, we now call flatten in ts.Hierarchy(), such that only when the main program is ready to retrieve a "valid" DirectoryHierarchy from the archive, should we flatten the tree. Signed-off-by: Stephen Chung --- tar.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tar.go b/tar.go index 4af3121..8cc81d6 100644 --- a/tar.go +++ b/tar.go @@ -2,6 +2,7 @@ package mtree import ( "archive/tar" + "fmt" "io" "io/ioutil" "os" @@ -29,11 +30,13 @@ func NewTarStreamer(r io.Reader, keywords []string) Streamer { tarReader: tar.NewReader(pR), keywords: keywords, } - go ts.readHeaders() // I don't like this + + go ts.readHeaders() return ts } type tarStream struct { + root *Entry creator dhCreator pipeReader *io.PipeReader pipeWriter *io.PipeWriter @@ -50,7 +53,7 @@ func (ts *tarStream) readHeaders() { Raw: "# .", Type: CommentType, } - root := Entry{ + ts.root = &Entry{ Name: ".", Type: RelativeType, Prev: &rootComment, @@ -67,7 +70,6 @@ func (ts *tarStream) readHeaders() { for { hdr, err := ts.tarReader.Next() if err != nil { - flatten(&root, &ts.creator, ts.keywords) ts.pipeReader.CloseWithError(err) return } @@ -162,12 +164,12 @@ func (ts *tarStream) readHeaders() { } } if filepath.Dir(filepath.Clean(hdr.Name)) == "." { - root.Set = &s + ts.root.Set = &s } else { e.Set = &s } } - err = populateTree(&root, &e, hdr) + err = populateTree(ts.root, &e, hdr) if err != nil { ts.setErr(err) } @@ -209,6 +211,7 @@ func populateTree(root, e *Entry, hdr *tar.Header) error { } return nil } + // TODO: what about directory/file names with "/" in it? dirNames := strings.Split(wd, "/") parent := root for _, name := range dirNames[1:] { @@ -259,6 +262,9 @@ func populateTree(root, e *Entry, hdr *tar.Header) error { // creator: a dhCreator that helps with the '/set' keyword // keywords: keywords specified by the user that should be evaluated func flatten(root *Entry, creator *dhCreator, keywords []string) { + if root == nil { + return + } if root.Prev != nil { // root.Prev != nil implies root is a directory creator.DH.Entries = append(creator.DH.Entries, @@ -317,6 +323,7 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) { } creator.DH.Entries = append(creator.DH.Entries, dotEntry) } + return } // filter takes in a pointer to an Entry, and returns a slice of Entry's that @@ -387,5 +394,9 @@ func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) { if ts.err != nil && ts.err != io.EOF { return nil, ts.err } + if ts.root == nil { + return nil, fmt.Errorf("root Entry not found. Nothing to flatten") + } + flatten(ts.root, &ts.creator, ts.keywords) return ts.creator.DH, nil } From 3d6b74d6f77cd2f75ad4e26708dd28f6e479bdae Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Tue, 2 Aug 2016 15:30:45 -0400 Subject: [PATCH 3/4] tar: populate Entry tree under a common root Resolves #56. Now, the Entry tree will be populated under a common root (if necessary), so that a manifest can be accurately generate from a tar file that has been created using multiple directories. Signed-off-by: Stephen Chung --- check.go | 4 +- tar.go | 190 +++++++++--------- tar_test.go | 178 ++++++++++++++++ testdata/collection.tar | Bin 0 -> 10240 bytes testdata/collection/dir1/file1 | 0 testdata/collection/dir2/file2 | 0 testdata/collection/dir3/file3 | 0 testdata/collection/dir4/file4 | 0 testdata/collection/dir5/dir6/dir7/lonelyfile | 0 testdata/collection/file1 | 1 + testdata/collection/file2 | 1 + testdata/collection/file3 | 1 + testdata/singlefile.tar | Bin 0 -> 10240 bytes testdata/singlefile/dir2/dir3/file | 0 testdata/traversal.tar | Bin 0 -> 10240 bytes testdata/traversal/dir2/dir3/actualdir1/file1 | 0 testdata/traversal/dir2/dir3/actualdir1/file2 | 0 testdata/traversal/dir2/dir3/actualdir2/a | 1 + testdata/traversal/dir2/dir3/actualdir2/b | 1 + testdata/traversal/dir2/dir3/actualdir2/c | 1 + testdata/traversal/dir2/dir3/file | 0 testdata/traversal/dir2/dir3/file3 | 0 22 files changed, 287 insertions(+), 91 deletions(-) create mode 100644 testdata/collection.tar create mode 100644 testdata/collection/dir1/file1 create mode 100644 testdata/collection/dir2/file2 create mode 100644 testdata/collection/dir3/file3 create mode 100644 testdata/collection/dir4/file4 create mode 100644 testdata/collection/dir5/dir6/dir7/lonelyfile create mode 100644 testdata/collection/file1 create mode 100644 testdata/collection/file2 create mode 100644 testdata/collection/file3 create mode 100644 testdata/singlefile.tar create mode 100644 testdata/singlefile/dir2/dir3/file create mode 100644 testdata/traversal.tar create mode 100644 testdata/traversal/dir2/dir3/actualdir1/file1 create mode 100644 testdata/traversal/dir2/dir3/actualdir1/file2 create mode 100644 testdata/traversal/dir2/dir3/actualdir2/a create mode 100644 testdata/traversal/dir2/dir3/actualdir2/b create mode 100644 testdata/traversal/dir2/dir3/actualdir2/c create mode 100644 testdata/traversal/dir2/dir3/file create mode 100644 testdata/traversal/dir2/dir3/file3 diff --git a/check.go b/check.go index 5a40c4b..c7752b0 100644 --- a/check.go +++ b/check.go @@ -42,7 +42,6 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err return nil, err } sort.Sort(byPos(creator.DH.Entries)) - var result Result for i, e := range creator.DH.Entries { switch e.Type { @@ -118,6 +117,9 @@ func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) break } } + if tarRoot == nil { + return nil, fmt.Errorf("root of archive could not be found") + } tarRoot.Next = &Entry{ Name: "seen", Type: CommentType, diff --git a/tar.go b/tar.go index 8cc81d6..1fbf6b7 100644 --- a/tar.go +++ b/tar.go @@ -49,18 +49,15 @@ type tarStream struct { func (ts *tarStream) readHeaders() { // We have to start with the directory we're in, and anything beyond these // items is determined at the time a tar is extracted. - rootComment := Entry{ - Raw: "# .", - Type: CommentType, - } ts.root = &Entry{ Name: ".", Type: RelativeType, - Prev: &rootComment, - Set: &Entry{ - Name: "meta-set", - Type: SpecialType, + Prev: &Entry{ + Raw: "# .", + Type: CommentType, }, + Set: nil, + Keywords: []string{"type=dir"}, } metadataEntries := signatureEntries("") for _, e := range metadataEntries { @@ -163,11 +160,7 @@ func (ts *tarStream) readHeaders() { } } } - if filepath.Dir(filepath.Clean(hdr.Name)) == "." { - ts.root.Set = &s - } else { - e.Set = &s - } + e.Set = &s } err = populateTree(ts.root, &e, hdr) if err != nil { @@ -178,15 +171,6 @@ func (ts *tarStream) readHeaders() { } } -type relationship int - -const ( - unknownDir relationship = iota - sameDir - childDir - parentDir -) - // populateTree creates a pseudo file tree hierarchy using an Entry's Parent and // Children fields. When examining the Entry e to insert in the tree, we // determine if the path to that Entry exists yet. If it does, insert it in the @@ -196,35 +180,50 @@ const ( // e: the Entry we are looking to insert // hdr: the tar header struct associated with e func populateTree(root, e *Entry, hdr *tar.Header) error { + if root == nil || e == nil { + return fmt.Errorf("cannot populate or insert nil Entry's") + } else if root.Prev == nil { + return fmt.Errorf("root needs to be an Entry associated with a directory") + } isDir := hdr.FileInfo().IsDir() wd := filepath.Clean(hdr.Name) if !isDir { - // If entry is a file, we only want the directory it's in. + // directory up until the actual file wd = filepath.Dir(wd) - } - if filepath.Dir(wd) == "." { - if isDir { - root.Keywords = e.Keywords - } else { + if wd == "." { + // If file in root directory, no need to traverse down, just append root.Children = append([]*Entry{e}, root.Children...) e.Parent = root + return nil } - return nil } // TODO: what about directory/file names with "/" in it? dirNames := strings.Split(wd, "/") parent := root - for _, name := range dirNames[1:] { + for _, name := range dirNames[:] { encoded, err := Vis(name) if err != nil { return err } if node := parent.Descend(encoded); node == nil { - // Entry for directory doesn't exist in tree relative to root + // Entry for directory doesn't exist in tree relative to root. + // We don't know if this directory is an actual tar header (because a + // user could have just specified a path to a deep file), so we must + // specify this placeholder directory as a "type=dir", and Set=nil. newEntry := Entry{ - Name: encoded, - Type: RelativeType, - Parent: parent, + Name: encoded, + Type: RelativeType, + Parent: parent, + Keywords: []string{"type=dir"}, // temp data + Set: nil, // temp data + } + pathname, err := newEntry.Path() + if err != nil { + return err + } + newEntry.Prev = &Entry{ + Type: CommentType, + Raw: "# " + pathname, } parent.Children = append(parent.Children, &newEntry) parent = &newEntry @@ -237,32 +236,20 @@ func populateTree(root, e *Entry, hdr *tar.Header) error { parent.Children = append([]*Entry{e}, parent.Children...) e.Parent = parent } else { - // the "placeholder" directory already exists in the Entry "parent", - // so now we have to replace it's underlying data with that from e, - // as well as set the Parent field. Note that we don't set parent = e - // because parent is already in the pseudo tree, we just need to - // complete it's data. - e.Parent = parent.Parent - *parent = *e - commentpath, err := parent.Path() - if err != nil { - return err - } - parent.Prev = &Entry{ - Raw: "# " + commentpath, - Type: CommentType, - } + // fill in the actual data from e + parent.Keywords = e.Keywords + parent.Set = e.Set } return nil } // After constructing a pseudo file hierarchy tree, we want to "flatten" this // tree by putting the Entries into a slice with appropriate positioning. -// root: the "head" of the sub-tree to flatten -// creator: a dhCreator that helps with the '/set' keyword +// root: the "head" of the sub-tree to flatten +// creator: a dhCreator that helps with the '/set' keyword // keywords: keywords specified by the user that should be evaluated func flatten(root *Entry, creator *dhCreator, keywords []string) { - if root == nil { + if root == nil || creator == nil { return } if root.Prev != nil { @@ -275,43 +262,54 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) { root.Prev.Pos = len(creator.DH.Entries) creator.DH.Entries = append(creator.DH.Entries, *root.Prev) - // Check if we need a new set - if creator.curSet == nil { - creator.curSet = &Entry{ - Type: SpecialType, - Name: "/set", - Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords), - Pos: len(creator.DH.Entries), - } - creator.DH.Entries = append(creator.DH.Entries, *creator.curSet) - } else { - needNewSet := false - for _, k := range root.Set.Keywords { - if !inSlice(k, creator.curSet.Keywords) { - needNewSet = true - break - } - } - if needNewSet { + if root.Set != nil { + // Check if we need a new set + if creator.curSet == nil { creator.curSet = &Entry{ - Name: "/set", Type: SpecialType, - Pos: len(creator.DH.Entries), + Name: "/set", Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords), + Pos: len(creator.DH.Entries), } creator.DH.Entries = append(creator.DH.Entries, *creator.curSet) + } else { + needNewSet := false + for _, k := range root.Set.Keywords { + if !inSlice(k, creator.curSet.Keywords) { + needNewSet = true + break + } + } + if needNewSet { + creator.curSet = &Entry{ + Name: "/set", + Type: SpecialType, + Pos: len(creator.DH.Entries), + Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords), + } + creator.DH.Entries = append(creator.DH.Entries, *creator.curSet) + } } + } else if creator.curSet != nil { + // Getting into here implies that the Entry's set has not and + // was not supposed to be evaluated, thus, we need to reset curSet + creator.DH.Entries = append(creator.DH.Entries, Entry{ + Name: "/unset", + Type: SpecialType, + Pos: len(creator.DH.Entries), + }) + creator.curSet = nil } } root.Set = creator.curSet - root.Keywords = setDifference(root.Keywords, creator.curSet.Keywords) + if creator.curSet != nil { + root.Keywords = setDifference(root.Keywords, creator.curSet.Keywords) + } root.Pos = len(creator.DH.Entries) creator.DH.Entries = append(creator.DH.Entries, *root) - for _, c := range root.Children { flatten(c, creator, keywords) } - if root.Prev != nil { // Show a comment when stepping out root.Prev.Pos = len(creator.DH.Entries) @@ -329,23 +327,24 @@ func flatten(root *Entry, creator *dhCreator, keywords []string) { // filter takes in a pointer to an Entry, and returns a slice of Entry's that // satisfy the predicate p func filter(root *Entry, p func(*Entry) bool) []Entry { - var validEntrys []Entry - if len(root.Children) > 0 || root.Prev != nil { - for _, c := range root.Children { - // if an Entry is a directory, filter the directory - if c.Prev != nil { - validEntrys = append(validEntrys, filter(c, p)...) - } - if p(c) { - if c.Prev == nil { - // prepend files - validEntrys = append([]Entry{*c}, validEntrys...) - } else { - validEntrys = append(validEntrys, *c) + if root != nil { + var validEntrys []Entry + if len(root.Children) > 0 || root.Prev != nil { + for _, c := range root.Children { + // filter the sub-directory + if c.Prev != nil { + validEntrys = append(validEntrys, filter(c, p)...) + } + if p(c) { + if c.Prev == nil { + validEntrys = append([]Entry{*c}, validEntrys...) + } else { + validEntrys = append(validEntrys, *c) + } } } + return validEntrys } - return validEntrys } return nil } @@ -363,6 +362,15 @@ func setDifference(this, that []string) []string { return diff } +type relationship int + +const ( + unknownDir relationship = iota + sameDir + childDir + parentDir +) + func compareDir(curDir, prevDir string) relationship { curDir = filepath.Clean(curDir) prevDir = filepath.Clean(prevDir) @@ -390,12 +398,14 @@ func (ts *tarStream) Close() error { return ts.pipeReader.Close() } +// Hierarchy returns the DirectoryHierarchy of the archive. It flattens the +// Entry tree before returning the DirectoryHierarchy func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) { if ts.err != nil && ts.err != io.EOF { return nil, ts.err } if ts.root == nil { - return nil, fmt.Errorf("root Entry not found. Nothing to flatten") + return nil, fmt.Errorf("root Entry not found, nothing to flatten") } flatten(ts.root, &ts.creator, ts.keywords) return ts.creator.DH, nil diff --git a/tar_test.go b/tar_test.go index 9f5ce87..b9e5171 100644 --- a/tar_test.go +++ b/tar_test.go @@ -142,6 +142,184 @@ func TestTar(t *testing.T) { } } +// This test checks how gomtree handles archives that were created +// with multiple directories, i.e, archives created with something like: +// `tar -cvf some.tar dir1 dir2 dir3 dir4/dir5 dir6` ... etc. +// The testdata of collection.tar resemble such an archive. the `collection` folder +// is the contents of `collection.tar` extracted +func TestArchiveCreation(t *testing.T) { + fh, err := os.Open("./testdata/collection.tar") + if err != nil { + t.Fatal(err) + } + str := NewTarStreamer(fh, []string{"sha1"}) + + if _, err := io.Copy(ioutil.Discard, str); err != nil && err != io.EOF { + t.Fatal(err) + } + if err := str.Close(); err != nil { + t.Fatal(err) + } + defer fh.Close() + + // get DirectoryHierarcy struct from walking the tar archive + tdh, err := str.Hierarchy() + if err != nil { + t.Fatal(err) + } + + // Test the tar manifest against the actual directory + res, err := Check("./testdata/collection", tdh, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + + if res != nil { + for _, f := range res.Failures { + t.Errorf(f.String()) + } + for _, e := range res.Extra { + t.Errorf("%s extra not expected", e.Name) + } + for _, m := range res.Missing { + t.Errorf("%s missing not expected", m.Name) + } + } + + // Test the tar manifest against itself + res, err = TarCheck(tdh, tdh, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + if res != nil { + for _, f := range res.Failures { + t.Errorf(f.String()) + } + for _, e := range res.Extra { + t.Errorf("%s extra not expected", e.Name) + } + for _, m := range res.Missing { + t.Errorf("%s missing not expected", m.Name) + } + } + + // Validate the directory manifest against the archive + dh, err := Walk("./testdata/collection", nil, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + res, err = TarCheck(tdh, dh, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + if res != nil { + for _, f := range res.Failures { + t.Errorf(f.String()) + } + for _, e := range res.Extra { + t.Errorf("%s extra not expected", e.Name) + } + for _, m := range res.Missing { + t.Errorf("%s missing not expected", m.Name) + } + } +} + +// Now test a tar file that was created with just the path to a file. In this +// test case, the traversal and creation of "placeholder" directories are +// evaluated. Also, The fact that this archive contains a single entry, yet the +// entry is associated with a file that has parent directories, means that the +// "." directory should be the lowest sub-directory under which `file` is contained. +func TestTreeTraversal(t *testing.T) { + fh, err := os.Open("./testdata/traversal.tar") + if err != nil { + t.Fatal(err) + } + str := NewTarStreamer(fh, DefaultTarKeywords) + + if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF { + t.Fatal(err) + } + if err = str.Close(); err != nil { + t.Fatal(err) + } + + fh.Close() + tdh, err := str.Hierarchy() + + if err != nil { + t.Fatal(err) + } + + res, err := TarCheck(tdh, tdh, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + if res != nil { + for _, f := range res.Failures { + t.Errorf(f.String()) + } + for _, e := range res.Extra { + t.Errorf("%s extra not expected", e.Name) + } + for _, m := range res.Missing { + t.Errorf("%s missing not expected", m.Name) + } + } + + // top-level "." directory will contain contents of traversal.tar + res, err = Check("./testdata/.", tdh, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + if res != nil { + for _, f := range res.Failures { + t.Errorf(f.String()) + } + for _, e := range res.Extra { + t.Errorf("%s extra not expected", e.Name) + } + for _, m := range res.Missing { + t.Errorf("%s missing not expected", m.Name) + } + } + + // Now test an archive that requires placeholder directories, i.e, there are + // no headers in the archive that are associated with the actual directory name + fh, err = os.Open("./testdata/singlefile.tar") + if err != nil { + t.Fatal(err) + } + str = NewTarStreamer(fh, DefaultTarKeywords) + if _, err = io.Copy(ioutil.Discard, str); err != nil && err != io.EOF { + t.Fatal(err) + } + if err = str.Close(); err != nil { + t.Fatal(err) + } + tdh, err = str.Hierarchy() + if err != nil { + t.Fatal(err) + } + + // Implied top-level "." directory will contain the contents of singlefile.tar + res, err = Check("./testdata/.", tdh, []string{"sha1"}) + if err != nil { + t.Fatal(err) + } + if res != nil { + for _, f := range res.Failures { + t.Errorf(f.String()) + } + for _, e := range res.Extra { + t.Errorf("%s extra not expected", e.Name) + } + for _, m := range res.Missing { + t.Errorf("%s missing not expected", m.Name) + } + } +} + // minimal tar archive stream that mimics what is in ./testdata/test.tar func makeTarStream() ([]byte, error) { buf := new(bytes.Buffer) diff --git a/testdata/collection.tar b/testdata/collection.tar new file mode 100644 index 0000000000000000000000000000000000000000..36f916c194be0ad3c5292223b8a6091123b9820d GIT binary patch literal 10240 zcmeI1U2cOg49D}lQ*eXD`8-dxQi=q0qU*zMKRaj&s)QI7Mw#S16t_`ru>RaQTU(9#AZI}-`$3~`=;~2juE$?|Gf^#zx4lh6k)*?bL%%nUDw}E{a@#zv)CsF zuuh);78Kj}=eB>_*P(d(R~aDw8C$<87wi0o%)i0+gvh^__dkc!(Y5Znn=Ao`Q?$iOG5Za88&)-&RW%ob)|83)DT6j*XYG)zb~|3y7TotjgdTU_p!Fmz4h&zGzcJo y00IagfB*srAbQ4( literal 0 HcmV?d00001 diff --git a/testdata/singlefile/dir2/dir3/file b/testdata/singlefile/dir2/dir3/file new file mode 100644 index 0000000..e69de29 diff --git a/testdata/traversal.tar b/testdata/traversal.tar new file mode 100644 index 0000000000000000000000000000000000000000..925bfc057c3385db7f22acdcffd1b1931447cdeb GIT binary patch literal 10240 zcmeI1O>V;=5QSOy6y9|i2J>@<9)r4R63I>~$Iki7q-yI0F~~+$n?4qHsT_2vQ-U!!J!jk|r{uGKKpQO*ftg-&d75 zH~aCpi&nWlOvP`v{MC^mqQ7s3(YM#}P^9lHaIXGTtDwK7FifC-5&H8<68b;Sx6X^Q z{?_WL??2}O4uzt>{7)v-BU_UamCQ@OuGT;I|I=glunr(v|78LR|2HMxjU7Jgf13E; zhV=papWy!z>{B`3`qQrWU3WO{Fb||)-C;c8e_!I2lm3_X`~SaoAO6?ye+l-f9Q$qC zp5Xixrn`;L`k%@i1+f2f_kYsx{@;>z*#G^+piAigdT6(_wjb&Fe{%kF)!)gm48;C# zE%=P~8TzY?G_zF}>5ujQEYniKnEq_~Qu#Sgw*HE0X?gvpoeA?l&VNd(5YKf>|2kfM s(bH`GwF+Mdc>iytve17KY%~G^5C8!X009sH0T2KI5C8!X0D%aBcMyFdx&QzG literal 0 HcmV?d00001 diff --git a/testdata/traversal/dir2/dir3/actualdir1/file1 b/testdata/traversal/dir2/dir3/actualdir1/file1 new file mode 100644 index 0000000..e69de29 diff --git a/testdata/traversal/dir2/dir3/actualdir1/file2 b/testdata/traversal/dir2/dir3/actualdir1/file2 new file mode 100644 index 0000000..e69de29 diff --git a/testdata/traversal/dir2/dir3/actualdir2/a b/testdata/traversal/dir2/dir3/actualdir2/a new file mode 100644 index 0000000..ce01362 --- /dev/null +++ b/testdata/traversal/dir2/dir3/actualdir2/a @@ -0,0 +1 @@ +hello diff --git a/testdata/traversal/dir2/dir3/actualdir2/b b/testdata/traversal/dir2/dir3/actualdir2/b new file mode 100644 index 0000000..f0e6a34 --- /dev/null +++ b/testdata/traversal/dir2/dir3/actualdir2/b @@ -0,0 +1 @@ +I'm diff --git a/testdata/traversal/dir2/dir3/actualdir2/c b/testdata/traversal/dir2/dir3/actualdir2/c new file mode 100644 index 0000000..463a353 --- /dev/null +++ b/testdata/traversal/dir2/dir3/actualdir2/c @@ -0,0 +1 @@ +programming diff --git a/testdata/traversal/dir2/dir3/file b/testdata/traversal/dir2/dir3/file new file mode 100644 index 0000000..e69de29 diff --git a/testdata/traversal/dir2/dir3/file3 b/testdata/traversal/dir2/dir3/file3 new file mode 100644 index 0000000..e69de29 From 00a4a2e674584308178c4c125c173e591d30ecd7 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Tue, 9 Aug 2016 13:30:31 -0400 Subject: [PATCH 4/4] tar: remove "time" keyword before tree creation Instead of checking each time during keyword evaluation if the keyword is "time", just remove it from the start and replace it with "tar_time" (if necessary). Signed-off-by: Stephen Chung --- tar.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tar.go b/tar.go index 1fbf6b7..bcab9c8 100644 --- a/tar.go +++ b/tar.go @@ -47,6 +47,20 @@ type tarStream struct { } func (ts *tarStream) readHeaders() { + // remove "time" keyword + notimekws := []string{} + for _, kw := range ts.keywords { + if !inSlice(kw, notimekws) { + if kw != "time" { + notimekws = append(notimekws, kw) + } else { + if !inSlice("tar_time", ts.keywords) { + notimekws = append(notimekws, "tar_time") + } + } + } + } + ts.keywords = notimekws // We have to start with the directory we're in, and anything beyond these // items is determined at the time a tar is extracted. ts.root = &Entry{ @@ -106,9 +120,6 @@ func (ts *tarStream) readHeaders() { // now collect keywords on the file for _, keyword := range ts.keywords { - if keyword == "time" { - keyword = "tar_time" - } if keyFunc, ok := KeywordFuncs[keyword]; ok { // We can't extract directories on to disk, so "size" keyword // is irrelevant for now @@ -142,9 +153,6 @@ func (ts *tarStream) readHeaders() { Type: SpecialType, } for _, setKW := range SetKeywords { - if setKW == "time" { - setKW = "tar_time" - } if keyFunc, ok := KeywordFuncs[setKW]; ok { val, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile) if err != nil {