mirror of
https://github.com/vbatts/go-mtree.git
synced 2024-11-21 16:05:40 +00:00
437 lines
12 KiB
Go
437 lines
12 KiB
Go
package mtree
|
|
|
|
import (
|
|
"archive/tar"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
"github.com/vbatts/go-mtree/pkg/govis"
|
|
)
|
|
|
|
// Streamer creates a file hierarchy out of a tar stream
|
|
type Streamer interface {
|
|
io.ReadCloser
|
|
Hierarchy() (*DirectoryHierarchy, error)
|
|
}
|
|
|
|
var tarDefaultSetKeywords = []KeyVal{
|
|
"type=file",
|
|
"flags=none",
|
|
"mode=0664",
|
|
}
|
|
|
|
// NewTarStreamer streams a tar archive and creates a file hierarchy based off
|
|
// of the tar metadata headers
|
|
func NewTarStreamer(r io.Reader, excludes []ExcludeFunc, keywords []Keyword) Streamer {
|
|
pR, pW := io.Pipe()
|
|
ts := &tarStream{
|
|
pipeReader: pR,
|
|
pipeWriter: pW,
|
|
creator: dhCreator{DH: &DirectoryHierarchy{}},
|
|
teeReader: io.TeeReader(r, pW),
|
|
tarReader: tar.NewReader(pR),
|
|
keywords: keywords,
|
|
hardlinks: map[string][]string{},
|
|
excludes: excludes,
|
|
}
|
|
|
|
go ts.readHeaders()
|
|
return ts
|
|
}
|
|
|
|
type tarStream struct {
|
|
root *Entry
|
|
hardlinks map[string][]string
|
|
creator dhCreator
|
|
pipeReader *io.PipeReader
|
|
pipeWriter *io.PipeWriter
|
|
teeReader io.Reader
|
|
tarReader *tar.Reader
|
|
keywords []Keyword
|
|
excludes []ExcludeFunc
|
|
err error
|
|
}
|
|
|
|
func (ts *tarStream) readHeaders() {
|
|
// remove "time" keyword
|
|
notimekws := []Keyword{}
|
|
for _, kw := range ts.keywords {
|
|
if !InKeywordSlice(kw, notimekws) {
|
|
if kw == "time" {
|
|
if !InKeywordSlice("tar_time", ts.keywords) {
|
|
notimekws = append(notimekws, "tar_time")
|
|
}
|
|
} else {
|
|
notimekws = append(notimekws, kw)
|
|
}
|
|
}
|
|
}
|
|
ts.keywords = notimekws
|
|
// We have to start with the directory we're in, and anything beyond these
|
|
// items is determined at the time a tar is extracted.
|
|
ts.root = &Entry{
|
|
Name: ".",
|
|
Type: RelativeType,
|
|
Prev: &Entry{
|
|
Raw: "# .",
|
|
Type: CommentType,
|
|
},
|
|
Set: nil,
|
|
Keywords: []KeyVal{"type=dir"},
|
|
}
|
|
// insert signature and metadata comments first (user, machine, tree, date)
|
|
for _, e := range signatureEntries("<user specified tar archive>") {
|
|
e.Pos = len(ts.creator.DH.Entries)
|
|
ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
|
|
}
|
|
// insert keyword metadata next
|
|
for _, e := range keywordEntries(ts.keywords) {
|
|
e.Pos = len(ts.creator.DH.Entries)
|
|
ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
|
|
}
|
|
hdrloop:
|
|
for {
|
|
hdr, err := ts.tarReader.Next()
|
|
if err != nil {
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
|
|
for _, ex := range ts.excludes {
|
|
if ex(hdr.Name, hdr.FileInfo()) {
|
|
continue hdrloop
|
|
}
|
|
}
|
|
|
|
// Because the content of the file may need to be read by several
|
|
// KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from
|
|
// ts.tarReader is not enough.
|
|
tmpFile, err := os.CreateTemp("", "ts.payload.")
|
|
if err != nil {
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
// for good measure
|
|
if err := tmpFile.Chmod(0600); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
if _, err := io.Copy(tmpFile, ts.tarReader); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
// Alright, it's either file or directory
|
|
encodedName, err := govis.Vis(filepath.Base(hdr.Name), DefaultVisFlags)
|
|
if err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
e := Entry{
|
|
Name: encodedName,
|
|
Type: RelativeType,
|
|
}
|
|
|
|
// Keep track of which files are hardlinks so we can resolve them later
|
|
if hdr.Typeflag == tar.TypeLink {
|
|
keyFunc := KeywordFuncs["link"]
|
|
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), nil)
|
|
if err != nil {
|
|
logrus.Warn(err)
|
|
break // XXX is breaking an okay thing to do here?
|
|
}
|
|
linkname, err := govis.Unvis(KeyVal(kvs[0]).Value(), DefaultVisFlags)
|
|
if err != nil {
|
|
logrus.Warn(err)
|
|
break // XXX is breaking an okay thing to do here?
|
|
}
|
|
if _, ok := ts.hardlinks[linkname]; !ok {
|
|
ts.hardlinks[linkname] = []string{hdr.Name}
|
|
} else {
|
|
ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
|
|
}
|
|
}
|
|
|
|
// now collect keywords on the file
|
|
for _, keyword := range ts.keywords {
|
|
if keyFunc, ok := KeywordFuncs[keyword.Prefix()]; ok {
|
|
// We can't extract directories on to disk, so "size" keyword
|
|
// is irrelevant for now
|
|
if hdr.FileInfo().IsDir() && keyword == "size" {
|
|
continue
|
|
}
|
|
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
|
|
if err != nil {
|
|
ts.setErr(err)
|
|
}
|
|
// for good measure, check that we actually get a value for a keyword
|
|
if len(kvs) > 0 && kvs[0] != "" {
|
|
e.Keywords = append(e.Keywords, kvs[0])
|
|
}
|
|
|
|
// don't forget to reset the reader
|
|
if _, err := tmpFile.Seek(0, 0); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
// collect meta-set keywords for a directory so that we can build the
|
|
// actual sets in `flatten`
|
|
if hdr.FileInfo().IsDir() {
|
|
s := Entry{
|
|
Name: "meta-set",
|
|
Type: SpecialType,
|
|
}
|
|
for _, setKW := range SetKeywords {
|
|
if keyFunc, ok := KeywordFuncs[setKW.Prefix()]; ok {
|
|
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
|
|
if err != nil {
|
|
ts.setErr(err)
|
|
}
|
|
for _, kv := range kvs {
|
|
if kv != "" {
|
|
s.Keywords = append(s.Keywords, kv)
|
|
}
|
|
}
|
|
if _, err := tmpFile.Seek(0, 0); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
}
|
|
}
|
|
}
|
|
e.Set = &s
|
|
}
|
|
err = populateTree(ts.root, &e, hdr)
|
|
if err != nil {
|
|
ts.setErr(err)
|
|
}
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
}
|
|
}
|
|
|
|
// populateTree creates a pseudo file tree hierarchy using an Entry's Parent and
|
|
// Children fields. When examining the Entry e to insert in the tree, we
|
|
// determine if the path to that Entry exists yet. If it does, insert it in the
|
|
// appropriate position in the tree. If not, create a path up until the Entry's
|
|
// directory that it is contained in. Then, insert the Entry.
|
|
// root: the "." Entry
|
|
//
|
|
// e: the Entry we are looking to insert
|
|
// hdr: the tar header struct associated with e
|
|
func populateTree(root, e *Entry, hdr *tar.Header) error {
|
|
if root == nil || e == nil {
|
|
return fmt.Errorf("cannot populate or insert nil Entry's")
|
|
} else if root.Prev == nil {
|
|
return fmt.Errorf("root needs to be an Entry associated with a directory")
|
|
}
|
|
isDir := hdr.FileInfo().IsDir()
|
|
wd := filepath.Clean(hdr.Name)
|
|
if !isDir {
|
|
// directory up until the actual file
|
|
wd = filepath.Dir(wd)
|
|
if wd == "." {
|
|
root.Children = append([]*Entry{e}, root.Children...)
|
|
e.Parent = root
|
|
return nil
|
|
}
|
|
}
|
|
dirNames := strings.Split(wd, "/")
|
|
parent := root
|
|
for _, name := range dirNames[:] {
|
|
encoded, err := govis.Vis(name, DefaultVisFlags)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if node := parent.Descend(encoded); node == nil {
|
|
// Entry for directory doesn't exist in tree relative to root.
|
|
// We don't know if this directory is an actual tar header (because a
|
|
// user could have just specified a path to a deep file), so we must
|
|
// specify this placeholder directory as a "type=dir", and Set=nil.
|
|
newEntry := Entry{
|
|
Name: encoded,
|
|
Type: RelativeType,
|
|
Parent: parent,
|
|
Keywords: []KeyVal{"type=dir"}, // temp data
|
|
Set: nil, // temp data
|
|
}
|
|
pathname, err := newEntry.Path()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
newEntry.Prev = &Entry{
|
|
Type: CommentType,
|
|
Raw: "# " + pathname,
|
|
}
|
|
parent.Children = append(parent.Children, &newEntry)
|
|
parent = &newEntry
|
|
} else {
|
|
// Entry for directory exists in tree, just keep going
|
|
parent = node
|
|
}
|
|
}
|
|
if !isDir {
|
|
parent.Children = append([]*Entry{e}, parent.Children...)
|
|
e.Parent = parent
|
|
} else {
|
|
// fill in the actual data from e
|
|
parent.Keywords = e.Keywords
|
|
parent.Set = e.Set
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// After constructing a pseudo file hierarchy tree, we want to "flatten" this
|
|
// tree by putting the Entries into a slice with appropriate positioning.
|
|
//
|
|
// root: the "head" of the sub-tree to flatten
|
|
// creator: a dhCreator that helps with the '/set' keyword
|
|
//
|
|
// keywords: keywords specified by the user that should be evaluated
|
|
func flatten(root *Entry, creator *dhCreator, keywords []Keyword) {
|
|
if root == nil || creator == nil {
|
|
return
|
|
}
|
|
if root.Prev != nil {
|
|
// root.Prev != nil implies root is a directory
|
|
creator.DH.Entries = append(creator.DH.Entries,
|
|
Entry{
|
|
Type: BlankType,
|
|
Pos: len(creator.DH.Entries),
|
|
})
|
|
root.Prev.Pos = len(creator.DH.Entries)
|
|
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
|
|
|
|
if root.Set != nil {
|
|
// Check if we need a new set
|
|
consolidatedKeys := keyvalSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords)
|
|
if creator.curSet == nil {
|
|
creator.curSet = &Entry{
|
|
Type: SpecialType,
|
|
Name: "/set",
|
|
Keywords: consolidatedKeys,
|
|
Pos: len(creator.DH.Entries),
|
|
}
|
|
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
|
|
} else {
|
|
needNewSet := false
|
|
for _, k := range root.Set.Keywords {
|
|
if !inKeyValSlice(k, creator.curSet.Keywords) {
|
|
needNewSet = true
|
|
break
|
|
}
|
|
}
|
|
if needNewSet {
|
|
creator.curSet = &Entry{
|
|
Name: "/set",
|
|
Type: SpecialType,
|
|
Pos: len(creator.DH.Entries),
|
|
Keywords: consolidatedKeys,
|
|
}
|
|
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
|
|
}
|
|
}
|
|
} else if creator.curSet != nil {
|
|
// Getting into here implies that the Entry's set has not and
|
|
// was not supposed to be evaluated, thus, we need to reset curSet
|
|
creator.DH.Entries = append(creator.DH.Entries, Entry{
|
|
Name: "/unset",
|
|
Type: SpecialType,
|
|
Pos: len(creator.DH.Entries),
|
|
})
|
|
creator.curSet = nil
|
|
}
|
|
}
|
|
root.Set = creator.curSet
|
|
if creator.curSet != nil {
|
|
root.Keywords = keyValDifference(root.Keywords, creator.curSet.Keywords)
|
|
}
|
|
root.Pos = len(creator.DH.Entries)
|
|
creator.DH.Entries = append(creator.DH.Entries, *root)
|
|
for _, c := range root.Children {
|
|
flatten(c, creator, keywords)
|
|
}
|
|
if root.Prev != nil {
|
|
// Show a comment when stepping out
|
|
root.Prev.Pos = len(creator.DH.Entries)
|
|
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
|
|
dotEntry := Entry{
|
|
Type: DotDotType,
|
|
Name: "..",
|
|
Pos: len(creator.DH.Entries),
|
|
}
|
|
creator.DH.Entries = append(creator.DH.Entries, dotEntry)
|
|
}
|
|
}
|
|
|
|
// resolveHardlinks goes through an Entry tree, and finds the Entry's associated
|
|
// with hardlinks and fills them in with the actual data from the base file.
|
|
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
|
|
originals := make(map[string]*Entry)
|
|
for base, links := range hardlinks {
|
|
var basefile *Entry
|
|
if seen, ok := originals[base]; !ok {
|
|
basefile = root.Find(base)
|
|
if basefile == nil {
|
|
logrus.Printf("%s does not exist in this tree\n", base)
|
|
continue
|
|
}
|
|
originals[base] = basefile
|
|
} else {
|
|
basefile = seen
|
|
}
|
|
for _, link := range links {
|
|
linkfile := root.Find(link)
|
|
if linkfile == nil {
|
|
logrus.Printf("%s does not exist in this tree\n", link)
|
|
continue
|
|
}
|
|
linkfile.Keywords = basefile.Keywords
|
|
if countlinks {
|
|
linkfile.Keywords = append(linkfile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
|
|
}
|
|
}
|
|
if countlinks {
|
|
basefile.Keywords = append(basefile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
|
|
}
|
|
}
|
|
}
|
|
|
|
func (ts *tarStream) setErr(err error) {
|
|
ts.err = err
|
|
}
|
|
|
|
func (ts *tarStream) Read(p []byte) (n int, err error) {
|
|
return ts.teeReader.Read(p)
|
|
}
|
|
|
|
func (ts *tarStream) Close() error {
|
|
return ts.pipeReader.Close()
|
|
}
|
|
|
|
// Hierarchy returns the DirectoryHierarchy of the archive. It flattens the
|
|
// Entry tree before returning the DirectoryHierarchy
|
|
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
|
|
if ts.err != nil && ts.err != io.EOF {
|
|
return nil, ts.err
|
|
}
|
|
if ts.root == nil {
|
|
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
|
|
}
|
|
resolveHardlinks(ts.root, ts.hardlinks, InKeywordSlice(Keyword("nlink"), ts.keywords))
|
|
flatten(ts.root, &ts.creator, ts.keywords)
|
|
return ts.creator.DH, nil
|
|
}
|