Vincent Batts
ed464af779
This is a gnarly patchset that has been mashed together. It uncovered that some aspects of Check were never really working correctly for `xattr` keywords, but also that `Update()` had been left undone for a while. This includes some API changes around the `Keyword` and `KeyVal` types. Also, I would like to update the signature for the `UpdateKeywordFunc` to just accept a `KeyVal` as an argument, rather than a keyword AND the value. With this context there would be no need to guess whether the value passed to the xattr update function needs to be, or already is, base64 encoded. Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
461 lines
13 KiB
Go
461 lines
13 KiB
Go
package mtree
|
|
|
|
import (
|
|
"archive/tar"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/vbatts/go-mtree/pkg/govis"
|
|
)
|
|
|
|
// Streamer creates a file hierarchy out of a tar stream
|
|
type Streamer interface {
|
|
io.ReadCloser
|
|
Hierarchy() (*DirectoryHierarchy, error)
|
|
}
|
|
|
|
var tarDefaultSetKeywords = []KeyVal{
|
|
"type=file",
|
|
"flags=none",
|
|
"mode=0664",
|
|
}
|
|
|
|
// NewTarStreamer streams a tar archive and creates a file hierarchy based off
|
|
// of the tar metadata headers
|
|
func NewTarStreamer(r io.Reader, excludes []ExcludeFunc, keywords []Keyword) Streamer {
|
|
pR, pW := io.Pipe()
|
|
ts := &tarStream{
|
|
pipeReader: pR,
|
|
pipeWriter: pW,
|
|
creator: dhCreator{DH: &DirectoryHierarchy{}},
|
|
teeReader: io.TeeReader(r, pW),
|
|
tarReader: tar.NewReader(pR),
|
|
keywords: keywords,
|
|
hardlinks: map[string][]string{},
|
|
excludes: excludes,
|
|
}
|
|
|
|
go ts.readHeaders()
|
|
return ts
|
|
}
|
|
|
|
type tarStream struct {
|
|
root *Entry
|
|
hardlinks map[string][]string
|
|
creator dhCreator
|
|
pipeReader *io.PipeReader
|
|
pipeWriter *io.PipeWriter
|
|
teeReader io.Reader
|
|
tarReader *tar.Reader
|
|
keywords []Keyword
|
|
excludes []ExcludeFunc
|
|
err error
|
|
}
|
|
|
|
func (ts *tarStream) readHeaders() {
|
|
// remove "time" keyword
|
|
notimekws := []Keyword{}
|
|
for _, kw := range ts.keywords {
|
|
if !InKeywordSlice(kw, notimekws) {
|
|
if kw == "time" {
|
|
if !InKeywordSlice("tar_time", ts.keywords) {
|
|
notimekws = append(notimekws, "tar_time")
|
|
}
|
|
} else {
|
|
notimekws = append(notimekws, kw)
|
|
}
|
|
}
|
|
}
|
|
ts.keywords = notimekws
|
|
// We have to start with the directory we're in, and anything beyond these
|
|
// items is determined at the time a tar is extracted.
|
|
ts.root = &Entry{
|
|
Name: ".",
|
|
Type: RelativeType,
|
|
Prev: &Entry{
|
|
Raw: "# .",
|
|
Type: CommentType,
|
|
},
|
|
Set: nil,
|
|
Keywords: []KeyVal{"type=dir"},
|
|
}
|
|
// insert signature and metadata comments first (user, machine, tree, date)
|
|
for _, e := range signatureEntries("<user specified tar archive>") {
|
|
e.Pos = len(ts.creator.DH.Entries)
|
|
ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
|
|
}
|
|
// insert keyword metadata next
|
|
for _, e := range keywordEntries(ts.keywords) {
|
|
e.Pos = len(ts.creator.DH.Entries)
|
|
ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
|
|
}
|
|
hdrloop:
|
|
for {
|
|
hdr, err := ts.tarReader.Next()
|
|
if err != nil {
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
|
|
for _, ex := range ts.excludes {
|
|
if ex(hdr.Name, hdr.FileInfo()) {
|
|
continue hdrloop
|
|
}
|
|
}
|
|
|
|
// Because the content of the file may need to be read by several
|
|
// KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from
|
|
// ts.tarReader is not enough.
|
|
tmpFile, err := ioutil.TempFile("", "ts.payload.")
|
|
if err != nil {
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
// for good measure
|
|
if err := tmpFile.Chmod(0600); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
if _, err := io.Copy(tmpFile, ts.tarReader); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
// Alright, it's either file or directory
|
|
encodedName, err := govis.Vis(filepath.Base(hdr.Name), DefaultVisFlags)
|
|
if err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
e := Entry{
|
|
Name: encodedName,
|
|
Type: RelativeType,
|
|
}
|
|
|
|
// Keep track of which files are hardlinks so we can resolve them later
|
|
if hdr.Typeflag == tar.TypeLink {
|
|
keyFunc := KeywordFuncs["link"]
|
|
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), nil)
|
|
if err != nil {
|
|
logrus.Warn(err)
|
|
break // XXX is breaking an okay thing to do here?
|
|
}
|
|
linkname, err := govis.Unvis(KeyVal(kvs[0]).Value(), DefaultVisFlags)
|
|
if err != nil {
|
|
logrus.Warn(err)
|
|
break // XXX is breaking an okay thing to do here?
|
|
}
|
|
if _, ok := ts.hardlinks[linkname]; !ok {
|
|
ts.hardlinks[linkname] = []string{hdr.Name}
|
|
} else {
|
|
ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
|
|
}
|
|
}
|
|
|
|
// now collect keywords on the file
|
|
for _, keyword := range ts.keywords {
|
|
if keyFunc, ok := KeywordFuncs[keyword.Prefix()]; ok {
|
|
// We can't extract directories on to disk, so "size" keyword
|
|
// is irrelevant for now
|
|
if hdr.FileInfo().IsDir() && keyword == "size" {
|
|
continue
|
|
}
|
|
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
|
|
if err != nil {
|
|
ts.setErr(err)
|
|
}
|
|
// for good measure, check that we actually get a value for a keyword
|
|
if len(kvs) > 0 && kvs[0] != "" {
|
|
e.Keywords = append(e.Keywords, kvs[0])
|
|
}
|
|
|
|
// don't forget to reset the reader
|
|
if _, err := tmpFile.Seek(0, 0); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
// collect meta-set keywords for a directory so that we can build the
|
|
// actual sets in `flatten`
|
|
if hdr.FileInfo().IsDir() {
|
|
s := Entry{
|
|
Name: "meta-set",
|
|
Type: SpecialType,
|
|
}
|
|
for _, setKW := range SetKeywords {
|
|
if keyFunc, ok := KeywordFuncs[setKW.Prefix()]; ok {
|
|
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
|
|
if err != nil {
|
|
ts.setErr(err)
|
|
}
|
|
for _, kv := range kvs {
|
|
if kv != "" {
|
|
s.Keywords = append(s.Keywords, kv)
|
|
}
|
|
}
|
|
if _, err := tmpFile.Seek(0, 0); err != nil {
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
ts.pipeReader.CloseWithError(err)
|
|
}
|
|
}
|
|
}
|
|
e.Set = &s
|
|
}
|
|
err = populateTree(ts.root, &e, hdr)
|
|
if err != nil {
|
|
ts.setErr(err)
|
|
}
|
|
tmpFile.Close()
|
|
os.Remove(tmpFile.Name())
|
|
}
|
|
}
|
|
|
|
// populateTree creates a pseudo file tree hierarchy using an Entry's Parent and
|
|
// Children fields. When examining the Entry e to insert in the tree, we
|
|
// determine if the path to that Entry exists yet. If it does, insert it in the
|
|
// appropriate position in the tree. If not, create a path up until the Entry's
|
|
// directory that it is contained in. Then, insert the Entry.
|
|
// root: the "." Entry
|
|
// e: the Entry we are looking to insert
|
|
// hdr: the tar header struct associated with e
|
|
func populateTree(root, e *Entry, hdr *tar.Header) error {
|
|
if root == nil || e == nil {
|
|
return fmt.Errorf("cannot populate or insert nil Entry's")
|
|
} else if root.Prev == nil {
|
|
return fmt.Errorf("root needs to be an Entry associated with a directory")
|
|
}
|
|
isDir := hdr.FileInfo().IsDir()
|
|
wd := filepath.Clean(hdr.Name)
|
|
if !isDir {
|
|
// directory up until the actual file
|
|
wd = filepath.Dir(wd)
|
|
if wd == "." {
|
|
root.Children = append([]*Entry{e}, root.Children...)
|
|
e.Parent = root
|
|
return nil
|
|
}
|
|
}
|
|
dirNames := strings.Split(wd, "/")
|
|
parent := root
|
|
for _, name := range dirNames[:] {
|
|
encoded, err := govis.Vis(name, DefaultVisFlags)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if node := parent.Descend(encoded); node == nil {
|
|
// Entry for directory doesn't exist in tree relative to root.
|
|
// We don't know if this directory is an actual tar header (because a
|
|
// user could have just specified a path to a deep file), so we must
|
|
// specify this placeholder directory as a "type=dir", and Set=nil.
|
|
newEntry := Entry{
|
|
Name: encoded,
|
|
Type: RelativeType,
|
|
Parent: parent,
|
|
Keywords: []KeyVal{"type=dir"}, // temp data
|
|
Set: nil, // temp data
|
|
}
|
|
pathname, err := newEntry.Path()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
newEntry.Prev = &Entry{
|
|
Type: CommentType,
|
|
Raw: "# " + pathname,
|
|
}
|
|
parent.Children = append(parent.Children, &newEntry)
|
|
parent = &newEntry
|
|
} else {
|
|
// Entry for directory exists in tree, just keep going
|
|
parent = node
|
|
}
|
|
}
|
|
if !isDir {
|
|
parent.Children = append([]*Entry{e}, parent.Children...)
|
|
e.Parent = parent
|
|
} else {
|
|
// fill in the actual data from e
|
|
parent.Keywords = e.Keywords
|
|
parent.Set = e.Set
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// After constructing a pseudo file hierarchy tree, we want to "flatten" this
|
|
// tree by putting the Entries into a slice with appropriate positioning.
|
|
// root: the "head" of the sub-tree to flatten
|
|
// creator: a dhCreator that helps with the '/set' keyword
|
|
// keywords: keywords specified by the user that should be evaluated
|
|
func flatten(root *Entry, creator *dhCreator, keywords []Keyword) {
|
|
if root == nil || creator == nil {
|
|
return
|
|
}
|
|
if root.Prev != nil {
|
|
// root.Prev != nil implies root is a directory
|
|
creator.DH.Entries = append(creator.DH.Entries,
|
|
Entry{
|
|
Type: BlankType,
|
|
Pos: len(creator.DH.Entries),
|
|
})
|
|
root.Prev.Pos = len(creator.DH.Entries)
|
|
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
|
|
|
|
if root.Set != nil {
|
|
// Check if we need a new set
|
|
consolidatedKeys := keyvalSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords)
|
|
if creator.curSet == nil {
|
|
creator.curSet = &Entry{
|
|
Type: SpecialType,
|
|
Name: "/set",
|
|
Keywords: consolidatedKeys,
|
|
Pos: len(creator.DH.Entries),
|
|
}
|
|
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
|
|
} else {
|
|
needNewSet := false
|
|
for _, k := range root.Set.Keywords {
|
|
if !inKeyValSlice(k, creator.curSet.Keywords) {
|
|
needNewSet = true
|
|
break
|
|
}
|
|
}
|
|
if needNewSet {
|
|
creator.curSet = &Entry{
|
|
Name: "/set",
|
|
Type: SpecialType,
|
|
Pos: len(creator.DH.Entries),
|
|
Keywords: consolidatedKeys,
|
|
}
|
|
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
|
|
}
|
|
}
|
|
} else if creator.curSet != nil {
|
|
// Getting into here implies that the Entry's set has not and
|
|
// was not supposed to be evaluated, thus, we need to reset curSet
|
|
creator.DH.Entries = append(creator.DH.Entries, Entry{
|
|
Name: "/unset",
|
|
Type: SpecialType,
|
|
Pos: len(creator.DH.Entries),
|
|
})
|
|
creator.curSet = nil
|
|
}
|
|
}
|
|
root.Set = creator.curSet
|
|
if creator.curSet != nil {
|
|
root.Keywords = keyValDifference(root.Keywords, creator.curSet.Keywords)
|
|
}
|
|
root.Pos = len(creator.DH.Entries)
|
|
creator.DH.Entries = append(creator.DH.Entries, *root)
|
|
for _, c := range root.Children {
|
|
flatten(c, creator, keywords)
|
|
}
|
|
if root.Prev != nil {
|
|
// Show a comment when stepping out
|
|
root.Prev.Pos = len(creator.DH.Entries)
|
|
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
|
|
dotEntry := Entry{
|
|
Type: DotDotType,
|
|
Name: "..",
|
|
Pos: len(creator.DH.Entries),
|
|
}
|
|
creator.DH.Entries = append(creator.DH.Entries, dotEntry)
|
|
}
|
|
return
|
|
}
|
|
|
|
// resolveHardlinks goes through an Entry tree, and finds the Entry's associated
|
|
// with hardlinks and fills them in with the actual data from the base file.
|
|
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
|
|
originals := make(map[string]*Entry)
|
|
for base, links := range hardlinks {
|
|
var basefile *Entry
|
|
if seen, ok := originals[base]; !ok {
|
|
basefile = root.Find(base)
|
|
if basefile == nil {
|
|
logrus.Printf("%s does not exist in this tree\n", base)
|
|
continue
|
|
}
|
|
originals[base] = basefile
|
|
} else {
|
|
basefile = seen
|
|
}
|
|
for _, link := range links {
|
|
linkfile := root.Find(link)
|
|
if linkfile == nil {
|
|
logrus.Printf("%s does not exist in this tree\n", link)
|
|
continue
|
|
}
|
|
linkfile.Keywords = basefile.Keywords
|
|
if countlinks {
|
|
linkfile.Keywords = append(linkfile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
|
|
}
|
|
}
|
|
if countlinks {
|
|
basefile.Keywords = append(basefile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
|
|
}
|
|
}
|
|
}
|
|
|
|
// filter takes in a pointer to an Entry, and returns a slice of Entry's that
|
|
// satisfy the predicate p
|
|
func filter(root *Entry, p func(*Entry) bool) []Entry {
|
|
if root != nil {
|
|
var validEntrys []Entry
|
|
if len(root.Children) > 0 || root.Prev != nil {
|
|
for _, c := range root.Children {
|
|
// filter the sub-directory
|
|
if c.Prev != nil {
|
|
validEntrys = append(validEntrys, filter(c, p)...)
|
|
}
|
|
if p(c) {
|
|
if c.Prev == nil {
|
|
validEntrys = append([]Entry{*c}, validEntrys...)
|
|
} else {
|
|
validEntrys = append(validEntrys, *c)
|
|
}
|
|
}
|
|
}
|
|
return validEntrys
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (ts *tarStream) setErr(err error) {
|
|
ts.err = err
|
|
}
|
|
|
|
func (ts *tarStream) Read(p []byte) (n int, err error) {
|
|
return ts.teeReader.Read(p)
|
|
}
|
|
|
|
func (ts *tarStream) Close() error {
|
|
return ts.pipeReader.Close()
|
|
}
|
|
|
|
// Hierarchy returns the DirectoryHierarchy of the archive. It flattens the
|
|
// Entry tree before returning the DirectoryHierarchy
|
|
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
|
|
if ts.err != nil && ts.err != io.EOF {
|
|
return nil, ts.err
|
|
}
|
|
if ts.root == nil {
|
|
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
|
|
}
|
|
resolveHardlinks(ts.root, ts.hardlinks, InKeywordSlice(Keyword("nlink"), ts.keywords))
|
|
flatten(ts.root, &ts.creator, ts.keywords)
|
|
return ts.creator.DH, nil
|
|
}
|