go-mtree/tar.go
Vincent Batts ed464af779
*: xattr can Update()
This is a gnarly patchset that has been mashed together.
It uncovered that some aspects of Check were never really working
correctly for `xattr` keywords, but also the `Update()` had been left
undone for a while.

This includes some API changes around the `Keyword` and `KeyVal` types.

Also I would like to update the signature for the `UpdateKeywordFunc` to
just accept a `KeyVal` as an argugment, rather than a keyword AND the
value. with this context there would be no need to guess on the value of
what's passed to the xattr update function of whether it needs or
already is base64 encoded.

Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
2017-06-24 15:05:24 -04:00

461 lines
13 KiB
Go

package mtree
import (
"archive/tar"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
"github.com/Sirupsen/logrus"
"github.com/vbatts/go-mtree/pkg/govis"
)
// Streamer creates a file hierarchy out of a tar stream
type Streamer interface {
io.ReadCloser
Hierarchy() (*DirectoryHierarchy, error)
}
var tarDefaultSetKeywords = []KeyVal{
"type=file",
"flags=none",
"mode=0664",
}
// NewTarStreamer streams a tar archive and creates a file hierarchy based off
// of the tar metadata headers
func NewTarStreamer(r io.Reader, excludes []ExcludeFunc, keywords []Keyword) Streamer {
pR, pW := io.Pipe()
ts := &tarStream{
pipeReader: pR,
pipeWriter: pW,
creator: dhCreator{DH: &DirectoryHierarchy{}},
teeReader: io.TeeReader(r, pW),
tarReader: tar.NewReader(pR),
keywords: keywords,
hardlinks: map[string][]string{},
excludes: excludes,
}
go ts.readHeaders()
return ts
}
type tarStream struct {
root *Entry
hardlinks map[string][]string
creator dhCreator
pipeReader *io.PipeReader
pipeWriter *io.PipeWriter
teeReader io.Reader
tarReader *tar.Reader
keywords []Keyword
excludes []ExcludeFunc
err error
}
func (ts *tarStream) readHeaders() {
// remove "time" keyword
notimekws := []Keyword{}
for _, kw := range ts.keywords {
if !InKeywordSlice(kw, notimekws) {
if kw == "time" {
if !InKeywordSlice("tar_time", ts.keywords) {
notimekws = append(notimekws, "tar_time")
}
} else {
notimekws = append(notimekws, kw)
}
}
}
ts.keywords = notimekws
// We have to start with the directory we're in, and anything beyond these
// items is determined at the time a tar is extracted.
ts.root = &Entry{
Name: ".",
Type: RelativeType,
Prev: &Entry{
Raw: "# .",
Type: CommentType,
},
Set: nil,
Keywords: []KeyVal{"type=dir"},
}
// insert signature and metadata comments first (user, machine, tree, date)
for _, e := range signatureEntries("<user specified tar archive>") {
e.Pos = len(ts.creator.DH.Entries)
ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
}
// insert keyword metadata next
for _, e := range keywordEntries(ts.keywords) {
e.Pos = len(ts.creator.DH.Entries)
ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
}
hdrloop:
for {
hdr, err := ts.tarReader.Next()
if err != nil {
ts.pipeReader.CloseWithError(err)
return
}
for _, ex := range ts.excludes {
if ex(hdr.Name, hdr.FileInfo()) {
continue hdrloop
}
}
// Because the content of the file may need to be read by several
// KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from
// ts.tarReader is not enough.
tmpFile, err := ioutil.TempFile("", "ts.payload.")
if err != nil {
ts.pipeReader.CloseWithError(err)
return
}
// for good measure
if err := tmpFile.Chmod(0600); err != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
ts.pipeReader.CloseWithError(err)
return
}
if _, err := io.Copy(tmpFile, ts.tarReader); err != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
ts.pipeReader.CloseWithError(err)
return
}
// Alright, it's either file or directory
encodedName, err := govis.Vis(filepath.Base(hdr.Name), DefaultVisFlags)
if err != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
ts.pipeReader.CloseWithError(err)
return
}
e := Entry{
Name: encodedName,
Type: RelativeType,
}
// Keep track of which files are hardlinks so we can resolve them later
if hdr.Typeflag == tar.TypeLink {
keyFunc := KeywordFuncs["link"]
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), nil)
if err != nil {
logrus.Warn(err)
break // XXX is breaking an okay thing to do here?
}
linkname, err := govis.Unvis(KeyVal(kvs[0]).Value(), DefaultVisFlags)
if err != nil {
logrus.Warn(err)
break // XXX is breaking an okay thing to do here?
}
if _, ok := ts.hardlinks[linkname]; !ok {
ts.hardlinks[linkname] = []string{hdr.Name}
} else {
ts.hardlinks[linkname] = append(ts.hardlinks[linkname], hdr.Name)
}
}
// now collect keywords on the file
for _, keyword := range ts.keywords {
if keyFunc, ok := KeywordFuncs[keyword.Prefix()]; ok {
// We can't extract directories on to disk, so "size" keyword
// is irrelevant for now
if hdr.FileInfo().IsDir() && keyword == "size" {
continue
}
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
if err != nil {
ts.setErr(err)
}
// for good measure, check that we actually get a value for a keyword
if len(kvs) > 0 && kvs[0] != "" {
e.Keywords = append(e.Keywords, kvs[0])
}
// don't forget to reset the reader
if _, err := tmpFile.Seek(0, 0); err != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
ts.pipeReader.CloseWithError(err)
return
}
}
}
// collect meta-set keywords for a directory so that we can build the
// actual sets in `flatten`
if hdr.FileInfo().IsDir() {
s := Entry{
Name: "meta-set",
Type: SpecialType,
}
for _, setKW := range SetKeywords {
if keyFunc, ok := KeywordFuncs[setKW.Prefix()]; ok {
kvs, err := keyFunc(hdr.Name, hdr.FileInfo(), tmpFile)
if err != nil {
ts.setErr(err)
}
for _, kv := range kvs {
if kv != "" {
s.Keywords = append(s.Keywords, kv)
}
}
if _, err := tmpFile.Seek(0, 0); err != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
ts.pipeReader.CloseWithError(err)
}
}
}
e.Set = &s
}
err = populateTree(ts.root, &e, hdr)
if err != nil {
ts.setErr(err)
}
tmpFile.Close()
os.Remove(tmpFile.Name())
}
}
// populateTree creates a pseudo file tree hierarchy using an Entry's Parent and
// Children fields. When examining the Entry e to insert in the tree, we
// determine if the path to that Entry exists yet. If it does, insert it in the
// appropriate position in the tree. If not, create a path up until the Entry's
// directory that it is contained in. Then, insert the Entry.
// root: the "." Entry
// e: the Entry we are looking to insert
// hdr: the tar header struct associated with e
func populateTree(root, e *Entry, hdr *tar.Header) error {
if root == nil || e == nil {
return fmt.Errorf("cannot populate or insert nil Entry's")
} else if root.Prev == nil {
return fmt.Errorf("root needs to be an Entry associated with a directory")
}
isDir := hdr.FileInfo().IsDir()
wd := filepath.Clean(hdr.Name)
if !isDir {
// directory up until the actual file
wd = filepath.Dir(wd)
if wd == "." {
root.Children = append([]*Entry{e}, root.Children...)
e.Parent = root
return nil
}
}
dirNames := strings.Split(wd, "/")
parent := root
for _, name := range dirNames[:] {
encoded, err := govis.Vis(name, DefaultVisFlags)
if err != nil {
return err
}
if node := parent.Descend(encoded); node == nil {
// Entry for directory doesn't exist in tree relative to root.
// We don't know if this directory is an actual tar header (because a
// user could have just specified a path to a deep file), so we must
// specify this placeholder directory as a "type=dir", and Set=nil.
newEntry := Entry{
Name: encoded,
Type: RelativeType,
Parent: parent,
Keywords: []KeyVal{"type=dir"}, // temp data
Set: nil, // temp data
}
pathname, err := newEntry.Path()
if err != nil {
return err
}
newEntry.Prev = &Entry{
Type: CommentType,
Raw: "# " + pathname,
}
parent.Children = append(parent.Children, &newEntry)
parent = &newEntry
} else {
// Entry for directory exists in tree, just keep going
parent = node
}
}
if !isDir {
parent.Children = append([]*Entry{e}, parent.Children...)
e.Parent = parent
} else {
// fill in the actual data from e
parent.Keywords = e.Keywords
parent.Set = e.Set
}
return nil
}
// After constructing a pseudo file hierarchy tree, we want to "flatten" this
// tree by putting the Entries into a slice with appropriate positioning.
// root: the "head" of the sub-tree to flatten
// creator: a dhCreator that helps with the '/set' keyword
// keywords: keywords specified by the user that should be evaluated
func flatten(root *Entry, creator *dhCreator, keywords []Keyword) {
if root == nil || creator == nil {
return
}
if root.Prev != nil {
// root.Prev != nil implies root is a directory
creator.DH.Entries = append(creator.DH.Entries,
Entry{
Type: BlankType,
Pos: len(creator.DH.Entries),
})
root.Prev.Pos = len(creator.DH.Entries)
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
if root.Set != nil {
// Check if we need a new set
consolidatedKeys := keyvalSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), keywords)
if creator.curSet == nil {
creator.curSet = &Entry{
Type: SpecialType,
Name: "/set",
Keywords: consolidatedKeys,
Pos: len(creator.DH.Entries),
}
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
} else {
needNewSet := false
for _, k := range root.Set.Keywords {
if !inKeyValSlice(k, creator.curSet.Keywords) {
needNewSet = true
break
}
}
if needNewSet {
creator.curSet = &Entry{
Name: "/set",
Type: SpecialType,
Pos: len(creator.DH.Entries),
Keywords: consolidatedKeys,
}
creator.DH.Entries = append(creator.DH.Entries, *creator.curSet)
}
}
} else if creator.curSet != nil {
// Getting into here implies that the Entry's set has not and
// was not supposed to be evaluated, thus, we need to reset curSet
creator.DH.Entries = append(creator.DH.Entries, Entry{
Name: "/unset",
Type: SpecialType,
Pos: len(creator.DH.Entries),
})
creator.curSet = nil
}
}
root.Set = creator.curSet
if creator.curSet != nil {
root.Keywords = keyValDifference(root.Keywords, creator.curSet.Keywords)
}
root.Pos = len(creator.DH.Entries)
creator.DH.Entries = append(creator.DH.Entries, *root)
for _, c := range root.Children {
flatten(c, creator, keywords)
}
if root.Prev != nil {
// Show a comment when stepping out
root.Prev.Pos = len(creator.DH.Entries)
creator.DH.Entries = append(creator.DH.Entries, *root.Prev)
dotEntry := Entry{
Type: DotDotType,
Name: "..",
Pos: len(creator.DH.Entries),
}
creator.DH.Entries = append(creator.DH.Entries, dotEntry)
}
return
}
// resolveHardlinks goes through an Entry tree, and finds the Entry's associated
// with hardlinks and fills them in with the actual data from the base file.
func resolveHardlinks(root *Entry, hardlinks map[string][]string, countlinks bool) {
originals := make(map[string]*Entry)
for base, links := range hardlinks {
var basefile *Entry
if seen, ok := originals[base]; !ok {
basefile = root.Find(base)
if basefile == nil {
logrus.Printf("%s does not exist in this tree\n", base)
continue
}
originals[base] = basefile
} else {
basefile = seen
}
for _, link := range links {
linkfile := root.Find(link)
if linkfile == nil {
logrus.Printf("%s does not exist in this tree\n", link)
continue
}
linkfile.Keywords = basefile.Keywords
if countlinks {
linkfile.Keywords = append(linkfile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
}
}
if countlinks {
basefile.Keywords = append(basefile.Keywords, KeyVal(fmt.Sprintf("nlink=%d", len(links)+1)))
}
}
}
// filter takes in a pointer to an Entry, and returns a slice of Entry's that
// satisfy the predicate p
func filter(root *Entry, p func(*Entry) bool) []Entry {
if root != nil {
var validEntrys []Entry
if len(root.Children) > 0 || root.Prev != nil {
for _, c := range root.Children {
// filter the sub-directory
if c.Prev != nil {
validEntrys = append(validEntrys, filter(c, p)...)
}
if p(c) {
if c.Prev == nil {
validEntrys = append([]Entry{*c}, validEntrys...)
} else {
validEntrys = append(validEntrys, *c)
}
}
}
return validEntrys
}
}
return nil
}
func (ts *tarStream) setErr(err error) {
ts.err = err
}
func (ts *tarStream) Read(p []byte) (n int, err error) {
return ts.teeReader.Read(p)
}
func (ts *tarStream) Close() error {
return ts.pipeReader.Close()
}
// Hierarchy returns the DirectoryHierarchy of the archive. It flattens the
// Entry tree before returning the DirectoryHierarchy
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
if ts.err != nil && ts.err != io.EOF {
return nil, ts.err
}
if ts.root == nil {
return nil, fmt.Errorf("root Entry not found, nothing to flatten")
}
resolveHardlinks(ts.root, ts.hardlinks, InKeywordSlice(Keyword("nlink"), ts.keywords))
flatten(ts.root, &ts.creator, ts.keywords)
return ts.creator.DH, nil
}