Merge pull request #29 from stephen679/tree_tar_stream

*: creating validation manifest from archive and validating a manifest against archive
This commit is contained in:
Vincent Batts 2016-07-25 15:14:56 -04:00 committed by GitHub
commit cc939615c7
14 changed files with 1078 additions and 147 deletions

121
check.go
View file

@ -3,14 +3,16 @@ package mtree
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
)
// Result of a Check
type Result struct {
// list of any failures in the Check
Failures []Failure `json:"failures"`
// Missing lists spec entries that TarCheck could not find in the tar
// hierarchy.
Missing []Entry
// Extra lists tar hierarchy entries that TarCheck never matched against a
// spec entry.
Extra []Entry
}
// Failure of a particular keyword for a path
@ -51,7 +53,8 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
creator.curSet = nil
}
case RelativeType, FullType:
info, err := os.Lstat(e.Path())
filename := e.Path()
info, err := os.Lstat(filename)
if err != nil {
return nil, err
}
@ -64,17 +67,31 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
}
for _, kv := range kvs {
keywordFunc, ok := KeywordFuncs[kv.Keyword()]
kw := kv.Keyword()
// 'tar_time' keyword evaluation wins against 'time' keyword evaluation
if kv.Keyword() == "time" && inSlice("tar_time", keywords) {
kw = "tar_time"
tartime := fmt.Sprintf("%s.%s", (strings.Split(kv.Value(), ".")[0]), "000000000")
kv = KeyVal(KeyVal(kw).ChangeValue(tartime))
}
keywordFunc, ok := KeywordFuncs[kw]
if !ok {
return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path())
}
if keywords != nil && !inSlice(kv.Keyword(), keywords) {
continue
}
curKeyVal, err := keywordFunc(filepath.Join(root, e.Path()), info)
fh, err := os.Open(filename)
if err != nil {
return nil, err
}
curKeyVal, err := keywordFunc(filename, info, fh)
if err != nil {
fh.Close()
return nil, err
}
fh.Close()
if string(kv) != curKeyVal {
failure := Failure{Path: e.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: KeyVal(curKeyVal).Value()}
result.Failures = append(result.Failures, failure)
@ -84,3 +101,99 @@ func Check(root string, dh *DirectoryHierarchy, keywords []string) (*Result, err
}
return &result, nil
}
// TarCheck is the tar equivalent of checking a file hierarchy spec against a
// tar stream to determine if files have been changed.
//
// tarDH is the hierarchy built from the tar archive (the actual state) and dh
// is the spec to validate against (the expected state). keywords restricts
// which keywords are compared; a nil slice compares every keyword of the spec.
//
// The returned Result carries:
//   - Failures: keywords whose value differs between spec and tar entry. A
//     keyword that the tar entry does not have at all is silently skipped.
//   - Missing: spec entries with no counterpart in the tar hierarchy.
//   - Extra: tar entries that were never visited while walking the spec.
//
// An error is returned if the tar hierarchy has no "." root entry, if the spec
// uses a keyword that is not in KeywordFuncs, or if the spec steps above the
// root with "..".
func TarCheck(tarDH, dh *DirectoryHierarchy, keywords []string) (*Result, error) {
	var result Result

	// Find the root of the tar tree. A copy of the entry is used so marking it
	// as seen does not modify tarDH.Entries; the copy still shares its
	// Children pointers with the original tree.
	var tarRoot *Entry
	for i := range tarDH.Entries {
		if tarDH.Entries[i].Name == "." {
			root := tarDH.Entries[i]
			tarRoot = &root
			break
		}
	}
	if tarRoot == nil {
		// Without a root there is nothing to descend from; previously this
		// case crashed with a nil pointer dereference.
		return nil, fmt.Errorf("no root entry %q found in tar hierarchy", ".")
	}
	// Entry.Next is used as a "seen" marker: everything left unmarked at the
	// end is reported as Extra.
	tarRoot.Next = &Entry{
		Name: "seen",
		Type: CommentType,
	}
	curDir := tarRoot

	creator := dhCreator{DH: dh}
	sort.Sort(byPos(creator.DH.Entries))

	// outOfTree becomes true once ".." walked above the root of the tar tree.
	var outOfTree bool
	for i, e := range creator.DH.Entries {
		switch e.Type {
		case SpecialType:
			if e.Name == "/set" {
				creator.curSet = &creator.DH.Entries[i]
			} else if e.Name == "/unset" {
				creator.curSet = nil
			}
		case RelativeType, FullType:
			if outOfTree {
				return &result, fmt.Errorf("No parent node from %s", e.Path())
			}
			// TODO: handle the case where "." is not the first Entry to be found
			tarEntry := curDir.Descend(e.Name)
			if tarEntry == nil {
				result.Missing = append(result.Missing, e)
				continue
			}
			tarEntry.Next = &Entry{
				Type: CommentType,
				Name: "seen",
			}

			// expected values from file hierarchy spec
			var kvs KeyVals
			if creator.curSet != nil {
				kvs = MergeSet(creator.curSet.Keywords, e.Keywords)
			} else {
				kvs = NewKeyVals(e.Keywords)
			}

			// actual values from the tar hierarchy
			var tarkvs KeyVals
			if tarEntry.Set != nil {
				tarkvs = MergeSet(tarEntry.Set.Keywords, tarEntry.Keywords)
			} else {
				tarkvs = NewKeyVals(tarEntry.Keywords)
			}

			for _, kv := range kvs {
				if _, ok := KeywordFuncs[kv.Keyword()]; !ok {
					return nil, fmt.Errorf("Unknown keyword %q for file %q", kv.Keyword(), e.Path())
				}
				if keywords != nil && !inSlice(kv.Keyword(), keywords) {
					continue
				}
				if tarkv := tarkvs.Has(kv.Keyword()); tarkv != emptyKV {
					if string(tarkv) != string(kv) {
						failure := Failure{Path: tarEntry.Path(), Keyword: kv.Keyword(), Expected: kv.Value(), Got: tarkv.Value()}
						result.Failures = append(result.Failures, failure)
					}
				}
			}

			// Step into a directory (directories are the entries that carry
			// a Prev comment entry).
			if tarEntry.Prev != nil {
				curDir = tarEntry
			}
		case DotDotType:
			if outOfTree {
				return &result, fmt.Errorf("No parent node.")
			}
			curDir = curDir.Ascend()
			if curDir == nil {
				outOfTree = true
			}
		}
	}

	// Whatever was never marked as seen exists only in the tar archive.
	result.Extra = filter(tarRoot, func(e *Entry) bool {
		return e.Next == nil
	})
	return &result, nil
}

View file

@ -143,6 +143,58 @@ func TestTimeComparison(t *testing.T) {
}
}
// TestTarTime verifies that a spec carrying nanosecond-precision "time"
// values validates against files whose mtime is a whole second, both with the
// default keywords and when "tar_time" is requested (in which case Check
// truncates the expected value to whole seconds).
func TestTarTime(t *testing.T) {
	dir, err := ioutil.TempDir("", "test-tar-time.")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	// This is the format of time from FreeBSD
	spec := `
/set type=file time=5.454353132
. type=dir time=5.123456789
file time=5.911134111
..
`

	fh, err := os.Create(filepath.Join(dir, "file"))
	if err != nil {
		t.Fatal(err)
	}
	// This is the time we're checking for: a whole number of epoch seconds.
	epoch := time.Unix(5, 0)
	if err := os.Chtimes(fh.Name(), epoch, epoch); err != nil {
		t.Fatal(err)
	}
	if err := os.Chtimes(dir, epoch, epoch); err != nil {
		t.Fatal(err)
	}
	if err := fh.Close(); err != nil {
		t.Error(err)
	}

	dh, err := ParseSpec(bytes.NewBufferString(spec))
	if err != nil {
		t.Fatal(err)
	}

	// make sure "time" keyword works
	_, err = Check(dir, dh, DefaultKeywords)
	if err != nil {
		t.Error(err)
	}

	// make sure tar_time wins
	res, err := Check(dir, dh, append(DefaultKeywords, "tar_time"))
	if err != nil {
		// res is nil when Check fails, so stop before dereferencing it.
		t.Fatal(err)
	}
	if len(res.Failures) > 0 {
		t.Fatal(res.Failures)
	}
}
func TestIgnoreComments(t *testing.T) {
dir, err := ioutil.TempDir("", "test-comments.")
if err != nil {

View file

@ -5,6 +5,8 @@ import (
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
@ -20,6 +22,7 @@ var (
flUseKeywords = flag.String("k", "", "Use the specified (delimited by comma or space) keywords as the current set of keywords")
flListKeywords = flag.Bool("list-keywords", false, "List the keywords available")
flResultFormat = flag.String("result-format", "bsd", "output the validation results using the given format (bsd, json, path)")
flTar = flag.String("T", "", "use tar archive to create or validate a directory hierarchy spec")
)
var formats = map[string]func(*mtree.Result) string{
@ -90,9 +93,13 @@ func main() {
if !inSlice("type", currentKeywords) {
currentKeywords = append([]string{"type"}, currentKeywords...)
}
} else {
if *flTar != "" {
currentKeywords = mtree.DefaultTarKeywords[:]
} else {
currentKeywords = mtree.DefaultKeywords[:]
}
}
// -K <keywords>
if *flAddKeywords != "" {
currentKeywords = append(currentKeywords, splitKeywordsArg(*flAddKeywords)...)
@ -123,9 +130,43 @@ func main() {
rootPath = *flPath
}
// -T <tar file>
var tdh *mtree.DirectoryHierarchy
if *flTar != "" {
fh, err := os.Open(*flTar)
if err != nil {
log.Println(err)
isErr = true
return
}
ts := mtree.NewTarStreamer(fh, currentKeywords)
if _, err := io.Copy(ioutil.Discard, ts); err != nil && err != io.EOF {
log.Println(err)
isErr = true
return
}
if err := ts.Close(); err != nil {
log.Println(err)
isErr = true
return
}
defer fh.Close()
tdh, err = ts.Hierarchy()
if err != nil {
log.Println(err)
isErr = true
return
}
}
// -c
if *flCreate {
// create a directory hierarchy
// with a tar stream
if tdh != nil {
tdh.WriteTo(os.Stdout)
} else {
// with a root directory
dh, err := mtree.Walk(rootPath, nil, currentKeywords)
if err != nil {
log.Println(err)
@ -133,9 +174,16 @@ func main() {
return
}
dh.WriteTo(os.Stdout)
} else if dh != nil {
}
} else if tdh != nil || dh != nil {
var res *mtree.Result
var err error
// else this is a validation
res, err := mtree.Check(rootPath, dh, currentKeywords)
if *flTar != "" {
res, err = mtree.TarCheck(tdh, dh, currentKeywords)
} else {
res, err = mtree.Check(rootPath, dh, currentKeywords)
}
if err != nil {
log.Println(err)
isErr = true
@ -150,6 +198,19 @@ func main() {
return
}
}
if res != nil {
if len(res.Extra) > 0 {
defer os.Exit(1)
for _, extra := range res.Extra {
fmt.Printf("%s extra\n", extra.Path())
}
}
if len(res.Missing) > 0 {
defer os.Exit(1)
for _, missing := range res.Missing {
fmt.Printf("%s missing\n", missing.Path())
}
}
} else {
log.Println("neither validating or creating a manifest. Please provide additional arguments")
isErr = true
@ -157,6 +218,7 @@ func main() {
return
}
}
}
func splitKeywordsArg(str string) []string {
return strings.Fields(strings.Replace(str, ",", " ", -1))

9
creator.go Normal file
View file

@ -0,0 +1,9 @@
package mtree
// dhCreator is used when building or walking a DirectoryHierarchy. It holds
// the hierarchy itself plus cursor state that changes as entries are handled.
type dhCreator struct {
// DH is the hierarchy being built or walked.
DH *DirectoryHierarchy
// curSet is the `/set` entry currently in effect, or nil when none is.
curSet *Entry
// curDir is the directory entry that newly parsed entries are parented to.
curDir *Entry
// NOTE(review): curEnt is presumably the most recently handled entry; it is
// not used in the code visible here - confirm against the parser/walker.
curEnt *Entry
}

102
entry.go Normal file
View file

@ -0,0 +1,102 @@
package mtree
import (
"fmt"
"path/filepath"
"strings"
)
// byPos implements sort.Interface, ordering entries by ascending Pos (their
// position in the spec).
type byPos []Entry
// Len returns the number of entries.
func (bp byPos) Len() int { return len(bp) }
// Less reports whether entry i comes before entry j in the spec.
func (bp byPos) Less(i, j int) bool { return bp[i].Pos < bp[j].Pos }
// Swap exchanges entries i and j.
func (bp byPos) Swap(i, j int) { bp[i], bp[j] = bp[j], bp[i] }
// Entry is each component of content in the mtree spec file. Entries also
// serve as tree nodes (via Parent and Children) when a hierarchy is built from
// a tar stream.
type Entry struct {
Parent *Entry // up: the containing directory; nil for a top-level entry
Children []*Entry // down: entries contained in this one
Prev, Next *Entry // left, right
Set *Entry // current `/set` for additional keywords
Pos int // order in the spec
Raw string // raw text of the line; when non-empty, String returns it verbatim
Name string // file or directory name
Keywords []string // TODO(vbatts) maybe a keyword typed set of values?
Type EntryType // what kind of spec line this entry represents
}
// Descend looks among the direct children of e for the Entry named
// `filename`. Directories are stored at the end of an Entry's children, so the
// children are scanned from last to first. Descending to "." (or to an empty
// name) yields e itself; because Descend has a value receiver, that result
// points to a copy of the Entry rather than to the original. nil is returned
// when no child carries the requested name.
func (e Entry) Descend(filename string) *Entry {
	switch filename {
	case ".", "":
		return &e
	}
	for idx := len(e.Children) - 1; idx >= 0; idx-- {
		if child := e.Children[idx]; child.Name == filename {
			return child
		}
	}
	return nil
}
// Ascend gets the parent of an Entry. Serves mainly to maintain readability
// when traversing up and down an Entry tree. The result is nil when the Entry
// has no Parent.
func (e Entry) Ascend() *Entry {
return e.Parent
}
// Path returns the cleaned full path of the entry. A FullType entry, or one
// without a Parent, is described by its own Name alone; any other entry has
// its Name joined onto the path of its Parent.
func (e Entry) Path() string {
	full := e.Name
	if e.Parent != nil && e.Type != FullType {
		full = filepath.Join(e.Parent.Path(), e.Name)
	}
	return filepath.Clean(full)
}
// String renders the entry as a line of an mtree spec. Raw text, when
// present, is returned verbatim. Blank entries render as the empty string and
// ".." entries as their name. Special, full-path and directory entries are
// written flush left as "name keywords"; every other entry gets a leading
// space.
func (e Entry) String() string {
	switch {
	case e.Raw != "":
		return e.Raw
	case e.Type == BlankType:
		return ""
	case e.Type == DotDotType:
		return e.Name
	}

	keywords := strings.Join(e.Keywords, " ")
	flushLeft := e.Type == SpecialType || e.Type == FullType || inSlice("type=dir", e.Keywords)
	if !flushLeft {
		return fmt.Sprintf(" %s %s", e.Name, keywords)
	}
	return fmt.Sprintf("%s %s", e.Name, keywords)
}
// EntryType identifies the kind of line found in an mtree spec file.
type EntryType int

// The kinds of lines that can appear in an mtree spec file.
const (
	// SignatureType is the first line of the file, like `#mtree v2.0`.
	SignatureType EntryType = iota
	// BlankType lines are blank and are ignored.
	BlankType
	// CommentType lines begin with `#` and are ignored.
	CommentType
	// SpecialType lines have a `/` prefix and issue a "special" command
	// (currently only /set and /unset).
	SpecialType
	// RelativeType lines have no '/' in their first white-space delimited
	// word. Options/keywords are applied.
	RelativeType
	// DotDotType is `..`, a relative path step. Keywords/options are ignored.
	DotDotType
	// FullType lines have a `/` after the first character of their first
	// word, which is interpreted as a file pathname with options.
	FullType
)

// String returns the name of the EntryType, or the empty string for a value
// that has no entry in typeNames.
func (et EntryType) String() string {
	if name, known := typeNames[et]; known {
		return name
	}
	return ""
}

// typeNames maps every EntryType to its printable name.
var typeNames = map[EntryType]string{
	SignatureType: "SignatureType",
	BlankType:     "BlankType",
	CommentType:   "CommentType",
	SpecialType:   "SpecialType",
	RelativeType:  "RelativeType",
	DotDotType:    "DotDotType",
	FullType:      "FullType",
}

View file

@ -1,11 +1,8 @@
package mtree
import (
"fmt"
"io"
"path/filepath"
"sort"
"strings"
)
// DirectoryHierarchy is the mapped structure for an mtree directory hierarchy
@ -27,75 +24,3 @@ func (dh DirectoryHierarchy) WriteTo(w io.Writer) (n int64, err error) {
}
return sum, nil
}
type byPos []Entry
func (bp byPos) Len() int { return len(bp) }
func (bp byPos) Less(i, j int) bool { return bp[i].Pos < bp[j].Pos }
func (bp byPos) Swap(i, j int) { bp[i], bp[j] = bp[j], bp[i] }
// Entry is each component of content in the mtree spec file
type Entry struct {
Parent, Child *Entry // up, down
Prev, Next *Entry // left, right
Set *Entry // current `/set` for additional keywords
Pos int // order in the spec
Raw string // file or directory name
Name string // file or directory name
Keywords []string // TODO(vbatts) maybe a keyword typed set of values?
Type EntryType
}
// Path provides the full path of the file, despite RelativeType or FullType
func (e Entry) Path() string {
if e.Parent == nil || e.Type == FullType {
return filepath.Clean(e.Name)
}
return filepath.Clean(filepath.Join(e.Parent.Path(), e.Name))
}
func (e Entry) String() string {
if e.Raw != "" {
return e.Raw
}
if e.Type == BlankType {
return ""
}
if e.Type == DotDotType {
return e.Name
}
// TODO(vbatts) if type is RelativeType and a keyword of not type=dir
if e.Type == SpecialType || e.Type == FullType || inSlice("type=dir", e.Keywords) {
return fmt.Sprintf("%s %s", e.Name, strings.Join(e.Keywords, " "))
}
return fmt.Sprintf(" %s %s", e.Name, strings.Join(e.Keywords, " "))
}
// EntryType are the formats of lines in an mtree spec file
type EntryType int
// The types of lines to be found in an mtree spec file
const (
SignatureType EntryType = iota // first line of the file, like `#mtree v2.0`
BlankType // blank lines are ignored
CommentType // Lines beginning with `#` are ignored
SpecialType // line that has `/` prefix issue a "special" command (currently only /set and /unset)
RelativeType // if the first white-space delimited word does not have a '/' in it. Options/keywords are applied.
DotDotType // .. - A relative path step. keywords/options are ignored
FullType // if the first word on the line has a `/` after the first character, it interpretted as a file pathname with options
)
// String returns the name of the EntryType
func (et EntryType) String() string {
return typeNames[et]
}
var typeNames = map[EntryType]string{
SignatureType: "SignatureType",
BlankType: "BlankType",
CommentType: "CommentType",
SpecialType: "SpecialType",
RelativeType: "RelativeType",
DotDotType: "DotDotType",
FullType: "FullType",
}

View file

@ -1,6 +1,7 @@
package mtree
import (
"archive/tar"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
@ -17,7 +18,10 @@ import (
// KeywordFunc is the type of a function called on each file to be included in
// a DirectoryHierarchy, that will produce the string output of the keyword to
// be included for the file entry. Otherwise, empty string.
type KeywordFunc func(path string, info os.FileInfo) (string, error)
// The io.Reader `r` is the stream of the file's payload. While this
// function takes an io.Reader, the caller needs to reset it to the beginning
// for each new KeywordFunc.
type KeywordFunc func(path string, info os.FileInfo, r io.Reader) (string, error)
// KeyVal is a "keyword=value"
type KeyVal string
@ -55,6 +59,11 @@ func (kv KeyVal) Value() string {
return strings.SplitN(strings.TrimSpace(string(kv)), "=", 2)[1]
}
// ChangeValue returns a new "keyword=value" string that keeps the keyword of
// kv and carries newval as its value. kv itself is not modified.
func (kv KeyVal) ChangeValue(newval string) string {
	keyword := kv.Keyword()
	return keyword + "=" + newval
}
// keywordSelector takes an array of "keyword=value" and filters out that only the set of words
func keywordSelector(keyval, words []string) []string {
retList := []string{}
@ -125,6 +134,17 @@ var (
"nlink",
"time",
}
// DefaultTarKeywords has keywords that should be used when creating a manifest from
// an archive. "nlink" is left out because evaluating the number of hardlinks has
// not been implemented for tar archives, and "tar_time" takes the place of "time".
DefaultTarKeywords = []string{
"size",
"type",
"uid",
"gid",
"mode",
"link",
"tar_time",
}
// SetKeywords is the default set of keywords calculated for a `/set` SpecialType
SetKeywords = []string{
"uid",
@ -156,6 +176,11 @@ var (
"sha512": hasherKeywordFunc("sha512", sha512.New), // The SHA512 message digest of the file
"sha512digest": hasherKeywordFunc("sha512digest", sha512.New), // A synonym for `sha512`
// This is not an upstreamed keyword, but used to vary from "time", as tar
// archives do not store nanosecond precision. So comparing on "time" will
// be only seconds level accurate.
"tar_time": tartimeKeywordFunc, // The last modification time of the file, from a tar archive mtime
// This is not an upstreamed keyword, but a needed attribute for file validation.
// The pattern for this keyword key is prefixed by "xattr." followed by the extended attribute "namespace.key".
// The keyword value is the SHA1 digest of the extended attribute's value.
@ -165,7 +190,7 @@ var (
)
var (
modeKeywordFunc = func(path string, info os.FileInfo) (string, error) {
modeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
permissions := info.Mode().Perm()
if os.ModeSetuid&info.Mode() > 0 {
permissions |= (1 << 11)
@ -178,52 +203,49 @@ var (
}
return fmt.Sprintf("mode=%#o", permissions), nil
}
sizeKeywordFunc = func(path string, info os.FileInfo) (string, error) {
sizeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return fmt.Sprintf("size=%d", info.Size()), nil
}
cksumKeywordFunc = func(path string, info os.FileInfo) (string, error) {
cksumKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if !info.Mode().IsRegular() {
return "", nil
}
fh, err := os.Open(path)
if err != nil {
return "", err
}
defer fh.Close()
sum, _, err := cksum(fh)
sum, _, err := cksum(r)
if err != nil {
return "", err
}
return fmt.Sprintf("cksum=%d", sum), nil
}
hasherKeywordFunc = func(name string, newHash func() hash.Hash) KeywordFunc {
return func(path string, info os.FileInfo) (string, error) {
return func(path string, info os.FileInfo, r io.Reader) (string, error) {
if !info.Mode().IsRegular() {
return "", nil
}
fh, err := os.Open(path)
if err != nil {
return "", err
}
defer fh.Close()
h := newHash()
if _, err := io.Copy(h, fh); err != nil {
if _, err := io.Copy(h, r); err != nil {
return "", err
}
return fmt.Sprintf("%s=%x", name, h.Sum(nil)), nil
}
}
timeKeywordFunc = func(path string, info os.FileInfo) (string, error) {
// tartimeKeywordFunc renders the modification time at whole-second
// precision: the seconds since the Unix epoch followed by a fixed
// ".000000000" fraction. path and r are not used.
tartimeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return fmt.Sprintf("tar_time=%d.000000000", info.ModTime().Unix()), nil
}
// timeKeywordFunc renders the modification time of the file as
// "time=<seconds>.<nanoseconds>", with the nanoseconds zero-padded to nine
// digits. path and r are not used.
timeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
	// Seconds and nanoseconds are taken separately from the time value. The
	// previous remainder, t % (t / 1e9), divided by zero for any mtime within
	// the first second of the epoch and produced a wrong fraction whenever the
	// seconds did not exceed the nanoseconds.
	mtime := info.ModTime()
	return fmt.Sprintf("time=%d.%9.9d", mtime.Unix(), mtime.Nanosecond()), nil
}
linkKeywordFunc = func(path string, info os.FileInfo) (string, error) {
linkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if sys, ok := info.Sys().(*tar.Header); ok {
if sys.Linkname != "" {
return fmt.Sprintf("link=%s", sys.Linkname), nil
}
return "", nil
}
if info.Mode()&os.ModeSymlink != 0 {
str, err := os.Readlink(path)
if err != nil {
@ -233,7 +255,7 @@ var (
}
return "", nil
}
typeKeywordFunc = func(path string, info os.FileInfo) (string, error) {
typeKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if info.Mode().IsDir() {
return "type=dir", nil
}

View file

@ -3,8 +3,10 @@
package mtree
import (
"archive/tar"
"crypto/sha1"
"fmt"
"io"
"os"
"os/user"
"strings"
@ -14,7 +16,11 @@ import (
)
var (
unameKeywordFunc = func(path string, info os.FileInfo) (string, error) {
unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uname=%s", hdr.Uname), nil
}
stat := info.Sys().(*syscall.Stat_t)
u, err := user.LookupId(fmt.Sprintf("%d", stat.Uid))
if err != nil {
@ -22,19 +28,40 @@ var (
}
return fmt.Sprintf("uname=%s", u.Username), nil
}
uidKeywordFunc = func(path string, info os.FileInfo) (string, error) {
uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uid=%d", hdr.Uid), nil
}
stat := info.Sys().(*syscall.Stat_t)
return fmt.Sprintf("uid=%d", stat.Uid), nil
}
gidKeywordFunc = func(path string, info os.FileInfo) (string, error) {
stat := info.Sys().(*syscall.Stat_t)
gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("gid=%d", hdr.Gid), nil
}
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
return fmt.Sprintf("gid=%d", stat.Gid), nil
}
nlinkKeywordFunc = func(path string, info os.FileInfo) (string, error) {
stat := info.Sys().(*syscall.Stat_t)
return "", nil
}
nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
return fmt.Sprintf("nlink=%d", stat.Nlink), nil
}
xattrKeywordFunc = func(path string, info os.FileInfo) (string, error) {
return "", nil
}
xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
if len(hdr.Xattrs) == 0 {
return "", nil
}
klist := []string{}
for k, v := range hdr.Xattrs {
klist = append(klist, fmt.Sprintf("xattr.%s=%x", k, sha1.Sum([]byte(v))))
}
return strings.Join(klist, " "), nil
}
xlist, err := xattr.List(path)
if err != nil {
return "", err

View file

@ -2,22 +2,36 @@
package mtree
import "os"
import (
"archive/tar"
"fmt"
"io"
"os"
)
var (
unameKeywordFunc = func(path string, info os.FileInfo) (string, error) {
unameKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uname=%s", hdr.Uname), nil
}
return "", nil
}
uidKeywordFunc = func(path string, info os.FileInfo) (string, error) {
uidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("uid=%d", hdr.Uid), nil
}
return "", nil
}
gidKeywordFunc = func(path string, info os.FileInfo) (string, error) {
gidKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
if hdr, ok := info.Sys().(*tar.Header); ok {
return fmt.Sprintf("gid=%d", hdr.Gid), nil
}
return "", nil
}
nlinkKeywordFunc = func(path string, info os.FileInfo) (string, error) {
nlinkKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return "", nil
}
xattrKeywordFunc = func(path string, info os.FileInfo) (string, error) {
xattrKeywordFunc = func(path string, info os.FileInfo, r io.Reader) (string, error) {
return "", nil
}
)

View file

@ -81,6 +81,7 @@ func ParseSpec(r io.Reader) (*DirectoryHierarchy, error) {
e.Type = RelativeType
}
e.Keywords = f[1:]
// TODO: gather keywords if using tar stream
e.Parent = creator.curDir
for i := range e.Keywords {
kv := KeyVal(e.Keywords[i])

376
tar.go Normal file
View file

@ -0,0 +1,376 @@
package mtree
import (
"archive/tar"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
// Streamer creates a file hierarchy out of a tar stream. The callers visible
// in this change read the stream to its end, Close it, and only then ask for
// the Hierarchy.
type Streamer interface {
io.ReadCloser
// Hierarchy returns the DirectoryHierarchy built from the tar headers, or
// the error recorded while building it.
Hierarchy() (*DirectoryHierarchy, error)
}
// tarDefaultSetKeywords are the keyword values that flatten puts at the front
// of every `/set` line it generates, ahead of the directory's own set keywords.
var tarDefaultSetKeywords = []string{"type=file", "flags=none", "mode=0664"}
// NewTarStreamer wraps the tar archive read from r. Everything read through
// the returned Streamer is also copied into a pipe, where a background
// goroutine parses the tar headers and builds a file hierarchy restricted to
// the given keywords.
func NewTarStreamer(r io.Reader, keywords []string) Streamer {
	headerIn, headerOut := io.Pipe()

	stream := new(tarStream)
	stream.pipeReader = headerIn
	stream.pipeWriter = headerOut
	stream.creator = dhCreator{DH: &DirectoryHierarchy{}}
	stream.teeReader = io.TeeReader(r, headerOut)
	stream.tarReader = tar.NewReader(headerIn)
	stream.keywords = keywords

	// The header parsing runs concurrently with the caller's reads.
	// (Original author's note: "I don't like this".)
	go stream.readHeaders()
	return stream
}
// tarStream is the Streamer implementation returned by NewTarStreamer.
type tarStream struct {
// creator holds the DirectoryHierarchy being built and the current `/set`.
creator dhCreator
// pipeReader is the read end of the pipe that feeds tarReader.
pipeReader *io.PipeReader
// pipeWriter is the write end of the pipe; teeReader writes into it.
pipeWriter *io.PipeWriter
// teeReader reads the caller's stream and mirrors the bytes into pipeWriter.
teeReader io.Reader
// tarReader parses the mirrored bytes as a tar archive.
tarReader *tar.Reader
// keywords are the keywords to evaluate for each tar entry.
keywords []string
// err is the last error recorded through setErr.
// NOTE(review): written by the readHeaders goroutine and read by Hierarchy
// with no synchronization visible here - confirm callers order these.
err error
}
// readHeaders consumes the tar archive from ts.tarReader and builds a tree of
// Entry values below a synthetic "." root. It is started as a goroutine by
// NewTarStreamer. When the tar reader stops delivering headers (io.EOF at the
// end of the archive, or any other error) the tree is flattened into
// ts.creator.DH.Entries and the pipe is closed with that error. Any failure
// while handling a single entry closes the pipe with the failure and stops
// without flattening.
func (ts *tarStream) readHeaders() {
	// We have to start with the directory we're in, and anything beyond these
	// items is determined at the time a tar is extracted.
	rootComment := Entry{
		Raw:  "# .",
		Type: CommentType,
	}
	root := Entry{
		Name: ".",
		Type: RelativeType,
		Prev: &rootComment,
		Set: &Entry{
			Name: "meta-set",
			Type: SpecialType,
		},
	}
	metadataEntries := signatureEntries("<user specified tar archive>")
	for _, e := range metadataEntries {
		e.Pos = len(ts.creator.DH.Entries)
		ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)
	}
	for {
		hdr, err := ts.tarReader.Next()
		if err != nil {
			flatten(&root, ts)
			ts.pipeReader.CloseWithError(err)
			return
		}
		if err := ts.readEntry(&root, hdr); err != nil {
			ts.pipeReader.CloseWithError(err)
			return
		}
	}
}

// readEntry handles the tar entry described by hdr: it spools the payload to
// a temporary file, evaluates the requested keywords against it and inserts
// the resulting Entry into the tree below root. Errors returned by keyword
// functions are recorded with ts.setErr and do not abort the entry; errors
// concerning the temporary file are returned. The temporary file is closed
// and removed before readEntry returns, so open files do not pile up over the
// length of the archive.
func (ts *tarStream) readEntry(root *Entry, hdr *tar.Header) error {
	// Because the content of the file may need to be read by several
	// KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from
	// ts.tarReader is not enough.
	tmpFile, err := ioutil.TempFile("", "ts.payload.")
	if err != nil {
		return err
	}
	// Deferred calls run last-in first-out: close first, then remove.
	defer os.Remove(tmpFile.Name())
	defer tmpFile.Close()

	// for good measure
	if err := tmpFile.Chmod(0600); err != nil {
		return err
	}
	if _, err := io.Copy(tmpFile, ts.tarReader); err != nil {
		return err
	}
	// The copy leaves the file offset at the end of the payload; rewind so
	// that the first KeywordFunc reads the content instead of an immediate EOF.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		return err
	}

	info := hdr.FileInfo()

	// Alright, it's either file or directory
	e := Entry{
		Name: filepath.Base(hdr.Name),
		Type: RelativeType,
	}

	// now collect keywords on the file
	// TODO: get number of hardlinks for a file (hdr.Typeflag == '1')
	for _, keyword := range ts.keywords {
		if keyword == "time" {
			keyword = "tar_time"
		}
		keyFunc, ok := KeywordFuncs[keyword]
		if !ok {
			continue
		}
		// We can't extract directories on to disk, so "size" keyword
		// is irrelevant for now
		if info.IsDir() && keyword == "size" {
			continue
		}
		val, err := keyFunc(hdr.Name, info, tmpFile)
		if err != nil {
			ts.setErr(err)
		}
		// for good measure, check that we actually get a value for a keyword
		if val != "" {
			e.Keywords = append(e.Keywords, val)
		}
		// don't forget to reset the reader
		if _, err := tmpFile.Seek(0, 0); err != nil {
			return err
		}
	}

	// collect meta-set keywords for a directory so that we can build the
	// actual sets in `flatten`
	if info.IsDir() {
		s := Entry{
			Name: "meta-set",
			Type: SpecialType,
		}
		for _, setKW := range SetKeywords {
			if setKW == "time" {
				setKW = "tar_time"
			}
			keyFunc, ok := KeywordFuncs[setKW]
			if !ok {
				continue
			}
			val, err := keyFunc(hdr.Name, info, tmpFile)
			if err != nil {
				ts.setErr(err)
			}
			if val != "" {
				s.Keywords = append(s.Keywords, val)
			}
			// Stop here on failure rather than carrying on with a reader in
			// an unknown position.
			if _, err := tmpFile.Seek(0, 0); err != nil {
				return err
			}
		}
		if filepath.Dir(filepath.Clean(hdr.Name)) == "." {
			root.Set = &s
		} else {
			e.Set = &s
		}
	}
	populateTree(root, &e, hdr, ts)
	return nil
}
// relationship describes how one directory relates to another; it is the
// result type of compareDir.
type relationship int
const (
unknownDir relationship = iota // none of the relationships below
sameDir // both paths are the same directory
childDir // the first path is directly inside the second
parentDir // the first path directly contains the second
)
// populateTree creates a file tree hierarchy using an Entry's Parent and
// Children fields. When examining the Entry e to insert in the tree, we
// determine if the path to that Entry exists yet. If it does, insert it in the
// appropriate position in the tree. If not, create a path with "placeholder"
// directories, and then insert the Entry. populateTree does not consider
// symbolic links yet.
//
// The first component of hdr.Name is treated as the root itself: a directory
// directly at that level only donates its keywords to root, and files at that
// level become children of root. Files are prepended to their parent's
// children and directories are appended, so directories end up last.
//
// NOTE(review): a directory whose node already exists in the tree (for
// instance as a placeholder created earlier) is not linked into the tree, so
// its keywords are dropped - confirm whether that is intended.
func populateTree(root, e *Entry, hdr *tar.Header, ts *tarStream) {
	isDir := hdr.FileInfo().IsDir()
	wd := filepath.Clean(hdr.Name)

	if !isDir {
		// If entry is a file, we only want the directory it's in.
		wd = filepath.Dir(wd)
	}
	if filepath.Dir(wd) == "." {
		if isDir {
			root.Keywords = e.Keywords
		} else {
			root.Children = append([]*Entry{e}, root.Children...)
			e.Parent = root
		}
		return
	}

	// Skip the first component: it corresponds to root.
	dirNames := strings.Split(wd, "/")[1:]
	parent := root
	for i, name := range dirNames {
		if node := parent.Descend(name); node != nil {
			// Entry for directory exists in tree, just keep going
			parent = node
			continue
		}
		// Entry for directory doesn't exist in tree relative to root. Only
		// the final component is the directory being inserted; every missing
		// intermediate directory gets a placeholder. Inserting e at each
		// missing level made e its own parent, which sends e.Path() into
		// endless recursion.
		var newEntry *Entry
		if isDir && i == len(dirNames)-1 {
			newEntry = e
		} else {
			newEntry = &Entry{
				Name: name,
				Type: RelativeType,
			}
		}
		newEntry.Parent = parent
		parent.Children = append(parent.Children, newEntry)
		parent = newEntry
	}

	if !isDir {
		parent.Children = append([]*Entry{e}, parent.Children...)
		e.Parent = parent
	} else {
		// A non-nil Prev is what marks an Entry as a directory elsewhere.
		commentEntry := Entry{
			Raw:  "# " + e.Path(),
			Type: CommentType,
		}
		e.Prev = &commentEntry
	}
}
// flatten walks the tree built from the tar stream depth-first and appends
// its Entry values to ts.creator.DH.Entries in an order that
// ts.creator.DH.WriteTo can write out directly. Each appended Entry gets its
// Pos set to its index in that slice.
//
// For a directory (an Entry with a non-nil Prev) the emitted sequence is: a
// blank line, the directory's comment, a new `/set` line when needed, the
// directory itself, its children, the comment again and a ".." step.
//
// root: the "head" of the sub-tree to flatten
// ts : tarStream to keep track of Entry's
func flatten(root *Entry, ts *tarStream) {
if root.Prev != nil {
// root.Prev != nil implies root is a directory
ts.creator.DH.Entries = append(ts.creator.DH.Entries,
Entry{
Type: BlankType,
Pos: len(ts.creator.DH.Entries),
})
root.Prev.Pos = len(ts.creator.DH.Entries)
ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev)
// Check if we need a new set
if ts.creator.curSet == nil {
// No `/set` in effect yet: always emit one.
ts.creator.curSet = &Entry{
Type: SpecialType,
Name: "/set",
Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords),
Pos: len(ts.creator.DH.Entries),
}
ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet)
} else {
// Emit a new `/set` only if this directory has a set keyword value
// that the `/set` in effect does not already contain.
needNewSet := false
for _, k := range root.Set.Keywords {
if !inSlice(k, ts.creator.curSet.Keywords) {
needNewSet = true
break
}
}
if needNewSet {
ts.creator.curSet = &Entry{
Name: "/set",
Type: SpecialType,
Pos: len(ts.creator.DH.Entries),
Keywords: keywordSelector(append(tarDefaultSetKeywords, root.Set.Keywords...), ts.keywords),
}
ts.creator.DH.Entries = append(ts.creator.DH.Entries, *ts.creator.curSet)
}
}
}
// Attach the `/set` in effect and reduce the entry's keywords by what that
// set already states.
root.Set = ts.creator.curSet
root.Keywords = setDifference(root.Keywords, ts.creator.curSet.Keywords)
root.Pos = len(ts.creator.DH.Entries)
ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root)
for _, c := range root.Children {
flatten(c, ts)
}
if root.Prev != nil {
// Show a comment when stepping out
root.Prev.Pos = len(ts.creator.DH.Entries)
ts.creator.DH.Entries = append(ts.creator.DH.Entries, *root.Prev)
dotEntry := Entry{
Type: DotDotType,
Name: "..",
Pos: len(ts.creator.DH.Entries),
}
ts.creator.DH.Entries = append(ts.creator.DH.Entries, dotEntry)
}
}
// filter collects copies of the entries below root that satisfy the predicate
// p. Children that are directories (non-nil Prev) are searched recursively
// before being tested themselves. A matching non-directory is placed at the
// front of the result and a matching directory at the back. nil is returned
// for a root that has no children and is not a directory.
func filter(root *Entry, p func(*Entry) bool) []Entry {
	if len(root.Children) == 0 && root.Prev == nil {
		return nil
	}

	var matches []Entry
	for _, child := range root.Children {
		// if an Entry is a directory, filter the directory
		if child.Prev != nil {
			matches = append(matches, filter(child, p)...)
		}
		if !p(child) {
			continue
		}
		if child.Prev == nil {
			// non-directories go to the front
			matches = append([]Entry{*child}, matches...)
			continue
		}
		matches = append(matches, *child)
	}
	return matches
}
// setDifference returns the elements of `this` that do not occur in `that`,
// in their original order. When `this` is empty, `that` is returned unchanged
// instead of an empty result.
func setDifference(this, that []string) []string {
	if len(this) == 0 {
		return that
	}
	remaining := make([]string, 0)
	for _, item := range this {
		if inSlice(item, that) {
			continue
		}
		remaining = append(remaining, item)
	}
	return remaining
}
// compareDir reports how curDir relates to prevDir after both paths have been
// cleaned: sameDir when they are equal, childDir when curDir lies directly
// inside prevDir, parentDir when curDir directly contains prevDir, and
// unknownDir otherwise.
func compareDir(curDir, prevDir string) relationship {
	cur := filepath.Clean(curDir)
	prev := filepath.Clean(prevDir)
	switch {
	case cur == prev:
		return sameDir
	case filepath.Dir(cur) == prev:
		return childDir
	case cur == filepath.Dir(prev):
		return parentDir
	default:
		return unknownDir
	}
}
// setErr records err on the stream, replacing any error recorded earlier.
// Hierarchy reports the recorded error.
func (ts *tarStream) setErr(err error) {
ts.err = err
}
// Read reads from the tee reader, so every byte handed to the caller is also
// written into the pipe that feeds the tar header parser.
func (ts *tarStream) Read(p []byte) (n int, err error) {
return ts.teeReader.Read(p)
}
// Close closes the read end of the pipe that feeds the tar header parser. It
// does not close the reader that was handed to NewTarStreamer.
func (ts *tarStream) Close() error {
return ts.pipeReader.Close()
}
// Hierarchy returns the DirectoryHierarchy built from the tar headers. If an
// error other than io.EOF was recorded on the stream, that error is returned
// together with a nil hierarchy.
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
	if recorded := ts.err; recorded != nil && recorded != io.EOF {
		return nil, recorded
	}
	return ts.creator.DH, nil
}

175
tar_test.go Normal file
View file

@ -0,0 +1,175 @@
package mtree
import (
"archive/tar"
"bytes"
"io"
"io/ioutil"
"os"
"testing"
)
// ExampleStreamer sketches the intended use of a Streamer: wrap the tar file,
// let the extraction consume the stream, then obtain the hierarchy collected
// from the headers and validate the extracted directory against it. Error
// handling is left to the reader, and extractTar in this file is a stub.
func ExampleStreamer() {
fh, err := os.Open("./testdata/test.tar")
if err != nil {
// handle error ...
}
str := NewTarStreamer(fh, nil)
// Reading the stream (here: extracting it) is what drives the header parsing.
if err := extractTar("/tmp/dir", str); err != nil {
// handle error ...
}
dh, err := str.Hierarchy()
if err != nil {
// handle error ...
}
res, err := Check("/tmp/dir/", dh, nil)
if err != nil {
// handle error ...
}
if len(res.Failures) > 0 {
// handle validation issue ...
}
}
// extractTar is a stand-in used by ExampleStreamer. It does not read tr or
// write anything below root; it always returns nil.
func extractTar(root string, tr io.Reader) error {
return nil
}
// TestTar streams ./testdata/test.tar through the tar streamer, writes the
// resulting DirectoryHierarchy to ./testdata/test.mtree, parses that spec
// back, and runs TarCheck between the two. Any keyword failure, missing
// entry, or extra entry fails the test.
func TestTar(t *testing.T) {
	/*
		data, err := makeTarStream()
		if err != nil {
			t.Fatal(err)
		}
		buf := bytes.NewBuffer(data)
		str := NewTarStreamer(buf, append(DefaultKeywords, "sha1"))
	*/
	/*
		// open empty folder and check size.
		fh, err := os.Open("./testdata/empty")
		if err != nil {
			t.Fatal(err)
		}
		log.Println(fh.Stat())
		fh.Close() */

	tarFile, err := os.Open("./testdata/test.tar")
	if err != nil {
		t.Fatal(err)
	}
	// Register the close right away so the archive is released on every
	// exit path, including the t.Fatal calls below.
	defer tarFile.Close()

	str := NewTarStreamer(tarFile, append(DefaultKeywords, "sha1"))
	// Drain the stream completely before closing it and asking for the
	// hierarchy.
	if _, err := io.Copy(ioutil.Discard, str); err != nil && err != io.EOF {
		t.Fatal(err)
	}
	if err := str.Close(); err != nil {
		t.Fatal(err)
	}

	// get DirectoryHierarchy struct from walking the tar archive
	tdh, err := str.Hierarchy()
	if err != nil {
		t.Fatal(err)
	}
	if tdh == nil {
		t.Fatal("expected a DirectoryHierarchy struct, but got nil")
	}

	specOut, err := os.Create("./testdata/test.mtree")
	if err != nil {
		t.Fatal(err)
	}
	defer os.Remove("./testdata/test.mtree")

	// put output of tar walk into test.mtree
	if _, err := tdh.WriteTo(specOut); err != nil {
		specOut.Close()
		t.Fatal(err)
	}
	// A failed close can mean the spec was not fully written.
	if err := specOut.Close(); err != nil {
		t.Fatal(err)
	}

	// now simulate gomtree -T testdata/test.tar -f testdata/test.mtree
	specIn, err := os.Open("./testdata/test.mtree")
	if err != nil {
		t.Fatal(err)
	}
	defer specIn.Close()

	dh, err := ParseSpec(specIn)
	if err != nil {
		t.Fatal(err)
	}

	res, err := TarCheck(tdh, dh, append(DefaultKeywords, "sha1"))
	if err != nil {
		t.Fatal(err)
	}
	if res == nil {
		return
	}

	// Report every category independently (failures, missing, extra), and
	// only call t.Fatal once all of them have been printed.
	summary := ""
	if len(res.Failures) > 0 {
		for _, f := range res.Failures {
			t.Errorf("%s\n", f)
		}
		summary += "Keyword validation errors\n"
	}
	if len(res.Missing) > 0 {
		for _, m := range res.Missing {
			t.Errorf("Missing file: %s\n", m.Path())
		}
		summary += "Missing files not expected for this test\n"
	}
	if len(res.Extra) > 0 {
		for _, e := range res.Extra {
			t.Errorf("Extra file: %s\n", e.Path())
		}
		summary += "Extra files not expected for this test\n"
	}
	if summary != "" {
		t.Fatal(summary)
	}
}
// makeTarStream builds a minimal in-memory tar archive and returns its bytes.
// It contains a directory entry "x/" and a regular file "x/files" whose body
// is "howdy\n"; per the original note it is meant to mimic what is in
// ./testdata/test.tar.
//
// Any error from writing a header, writing a body, or closing the tar writer
// is returned with a nil byte slice.
func makeTarStream() ([]byte, error) {
	buf := new(bytes.Buffer)

	// Create a new tar archive.
	tw := tar.NewWriter(buf)

	// Add some files to the archive.
	var files = []struct {
		Name, Body string
		Mode       int64
		Type       byte
		Xattrs     map[string]string
	}{
		{"x/", "", 0755, '5', nil},
		{"x/files", "howdy\n", 0644, '0', nil},
	}
	for _, file := range files {
		hdr := &tar.Header{
			Name: file.Name,
			Mode: file.Mode,
			Size: int64(len(file.Body)),
			// Carry the declared entry type into the header instead of
			// relying on the writer's default for a zero Typeflag.
			Typeflag: file.Type,
			Xattrs:   file.Xattrs,
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return nil, err
		}
		if len(file.Body) > 0 {
			if _, err := tw.Write([]byte(file.Body)); err != nil {
				return nil, err
			}
		}
	}

	// Make sure to check the error on Close.
	if err := tw.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

BIN
testdata/test.tar vendored Normal file

Binary file not shown.

81
walk.go
View file

@ -2,6 +2,7 @@ package mtree
import (
"fmt"
"io"
"os"
"os/user"
"path/filepath"
@ -14,13 +15,6 @@ import (
// returns true, then the path is not included in the spec.
type ExcludeFunc func(path string, info os.FileInfo) bool
type dhCreator struct {
DH *DirectoryHierarchy
curSet *Entry
curDir *Entry
curEnt *Entry
}
var defaultSetKeywords = []string{"type=file", "nlink=1", "flags=none", "mode=0664"}
// Walk from root directory and assemble the DirectoryHierarchy. excludes
@ -76,11 +70,30 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
Keywords: keywordSelector(defaultSetKeywords, keywords),
}
for _, keyword := range SetKeywords {
if str, err := KeywordFuncs[keyword](path, info); err == nil && str != "" {
err := func() error {
var r io.Reader
if info.Mode().IsRegular() {
fh, err := os.Open(path)
if err != nil {
return err
}
defer fh.Close()
r = fh
}
keywordFunc, ok := KeywordFuncs[keyword]
if !ok {
return fmt.Errorf("Unknown keyword %q for file %q", keyword, path)
}
if str, err := keywordFunc(path, info, r); err == nil && str != "" {
e.Keywords = append(e.Keywords, str)
} else if err != nil {
return err
}
return nil
}()
if err != nil {
return err
}
}
creator.curSet = &e
creator.DH.Entries = append(creator.DH.Entries, e)
@ -88,9 +101,30 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
// check the attributes of the /set keywords and re-set if changed
klist := []string{}
for _, keyword := range SetKeywords {
if str, err := KeywordFuncs[keyword](path, info); err == nil && str != "" {
err := func() error {
var r io.Reader
if info.Mode().IsRegular() {
fh, err := os.Open(path)
if err != nil {
return err
}
defer fh.Close()
r = fh
}
keywordFunc, ok := KeywordFuncs[keyword]
if !ok {
return fmt.Errorf("Unknown keyword %q for file %q", keyword, path)
}
str, err := keywordFunc(path, info, r)
if err != nil {
return err
}
if str != "" {
klist = append(klist, str)
} else if err != nil {
}
return nil
}()
if err != nil {
return err
}
}
@ -122,11 +156,30 @@ func Walk(root string, exlcudes []ExcludeFunc, keywords []string) (*DirectoryHie
Parent: creator.curDir,
}
for _, keyword := range keywords {
if str, err := KeywordFuncs[keyword](path, info); err == nil && str != "" {
if !inSlice(str, creator.curSet.Keywords) {
err := func() error {
var r io.Reader
if info.Mode().IsRegular() {
fh, err := os.Open(path)
if err != nil {
return err
}
defer fh.Close()
r = fh
}
keywordFunc, ok := KeywordFuncs[keyword]
if !ok {
return fmt.Errorf("Unknown keyword %q for file %q", keyword, path)
}
str, err := keywordFunc(path, info, r)
if err != nil {
return err
}
if str != "" && !inSlice(str, creator.curSet.Keywords) {
e.Keywords = append(e.Keywords, str)
}
} else if err != nil {
return nil
}()
if err != nil {
return err
}
}
@ -245,7 +298,7 @@ func readOrderedDirNames(dirname string) ([]string, error) {
return append(names, dirnames...), nil
}
// signatureEntries is helper function that returns a slice of Entry's
// signatureEntries is a simple helper function that returns a slice of Entry's
// that describe the metadata signature about the host. Items like date, user,
// machine, and tree (which is specified by argument `root`), are considered.
// These Entry's construct comments in the mtree specification, so if there is