Vincent Batts 2015-08-11 21:59:23 -04:00
parent e2a62d6b0d
commit 5c8d5cacba
13 changed files with 265 additions and 50 deletions


@@ -10,7 +10,7 @@ Concerns
For completely safe assembly/disassembly, there will need to be a Content
Addressable Storage (CAS) directory, that maps to a checksum in the
-`storage.Entity` of `storage.FileType`.
+`storage.Entity` of `storage.FileCheckEntry`.
This is due to the fact that tar archives _can_ allow multiple records for the
same path, but the last one effectively wins. Even if the prior records had a
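The duplicate-path behavior this concern describes is easy to reproduce with the standard library. A minimal sketch (not part of this commit) that writes two records for the same path; on extraction the last record wins, which is why payloads must be keyed by checksum rather than by path:

```go
package main

import (
	"archive/tar"
	"os"
)

func main() {
	tw := tar.NewWriter(os.Stdout)
	// two records, same path: a compliant extractor keeps the second body
	for _, body := range []string{"first payload", "second payload, which wins"} {
		hdr := &tar.Header{Name: "./hurr.txt", Mode: 0644, Size: int64(len(body))}
		if err := tw.WriteHeader(hdr); err != nil {
			panic(err)
		}
		if _, err := tw.Write([]byte(body)); err != nil {
			panic(err)
		}
	}
	if err := tw.Close(); err != nil {
		panic(err)
	}
}
```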


@@ -19,6 +19,10 @@ import (
// metadata. With the combination of these two items, a precise assembled Tar
// archive is possible.
func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser {
+return newOutputTarStreamWithOptions(fg, up, DefaultOutputOptions)
+}
+
+func newOutputTarStreamWithOptions(fg storage.FileGetter, up storage.Unpacker, opts Options) io.ReadCloser {
// ... Since these are interfaces, this is possible, so let's not have a nil pointer
if fg == nil || up == nil {
return nil
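For context on the new indirection: the exported NewOutputTarStream keeps its signature and now delegates to the unexported newOutputTarStreamWithOptions with DefaultOutputOptions. A minimal reassembly sketch, assuming the package's existing in-memory FileGetPutter and JSON unpacker, with a hypothetical tar-data.json manifest path:

```go
package main

import (
	"io"
	"log"
	"os"

	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

func main() {
	// A FileGetter holding the file payloads; NewBufferFileGetPutter is the
	// in-memory variant (a CAS directory could back this instead).
	fg := storage.NewBufferFileGetPutter()

	manifest, err := os.Open("tar-data.json") // hypothetical metadata path
	if err != nil {
		log.Fatal(err)
	}
	defer manifest.Close()

	// Reassemble the exact tar stream; NewOutputTarStream now delegates to
	// newOutputTarStreamWithOptions(fg, up, DefaultOutputOptions).
	rc := asm.NewOutputTarStream(fg, storage.NewJSONUnpacker(manifest))
	defer rc.Close()
	if _, err := io.Copy(os.Stdout, rc); err != nil {
		log.Fatal(err)
	}
}
```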


@@ -20,7 +20,7 @@ var entries = []struct {
}{
{
Entry: storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78},
Size: 20,
@@ -29,7 +29,7 @@ var entries = []struct {
},
{
Entry: storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
Name: "./ermahgerd.txt",
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
@@ -38,7 +38,7 @@ var entries = []struct {
},
{
Entry: storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip.
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
@@ -52,7 +52,7 @@ var entriesMangled = []struct {
}{
{
Entry: storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78},
Size: 20,
@@ -62,7 +62,7 @@ var entriesMangled = []struct {
},
{
Entry: storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
Name: "./ermahgerd.txt",
Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
@@ -72,7 +72,7 @@ var entriesMangled = []struct {
},
{
Entry: storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4},
Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187},
Size: 26,
@@ -86,7 +86,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
// first let's prep a GetPutter and Packer
for i := range entries {
-if entries[i].Entry.Type == storage.FileType {
+if entries[i].Entry.Type == storage.FileCheckEntry {
j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body))
if err != nil {
t.Error(err)
@@ -107,7 +107,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) {
}
for _, e := range entriesMangled {
-if e.Entry.Type == storage.FileType {
+if e.Entry.Type == storage.FileCheckEntry {
rdr, err := fgp.Get(e.Entry.GetName())
if err != nil {
t.Error(err)


@@ -19,6 +19,10 @@ import (
// storage.FilePutter. Since the checksumming is still needed, a default
// of NewDiscardFilePutter will be used internally
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
+return newInputTarStreamWithOptions(r, p, fp, DefaultInputOptions)
+}
+
+func newInputTarStreamWithOptions(r io.Reader, p storage.Packer, fp storage.FilePutter, opts Options) (io.Reader, error) {
// What to do here... folks will want their own access to the Reader that is
// their tar archive stream, but we'll need that same stream to use our
// forked 'archive/tar'.
@@ -57,7 +61,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
// the end of an archive. Collect them too.
if b := tr.RawBytes(); len(b) > 0 {
_, err := p.AddEntry(storage.Entry{
-Type: storage.SegmentType,
+Type: storage.SegmentEntry,
Payload: b,
})
if err != nil {
@@ -73,7 +77,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
if b := tr.RawBytes(); len(b) > 0 {
_, err := p.AddEntry(storage.Entry{
-Type: storage.SegmentType,
+Type: storage.SegmentEntry,
Payload: b,
})
if err != nil {
@@ -93,7 +97,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
}
entry := storage.Entry{
-Type: storage.FileType,
+Type: storage.FileCheckEntry,
Size: hdr.Size,
Payload: csum,
}
@@ -109,7 +113,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
if b := tr.RawBytes(); len(b) > 0 {
_, err = p.AddEntry(storage.Entry{
-Type: storage.SegmentType,
+Type: storage.SegmentEntry,
Payload: b,
})
if err != nil {
@@ -127,7 +131,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
return
}
_, err = p.AddEntry(storage.Entry{
-Type: storage.SegmentType,
+Type: storage.SegmentEntry,
Payload: remainder,
})
if err != nil {
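The disassembly side mirrors the assembly change: NewInputTarStream delegates to newInputTarStreamWithOptions with DefaultInputOptions. A sketch of recording SegmentEntry/FileCheckEntry metadata while the tar stream passes through untouched, with hypothetical file paths:

```go
package main

import (
	"io"
	"io/ioutil"
	"log"
	"os"

	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

func main() {
	tarFile, err := os.Open("archive.tar") // hypothetical input archive
	if err != nil {
		log.Fatal(err)
	}
	defer tarFile.Close()

	manifest, err := os.Create("tar-data.json") // hypothetical metadata output
	if err != nil {
		log.Fatal(err)
	}
	defer manifest.Close()

	// Record SegmentEntry/FileCheckEntry metadata as the stream is read;
	// file payloads are checksummed and then discarded here.
	rdr, err := asm.NewInputTarStream(tarFile, storage.NewJSONPacker(manifest), storage.NewDiscardFilePutter())
	if err != nil {
		log.Fatal(err)
	}
	if _, err := io.Copy(ioutil.Discard, rdr); err != nil {
		log.Fatal(err)
	}
}
```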

tar/asm/options.go Normal file

@@ -0,0 +1,18 @@
package asm

// Defaults that match the existing behavior
var (
DefaultOutputOptions = OptFileCheck | OptSegment
DefaultInputOptions = OptFileCheck | OptSegment
)

// Options adjust how the tar stream is processed, e.g. to also include
// entries for on-disk verification.
type Options int

// The available options cover FileCheckEntry, SegmentEntry, and
// VerificationEntry handling
const (
OptFileCheck Options = 1 << iota
OptSegment
OptVerify
)
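Since each Opt* constant is a distinct bit, option sets compose with `|` (as in the defaults above) and are tested with `&`. A small in-package sketch; optionsDemo is hypothetical:

```go
package asm

// optionsDemo is a hypothetical illustration: each Opt* constant is a
// distinct bit, so option sets compose with | and are tested with &.
func optionsDemo() bool {
	opts := DefaultInputOptions | OptVerify // OptFileCheck|OptSegment|OptVerify
	return opts&OptVerify != 0              // true: verification entries requested
}
```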


@@ -9,36 +9,41 @@ func (e Entries) Len() int { return len(e) }
func (e Entries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e Entries) Less(i, j int) bool { return e[i].Position < e[j].Position }
-// Type of Entry
-type Type int
+// EntryType is the type of Entry
+type EntryType int
const (
-// FileType represents a file payload from the tar stream.
+// FileCheckEntry represents a file payload from the tar stream.
//
// This will be used to map to relative paths on disk. Only Size > 0 will get
// read into a resulting output stream (due to hardlinks).
-FileType Type = 1 + iota
-// SegmentType represents a raw bytes segment from the archive stream. These raw
+FileCheckEntry EntryType = 1 + iota
+// SegmentEntry represents a raw bytes segment from the archive stream. These raw
// byte segments consist of the raw headers and various padding.
//
// Its payload is to be marshalled base64 encoded.
-SegmentType
+SegmentEntry
+// VerificationEntry is a structure of keywords for validating the on-disk
+// file attributes against the attributes of the Tar archive file headers
+VerificationEntry
)
// Entry is the structure for packing and unpacking the information read from
// the Tar archive.
//
-// FileType Payload checksum is using `hash/crc64` for basic file integrity,
+// FileCheckEntry Payload checksum uses `hash/crc64` for basic file integrity,
// _not_ for cryptography.
// From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000
// collisions in a sample of 18.2 million; CRC64 had none.
type Entry struct {
-Type Type `json:"type"`
-Name string `json:"name,omitempty"`
-NameRaw []byte `json:"name_raw,omitempty"`
-Size int64 `json:"size,omitempty"`
-Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
-Position int `json:"position"`
+Type EntryType `json:"type"`
+Name string `json:"name,omitempty"`
+NameRaw []byte `json:"name_raw,omitempty"`
+Size int64 `json:"size,omitempty"`
+Payload []byte `json:"payload"` // SegmentEntry stores the payload here; FileCheckEntry stores the crc64 checksum here
+Position int `json:"position"`
}
// SetName will check name for valid UTF-8 string, and set the appropriate
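For reference, the crc64 checksum mentioned in the Entry docs above is cheap to compute with the standard library. A standalone sketch; the ISO polynomial is an assumption here, so verify it against the package's own table:

```go
package main

import (
	"fmt"
	"hash/crc64"
)

func main() {
	// FileCheckEntry stores the crc64 of the file body in Payload. The ISO
	// polynomial is assumed; check the package's table before relying on it.
	body := []byte("the file contents")
	fmt.Printf("%x\n", crc64.Checksum(body, crc64.MakeTable(crc64.ISO)))
}
```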


@@ -9,23 +9,23 @@ import (
func TestEntries(t *testing.T) {
e := Entries{
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("y'all"),
Position: 1,
},
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("doin"),
Position: 3,
},
Entry{
-Type: FileType,
+Type: FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte("deadbeef"),
Position: 2,
},
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("how"),
Position: 0,
},
@@ -38,7 +38,7 @@ func TestEntries(t *testing.T) {
func TestFile(t *testing.T) {
f := Entry{
-Type: FileType,
+Type: FileCheckEntry,
Size: 100,
Position: 2,
}
@@ -67,7 +67,7 @@ func TestFile(t *testing.T) {
func TestFileRaw(t *testing.T) {
f := Entry{
-Type: FileType,
+Type: FileCheckEntry,
Size: 100,
Position: 2,
}


@@ -44,7 +44,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
}
// check for dup name
-if e.Type == FileType {
+if e.Type == FileCheckEntry {
cName := filepath.Clean(e.GetName())
if _, ok := jup.seen[cName]; ok {
return nil, ErrDuplicatePath
@@ -55,8 +55,8 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
return &e, err
}
-// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and
-// FileType) as a json document.
+// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentEntry and
+// FileCheckEntry) as a json document.
//
// Each Entry read is expected to be delimited by a newline.
func NewJSONUnpacker(r io.Reader) Unpacker {
@@ -85,7 +85,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
}
// check early for dup name
-if e.Type == FileType {
+if e.Type == FileCheckEntry {
cName := filepath.Clean(e.GetName())
if _, ok := jp.seen[cName]; ok {
return -1, ErrDuplicatePath
@@ -104,8 +104,8 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
return e.Position, nil
}
-// NewJSONPacker provides a Packer that writes each Entry (SegmentType and
-// FileType) as a json document.
+// NewJSONPacker provides a Packer that writes each Entry (SegmentEntry and
+// FileCheckEntry) as a json document.
//
// The Entries are delimited by newlines.
func NewJSONPacker(w io.Writer) Packer {
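A round-trip sketch of the renamed types through the JSON Packer/Unpacker, one newline-delimited JSON document per entry:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/vbatts/tar-split/tar/storage"
)

func main() {
	buf := new(bytes.Buffer)
	jp := storage.NewJSONPacker(buf)
	// pack a raw segment and a file-check record
	if _, err := jp.AddEntry(storage.Entry{Type: storage.SegmentEntry, Payload: []byte("raw header bytes")}); err != nil {
		panic(err)
	}
	if _, err := jp.AddEntry(storage.Entry{Type: storage.FileCheckEntry, Name: "./hurr.txt", Size: 20, Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78}}); err != nil {
		panic(err)
	}

	// unpack them back out of the same buffer
	jup := storage.NewJSONUnpacker(buf)
	for {
		e, err := jup.Next() // io.EOF once the stream is exhausted
		if err != nil {
			break
		}
		fmt.Printf("%d: type=%d name=%q\n", e.Position, e.Type, e.GetName())
	}
}
```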


@@ -12,17 +12,17 @@ import (
func TestDuplicateFail(t *testing.T) {
e := []Entry{
Entry{
-Type: FileType,
+Type: FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte("abcde"),
},
Entry{
-Type: FileType,
+Type: FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte("deadbeef"),
},
Entry{
-Type: FileType,
+Type: FileCheckEntry,
Name: "hurr.txt", // slightly different path, same file though
Payload: []byte("deadbeef"),
},
@@ -45,20 +45,20 @@ func TestDuplicateFail(t *testing.T) {
func TestJSONPackerUnpacker(t *testing.T) {
e := []Entry{
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("how"),
},
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("y'all"),
},
Entry{
-Type: FileType,
+Type: FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte("deadbeef"),
},
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("doin"),
},
}
@@ -106,20 +106,20 @@ func TestJSONPackerUnpacker(t *testing.T) {
func TestGzip(t *testing.T) {
e := []Entry{
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("how"),
},
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("y'all"),
},
Entry{
-Type: FileType,
+Type: FileCheckEntry,
Name: "./hurr.txt",
Payload: []byte("deadbeef"),
},
Entry{
-Type: SegmentType,
+Type: SegmentEntry,
Payload: []byte("doin"),
},
}

tar/verify/headers.go Normal file

@@ -0,0 +1,23 @@
package verify

import "time"

// PosixHeader is the structure of a POSIX tar header, to be marshalled from
// the tar stream and kept available for on-disk comparison and verification
type PosixHeader struct {
Name string `json:"name,omitempty"`
Mode uint32 `json:"mode,omitempty"`
UID uint32 `json:"uid,omitempty"`
GID uint32 `json:"gid,omitempty"`
Size int `json:"size,omitempty"`
Mtime time.Time `json:"mtime,omitempty"`
Checksum []byte `json:"chksum,omitempty"`
LinkName string `json:"linkname,omitempty"`
Magic []byte `json:"magic,omitempty"`
Version string `json:"version,omitempty"`
Uname string `json:"uname,omitempty"`
Gname string `json:"gname,omitempty"`
DevMajor int `json:"devmajor,omitempty"`
DevMinor int `json:"devminor,omitempty"`
Prefix string `json:"prefix,omitempty"`
}
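Since every field carries a json tag with omitempty, a populated PosixHeader serializes compactly. A hypothetical sketch of producing the document that would be compared against on-disk attributes:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"

	"github.com/vbatts/tar-split/tar/verify"
)

func main() {
	hdr := verify.PosixHeader{
		Name:  "./hurr.txt",
		Mode:  0644,
		UID:   1000,
		GID:   1000,
		Size:  20,
		Mtime: time.Now(),
	}
	buf, err := json.Marshal(hdr)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(buf)) // omitempty keeps only the populated attributes
}
```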

tar/verify/verify.go Normal file

@@ -0,0 +1,34 @@
package verify

import "fmt"

// CheckType is how the on-disk attributes will be verified against the
// recorded header information
type CheckType int

// Check types for customizing how fuzzy or strict the on-disk verification
// will be
const (
// bit flags (1 << iota) so individual checks can be combined with `|` and
// tested with `&`; a plain iota here would make the masks below collide
CheckDigest CheckType = 1 << iota
CheckFileSize
CheckFileMode
CheckFileUser
CheckFileGroup
CheckFileMtime
CheckFileDevice
CheckFileLink
CheckFileCaps
)
var (
// DefaultChecks is the default set of verification steps done against
// each storage.VerificationEntry
DefaultChecks = CheckDigest | CheckFileAttributes
// CheckFileAttributes is the group of file attribute checks done
CheckFileAttributes = CheckFileSize | CheckFileMode | CheckFileUser |
CheckFileGroup | CheckFileMtime | CheckFileDevice | CheckFileCaps |
CheckFileLink
// ErrNotSupportedPlatform is returned when the platform does not support
// a given feature
ErrNotSupportedPlatform = fmt.Errorf("platform does not support this feature")
)
)
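With the constants as bit flags, check sets compose and subtract cleanly. An in-package sketch; checksDemo is hypothetical:

```go
package verify

// checksDemo is a hypothetical illustration: a check set can drop one
// check with &^ and test for another with &.
func checksDemo() bool {
	checks := DefaultChecks &^ CheckFileMtime // everything but mtime
	return checks&CheckDigest != 0            // digest checking still enabled
}
```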

tar/verify/xattrs_linux.go Normal file

@@ -0,0 +1,114 @@
package verify
/*
Lgetxattr and Lsetxattr are copied directly from https://github.com/docker/docker
./pkg/system/xattr_linux.go commit 7e420ad8502089e66ce0ade92bf70574f894f287
Apache License Version 2.0, January 2004 https://www.apache.org/licenses/
Copyright 2013-2015 Docker, Inc.
*/
import (
"bytes"
"syscall"
"unsafe"
)
/*
func main() {
for _, arg := range os.Args[1:] {
keys, err := Listxattr(arg)
if err != nil {
fmt.Println(err)
continue
}
if len(keys) > 0 {
fmt.Printf("%s : %q\n", arg, keys)
for _, key := range keys {
buf, err := Lgetxattr(arg, key)
if err != nil {
fmt.Printf(" ERROR: %s\n", err)
continue
}
fmt.Printf(" %s = %s\n", key, string(buf))
}
}
}
}
*/
// Listxattr is a helper around syscall.Listxattr
func Listxattr(path string) ([]string, error) {
buf := make([]byte, 1024)
sz, err := syscall.Listxattr(path, buf)
if err == syscall.ENODATA {
return nil, nil
}
if err == syscall.ERANGE && sz > 0 {
buf = make([]byte, sz)
sz, err = syscall.Listxattr(path, buf)
}
if err != nil {
// don't silently drop errors other than ENODATA (e.g. ENOTSUP)
return nil, err
}
keys := []string{}
for _, key := range bytes.Split(bytes.TrimSpace(buf[:sz]), []byte{0x0}) {
if string(key) != "" {
keys = append(keys, string(key))
}
}
return keys, nil
}
// Lgetxattr retrieves the value of the extended attribute identified by attr
// and associated with the given path in the file system.
// It returns a nil slice and nil error if the xattr is not set.
func Lgetxattr(path string, attr string) ([]byte, error) {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return nil, err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return nil, err
}
dest := make([]byte, 128)
destBytes := unsafe.Pointer(&dest[0])
sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
if errno == syscall.ENODATA {
return nil, nil
}
if errno == syscall.ERANGE {
dest = make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
}
if errno != 0 {
return nil, errno
}
return dest[:sz], nil
}
var _zero uintptr
// Lsetxattr sets the value of the extended attribute identified by attr
// and associated with the given path in the file system.
func Lsetxattr(path string, attr string, data []byte, flags int) error {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return err
}
var dataBytes unsafe.Pointer
if len(data) > 0 {
dataBytes = unsafe.Pointer(&data[0])
} else {
dataBytes = unsafe.Pointer(&_zero)
}
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
if errno != 0 {
return errno
}
return nil
}
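A usage sketch for the xattr helpers (Linux only; the path is hypothetical and the filesystem must support user xattrs):

```go
package main

import (
	"fmt"
	"os"

	"github.com/vbatts/tar-split/tar/verify"
)

func main() {
	path := "/tmp/xattr-demo" // hypothetical scratch file
	f, err := os.Create(path)
	if err != nil {
		fmt.Println(err)
		return
	}
	f.Close()

	if err := verify.Lsetxattr(path, "user.checksum", []byte("deadbeef"), 0); err != nil {
		fmt.Println("set:", err)
		return
	}
	val, err := verify.Lgetxattr(path, "user.checksum")
	if err != nil {
		fmt.Println("get:", err)
		return
	}
	fmt.Printf("user.checksum = %q\n", val)
}
```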


@@ -0,0 +1,13 @@
// +build !linux

package verify
// Lgetxattr is not supported on platforms other than linux.
func Lgetxattr(path string, attr string) ([]byte, error) {
return nil, ErrNotSupportedPlatform
}
// Lsetxattr is not supported on platforms other than linux.
func Lsetxattr(path string, attr string, data []byte, flags int) error {
return ErrNotSupportedPlatform
}