diff --git a/tar/asm/README.md b/tar/asm/README.md index 2a3a5b5..71da68a 100644 --- a/tar/asm/README.md +++ b/tar/asm/README.md @@ -10,7 +10,7 @@ Concerns For completely safe assembly/disassembly, there will need to be a Content Addressable Storage (CAS) directory, that maps to a checksum in the -`storage.Entity` of `storage.FileType`. +`storage.Entry` of `storage.FileCheckEntry`. This is due to the fact that tar archives _can_ allow multiple records for the same path, but the last one effectively wins. Even if the prior records had a diff --git a/tar/asm/assemble.go b/tar/asm/assemble.go index d624450..0cce0c2 100644 --- a/tar/asm/assemble.go +++ b/tar/asm/assemble.go @@ -19,6 +19,10 @@ import ( // metadata. With the combination of these two items, a precise assembled Tar // archive is possible. func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser { + return newOutputTarStreamWithOptions(fg, up, DefaultOutputOptions) +} + +func newOutputTarStreamWithOptions(fg storage.FileGetter, up storage.Unpacker, opts Options) io.ReadCloser { // ... Since these are interfaces, this is possible, so let's not have a nil pointer if fg == nil || up == nil { return nil diff --git a/tar/asm/assemble_test.go b/tar/asm/assemble_test.go index c0c7f17..a3dce6a 100644 --- a/tar/asm/assemble_test.go +++ b/tar/asm/assemble_test.go @@ -20,7 +20,7 @@ var entries = []struct { }{ { Entry: storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, Name: "./hurr.txt", Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78}, Size: 20, @@ -29,7 +29,7 @@ var entries = []struct { }, { Entry: storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, Name: "./ermahgerd.txt", Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, @@ -38,7 +38,7 @@ var entries = []struct { }, { Entry: storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8.
Just checking the round trip. Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, @@ -52,7 +52,7 @@ var entriesMangled = []struct { }{ { Entry: storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, Name: "./hurr.txt", Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78}, Size: 20, @@ -62,7 +62,7 @@ var entriesMangled = []struct { }, { Entry: storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, Name: "./ermahgerd.txt", Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, Size: 26, @@ -72,7 +72,7 @@ var entriesMangled = []struct { }, { Entry: storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, Size: 26, @@ -86,7 +86,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { // first lets prep a GetPutter and Packer for i := range entries { - if entries[i].Entry.Type == storage.FileType { + if entries[i].Entry.Type == storage.FileCheckEntry { j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body)) if err != nil { t.Error(err) @@ -107,7 +107,7 @@ func TestTarStreamMangledGetterPutter(t *testing.T) { } for _, e := range entriesMangled { - if e.Entry.Type == storage.FileType { + if e.Entry.Type == storage.FileCheckEntry { rdr, err := fgp.Get(e.Entry.GetName()) if err != nil { t.Error(err) diff --git a/tar/asm/disassemble.go b/tar/asm/disassemble.go index 54ef23a..8465366 100644 --- a/tar/asm/disassemble.go +++ b/tar/asm/disassemble.go @@ -19,6 +19,10 @@ import ( // storage.FilePutter. 
Since the checksumming is still needed, then a default // of NewDiscardFilePutter will be used internally func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { + return newInputTarStreamWithOptions(r, p, fp, DefaultInputOptions) +} + +func newInputTarStreamWithOptions(r io.Reader, p storage.Packer, fp storage.FilePutter, opts Options) (io.Reader, error) { // What to do here... folks will want their own access to the Reader that is // their tar archive stream, but we'll need that same stream to use our // forked 'archive/tar'. @@ -57,7 +61,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io // the end of an archive. Collect them too. if b := tr.RawBytes(); len(b) > 0 { _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, + Type: storage.SegmentEntry, Payload: b, }) if err != nil { @@ -73,7 +77,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io if b := tr.RawBytes(); len(b) > 0 { _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, + Type: storage.SegmentEntry, Payload: b, }) if err != nil { @@ -93,7 +97,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io } entry := storage.Entry{ - Type: storage.FileType, + Type: storage.FileCheckEntry, Size: hdr.Size, Payload: csum, } @@ -109,7 +113,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io if b := tr.RawBytes(); len(b) > 0 { _, err = p.AddEntry(storage.Entry{ - Type: storage.SegmentType, + Type: storage.SegmentEntry, Payload: b, }) if err != nil { @@ -127,7 +131,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io return } _, err = p.AddEntry(storage.Entry{ - Type: storage.SegmentType, + Type: storage.SegmentEntry, Payload: remainder, }) if err != nil { diff --git a/tar/asm/options.go b/tar/asm/options.go new file mode 100644 index 0000000..bc50858 --- /dev/null +++ b/tar/asm/options.go 
@@ -0,0 +1,18 @@ +package asm + +// Defaults that matched existing behavior +var ( + DefaultOutputOptions = OptFileCheck | OptSegment + DefaultInputOptions = OptFileCheck | OptSegment +) + +// Options for processing the tar stream with additional options. Like +// including entries for on-disk verification. +type Options int + +// The options include the FileCheckEntry, SegmentEntry, and VerificationEntry +const ( + OptFileCheck Options = 1 << iota + OptSegment + OptVerify +) diff --git a/tar/storage/entry.go b/tar/storage/entry.go index c91e7ea..b6de7f0 100644 --- a/tar/storage/entry.go +++ b/tar/storage/entry.go @@ -9,36 +9,41 @@ func (e Entries) Len() int { return len(e) } func (e Entries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } func (e Entries) Less(i, j int) bool { return e[i].Position < e[j].Position } -// Type of Entry -type Type int +// EntryType is the type of Entry +type EntryType int const ( - // FileType represents a file payload from the tar stream. // // This will be used to map to relative paths on disk. Only Size > 0 will get // read into a resulting output stream (due to hardlinks). - FileType Type = 1 + iota - // SegmentType represents a raw bytes segment from the archive stream. These raw + FileCheckEntry EntryType = 1 + iota + + // SegmentEntry represents a raw bytes segment from the archive stream. These raw // byte segments consist of the raw headers and various padding. // // Its payload is to be marshalled base64 encoded. - SegmentType + SegmentEntry + + // VerificationEntry is a structure of keywords for validating the on-disk // file attributes against the attributes of the Tar archive file headers + VerificationEntry ) // Entry is the structure for packing and unpacking the information read from // the Tar archive.
// -// FileType Payload checksum is using `hash/crc64` for basic file integrity, +// FileCheckEntry Payload checksum is using `hash/crc64` for basic file integrity, // _not_ for cryptography. // From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000 // collisions in a sample of 18.2 million, CRC64 had none. type Entry struct { - Type Type `json:"type"` - Name string `json:"name,omitempty"` - NameRaw []byte `json:"name_raw,omitempty"` - Size int64 `json:"size,omitempty"` - Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; - Position int `json:"position"` + Type EntryType `json:"type"` + Name string `json:"name,omitempty"` + NameRaw []byte `json:"name_raw,omitempty"` + Size int64 `json:"size,omitempty"` + Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; + Position int `json:"position"` } // SetName will check name for valid UTF-8 string, and set the appropriate diff --git a/tar/storage/entry_test.go b/tar/storage/entry_test.go index 90d103e..4bbe9d9 100644 --- a/tar/storage/entry_test.go +++ b/tar/storage/entry_test.go @@ -9,23 +9,23 @@ import ( func TestEntries(t *testing.T) { e := Entries{ Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("y'all"), Position: 1, }, Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("doin"), Position: 3, }, Entry{ - Type: FileType, + Type: FileCheckEntry, Name: "./hurr.txt", Payload: []byte("deadbeef"), Position: 2, }, Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("how"), Position: 0, }, @@ -38,7 +38,7 @@ func TestEntries(t *testing.T) { func TestFile(t *testing.T) { f := Entry{ - Type: FileType, + Type: FileCheckEntry, Size: 100, Position: 2, } @@ -67,7 +67,7 @@ func TestFile(t *testing.T) { func TestFileRaw(t *testing.T) { f := Entry{ - Type: FileType, + Type: FileCheckEntry, Size: 100, Position: 2, } diff --git a/tar/storage/packer.go 
b/tar/storage/packer.go index aba6948..1eb2613 100644 --- a/tar/storage/packer.go +++ b/tar/storage/packer.go @@ -44,7 +44,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { } // check for dup name - if e.Type == FileType { + if e.Type == FileCheckEntry { cName := filepath.Clean(e.GetName()) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath @@ -55,8 +55,8 @@ func (jup *jsonUnpacker) Next() (*Entry, error) { return &e, err } -// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and -// FileType) as a json document. +// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentEntry and +// FileCheckEntry) as a json document. // // Each Entry read are expected to be delimited by new line. func NewJSONUnpacker(r io.Reader) Unpacker { @@ -85,7 +85,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) { } // check early for dup name - if e.Type == FileType { + if e.Type == FileCheckEntry { cName := filepath.Clean(e.GetName()) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath @@ -104,8 +104,8 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) { return e.Position, nil } -// NewJSONPacker provides a Packer that writes each Entry (SegmentType and -// FileType) as a json document. +// NewJSONPacker provides a Packer that writes each Entry (SegmentEntry and +// FileCheckEntry) as a json document. // // The Entries are delimited by new line. 
func NewJSONPacker(w io.Writer) Packer { diff --git a/tar/storage/packer_test.go b/tar/storage/packer_test.go index 7d93371..8c1af88 100644 --- a/tar/storage/packer_test.go +++ b/tar/storage/packer_test.go @@ -12,17 +12,17 @@ import ( func TestDuplicateFail(t *testing.T) { e := []Entry{ Entry{ - Type: FileType, + Type: FileCheckEntry, Name: "./hurr.txt", Payload: []byte("abcde"), }, Entry{ - Type: FileType, + Type: FileCheckEntry, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, Entry{ - Type: FileType, + Type: FileCheckEntry, Name: "hurr.txt", // slightly different path, same file though Payload: []byte("deadbeef"), }, @@ -45,20 +45,20 @@ func TestDuplicateFail(t *testing.T) { func TestJSONPackerUnpacker(t *testing.T) { e := []Entry{ Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("how"), }, Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("y'all"), }, Entry{ - Type: FileType, + Type: FileCheckEntry, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("doin"), }, } @@ -106,20 +106,20 @@ func TestJSONPackerUnpacker(t *testing.T) { func TestGzip(t *testing.T) { e := []Entry{ Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("how"), }, Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("y'all"), }, Entry{ - Type: FileType, + Type: FileCheckEntry, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, Entry{ - Type: SegmentType, + Type: SegmentEntry, Payload: []byte("doin"), }, } diff --git a/tar/verify/headers.go b/tar/verify/headers.go new file mode 100644 index 0000000..b9ec61d --- /dev/null +++ b/tar/verify/headers.go @@ -0,0 +1,23 @@ +package verify + +import "time" + +// PosixHeader is the structure from a POSIX tar header, to be marshalled from +// the tar stream, and available for on-disk comparison and verification +type PosixHeader struct { + Name string `json:"name,omitempty"` + Mode uint32 `json:"mode,omitempty"` + UID uint32 
`json:"uid,omitempty"` + GID uint32 `json:"gid,omitempty"` + Size int64 `json:"size,omitempty"` + Mtime time.Time `json:"mtime,omitempty"` + Checksum []byte `json:"chksum,omitempty"` + LinkName string `json:"linkname,omitempty"` + Magic []byte `json:"magic,omitempty"` + Version string `json:"version,omitempty"` + Uname string `json:"uname,omitempty"` + Gname string `json:"gname,omitempty"` + DevMajor int `json:"devmajor,omitempty"` + DevMinor int `json:"devminor,omitempty"` + Prefix string `json:"prefix,omitempty"` +} diff --git a/tar/verify/verify.go b/tar/verify/verify.go new file mode 100644 index 0000000..9037e0e --- /dev/null +++ b/tar/verify/verify.go @@ -0,0 +1,34 @@ +package verify + +import "fmt" + +// CheckType is how the on disk attributes will be verified against the +// recorded header information +type CheckType int + +// Check types for customizing how fuzzy or strict on-disk verification will be +// handled; these are bit flags, so they may be combined with the | operator +const ( + CheckDigest CheckType = 1 << iota + CheckFileSize + CheckFileMode + CheckFileUser + CheckFileGroup + CheckFileMtime + CheckFileDevice + CheckFileLink + CheckFileCaps +) + +var ( + // DefaultChecks is the default for verification steps against each + // storage.VerificationEntry + DefaultChecks = CheckDigest | CheckFileAttributes + // CheckFileAttributes are the group of file attribute checks done + CheckFileAttributes = CheckFileSize | CheckFileMode | CheckFileUser | + CheckFileGroup | CheckFileMtime | CheckFileDevice | CheckFileCaps | + CheckFileLink + + // ErrNotSupportedPlatform is when the platform does not support given features + ErrNotSupportedPlatform = fmt.Errorf("platform does not support this feature") +) diff --git a/tar/verify/xattrs_linux.go b/tar/verify/xattrs_linux.go new file mode 100644 index 0000000..46e1f8c --- /dev/null +++ b/tar/verify/xattrs_linux.go @@ -0,0 +1,114 @@ +package verify + +/* +Lgetxattr and Lsetxattr are copied directly from https://github.com/docker/docker + ./pkg/system/xattr_linux.go commit 
7e420ad8502089e66ce0ade92bf70574f894f287 Apache License Version 2.0, January 2004 https://www.apache.org/licenses/ + Copyright 2013-2015 Docker, Inc. +*/ + +import ( + "bytes" + "syscall" + "unsafe" +) + +/* +func main() { + for _, arg := range os.Args[1:] { + keys, err := Listxattr(arg) + if err != nil { + fmt.Println(err) + continue + } + if len(keys) > 0 { + fmt.Printf("%s : %q\n", arg, keys) + for _, key := range keys { + buf, err := Lgetxattr(arg, key) + if err != nil { + fmt.Printf(" ERROR: %s\n", err) + continue + } + fmt.Printf(" %s = %s\n", key, string(buf)) + } + } + } +} +*/ + +// Listxattr is a helper around the syscall.Listxattr +func Listxattr(path string) ([]string, error) { + buf := make([]byte, 1024) + sz, err := syscall.Listxattr(path, buf) + if err == syscall.ERANGE && sz > 0 { + buf = make([]byte, sz) + sz, err = syscall.Listxattr(path, buf) + } + if err == syscall.ENODATA { + return nil, nil + } else if err != nil { + return nil, err + } + keys := []string{} + for _, key := range bytes.Split(buf[:sz], []byte{0x0}) { + if len(key) > 0 { keys = append(keys, string(key)) } + } + return keys, nil +} + +// Lgetxattr retrieves the value of the extended attribute identified by attr +// and associated with the given path in the file system. +// It will return a nil slice and nil error if the xattr is not set.
+func Lgetxattr(path string, attr string) ([]byte, error) { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return nil, err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return nil, err + } + + dest := make([]byte, 128) + destBytes := unsafe.Pointer(&dest[0]) + sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + if errno == syscall.ENODATA { + return nil, nil + } + if errno == syscall.ERANGE { + dest = make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + } + if errno != 0 { + return nil, errno + } + + return dest[:sz], nil +} + +var _zero uintptr + +// Lsetxattr sets the value of the extended attribute identified by attr +// and associated with the given path in the file system. +func Lsetxattr(path string, attr string, data []byte, flags int) error { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return err + } + var dataBytes unsafe.Pointer + if len(data) > 0 { + dataBytes = unsafe.Pointer(&data[0]) + } else { + dataBytes = unsafe.Pointer(&_zero) + } + _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) + if errno != 0 { + return errno + } + return nil +} diff --git a/tar/verify/xattrs_unsupported.go b/tar/verify/xattrs_unsupported.go new file mode 100644 index 0000000..e3ab0f5 --- /dev/null +++ b/tar/verify/xattrs_unsupported.go @@ -0,0 +1,13 @@ +// +build !linux + +package verify + +// Lgetxattr is not supported on platforms other than linux. 
+func Lgetxattr(path string, attr string) ([]byte, error) { + return nil, ErrNotSupportedPlatform +} + +// Lsetxattr is not supported on platforms other than linux. +func Lsetxattr(path string, attr string, data []byte, flags int) error { + return ErrNotSupportedPlatform +}