go-mtree/tar.go
Vincent Batts 8074df1973 *: refactoring to support streams
when creating a manifest from, or validating, a stream like a tar
archive, it requires thinking about some of the functions differently
than walking a directory tree.

This is the beginning of allowing for such features.

Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
2016-07-20 17:41:17 -04:00

185 lines
4.5 KiB
Go

package mtree
import (
"archive/tar"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
)
// Streamer is an io.ReadCloser that can additionally report the
// DirectoryHierarchy associated with the stream it wraps.
type Streamer interface {
io.ReadCloser
// Hierarchy returns the stream's DirectoryHierarchy, or an error.
Hierarchy() (*DirectoryHierarchy, error)
}
// NewTarStreamer wraps r, which is expected to carry a tar archive, and
// returns a Streamer whose file hierarchy is built from the tar metadata
// headers. keywords names the keyword functions evaluated for every entry.
//
// Bytes read through the returned Streamer are teed into an in-memory pipe
// that feeds a tar reader; a goroutine running readHeaders is started before
// returning to consume that pipe.
func NewTarStreamer(r io.Reader, keywords []string) Streamer {
	pipeR, pipeW := io.Pipe()

	stream := new(tarStream)
	stream.keywords = keywords
	stream.creator = dhCreator{DH: &DirectoryHierarchy{}}
	stream.pipeReader = pipeR
	stream.pipeWriter = pipeW
	// Everything the caller reads from r is duplicated into the pipe ...
	stream.teeReader = io.TeeReader(r, pipeW)
	// ... and parsed as tar on the other end of it.
	stream.tarReader = tar.NewReader(pipeR)

	go stream.readHeaders() // I don't like this
	return stream
}
// tarStream is the Streamer implementation returned by NewTarStreamer.
type tarStream struct {
// creator holds the DirectoryHierarchy being built (DH) and the directory
// entry that readHeaders currently treats as the working directory (curDir).
creator dhCreator
// pipeReader is the read end of the pipe; tarReader reads from it.
pipeReader *io.PipeReader
// pipeWriter is the write end of the pipe; teeReader writes into it.
pipeWriter *io.PipeWriter
// teeReader reads from the caller-supplied reader and copies what it reads
// into pipeWriter. Read delegates to it.
teeReader io.Reader
// tarReader parses the bytes arriving on pipeReader.
tarReader *tar.Reader
// keywords lists the names looked up in KeywordFuncs for each tar entry.
keywords []string
// err is the error stored by setErr and inspected by Hierarchy.
err error
}
// readHeaders consumes the tar stream arriving on the pipe and appends one
// Entry per tar header to ts.creator.DH.Entries. NewTarStreamer starts it as
// a goroutine; it runs until the tar reader reports an error (io.EOF at the
// end of the archive) or a per-entry step fails, and then closes the read end
// of the pipe with that error (see abort).
//
// NOTE(review): curDir, Parent and Children hold pointers to function-local
// Entry values, while DH.Entries stores copies made by append. Children added
// to a directory after it was appended are therefore not visible through
// DH.Entries -- confirm whether that is intended (see the TODO below).
func (ts *tarStream) readHeaders() {
	// We have to start with the directory we're in, and anything beyond these
	// items is determined at the time a tar is extracted.
	root := Entry{
		Name:     ".",
		Keywords: []string{"size=0", "type=dir"},
	}
	ts.creator.curDir = &root
	ts.creator.DH.Entries = append(ts.creator.DH.Entries, root)

	for {
		hdr, err := ts.tarReader.Next()
		if err != nil {
			ts.abort(err)
			return
		}

		// Alright, it's either file or directory
		e := Entry{
			Name: filepath.Base(hdr.Name),
			Pos:  len(ts.creator.DH.Entries),
			Type: RelativeType,
		}

		// now collect keywords on the file
		if err := ts.collectKeywords(hdr, &e); err != nil {
			ts.abort(err)
			return
		}

		// compare directories, to determine parent of the current entry.
		// childDir and unknownDir leave the entry without a parent for now.
		switch compareDir(filepath.Dir(hdr.Name), ts.creator.curDir.Path()) {
		case sameDir:
			e.Parent = ts.creator.curDir
		case parentDir:
			e.Parent = ts.creator.curDir.Parent
		}
		if e.Parent != nil {
			e.Parent.Children = append(e.Parent.Children, &e)
		}

		if hdr.FileInfo().IsDir() {
			ts.creator.curDir = &e
			// TODO getting the parent child relationship of these entries!
			log.Println(strings.Split(hdr.Name, "/"), strings.Split(ts.creator.curDir.Path(), "/"))
		}
		ts.creator.DH.Entries = append(ts.creator.DH.Entries, e)

		// Now is the wacky part of building out the entries. Since we cannot
		// control how the archive was assembled, we can only take entries in
		// the order given. Using `/set` will be tough. Hopefully the directory
		// stepping can be done with relative paths, but even then a new
		// directory may show up with its sub-directories before its files. :-\
	}
}

// collectKeywords spools the payload of the tar entry described by hdr into a
// temporary file and runs every requested keyword function that exists in
// KeywordFuncs against it, appending each result to e.Keywords.
//
// The temporary file is closed and removed before returning. A non-nil return
// value means the payload could not be spooled or rewound and header
// processing should stop. An error from a keyword function is recorded with
// setErr, but processing continues and the returned value is still appended.
func (ts *tarStream) collectKeywords(hdr *tar.Header, e *Entry) error {
	// Because the content of the file may need to be read by several
	// KeywordFuncs, it needs to be an io.Seeker as well. So, just reading from
	// ts.tarReader is not enough.
	tmpFile, err := ioutil.TempFile("", "ts.payload.")
	if err != nil {
		return err
	}
	defer func() {
		tmpFile.Close()
		os.Remove(tmpFile.Name())
	}()

	// for good measure
	if err := tmpFile.Chmod(0600); err != nil {
		return err
	}
	if _, err := io.Copy(tmpFile, ts.tarReader); err != nil {
		return err
	}

	info := hdr.FileInfo()
	for _, keyword := range ts.keywords {
		keyFunc, ok := KeywordFuncs[keyword]
		if !ok {
			continue
		}
		// Rewind before every keyword function, including the first one:
		// io.Copy above left the offset at the end of the payload, and each
		// keyword function may consume the reader.
		if _, err := tmpFile.Seek(0, 0); err != nil {
			return err
		}
		val, err := keyFunc(hdr.Name, info, tmpFile)
		if err != nil {
			ts.setErr(err)
		}
		e.Keywords = append(e.Keywords, val)
	}
	return nil
}

// abort ends header processing by closing the read end of the pipe with err.
// Errors other than io.EOF (normal end of archive) and io.ErrClosedPipe (the
// pipe was closed, e.g. through Close) are also recorded with setErr so that
// Hierarchy reports them; those two are skipped so they cannot overwrite an
// error recorded earlier by a keyword function.
func (ts *tarStream) abort(err error) {
	if err != io.EOF && err != io.ErrClosedPipe {
		ts.setErr(err)
	}
	ts.pipeReader.CloseWithError(err)
}
// relationship describes how one directory path relates to another, as
// reported by compareDir.
type relationship int

const (
	// unknownDir: none of the relationships below applies.
	unknownDir relationship = iota
	// sameDir: both paths clean to the same directory.
	sameDir
	// childDir: the first path is directly inside the second.
	childDir
	// parentDir: the first path is the direct parent of the second.
	parentDir
)

// compareDir reports how curDir relates to prevDir after both have been
// normalised with filepath.Clean. Only identical, direct-child and
// direct-parent relationships are recognised; anything else (siblings,
// deeper descendants, unrelated paths) yields unknownDir.
func compareDir(curDir, prevDir string) relationship {
	cur, prev := filepath.Clean(curDir), filepath.Clean(prevDir)

	switch {
	case cur == prev:
		return sameDir
	case filepath.Dir(cur) == prev:
		return childDir
	case filepath.Dir(prev) == cur:
		return parentDir
	default:
		return unknownDir
	}
}
// setErr stores err on the stream, replacing any previously stored error.
// Hierarchy consults the stored value.
func (ts *tarStream) setErr(err error) {
ts.err = err
}
// Read implements io.Reader by delegating to teeReader. NewTarStreamer builds
// teeReader with io.TeeReader over the caller's reader and the pipe writer, so
// the bytes returned here are also written to the pipe the tar reader consumes.
func (ts *tarStream) Read(p []byte) (n int, err error) {
return ts.teeReader.Read(p)
}
// Close implements io.Closer by closing the read end of the internal pipe and
// returning the result. The reader passed to NewTarStreamer is not closed here.
func (ts *tarStream) Close() error {
return ts.pipeReader.Close()
}
// Hierarchy returns the DirectoryHierarchy built from the tar headers. If an
// error other than io.EOF has been stored on the stream, that error is
// returned instead and the hierarchy is nil.
func (ts *tarStream) Hierarchy() (*DirectoryHierarchy, error) {
	switch ts.err {
	case nil, io.EOF:
		// No error, or just the end of the archive: the hierarchy is usable.
		return ts.creator.DH, nil
	default:
		return nil, ts.err
	}
}