pkg/archive/archive.go

748 lines
19 KiB
Go
Raw Normal View History

package archive
import (
"bufio"
"bytes"
"compress/bzip2"
"compress/gzip"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
log "github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/fileutils"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/promise"
"github.com/docker/docker/pkg/system"
)
type (
Archive io.ReadCloser
ArchiveReader io.Reader
Compression int
TarOptions struct {
Includes []string
Excludes []string
Compression Compression
NoLchown bool
Name string
}
)
var (
ErrNotImplemented = errors.New("Function not implemented")
)
const (
Uncompressed Compression = iota
Bzip2
Gzip
Xz
)
func IsArchive(header []byte) bool {
compression := DetectCompression(header)
if compression != Uncompressed {
return true
}
r := tar.NewReader(bytes.NewBuffer(header))
_, err := r.Next()
return err == nil
}
func DetectCompression(source []byte) Compression {
for compression, m := range map[Compression][]byte{
Bzip2: {0x42, 0x5A, 0x68},
Gzip: {0x1F, 0x8B, 0x08},
Xz: {0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00},
} {
if len(source) < len(m) {
log.Debugf("Len too short")
continue
}
if bytes.Compare(m, source[:len(m)]) == 0 {
return compression
}
}
return Uncompressed
}
func xzDecompress(archive io.Reader) (io.ReadCloser, error) {
args := []string{"xz", "-d", "-c", "-q"}
return CmdStream(exec.Command(args[0], args[1:]...), archive)
}
func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
p := pools.BufioReader32KPool
buf := p.Get(archive)
bs, err := buf.Peek(10)
if err != nil {
return nil, err
}
log.Debugf("[tar autodetect] n: %v", bs)
compression := DetectCompression(bs)
switch compression {
case Uncompressed:
readBufWrapper := p.NewReadCloserWrapper(buf, buf)
return readBufWrapper, nil
case Gzip:
gzReader, err := gzip.NewReader(buf)
if err != nil {
return nil, err
}
readBufWrapper := p.NewReadCloserWrapper(buf, gzReader)
return readBufWrapper, nil
case Bzip2:
bz2Reader := bzip2.NewReader(buf)
readBufWrapper := p.NewReadCloserWrapper(buf, bz2Reader)
return readBufWrapper, nil
case Xz:
xzReader, err := xzDecompress(buf)
if err != nil {
return nil, err
}
readBufWrapper := p.NewReadCloserWrapper(buf, xzReader)
return readBufWrapper, nil
default:
return nil, fmt.Errorf("Unsupported compression format %s", (&compression).Extension())
}
}
func CompressStream(dest io.WriteCloser, compression Compression) (io.WriteCloser, error) {
p := pools.BufioWriter32KPool
buf := p.Get(dest)
switch compression {
case Uncompressed:
writeBufWrapper := p.NewWriteCloserWrapper(buf, buf)
return writeBufWrapper, nil
case Gzip:
gzWriter := gzip.NewWriter(dest)
writeBufWrapper := p.NewWriteCloserWrapper(buf, gzWriter)
return writeBufWrapper, nil
case Bzip2, Xz:
// archive/bzip2 does not support writing, and there is no xz support at all
// However, this is not a problem as docker only currently generates gzipped tars
return nil, fmt.Errorf("Unsupported compression format %s", (&compression).Extension())
default:
return nil, fmt.Errorf("Unsupported compression format %s", (&compression).Extension())
}
}
func (compression *Compression) Extension() string {
switch *compression {
case Uncompressed:
return "tar"
case Bzip2:
return "tar.bz2"
case Gzip:
return "tar.gz"
case Xz:
return "tar.xz"
}
return ""
}
type tarAppender struct {
TarWriter *tar.Writer
Buffer *bufio.Writer
// for hardlink mapping
SeenFiles map[uint64]string
}
func (ta *tarAppender) addTarFile(path, name string) error {
fi, err := os.Lstat(path)
if err != nil {
return err
}
link := ""
if fi.Mode()&os.ModeSymlink != 0 {
if link, err = os.Readlink(path); err != nil {
return err
}
}
hdr, err := tar.FileInfoHeader(fi, link)
if err != nil {
return err
}
if fi.IsDir() && !strings.HasSuffix(name, "/") {
name = name + "/"
}
hdr.Name = name
var (
nlink uint32
inode uint64
)
if stat, ok := fi.Sys().(*syscall.Stat_t); ok {
nlink = uint32(stat.Nlink)
inode = uint64(stat.Ino)
// Currently go does not fill in the major/minors
if stat.Mode&syscall.S_IFBLK == syscall.S_IFBLK ||
stat.Mode&syscall.S_IFCHR == syscall.S_IFCHR {
hdr.Devmajor = int64(major(uint64(stat.Rdev)))
hdr.Devminor = int64(minor(uint64(stat.Rdev)))
}
}
// if it's a regular file and has more than 1 link,
// it's hardlinked, so set the type flag accordingly
if fi.Mode().IsRegular() && nlink > 1 {
// a link should have a name that it links too
// and that linked name should be first in the tar archive
if oldpath, ok := ta.SeenFiles[inode]; ok {
hdr.Typeflag = tar.TypeLink
hdr.Linkname = oldpath
hdr.Size = 0 // This Must be here for the writer math to add up!
} else {
ta.SeenFiles[inode] = name
}
}
capability, _ := system.Lgetxattr(path, "security.capability")
if capability != nil {
hdr.Xattrs = make(map[string]string)
hdr.Xattrs["security.capability"] = string(capability)
}
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
return err
}
if hdr.Typeflag == tar.TypeReg {
file, err := os.Open(path)
if err != nil {
return err
}
ta.Buffer.Reset(ta.TarWriter)
defer ta.Buffer.Reset(nil)
_, err = io.Copy(ta.Buffer, file)
file.Close()
if err != nil {
return err
}
err = ta.Buffer.Flush()
if err != nil {
return err
}
}
return nil
}
func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, Lchown bool) error {
// hdr.Mode is in linux format, which we can use for sycalls,
// but for os.Foo() calls we need the mode converted to os.FileMode,
// so use hdrInfo.Mode() (they differ for e.g. setuid bits)
hdrInfo := hdr.FileInfo()
switch hdr.Typeflag {
case tar.TypeDir:
// Create directory unless it exists as a directory already.
// In that case we just want to merge the two
if fi, err := os.Lstat(path); !(err == nil && fi.IsDir()) {
if err := os.Mkdir(path, hdrInfo.Mode()); err != nil {
return err
}
}
case tar.TypeReg, tar.TypeRegA:
// Source is regular file
file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, hdrInfo.Mode())
if err != nil {
return err
}
if _, err := io.Copy(file, reader); err != nil {
file.Close()
return err
}
file.Close()
case tar.TypeBlock, tar.TypeChar, tar.TypeFifo:
mode := uint32(hdr.Mode & 07777)
switch hdr.Typeflag {
case tar.TypeBlock:
mode |= syscall.S_IFBLK
case tar.TypeChar:
mode |= syscall.S_IFCHR
case tar.TypeFifo:
mode |= syscall.S_IFIFO
}
if err := syscall.Mknod(path, mode, int(mkdev(hdr.Devmajor, hdr.Devminor))); err != nil {
return err
}
case tar.TypeLink:
if err := os.Link(filepath.Join(extractDir, hdr.Linkname), path); err != nil {
return err
}
case tar.TypeSymlink:
if err := os.Symlink(hdr.Linkname, path); err != nil {
return err
}
case tar.TypeXGlobalHeader:
log.Debugf("PAX Global Extended Headers found and ignored")
return nil
default:
return fmt.Errorf("Unhandled tar header type %d\n", hdr.Typeflag)
}
if err := os.Lchown(path, hdr.Uid, hdr.Gid); err != nil && Lchown {
return err
}
for key, value := range hdr.Xattrs {
if err := system.Lsetxattr(path, key, []byte(value), 0); err != nil {
return err
}
}
// There is no LChmod, so ignore mode for symlink. Also, this
// must happen after chown, as that can modify the file mode
if hdr.Typeflag != tar.TypeSymlink {
if err := os.Chmod(path, hdrInfo.Mode()); err != nil {
return err
}
}
ts := []syscall.Timespec{timeToTimespec(hdr.AccessTime), timeToTimespec(hdr.ModTime)}
// syscall.UtimesNano doesn't support a NOFOLLOW flag atm, and
if hdr.Typeflag != tar.TypeSymlink {
if err := system.UtimesNano(path, ts); err != nil && err != system.ErrNotSupportedPlatform {
return err
}
} else {
if err := system.LUtimesNano(path, ts); err != nil && err != system.ErrNotSupportedPlatform {
return err
}
}
return nil
}
// Tar creates an archive from the directory at `path`, and returns it as a
// stream of bytes.
func Tar(path string, compression Compression) (io.ReadCloser, error) {
return TarWithOptions(path, &TarOptions{Compression: compression})
}
func escapeName(name string) string {
escaped := make([]byte, 0)
for i, c := range []byte(name) {
if i == 0 && c == '/' {
continue
}
// all printable chars except "-" which is 0x2d
if (0x20 <= c && c <= 0x7E) && c != 0x2d {
escaped = append(escaped, c)
} else {
escaped = append(escaped, fmt.Sprintf("\\%03o", c)...)
}
}
return string(escaped)
}
// TarWithOptions creates an archive from the directory at `path`, only including files whose relative
// paths are included in `options.Includes` (if non-nil) or not in `options.Excludes`.
func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) {
pipeReader, pipeWriter := io.Pipe()
compressWriter, err := CompressStream(pipeWriter, options.Compression)
if err != nil {
return nil, err
}
go func() {
ta := &tarAppender{
TarWriter: tar.NewWriter(compressWriter),
Buffer: pools.BufioWriter32KPool.Get(nil),
SeenFiles: make(map[uint64]string),
}
// this buffer is needed for the duration of this piped stream
defer pools.BufioWriter32KPool.Put(ta.Buffer)
// In general we log errors here but ignore them because
// during e.g. a diff operation the container can continue
// mutating the filesystem and we can see transient errors
// from this
if options.Includes == nil {
options.Includes = []string{"."}
}
var renamedRelFilePath string // For when tar.Options.Name is set
for _, include := range options.Includes {
filepath.Walk(filepath.Join(srcPath, include), func(filePath string, f os.FileInfo, err error) error {
if err != nil {
log.Debugf("Tar: Can't stat file %s to tar: %s", srcPath, err)
return nil
}
relFilePath, err := filepath.Rel(srcPath, filePath)
Exclude `.wh..wh.*` AUFS metadata on layer export In an effort to make layer content 'stable' between import and export from two different graph drivers, we must resolve an issue where AUFS produces metadata files in its layers which other drivers explicitly ignore when importing. The issue presents itself like this: - Generate a layer using AUFS - On commit of that container, the new stored layer contains AUFS metadata files/dirs. The stored layer content has some tarsum value: '1234567' - `docker save` that image to a USB drive and `docker load` into another docker engine instance which uses another graph driver, say 'btrfs' - On load, this graph driver explicitly ignores any AUFS metadata that it encounters. The stored layer content now has some different tarsum value: 'abcdefg'. The only (apparent) useful aufs metadata to keep are the psuedo link files located at `/.wh..wh.plink/`. Thes files hold information at the RW layer about hard linked files between this layer and another layer. The other graph drivers make sure to copy up these psuedo linked files but I've tested out a few different situations and it seems that this is unnecessary (In my test, AUFS already copies up the other hard linked files to the RW layer). This changeset adds explicit exclusion of the AUFS metadata files and directories (NOTE: not the whiteout files!) on commit of a container using the AUFS storage driver. Also included is a change to the archive package. It now explicitly ignores the root directory from being included in the resulting tar archive for 2 reasons: 1) it's unnecessary. 2) It's another difference between what other graph drivers produce when exporting a layer to a tar archive. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)
2014-10-28 00:23:50 +00:00
if err != nil || (relFilePath == "." && f.IsDir()) {
// Error getting relative path OR we are looking
// at the root path. Skip in both situations.
return nil
}
skip, err := fileutils.Matches(relFilePath, options.Excludes)
if err != nil {
log.Debugf("Error matching %s", relFilePath, err)
return err
}
if skip {
if f.IsDir() {
return filepath.SkipDir
}
return nil
}
// Rename the base resource
if options.Name != "" && filePath == srcPath+"/"+filepath.Base(relFilePath) {
renamedRelFilePath = relFilePath
}
// Set this to make sure the items underneath also get renamed
if options.Name != "" {
relFilePath = strings.Replace(relFilePath, renamedRelFilePath, options.Name, 1)
}
if err := ta.addTarFile(filePath, relFilePath); err != nil {
log.Debugf("Can't add file %s to tar: %s", srcPath, err)
}
return nil
})
}
// Make sure to check the error on Close.
if err := ta.TarWriter.Close(); err != nil {
log.Debugf("Can't close tar writer: %s", err)
}
if err := compressWriter.Close(); err != nil {
log.Debugf("Can't close compress writer: %s", err)
}
if err := pipeWriter.Close(); err != nil {
log.Debugf("Can't close pipe writer: %s", err)
}
}()
return pipeReader, nil
}
// Untar reads a stream of bytes from `archive`, parses it as a tar archive,
// and unpacks it into the directory at `path`.
// The archive may be compressed with one of the following algorithms:
// identity (uncompressed), gzip, bzip2, xz.
// FIXME: specify behavior when target path exists vs. doesn't exist.
func Untar(archive io.Reader, dest string, options *TarOptions) error {
if options == nil {
options = &TarOptions{}
}
if archive == nil {
return fmt.Errorf("Empty archive")
}
if options.Excludes == nil {
options.Excludes = []string{}
}
decompressedArchive, err := DecompressStream(archive)
if err != nil {
return err
}
defer decompressedArchive.Close()
tr := tar.NewReader(decompressedArchive)
trBuf := pools.BufioReader32KPool.Get(nil)
defer pools.BufioReader32KPool.Put(trBuf)
var dirs []*tar.Header
// Iterate through the files in the archive.
loop:
for {
hdr, err := tr.Next()
if err == io.EOF {
// end of tar archive
break
}
if err != nil {
return err
}
// Normalize name, for safety and for a simple is-root check
hdr.Name = filepath.Clean(hdr.Name)
for _, exclude := range options.Excludes {
if strings.HasPrefix(hdr.Name, exclude) {
continue loop
}
}
if !strings.HasSuffix(hdr.Name, "/") {
// Not the root directory, ensure that the parent directory exists
parent := filepath.Dir(hdr.Name)
parentPath := filepath.Join(dest, parent)
if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) {
err = os.MkdirAll(parentPath, 0777)
if err != nil {
return err
}
}
}
path := filepath.Join(dest, hdr.Name)
// If path exits we almost always just want to remove and replace it
// The only exception is when it is a directory *and* the file from
// the layer is also a directory. Then we want to merge them (i.e.
// just apply the metadata from the layer).
if fi, err := os.Lstat(path); err == nil {
if fi.IsDir() && hdr.Name == "." {
continue
}
if !(fi.IsDir() && hdr.Typeflag == tar.TypeDir) {
if err := os.RemoveAll(path); err != nil {
return err
}
}
}
trBuf.Reset(tr)
if err := createTarFile(path, dest, hdr, trBuf, !options.NoLchown); err != nil {
return err
}
// Directory mtimes must be handled at the end to avoid further
// file creation in them to modify the directory mtime
if hdr.Typeflag == tar.TypeDir {
dirs = append(dirs, hdr)
}
}
for _, hdr := range dirs {
path := filepath.Join(dest, hdr.Name)
ts := []syscall.Timespec{timeToTimespec(hdr.AccessTime), timeToTimespec(hdr.ModTime)}
if err := syscall.UtimesNano(path, ts); err != nil {
return err
}
}
return nil
}
// TarUntar is a convenience function which calls Tar and Untar, with
// the output of one piped into the other. If either Tar or Untar fails,
// TarUntar aborts and returns the error.
func TarUntar(src string, dst string) error {
log.Debugf("TarUntar(%s %s)", src, dst)
archive, err := TarWithOptions(src, &TarOptions{Compression: Uncompressed})
if err != nil {
return err
}
defer archive.Close()
return Untar(archive, dst, nil)
}
// UntarPath is a convenience function which looks for an archive
// at filesystem path `src`, and unpacks it at `dst`.
func UntarPath(src, dst string) error {
archive, err := os.Open(src)
if err != nil {
return err
}
defer archive.Close()
if err := Untar(archive, dst, nil); err != nil {
return err
}
return nil
}
// CopyWithTar creates a tar archive of filesystem path `src`, and
// unpacks it at filesystem path `dst`.
// The archive is streamed directly with fixed buffering and no
// intermediary disk IO.
//
func CopyWithTar(src, dst string) error {
srcSt, err := os.Stat(src)
if err != nil {
return err
}
if !srcSt.IsDir() {
return CopyFileWithTar(src, dst)
}
// Create dst, copy src's content into it
log.Debugf("Creating dest directory: %s", dst)
if err := os.MkdirAll(dst, 0755); err != nil && !os.IsExist(err) {
return err
}
log.Debugf("Calling TarUntar(%s, %s)", src, dst)
return TarUntar(src, dst)
}
// CopyFileWithTar emulates the behavior of the 'cp' command-line
// for a single file. It copies a regular file from path `src` to
// path `dst`, and preserves all its metadata.
//
// If `dst` ends with a trailing slash '/', the final destination path
// will be `dst/base(src)`.
func CopyFileWithTar(src, dst string) (err error) {
log.Debugf("CopyFileWithTar(%s, %s)", src, dst)
srcSt, err := os.Stat(src)
if err != nil {
return err
}
if srcSt.IsDir() {
return fmt.Errorf("Can't copy a directory")
}
// Clean up the trailing /
if dst[len(dst)-1] == '/' {
dst = path.Join(dst, filepath.Base(src))
}
// Create the holding directory if necessary
if err := os.MkdirAll(filepath.Dir(dst), 0700); err != nil && !os.IsExist(err) {
return err
}
r, w := io.Pipe()
errC := promise.Go(func() error {
defer w.Close()
srcF, err := os.Open(src)
if err != nil {
return err
}
defer srcF.Close()
hdr, err := tar.FileInfoHeader(srcSt, "")
if err != nil {
return err
}
hdr.Name = filepath.Base(dst)
tw := tar.NewWriter(w)
defer tw.Close()
if err := tw.WriteHeader(hdr); err != nil {
return err
}
if _, err := io.Copy(tw, srcF); err != nil {
return err
}
return nil
})
defer func() {
if er := <-errC; err != nil {
err = er
}
}()
return Untar(r, filepath.Dir(dst), nil)
}
// CmdStream executes a command, and returns its stdout as a stream.
// If the command fails to run or doesn't complete successfully, an error
// will be returned, including anything written on stderr.
func CmdStream(cmd *exec.Cmd, input io.Reader) (io.ReadCloser, error) {
if input != nil {
stdin, err := cmd.StdinPipe()
if err != nil {
return nil, err
}
// Write stdin if any
go func() {
io.Copy(stdin, input)
stdin.Close()
}()
}
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, err
}
pipeR, pipeW := io.Pipe()
errChan := make(chan []byte)
// Collect stderr, we will use it in case of an error
go func() {
errText, e := ioutil.ReadAll(stderr)
if e != nil {
errText = []byte("(...couldn't fetch stderr: " + e.Error() + ")")
}
errChan <- errText
}()
// Copy stdout to the returned pipe
go func() {
_, err := io.Copy(pipeW, stdout)
if err != nil {
pipeW.CloseWithError(err)
}
errText := <-errChan
if err := cmd.Wait(); err != nil {
pipeW.CloseWithError(fmt.Errorf("%s: %s", err, errText))
} else {
pipeW.Close()
}
}()
// Run the command and return the pipe
if err := cmd.Start(); err != nil {
return nil, err
}
return pipeR, nil
}
// NewTempArchive reads the content of src into a temporary file, and returns the contents
// of that file as an archive. The archive can only be read once - as soon as reading completes,
// the file will be deleted.
func NewTempArchive(src Archive, dir string) (*TempArchive, error) {
f, err := ioutil.TempFile(dir, "")
if err != nil {
return nil, err
}
if _, err := io.Copy(f, src); err != nil {
return nil, err
}
if err = f.Sync(); err != nil {
return nil, err
}
if _, err := f.Seek(0, 0); err != nil {
return nil, err
}
st, err := f.Stat()
if err != nil {
return nil, err
}
size := st.Size()
return &TempArchive{f, size}, nil
}
type TempArchive struct {
*os.File
Size int64 // Pre-computed from Stat().Size() as a convenience
}
func (archive *TempArchive) Read(data []byte) (int, error) {
n, err := archive.File.Read(data)
if err != nil {
os.Remove(archive.File.Name())
}
return n, err
}