From b17f754fff4bd05316caac78ea7703bfac09ba0f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Sep 2014 14:45:53 -0400 Subject: [PATCH] archive: preserve hardlinks in Tar and Untar * integration test for preserving hardlinks Signed-off-by: Vincent Batts Signed-off-by: Vincent Batts --- archive/archive.go | 52 +++++++++++++++++++++++++++--------- archive/archive_test.go | 58 +++++++++++++++++++++++++++++++++++++++++ archive/changes.go | 18 ++++++++----- 3 files changed, 108 insertions(+), 20 deletions(-) diff --git a/archive/archive.go b/archive/archive.go index 9c4d881..dd14b77 100644 --- a/archive/archive.go +++ b/archive/archive.go @@ -153,7 +153,15 @@ func (compression *Compression) Extension() string { return "" } -func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { +type tarAppender struct { + TarWriter *tar.Writer + Buffer *bufio.Writer + + // for hardlink mapping + SeenFiles map[uint64]string +} + +func (ta *tarAppender) addTarFile(path, name string) error { fi, err := os.Lstat(path) if err != nil { return err @@ -188,13 +196,28 @@ func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { } + // if it's a regular file and has more than 1 link, + // it's hardlinked, so set the type flag accordingly + if fi.Mode().IsRegular() && stat.Nlink > 1 { + // a link should have a name that it links to + // and that linked name should be first in the tar archive + ino := uint64(stat.Ino) + if oldpath, ok := ta.SeenFiles[ino]; ok { + hdr.Typeflag = tar.TypeLink + hdr.Linkname = oldpath + hdr.Size = 0 // This Must be here for the writer math to add up!
+ } else { + ta.SeenFiles[ino] = name + } + } + capability, _ := system.Lgetxattr(path, "security.capability") if capability != nil { hdr.Xattrs = make(map[string]string) hdr.Xattrs["security.capability"] = string(capability) } - if err := tw.WriteHeader(hdr); err != nil { + if err := ta.TarWriter.WriteHeader(hdr); err != nil { return err } @@ -204,17 +227,17 @@ func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { return err } - twBuf.Reset(tw) - _, err = io.Copy(twBuf, file) + ta.Buffer.Reset(ta.TarWriter) + _, err = io.Copy(ta.Buffer, file) file.Close() if err != nil { return err } - err = twBuf.Flush() + err = ta.Buffer.Flush() if err != nil { return err } - twBuf.Reset(nil) + ta.Buffer.Reset(nil) } return nil @@ -345,9 +368,15 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) return nil, err } - tw := tar.NewWriter(compressWriter) - go func() { + ta := &tarAppender{ + TarWriter: tar.NewWriter(compressWriter), + Buffer: pools.BufioWriter32KPool.Get(nil), + SeenFiles: make(map[uint64]string), + } + // this buffer is needed for the duration of this piped stream + defer pools.BufioWriter32KPool.Put(ta.Buffer) + // In general we log errors here but ignore them because // during e.g. 
a diff operation the container can continue // mutating the filesystem and we can see transient errors @@ -357,9 +386,6 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) options.Includes = []string{"."} } - twBuf := pools.BufioWriter32KPool.Get(nil) - defer pools.BufioWriter32KPool.Put(twBuf) - var renamedRelFilePath string // For when tar.Options.Name is set for _, include := range options.Includes { filepath.Walk(filepath.Join(srcPath, include), func(filePath string, f os.FileInfo, err error) error { @@ -395,7 +421,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) relFilePath = strings.Replace(relFilePath, renamedRelFilePath, options.Name, 1) } - if err := addTarFile(filePath, relFilePath, tw, twBuf); err != nil { + if err := ta.addTarFile(filePath, relFilePath); err != nil { log.Debugf("Can't add file %s to tar: %s", srcPath, err) } return nil @@ -403,7 +429,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) } // Make sure to check the error on Close. 
- if err := tw.Close(); err != nil { + if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close tar writer: %s", err) } if err := compressWriter.Close(); err != nil { diff --git a/archive/archive_test.go b/archive/archive_test.go index 900fff5..3516aca 100644 --- a/archive/archive_test.go +++ b/archive/archive_test.go @@ -249,6 +249,64 @@ func TestUntarUstarGnuConflict(t *testing.T) { } } +func TestTarWithHardLink(t *testing.T) { + origin, err := ioutil.TempDir("", "docker-test-tar-hardlink") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(origin) + if err := ioutil.WriteFile(path.Join(origin, "1"), []byte("hello world"), 0700); err != nil { + t.Fatal(err) + } + if err := os.Link(path.Join(origin, "1"), path.Join(origin, "2")); err != nil { + t.Fatal(err) + } + + var i1, i2 uint64 + if i1, err = getNlink(path.Join(origin, "1")); err != nil { + t.Fatal(err) + } + // sanity check that we can hardlink + if i1 != 2 { + t.Skipf("skipping since hardlinks don't work here; expected 2 links, got %d", i1) + } + + dest, err := ioutil.TempDir("", "docker-test-tar-hardlink-dest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dest) + + // we'll do this in two steps to separate failure + fh, err := Tar(origin, Uncompressed) + if err != nil { + t.Fatal(err) + } + + // ensure we can read the whole thing with no error, before writing back out + buf, err := ioutil.ReadAll(fh) + if err != nil { + t.Fatal(err) + } + + bRdr := bytes.NewReader(buf) + err = Untar(bRdr, dest, &TarOptions{Compression: Uncompressed}) + if err != nil { + t.Fatal(err) + } + + if i1, err = getInode(path.Join(dest, "1")); err != nil { + t.Fatal(err) + } + if i2, err = getInode(path.Join(dest, "2")); err != nil { + t.Fatal(err) + } + + if i1 != i2 { + t.Errorf("expected matching inodes, but got %d and %d", i1, i2) + } +} + func getNlink(path string) (uint64, error) { stat, err := os.Stat(path) if err != nil { diff --git a/archive/changes.go b/archive/changes.go index 
557b5db..3e9ab45 100644 --- a/archive/changes.go +++ b/archive/changes.go @@ -368,11 +368,15 @@ func minor(device uint64) uint64 { // ExportChanges produces an Archive from the provided changes, relative to dir. func ExportChanges(dir string, changes []Change) (Archive, error) { reader, writer := io.Pipe() - tw := tar.NewWriter(writer) - go func() { - twBuf := pools.BufioWriter32KPool.Get(nil) - defer pools.BufioWriter32KPool.Put(twBuf) + ta := &tarAppender{ + TarWriter: tar.NewWriter(writer), + Buffer: pools.BufioWriter32KPool.Get(nil), + SeenFiles: make(map[uint64]string), + } + // this buffer is needed for the duration of this piped stream + defer pools.BufioWriter32KPool.Put(ta.Buffer) + // In general we log errors here but ignore them because // during e.g. a diff operation the container can continue // mutating the filesystem and we can see transient errors @@ -390,19 +394,19 @@ func ExportChanges(dir string, changes []Change) (Archive, error) { AccessTime: timestamp, ChangeTime: timestamp, } - if err := tw.WriteHeader(hdr); err != nil { + if err := ta.TarWriter.WriteHeader(hdr); err != nil { log.Debugf("Can't write whiteout header: %s", err) } } else { path := filepath.Join(dir, change.Path) - if err := addTarFile(path, change.Path[1:], tw, twBuf); err != nil { + if err := ta.addTarFile(path, change.Path[1:]); err != nil { log.Debugf("Can't add file %s to tar: %s", path, err) } } } // Make sure to check the error on Close. - if err := tw.Close(); err != nil { + if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close layer: %s", err) } writer.Close()