From d820978518ede3063bf6c832a540dcb7c7794321 Mon Sep 17 00:00:00 2001
From: Vincent Batts
Date: Thu, 8 Sep 2016 14:13:53 -0400
Subject: [PATCH] base: actually do the hardlink of dupes

Signed-off-by: Vincent Batts
---
 base/base.go      | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 base/base_test.go | 14 ++++++++++++++
 main.go           | 10 +++++++++-
 3 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/base/base.go b/base/base.go
index 0002bd7..3675653 100644
--- a/base/base.go
+++ b/base/base.go
@@ -2,6 +2,7 @@ package base
 
 import (
 	"crypto"
+	"crypto/rand"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -125,6 +126,54 @@ func (b Base) LinkFrom(src, sum string) error {
 	return os.Link(src, b.blobPath(sum))
 }
 
+// randomString returns 20 hexadecimal characters encoding 10 bytes read from
+// crypto/rand.
+func randomString() (string, error) {
+	buf := make([]byte, 10)
+	if _, err := rand.Read(buf); err != nil {
+		return "", err
+	}
+	return fmt.Sprintf("%x", buf), nil
+}
+
+// SafeLink hard links oldname to newname, replacing newname if it already
+// exists. An existing newname is first renamed to newname plus a random
+// suffix. If creating the link fails, that backup is renamed back and the link
+// error is returned; if the link succeeds, the backup is removed.
+func SafeLink(oldname, newname string) error {
+	var backupName string
+	// check if newname exists
+	if fi, err := os.Stat(newname); err == nil && fi != nil {
+		suffix, err := randomString()
+		if err != nil {
+			return err
+		}
+		backupName = fmt.Sprintf("%s.%s", newname, suffix)
+		// move newname to the random name backupName
+		if err := os.Rename(newname, backupName); err != nil {
+			return err
+		}
+	}
+	// hardlink oldname to newname
+	if err := os.Link(oldname, newname); err != nil {
+		// if that failed, and there is a backupName, then move back the backup
+		if len(backupName) > 0 {
+			if rerr := os.Rename(backupName, newname); rerr != nil {
+				return fmt.Errorf("%v (restoring %q from %q also failed: %v)", err, newname, backupName, rerr)
+			}
+		}
+		// report the link failure even when the rollback succeeded
+		return err
+	}
+	// the link is in place, so the backup is no longer needed
+	if len(backupName) > 0 {
+		if err := os.Remove(backupName); err != nil {
+			return fmt.Errorf("linked %q to %q but could not remove backup %q: %v", oldname, newname, backupName, err)
+		}
+	}
+	return nil
+}
+
 // Hard link the file for sum to the path at dest
 func (b Base) LinkTo(dest, sum string) error {
 	if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil && !os.IsExist(err) {
@@ -133,8 +182,10 @@ func (b Base) LinkTo(dest, sum string) error {
 	err := os.Link(b.blobPath(sum), dest)
 	if err != nil && os.IsExist(err) {
 		if !b.SameFile(sum, dest) {
-			// XXX
-			log.Printf("Would clobber %q with %q", dest, b.blobPath(sum))
+			if err := SafeLink(b.blobPath(sum), dest); err != nil {
+				return err
+			}
+			log.Printf("dedupped %q with %q", dest, b.blobPath(sum))
 		}
 	} else if err != nil {
 		return err
diff --git a/base/base_test.go b/base/base_test.go
index f7ce4f6..41ec2d6 100644
--- a/base/base_test.go
+++ b/base/base_test.go
@@ -17,6 +17,20 @@ func TestSumPath(t *testing.T) {
 	}
 }
 
+func TestRand(t *testing.T) {
+	randmap := map[string]bool{}
+	for i := 0; i < 100; i++ {
+		r, err := randomString()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if _, ok := randmap[r]; ok {
+			t.Errorf("expected no duplicates, but %q is a dup random string", r)
+		}
+		randmap[r] = true
+	}
+}
+
 func TestGetPut(t *testing.T) {
 	var (
 		srcDir, destDir string
diff --git a/main.go b/main.go
index cbb2493..995c94b 100644
--- a/main.go
+++ b/main.go
@@ -18,6 +18,7 @@ var (
 	flCipher  = flag.String("c", "sha1", "block cipher to use (sha1, or sha256)")
 	flWorkers = flag.Int("w", 2, "workers to do summing")
 	flNoop    = flag.Bool("noop", false, "don't do any moving or linking")
+	flDebug   = flag.Bool("debug", false, "enable debug output")
 )
 
 func init() {
@@ -30,6 +31,10 @@ func init() {
 func main() {
 	flag.Parse()
 
+	if *flDebug {
+		os.Setenv("DEBUG", "1")
+	}
+
 	// TODO the *flCipher has not been checked yet, and would cause the directory to get created
 	ourbase, err := base.NewBase(*flVarBase, *flCipher)
 	if err != nil {
@@ -61,11 +66,14 @@ func main() {
 			if *flNoop {
 				fmt.Printf("%s [%d] %s\n", fi.Hash, fi.Size, fi.Path)
 			} else {
+				if os.Getenv("DEBUG") != "" {
+					fmt.Printf("%q: %t\n", fi.Path, ourbase.HasBlob(fi.Hash))
+				}
 				if ourbase.HasBlob(fi.Hash) && !ourbase.SameFile(fi.Hash, fi.Path) {
 					if err := ourbase.LinkTo(fi.Path, fi.Hash); err != nil {
 						log.Println("ERROR-1", err)
 					}
-				} else {
+				} else if !ourbase.HasBlob(fi.Hash) {
 					if err := ourbase.LinkFrom(fi.Path, fi.Hash); err != nil {
 						log.Println("ERROR-2", err)
 					}