From 4619d4d8b75be9743d42ae9e3dd6f31e1dcbf007 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Thu, 21 May 2015 11:14:05 -0400 Subject: [PATCH] dups: deduplication tool --- README.md | 33 +++++++++++++++++++++++++++++++++ cmd/slackware-sync/main.go | 14 ++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index eb3ea80..ab704bd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,39 @@ Random utilities of vbatts' being cleaned up, and published # Commands +## dups + +Builds a document of file checksum info for a directory tree. Optionally +deduplicates the tree using hardlinks. + +### Install + + go get github.com/vbatts/freezing-octo-hipster/cmd/dups + +### Usage + + $ dups -h + Usage of dups: + -H=false: hardlink the duplicate files + -l="": load existing map from file + -q=false: less output + -s="hash-map.json": file to save map of file hashes to + +By default it scans the paths provided, and creates a JSON document of the file paths and their checksums: + + $ dups . 
+ "/home/vbatts/src/vb/freezing-octo-hipster/.git/logs/refs/heads/master" is the same content as "/home/vbatts/src/vb/freezing-octo-hipster/.git/logs/HEAD" + "/home/vbatts/src/vb/freezing-octo-hipster/.git/refs/remotes/origin/master" is the same content as "/home/vbatts/src/vb/freezing-octo-hipster/.git/refs/heads/master" + "/home/vbatts/src/vb/freezing-octo-hipster/cmd/find-todos/main.go~" is the same content as "/home/vbatts/src/vb/freezing-octo-hipster/cmd/find-todos/main.go" + "/home/vbatts/src/vb/freezing-octo-hipster/cmd/slackware-sync/README.md~" is the same content as "/home/vbatts/src/vb/freezing-octo-hipster/cmd/slackware-sync/README.md" + "/home/vbatts/src/vb/freezing-octo-hipster/cmd/slackware-sync/main.go" is the same content as "/home/vbatts/src/vb/freezing-octo-hipster/cmd/slackware-sync/main.go~" + Savings of 0.005681mb + wrote "hash-map.json" + +With the `-H` flag, as duplicate files (files with a matching checksum) are +encountered, each one is hardlinked to the file it duplicates. 
+ + ## next-note Simple date formating for notes diff --git a/cmd/slackware-sync/main.go b/cmd/slackware-sync/main.go index cac7dc4..6e29ac9 100644 --- a/cmd/slackware-sync/main.go +++ b/cmd/slackware-sync/main.go @@ -3,11 +3,12 @@ package main import ( "flag" "fmt" - "github.com/BurntSushi/toml" "net/url" "os" "os/exec" "path" + + "github.com/BurntSushi/toml" ) func main() { @@ -19,6 +20,10 @@ func main() { os.Exit(1) } + if len(*flSyncDir) > 0 { + config.SyncDir = *flSyncDir + } + _, err = EnsureDirExists(config.SyncDir) if err != nil { fmt.Println(err) @@ -47,10 +52,10 @@ func main() { cmd.Stdout = os.Stdout } - err = cmd.Run() - if err != nil { + err = cmd.Run() + if err != nil { fmt.Fprintln(os.Stderr, err) - } + } } } @@ -82,6 +87,7 @@ type Mirror struct { } var ( + flSyncDir = flag.String("dir", "", "directory to sync to (this flag overrides the url in the configuration file)") flConfigFile = flag.String("c", path.Join(os.Getenv("HOME"), ".slackware-sync.toml"), "config file for the sync") flQuiet = flag.Bool("q", false, "less output") )