mirror of
https://github.com/vbatts/utils.git
synced 2024-12-04 14:35:41 +00:00
dups: utility to hunt for (and dedup) files
using hardlinking for dedup, otherwise just building a document of file checksum info for future reuse
This commit is contained in:
parent
8ca2170b0a
commit
5cbe83545d
1 changed files with 169 additions and 0 deletions
169
cmd/dups/main.go
Normal file
169
cmd/dups/main.go
Normal file
|
@ -0,0 +1,169 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"crypto/sha1"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Command-line flags. Each variable is the pointer returned by the flag
// package and only holds the user's value after flag.Parse has run.
var (
	// flSaveMap is the file the checksum-to-path map is written to after a scan.
	flSaveMap = flag.String("s", "hash-map.json", "file to save map of file hashes to")

	// flLoadMap optionally names a previously saved map used to seed a scan.
	flLoadMap = flag.String("l", "", "load existing map from file")

	// flHardlink turns on replacing duplicates with hardlinks.
	flHardlink = flag.Bool("H", false, "hardlink the duplicate files")

	// flQuiet suppresses the per-duplicate report line.
	flQuiet = flag.Bool("q", false, "less output")
)

// nprocs bounds how many files are hashed concurrently. It starts at 1 and is
// raised to the CPU count during package initialisation.
var nprocs = 1
|
||||
|
||||
func init() {
|
||||
nprocs = runtime.NumCPU()
|
||||
runtime.GOMAXPROCS(nprocs)
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
for _, arg := range flag.Args() {
|
||||
savings := int64(0)
|
||||
found := map[string]string{}
|
||||
if len(*flLoadMap) > 0 {
|
||||
fh, err := os.Open(*flLoadMap)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
buf, err := ioutil.ReadAll(fh)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err = json.Unmarshal(buf, &found); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
workers := make(chan int, nprocs)
|
||||
mu := sync.Mutex{}
|
||||
err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
|
||||
/*
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
if !info.Mode().IsRegular() {
|
||||
return nil
|
||||
}
|
||||
workers <- 1
|
||||
go func() {
|
||||
defer func() { <-workers }()
|
||||
|
||||
fh, err := os.Open(path)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err, path)
|
||||
return
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
h := sha1.New()
|
||||
if _, err = io.Copy(h, fh); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err, path)
|
||||
return
|
||||
}
|
||||
sum := fmt.Sprintf("%x", h.Sum(nil))
|
||||
|
||||
// get the absolute filename
|
||||
p, err := filepath.Abs(path)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err, path)
|
||||
return
|
||||
}
|
||||
path = p
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if fpath, ok := found[sum]; ok && fpath != path {
|
||||
if !(*flQuiet) {
|
||||
fmt.Printf("%q is the same content as %q\n", path, fpath)
|
||||
}
|
||||
if *flHardlink {
|
||||
if err = SafeLink(fpath, path); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err, path)
|
||||
return
|
||||
}
|
||||
fmt.Printf("linked %q to %q\n", path, fpath)
|
||||
}
|
||||
savings += info.Size()
|
||||
} else {
|
||||
found[sum] = path
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
for len(workers) > 0 {
|
||||
time.Sleep(5 * time.Microsecond)
|
||||
}
|
||||
fmt.Printf("Savings of %fmb\n", float64(savings)/1024.0/1024.0)
|
||||
fh, err := os.Create(*flSaveMap)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
buf, err := json.Marshal(found)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
_, err = fh.Write(buf)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fh.Close()
|
||||
fmt.Fprintf(os.Stderr, "wrote %q\n", fh.Name())
|
||||
}
|
||||
}
|
||||
|
||||
// SafeLink hardlinks oldname to newname, replacing newname if it already
// exists.
//
// An existing newname is first renamed to a backup name formed by appending a
// random hex suffix. If creating the link then fails, the backup is renamed
// back so newname is left as it was, and the link error is returned. If that
// rollback fails too, the returned error describes both failures and names
// the backup file. After a successful link the backup is removed; a failure
// to remove it is returned even though the link itself is in place.
//
// If newname already refers to the same file as oldname, nothing is done and
// nil is returned.
func SafeLink(oldname, newname string) error {
	var backupName string

	// Lstat rather than Stat so that a dangling symlink at newname is also
	// moved out of the way instead of making the link fail.
	if newInfo, err := os.Lstat(newname); err == nil {
		if oldInfo, err := os.Lstat(oldname); err == nil && os.SameFile(oldInfo, newInfo) {
			return nil
		}

		suffix := make([]byte, 5)
		if _, err := rand.Read(suffix); err != nil {
			return err
		}
		backupName = fmt.Sprintf("%s.%x", newname, suffix)
		if err := os.Rename(newname, backupName); err != nil {
			return err
		}
	}

	if linkErr := os.Link(oldname, newname); linkErr != nil {
		if backupName != "" {
			// The rollback result is kept in its own variable so that a
			// successful rollback cannot overwrite the link error.
			if err := os.Rename(backupName, newname); err != nil {
				return fmt.Errorf("%v; restoring %q from %q also failed: %v", linkErr, newname, backupName, err)
			}
		}
		return linkErr
	}

	if backupName != "" {
		if err := os.Remove(backupName); err != nil {
			return err
		}
	}
	return nil
}
|
Loading…
Reference in a new issue