mirror of
https://github.com/vbatts/dedupe-linker.git
synced 2024-12-28 01:26:33 +00:00
commit
7271153d2a
9 changed files with 300 additions and 119 deletions
23
base/base.go
23
base/base.go
|
@ -14,22 +14,24 @@ import (
|
||||||
"github.com/vbatts/dedupe-linker/file"
|
"github.com/vbatts/dedupe-linker/file"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// NewBase populates the directories needed in a dedupe-base directory
|
||||||
func NewBase(path string, hashName string) (*Base, error) {
|
func NewBase(path string, hashName string) (*Base, error) {
|
||||||
root := filepath.Join(path, "dedup")
|
|
||||||
for _, p := range []string{"blobs/" + hashName, "state", "tmp"} {
|
for _, p := range []string{"blobs/" + hashName, "state", "tmp"} {
|
||||||
if err := os.MkdirAll(filepath.Join(root, p), 0755); err != nil && !os.IsExist(err) {
|
if err := os.MkdirAll(filepath.Join(path, p), 0755); err != nil && !os.IsExist(err) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &Base{Path: root, HashName: hashName, Hash: cryptomap.DetermineHash(hashName)}, nil
|
return &Base{Path: path, HashName: hashName, Hash: cryptomap.DetermineHash(hashName)}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base is the destination for all hardlinks. Where stored objects are in a content addressible tree.
|
||||||
type Base struct {
|
type Base struct {
|
||||||
Path string
|
Path string
|
||||||
HashName string
|
HashName string
|
||||||
Hash crypto.Hash
|
Hash crypto.Hash
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stat provides the os.FileInfo for the object of `sum` address
|
||||||
func (b Base) Stat(sum string) (os.FileInfo, error) {
|
func (b Base) Stat(sum string) (os.FileInfo, error) {
|
||||||
return os.Stat(b.blobPath(sum))
|
return os.Stat(b.blobPath(sum))
|
||||||
}
|
}
|
||||||
|
@ -41,12 +43,15 @@ func (b Base) blobPath(sum string) string {
|
||||||
return filepath.Join(b.Path, "blobs", b.HashName, sum[0:2], sum)
|
return filepath.Join(b.Path, "blobs", b.HashName, sum[0:2], sum)
|
||||||
}
|
}
|
||||||
|
|
||||||
type ReaderSeekerCloser interface {
|
// ReadSeekCloser is like an io.ReadCloser, but can Seek too
|
||||||
|
type ReadSeekCloser interface {
|
||||||
io.Reader
|
io.Reader
|
||||||
io.Seeker
|
io.Seeker
|
||||||
io.Closer
|
io.Closer
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SameFile checks whether the object of `sum` address, and `path` file path are the same file.
|
||||||
|
// This checks by inode and device.
|
||||||
func (b Base) SameFile(sum, path string) bool {
|
func (b Base) SameFile(sum, path string) bool {
|
||||||
var (
|
var (
|
||||||
bInode, dInode uint64
|
bInode, dInode uint64
|
||||||
|
@ -66,7 +71,7 @@ func (b Base) SameFile(sum, path string) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetBlob store the content from src, for the sum and hashType
|
// GetBlob store the content from src, for the sum and hashType
|
||||||
func (b Base) GetBlob(sum string) (ReaderSeekerCloser, error) {
|
func (b Base) GetBlob(sum string) (ReadSeekCloser, error) {
|
||||||
return os.Open(b.blobPath(sum))
|
return os.Open(b.blobPath(sum))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,7 +123,8 @@ func (b Base) tmpFile() (*os.File, error) {
|
||||||
return ioutil.TempFile(filepath.Join(b.Path, "tmp"), "put")
|
return ioutil.TempFile(filepath.Join(b.Path, "tmp"), "put")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hard link the file from src to the blob for sum
|
// LinkFrom make a hard link the file from src to the blob of address `sum`.
|
||||||
|
// TODO this function is going away, because it makes no assessment of the checksum of `src`
|
||||||
func (b Base) LinkFrom(src, sum string) error {
|
func (b Base) LinkFrom(src, sum string) error {
|
||||||
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0756); err != nil && !os.IsExist(err) {
|
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0756); err != nil && !os.IsExist(err) {
|
||||||
return err
|
return err
|
||||||
|
@ -135,7 +141,8 @@ func randomString() (string, error) {
|
||||||
return fmt.Sprintf("%x", buf), nil
|
return fmt.Sprintf("%x", buf), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SafeLink overrides newname if it already exists. If there is an error in creating the link, the transaction is rolled back
|
// SafeLink overrides newname if it already exists. If there is an error in
|
||||||
|
// creating the link, the transaction is rolled back
|
||||||
func SafeLink(oldname, newname string) error {
|
func SafeLink(oldname, newname string) error {
|
||||||
var backupName string
|
var backupName string
|
||||||
// check if newname exists
|
// check if newname exists
|
||||||
|
@ -169,7 +176,7 @@ func SafeLink(oldname, newname string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hard link the file for sum to the path at dest
|
// LinkTo makes a hard link the file of address `sum` to the path at `dest`
|
||||||
func (b Base) LinkTo(dest, sum string) error {
|
func (b Base) LinkTo(dest, sum string) error {
|
||||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil && !os.IsExist(err) {
|
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil && !os.IsExist(err) {
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -2,7 +2,6 @@ package base
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
@ -50,7 +49,7 @@ func TestGetPut(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
rHash := "deadbeaf"
|
rHash := "8f074e76e82ae6156c451019840a6f857bbe5157"
|
||||||
rMsg := "this is the dead beef"
|
rMsg := "this is the dead beef"
|
||||||
|
|
||||||
r := bytes.NewReader([]byte(rMsg))
|
r := bytes.NewReader([]byte(rMsg))
|
||||||
|
@ -58,12 +57,18 @@ func TestGetPut(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
|
if sum != rHash {
|
||||||
|
t.Errorf("expected %q; got %q", rHash, sum)
|
||||||
|
}
|
||||||
|
|
||||||
fi, err := b.Stat(rHash)
|
fi, err := b.Stat(rHash)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
fmt.Printf("%#v\n", fi.Sys())
|
if fi == nil {
|
||||||
|
t.Fatal("did not find the blob " + rHash)
|
||||||
|
}
|
||||||
|
//fmt.Printf("%#v\n", fi.Sys())
|
||||||
|
|
||||||
if err = b.LinkTo(path.Join(srcDir, "beef1.txt"), rHash); err != nil {
|
if err = b.LinkTo(path.Join(srcDir, "beef1.txt"), rHash); err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
|
@ -72,7 +77,10 @@ func TestGetPut(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
fmt.Printf("%#v\n", fi2.Sys())
|
if fi2 == nil {
|
||||||
|
t.Fatal("did not find the linked file " + path.Join(srcDir, "beef1.txt"))
|
||||||
|
}
|
||||||
|
//fmt.Printf("%#v\n", fi2.Sys())
|
||||||
|
|
||||||
if err = b.LinkTo(path.Join(srcDir, "beef1.txt"), rHash); err != nil && !os.IsExist(err) {
|
if err = b.LinkTo(path.Join(srcDir, "beef1.txt"), rHash); err != nil && !os.IsExist(err) {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
|
|
61
base/findbase.go
Normal file
61
base/findbase.go
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
package base
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/vbatts/dedupe-linker/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FindBase steps up the directory tree to find the top-level that is still on
|
||||||
|
// the same device as the path provided
|
||||||
|
func FindBase(path string) (string, error) {
|
||||||
|
stat, err := os.Lstat(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if stat.IsDir() {
|
||||||
|
return findBaseInfo(stat)
|
||||||
|
}
|
||||||
|
|
||||||
|
return FindBase(filepath.Dir(path))
|
||||||
|
}
|
||||||
|
|
||||||
|
func findBaseInfo(stat os.FileInfo) (string, error) {
|
||||||
|
dirstat, err := os.Lstat(filepath.Dir(stat.Name()))
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if stat.Name() == dirstat.Name() {
|
||||||
|
return stat.Name(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if sameDevice(stat, dirstat) {
|
||||||
|
return findBaseInfo(dirstat)
|
||||||
|
}
|
||||||
|
return stat.Name(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasPermission(path string) bool {
|
||||||
|
stat, err := os.Lstat(path)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !stat.IsDir() {
|
||||||
|
path = filepath.Dir(path)
|
||||||
|
}
|
||||||
|
fh, err := ioutil.TempFile(path, "perm.test.")
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
os.Remove(fh.Name())
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func sameDevice(file1, file2 os.FileInfo) bool {
|
||||||
|
sys1 := file1.Sys().(*syscall.Stat_t)
|
||||||
|
sys2 := file2.Sys().(*syscall.Stat_t)
|
||||||
|
return ((file.MajorDev(sys1.Dev) == file.MajorDev(sys2.Dev)) && (file.MinorDev(sys1.Dev) == file.MinorDev(sys2.Dev)))
|
||||||
|
}
|
76
base/findbase_test.go
Normal file
76
base/findbase_test.go
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
package base
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHasPermission(t *testing.T) {
|
||||||
|
if !hasPermission("/tmp") {
|
||||||
|
t.Error("expected to have permission to /tmp, but did not")
|
||||||
|
}
|
||||||
|
|
||||||
|
if hasPermission("/") {
|
||||||
|
t.Error("expected to not have permission to /, but did")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSameDev(t *testing.T) {
|
||||||
|
file1, err := ioutil.TempFile("", "test")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer file1.Close()
|
||||||
|
file2, err := ioutil.TempFile("", "test")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer file2.Close()
|
||||||
|
|
||||||
|
stat1, err := file1.Stat()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
stat2, err := file2.Stat()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !sameDevice(stat1, stat2) {
|
||||||
|
t.Errorf("expected the two files to be on same device. But %q and %q are not", file1.Name(), file2.Name())
|
||||||
|
} else {
|
||||||
|
os.Remove(stat1.Name())
|
||||||
|
os.Remove(stat2.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// perhaps this is naive. Travis' /tmp is on the same device and not sure how to request it be tmpfs w/o needing sudo
|
||||||
|
func testNotSameDev(t *testing.T) {
|
||||||
|
file1, err := ioutil.TempFile("/tmp", "test")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer file1.Close()
|
||||||
|
file2, err := ioutil.TempFile(os.Getenv("HOME"), "test")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer file2.Close()
|
||||||
|
|
||||||
|
stat1, err := file1.Stat()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
stat2, err := file2.Stat()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if sameDevice(stat1, stat2) {
|
||||||
|
t.Errorf("expected the two files _not_ to be on same device. But %q and %q are not", file1.Name(), file2.Name())
|
||||||
|
} else {
|
||||||
|
os.Remove(stat1.Name())
|
||||||
|
os.Remove(stat2.Name())
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,36 +2,37 @@ package cryptomap
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto"
|
"crypto"
|
||||||
|
"log"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
// Importing all the currently supported hashes
|
||||||
_ "crypto/md5"
|
_ "crypto/md5"
|
||||||
_ "crypto/sha1"
|
_ "crypto/sha1"
|
||||||
_ "crypto/sha256"
|
_ "crypto/sha256"
|
||||||
_ "crypto/sha512"
|
_ "crypto/sha512"
|
||||||
"log"
|
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var knownCiphers = map[string]crypto.Hash{
|
// DefaultCipher is the crypto cipher default used if none is specified or
|
||||||
"md5": crypto.MD5,
|
// specified is unknown.
|
||||||
|
var DefaultCipher = "sha1"
|
||||||
|
|
||||||
|
// Ciphers is the known set of mappings for string to crypto.Hash
|
||||||
|
// use an init() to add custom hash ciphers
|
||||||
|
var Ciphers = map[string]crypto.Hash{
|
||||||
|
"md5": crypto.MD5,
|
||||||
|
"sha1": crypto.SHA1,
|
||||||
|
"sha224": crypto.SHA224,
|
||||||
|
"sha256": crypto.SHA256,
|
||||||
|
"sha384": crypto.SHA384,
|
||||||
|
"sha512": crypto.SHA512,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DetermineHash takes a generic string, like "sha1" and returns the
|
||||||
|
// corresponding crypto.Hash
|
||||||
func DetermineHash(str string) (h crypto.Hash) {
|
func DetermineHash(str string) (h crypto.Hash) {
|
||||||
switch strings.ToLower(str) {
|
if h, ok := Ciphers[strings.ToLower(str)]; ok {
|
||||||
case "md5":
|
return h
|
||||||
h = crypto.MD5
|
|
||||||
case "sha1":
|
|
||||||
h = crypto.SHA1
|
|
||||||
case "sha224":
|
|
||||||
h = crypto.SHA224
|
|
||||||
case "sha256":
|
|
||||||
h = crypto.SHA256
|
|
||||||
case "sha384":
|
|
||||||
h = crypto.SHA384
|
|
||||||
case "sha512":
|
|
||||||
h = crypto.SHA512
|
|
||||||
default:
|
|
||||||
log.Printf("WARNING: unknown cipher %q. using 'sha1'", str)
|
|
||||||
h = crypto.SHA1
|
|
||||||
}
|
}
|
||||||
|
log.Printf("WARNING: unknown cipher %q. using %q", str, DefaultCipher)
|
||||||
return h
|
return Ciphers[DefaultCipher]
|
||||||
}
|
}
|
||||||
|
|
94
file/dev.go
Normal file
94
file/dev.go
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SameInodePaths checks whether path1 and path2 are the same inode
|
||||||
|
func SameInodePaths(path1, path2 string) (match bool, err error) {
|
||||||
|
var inode1, inode2 uint64
|
||||||
|
if inode1, err = GetInode(path1); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
if inode2, err = GetInode(path2); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return inode1 == inode2, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SameDevPaths checks whether path1 and path2 are on the same device
|
||||||
|
func SameDevPaths(path1, path2 string) (match bool, err error) {
|
||||||
|
var dev1, dev2 uint64
|
||||||
|
if dev1, err = GetDev(path1); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
if dev2, err = GetDev(path2); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return dev1 == dev2, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FormatDev has a scary name, but just pretty prints the stat_t.dev as "major:minor"
|
||||||
|
func FormatDev(stat *syscall.Stat_t) string {
|
||||||
|
return fmt.Sprintf("%d:%d", MajorDev(stat.Dev), MinorDev(stat.Dev))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MajorDev provides the major device number from a stat_t.dev
|
||||||
|
func MajorDev(dev uint64) uint64 {
|
||||||
|
return (((dev >> 8) & 0xfff) | ((dev >> 32) & ^uint64(0xfff)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MinorDev provides the minor device number from a stat_t.dev
|
||||||
|
func MinorDev(dev uint64) uint64 {
|
||||||
|
return ((dev & 0xff) | ((dev >> 12) & ^uint64(0xff)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetLstat returns the system stat_t for the file at path.
|
||||||
|
// (symlinks are not deferenced)
|
||||||
|
func GetLstat(path string) (*syscall.Stat_t, error) {
|
||||||
|
fi, err := os.Lstat(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return fi.Sys().(*syscall.Stat_t), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetStat returns the system stat_t for the file at path.
|
||||||
|
// (symlinks are deferenced)
|
||||||
|
func GetStat(path string) (*syscall.Stat_t, error) {
|
||||||
|
fi, err := os.Stat(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return fi.Sys().(*syscall.Stat_t), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInode returns the inode for path
|
||||||
|
func GetInode(path string) (uint64, error) {
|
||||||
|
stat, err := GetStat(path)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return stat.Ino, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetDev returns the device for path
|
||||||
|
func GetDev(path string) (uint64, error) {
|
||||||
|
stat, err := GetStat(path)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return stat.Dev, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNlink returns the number of links for path. For directories, that is
|
||||||
|
// number of entries. For regular files, that is number of hardlinks.
|
||||||
|
func GetNlink(path string) (uint64, error) {
|
||||||
|
stat, err := GetStat(path)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return stat.Nlink, nil
|
||||||
|
}
|
89
file/hash.go
89
file/hash.go
|
@ -6,11 +6,13 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"syscall"
|
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type FileHashInfo struct {
|
// HashInfo for tracking the information regarding a file, it's checksum
|
||||||
|
// and status.
|
||||||
|
// If Err is set then the caller must take an appropriate action.
|
||||||
|
type HashInfo struct {
|
||||||
HashType crypto.Hash
|
HashType crypto.Hash
|
||||||
Hash string
|
Hash string
|
||||||
Path string
|
Path string
|
||||||
|
@ -19,8 +21,10 @@ type FileHashInfo struct {
|
||||||
Err error
|
Err error
|
||||||
}
|
}
|
||||||
|
|
||||||
func HashFileGetter(path string, hash crypto.Hash, workers int, done <-chan struct{}) <-chan FileHashInfo {
|
// HashFileGetter walks the provided `path` with `workers` number of threads.
|
||||||
out := make(chan FileHashInfo, workers)
|
// The channel of HashInfo are for each regular file encountered.
|
||||||
|
func HashFileGetter(path string, hash crypto.Hash, workers int, done <-chan struct{}) <-chan HashInfo {
|
||||||
|
out := make(chan HashInfo, workers)
|
||||||
go func() {
|
go func() {
|
||||||
err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -39,15 +43,15 @@ func HashFileGetter(path string, hash crypto.Hash, workers int, done <-chan stru
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
out <- FileHashInfo{Err: err}
|
out <- HashInfo{Err: err}
|
||||||
}
|
}
|
||||||
close(out)
|
close(out)
|
||||||
}()
|
}()
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func hashFile(path string, hash crypto.Hash, info os.FileInfo) *FileHashInfo {
|
func hashFile(path string, hash crypto.Hash, info os.FileInfo) *HashInfo {
|
||||||
fhi := FileHashInfo{HashType: hash, Path: path, ModTime: info.ModTime(), Size: info.Size()}
|
fhi := HashInfo{HashType: hash, Path: path, ModTime: info.ModTime(), Size: info.Size()}
|
||||||
h := hash.New()
|
h := hash.New()
|
||||||
fh, err := os.Open(path)
|
fh, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -63,74 +67,3 @@ func hashFile(path string, hash crypto.Hash, info os.FileInfo) *FileHashInfo {
|
||||||
fhi.Hash = fmt.Sprintf("%x", h.Sum(nil))
|
fhi.Hash = fmt.Sprintf("%x", h.Sum(nil))
|
||||||
return &fhi
|
return &fhi
|
||||||
}
|
}
|
||||||
|
|
||||||
// SameInodePaths checks whether path1 and path2 are the same inode
|
|
||||||
func SameInodePaths(path1, path2 string) (match bool, err error) {
|
|
||||||
var inode1, inode2 uint64
|
|
||||||
if inode1, err = GetInode(path1); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
if inode2, err = GetInode(path2); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
return inode1 == inode2, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SameInodePaths checks whether path1 and path2 are on the same device
|
|
||||||
func SameDevPaths(path1, path2 string) (match bool, err error) {
|
|
||||||
var dev1, dev2 uint64
|
|
||||||
if dev1, err = GetDev(path1); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
if dev2, err = GetDev(path2); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
return dev1 == dev2, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func FormatDev(stat *syscall.Stat_t) string {
|
|
||||||
return fmt.Sprintf("%d:%d", MajorDev(stat.Dev), MinorDev(stat.Dev))
|
|
||||||
}
|
|
||||||
|
|
||||||
func MajorDev(dev uint64) uint64 {
|
|
||||||
return (((dev >> 8) & 0xfff) | ((dev >> 32) & ^uint64(0xfff)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func MinorDev(dev uint64) uint64 {
|
|
||||||
return ((dev & 0xff) | ((dev >> 12) & ^uint64(0xff)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetStat(path string) (*syscall.Stat_t, error) {
|
|
||||||
fi, err := os.Stat(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return fi.Sys().(*syscall.Stat_t), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetInode returns the inode for path
|
|
||||||
func GetInode(path string) (uint64, error) {
|
|
||||||
stat, err := GetStat(path)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
return stat.Ino, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetDev returns the device for path
|
|
||||||
func GetDev(path string) (uint64, error) {
|
|
||||||
stat, err := GetStat(path)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
return stat.Dev, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetNlink returns the number of links for path
|
|
||||||
func GetNlink(path string) (uint64, error) {
|
|
||||||
stat, err := GetStat(path)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
return stat.Nlink, nil
|
|
||||||
}
|
|
||||||
|
|
10
main.go
10
main.go
|
@ -14,10 +14,10 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
varBaseDir = filepath.Join(os.Getenv("HOME"), ".local/dedupe-linker/var")
|
varBaseDir = filepath.Join(os.Getenv("HOME"), ".dedupe-linker/")
|
||||||
|
|
||||||
flVarBase = flag.String("b", varBaseDir, "base directory where files are duplicated")
|
flVarBase = flag.String("b", varBaseDir, "base directory where files are duplicated")
|
||||||
flCipher = flag.String("c", "sha1", "block cipher to use (sha1, or sha256)")
|
flCipher = flag.String("c", cryptomap.DefaultCipher, "block cipher to use (sha1, or sha256)")
|
||||||
flWorkers = flag.Int("w", 2, "workers to do summing")
|
flWorkers = flag.Int("w", 2, "workers to do summing")
|
||||||
flNoop = flag.Bool("noop", false, "don't do any moving or linking")
|
flNoop = flag.Bool("noop", false, "don't do any moving or linking")
|
||||||
flDebug = flag.Bool("debug", false, "enable debug output")
|
flDebug = flag.Bool("debug", false, "enable debug output")
|
||||||
|
@ -53,8 +53,8 @@ func main() {
|
||||||
|
|
||||||
var (
|
var (
|
||||||
hash = cryptomap.DetermineHash(*flCipher)
|
hash = cryptomap.DetermineHash(*flCipher)
|
||||||
//infos = []*file.FileHashInfo{}
|
//infos = []*file.HashInfo{}
|
||||||
//results := make(chan file.FileHashInfo, 2)
|
//results := make(chan file.HashInfo, 2)
|
||||||
)
|
)
|
||||||
|
|
||||||
for _, arg := range flag.Args() {
|
for _, arg := range flag.Args() {
|
||||||
|
@ -77,7 +77,7 @@ func main() {
|
||||||
fmt.Printf("%s [%d] %s\n", fi.Hash, fi.Size, fi.Path)
|
fmt.Printf("%s [%d] %s\n", fi.Hash, fi.Size, fi.Path)
|
||||||
} else {
|
} else {
|
||||||
if os.Getenv("DEBUG") != "" {
|
if os.Getenv("DEBUG") != "" {
|
||||||
fmt.Printf("%q: %q\n", fi.Path, ourbase.HasBlob(fi.Hash))
|
fmt.Printf("%q: %t\n", fi.Path, ourbase.HasBlob(fi.Hash))
|
||||||
}
|
}
|
||||||
if ourbase.HasBlob(fi.Hash) && !ourbase.SameFile(fi.Hash, fi.Path) {
|
if ourbase.HasBlob(fi.Hash) && !ourbase.SameFile(fi.Hash, fi.Path) {
|
||||||
if err := ourbase.LinkTo(fi.Path, fi.Hash); err != nil {
|
if err := ourbase.LinkTo(fi.Path, fi.Hash); err != nil {
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
|
// Package walker is a work-in-progress
|
||||||
package walker
|
package walker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/vbatts/dedupe-linker/base"
|
"github.com/vbatts/dedupe-linker/base"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Walker struct {
|
type walker struct {
|
||||||
Base *base.Base
|
Base *base.Base
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w Walker) Walk(path string, quit chan int) error {
|
func (w walker) Walk(path string, quit chan int) error {
|
||||||
// XXX what is going on here?
|
// XXX what is going on here?
|
||||||
select {
|
select {
|
||||||
case <-quit:
|
case <-quit:
|
||||||
|
|
Loading…
Reference in a new issue