1
0
Fork 0
mirror of https://github.com/vbatts/dedupe-linker.git synced 2025-10-25 18:10:58 +00:00

various changes

This commit is contained in:
Vincent Batts 2014-10-14 16:54:28 -04:00
parent bd70060729
commit a023c98cd1
5 changed files with 197 additions and 16 deletions

View file

@ -1,15 +1,21 @@
package base package base
import ( import (
"crypto"
"fmt"
"io" "io"
"io/ioutil"
"log"
"os" "os"
"path/filepath" "path/filepath"
"../file"
) )
func NewBase(path string, hashName string) (*Base, error) { func NewBase(path string, hashName string) (*Base, error) {
root := filepath.Join(path, "dedup") root := filepath.Join(path, "dedup")
for _, p := range []string{"blobs/" + hashName, "state"} { for _, p := range []string{"blobs/" + hashName, "state", "tmp"} {
if err := os.MkdirAll(filepath.Join(root, p), 0755); err != nil { if err := os.MkdirAll(filepath.Join(root, p), 0755); err != nil && !os.IsExist(err) {
return nil, err return nil, err
} }
} }
@ -19,6 +25,11 @@ func NewBase(path string, hashName string) (*Base, error) {
type Base struct { type Base struct {
Path string Path string
HashName string HashName string
Hash crypto.Hash
}
func (b Base) Stat(sum string) (os.FileInfo, error) {
return os.Stat(b.blobPath(sum))
} }
func (b Base) blobPath(sum string) string { func (b Base) blobPath(sum string) string {
@ -28,22 +39,111 @@ func (b Base) blobPath(sum string) string {
return filepath.Join(b.Path, "blobs", b.HashName, sum[0:2], sum) return filepath.Join(b.Path, "blobs", b.HashName, sum[0:2], sum)
} }
// ReaderSeekerCloser groups the basic Read, Seek and Close methods.
// It is the type GetBlob returns for an opened blob.
type ReaderSeekerCloser interface {
	io.ReadSeeker
	io.Closer
}
// SameFile reports whether the blob stored for sum and the file at path have
// the same inode number, as returned by file.GetInode. If either lookup
// fails, it returns false.
//
// NOTE(review): only the inode numbers are compared here; whether
// file.GetInode also accounts for the device is not visible from this
// block — confirm.
func (b Base) SameFile(sum, path string) bool {
	blobInode, err := file.GetInode(b.blobPath(sum))
	if err != nil {
		return false
	}
	pathInode, err := file.GetInode(path)
	if err != nil {
		return false
	}
	return blobInode == pathInode
}
// GetBlob store the content from src, for the sum and hashType // GetBlob store the content from src, for the sum and hashType
func (b Base) GetBlob(sum string) (io.Reader, error) { func (b Base) GetBlob(sum string) (ReaderSeekerCloser, error) {
// XXX return os.Open(b.blobPath(sum))
return nil, nil
} }
// PutBlob store the content from src, for the sum and hashType // PutBlob store the content from src, for the sum and hashType
// //
// we take the sum up front to avoid recalculation and tempfiles // we take the sum up front to avoid recalculation and tempfiles
func (b Base) PutBlob(sum string, src io.Reader) error { func (b Base) PutBlob(src io.Reader, mode os.FileMode) (string, error) {
// XXX fh, err := b.tmpFile()
if err != nil {
return "", err
}
defer func() {
fh.Close()
os.Remove(fh.Name())
}()
h := b.Hash.New()
t := io.TeeReader(src, h)
if _, err = io.Copy(fh, t); err != nil {
return "", err
}
sum := fmt.Sprintf("%x", h.Sum(nil))
fi, err := b.Stat(sum)
if err == nil && fi.Mode().IsRegular() {
return sum, nil
}
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0755); err != nil && !os.IsExist(err) {
return sum, err
}
destFh, err := os.Create(b.blobPath(sum))
if err != nil {
return sum, err
}
defer destFh.Close()
_, err = fh.Seek(0, 0)
if err != nil {
return sum, err
}
if _, err = io.Copy(destFh, fh); err != nil {
return sum, err
}
return sum, destFh.Chmod(mode)
}
// tmpFile creates a new temporary file, named with the "put" prefix, inside
// the "tmp" directory under b.Path. The file is not cleaned up here; closing
// and removing it is left to the caller.
func (b Base) tmpFile() (*os.File, error) {
	tmpDir := filepath.Join(b.Path, "tmp")
	return ioutil.TempFile(tmpDir, "put")
}
// LinkFrom hard links the file at src to the blob path for sum, creating the
// blob's parent directory first if needed.
//
// It returns the error from creating the directory or from os.Link; in
// particular, os.Link fails if a blob for sum already exists.
func (b Base) LinkFrom(src, sum string) error {
	blob := b.blobPath(sum)
	// 0755 matches the mode used for every other directory in the store.
	if err := os.MkdirAll(filepath.Dir(blob), 0755); err != nil && !os.IsExist(err) {
		return err
	}
	return os.Link(src, blob)
}
// Hard link the file for sum to the path at dest
func (b Base) LinkTo(dest, sum string) error {
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil && !os.IsExist(err) {
return err
}
err := os.Link(b.blobPath(sum), dest)
if err != nil && os.IsExist(err) {
if !b.SameFile(sum, dest) {
// XXX
log.Printf("Would clobber %q with %q", dest, b.blobPath(sum))
}
} else if err != nil {
return err
}
return nil return nil
} }
// HasBlob tests whether a blob with this sum exists // HasBlob tests whether a blob with this sum exists
func (b Base) HasBlob(sum string) bool { func (b Base) HasBlob(sum string) bool {
// XXX fi, err := b.Stat(sum)
return true log.Println("SUCH FARTS", fi)
return fi != nil && err == nil
} }

View file

@ -1,6 +1,13 @@
package base package base
import "testing" import (
"bytes"
"fmt"
"io/ioutil"
"os"
"path"
"testing"
)
func TestSumPath(t *testing.T) { func TestSumPath(t *testing.T) {
expected := "/var/dedup/blobs/sha1/de/deadbeef" expected := "/var/dedup/blobs/sha1/de/deadbeef"
@ -9,3 +16,51 @@ func TestSumPath(t *testing.T) {
t.Errorf("expected %q, got %q", expected, bp) t.Errorf("expected %q, got %q", expected, bp)
} }
} }
// TestGetPut stores a blob with PutBlob and then hard links it into a
// separate directory with LinkTo, addressing the blob by the checksum that
// PutBlob returned rather than a made-up one.
//
// NOTE(review): PutBlob calls b.Hash.New(); whether NewBase populates Hash
// for "sha1" is not visible from this test — confirm.
func TestGetPut(t *testing.T) {
	srcDir, err := ioutil.TempDir("", "dedupe-linker-src")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(srcDir)

	destDir, err := ioutil.TempDir("", "dedupe-linker-dest")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(destDir)

	b, err := NewBase(destDir, "sha1")
	if err != nil {
		t.Fatal(err)
	}

	rMsg := "this is the dead beef"
	r := bytes.NewReader([]byte(rMsg))
	sum, err := b.PutBlob(r, 0666)
	if err != nil {
		t.Fatal(err)
	}

	// Fatal rather than Error: fi is dereferenced right below.
	fi, err := b.Stat(sum)
	if err != nil {
		t.Fatal(err)
	}
	fmt.Printf("%#v\n", fi.Sys())

	linkPath := path.Join(srcDir, "beef1.txt")
	if err = b.LinkTo(linkPath, sum); err != nil {
		t.Fatal(err)
	}

	fi2, err := os.Stat(linkPath)
	if err != nil {
		t.Fatal(err)
	}
	fmt.Printf("%#v\n", fi2.Sys())

	// Linking the same blob to the same destination a second time must not
	// be reported as a failure.
	if err = b.LinkTo(linkPath, sum); err != nil && !os.IsExist(err) {
		t.Error(err)
	}
}

View file

@ -15,6 +15,7 @@ type FileHashInfo struct {
HashType crypto.Hash HashType crypto.Hash
Hash string Hash string
Path string Path string
Size int64
ModTime time.Time ModTime time.Time
Err error Err error
} }
@ -58,7 +59,7 @@ func HashFileGetter(path string, hash crypto.Hash, workers int, done <-chan stru
} }
func hashFile(path string, hash crypto.Hash, info os.FileInfo) *FileHashInfo { func hashFile(path string, hash crypto.Hash, info os.FileInfo) *FileHashInfo {
fhi := FileHashInfo{HashType: hash, Path: path, ModTime: info.ModTime()} fhi := FileHashInfo{HashType: hash, Path: path, ModTime: info.ModTime(), Size: info.Size()}
h := hash.New() h := hash.New()
fh, err := os.Open(path) fh, err := os.Open(path)
if err != nil { if err != nil {

17
main.go
View file

@ -17,6 +17,7 @@ var (
flVarBase = flag.String("b", filepath.Join(os.Getenv("HOME"), "var"), "base directory where files are duplicated") flVarBase = flag.String("b", filepath.Join(os.Getenv("HOME"), "var"), "base directory where files are duplicated")
flCipher = flag.String("c", "sha1", "block cipher to use (sha1, or sha256)") flCipher = flag.String("c", "sha1", "block cipher to use (sha1, or sha256)")
flWorkers = flag.Int("w", 2, "workers to do summing") flWorkers = flag.Int("w", 2, "workers to do summing")
flNoop = flag.Bool("noop", false, "don't do any moving or linking")
) )
func init() { func init() {
@ -55,12 +56,18 @@ func main() {
log.Println(fi.Err) log.Println(fi.Err)
done <- struct{}{} done <- struct{}{}
} }
fmt.Printf("%s %s\n", fi.Hash, fi.Path) if *flNoop {
if ourbase.HasBlob(fi.Hash) { fmt.Printf("%s [%d] %s\n", fi.Hash, fi.Size, fi.Path)
// TODO check if they have the same Inode
// if not, then clobber
} else { } else {
// TODO hard link to blobs if ourbase.HasBlob(fi.Hash) && !ourbase.SameFile(fi.Hash, fi.Path) {
if err := ourbase.LinkTo(fi.Path, fi.Hash); err != nil {
log.Println("ERROR-1", err)
}
} else {
if err := ourbase.LinkFrom(fi.Path, fi.Hash); err != nil {
log.Println("ERROR-2", err)
}
}
} }
} }
} }

18
walker/walker.go Normal file
View file

@ -0,0 +1,18 @@
package walker
import (
"./base"
)
// Walker holds a reference to the Base it operates against.
type Walker struct {
	// Base is the base this walker refers to.
	Base *base.Base
}
// Walk is currently a placeholder: it does not use path or w.Base. It blocks
// until a value can be received from quit (or quit is closed) and then
// returns nil.
func (w Walker) Walk(path string, quit chan int) error {
	// A plain receive is equivalent to the former single-case select and
	// leaves no unreachable statement after it.
	<-quit
	return nil
}