mirror of
https://github.com/vbatts/dedupe-linker.git
synced 2024-12-28 01:26:33 +00:00
various changes
This commit is contained in:
parent
bd70060729
commit
a023c98cd1
5 changed files with 197 additions and 16 deletions
116
base/base.go
116
base/base.go
|
@ -1,15 +1,21 @@
|
|||
package base
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"../file"
|
||||
)
|
||||
|
||||
func NewBase(path string, hashName string) (*Base, error) {
|
||||
root := filepath.Join(path, "dedup")
|
||||
for _, p := range []string{"blobs/" + hashName, "state"} {
|
||||
if err := os.MkdirAll(filepath.Join(root, p), 0755); err != nil {
|
||||
for _, p := range []string{"blobs/" + hashName, "state", "tmp"} {
|
||||
if err := os.MkdirAll(filepath.Join(root, p), 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +25,11 @@ func NewBase(path string, hashName string) (*Base, error) {
|
|||
// Base is a handle on a dedupe store on disk.
type Base struct {
	// Path is the directory under which the "blobs" and "tmp"
	// directories are looked up.
	Path string

	// HashName is the directory name, below "blobs", that holds the
	// blobs for this hash.
	HashName string

	// Hash is the hash function used to compute the sum of content
	// stored through PutBlob.
	Hash crypto.Hash
}
|
||||
|
||||
func (b Base) Stat(sum string) (os.FileInfo, error) {
|
||||
return os.Stat(b.blobPath(sum))
|
||||
}
|
||||
|
||||
func (b Base) blobPath(sum string) string {
|
||||
|
@ -28,22 +39,111 @@ func (b Base) blobPath(sum string) string {
|
|||
return filepath.Join(b.Path, "blobs", b.HashName, sum[0:2], sum)
|
||||
}
|
||||
|
||||
// ReaderSeekerCloser groups the Read, Seek and Close methods. It is the
// handle type returned for an opened blob.
type ReaderSeekerCloser interface {
	io.ReadSeeker
	io.Closer
}
|
||||
|
||||
func (b Base) SameFile(sum, path string) bool {
|
||||
var (
|
||||
bInode, dInode uint64
|
||||
err error
|
||||
)
|
||||
if bInode, err = file.GetInode(b.blobPath(sum)); err != nil {
|
||||
return false
|
||||
}
|
||||
if dInode, err = file.GetInode(path); err != nil {
|
||||
return false
|
||||
}
|
||||
if bInode == dInode {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
// GetBlob store the content from src, for the sum and hashType
|
||||
func (b Base) GetBlob(sum string) (io.Reader, error) {
|
||||
// XXX
|
||||
return nil, nil
|
||||
func (b Base) GetBlob(sum string) (ReaderSeekerCloser, error) {
|
||||
return os.Open(b.blobPath(sum))
|
||||
}
|
||||
|
||||
// PutBlob store the content from src, for the sum and hashType
|
||||
//
|
||||
// we take the sum up front to avoid recalculation and tempfiles
|
||||
func (b Base) PutBlob(sum string, src io.Reader) error {
|
||||
func (b Base) PutBlob(src io.Reader, mode os.FileMode) (string, error) {
|
||||
fh, err := b.tmpFile()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer func() {
|
||||
fh.Close()
|
||||
os.Remove(fh.Name())
|
||||
}()
|
||||
|
||||
h := b.Hash.New()
|
||||
t := io.TeeReader(src, h)
|
||||
|
||||
if _, err = io.Copy(fh, t); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
sum := fmt.Sprintf("%x", h.Sum(nil))
|
||||
fi, err := b.Stat(sum)
|
||||
if err == nil && fi.Mode().IsRegular() {
|
||||
return sum, nil
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0755); err != nil && !os.IsExist(err) {
|
||||
return sum, err
|
||||
}
|
||||
destFh, err := os.Create(b.blobPath(sum))
|
||||
if err != nil {
|
||||
return sum, err
|
||||
}
|
||||
defer destFh.Close()
|
||||
_, err = fh.Seek(0, 0)
|
||||
if err != nil {
|
||||
return sum, err
|
||||
}
|
||||
if _, err = io.Copy(destFh, fh); err != nil {
|
||||
return sum, err
|
||||
}
|
||||
return sum, destFh.Chmod(mode)
|
||||
}
|
||||
|
||||
func (b Base) tmpFile() (*os.File, error) {
|
||||
return ioutil.TempFile(filepath.Join(b.Path, "tmp"), "put")
|
||||
}
|
||||
|
||||
// Hard link the file from src to the blob for sum
|
||||
func (b Base) LinkFrom(src, sum string) error {
|
||||
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0756); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return os.Link(src, b.blobPath(sum))
|
||||
}
|
||||
|
||||
// Hard link the file for sum to the path at dest
|
||||
func (b Base) LinkTo(dest, sum string) error {
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
err := os.Link(b.blobPath(sum), dest)
|
||||
if err != nil && os.IsExist(err) {
|
||||
if !b.SameFile(sum, dest) {
|
||||
// XXX
|
||||
log.Printf("Would clobber %q with %q", dest, b.blobPath(sum))
|
||||
}
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasBlob tests whether a blob with this sum exists
|
||||
func (b Base) HasBlob(sum string) bool {
|
||||
// XXX
|
||||
return true
|
||||
fi, err := b.Stat(sum)
|
||||
log.Println("SUCH FARTS", fi)
|
||||
return fi != nil && err == nil
|
||||
}
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
package base
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSumPath(t *testing.T) {
|
||||
expected := "/var/dedup/blobs/sha1/de/deadbeef"
|
||||
|
@ -9,3 +16,51 @@ func TestSumPath(t *testing.T) {
|
|||
t.Errorf("expected %q, got %q", expected, bp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetPut(t *testing.T) {
|
||||
var (
|
||||
srcDir, destDir string
|
||||
err error
|
||||
)
|
||||
if srcDir, err = ioutil.TempDir("", "dedupe-linker-src"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(srcDir)
|
||||
if destDir, err = ioutil.TempDir("", "dedupe-linker-dest"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(destDir)
|
||||
|
||||
b, err := NewBase(destDir, "sha1")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rHash := "deadbeaf"
|
||||
rMsg := "this is the dead beef"
|
||||
|
||||
r := bytes.NewReader([]byte(rMsg))
|
||||
sum, err := b.PutBlob(r, 0666)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
fi, err := b.Stat(rHash)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
fmt.Printf("%#v\n", fi.Sys())
|
||||
|
||||
if err = b.LinkTo(path.Join(srcDir, "beef1.txt"), rHash); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
fi2, err := os.Stat(path.Join(srcDir, "beef1.txt"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
fmt.Printf("%#v\n", fi2.Sys())
|
||||
|
||||
if err = b.LinkTo(path.Join(srcDir, "beef1.txt"), rHash); err != nil && !os.IsExist(err) {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ type FileHashInfo struct {
|
|||
HashType crypto.Hash
|
||||
Hash string
|
||||
Path string
|
||||
Size int64
|
||||
ModTime time.Time
|
||||
Err error
|
||||
}
|
||||
|
@ -58,7 +59,7 @@ func HashFileGetter(path string, hash crypto.Hash, workers int, done <-chan stru
|
|||
}
|
||||
|
||||
func hashFile(path string, hash crypto.Hash, info os.FileInfo) *FileHashInfo {
|
||||
fhi := FileHashInfo{HashType: hash, Path: path, ModTime: info.ModTime()}
|
||||
fhi := FileHashInfo{HashType: hash, Path: path, ModTime: info.ModTime(), Size: info.Size()}
|
||||
h := hash.New()
|
||||
fh, err := os.Open(path)
|
||||
if err != nil {
|
||||
|
|
17
main.go
17
main.go
|
@ -17,6 +17,7 @@ var (
|
|||
flVarBase = flag.String("b", filepath.Join(os.Getenv("HOME"), "var"), "base directory where files are duplicated")
|
||||
flCipher = flag.String("c", "sha1", "block cipher to use (sha1, or sha256)")
|
||||
flWorkers = flag.Int("w", 2, "workers to do summing")
|
||||
flNoop = flag.Bool("noop", false, "don't do any moving or linking")
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -55,12 +56,18 @@ func main() {
|
|||
log.Println(fi.Err)
|
||||
done <- struct{}{}
|
||||
}
|
||||
fmt.Printf("%s %s\n", fi.Hash, fi.Path)
|
||||
if ourbase.HasBlob(fi.Hash) {
|
||||
// TODO check if they have the same Inode
|
||||
// if not, then clobber
|
||||
if *flNoop {
|
||||
fmt.Printf("%s [%d] %s\n", fi.Hash, fi.Size, fi.Path)
|
||||
} else {
|
||||
// TODO hard link to blobs
|
||||
if ourbase.HasBlob(fi.Hash) && !ourbase.SameFile(fi.Hash, fi.Path) {
|
||||
if err := ourbase.LinkTo(fi.Path, fi.Hash); err != nil {
|
||||
log.Println("ERROR-1", err)
|
||||
}
|
||||
} else {
|
||||
if err := ourbase.LinkFrom(fi.Path, fi.Hash); err != nil {
|
||||
log.Println("ERROR-2", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
18
walker/walker.go
Normal file
18
walker/walker.go
Normal file
|
@ -0,0 +1,18 @@
|
|||
package walker
|
||||
|
||||
import (
|
||||
"./base"
|
||||
)
|
||||
|
||||
type Walker struct {
|
||||
Base *base.Base
|
||||
}
|
||||
|
||||
func (w Walker) Walk(path string, quit chan int) error {
|
||||
|
||||
select {
|
||||
case <-quit:
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
Loading…
Reference in a new issue