// dedupe-linker/base/base.go

package base
import (
2014-10-14 20:54:28 +00:00
"crypto"
"crypto/rand"
2014-10-14 20:54:28 +00:00
"fmt"
2014-09-16 21:12:52 +00:00
"io"
2014-10-14 20:54:28 +00:00
"io/ioutil"
"log"
2014-09-12 20:10:10 +00:00
"os"
"path/filepath"
2014-10-14 20:54:28 +00:00
2016-07-18 15:23:33 +00:00
"github.com/vbatts/dedupe-linker/cryptomap"
"github.com/vbatts/dedupe-linker/file"
2014-09-12 20:10:10 +00:00
)
// NewBase populates the directories needed in a dedupe-base directory
2014-09-16 21:12:52 +00:00
func NewBase(path string, hashName string) (*Base, error) {
2014-10-14 20:54:28 +00:00
for _, p := range []string{"blobs/" + hashName, "state", "tmp"} {
if err := os.MkdirAll(filepath.Join(path, p), 0755); err != nil && !os.IsExist(err) {
2014-09-16 21:12:52 +00:00
return nil, err
2014-09-12 20:10:10 +00:00
}
}
return &Base{Path: path, HashName: hashName, Hash: cryptomap.DetermineHash(hashName)}, nil
2014-09-12 20:10:10 +00:00
}
// Base is the destination for all hardlinks: a directory in which stored
// objects are kept in a content addressable tree.
type Base struct {
	// Path is the root directory of the base.
	Path string
	// HashName is the name of the hash; it is also the directory component
	// under "blobs" that holds the objects.
	HashName string
	// Hash is the hash used to compute the address of stored content.
	Hash crypto.Hash
}
// Stat provides the os.FileInfo for the object of `sum` address
2014-10-14 20:54:28 +00:00
func (b Base) Stat(sum string) (os.FileInfo, error) {
return os.Stat(b.blobPath(sum))
2014-09-16 21:12:52 +00:00
}
2014-09-17 13:21:56 +00:00
// blobPath maps the address `sum` to the path of its object file:
// <Path>/blobs/<HashName>/<first two characters of sum>/<sum>.
// An address shorter than three characters yields the empty string.
func (b Base) blobPath(sum string) string {
	if len(sum) >= 3 {
		prefix := sum[:2]
		return filepath.Join(b.Path, "blobs", b.HashName, prefix, sum)
	}
	return ""
}
// ReadSeekCloser is like an io.ReadCloser, but can Seek too.
type ReadSeekCloser interface {
	io.Reader
	io.Seeker
	io.Closer
}
// SameFile checks whether the object of `sum` address, and `path` file path are the same file.
// This checks by inode and device.
2014-10-14 20:54:28 +00:00
func (b Base) SameFile(sum, path string) bool {
return file.SameFile(b.blobPath(sum), path)
2014-10-14 20:54:28 +00:00
}
2014-09-16 21:12:52 +00:00
// GetBlob store the content from src, for the sum and hashType
func (b Base) GetBlob(sum string) (ReadSeekCloser, error) {
2014-10-14 20:54:28 +00:00
return os.Open(b.blobPath(sum))
2014-09-16 21:12:52 +00:00
}
// PutBlob store the content from src, for the sum and hashType
//
// we take the sum up front to avoid recalculation and tempfiles
2014-10-14 20:54:28 +00:00
func (b Base) PutBlob(src io.Reader, mode os.FileMode) (string, error) {
fh, err := b.tmpFile()
if err != nil {
return "", err
}
defer func() {
fh.Close()
os.Remove(fh.Name())
}()
h := b.Hash.New()
t := io.TeeReader(src, h)
if _, err = io.Copy(fh, t); err != nil {
return "", err
}
sum := fmt.Sprintf("%x", h.Sum(nil))
fi, err := b.Stat(sum)
if err == nil && fi.Mode().IsRegular() {
return sum, nil
}
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0755); err != nil && !os.IsExist(err) {
return sum, err
}
destFh, err := os.Create(b.blobPath(sum))
if err != nil {
return sum, err
}
defer destFh.Close()
_, err = fh.Seek(0, 0)
if err != nil {
return sum, err
}
if _, err = io.Copy(destFh, fh); err != nil {
return sum, err
}
return sum, destFh.Chmod(mode)
}
// tmpFile creates a new temporary file, named with the prefix "put", inside
// the "tmp" directory of the base and returns it open.
func (b Base) tmpFile() (*os.File, error) {
	dir := filepath.Join(b.Path, "tmp")
	return ioutil.TempFile(dir, "put")
}
// LinkFrom make a hard link the file from src to the blob of address `sum`.
// TODO this function is going away, because it makes no assessment of the checksum of `src`
2014-10-14 20:54:28 +00:00
func (b Base) LinkFrom(src, sum string) error {
if err := os.MkdirAll(filepath.Dir(b.blobPath(sum)), 0756); err != nil && !os.IsExist(err) {
return err
}
return os.Link(src, b.blobPath(sum))
}
// randomString returns the hex encoding (20 characters) of 10 bytes read
// from crypto/rand, or the error of that read.
func randomString() (string, error) {
	var raw [10]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", err
	}
	name := fmt.Sprintf("%x", raw[:])
	return name, nil
}
// SafeLink hard links oldname to newname, overriding newname if it already
// exists. If there is an error in creating the link, the transaction is
// rolled back.
//
// An existing newname is first renamed to "<newname>.<random hex>". When the
// link succeeds that backup is removed; when the link fails the backup is
// renamed back and the link error is returned. If the rename back fails as
// well, its error is returned instead. The replacement is not atomic:
// between the rename and the link, newname does not exist.
func SafeLink(oldname, newname string) error {
	var backupName string

	// Lstat rather than Stat: a newname that is a dangling symlink also
	// blocks the link and has to be moved out of the way.
	if _, err := os.Lstat(newname); err == nil {
		// make a random name
		buf := make([]byte, 5)
		if _, err := rand.Read(buf); err != nil {
			return err
		}
		backupName = fmt.Sprintf("%s.%x", newname, buf)
		// move newname to the random name backupName
		if err := os.Rename(newname, backupName); err != nil {
			return err
		}
	}

	// hardlink oldname to newname
	if err := os.Link(oldname, newname); err != nil {
		if backupName != "" {
			// Move the backup back. A separate variable keeps a successful
			// rollback from overwriting the link error with nil.
			if rerr := os.Rename(backupName, newname); rerr != nil {
				return rerr
			}
		}
		return err
	}

	if backupName != "" {
		// Best effort: the link is already in place, so a backup that cannot
		// be removed is not treated as a failure.
		os.Remove(backupName)
	}
	return nil
}
// LinkTo makes a hard link the file of address `sum` to the path at `dest`
2014-10-14 20:54:28 +00:00
func (b Base) LinkTo(dest, sum string) error {
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil && !os.IsExist(err) {
return err
}
err := os.Link(b.blobPath(sum), dest)
if err != nil && os.IsExist(err) {
if !b.SameFile(sum, dest) {
SafeLink(b.blobPath(sum), dest)
log.Printf("dedupped %q with %q", dest, b.blobPath(sum))
2014-10-14 20:54:28 +00:00
}
} else if err != nil {
return err
}
2014-09-16 21:12:52 +00:00
return nil
2014-09-12 20:10:10 +00:00
}
2014-09-16 21:12:52 +00:00
// HasBlob tests whether a blob with this sum exists
func (b Base) HasBlob(sum string) bool {
2014-10-14 20:54:28 +00:00
fi, err := b.Stat(sum)
return fi != nil && err == nil
2014-09-12 20:10:10 +00:00
}