mirror of
https://github.com/vbatts/tar-split.git
synced 2024-12-24 14:26:30 +00:00
8d76363085
io.Copy usually allocates a 32kB buffer, and due to the large number of files processed by tar-split, this shows up in Go profiles as a very large alloc_space total. It doesn't seem to actually be a measurable problem in any way, but we can allocate the buffer only once per tar-split creation, at no additional cost to existing allocations, so let's do so, and remove the distraction. Signed-off-by: Miloslav Trmač <mitr@redhat.com>
105 lines
2.7 KiB
Go
105 lines
2.7 KiB
Go
package storage
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"hash/crc64"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
)
|
|
|
|
// FileGetter is the interface for getting a stream of a file payload,
|
|
// addressed by name/filename. Presumably, the names will be scoped to relative
|
|
// file paths.
|
|
type FileGetter interface {
|
|
// Get returns a stream for the provided file path
|
|
Get(filename string) (output io.ReadCloser, err error)
|
|
}
|
|
|
|
// FilePutter is the interface for storing a stream of a file payload,
|
|
// addressed by name/filename.
|
|
type FilePutter interface {
|
|
// Put returns the size of the stream received, and the crc64 checksum for
|
|
// the provided stream
|
|
Put(filename string, input io.Reader) (size int64, checksum []byte, err error)
|
|
}
|
|
|
|
// FileGetPutter is the interface that groups both Getting and Putting file
|
|
// payloads.
|
|
type FileGetPutter interface {
|
|
FileGetter
|
|
FilePutter
|
|
}
|
|
|
|
// NewPathFileGetter returns a FileGetter that is for files relative to path
|
|
// relpath.
|
|
func NewPathFileGetter(relpath string) FileGetter {
|
|
return &pathFileGetter{root: relpath}
|
|
}
|
|
|
|
type pathFileGetter struct {
|
|
root string
|
|
}
|
|
|
|
func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) {
|
|
return os.Open(filepath.Join(pfg.root, filename))
|
|
}
|
|
|
|
type bufferFileGetPutter struct {
|
|
files map[string][]byte
|
|
}
|
|
|
|
func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) {
|
|
if _, ok := bfgp.files[name]; !ok {
|
|
return nil, errors.New("no such file")
|
|
}
|
|
b := bytes.NewBuffer(bfgp.files[name])
|
|
return &readCloserWrapper{b}, nil
|
|
}
|
|
|
|
func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) {
|
|
crc := crc64.New(CRCTable)
|
|
buf := bytes.NewBuffer(nil)
|
|
cw := io.MultiWriter(crc, buf)
|
|
i, err := io.Copy(cw, r)
|
|
if err != nil {
|
|
return 0, nil, err
|
|
}
|
|
bfgp.files[name] = buf.Bytes()
|
|
return i, crc.Sum(nil), nil
|
|
}
|
|
|
|
type readCloserWrapper struct {
|
|
io.Reader
|
|
}
|
|
|
|
func (w *readCloserWrapper) Close() error { return nil }
|
|
|
|
// NewBufferFileGetPutter is a simple in-memory FileGetPutter
|
|
//
|
|
// Implication is this is memory intensive...
|
|
// Probably best for testing or light weight cases.
|
|
func NewBufferFileGetPutter() FileGetPutter {
|
|
return &bufferFileGetPutter{
|
|
files: map[string][]byte{},
|
|
}
|
|
}
|
|
|
|
// NewDiscardFilePutter is a bit bucket FilePutter
|
|
func NewDiscardFilePutter() FilePutter {
|
|
return &bitBucketFilePutter{}
|
|
}
|
|
|
|
type bitBucketFilePutter struct {
|
|
buffer [32 * 1024]byte // 32 kB is the buffer size currently used by io.Copy, as of August 2021.
|
|
}
|
|
|
|
func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) {
|
|
c := crc64.New(CRCTable)
|
|
i, err := io.CopyBuffer(c, r, bbfp.buffer[:])
|
|
return i, c.Sum(nil), err
|
|
}
|
|
|
|
// CRCTable is the default table used for crc64 sum calculations
|
|
var CRCTable = crc64.MakeTable(crc64.ISO)
|