mirror of
https://github.com/vbatts/tar-split.git
synced 2024-12-18 11:36:30 +00:00
commit
a80fb82091
6 changed files with 18 additions and 22 deletions
|
@ -9,7 +9,7 @@ import (
|
|||
"github.com/vbatts/tar-split/tar/storage"
|
||||
)
|
||||
|
||||
// NewOutputTarStream returns an io.ReadCloser that is an assemble tar archive
|
||||
// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive
|
||||
// stream.
|
||||
//
|
||||
// It takes a storage.FileGetter, for mapping the file payloads that are to be read in,
|
||||
|
|
|
@ -22,8 +22,8 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
// What to do here... folks will want their own access to the Reader that is
|
||||
// their tar archive stream, but we'll need that same stream to use our
|
||||
// forked 'archive/tar'.
|
||||
// Perhaps do an io.TeeReader that hand back an io.Reader for them to read
|
||||
// from, and we'll mitm the stream to store metadata.
|
||||
// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
|
||||
// from, and we'll MITM the stream to store metadata.
|
||||
// We'll need a storage.FilePutter too ...
|
||||
|
||||
// Another concern, whether to do any storage.FilePutter operations, such that we
|
||||
|
@ -32,7 +32,7 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
|
|||
// Perhaps we have a DiscardFilePutter that is a bit bucket.
|
||||
|
||||
// we'll return the pipe reader, since TeeReader does not buffer and will
|
||||
// only read what the outputRdr Read's. Since Tar archive's have padding on
|
||||
// only read what the outputRdr Read's. Since Tar archives have padding on
|
||||
// the end, we want to be the one reading the padding, even if the user's
|
||||
// `archive/tar` doesn't care.
|
||||
pR, pW := io.Pipe()
|
||||
|
|
|
@ -5,7 +5,7 @@ Packing and unpacking the Entries of the stream. The types of streams are
|
|||
either segments of raw bytes (for the raw headers and various padding) and for
|
||||
an entry marking a file payload.
|
||||
|
||||
The raw bytes are stored precisely in the packed (marshalled) Entry. Where as
|
||||
The raw bytes are stored precisely in the packed (marshalled) Entry, whereas
|
||||
the file payload marker include the name of the file, size, and crc64 checksum
|
||||
(for basic file integrity).
|
||||
*/
|
||||
|
|
|
@ -19,11 +19,11 @@ const (
|
|||
// SegmentType represents a raw bytes segment from the archive stream. These raw
|
||||
// byte segments consist of the raw headers and various padding.
|
||||
//
|
||||
// It's payload is to be marshalled base64 encoded.
|
||||
// Its payload is to be marshalled base64 encoded.
|
||||
SegmentType
|
||||
)
|
||||
|
||||
// Entry is a the structure for packing and unpacking the information read from
|
||||
// Entry is the structure for packing and unpacking the information read from
|
||||
// the Tar archive.
|
||||
//
|
||||
// FileType Payload checksum is using `hash/crc64` for basic file integrity,
|
||||
|
@ -34,6 +34,6 @@ type Entry struct {
|
|||
Type Type `json:"type"`
|
||||
Name string `json:"name",omitempty`
|
||||
Size int64 `json:"size",omitempty`
|
||||
Payload []byte `json:"payload"` // SegmentType store payload here; FileType store crc64 checksum here;
|
||||
Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
|
||||
Position int `json:"position"`
|
||||
}
|
||||
|
|
|
@ -10,9 +10,9 @@ import (
|
|||
"path/filepath"
|
||||
)
|
||||
|
||||
// FileGetter is the interface for getting a stream of a file payload, address
|
||||
// by name/filename. Presumably, the names will be scoped to relative file
|
||||
// paths.
|
||||
// FileGetter is the interface for getting a stream of a file payload,
|
||||
// addressed by name/filename. Presumably, the names will be scoped to relative
|
||||
// file paths.
|
||||
type FileGetter interface {
|
||||
// Get returns a stream for the provided file path
|
||||
Get(filename string) (output io.ReadCloser, err error)
|
||||
|
@ -77,7 +77,7 @@ type readCloserWrapper struct {
|
|||
|
||||
func (w *readCloserWrapper) Close() error { return nil }
|
||||
|
||||
// NewBufferFileGetPutter is simple in memory FileGetPutter
|
||||
// NewBufferFileGetPutter is a simple in-memory FileGetPutter
|
||||
//
|
||||
// Implication is this is memory intensive...
|
||||
// Probably best for testing or light weight cases.
|
||||
|
|
|
@ -8,8 +8,8 @@ import (
|
|||
"path/filepath"
|
||||
)
|
||||
|
||||
// ErrDuplicatePath is occured when a tar archive has more than one entry for
|
||||
// the same file path
|
||||
// ErrDuplicatePath occurs when a tar archive has more than one entry for the
|
||||
// same file path
|
||||
var ErrDuplicatePath = errors.New("duplicates of file paths not supported")
|
||||
|
||||
// Packer describes the methods to pack Entries to a storage destination
|
||||
|
@ -65,7 +65,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
|
|||
if _, ok := jup.seen[cName]; ok {
|
||||
return nil, ErrDuplicatePath
|
||||
}
|
||||
jup.seen[cName] = emptyByte
|
||||
jup.seen[cName] = struct{}{}
|
||||
}
|
||||
|
||||
return &e, err
|
||||
|
@ -90,11 +90,7 @@ type jsonPacker struct {
|
|||
seen seenNames
|
||||
}
|
||||
|
||||
type seenNames map[string]byte
|
||||
|
||||
// used in the seenNames map. byte is a uint8, and we'll re-use the same one
|
||||
// for minimalism.
|
||||
const emptyByte byte = 0
|
||||
type seenNames map[string]struct{}
|
||||
|
||||
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
||||
// check early for dup name
|
||||
|
@ -103,7 +99,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
|||
if _, ok := jp.seen[cName]; ok {
|
||||
return -1, ErrDuplicatePath
|
||||
}
|
||||
jp.seen[cName] = emptyByte
|
||||
jp.seen[cName] = struct{}{}
|
||||
}
|
||||
|
||||
e.Position = jp.pos
|
||||
|
@ -117,7 +113,7 @@ func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
|
|||
return e.Position, nil
|
||||
}
|
||||
|
||||
// NewJSONPacker provides an Packer that writes each Entry (SegmentType and
|
||||
// NewJSONPacker provides a Packer that writes each Entry (SegmentType and
|
||||
// FileType) as a json document.
|
||||
//
|
||||
// The Entries are delimited by new line.
|
||||
|
|
Loading…
Reference in a new issue