2015-03-02 20:25:03 +00:00
|
|
|
package asm
|
|
|
|
|
|
|
|
import (
|
|
|
|
"io"
|
|
|
|
|
|
|
|
"github.com/vbatts/tar-split/archive/tar"
|
|
|
|
"github.com/vbatts/tar-split/tar/storage"
|
|
|
|
)
|
|
|
|
|
2015-03-02 21:49:53 +00:00
|
|
|
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
|
|
|
|
// Reader stream of the same.
|
|
|
|
//
|
|
|
|
// In the middle it will pack the segments and file metadata to storage.Packer
|
|
|
|
// `p`.
|
|
|
|
//
|
2015-03-09 17:56:45 +00:00
|
|
|
// The the storage.FilePutter is where payload of files in the stream are
|
|
|
|
// stashed. If this stashing is not needed, you can provide a nil
|
|
|
|
// storage.FilePutter. Since the checksumming is still needed, then a default
|
|
|
|
// of NewDiscardFilePutter will be used internally
|
2015-03-09 17:20:26 +00:00
|
|
|
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
|
2015-03-02 20:25:03 +00:00
|
|
|
// What to do here... folks will want their own access to the Reader that is
|
|
|
|
// their tar archive stream, but we'll need that same stream to use our
|
|
|
|
// forked 'archive/tar'.
|
2015-06-23 20:13:29 +00:00
|
|
|
// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
|
|
|
|
// from, and we'll MITM the stream to store metadata.
|
2015-03-09 17:20:26 +00:00
|
|
|
// We'll need a storage.FilePutter too ...
|
2015-03-02 20:25:03 +00:00
|
|
|
|
2015-03-09 17:20:26 +00:00
|
|
|
// Another concern, whether to do any storage.FilePutter operations, such that we
|
2015-03-02 20:25:03 +00:00
|
|
|
// don't extract any amount of the archive. But then again, we're not making
|
2015-03-09 17:20:26 +00:00
|
|
|
// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
|
2015-03-02 20:25:03 +00:00
|
|
|
// Perhaps we have a DiscardFilePutter that is a bit bucket.
|
|
|
|
|
|
|
|
// we'll return the pipe reader, since TeeReader does not buffer and will
|
2015-06-23 20:13:29 +00:00
|
|
|
// only read what the outputRdr Read's. Since Tar archives have padding on
|
2015-03-02 20:25:03 +00:00
|
|
|
// the end, we want to be the one reading the padding, even if the user's
|
|
|
|
// `archive/tar` doesn't care.
|
|
|
|
pR, pW := io.Pipe()
|
|
|
|
outputRdr := io.TeeReader(r, pW)
|
|
|
|
|
2015-03-04 22:14:43 +00:00
|
|
|
// we need a putter that will generate the crc64 sums of file payloads
|
2015-03-03 19:23:04 +00:00
|
|
|
if fp == nil {
|
2015-03-09 17:20:26 +00:00
|
|
|
fp = storage.NewDiscardFilePutter()
|
2015-03-03 19:23:04 +00:00
|
|
|
}
|
|
|
|
|
2015-03-02 21:49:53 +00:00
|
|
|
go func() {
|
|
|
|
tr := tar.NewReader(outputRdr)
|
|
|
|
tr.RawAccounting = true
|
|
|
|
for {
|
|
|
|
hdr, err := tr.Next()
|
|
|
|
if err != nil {
|
|
|
|
if err != io.EOF {
|
|
|
|
pW.CloseWithError(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// even when an EOF is reached, there is often 1024 null bytes on
|
|
|
|
// the end of an archive. Collect them too.
|
2015-08-11 19:51:19 +00:00
|
|
|
if b := tr.RawBytes(); len(b) > 0 {
|
|
|
|
_, err := p.AddEntry(storage.Entry{
|
|
|
|
Type: storage.SegmentType,
|
|
|
|
Payload: b,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
pW.CloseWithError(err)
|
|
|
|
return
|
|
|
|
}
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
2015-03-04 22:14:43 +00:00
|
|
|
break // not return. We need the end of the reader.
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
2015-06-23 16:23:36 +00:00
|
|
|
if hdr == nil {
|
|
|
|
break // not return. We need the end of the reader.
|
|
|
|
}
|
2015-03-02 21:49:53 +00:00
|
|
|
|
2015-08-11 19:51:19 +00:00
|
|
|
if b := tr.RawBytes(); len(b) > 0 {
|
|
|
|
_, err := p.AddEntry(storage.Entry{
|
|
|
|
Type: storage.SegmentType,
|
|
|
|
Payload: b,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
pW.CloseWithError(err)
|
|
|
|
return
|
|
|
|
}
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
|
|
|
|
2015-03-06 21:30:48 +00:00
|
|
|
var csum []byte
|
2015-03-02 21:49:53 +00:00
|
|
|
if hdr.Size > 0 {
|
2015-06-21 18:14:05 +00:00
|
|
|
var err error
|
|
|
|
_, csum, err = fp.Put(hdr.Name, tr)
|
|
|
|
if err != nil {
|
2015-03-03 19:23:04 +00:00
|
|
|
pW.CloseWithError(err)
|
2015-07-21 16:08:57 +00:00
|
|
|
return
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
|
|
|
}
|
2015-03-06 21:30:48 +00:00
|
|
|
|
2015-09-23 19:24:15 +00:00
|
|
|
entry := storage.Entry{
|
2015-03-03 19:23:04 +00:00
|
|
|
Type: storage.FileType,
|
|
|
|
Size: hdr.Size,
|
2015-03-06 21:30:48 +00:00
|
|
|
Payload: csum,
|
2015-09-23 19:24:15 +00:00
|
|
|
}
|
|
|
|
// For proper marshalling of non-utf8 characters
|
|
|
|
entry.SetName(hdr.Name)
|
|
|
|
|
|
|
|
// File entries added, regardless of size
|
|
|
|
_, err = p.AddEntry(entry)
|
2015-03-04 22:14:43 +00:00
|
|
|
if err != nil {
|
2015-03-02 21:49:53 +00:00
|
|
|
pW.CloseWithError(err)
|
2015-07-21 16:08:57 +00:00
|
|
|
return
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
|
|
|
|
2015-03-05 19:09:17 +00:00
|
|
|
if b := tr.RawBytes(); len(b) > 0 {
|
|
|
|
_, err = p.AddEntry(storage.Entry{
|
|
|
|
Type: storage.SegmentType,
|
|
|
|
Payload: b,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
pW.CloseWithError(err)
|
2015-07-21 16:08:57 +00:00
|
|
|
return
|
2015-03-05 19:09:17 +00:00
|
|
|
}
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-07 13:05:46 +00:00
|
|
|
// It is allowable, and not uncommon that there is further padding on
|
|
|
|
// the end of an archive, apart from the expected 1024 null bytes. We
|
|
|
|
// do this in chunks rather than in one go to avoid cases where a
|
|
|
|
// maliciously crafted tar file tries to trick us into reading many GBs
|
|
|
|
// into memory.
|
|
|
|
const paddingChunkSize = 1024 * 1024
|
|
|
|
var paddingChunk [paddingChunkSize]byte
|
|
|
|
for {
|
|
|
|
var isEOF bool
|
|
|
|
n, err := outputRdr.Read(paddingChunk[:])
|
|
|
|
if err != nil {
|
|
|
|
if err != io.EOF {
|
|
|
|
pW.CloseWithError(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
isEOF = true
|
|
|
|
}
|
2023-07-22 00:35:45 +00:00
|
|
|
if n != 0 {
|
|
|
|
_, err = p.AddEntry(storage.Entry{
|
|
|
|
Type: storage.SegmentType,
|
|
|
|
Payload: paddingChunk[:n],
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
pW.CloseWithError(err)
|
|
|
|
return
|
|
|
|
}
|
2017-11-07 13:05:46 +00:00
|
|
|
}
|
|
|
|
if isEOF {
|
|
|
|
break
|
|
|
|
}
|
2015-03-02 21:49:53 +00:00
|
|
|
}
|
2015-07-21 16:08:57 +00:00
|
|
|
pW.Close()
|
2015-03-02 21:49:53 +00:00
|
|
|
}()
|
2015-03-02 20:25:03 +00:00
|
|
|
|
|
|
|
return pR, nil
|
|
|
|
}
|