mirror of
https://github.com/vbatts/tar-split.git
Add tar/asm.IterateHeaders
This allows reading the metadata contained in tar-split without expensively recreating the whole tar stream including full contents. We have two use cases for this:

- In a situation where tar-split is distributed along with a separate metadata stream, ensuring that the two are exactly consistent
- Reading the tar headers allows making a ~cheap check of consistency of on-disk layers, just checking that the files exist in expected sizes, without reading the full contents.

This can be implemented outside of this repo, but it's not ideal:

- The function necessarily hard-codes some assumptions about how tar-split determines the boundaries of SegmentType/FileType entries (or, indeed, whether it uses FileType entries at all). That's best maintained directly beside the code that creates this.
- The ExpectedPadding() value is not currently exported, so the consumer would have to heuristically guess where the padding ends.

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
parent fe4605ae8b
commit 99c8914877

5 changed files with 190 additions and 1 deletion
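Note (illustration, not part of the commit): the second use case above could look roughly like the sketch below. checkLayerSizes, the io.Reader carrying the tar-split data, and the flat layerDir layout are assumptions made for this example; only asm.IterateHeaders, storage.NewJSONUnpacker, and the tar.Header fields come from this repository.

package example

import (
	"fmt"
	"io"
	"os"
	"path/filepath"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

// checkLayerSizes is a hypothetical consumer: it walks the tar-split metadata
// and checks that every regular file exists under layerDir with the expected
// size, without reading any file contents.
func checkLayerSizes(tarSplit io.Reader, layerDir string) error {
	unpacker := storage.NewJSONUnpacker(tarSplit)
	return asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
		if hdr.Typeflag != tar.TypeReg {
			return nil // only regular files carry a payload whose size we can verify
		}
		st, err := os.Stat(filepath.Join(layerDir, hdr.Name))
		if err != nil {
			return err
		}
		if st.Size() != hdr.Size {
			return fmt.Errorf("%s: on-disk size %d, tar-split expects %d", hdr.Name, st.Size(), hdr.Size)
		}
		return nil
	})
}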
archive/tar/reader.go (5 added lines)

@@ -56,6 +56,11 @@ func (tr *Reader) RawBytes() []byte {
 }
 
+// ExpectedPadding returns the number of bytes of padding expected after the last header returned by Next()
+func (tr *Reader) ExpectedPadding() int64 {
+	return tr.pad
+}
+
 // NewReader creates a new Reader reading from r.
 func NewReader(r io.Reader) *Reader {
 	return &Reader{r: r, curr: &regFileReader{r, 0}}
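Note (illustration): tar rounds each payload up to a full 512-byte block, and the reader tracks that remainder in tr.pad; the new accessor simply exposes it. A minimal sketch using this repository's archive/tar fork (the file name and the 513-byte size are arbitrary choices for the example):

package main

import (
	"bytes"
	"fmt"

	"github.com/vbatts/tar-split/archive/tar"
)

func main() {
	// Build an in-memory tar archive containing one 513-byte regular file.
	var buf bytes.Buffer
	w := tar.NewWriter(&buf)
	if err := w.WriteHeader(&tar.Header{Name: "file", Typeflag: tar.TypeReg, Mode: 0644, Size: 513}); err != nil {
		panic(err)
	}
	if _, err := w.Write(make([]byte, 513)); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	// After Next(), ExpectedPadding reports how many padding bytes follow the
	// payload: the data is rounded up to a 512-byte block, so a 513-byte file
	// is followed by 511 bytes of padding.
	tr := tar.NewReader(&buf)
	hdr, err := tr.Next()
	if err != nil {
		panic(err)
	}
	fmt.Println(hdr.Size, tr.ExpectedPadding()) // prints: 513 511
}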
go.mod (4 changed lines)

@@ -6,13 +6,17 @@ require (
 	github.com/fatih/color v1.15.0
 	github.com/magefile/mage v1.14.0
 	github.com/sirupsen/logrus v1.9.0
+	github.com/stretchr/testify v1.9.0
 	github.com/urfave/cli v1.22.12
 )
 
 require (
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.17 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	golang.org/x/sys v0.6.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
go.sum (6 changed lines)

@@ -22,17 +22,21 @@ github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8=
 github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
tar/asm/iterate.go (new file, 57 lines)

package asm

import (
	"bytes"
	"fmt"
	"io"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/storage"
)

// IterateHeaders calls handler for each tar header provided by Unpacker
func IterateHeaders(unpacker storage.Unpacker, handler func(hdr *tar.Header) error) error {
	// We assume about NewInputTarStream:
	// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
	// - (There is a FileType entry for every tar header, we ignore it)
	// - Trailing padding of a file, if any, is included in the next SegmentType entry
	// - At the end, there may be SegmentType entries just for the terminating zero blocks.

	var pendingPadding int64 = 0
	for {
		tsEntry, err := unpacker.Next()
		if err != nil {
			if err == io.EOF {
				return nil
			}
			return fmt.Errorf("reading tar-split entries: %w", err)
		}
		switch tsEntry.Type {
		case storage.SegmentType:
			payload := tsEntry.Payload
			if int64(len(payload)) < pendingPadding {
				return fmt.Errorf("expected %d bytes of padding after previous file, but next SegmentType only has %d bytes", pendingPadding, len(payload))
			}
			payload = payload[pendingPadding:]
			pendingPadding = 0

			tr := tar.NewReader(bytes.NewReader(payload))
			hdr, err := tr.Next()
			if err != nil {
				if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that.
					break
				}
				return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
			}
			if err := handler(hdr); err != nil {
				return err
			}
			pendingPadding = tr.ExpectedPadding()

		case storage.FileType:
			// Nothing
		default:
			return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
		}
	}
}
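Note (illustration): wiring the new function to tar-split data stored on disk might look like the sketch below. The path layer.tar-split.gz and the gzip wrapping are assumptions about how a caller stored the metadata, not something this repo mandates; storage.NewJSONUnpacker and asm.IterateHeaders are the pieces provided by this repository.

package main

import (
	"compress/gzip"
	"fmt"
	"os"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

func main() {
	// layer.tar-split.gz is an assumed input path; adjust to wherever the
	// tar-split metadata actually lives.
	f, err := os.Open("layer.tar-split.gz")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	zr, err := gzip.NewReader(f) // assumed gzip-compressed tar-split data
	if err != nil {
		panic(err)
	}
	defer zr.Close()

	// List every entry's type, size, and name without reconstructing the tar stream.
	unpacker := storage.NewJSONUnpacker(zr)
	if err := asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
		fmt.Printf("%c %10d %s\n", hdr.Typeflag, hdr.Size, hdr.Name)
		return nil
	}); err != nil {
		panic(err)
	}
}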
tar/asm/iterate_test.go (new file, 119 lines)

package asm

import (
	"bytes"
	"fmt"
	"io"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/storage"
)

func createTestTarheader(index int, typeFlag byte, size int64) tar.Header {
	n := (index + 1) * 100 // Use predictable, but distinct, values for all headers

	res := tar.Header{
		Typeflag:   typeFlag,
		Name:       fmt.Sprintf("name%d", n),
		Size:       size,
		Mode:       int64(n + 1),
		Uid:        n + 2,
		Gid:        n + 3,
		Uname:      fmt.Sprintf("user%d", n),
		Gname:      fmt.Sprintf("group%d", n),
		ModTime:    time.Unix(int64(n+4), 0),
		AccessTime: time.Unix(int64(n+5), 0),
		ChangeTime: time.Unix(int64(n+6), 0),
		PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)},
		Format:     tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded.
	}
	switch res.Typeflag {
	case tar.TypeLink, tar.TypeSymlink:
		res.Linkname = fmt.Sprintf("link%d", n)
	case tar.TypeChar, tar.TypeBlock:
		res.Devmajor = int64(n + 7)
		res.Devminor = int64(n + 8)
	}
	return res
}

func TestIterateHeaders(t *testing.T) {
	entries := []struct {
		typeFlag byte
		size     int64
	}{
		{tar.TypeReg, 0},
		{tar.TypeReg, 1},
		{tar.TypeReg, 511},
		{tar.TypeReg, 512},
		{tar.TypeReg, 513},
		{tar.TypeLink, 0},
		{tar.TypeSymlink, 0},
		{tar.TypeChar, 0},
		{tar.TypeBlock, 0},
		{tar.TypeDir, 0},
		{tar.TypeFifo, 0},
	}

	var tarball bytes.Buffer
	var expected []tar.Header
	w := tar.NewWriter(&tarball)
	for i, e := range entries {
		hdr := createTestTarheader(i, e.typeFlag, e.size)
		err := w.WriteHeader(&hdr)
		require.NoError(t, err)
		data := make([]byte, e.size)
		_, err = w.Write(data)
		require.NoError(t, err)
		expected = append(expected, hdr)
	}
	err := w.Close()
	require.NoError(t, err)

	var tarSplit bytes.Buffer
	tsReader, err := NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
	require.NoError(t, err)
	_, err = io.Copy(io.Discard, tsReader)
	require.NoError(t, err)

	unpacker := storage.NewJSONUnpacker(&tarSplit)
	var actual []tar.Header
	err = IterateHeaders(unpacker, func(hdr *tar.Header) error {
		actual = append(actual, *hdr)
		return nil
	})
	require.NoError(t, err)

	assert.Equal(t, len(expected), len(actual))
	for i := range expected {
		expected := &expected[i]
		actual := &actual[i]

		assert.Equal(t, expected.Typeflag, actual.Typeflag)
		assert.Equal(t, expected.Name, actual.Name)
		assert.Equal(t, expected.Linkname, actual.Linkname)
		assert.Equal(t, expected.Size, actual.Size)
		assert.Equal(t, expected.Mode, actual.Mode)
		assert.Equal(t, expected.Uid, actual.Uid)
		assert.Equal(t, expected.Gid, actual.Gid)
		assert.Equal(t, expected.Uname, actual.Uname)
		assert.Equal(t, expected.Gname, actual.Gname)
		assert.True(t, actual.ModTime.Equal(expected.ModTime))
		assert.True(t, actual.AccessTime.Equal(expected.AccessTime))
		assert.True(t, actual.ChangeTime.Equal(expected.ChangeTime))
		assert.Equal(t, expected.Devmajor, actual.Devmajor)
		assert.Equal(t, expected.Devminor, actual.Devminor)
		assert.Equal(t, expected.Xattrs, actual.Xattrs) //nolint:staticcheck // We do want a comprehensive coverage in this test.
		// We can’t compare PAXRecords for complete equality, because tar.Writer adds atime and ctime entries. So ensure all expected records are present.
		for k, v := range expected.PAXRecords {
			v2, ok := actual.PAXRecords[k]
			assert.True(t, ok, k)
			assert.Equal(t, v, v2)
		}
		assert.Equal(t, expected.Format, actual.Format)
	}
}