mirror of
				https://github.com/vbatts/tar-split.git
				synced 2025-10-26 08:30:56 +00:00 
			
		
		
		
	Add tar/asm.IterateHeaders
This allows reading the metadata contained in tar-split without expensively recreating the whole tar stream, including full contents.

We have two use cases for this:
- In a situation where tar-split is distributed along with a separate metadata stream, ensuring that the two are exactly consistent.
- Reading the tar headers allows making a ~cheap check of consistency of on-disk layers, just checking that the files exist in expected sizes, without reading the full contents.

This can be implemented outside of this repo, but it's not ideal:
- The function necessarily hard-codes some assumptions about how tar-split determines the boundaries of SegmentType/FileType entries (or, indeed, whether it uses FileType entries at all). That's best maintained directly beside the code that creates this.
- The ExpectedPadding() value is not currently exported, so the consumer would have to heuristically guess where the padding ends.

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
This commit is contained in:
		
							parent
							
								
									fe4605ae8b
								
							
						
					
					
						commit
						99c8914877
					
				
					 5 changed files with 190 additions and 1 deletions
				
			
		|  | @ -56,6 +56,11 @@ func (tr *Reader) RawBytes() []byte { | |||
| 
 | ||||
| } | ||||
| 
 | ||||
| // ExpectedPadding returns the number of bytes of padding expected after the last header returned by Next() | ||||
| func (tr *Reader) ExpectedPadding() int64 { | ||||
| 	return tr.pad | ||||
| } | ||||
| 
 | ||||
| // NewReader creates a new Reader reading from r. | ||||
| func NewReader(r io.Reader) *Reader { | ||||
| 	return &Reader{r: r, curr: &regFileReader{r, 0}} | ||||
|  |  | |||
							
								
								
									
										4
									
								
								go.mod
									
										
									
									
									
								
							
							
						
						
									
										4
									
								
								go.mod
									
										
									
									
									
								
							|  | @ -6,13 +6,17 @@ require ( | |||
| 	github.com/fatih/color v1.15.0 | ||||
| 	github.com/magefile/mage v1.14.0 | ||||
| 	github.com/sirupsen/logrus v1.9.0 | ||||
| 	github.com/stretchr/testify v1.9.0 | ||||
| 	github.com/urfave/cli v1.22.12 | ||||
| ) | ||||
| 
 | ||||
| require ( | ||||
| 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect | ||||
| 	github.com/davecgh/go-spew v1.1.1 // indirect | ||||
| 	github.com/mattn/go-colorable v0.1.13 // indirect | ||||
| 	github.com/mattn/go-isatty v0.0.17 // indirect | ||||
| 	github.com/pmezard/go-difflib v1.0.0 // indirect | ||||
| 	github.com/russross/blackfriday/v2 v2.1.0 // indirect | ||||
| 	golang.org/x/sys v0.6.0 // indirect | ||||
| 	gopkg.in/yaml.v3 v3.0.1 // indirect | ||||
| ) | ||||
|  |  | |||
							
								
								
									
										6
									
								
								go.sum
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								go.sum
									
										
									
									
									
								
							|  | @ -22,17 +22,21 @@ github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs | |||
| github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||||
| github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= | ||||
| github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= | ||||
| github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= | ||||
| github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||||
| github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||||
| github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= | ||||
| github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= | ||||
| github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= | ||||
| github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= | ||||
| github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= | ||||
| github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= | ||||
| github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8= | ||||
| github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= | ||||
| golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= | ||||
| golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | ||||
| gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||||
| gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= | ||||
| gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | ||||
|  |  | |||
							
								
								
									
										57
									
								
								tar/asm/iterate.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								tar/asm/iterate.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,57 @@ | |||
| package asm | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 
 | ||||
| 	"github.com/vbatts/tar-split/archive/tar" | ||||
| 	"github.com/vbatts/tar-split/tar/storage" | ||||
| ) | ||||
| 
 | ||||
| // IterateHeaders calls handler for each tar header provided by Unpacker | ||||
| func IterateHeaders(unpacker storage.Unpacker, handler func(hdr *tar.Header) error) error { | ||||
| 	// We assume about NewInputTarStream: | ||||
| 	// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions | ||||
| 	// - (There is a FileType entry for every tar header, we ignore it) | ||||
| 	// - Trailing padding of a file, if any, is included in the next SegmentType entry | ||||
| 	// - At the end, there may be SegmentType entries just for the terminating zero blocks. | ||||
| 
 | ||||
| 	var pendingPadding int64 = 0 | ||||
| 	for { | ||||
| 		tsEntry, err := unpacker.Next() | ||||
| 		if err != nil { | ||||
| 			if err == io.EOF { | ||||
| 				return nil | ||||
| 			} | ||||
| 			return fmt.Errorf("reading tar-split entries: %w", err) | ||||
| 		} | ||||
| 		switch tsEntry.Type { | ||||
| 		case storage.SegmentType: | ||||
| 			payload := tsEntry.Payload | ||||
| 			if int64(len(payload)) < pendingPadding { | ||||
| 				return fmt.Errorf("expected %d bytes of padding after previous file, but next SegmentType only has %d bytes", pendingPadding, len(payload)) | ||||
| 			} | ||||
| 			payload = payload[pendingPadding:] | ||||
| 			pendingPadding = 0 | ||||
| 
 | ||||
| 			tr := tar.NewReader(bytes.NewReader(payload)) | ||||
| 			hdr, err := tr.Next() | ||||
| 			if err != nil { | ||||
| 				if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that. | ||||
| 					break | ||||
| 				} | ||||
| 				return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err) | ||||
| 			} | ||||
| 			if err := handler(hdr); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			pendingPadding = tr.ExpectedPadding() | ||||
| 
 | ||||
| 		case storage.FileType: | ||||
| 			// Nothing | ||||
| 		default: | ||||
| 			return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										119
									
								
								tar/asm/iterate_test.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								tar/asm/iterate_test.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,119 @@ | |||
| package asm | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"testing" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| 	"github.com/stretchr/testify/require" | ||||
| 	"github.com/vbatts/tar-split/archive/tar" | ||||
| 	"github.com/vbatts/tar-split/tar/storage" | ||||
| ) | ||||
| 
 | ||||
| func createTestTarheader(index int, typeFlag byte, size int64) tar.Header { | ||||
| 	n := (index + 1) * 100 // Use predictable, but distinct, values for all headers | ||||
| 
 | ||||
| 	res := tar.Header{ | ||||
| 		Typeflag:   typeFlag, | ||||
| 		Name:       fmt.Sprintf("name%d", n), | ||||
| 		Size:       size, | ||||
| 		Mode:       int64(n + 1), | ||||
| 		Uid:        n + 2, | ||||
| 		Gid:        n + 3, | ||||
| 		Uname:      fmt.Sprintf("user%d", n), | ||||
| 		Gname:      fmt.Sprintf("group%d", n), | ||||
| 		ModTime:    time.Unix(int64(n+4), 0), | ||||
| 		AccessTime: time.Unix(int64(n+5), 0), | ||||
| 		ChangeTime: time.Unix(int64(n+6), 0), | ||||
| 		PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)}, | ||||
| 		Format:     tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded. | ||||
| 	} | ||||
| 	switch res.Typeflag { | ||||
| 	case tar.TypeLink, tar.TypeSymlink: | ||||
| 		res.Linkname = fmt.Sprintf("link%d", n) | ||||
| 	case tar.TypeChar, tar.TypeBlock: | ||||
| 		res.Devmajor = int64(n + 7) | ||||
| 		res.Devminor = int64(n + 8) | ||||
| 	} | ||||
| 	return res | ||||
| } | ||||
| 
 | ||||
| func TestIterateHeaders(t *testing.T) { | ||||
| 	entries := []struct { | ||||
| 		typeFlag byte | ||||
| 		size     int64 | ||||
| 	}{ | ||||
| 		{tar.TypeReg, 0}, | ||||
| 		{tar.TypeReg, 1}, | ||||
| 		{tar.TypeReg, 511}, | ||||
| 		{tar.TypeReg, 512}, | ||||
| 		{tar.TypeReg, 513}, | ||||
| 		{tar.TypeLink, 0}, | ||||
| 		{tar.TypeSymlink, 0}, | ||||
| 		{tar.TypeChar, 0}, | ||||
| 		{tar.TypeBlock, 0}, | ||||
| 		{tar.TypeDir, 0}, | ||||
| 		{tar.TypeFifo, 0}, | ||||
| 	} | ||||
| 
 | ||||
| 	var tarball bytes.Buffer | ||||
| 	var expected []tar.Header | ||||
| 	w := tar.NewWriter(&tarball) | ||||
| 	for i, e := range entries { | ||||
| 		hdr := createTestTarheader(i, e.typeFlag, e.size) | ||||
| 		err := w.WriteHeader(&hdr) | ||||
| 		require.NoError(t, err) | ||||
| 		data := make([]byte, e.size) | ||||
| 		_, err = w.Write(data) | ||||
| 		require.NoError(t, err) | ||||
| 		expected = append(expected, hdr) | ||||
| 	} | ||||
| 	err := w.Close() | ||||
| 	require.NoError(t, err) | ||||
| 
 | ||||
| 	var tarSplit bytes.Buffer | ||||
| 	tsReader, err := NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter()) | ||||
| 	require.NoError(t, err) | ||||
| 	_, err = io.Copy(io.Discard, tsReader) | ||||
| 	require.NoError(t, err) | ||||
| 
 | ||||
| 	unpacker := storage.NewJSONUnpacker(&tarSplit) | ||||
| 	var actual []tar.Header | ||||
| 	err = IterateHeaders(unpacker, func(hdr *tar.Header) error { | ||||
| 		actual = append(actual, *hdr) | ||||
| 		return nil | ||||
| 	}) | ||||
| 	require.NoError(t, err) | ||||
| 
 | ||||
| 	assert.Equal(t, len(expected), len(actual)) | ||||
| 	for i := range expected { | ||||
| 		expected := &expected[i] | ||||
| 		actual := &actual[i] | ||||
| 
 | ||||
| 		assert.Equal(t, expected.Typeflag, actual.Typeflag) | ||||
| 		assert.Equal(t, expected.Name, actual.Name) | ||||
| 		assert.Equal(t, expected.Linkname, actual.Linkname) | ||||
| 		assert.Equal(t, expected.Size, actual.Size) | ||||
| 		assert.Equal(t, expected.Mode, actual.Mode) | ||||
| 		assert.Equal(t, expected.Uid, actual.Uid) | ||||
| 		assert.Equal(t, expected.Gid, actual.Gid) | ||||
| 		assert.Equal(t, expected.Uname, actual.Uname) | ||||
| 		assert.Equal(t, expected.Gname, actual.Gname) | ||||
| 		assert.True(t, actual.ModTime.Equal(expected.ModTime)) | ||||
| 		assert.True(t, actual.AccessTime.Equal(expected.AccessTime)) | ||||
| 		assert.True(t, actual.ChangeTime.Equal(expected.ChangeTime)) | ||||
| 		assert.Equal(t, expected.Devmajor, actual.Devmajor) | ||||
| 		assert.Equal(t, expected.Devminor, actual.Devminor) | ||||
| 		assert.Equal(t, expected.Xattrs, actual.Xattrs) //nolint:staticcheck // We do want a comprehensive coverage in this test. | ||||
| 		// We can’t compare PAXRecords for complete equality, because tar.Writer adds atime and ctime entries. So ensure all expected records are present. | ||||
| 		for k, v := range expected.PAXRecords { | ||||
| 			v2, ok := actual.PAXRecords[k] | ||||
| 			assert.True(t, ok, k) | ||||
| 			assert.Equal(t, v, v2) | ||||
| 		} | ||||
| 		assert.Equal(t, expected.Format, actual.Format) | ||||
| 	} | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue