1
0
Fork 1
mirror of https://github.com/vbatts/tar-split.git synced 2024-11-23 08:35:41 +00:00
tar-split/tar/asm/iterate.go
Miloslav Trmač 99c8914877 Add tar/asm.IterateHeaders
This allows reading the metadata contained in tar-split
without expensively recreating the whole tar stream
including full contents.

We have two use cases for this:
- In a situation where tar-split is distributed along with
  a separate metadata stream, ensuring that the two are
  exactly consistent
- Reading the tar headers allows making a ~cheap check
  of consistency of on-disk layers, just checking that the
  files exist in expected sizes, without reading the full
  contents.

This can be implemented outside of this repo, but it's
not ideal:
- The function necessarily hard-codes some assumptions
  about how tar-split determines the boundaries of
  SegmentType/FileType entries (or, indeed, whether it
  uses FileType entries at all). That's best maintained
  directly beside the code that creates this.
- The ExpectedPadding() value is not currently exported,
  so the consumer would have to heuristically guess where
  the padding ends.

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
2024-09-11 20:01:49 +02:00

57 lines
1.7 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package asm
import (
"bytes"
"fmt"
"io"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/storage"
)
// IterateHeaders calls handler for each tar header provided by Unpacker
func IterateHeaders(unpacker storage.Unpacker, handler func(hdr *tar.Header) error) error {
// We assume about NewInputTarStream:
// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
// - (There is a FileType entry for every tar header, we ignore it)
// - Trailing padding of a file, if any, is included in the next SegmentType entry
// - At the end, there may be SegmentType entries just for the terminating zero blocks.
var pendingPadding int64 = 0
for {
tsEntry, err := unpacker.Next()
if err != nil {
if err == io.EOF {
return nil
}
return fmt.Errorf("reading tar-split entries: %w", err)
}
switch tsEntry.Type {
case storage.SegmentType:
payload := tsEntry.Payload
if int64(len(payload)) < pendingPadding {
return fmt.Errorf("expected %d bytes of padding after previous file, but next SegmentType only has %d bytes", pendingPadding, len(payload))
}
payload = payload[pendingPadding:]
pendingPadding = 0
tr := tar.NewReader(bytes.NewReader(payload))
hdr, err := tr.Next()
if err != nil {
if err == io.EOF { // Probably the last entry, but lets let the unpacker drive that.
break
}
return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
}
if err := handler(hdr); err != nil {
return err
}
pendingPadding = tr.ExpectedPadding()
case storage.FileType:
// Nothing
default:
return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
}
}
}