cri-o/vendor/github.com/containers/image/docker/tarfile/src.go

361 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package tarfile
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"io"
"io/ioutil"
"os"
"path"
"github.com/containers/image/manifest"
"github.com/containers/image/pkg/compression"
"github.com/containers/image/types"
"github.com/opencontainers/go-digest"
"github.com/pkg/errors"
)
// Source is a partial implementation of types.ImageSource for reading from tarPath.
type Source struct {
tarPath string
// The following data is only available after ensureCachedDataIsPresent() succeeds
tarManifest *ManifestItem // nil if not available yet.
configBytes []byte
configDigest digest.Digest
orderedDiffIDList []diffID
knownLayers map[diffID]*layerInfo
// Other state
generatedManifest []byte // Private cache for GetManifest(), nil if not set yet.
}
type layerInfo struct {
path string
size int64
}
// NewSource returns a tarfile.Source for the specified path.
func NewSource(path string) *Source {
// TODO: We could add support for multiple images in a single archive, so
// that people could use docker-archive:opensuse.tar:opensuse:leap as
// the source of an image.
return &Source{
tarPath: path,
}
}
// tarReadCloser is a way to close the backing file of a tar.Reader when the user no longer needs the tar component.
type tarReadCloser struct {
*tar.Reader
backingFile *os.File
}
func (t *tarReadCloser) Close() error {
return t.backingFile.Close()
}
// openTarComponent returns a ReadCloser for the specific file within the archive.
// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers),
// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough.
// The caller should call .Close() on the returned stream.
func (s *Source) openTarComponent(componentPath string) (io.ReadCloser, error) {
f, err := os.Open(s.tarPath)
if err != nil {
return nil, err
}
succeeded := false
defer func() {
if !succeeded {
f.Close()
}
}()
tarReader, header, err := findTarComponent(f, componentPath)
if err != nil {
return nil, err
}
if header == nil {
return nil, os.ErrNotExist
}
if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested
// We follow only one symlink; so no loops are possible.
if _, err := f.Seek(0, os.SEEK_SET); err != nil {
return nil, err
}
// The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive,
// so we don't care.
tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname))
if err != nil {
return nil, err
}
if header == nil {
return nil, os.ErrNotExist
}
}
if !header.FileInfo().Mode().IsRegular() {
return nil, errors.Errorf("Error reading tar archive component %s: not a regular file", header.Name)
}
succeeded = true
return &tarReadCloser{Reader: tarReader, backingFile: f}, nil
}
// findTarComponent returns a header and a reader matching path within inputFile,
// or (nil, nil, nil) if not found.
func findTarComponent(inputFile io.Reader, path string) (*tar.Reader, *tar.Header, error) {
t := tar.NewReader(inputFile)
for {
h, err := t.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, nil, err
}
if h.Name == path {
return t, h, nil
}
}
return nil, nil, nil
}
// readTarComponent returns full contents of componentPath.
func (s *Source) readTarComponent(path string) ([]byte, error) {
file, err := s.openTarComponent(path)
if err != nil {
return nil, errors.Wrapf(err, "Error loading tar component %s", path)
}
defer file.Close()
bytes, err := ioutil.ReadAll(file)
if err != nil {
return nil, err
}
return bytes, nil
}
// ensureCachedDataIsPresent loads data necessary for any of the public accessors.
func (s *Source) ensureCachedDataIsPresent() error {
if s.tarManifest != nil {
return nil
}
// Read and parse manifest.json
tarManifest, err := s.loadTarManifest()
if err != nil {
return err
}
// Check to make sure length is 1
if len(tarManifest) != 1 {
return errors.Errorf("Unexpected tar manifest.json: expected 1 item, got %d", len(tarManifest))
}
// Read and parse config.
configBytes, err := s.readTarComponent(tarManifest[0].Config)
if err != nil {
return err
}
var parsedConfig image // Most fields ommitted, we only care about layer DiffIDs.
if err := json.Unmarshal(configBytes, &parsedConfig); err != nil {
return errors.Wrapf(err, "Error decoding tar config %s", tarManifest[0].Config)
}
knownLayers, err := s.prepareLayerData(&tarManifest[0], &parsedConfig)
if err != nil {
return err
}
// Success; commit.
s.tarManifest = &tarManifest[0]
s.configBytes = configBytes
s.configDigest = digest.FromBytes(configBytes)
s.orderedDiffIDList = parsedConfig.RootFS.DiffIDs
s.knownLayers = knownLayers
return nil
}
// loadTarManifest loads and decodes the manifest.json.
func (s *Source) loadTarManifest() ([]ManifestItem, error) {
// FIXME? Do we need to deal with the legacy format?
bytes, err := s.readTarComponent(manifestFileName)
if err != nil {
return nil, err
}
var items []ManifestItem
if err := json.Unmarshal(bytes, &items); err != nil {
return nil, errors.Wrap(err, "Error decoding tar manifest.json")
}
return items, nil
}
// LoadTarManifest loads and decodes the manifest.json
func (s *Source) LoadTarManifest() ([]ManifestItem, error) {
return s.loadTarManifest()
}
func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *image) (map[diffID]*layerInfo, error) {
// Collect layer data available in manifest and config.
if len(tarManifest.Layers) != len(parsedConfig.RootFS.DiffIDs) {
return nil, errors.Errorf("Inconsistent layer count: %d in manifest, %d in config", len(tarManifest.Layers), len(parsedConfig.RootFS.DiffIDs))
}
knownLayers := map[diffID]*layerInfo{}
unknownLayerSizes := map[string]*layerInfo{} // Points into knownLayers, a "to do list" of items with unknown sizes.
for i, diffID := range parsedConfig.RootFS.DiffIDs {
if _, ok := knownLayers[diffID]; ok {
// Apparently it really can happen that a single image contains the same layer diff more than once.
// In that case, the diffID validation ensures that both layers truly are the same, and it should not matter
// which of the tarManifest.Layers paths is used; (docker save) actually makes the duplicates symlinks to the original.
continue
}
layerPath := tarManifest.Layers[i]
if _, ok := unknownLayerSizes[layerPath]; ok {
return nil, errors.Errorf("Layer tarfile %s used for two different DiffID values", layerPath)
}
li := &layerInfo{ // A new element in each iteration
path: layerPath,
size: -1,
}
knownLayers[diffID] = li
unknownLayerSizes[layerPath] = li
}
// Scan the tar file to collect layer sizes.
file, err := os.Open(s.tarPath)
if err != nil {
return nil, err
}
defer file.Close()
t := tar.NewReader(file)
for {
h, err := t.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if li, ok := unknownLayerSizes[h.Name]; ok {
li.size = h.Size
delete(unknownLayerSizes, h.Name)
}
}
if len(unknownLayerSizes) != 0 {
return nil, errors.Errorf("Some layer tarfiles are missing in the tarball") // This could do with a better error reporting, if this ever happened in practice.
}
return knownLayers, nil
}
// GetManifest returns the image's manifest along with its MIME type (which may be empty when it can't be determined but the manifest is available).
// It may use a remote (= slow) service.
func (s *Source) GetManifest() ([]byte, string, error) {
if s.generatedManifest == nil {
if err := s.ensureCachedDataIsPresent(); err != nil {
return nil, "", err
}
m := manifest.Schema2{
SchemaVersion: 2,
MediaType: manifest.DockerV2Schema2MediaType,
ConfigDescriptor: manifest.Schema2Descriptor{
MediaType: manifest.DockerV2Schema2ConfigMediaType,
Size: int64(len(s.configBytes)),
Digest: s.configDigest,
},
LayersDescriptors: []manifest.Schema2Descriptor{},
}
for _, diffID := range s.orderedDiffIDList {
li, ok := s.knownLayers[diffID]
if !ok {
return nil, "", errors.Errorf("Internal inconsistency: Information about layer %s missing", diffID)
}
m.LayersDescriptors = append(m.LayersDescriptors, manifest.Schema2Descriptor{
Digest: digest.Digest(diffID), // diffID is a digest of the uncompressed tarball
MediaType: manifest.DockerV2Schema2LayerMediaType,
Size: li.size,
})
}
manifestBytes, err := json.Marshal(&m)
if err != nil {
return nil, "", err
}
s.generatedManifest = manifestBytes
}
return s.generatedManifest, manifest.DockerV2Schema2MediaType, nil
}
// GetTargetManifest returns an image's manifest given a digest. This is mainly used to retrieve a single image's manifest
// out of a manifest list.
func (s *Source) GetTargetManifest(digest digest.Digest) ([]byte, string, error) {
// How did we even get here? GetManifest() above has returned a manifest.DockerV2Schema2MediaType.
return nil, "", errors.Errorf(`Manifest lists are not supported by "docker-daemon:"`)
}
type readCloseWrapper struct {
io.Reader
closeFunc func() error
}
func (r readCloseWrapper) Close() error {
if r.closeFunc != nil {
return r.closeFunc()
}
return nil
}
// GetBlob returns a stream for the specified blob, and the blobs size (or -1 if unknown).
func (s *Source) GetBlob(info types.BlobInfo) (io.ReadCloser, int64, error) {
if err := s.ensureCachedDataIsPresent(); err != nil {
return nil, 0, err
}
if info.Digest == s.configDigest { // FIXME? Implement a more general algorithm matching instead of assuming sha256.
return ioutil.NopCloser(bytes.NewReader(s.configBytes)), int64(len(s.configBytes)), nil
}
if li, ok := s.knownLayers[diffID(info.Digest)]; ok { // diffID is a digest of the uncompressed tarball,
stream, err := s.openTarComponent(li.path)
if err != nil {
return nil, 0, err
}
// In order to handle the fact that digests != diffIDs (and thus that a
// caller which is trying to verify the blob will run into problems),
// we need to decompress blobs. This is a bit ugly, but it's a
// consequence of making everything addressable by their DiffID rather
// than by their digest...
//
// In particular, because the v2s2 manifest being generated uses
// DiffIDs, any caller of GetBlob is going to be asking for DiffIDs of
// layers not their _actual_ digest. The result is that copy/... will
// be verifing a "digest" which is not the actual layer's digest (but
// is instead the DiffID).
decompressFunc, reader, err := compression.DetectCompression(stream)
if err != nil {
return nil, 0, errors.Wrapf(err, "Detecting compression in blob %s", info.Digest)
}
if decompressFunc != nil {
reader, err = decompressFunc(reader)
if err != nil {
return nil, 0, errors.Wrapf(err, "Decompressing blob %s stream", info.Digest)
}
}
newStream := readCloseWrapper{
Reader: reader,
closeFunc: stream.Close,
}
return newStream, li.size, nil
}
return nil, 0, errors.Errorf("Unknown blob %s", info.Digest)
}
// GetSignatures returns the image's signatures. It may use a remote (= slow) service.
func (s *Source) GetSignatures(ctx context.Context) ([][]byte, error) {
return [][]byte{}, nil
}