containers-bsi/build-source-image
Luiz Carvalho 96c38baa38 Add "push" subcommand
The approach taken here is to push the source archives directly to the
registry as blobs. Then an image manifest is generated to group the blobs
together, creating a pseudo "source image manifest".

The benefits of this approach are:

    1. Simpler to deduplicate content in registry
    2. No container image building is required

Signed-off-by: Luiz Carvalho <lucarval@redhat.com>
2019-07-17 15:56:10 -04:00

257 lines
9.2 KiB
Python
Executable file

#!/bin/env python3
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from hashlib import sha256
from urllib.parse import urlparse
import json
import os
import os.path
import shutil
import subprocess
import tarfile
import tempfile
# atomic-reactor needs a patch to make sure it works for this use case:
# https://github.com/containerbuildsystem/atomic-reactor/pull/1239
from atomic_reactor.util import RegistrySession, ImageName
def handle_identify_command(image, dest_path):
    """Identify the source RPMs used in an image and download them.

    :param image: reference to an existing OCI image to inspect
    :param dest_path: local directory where source RPMs are downloaded
    """
    with _buildah_build(image) as (container, mount_path):
        print(f'Using {container} at path {mount_path}...')
        # Determine the distro release from the package that owns
        # /etc/os-release, so "dnf download" queries matching source repos.
        release = _run_command(
            ('rpm', '-q', '--queryformat', '%{VERSION}\n', '--root', mount_path, '-f',
             '/etc/os-release')).strip()
        src_rpms = _run_command(
            ('rpm', '-qa', '--root', mount_path, '--queryformat', '%{SOURCERPM}\n')).splitlines()
        # De-duplicate, and drop entries with no source RPM (e.g. gpg-pubkey).
        src_rpms = sorted({src_rpm for src_rpm in src_rpms if src_rpm and '(none)' not in src_rpm})

        os.makedirs(dest_path, exist_ok=True)
        with ThreadPoolExecutor(max_workers=20) as executor:
            futures = [
                executor.submit(_download_srpm, src_rpm, release, dest_path)
                for src_rpm in src_rpms
            ]
            # Await every download and re-raise unexpected worker errors.
            # Previously the futures were discarded, so any exception in a
            # worker thread was silently swallowed.
            for future in futures:
                future.result()
def _download_srpm(src_rpm, release, dest_path):
    """Download one source RPM via dnf unless it is already present."""
    if os.path.exists(os.path.join(dest_path, src_rpm)):
        return
    # dnf wants the package name, not the .src.rpm file name.
    rpm_name = src_rpm.replace('.src.rpm', '')
    print(f'Fetching source for {rpm_name}...')
    command = ('dnf', 'download', '--release', release, '--source', rpm_name)
    try:
        _run_command(command, {'cwd': dest_path})
    except RuntimeError:
        # Best-effort: report the failure and let the caller continue
        # with the remaining packages.
        print(f'ERROR: Cannot download source for {rpm_name}')
def handle_create_command(dest_path, dest_image):
    """Build a source image from the archives found under *dest_path*."""
    with _buildah_build('scratch') as (container, mount_path):
        print(f'Using {container} at path {mount_path}...')
        for source_archive in sorted(os.listdir(dest_path)):
            source_archive_path = os.path.join(dest_path, source_archive)
            if not os.path.isfile(source_archive_path):
                continue
            print(f'Processing {source_archive} source archive...')
            # Source RPMs and other archives land in separate directories.
            subdir = 'RPMS' if source_archive.endswith('.src.rpm') else 'others'
            copy_path = os.path.join(mount_path, subdir)
            os.makedirs(copy_path, exist_ok=True)
            shutil.copy(source_archive_path, copy_path)
            # NOTE(review): this commits an image named after every archive;
            # confirm it is intentional and not leftover debugging.
            _run_command(('buildah', 'commit', container, source_archive))
        _run_command(('buildah', 'commit', container, dest_image))
def handle_push_command(sources_path, image_reference):
    """Push source archives as blobs and stitch them into a source image."""
    image = ImageName.parse(image_reference)
    _validate_image_reference(image)
    registry = RegistrySession(
        image.registry, access=('push', 'pull'),
        dockercfg_path=os.path.expanduser('~/.docker/config.json'))

    blobs = []
    for source_archive in sorted(os.listdir(sources_path)):
        source_archive_path = os.path.join(sources_path, source_archive)
        if not os.path.isfile(source_archive_path):
            continue
        print(f'Processing {source_archive} source archive...')
        blob_info = _compute_blob_info(source_archive_path)
        print(blob_info)
        blobs.append(blob_info)
        # Blobs are content-addressed: identical archives need no re-upload.
        if _blob_exists(registry, image, blob_info['digest']):
            print('Blob already exists, skipping...')
        else:
            _create_source_blob(registry, image, source_archive_path, blob_info)

    image_config = _create_image_config(registry, image, blobs)
    _create_image_manifest(registry, image, blobs, image_config)
def _validate_image_reference(image):
assert image.registry
assert image.get_repo()
assert image.tag
def _compute_blob_info(path):
size = os.stat(path).st_size
with open(path, 'rb') as f:
hexdigest = sha256(f.read()).hexdigest()
return {'digest': f'sha256:{hexdigest}', 'size': size}
def _blob_exists(registry, image, blob_digest):
name = image.get_repo()
response = registry.head(f'/v2/{name}/blobs/{blob_digest}')
return response.status_code == 200
def _create_source_blob(registry, image, path, blob_info):
    """Upload a single source archive as a registry blob.

    An earlier revision re-packed non-``.tar.gz`` archives into a tarball
    on the fly, but that branch was disabled with ``if False`` because the
    generated tarballs caused buildah issues. The dead branch has been
    removed; archives are always pushed as-is.

    :param registry: RegistrySession used for the upload
    :param image: parsed ImageName identifying the target repository
    :param path: path to the source archive on disk
    :param blob_info: dict with the archive's ``digest`` and ``size``
    """
    _create_blob(registry, image, path, blob_info)
def _create_blob(registry, image, path, blob_info):
name = image.get_repo()
response = registry.post(f'/v2/{name}/blobs/uploads/')
response.raise_for_status()
location = response.headers['Location']
with open(path, 'rb') as f:
response = registry.put(
urlparse(location).path,
data=f,
params={'digest': blob_info['digest']},
)
response.raise_for_status()
def _create_image_config(registry, image, blobs):
    """Generate and upload the image config blob; return its blob info."""
    config = {
        # TODO: Placeholders for now
        'architecture': 'amd64',
        'os': 'linux',
        'rootfs': {
            'type': 'layers',
            'diff_ids': [blob['digest'] for blob in blobs],
        },
    }
    # Serialize to a temp file so the file-based blob helpers can compute
    # the digest and perform the upload.
    with tempfile.NamedTemporaryFile(mode='w') as config_file:
        json.dump(config, config_file, sort_keys=True)
        config_file.flush()
        blob_info = _compute_blob_info(config_file.name)
        if not _blob_exists(registry, image, blob_info['digest']):
            print('Image config blob does not exist, creating it...')
            _create_blob(registry, image, config_file.name, blob_info)
        return blob_info
def _create_image_manifest(registry, image, blobs, config):
layers = [
{
'mediaType': 'application/vnd.docker.image.rootfs.diff.tar.gzip',
'size': blob['size'],
'digest': blob['digest'],
}
for blob in blobs
]
image_manifest = {
'schemaVersion': 2,
'mediaType': 'application/vnd.docker.distribution.manifest.v2+json',
'config': {
'mediaType': 'application/vnd.docker.container.image.v1+json',
'size': config['size'],
'digest': config['digest'],
},
'layers': layers,
}
headers = {'Content-Type': 'application/vnd.docker.distribution.manifest.v2+json'}
repo = image.get_repo()
reference = image.tag
response = registry.put(
f'/v2/{repo}/manifests/{reference}', json=image_manifest, headers=headers)
response.raise_for_status()
@contextmanager
def _buildah_build(image):
    """Yield ``(container, mount_path)`` for a working container of *image*.

    The container is unmounted and removed on exit even when the body
    raises; the previous version ran cleanup only on the success path,
    leaking a mounted container on every error.
    """
    container = _run_command(('buildah', 'from', image)).strip()
    mount_path = _run_command(('buildah', 'mount', container)).strip()
    try:
        yield container, mount_path
    finally:
        # Always clean up so failed runs do not accumulate containers.
        _run_command(('buildah', 'umount', container))
        _run_command(('buildah', 'rm', container))
def _run_command(command, params=None):
if params is None:
params = {}
params.setdefault('capture_output', True)
params.setdefault('universal_newlines', True)
params.setdefault('encoding', 'utf-8')
response = subprocess.run(command, **params)
returncode = response.returncode
if returncode != 0:
raise RuntimeError(f'Command "{command}" failed with return code {returncode}')
return response.stdout
if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description='Tool to build a source image based on an existing OCI image')
    subparsers = parser.add_subparsers(dest='command')

    identifier_parser = subparsers.add_parser(
        'identify', help='Identify and download the source code for an existing OCI image')
    identifier_parser.add_argument('image', help='Reference to an existing OCI image')
    identifier_parser.add_argument(
        '--dest-path', default='sources',
        help='Local path to download sources, defaults to "sources"')

    create_parser = subparsers.add_parser(
        # Fixed help-text typo: "provide" -> "provided".
        'create', help='Create a source image with the provided sources')
    create_parser.add_argument(
        'dest_image', help='Target reference to be used for the source image')
    create_parser.add_argument(
        '--dest-path', default='sources',
        help='Local path containing sources, defaults to "sources"')

    push_parser = subparsers.add_parser(
        'push', help='Create a source image directly in container registry')
    push_parser.add_argument(
        'image', help='Target reference to be used for the source image, e.g. quay.io/foo/bar:src')
    push_parser.add_argument(
        '--sources-path', default='sources',
        help='Local path containing sources, defaults to "sources"')

    args = parser.parse_args()
    # Dispatch to the selected subcommand handler.
    if args.command == 'identify':
        handle_identify_command(args.image, args.dest_path)
    elif args.command == 'create':
        handle_create_command(args.dest_path, args.dest_image)
    elif args.command == 'push':
        handle_push_command(args.sources_path, args.image)
    else:
        raise ValueError('Please specify a valid subcommand')