The approach taken here is to push the source archives directly to the registry as blobs, then generate an image manifest that groups the blobs together, creating a pseudo "source image" manifest. The benefits of this approach are:

1. Simpler to deduplicate content in the registry
2. No container image building is required

Signed-off-by: Luiz Carvalho <lucarval@redhat.com>
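For illustration, the manifest pushed by the `push` subcommand looks roughly like this (sizes and digests are placeholders):

    {
      "schemaVersion": 2,
      "mediaType": "application/vnd.docker.distribution.manifest.v2+json",
      "config": {
        "mediaType": "application/vnd.docker.container.image.v1+json",
        "size": 123,
        "digest": "sha256:..."
      },
      "layers": [
        {
          "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
          "size": 456,
          "digest": "sha256:..."
        }
      ]
    }
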
#!/usr/bin/env python3
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from hashlib import sha256
from urllib.parse import urlparse
import json
import os
import os.path
import shutil
import subprocess
import tarfile
import tempfile

# atomic-reactor needs a patch to make sure it works for this use case:
# https://github.com/containerbuildsystem/atomic-reactor/pull/1239
from atomic_reactor.util import RegistrySession, ImageName

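# identify: mount the given image with buildah, derive the OS release and the
# set of source RPMs from the installed packages, then download each SRPM into
# dest_path in parallel.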
def handle_identify_command(image, dest_path):
    with _buildah_build(image) as (container, mount_path):
        print(f'Using {container} at path {mount_path}...')
        release = _run_command(
            ('rpm', '-q', '--queryformat', '%{VERSION}\n', '--root', mount_path, '-f',
             '/etc/os-release')).strip()
        src_rpms = _run_command(
            ('rpm', '-qa', '--root', mount_path, '--queryformat', '%{SOURCERPM}\n')).splitlines()

    src_rpms = sorted({src_rpm for src_rpm in src_rpms if src_rpm and '(none)' not in src_rpm})

    os.makedirs(dest_path, exist_ok=True)
    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = [
            executor.submit(_download_srpm, src_rpm, release, dest_path)
            for src_rpm in src_rpms
        ]
        # Propagate any unexpected errors from the worker threads
        for future in futures:
            future.result()

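# Download a single SRPM with `dnf download --source`, skipping files already
# present in dest_path.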
def _download_srpm(src_rpm, release, dest_path):
    if os.path.exists(os.path.join(dest_path, src_rpm)):
        return
    rpm_name = src_rpm.replace('.src.rpm', '')
    print(f'Fetching source for {rpm_name}...')
    try:
        _run_command(('dnf', 'download', '--release', release, '--source', rpm_name),
                     {'cwd': dest_path})
    except RuntimeError:
        print(f'ERROR: Cannot download source for {rpm_name}')

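# create: build the source image locally by copying every source archive into a
# scratch container (SRPMs under /RPMS, everything else under /others) and
# committing the result as dest_image.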
def handle_create_command(dest_path, dest_image):
    with _buildah_build('scratch') as (container, mount_path):
        print(f'Using {container} at path {mount_path}...')
        for source_archive in sorted(os.listdir(dest_path)):
            source_archive_path = os.path.join(dest_path, source_archive)
            if not os.path.isfile(source_archive_path):
                continue
            print(f'Processing {source_archive} source archive...')
            if source_archive.endswith('.src.rpm'):
                copy_path = os.path.join(mount_path, 'RPMS')
            else:
                copy_path = os.path.join(mount_path, 'others')
            os.makedirs(copy_path, exist_ok=True)
            shutil.copy(source_archive_path, copy_path)

        # A single commit once all archives are in place is enough; committing
        # after every archive would only create stray intermediate images.
        _run_command(('buildah', 'commit', container, dest_image))

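# push: build the source image directly in the registry, without buildah. Each
# source archive is uploaded as a layer blob, then an image config and a
# manifest referencing those blobs are created.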
def handle_push_command(sources_path, image_reference):
    image = ImageName.parse(image_reference)
    _validate_image_reference(image)

    registry = RegistrySession(
        image.registry, access=('push', 'pull'),
        dockercfg_path=os.path.expanduser('~/.docker/config.json'))

    blobs = []
    for source_archive in sorted(os.listdir(sources_path)):
        source_archive_path = os.path.join(sources_path, source_archive)
        if not os.path.isfile(source_archive_path):
            continue

        print(f'Processing {source_archive} source archive...')
        blob_info = _compute_blob_info(source_archive_path)
        print(blob_info)
        blobs.append(blob_info)
        if _blob_exists(registry, image, blob_info['digest']):
            print('Blob already exists, skipping...')
            continue
        _create_source_blob(registry, image, source_archive_path, blob_info)

    image_config = _create_image_config(registry, image, blobs)
    _create_image_manifest(registry, image, blobs, image_config)

def _validate_image_reference(image):
    assert image.registry
    assert image.get_repo()
    assert image.tag

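# Digest and size of a file, in the form expected by the registry API. Note the
# whole file is read into memory to compute the sha256 digest.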
def _compute_blob_info(path):
    size = os.stat(path).st_size
    with open(path, 'rb') as f:
        hexdigest = sha256(f.read()).hexdigest()
    return {'digest': f'sha256:{hexdigest}', 'size': size}

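# Check for an existing blob with a HEAD request to the Docker Registry HTTP
# API V2 endpoint /v2/<name>/blobs/<digest>.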
def _blob_exists(registry, image, blob_digest):
    name = image.get_repo()
    response = registry.head(f'/v2/{name}/blobs/{blob_digest}')
    return response.status_code == 200

def _create_source_blob(registry, image, path, blob_info):
    basename = os.path.basename(path)

    # Disabled: generating tarballs on the fly causes buildah issues. It's
    # recommended to create a tarball for each source archive instead.
    generate_tarballs = False

    if generate_tarballs and not basename.endswith('.tar.gz'):
        print(
            'WARNING: Generating tarball on the fly causes buildah issues. '
            "It's recommended to create tarballs for each source archive instead.")
        tarball_path = os.path.join('/tmp', basename + '.tar.gz')
        with tarfile.open(tarball_path, 'w:gz') as archive:
            archive.add(path)
        print(f'Created tarball at {tarball_path}')
        # blob_info was computed from the original file; the registry validates
        # the digest of the data actually uploaded, so refresh it in place
        blob_info.update(_compute_blob_info(tarball_path))
        _create_blob(registry, image, tarball_path, blob_info)
    else:
        _create_blob(registry, image, path, blob_info)

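# Upload a blob using the two-step "monolithic" flow of the Registry HTTP API:
# POST /v2/<name>/blobs/uploads/ to open an upload session, then a single PUT
# of the content and its digest to the returned Location.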
def _create_blob(registry, image, path, blob_info):
    name = image.get_repo()
    response = registry.post(f'/v2/{name}/blobs/uploads/')
    response.raise_for_status()
    location = urlparse(response.headers['Location'])
    # Keep any upload-session parameters the registry returned in the Location
    # query string; requests merges `params` into an existing query
    upload_url = location.path + (f'?{location.query}' if location.query else '')

    with open(path, 'rb') as f:
        response = registry.put(
            upload_url,
            data=f,
            params={'digest': blob_info['digest']},
        )
    response.raise_for_status()

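# Generate a minimal image config that lists the source blobs as layer
# diff_ids, and upload it as a blob of its own if not already present.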
def _create_image_config(registry, image, blobs):
    config = {
        # TODO: Placeholders for now
        'architecture': 'amd64',
        'os': 'linux',
        'rootfs': {
            'type': 'layers',
            'diff_ids': [blob['digest'] for blob in blobs],
        },
    }

    with tempfile.NamedTemporaryFile(mode='w') as f:
        json.dump(config, f, sort_keys=True)
        f.flush()
        blob_info = _compute_blob_info(f.name)
        if not _blob_exists(registry, image, blob_info['digest']):
            print('Image config blob does not exist, creating it...')
            _create_blob(registry, image, f.name, blob_info)

    return blob_info

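# Group the source blobs and the config into a schema 2 image manifest and PUT
# it under the image tag, making the "source image" pullable.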
def _create_image_manifest(registry, image, blobs, config):
    layers = [
        {
            'mediaType': 'application/vnd.docker.image.rootfs.diff.tar.gzip',
            'size': blob['size'],
            'digest': blob['digest'],
        }
        for blob in blobs
    ]

    image_manifest = {
        'schemaVersion': 2,
        'mediaType': 'application/vnd.docker.distribution.manifest.v2+json',
        'config': {
            'mediaType': 'application/vnd.docker.container.image.v1+json',
            'size': config['size'],
            'digest': config['digest'],
        },
        'layers': layers,
    }

    headers = {'Content-Type': 'application/vnd.docker.distribution.manifest.v2+json'}
    repo = image.get_repo()
    reference = image.tag
    response = registry.put(
        f'/v2/{repo}/manifests/{reference}', json=image_manifest, headers=headers)
    response.raise_for_status()

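# Context manager around `buildah from` + `buildah mount`; the container is
# always unmounted and removed on exit, even if the body raises.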
@contextmanager
def _buildah_build(image):
    container = _run_command(('buildah', 'from', image)).strip()
    mount_path = _run_command(('buildah', 'mount', container)).strip()
    try:
        yield container, mount_path
    finally:
        _run_command(('buildah', 'umount', container))
        _run_command(('buildah', 'rm', container))

def _run_command(command, params=None):
    if params is None:
        params = {}
    params.setdefault('capture_output', True)
    params.setdefault('universal_newlines', True)
    params.setdefault('encoding', 'utf-8')

    response = subprocess.run(command, **params)
    returncode = response.returncode
    if returncode != 0:
        raise RuntimeError(
            f'Command "{command}" failed with return code {returncode}: {response.stderr}')
    return response.stdout

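# Example invocations (the script file name is illustrative):
#   ./build_source_image.py identify quay.io/foo/bar:latest
#   ./build_source_image.py create quay.io/foo/bar:src
#   ./build_source_image.py push quay.io/foo/bar:src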
if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description='Tool to build a source image based on an existing OCI image')
    subparsers = parser.add_subparsers(dest='command')

    identifier_parser = subparsers.add_parser(
        'identify', help='Identify and download the source code for an existing OCI image')
    identifier_parser.add_argument('image', help='Reference to an existing OCI image')
    identifier_parser.add_argument(
        '--dest-path', default='sources',
        help='Local path to download sources, defaults to "sources"')

    create_parser = subparsers.add_parser(
        'create', help='Create a source image with the provided sources')
    create_parser.add_argument(
        'dest_image', help='Target reference to be used for the source image')
    create_parser.add_argument(
        '--dest-path', default='sources',
        help='Local path containing sources, defaults to "sources"')

    push_parser = subparsers.add_parser(
        'push', help='Create a source image directly in the container registry')
    push_parser.add_argument(
        'image', help='Target reference to be used for the source image, e.g. quay.io/foo/bar:src')
    push_parser.add_argument(
        '--sources-path', default='sources',
        help='Local path containing sources, defaults to "sources"')

    args = parser.parse_args()
    if args.command == 'identify':
        handle_identify_command(args.image, args.dest_path)
    elif args.command == 'create':
        handle_create_command(args.dest_path, args.dest_image)
    elif args.command == 'push':
        handle_push_command(args.sources_path, args.image)
    else:
        parser.error('Please specify a valid subcommand')