First implementation of ephemeral build lifecycle manager.

This commit is contained in:
Jake Moshenko 2014-12-16 13:41:30 -05:00
parent 79b61e7709
commit 2d7e844753
10 changed files with 453 additions and 56 deletions

View file

@ -6,6 +6,7 @@ import time
from app import app, userfiles as user_files, build_logs, dockerfile_build_queue from app import app, userfiles as user_files, build_logs, dockerfile_build_queue
from buildman.manager.enterprise import EnterpriseManager from buildman.manager.enterprise import EnterpriseManager
from buildman.manager.ephemeral import EphemeralBuilderManager
from buildman.server import BuilderServer from buildman.server import BuilderServer
from trollius import SSLContext from trollius import SSLContext
@ -13,7 +14,8 @@ from trollius import SSLContext
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
BUILD_MANAGERS = { BUILD_MANAGERS = {
'enterprise': EnterpriseManager 'enterprise': EnterpriseManager,
'ephemeral': EphemeralBuilderManager,
} }
EXTERNALLY_MANAGED = 'external' EXTERNALLY_MANAGED = 'external'
@ -39,6 +41,9 @@ def run_build_manager():
if manager_klass is None: if manager_klass is None:
return return
public_ip = os.environ.get('PUBLIC_IP', '127.0.0.1')
logger.debug('Will pass public IP address %s to builders for websocket connection', public_ip)
logger.debug('Starting build manager with lifecycle "%s"', build_manager_config[0]) logger.debug('Starting build manager with lifecycle "%s"', build_manager_config[0])
ssl_context = None ssl_context = None
if os.environ.get('SSL_CONFIG'): if os.environ.get('SSL_CONFIG'):
@ -48,7 +53,7 @@ def run_build_manager():
os.environ.get('SSL_CONFIG') + '/ssl.key') os.environ.get('SSL_CONFIG') + '/ssl.key')
server = BuilderServer(app.config['SERVER_HOSTNAME'], dockerfile_build_queue, build_logs, server = BuilderServer(app.config['SERVER_HOSTNAME'], dockerfile_build_queue, build_logs,
user_files, manager_klass) user_files, manager_klass, build_manager_config[1], public_ip)
server.run('0.0.0.0', ssl=ssl_context) server.run('0.0.0.0', ssl=ssl_context)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -39,7 +39,7 @@ class BuildComponent(BaseComponent):
self.builder_realm = realm self.builder_realm = realm
self.parent_manager = None self.parent_manager = None
self.server_hostname = None self.registry_hostname = None
self._component_status = ComponentStatus.JOINING self._component_status = ComponentStatus.JOINING
self._last_heartbeat = None self._last_heartbeat = None
@ -68,13 +68,13 @@ class BuildComponent(BaseComponent):
def start_build(self, build_job): def start_build(self, build_job):
""" Starts a build. """ """ Starts a build. """
self._current_job = build_job self._current_job = build_job
self._build_status = StatusHandler(self.build_logs, build_job.repo_build()) self._build_status = StatusHandler(self.build_logs, build_job.repo_build)
self._image_info = {} self._image_info = {}
self._set_status(ComponentStatus.BUILDING) self._set_status(ComponentStatus.BUILDING)
# Retrieve the job's buildpack. # Retrieve the job's buildpack.
buildpack_url = self.user_files.get_file_url(build_job.repo_build().resource_key, buildpack_url = self.user_files.get_file_url(build_job.repo_build.resource_key,
requires_cors=False) requires_cors=False)
logger.debug('Retreiving build package: %s', buildpack_url) logger.debug('Retreiving build package: %s', buildpack_url)
@ -89,7 +89,7 @@ class BuildComponent(BaseComponent):
parsed_dockerfile = None parsed_dockerfile = None
logger.debug('Parsing dockerfile') logger.debug('Parsing dockerfile')
build_config = build_job.build_config() build_config = build_job.build_config
try: try:
parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir')) parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
except BuildPackageException as bpe: except BuildPackageException as bpe:
@ -116,7 +116,7 @@ class BuildComponent(BaseComponent):
base_image_information['password'] = build_config['pull_credentials'].get('password', '') base_image_information['password'] = build_config['pull_credentials'].get('password', '')
# Retrieve the repository's fully qualified name. # Retrieve the repository's fully qualified name.
repo = build_job.repo_build().repository repo = build_job.repo_build.repository
repository_name = repo.namespace_user.username + '/' + repo.name repository_name = repo.namespace_user.username + '/' + repo.name
# Parse the build queue item into build arguments. # Parse the build queue item into build arguments.
@ -136,9 +136,9 @@ class BuildComponent(BaseComponent):
'build_package': buildpack_url, 'build_package': buildpack_url,
'sub_directory': build_config.get('build_subdir', ''), 'sub_directory': build_config.get('build_subdir', ''),
'repository': repository_name, 'repository': repository_name,
'registry': self.server_hostname, 'registry': self.registry_hostname,
'pull_token': build_job.repo_build().access_token.code, 'pull_token': build_job.repo_build.access_token.code,
'push_token': build_job.repo_build().access_token.code, 'push_token': build_job.repo_build.access_token.code,
'tag_names': build_config.get('docker_tags', ['latest']), 'tag_names': build_config.get('docker_tags', ['latest']),
'base_image': base_image_information, 'base_image': base_image_information,
'cached_tag': build_job.determine_cached_tag() or '' 'cached_tag': build_job.determine_cached_tag() or ''
@ -244,7 +244,7 @@ class BuildComponent(BaseComponent):
'internal_error': exception.message if exception else None 'internal_error': exception.message if exception else None
}) })
build_id = self._current_job.repo_build().uuid build_id = self._current_job.repo_build.uuid
logger.warning('Build %s failed with message: %s', build_id, error_message) logger.warning('Build %s failed with message: %s', build_id, error_message)
# Mark that the build has finished (in an error state) # Mark that the build has finished (in an error state)
@ -305,6 +305,10 @@ class BuildComponent(BaseComponent):
return True return True
def _set_status(self, phase): def _set_status(self, phase):
if phase == ComponentStatus.RUNNING:
loop = trollius.get_event_loop()
self.parent_manager.build_component_ready(self, loop)
self._component_status = phase self._component_status = phase
def _on_heartbeat(self): def _on_heartbeat(self):

View file

@ -9,50 +9,38 @@ class BuildJobLoadException(Exception):
class BuildJob(object): class BuildJob(object):
""" Represents a single in-progress build job. """ """ Represents a single in-progress build job. """
def __init__(self, job_item): def __init__(self, job_item):
self._job_item = job_item self.job_item = job_item
try: try:
self._job_details = json.loads(job_item.body) self.job_details = json.loads(job_item.body)
except ValueError: except ValueError:
raise BuildJobLoadException( raise BuildJobLoadException(
'Could not parse build queue item config with ID %s' % self._job_details['build_uuid'] 'Could not parse build queue item config with ID %s' % self.job_details['build_uuid']
) )
try: try:
self._repo_build = model.get_repository_build(self._job_details['build_uuid']) self.repo_build = model.get_repository_build(self.job_details['build_uuid'])
except model.InvalidRepositoryBuildException: except model.InvalidRepositoryBuildException:
raise BuildJobLoadException( raise BuildJobLoadException(
'Could not load repository build with ID %s' % self._job_details['build_uuid']) 'Could not load repository build with ID %s' % self.job_details['build_uuid'])
try: try:
self._build_config = json.loads(self._repo_build.job_config) self.build_config = json.loads(self.repo_build.job_config)
except ValueError: except ValueError:
raise BuildJobLoadException( raise BuildJobLoadException(
'Could not parse repository build job config with ID %s' % self._job_details['build_uuid'] 'Could not parse repository build job config with ID %s' % self.job_details['build_uuid']
) )
def determine_cached_tag(self): def determine_cached_tag(self):
""" Returns the tag to pull to prime the cache or None if none. """ """ Returns the tag to pull to prime the cache or None if none. """
# TODO(jschorr): Change this to use the more complicated caching rules, once we have caching # TODO(jschorr): Change this to use the more complicated caching rules, once we have caching
# be a pull of things besides the constructed tags. # be a pull of things besides the constructed tags.
tags = self._build_config.get('docker_tags', ['latest']) tags = self.build_config.get('docker_tags', ['latest'])
existing_tags = model.list_repository_tags(self._repo_build.repository.namespace_user.username, existing_tags = model.list_repository_tags(self.repo_build.repository.namespace_user.username,
self._repo_build.repository.name) self.repo_build.repository.name)
cached_tags = set(tags) & set([tag.name for tag in existing_tags]) cached_tags = set(tags) & set([tag.name for tag in existing_tags])
if cached_tags: if cached_tags:
return list(cached_tags)[0] return list(cached_tags)[0]
return None return None
def job_item(self):
""" Returns the job's queue item. """
return self._job_item
def repo_build(self):
""" Returns the repository build DB row for the job. """
return self._repo_build
def build_config(self):
""" Returns the parsed repository build config for the job. """
return self._build_config

View file

@ -1,11 +1,12 @@
class BaseManager(object): class BaseManager(object):
""" Base for all worker managers. """ """ Base for all worker managers. """
def __init__(self, register_component, unregister_component, job_heartbeat_callback, def __init__(self, register_component, unregister_component, job_heartbeat_callback,
job_complete_callback): job_complete_callback, public_ip_address):
self.register_component = register_component self.register_component = register_component
self.unregister_component = unregister_component self.unregister_component = unregister_component
self.job_heartbeat_callback = job_heartbeat_callback self.job_heartbeat_callback = job_heartbeat_callback
self.job_complete_callback = job_complete_callback self.job_complete_callback = job_complete_callback
self.public_ip_address = public_ip_address
def job_heartbeat(self, build_job): def job_heartbeat(self, build_job):
""" Method invoked to tell the manager that a job is still running. This method will be called """ Method invoked to tell the manager that a job is still running. This method will be called
@ -31,11 +32,16 @@ class BaseManager(object):
""" """
raise NotImplementedError raise NotImplementedError
def initialize(self): def initialize(self, manager_config):
""" Runs any initialization code for the manager. Called once the server is in a ready state. """ Runs any initialization code for the manager. Called once the server is in a ready state.
""" """
raise NotImplementedError raise NotImplementedError
def build_component_ready(self, build_component, loop):
""" Method invoked whenever a build component announces itself as ready.
"""
raise NotImplementedError
def build_component_disposed(self, build_component, timed_out): def build_component_disposed(self, build_component, timed_out):
""" Method invoked whenever a build component has been disposed. The timed_out boolean indicates """ Method invoked whenever a build component has been disposed. The timed_out boolean indicates
whether the component's heartbeat timed out. whether the component's heartbeat timed out.

View file

@ -28,10 +28,12 @@ class DynamicRegistrationComponent(BaseComponent):
class EnterpriseManager(BaseManager): class EnterpriseManager(BaseManager):
""" Build manager implementation for the Enterprise Registry. """ """ Build manager implementation for the Enterprise Registry. """
build_components = []
shutting_down = False
def initialize(self): def __init__(self, *args, **kwargs):
self.ready_components = set()
self.shutting_down = False
def initialize(self, manager_config):
# Add a component which is used by build workers for dynamic registration. Unlike # Add a component which is used by build workers for dynamic registration. Unlike
# production, build workers in enterprise are long-lived and register dynamically. # production, build workers in enterprise are long-lived and register dynamically.
self.register_component(REGISTRATION_REALM, DynamicRegistrationComponent) self.register_component(REGISTRATION_REALM, DynamicRegistrationComponent)
@ -45,21 +47,20 @@ class EnterpriseManager(BaseManager):
""" Adds a new build component for an Enterprise Registry. """ """ Adds a new build component for an Enterprise Registry. """
# Generate a new unique realm ID for the build worker. # Generate a new unique realm ID for the build worker.
realm = str(uuid.uuid4()) realm = str(uuid.uuid4())
component = self.register_component(realm, BuildComponent, token="") self.register_component(realm, BuildComponent, token="")
self.build_components.append(component)
return realm return realm
def schedule(self, build_job, loop): def schedule(self, build_job, loop):
""" Schedules a build for an Enterprise Registry. """ """ Schedules a build for an Enterprise Registry. """
if self.shutting_down: if self.shutting_down or not self.ready_components:
return False return False
for component in self.build_components: component = self.ready_components.pop()
if component.is_ready(): loop.call_soon(component.start_build, build_job)
loop.call_soon(component.start_build, build_job) return True
return True
return False def build_component_ready(self, build_component, loop):
self.ready_components.add(build_component)
def shutdown(self): def shutdown(self):
self.shutting_down = True self.shutting_down = True
@ -68,5 +69,6 @@ class EnterpriseManager(BaseManager):
self.job_complete_callback(build_job, job_status) self.job_complete_callback(build_job, job_status)
def build_component_disposed(self, build_component, timed_out): def build_component_disposed(self, build_component, timed_out):
self.build_components.remove(build_component) if build_component in self.ready_components:
self.ready_components.remove(build_component)

View file

@ -0,0 +1,145 @@
import logging
import etcd
import uuid
from datetime import datetime, timedelta
from buildman.manager.basemanager import BaseManager
from buildman.manager.executor import PopenExecutor, EC2Executor
from buildman.component.buildcomponent import BuildComponent
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Key prefix in etcd under which one key per in-flight build job is written (the build's uuid is
# appended to it to form the job key).
ETCD_BUILDER_PREFIX = 'building/'
def clear_etcd(client):
  """ Debugging helper which wipes the part of etcd used to track in-flight jobs: every
      non-directory leaf found under ETCD_BUILDER_PREFIX is deleted through the given client.
      A KeyError raised by the client is ignored (presumably the prefix does not exist yet).
  """
  try:
    in_flight = client.read(ETCD_BUILDER_PREFIX, recursive=True)
    for leaf in in_flight.leaves:
      if leaf.dir:
        continue

      logger.warning('Deleting key: %s', leaf.key)
      client.delete(leaf.key)
  except KeyError:
    pass
class EphemeralBuilderManager(BaseManager):
  """ Build manager implementation which starts a dedicated builder for every scheduled job
      through a pluggable executor, and tracks the jobs in flight as keys in etcd.
  """
  shutting_down = False

  def __init__(self, *args, **kwargs):
    self._manager_config = None
    self._etcd_client = None

    # Job passed to schedule() for each registered component which has not yet reported ready.
    self._component_to_job = {}

    # Id returned by the executor for the builder which was started for each component.
    self._component_to_builder = {}

    # Executor implementations which can be selected with the 'EXECUTOR' manager config key.
    self._executors = {
      'popen': PopenExecutor,
      'ec2': EC2Executor,
    }
    self._executor = None

    super(EphemeralBuilderManager, self).__init__(*args, **kwargs)

  def initialize(self, manager_config):
    """ Stores the manager config, instantiates the configured executor (PopenExecutor when the
        'EXECUTOR' key is missing or unknown) and connects to etcd.
    """
    logger.debug('Calling initialize')
    self._manager_config = manager_config

    executor_klass = self._executors.get(manager_config.get('EXECUTOR', ''), PopenExecutor)
    self._executor = executor_klass(manager_config.get('EXECUTOR_CONFIG', {}),
                                    self.public_ip_address)

    etcd_host = self._manager_config.get('ETCD_HOST', '127.0.0.1')
    etcd_port = self._manager_config.get('ETCD_PORT', 2379)
    logger.debug('Connecting to etcd on %s:%s', etcd_host, etcd_port)
    self._etcd_client = etcd.Client(host=etcd_host, port=etcd_port)

    # NOTE(review): this deletes every tracked job key on startup; if several managers share one
    # etcd this would also drop their jobs -- confirm it is meant for debugging only.
    clear_etcd(self._etcd_client)

  def setup_time(self):
    """ Returns the number of seconds a builder is given to come up ('MACHINE_SETUP_TIME',
        defaulting to 300).
    """
    setup_time = self._manager_config.get('MACHINE_SETUP_TIME', 300)
    logger.debug('Returning setup_time: %s', setup_time)
    return setup_time

  def shutdown(self):
    """ Not implemented yet for ephemeral builders; always raises NotImplementedError. """
    logger.debug('Calling shutdown.')
    raise NotImplementedError

  def schedule(self, build_job, loop):
    """ Tries to start a new builder for the given job.

        Returns False when the worker limit ('ALLOWED_WORKER_COUNT', default 2) has been reached
        or when the job's key already exists in etcd, and True once a builder has been started.
        If the builder cannot be started, everything recorded for the job is rolled back and the
        original exception is re-raised.
    """
    # Imported locally so this class does not depend on a module-level import being present.
    import json

    build_uuid = build_job.repo_build.uuid
    logger.debug('Calling schedule with job: %s', build_uuid)

    # Check if there are worker slots available by checking the number of jobs in etcd.
    allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 2)
    try:
      building = self._etcd_client.read(ETCD_BUILDER_PREFIX, recursive=True)
      workers_alive = sum(1 for child in building.children if not child.dir)
    except KeyError:
      workers_alive = 0

    logger.debug('Total jobs: %s', workers_alive)

    if workers_alive >= allowed_worker_count:
      logger.info('Too many workers alive, unable to start new worker. %s >= %s', workers_alive,
                  allowed_worker_count)
      return False

    job_key = self._etcd_job_key(build_job)

    # First try to take a lock for this job, meaning we will be responsible for its lifeline.
    realm = str(uuid.uuid4())
    token = str(uuid.uuid4())
    expiration = datetime.utcnow() + timedelta(seconds=self.setup_time())

    payload = {
      'expiration': expiration.isoformat(),
    }

    try:
      # Serialize explicitly so the stored value is JSON rather than whatever string form the
      # client would derive from a dict.
      self._etcd_client.write(job_key, json.dumps(payload), prevExist=False)
    except KeyError:
      # The job was already taken by someone else, we are probably a retry.
      logger.warning('Job already exists in etcd, did an old worker die?')
      return False

    # From here on we own the lock: anything that fails must release it again, otherwise the
    # job key keeps occupying a worker slot and blocks every retry of this job.
    component = None
    try:
      component = self.register_component(realm, BuildComponent, token=token)
      self._component_to_job[component] = build_job
      builder_id = self._executor.start_builder(realm, token)
    except Exception:
      logger.exception('Unable to start a builder for job %s, releasing its lock', build_uuid)
      self._abandon_job(job_key, component)
      raise

    self._component_to_builder[component] = builder_id
    return True

  def build_component_ready(self, build_component, loop):
    """ Hands the job which was scheduled for the component to it, now that it is ready. """
    try:
      job = self._component_to_job.pop(build_component)
      logger.debug('Sending build %s to newly ready component on realm %s', job.repo_build.uuid,
                   build_component.builder_realm)
      loop.call_soon(build_component.start_build, job)
    except KeyError:
      logger.warning('Builder is asking for more work, but work already completed')

  def build_component_disposed(self, build_component, timed_out):
    """ Forgets the job still waiting for the disposed component, if any. """
    logger.debug('Calling build_component_disposed.')

    # A disposed component can no longer report ready, so its pending job would otherwise be
    # referenced forever.
    self._component_to_job.pop(build_component, None)

  def job_completed(self, build_job, job_status, build_component):
    """ Stops the builder which ran the job, releases the job's key in etcd and reports the
        result through job_complete_callback. The key release and the callback happen even when
        no builder is tracked for the component or when stopping it fails.
    """
    logger.debug('Calling job_completed with status: %s', job_status)

    builder_id = self._component_to_builder.pop(build_component, None)
    try:
      # Kill the ephemeral builder.
      if builder_id is None:
        logger.warning('No builder is tracked for the component which completed build %s',
                       build_job.repo_build.uuid)
      else:
        self._executor.stop_builder(builder_id)
    finally:
      # Release the lock in etcd.
      self._release_job_lock(self._etcd_job_key(build_job))
      self.job_complete_callback(build_job, job_status)

  def _abandon_job(self, job_key, component):
    """ Rolls back the bookkeeping done by schedule() for a job whose builder never started. """
    if component is not None:
      self._component_to_job.pop(component, None)
      self.unregister_component(component)

    self._release_job_lock(job_key)

  def _release_job_lock(self, job_key):
    """ Deletes the job's key from etcd. A KeyError from the client is only logged, mirroring how
        the other etcd calls in this class treat KeyError as "no such key".
    """
    try:
      self._etcd_client.delete(job_key)
    except KeyError:
      logger.warning('Job key %s was not found in etcd while releasing it', job_key)

  @staticmethod
  def _etcd_job_key(build_job):
    """ Create a key which is used to track a job in etcd.
    """
    return '{0}{1}'.format(ETCD_BUILDER_PREFIX, build_job.repo_build.uuid)

View file

@ -0,0 +1,204 @@
import logging
import os
import uuid
import threading
import boto.ec2
import requests
import cachetools
from jinja2 import FileSystemLoader, Environment
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Number of seconds in one hour; used as the TTL of the cached CoreOS AMI lookup.
ONE_HOUR = 60*60
# Jinja2 environment loading templates from 'buildman/templates'. The path is relative, so it
# is resolved against the process working directory at import time.
ENV = Environment(loader=FileSystemLoader('buildman/templates'))
# Cloud config template rendered by BuilderExecutor.generate_cloud_config.
TEMPLATE = ENV.get_template('cloudconfig.yaml')
class ExecutorException(Exception):
  """ Raised by an executor when a builder could not be started or stopped. """
class BuilderExecutor(object):
  """ Interface which can be plugged into the EphemeralBuilderManager to provide a strategy for
      starting and stopping builders.
  """
  def __init__(self, executor_config, manager_public_ip):
    """ Stores the executor's config mapping and the manager's public IP address. """
    self.executor_config = executor_config
    self.manager_public_ip = manager_public_ip

  def start_builder(self, realm, token):
    """ Create a builder with the specified config. Returns a unique id which can be used to manage
        the builder.
    """
    raise NotImplementedError

  def stop_builder(self, builder_id):
    """ Stop a builder which is currently running.
    """
    raise NotImplementedError

  def get_manager_websocket_url(self):
    """ Returns the websocket URL template for the manager. """
    # NOTE(review): the '{0}' placeholder is returned unformatted and nothing follows the ':' --
    # this looks unfinished; confirm the intended host/port before relying on it.
    return 'ws://{0}:'

  def generate_cloud_config(self, realm, token, coreos_channel, manager_ip,
                            quay_username=None, quay_password=None, etcd_token=None):
    """ Renders the cloud config template for a builder.

        quay_username, quay_password and etcd_token fall back to the 'QUAY_USERNAME',
        'QUAY_PASSWORD' and 'ETCD_DISCOVERY_TOKEN' entries of the executor config when they are
        not given; a missing entry raises KeyError. Returns the rendered template.
    """
    if quay_username is None:
      quay_username = self.executor_config['QUAY_USERNAME']

    if quay_password is None:
      quay_password = self.executor_config['QUAY_PASSWORD']

    if etcd_token is None:
      etcd_token = self.executor_config['ETCD_DISCOVERY_TOKEN']

    return TEMPLATE.render(
      realm=realm,
      token=token,
      quay_username=quay_username,
      quay_password=quay_password,
      etcd_token=etcd_token,
      manager_ip=manager_ip,
      coreos_channel=coreos_channel,
    )
class EC2Executor(BuilderExecutor):
  """ Implementation of BuilderExecutor which uses boto to start and stop builder machines on
      Amazon EC2.
  """
  COREOS_STACK_URL = 'http://%s.release.core-os.net/amd64-usr/current/coreos_production_ami_hvm.txt'

  def _get_conn(self):
    """ Creates an ec2 connection which can be used to manage instances.
    """
    return boto.ec2.connect_to_region(
      self.executor_config['EC2_REGION'],
      aws_access_key_id=self.executor_config['AWS_ACCESS_KEY'],
      aws_secret_access_key=self.executor_config['AWS_SECRET_KEY'],
    )

  @classmethod
  @cachetools.ttl_cache(ttl=ONE_HOUR)
  def _get_coreos_ami(cls, ec2_region, coreos_channel):
    """ Retrieve the CoreOS AMI id from the canonical listing. The listing is parsed as
        'region=ami' pairs separated by '|'. Results are cached for one hour.

        Raises KeyError when the region is not in the listing and an HTTP error when the listing
        could not be downloaded.
    """
    response = requests.get(EC2Executor.COREOS_STACK_URL % coreos_channel)

    # Fail with the real HTTP error instead of trying to parse an error page as an AMI listing.
    response.raise_for_status()

    # Strip surrounding whitespace first so a trailing newline does not end up inside the last
    # AMI id of the listing.
    stack_list_string = response.text.strip()
    stack_amis = dict(stack.split('=', 1) for stack in stack_list_string.split('|'))
    return stack_amis[ec2_region]

  def start_builder(self, realm, token):
    """ Launches a single EC2 instance which runs a builder for the given realm and token.
        Returns the id of the launched instance. Raises ExecutorException unless exactly one
        instance was started.
    """
    region = self.executor_config['EC2_REGION']
    channel = self.executor_config.get('COREOS_CHANNEL', 'stable')
    coreos_ami = self._get_coreos_ami(region, channel)
    user_data = self.generate_cloud_config(realm, token, channel, self.manager_public_ip)

    # The rendered config embeds the builder token and the registry credentials, so its content
    # is deliberately kept out of the logs.
    logger.debug('Generated cloud config for realm: %s', realm)

    ec2_conn = self._get_conn()

    # NOTE(review): the config key is named EC2_SECURITY_GROUP_IDS but it is passed as
    # `security_groups`; confirm whether `security_group_ids` was intended.
    reservation = ec2_conn.run_instances(
      coreos_ami,
      instance_type=self.executor_config['EC2_INSTANCE_TYPE'],
      security_groups=self.executor_config['EC2_SECURITY_GROUP_IDS'],
      key_name=self.executor_config.get('EC2_KEY_NAME', None),
      user_data=user_data,
    )

    if not reservation.instances:
      raise ExecutorException('Unable to spawn builder instance.')
    elif len(reservation.instances) != 1:
      raise ExecutorException('EC2 started wrong number of instances!')

    # Hand back the instance id rather than the instance object: stop_builder() needs the id
    # for the stop request.
    return reservation.instances[0].id

  def stop_builder(self, builder_id):
    """ Force-stops the instance with the given id. An instance object carrying an `id`
        attribute is accepted as well. Raises ExecutorException when EC2 does not report the
        instance as stopped.
    """
    instance_id = getattr(builder_id, 'id', builder_id)

    ec2_conn = self._get_conn()
    stopped_instances = ec2_conn.stop_instances([instance_id], force=True)

    # The call returns instance objects, so compare by id.
    if instance_id not in [stopped.id for stopped in stopped_instances]:
      raise ExecutorException('Unable to stop instance: %s' % instance_id)
class PopenExecutor(BuilderExecutor):
  """ Implementation of BuilderExecutor which uses Popen to fork a quay-builder process.
  """
  # Binary which is started when the executor config has no 'BUILDER_BINARY' entry.
  DEFAULT_BUILDER_BINARY = '/Users/jake/bin/quay-builder'

  def __init__(self, executor_config, manager_public_ip):
    # Maps builder id -> (Popen object, LogPipe) for every builder which has not been stopped.
    self._jobs = {}

    super(PopenExecutor, self).__init__(executor_config, manager_public_ip)

  def start_builder(self, realm, token):
    """ Forks a builder process which is handed the realm and token through its environment.
        The binary can be overridden with the 'BUILDER_BINARY' executor config entry. Returns a
        freshly generated id under which the process is tracked.
    """
    logger.debug('Forking process for build')
    import subprocess
    builder_env = {
      'TOKEN': token,
      'REALM': realm,
      'ENDPOINT': 'ws://localhost:8787',
      'DOCKER_TLS_VERIFY': os.environ.get('DOCKER_TLS_VERIFY', ''),
      'DOCKER_CERT_PATH': os.environ.get('DOCKER_CERT_PATH', ''),
      'DOCKER_HOST': os.environ.get('DOCKER_HOST', ''),
    }

    builder_binary = self.executor_config.get('BUILDER_BINARY', self.DEFAULT_BUILDER_BINARY)

    logpipe = LogPipe(logging.INFO)
    try:
      spawned = subprocess.Popen(builder_binary, stdout=logpipe, stderr=logpipe,
                                 env=builder_env)
    except Exception:
      # Without a child nothing will ever close the pipe, which would leave the LogPipe thread
      # blocked on its read forever.
      logpipe.close()
      raise

    builder_id = str(uuid.uuid4())
    self._jobs[builder_id] = (spawned, logpipe)
    logger.debug('Builder spawned with id: %s', builder_id)
    return builder_id

  def stop_builder(self, builder_id):
    """ Kills the builder process tracked under the given id (when it is still running), closes
        its log pipe and stops tracking it. Raises ExecutorException for an unknown id.
    """
    if builder_id not in self._jobs:
      raise ExecutorException('Builder id not being tracked by executor.')

    logger.debug('Killing builder with id: %s', builder_id)

    # Forget the builder right away so a repeated stop is reported as unknown instead of closing
    # the pipe's file descriptor a second time.
    spawned, logpipe = self._jobs.pop(builder_id)
    try:
      if spawned.poll() is None:
        spawned.kill()
        # Reap the killed child so it does not linger as a zombie.
        spawned.wait()
    finally:
      logpipe.close()
class LogPipe(threading.Thread):
  """ Thread which owns an OS pipe: the write end is exposed through fileno() (so the object can
      be passed as stdout/stderr of a subprocess) and every line read from the other end is
      logged at a fixed level.

      Adapted from http://codereview.stackexchange.com/a/17959
  """
  def __init__(self, level):
    """Setup the object with a logger and a loglevel
       and start the thread
    """
    threading.Thread.__init__(self)
    self.daemon = False
    self.level = level
    self.fd_read, self.fd_write = os.pipe()
    self.pipe_reader = os.fdopen(self.fd_read)

    # Tracks whether close() already released the write end.
    self._write_closed = False

    self.start()

  def fileno(self):
    """Return the write file descriptor of the pipe
    """
    return self.fd_write

  def run(self):
    """Run the thread, logging everything.
    """
    # readline() returns '' once every copy of the write end has been closed.
    for line in iter(self.pipe_reader.readline, ''):
      logging.log(self.level, line.strip('\n'))

    self.pipe_reader.close()

  def close(self):
    """Close the write end of the pipe. Calling it again is a no-op.
    """
    # The descriptor number may be reused by the process after the first close, so closing it a
    # second time could hit an unrelated file (or raise OSError).
    if self._write_closed:
      return

    self._write_closed = True
    os.close(self.fd_write)

View file

@ -34,14 +34,15 @@ class BuilderServer(object):
""" Server which handles both HTTP and WAMP requests, managing the full state of the build """ Server which handles both HTTP and WAMP requests, managing the full state of the build
controller. controller.
""" """
def __init__(self, server_hostname, queue, build_logs, user_files, lifecycle_manager_klass): def __init__(self, registry_hostname, queue, build_logs, user_files, lifecycle_manager_klass,
lifecycle_manager_config, manager_public_ip):
self._loop = None self._loop = None
self._current_status = 'starting' self._current_status = 'starting'
self._current_components = [] self._current_components = []
self._job_count = 0 self._job_count = 0
self._session_factory = RouterSessionFactory(RouterFactory()) self._session_factory = RouterSessionFactory(RouterFactory())
self._server_hostname = server_hostname self._registry_hostname = registry_hostname
self._queue = queue self._queue = queue
self._build_logs = build_logs self._build_logs = build_logs
self._user_files = user_files self._user_files = user_files
@ -49,8 +50,10 @@ class BuilderServer(object):
self._register_component, self._register_component,
self._unregister_component, self._unregister_component,
self._job_heartbeat, self._job_heartbeat,
self._job_complete self._job_complete,
manager_public_ip,
) )
self._lifecycle_manager_config = lifecycle_manager_config
self._shutdown_event = Event() self._shutdown_event = Event()
self._current_status = 'running' self._current_status = 'running'
@ -69,7 +72,7 @@ class BuilderServer(object):
def run(self, host, ssl=None): def run(self, host, ssl=None):
logger.debug('Initializing the lifecycle manager') logger.debug('Initializing the lifecycle manager')
self._lifecycle_manager.initialize() self._lifecycle_manager.initialize(self._lifecycle_manager_config)
logger.debug('Initializing all members of the event loop') logger.debug('Initializing all members of the event loop')
loop = trollius.get_event_loop() loop = trollius.get_event_loop()
@ -102,7 +105,7 @@ class BuilderServer(object):
component.parent_manager = self._lifecycle_manager component.parent_manager = self._lifecycle_manager
component.build_logs = self._build_logs component.build_logs = self._build_logs
component.user_files = self._user_files component.user_files = self._user_files
component.server_hostname = self._server_hostname component.registry_hostname = self._registry_hostname
self._current_components.append(component) self._current_components.append(component)
self._session_factory.add(component) self._session_factory.add(component)
@ -116,16 +119,16 @@ class BuilderServer(object):
self._session_factory.remove(component) self._session_factory.remove(component)
def _job_heartbeat(self, build_job): def _job_heartbeat(self, build_job):
WorkQueue.extend_processing(build_job.job_item(), seconds_from_now=JOB_TIMEOUT_SECONDS, WorkQueue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS,
retry_count=1, minimum_extension=MINIMUM_JOB_EXTENSION) retry_count=1, minimum_extension=MINIMUM_JOB_EXTENSION)
def _job_complete(self, build_job, job_status): def _job_complete(self, build_job, job_status):
if job_status == BuildJobResult.INCOMPLETE: if job_status == BuildJobResult.INCOMPLETE:
self._queue.incomplete(build_job.job_item(), restore_retry=True, retry_after=30) self._queue.incomplete(build_job.job_item, restore_retry=True, retry_after=30)
elif job_status == BuildJobResult.ERROR: elif job_status == BuildJobResult.ERROR:
self._queue.incomplete(build_job.job_item(), restore_retry=False) self._queue.incomplete(build_job.job_item, restore_retry=False)
else: else:
self._queue.complete(build_job.job_item()) self._queue.complete(build_job.job_item)
self._job_count = self._job_count - 1 self._job_count = self._job_count - 1

View file

@ -0,0 +1,38 @@
#cloud-config

# Template for the cloud config handed to a freshly started builder machine. The {{ ... }}
# placeholders are substituted before the config is passed to the instance.

write_files:
# Environment file which is passed to the builder container through --env-file.
- path: /root/overrides.list
  permissions: '0644'
  content: |
    REALM={{ realm }}
    TOKEN={{ token }}
    ENDPOINT=wss://buildman.quay.io:8787

coreos:
  update:
    reboot-strategy: off
    group: {{ coreos_channel }}

  etcd:
    discovery: https://discovery.etcd.io/{{ etcd_token }}
    # multi-region and multi-cloud deployments need to use $public_ipv4
    addr: $private_ipv4:4001
    peer-addr: $private_ipv4:7001

  units:
  - name: quay-builder.service
    command: start
    content: |
      [Unit]
      Description=Quay builder container
      Author=Jake Moshenko
      After=docker.service

      [Service]
      Restart=always
      TimeoutStartSec=600
      TimeoutStopSec=2000
      ExecStartPre=/usr/bin/sudo /bin/sh -xc "echo '{{ manager_ip }} buildman.quay.io' >> /etc/hosts; exit 0"
      ExecStartPre=/usr/bin/docker login -u {{ quay_username }} -p {{ quay_password }} -e unused quay.io
      ExecStart=/usr/bin/docker run --rm --net=host --name quay-builder --privileged --env-file /root/overrides.list -v /var/run/docker.sock:/var/run/docker.sock quay.io/coreos/registry-build-worker:latest
      ExecStop=/usr/bin/docker stop quay-builder

View file

@ -41,3 +41,5 @@ git+https://github.com/DevTable/aniso8601-fake.git
git+https://github.com/DevTable/anunidecode.git git+https://github.com/DevTable/anunidecode.git
git+https://github.com/DevTable/avatar-generator.git git+https://github.com/DevTable/avatar-generator.git
gipc gipc
python-etcd
cachetools