First implementation of ephemeral build lifecycle manager.
This commit is contained in:
parent
79b61e7709
commit
2d7e844753
10 changed files with 453 additions and 56 deletions
|
@ -6,6 +6,7 @@ import time
|
|||
from app import app, userfiles as user_files, build_logs, dockerfile_build_queue
|
||||
|
||||
from buildman.manager.enterprise import EnterpriseManager
|
||||
from buildman.manager.ephemeral import EphemeralBuilderManager
|
||||
from buildman.server import BuilderServer
|
||||
|
||||
from trollius import SSLContext
|
||||
|
@ -13,7 +14,8 @@ from trollius import SSLContext
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
BUILD_MANAGERS = {
|
||||
'enterprise': EnterpriseManager
|
||||
'enterprise': EnterpriseManager,
|
||||
'ephemeral': EphemeralBuilderManager,
|
||||
}
|
||||
|
||||
EXTERNALLY_MANAGED = 'external'
|
||||
|
@ -39,6 +41,9 @@ def run_build_manager():
|
|||
if manager_klass is None:
|
||||
return
|
||||
|
||||
public_ip = os.environ.get('PUBLIC_IP', '127.0.0.1')
|
||||
logger.debug('Will pass public IP address %s to builders for websocket connection', public_ip)
|
||||
|
||||
logger.debug('Starting build manager with lifecycle "%s"', build_manager_config[0])
|
||||
ssl_context = None
|
||||
if os.environ.get('SSL_CONFIG'):
|
||||
|
@ -48,7 +53,7 @@ def run_build_manager():
|
|||
os.environ.get('SSL_CONFIG') + '/ssl.key')
|
||||
|
||||
server = BuilderServer(app.config['SERVER_HOSTNAME'], dockerfile_build_queue, build_logs,
|
||||
user_files, manager_klass)
|
||||
user_files, manager_klass, build_manager_config[1], public_ip)
|
||||
server.run('0.0.0.0', ssl=ssl_context)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -39,7 +39,7 @@ class BuildComponent(BaseComponent):
|
|||
self.builder_realm = realm
|
||||
|
||||
self.parent_manager = None
|
||||
self.server_hostname = None
|
||||
self.registry_hostname = None
|
||||
|
||||
self._component_status = ComponentStatus.JOINING
|
||||
self._last_heartbeat = None
|
||||
|
@ -68,13 +68,13 @@ class BuildComponent(BaseComponent):
|
|||
def start_build(self, build_job):
|
||||
""" Starts a build. """
|
||||
self._current_job = build_job
|
||||
self._build_status = StatusHandler(self.build_logs, build_job.repo_build())
|
||||
self._build_status = StatusHandler(self.build_logs, build_job.repo_build)
|
||||
self._image_info = {}
|
||||
|
||||
self._set_status(ComponentStatus.BUILDING)
|
||||
|
||||
# Retrieve the job's buildpack.
|
||||
buildpack_url = self.user_files.get_file_url(build_job.repo_build().resource_key,
|
||||
buildpack_url = self.user_files.get_file_url(build_job.repo_build.resource_key,
|
||||
requires_cors=False)
|
||||
|
||||
logger.debug('Retreiving build package: %s', buildpack_url)
|
||||
|
@ -89,7 +89,7 @@ class BuildComponent(BaseComponent):
|
|||
parsed_dockerfile = None
|
||||
logger.debug('Parsing dockerfile')
|
||||
|
||||
build_config = build_job.build_config()
|
||||
build_config = build_job.build_config
|
||||
try:
|
||||
parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
|
||||
except BuildPackageException as bpe:
|
||||
|
@ -116,7 +116,7 @@ class BuildComponent(BaseComponent):
|
|||
base_image_information['password'] = build_config['pull_credentials'].get('password', '')
|
||||
|
||||
# Retrieve the repository's fully qualified name.
|
||||
repo = build_job.repo_build().repository
|
||||
repo = build_job.repo_build.repository
|
||||
repository_name = repo.namespace_user.username + '/' + repo.name
|
||||
|
||||
# Parse the build queue item into build arguments.
|
||||
|
@ -136,9 +136,9 @@ class BuildComponent(BaseComponent):
|
|||
'build_package': buildpack_url,
|
||||
'sub_directory': build_config.get('build_subdir', ''),
|
||||
'repository': repository_name,
|
||||
'registry': self.server_hostname,
|
||||
'pull_token': build_job.repo_build().access_token.code,
|
||||
'push_token': build_job.repo_build().access_token.code,
|
||||
'registry': self.registry_hostname,
|
||||
'pull_token': build_job.repo_build.access_token.code,
|
||||
'push_token': build_job.repo_build.access_token.code,
|
||||
'tag_names': build_config.get('docker_tags', ['latest']),
|
||||
'base_image': base_image_information,
|
||||
'cached_tag': build_job.determine_cached_tag() or ''
|
||||
|
@ -244,7 +244,7 @@ class BuildComponent(BaseComponent):
|
|||
'internal_error': exception.message if exception else None
|
||||
})
|
||||
|
||||
build_id = self._current_job.repo_build().uuid
|
||||
build_id = self._current_job.repo_build.uuid
|
||||
logger.warning('Build %s failed with message: %s', build_id, error_message)
|
||||
|
||||
# Mark that the build has finished (in an error state)
|
||||
|
@ -305,6 +305,10 @@ class BuildComponent(BaseComponent):
|
|||
return True
|
||||
|
||||
def _set_status(self, phase):
|
||||
if phase == ComponentStatus.RUNNING:
|
||||
loop = trollius.get_event_loop()
|
||||
self.parent_manager.build_component_ready(self, loop)
|
||||
|
||||
self._component_status = phase
|
||||
|
||||
def _on_heartbeat(self):
|
||||
|
|
|
@ -9,50 +9,38 @@ class BuildJobLoadException(Exception):
|
|||
class BuildJob(object):
|
||||
""" Represents a single in-progress build job. """
|
||||
def __init__(self, job_item):
|
||||
self._job_item = job_item
|
||||
self.job_item = job_item
|
||||
|
||||
try:
|
||||
self._job_details = json.loads(job_item.body)
|
||||
self.job_details = json.loads(job_item.body)
|
||||
except ValueError:
|
||||
raise BuildJobLoadException(
|
||||
'Could not parse build queue item config with ID %s' % self._job_details['build_uuid']
|
||||
'Could not parse build queue item config with ID %s' % self.job_details['build_uuid']
|
||||
)
|
||||
|
||||
try:
|
||||
self._repo_build = model.get_repository_build(self._job_details['build_uuid'])
|
||||
self.repo_build = model.get_repository_build(self.job_details['build_uuid'])
|
||||
except model.InvalidRepositoryBuildException:
|
||||
raise BuildJobLoadException(
|
||||
'Could not load repository build with ID %s' % self._job_details['build_uuid'])
|
||||
'Could not load repository build with ID %s' % self.job_details['build_uuid'])
|
||||
|
||||
try:
|
||||
self._build_config = json.loads(self._repo_build.job_config)
|
||||
self.build_config = json.loads(self.repo_build.job_config)
|
||||
except ValueError:
|
||||
raise BuildJobLoadException(
|
||||
'Could not parse repository build job config with ID %s' % self._job_details['build_uuid']
|
||||
'Could not parse repository build job config with ID %s' % self.job_details['build_uuid']
|
||||
)
|
||||
|
||||
def determine_cached_tag(self):
|
||||
""" Returns the tag to pull to prime the cache or None if none. """
|
||||
# TODO(jschorr): Change this to use the more complicated caching rules, once we have caching
|
||||
# be a pull of things besides the constructed tags.
|
||||
tags = self._build_config.get('docker_tags', ['latest'])
|
||||
existing_tags = model.list_repository_tags(self._repo_build.repository.namespace_user.username,
|
||||
self._repo_build.repository.name)
|
||||
tags = self.build_config.get('docker_tags', ['latest'])
|
||||
existing_tags = model.list_repository_tags(self.repo_build.repository.namespace_user.username,
|
||||
self.repo_build.repository.name)
|
||||
|
||||
cached_tags = set(tags) & set([tag.name for tag in existing_tags])
|
||||
if cached_tags:
|
||||
return list(cached_tags)[0]
|
||||
|
||||
return None
|
||||
|
||||
def job_item(self):
|
||||
""" Returns the job's queue item. """
|
||||
return self._job_item
|
||||
|
||||
def repo_build(self):
|
||||
""" Returns the repository build DB row for the job. """
|
||||
return self._repo_build
|
||||
|
||||
def build_config(self):
|
||||
""" Returns the parsed repository build config for the job. """
|
||||
return self._build_config
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
class BaseManager(object):
|
||||
""" Base for all worker managers. """
|
||||
def __init__(self, register_component, unregister_component, job_heartbeat_callback,
|
||||
job_complete_callback):
|
||||
job_complete_callback, public_ip_address):
|
||||
self.register_component = register_component
|
||||
self.unregister_component = unregister_component
|
||||
self.job_heartbeat_callback = job_heartbeat_callback
|
||||
self.job_complete_callback = job_complete_callback
|
||||
self.public_ip_address = public_ip_address
|
||||
|
||||
def job_heartbeat(self, build_job):
|
||||
""" Method invoked to tell the manager that a job is still running. This method will be called
|
||||
|
@ -31,11 +32,16 @@ class BaseManager(object):
|
|||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def initialize(self):
|
||||
def initialize(self, manager_config):
|
||||
""" Runs any initialization code for the manager. Called once the server is in a ready state.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def build_component_ready(self, build_component, loop):
|
||||
""" Method invoked whenever a build component announces itself as ready.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def build_component_disposed(self, build_component, timed_out):
|
||||
""" Method invoked whenever a build component has been disposed. The timed_out boolean indicates
|
||||
whether the component's heartbeat timed out.
|
||||
|
|
|
@ -28,10 +28,12 @@ class DynamicRegistrationComponent(BaseComponent):
|
|||
|
||||
class EnterpriseManager(BaseManager):
|
||||
""" Build manager implementation for the Enterprise Registry. """
|
||||
build_components = []
|
||||
shutting_down = False
|
||||
|
||||
def initialize(self):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.ready_components = set()
|
||||
self.shutting_down = False
|
||||
|
||||
def initialize(self, manager_config):
|
||||
# Add a component which is used by build workers for dynamic registration. Unlike
|
||||
# production, build workers in enterprise are long-lived and register dynamically.
|
||||
self.register_component(REGISTRATION_REALM, DynamicRegistrationComponent)
|
||||
|
@ -45,21 +47,20 @@ class EnterpriseManager(BaseManager):
|
|||
""" Adds a new build component for an Enterprise Registry. """
|
||||
# Generate a new unique realm ID for the build worker.
|
||||
realm = str(uuid.uuid4())
|
||||
component = self.register_component(realm, BuildComponent, token="")
|
||||
self.build_components.append(component)
|
||||
self.register_component(realm, BuildComponent, token="")
|
||||
return realm
|
||||
|
||||
def schedule(self, build_job, loop):
|
||||
""" Schedules a build for an Enterprise Registry. """
|
||||
if self.shutting_down:
|
||||
if self.shutting_down or not self.ready_components:
|
||||
return False
|
||||
|
||||
for component in self.build_components:
|
||||
if component.is_ready():
|
||||
loop.call_soon(component.start_build, build_job)
|
||||
return True
|
||||
component = self.ready_components.pop()
|
||||
loop.call_soon(component.start_build, build_job)
|
||||
return True
|
||||
|
||||
return False
|
||||
def build_component_ready(self, build_component, loop):
|
||||
self.ready_components.add(build_component)
|
||||
|
||||
def shutdown(self):
|
||||
self.shutting_down = True
|
||||
|
@ -68,5 +69,6 @@ class EnterpriseManager(BaseManager):
|
|||
self.job_complete_callback(build_job, job_status)
|
||||
|
||||
def build_component_disposed(self, build_component, timed_out):
|
||||
self.build_components.remove(build_component)
|
||||
if build_component in self.ready_components:
|
||||
self.ready_components.remove(build_component)
|
||||
|
||||
|
|
145
buildman/manager/ephemeral.py
Normal file
145
buildman/manager/ephemeral.py
Normal file
|
@ -0,0 +1,145 @@
|
|||
import logging
|
||||
import etcd
|
||||
import uuid
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from buildman.manager.basemanager import BaseManager
|
||||
from buildman.manager.executor import PopenExecutor, EC2Executor
|
||||
from buildman.component.buildcomponent import BuildComponent
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
ETCD_BUILDER_PREFIX = 'building/'
|
||||
|
||||
|
||||
def clear_etcd(client):
|
||||
""" Debugging method used to clear out the section of etcd we are using to track jobs in flight.
|
||||
"""
|
||||
try:
|
||||
building = client.read(ETCD_BUILDER_PREFIX, recursive=True)
|
||||
for child in building.leaves:
|
||||
if not child.dir:
|
||||
logger.warning('Deleting key: %s', child.key)
|
||||
client.delete(child.key)
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
|
||||
class EphemeralBuilderManager(BaseManager):
|
||||
""" Build manager implementation for the Enterprise Registry. """
|
||||
shutting_down = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._manager_config = None
|
||||
self._etcd_client = None
|
||||
|
||||
self._component_to_job = {}
|
||||
self._component_to_builder = {}
|
||||
|
||||
self._executors = {
|
||||
'popen': PopenExecutor,
|
||||
'ec2': EC2Executor,
|
||||
}
|
||||
self._executor = None
|
||||
|
||||
super(EphemeralBuilderManager, self).__init__(*args, **kwargs)
|
||||
|
||||
def initialize(self, manager_config):
|
||||
logger.debug('Calling initialize')
|
||||
self._manager_config = manager_config
|
||||
|
||||
executor_klass = self._executors.get(manager_config.get('EXECUTOR', ''), PopenExecutor)
|
||||
self._executor = executor_klass(manager_config.get('EXECUTOR_CONFIG', {}),
|
||||
self.public_ip_address)
|
||||
|
||||
etcd_host = self._manager_config.get('ETCD_HOST', '127.0.0.1')
|
||||
etcd_port = self._manager_config.get('ETCD_PORT', 2379)
|
||||
logger.debug('Connecting to etcd on %s:%s', etcd_host, etcd_port)
|
||||
self._etcd_client = etcd.Client(host=etcd_host, port=etcd_port)
|
||||
|
||||
clear_etcd(self._etcd_client)
|
||||
|
||||
def setup_time(self):
|
||||
setup_time = self._manager_config.get('MACHINE_SETUP_TIME', 300)
|
||||
logger.debug('Returning setup_time: %s', setup_time)
|
||||
return setup_time
|
||||
|
||||
def shutdown(self):
|
||||
logger.debug('Calling shutdown.')
|
||||
raise NotImplementedError
|
||||
|
||||
def schedule(self, build_job, loop):
|
||||
logger.debug('Calling schedule with job: %s', build_job.repo_build.uuid)
|
||||
|
||||
# Check if there are worker slots avialable by checking the number of jobs in etcd
|
||||
allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 2)
|
||||
try:
|
||||
building = self._etcd_client.read(ETCD_BUILDER_PREFIX, recursive=True)
|
||||
workers_alive = sum(1 for child in building.children if not child.dir)
|
||||
except KeyError:
|
||||
workers_alive = 0
|
||||
|
||||
logger.debug('Total jobs: %s', workers_alive)
|
||||
|
||||
if workers_alive >= allowed_worker_count:
|
||||
logger.info('Too many workers alive, unable to start new worker. %s >= %s', workers_alive,
|
||||
allowed_worker_count)
|
||||
return False
|
||||
|
||||
job_key = self._etcd_job_key(build_job)
|
||||
|
||||
# First try to take a lock for this job, meaning we will be responsible for its lifeline
|
||||
realm = str(uuid.uuid4())
|
||||
token = str(uuid.uuid4())
|
||||
expiration = datetime.utcnow() + timedelta(seconds=self.setup_time())
|
||||
|
||||
payload = {
|
||||
'expiration': expiration.isoformat(),
|
||||
}
|
||||
|
||||
try:
|
||||
self._etcd_client.write(job_key, payload, prevExist=False)
|
||||
component = self.register_component(realm, BuildComponent, token=token)
|
||||
self._component_to_job[component] = build_job
|
||||
except KeyError:
|
||||
# The job was already taken by someone else, we are probably a retry
|
||||
logger.warning('Job already exists in etcd, did an old worker die?')
|
||||
return False
|
||||
|
||||
builder_id = self._executor.start_builder(realm, token)
|
||||
self._component_to_builder[component] = builder_id
|
||||
|
||||
return True
|
||||
|
||||
def build_component_ready(self, build_component, loop):
|
||||
try:
|
||||
job = self._component_to_job.pop(build_component)
|
||||
logger.debug('Sending build %s to newly ready component on realm %s', job.repo_build.uuid,
|
||||
build_component.builder_realm)
|
||||
loop.call_soon(build_component.start_build, job)
|
||||
except KeyError:
|
||||
logger.warning('Builder is asking for more work, but work already completed')
|
||||
|
||||
def build_component_disposed(self, build_component, timed_out):
|
||||
logger.debug('Calling build_component_disposed.')
|
||||
|
||||
def job_completed(self, build_job, job_status, build_component):
|
||||
logger.debug('Calling job_completed with status: %s', job_status)
|
||||
|
||||
# Kill he ephmeral builder
|
||||
self._executor.stop_builder(self._component_to_builder.pop(build_component))
|
||||
|
||||
# Release the lock in etcd
|
||||
job_key = self._etcd_job_key(build_job)
|
||||
self._etcd_client.delete(job_key)
|
||||
|
||||
self.job_complete_callback(build_job, job_status)
|
||||
|
||||
@staticmethod
|
||||
def _etcd_job_key(build_job):
|
||||
""" Create a key which is used to track a job in etcd.
|
||||
"""
|
||||
return '{0}{1}'.format(ETCD_BUILDER_PREFIX, build_job.repo_build.uuid)
|
204
buildman/manager/executor.py
Normal file
204
buildman/manager/executor.py
Normal file
|
@ -0,0 +1,204 @@
|
|||
import logging
|
||||
import os
|
||||
import uuid
|
||||
import threading
|
||||
import boto.ec2
|
||||
import requests
|
||||
import cachetools
|
||||
|
||||
from jinja2 import FileSystemLoader, Environment
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
ONE_HOUR = 60*60
|
||||
|
||||
ENV = Environment(loader=FileSystemLoader('buildman/templates'))
|
||||
TEMPLATE = ENV.get_template('cloudconfig.yaml')
|
||||
|
||||
|
||||
class ExecutorException(Exception):
|
||||
""" Exception raised when there is a problem starting or stopping a builder.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class BuilderExecutor(object):
|
||||
def __init__(self, executor_config, manager_public_ip):
|
||||
self.executor_config = executor_config
|
||||
self.manager_public_ip = manager_public_ip
|
||||
|
||||
""" Interface which can be plugged into the EphemeralNodeManager to provide a strategy for
|
||||
starting and stopping builders.
|
||||
"""
|
||||
def start_builder(self, realm, token):
|
||||
""" Create a builder with the specified config. Returns a unique id which can be used to manage
|
||||
the builder.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def stop_builder(self, builder_id):
|
||||
""" Stop a builder which is currently running.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def get_manager_websocket_url(self):
|
||||
return 'ws://{0}:'
|
||||
|
||||
def generate_cloud_config(self, realm, token, coreos_channel, manager_ip,
|
||||
quay_username=None, quay_password=None, etcd_token=None):
|
||||
if quay_username is None:
|
||||
quay_username = self.executor_config['QUAY_USERNAME']
|
||||
|
||||
if quay_password is None:
|
||||
quay_password = self.executor_config['QUAY_PASSWORD']
|
||||
|
||||
if etcd_token is None:
|
||||
etcd_token = self.executor_config['ETCD_DISCOVERY_TOKEN']
|
||||
|
||||
return TEMPLATE.render(
|
||||
realm=realm,
|
||||
token=token,
|
||||
quay_username=quay_username,
|
||||
quay_password=quay_password,
|
||||
etcd_token=etcd_token,
|
||||
manager_ip=manager_ip,
|
||||
coreos_channel=coreos_channel,
|
||||
)
|
||||
|
||||
|
||||
class EC2Executor(BuilderExecutor):
|
||||
""" Implementation of BuilderExecutor which uses libcloud to start machines on a variety of cloud
|
||||
providers.
|
||||
"""
|
||||
COREOS_STACK_URL = 'http://%s.release.core-os.net/amd64-usr/current/coreos_production_ami_hvm.txt'
|
||||
|
||||
def _get_conn(self):
|
||||
""" Creates an ec2 connection which can be used to manage instances.
|
||||
"""
|
||||
return boto.ec2.connect_to_region(
|
||||
self.executor_config['EC2_REGION'],
|
||||
aws_access_key_id=self.executor_config['AWS_ACCESS_KEY'],
|
||||
aws_secret_access_key=self.executor_config['AWS_SECRET_KEY'],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@cachetools.ttl_cache(ttl=ONE_HOUR)
|
||||
def _get_coreos_ami(cls, ec2_region, coreos_channel):
|
||||
""" Retrieve the CoreOS AMI id from the canonical listing.
|
||||
"""
|
||||
stack_list_string = requests.get(EC2Executor.COREOS_STACK_URL % coreos_channel).text
|
||||
stack_amis = dict([stack.split('=') for stack in stack_list_string.split('|')])
|
||||
return stack_amis[ec2_region]
|
||||
|
||||
def start_builder(self, realm, token):
|
||||
region = self.executor_config['EC2_REGION']
|
||||
channel = self.executor_config.get('COREOS_CHANNEL', 'stable')
|
||||
coreos_ami = self._get_coreos_ami(region, channel)
|
||||
user_data = self.generate_cloud_config(realm, token, channel, self.manager_public_ip)
|
||||
|
||||
logger.debug('Generated cloud config: %s', user_data)
|
||||
|
||||
ec2_conn = self._get_conn()
|
||||
# class FakeReservation(object):
|
||||
# def __init__(self):
|
||||
# self.instances = None
|
||||
# reservation = FakeReservation()
|
||||
reservation = ec2_conn.run_instances(
|
||||
coreos_ami,
|
||||
instance_type=self.executor_config['EC2_INSTANCE_TYPE'],
|
||||
security_groups=self.executor_config['EC2_SECURITY_GROUP_IDS'],
|
||||
key_name=self.executor_config.get('EC2_KEY_NAME', None),
|
||||
user_data=user_data,
|
||||
)
|
||||
|
||||
if not reservation.instances:
|
||||
raise ExecutorException('Unable to spawn builder instance.')
|
||||
elif len(reservation.instances) != 1:
|
||||
raise ExecutorException('EC2 started wrong number of instances!')
|
||||
|
||||
return reservation.instances[0]
|
||||
|
||||
def stop_builder(self, builder_id):
|
||||
ec2_conn = self._get_conn()
|
||||
stopped_instances = ec2_conn.stop_instances([builder_id], force=True)
|
||||
if builder_id not in stopped_instances:
|
||||
raise ExecutorException('Unable to stop instance: %s' % builder_id)
|
||||
|
||||
class PopenExecutor(BuilderExecutor):
|
||||
""" Implementation of BuilderExecutor which uses Popen to fork a quay-builder process.
|
||||
"""
|
||||
def __init__(self, executor_config, manager_public_ip):
|
||||
self._jobs = {}
|
||||
|
||||
super(PopenExecutor, self).__init__(executor_config, manager_public_ip)
|
||||
|
||||
""" Executor which uses Popen to fork a quay-builder process.
|
||||
"""
|
||||
def start_builder(self, realm, token):
|
||||
# Now start a machine for this job, adding the machine id to the etcd information
|
||||
logger.debug('Forking process for build')
|
||||
import subprocess
|
||||
builder_env = {
|
||||
'TOKEN': token,
|
||||
'REALM': realm,
|
||||
'ENDPOINT': 'ws://localhost:8787',
|
||||
'DOCKER_TLS_VERIFY': os.environ.get('DOCKER_TLS_VERIFY', ''),
|
||||
'DOCKER_CERT_PATH': os.environ.get('DOCKER_CERT_PATH', ''),
|
||||
'DOCKER_HOST': os.environ.get('DOCKER_HOST', ''),
|
||||
}
|
||||
|
||||
logpipe = LogPipe(logging.INFO)
|
||||
spawned = subprocess.Popen('/Users/jake/bin/quay-builder', stdout=logpipe, stderr=logpipe,
|
||||
env=builder_env)
|
||||
|
||||
builder_id = str(uuid.uuid4())
|
||||
self._jobs[builder_id] = (spawned, logpipe)
|
||||
logger.debug('Builder spawned with id: %s', builder_id)
|
||||
return builder_id
|
||||
|
||||
|
||||
def stop_builder(self, builder_id):
|
||||
if builder_id not in self._jobs:
|
||||
raise ExecutorException('Builder id not being tracked by executor.')
|
||||
|
||||
logger.debug('Killing builder with id: %s', builder_id)
|
||||
spawned, logpipe = self._jobs[builder_id]
|
||||
|
||||
if spawned.poll() is None:
|
||||
spawned.kill()
|
||||
logpipe.close()
|
||||
|
||||
|
||||
class LogPipe(threading.Thread):
|
||||
""" Adapted from http://codereview.stackexchange.com/a/17959
|
||||
"""
|
||||
def __init__(self, level):
|
||||
"""Setup the object with a logger and a loglevel
|
||||
and start the thread
|
||||
"""
|
||||
threading.Thread.__init__(self)
|
||||
self.daemon = False
|
||||
self.level = level
|
||||
self.fd_read, self.fd_write = os.pipe()
|
||||
self.pipe_reader = os.fdopen(self.fd_read)
|
||||
self.start()
|
||||
|
||||
def fileno(self):
|
||||
"""Return the write file descriptor of the pipe
|
||||
"""
|
||||
return self.fd_write
|
||||
|
||||
def run(self):
|
||||
"""Run the thread, logging everything.
|
||||
"""
|
||||
for line in iter(self.pipe_reader.readline, ''):
|
||||
logging.log(self.level, line.strip('\n'))
|
||||
|
||||
self.pipe_reader.close()
|
||||
|
||||
def close(self):
|
||||
"""Close the write end of the pipe.
|
||||
"""
|
||||
os.close(self.fd_write)
|
|
@ -34,14 +34,15 @@ class BuilderServer(object):
|
|||
""" Server which handles both HTTP and WAMP requests, managing the full state of the build
|
||||
controller.
|
||||
"""
|
||||
def __init__(self, server_hostname, queue, build_logs, user_files, lifecycle_manager_klass):
|
||||
def __init__(self, registry_hostname, queue, build_logs, user_files, lifecycle_manager_klass,
|
||||
lifecycle_manager_config, manager_public_ip):
|
||||
self._loop = None
|
||||
self._current_status = 'starting'
|
||||
self._current_components = []
|
||||
self._job_count = 0
|
||||
|
||||
self._session_factory = RouterSessionFactory(RouterFactory())
|
||||
self._server_hostname = server_hostname
|
||||
self._registry_hostname = registry_hostname
|
||||
self._queue = queue
|
||||
self._build_logs = build_logs
|
||||
self._user_files = user_files
|
||||
|
@ -49,8 +50,10 @@ class BuilderServer(object):
|
|||
self._register_component,
|
||||
self._unregister_component,
|
||||
self._job_heartbeat,
|
||||
self._job_complete
|
||||
self._job_complete,
|
||||
manager_public_ip,
|
||||
)
|
||||
self._lifecycle_manager_config = lifecycle_manager_config
|
||||
|
||||
self._shutdown_event = Event()
|
||||
self._current_status = 'running'
|
||||
|
@ -69,7 +72,7 @@ class BuilderServer(object):
|
|||
|
||||
def run(self, host, ssl=None):
|
||||
logger.debug('Initializing the lifecycle manager')
|
||||
self._lifecycle_manager.initialize()
|
||||
self._lifecycle_manager.initialize(self._lifecycle_manager_config)
|
||||
|
||||
logger.debug('Initializing all members of the event loop')
|
||||
loop = trollius.get_event_loop()
|
||||
|
@ -102,7 +105,7 @@ class BuilderServer(object):
|
|||
component.parent_manager = self._lifecycle_manager
|
||||
component.build_logs = self._build_logs
|
||||
component.user_files = self._user_files
|
||||
component.server_hostname = self._server_hostname
|
||||
component.registry_hostname = self._registry_hostname
|
||||
|
||||
self._current_components.append(component)
|
||||
self._session_factory.add(component)
|
||||
|
@ -116,16 +119,16 @@ class BuilderServer(object):
|
|||
self._session_factory.remove(component)
|
||||
|
||||
def _job_heartbeat(self, build_job):
|
||||
WorkQueue.extend_processing(build_job.job_item(), seconds_from_now=JOB_TIMEOUT_SECONDS,
|
||||
WorkQueue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS,
|
||||
retry_count=1, minimum_extension=MINIMUM_JOB_EXTENSION)
|
||||
|
||||
def _job_complete(self, build_job, job_status):
|
||||
if job_status == BuildJobResult.INCOMPLETE:
|
||||
self._queue.incomplete(build_job.job_item(), restore_retry=True, retry_after=30)
|
||||
self._queue.incomplete(build_job.job_item, restore_retry=True, retry_after=30)
|
||||
elif job_status == BuildJobResult.ERROR:
|
||||
self._queue.incomplete(build_job.job_item(), restore_retry=False)
|
||||
self._queue.incomplete(build_job.job_item, restore_retry=False)
|
||||
else:
|
||||
self._queue.complete(build_job.job_item())
|
||||
self._queue.complete(build_job.job_item)
|
||||
|
||||
self._job_count = self._job_count - 1
|
||||
|
||||
|
|
38
buildman/templates/cloudconfig.yaml
Normal file
38
buildman/templates/cloudconfig.yaml
Normal file
|
@ -0,0 +1,38 @@
|
|||
#cloud-config
|
||||
|
||||
write_files:
|
||||
- path: /root/overrides.list
|
||||
permission: '0644'
|
||||
content: |
|
||||
REALM={{ realm }}
|
||||
TOKEN={{ token }}
|
||||
ENDPOINT=wss://buildman.quay.io:8787
|
||||
|
||||
coreos:
|
||||
update:
|
||||
reboot-strategy: off
|
||||
group: {{ coreos_channel }}
|
||||
|
||||
etcd:
|
||||
discovery: https://discovery.etcd.io/{{ etcd_token }}
|
||||
# multi-region and multi-cloud deployments need to use $public_ipv4
|
||||
addr: $private_ipv4:4001
|
||||
peer-addr: $private_ipv4:7001
|
||||
|
||||
units:
|
||||
- name: quay-builder.service
|
||||
command: start
|
||||
content: |
|
||||
[Unit]
|
||||
Description=Quay builder container
|
||||
Author=Jake Moshenko
|
||||
After=docker.service
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
TimeoutStartSec=600
|
||||
TimeoutStopSec=2000
|
||||
ExecStartPre=/usr/bin/sudo /bin/sh -xc "echo '{{ manager_ip }} buildman.quay.io' >> /etc/hosts; exit 0"
|
||||
ExecStartPre=/usr/bin/docker login -u {{ quay_username }} -p {{ quay_password }} -e unused quay.io
|
||||
ExecStart=/usr/bin/docker run --rm --net=host --name quay-builder --privileged --env-file /root/overrides.list -v /var/run/docker.sock:/var/run/docker.sock quay.io/coreos/registry-build-worker:latest
|
||||
ExecStop=/usr/bin/docker stop quay-builder
|
|
@ -41,3 +41,5 @@ git+https://github.com/DevTable/aniso8601-fake.git
|
|||
git+https://github.com/DevTable/anunidecode.git
|
||||
git+https://github.com/DevTable/avatar-generator.git
|
||||
gipc
|
||||
python-etcd
|
||||
cachetools
|
||||
|
|
Reference in a new issue