Merge branch 'master' into quark

This commit is contained in:
Joseph Schorr 2015-02-13 16:24:53 -05:00
commit fbdbc21eb1
137 changed files with 8691 additions and 2414 deletions

View file

@ -6,11 +6,10 @@ import trollius
import re
from autobahn.wamp.exception import ApplicationError
from trollius.coroutines import From
from buildman.server import BuildJobResult
from buildman.component.basecomponent import BaseComponent
from buildman.jobutil.buildpack import BuildPackage, BuildPackageException
from buildman.jobutil.buildjob import BuildJobLoadException
from buildman.jobutil.buildstatus import StatusHandler
from buildman.jobutil.workererror import WorkerError
@ -20,7 +19,7 @@ HEARTBEAT_DELTA = datetime.timedelta(seconds=30)
HEARTBEAT_TIMEOUT = 10
INITIAL_TIMEOUT = 25
SUPPORTED_WORKER_VERSIONS = ['0.1-beta']
SUPPORTED_WORKER_VERSIONS = ['0.3']
logger = logging.getLogger(__name__)
@ -39,13 +38,14 @@ class BuildComponent(BaseComponent):
self.builder_realm = realm
self.parent_manager = None
self.server_hostname = None
self.registry_hostname = None
self._component_status = ComponentStatus.JOINING
self._last_heartbeat = None
self._current_job = None
self._build_status = None
self._image_info = None
self._worker_version = None
BaseComponent.__init__(self, config, **kwargs)
@ -57,69 +57,52 @@ class BuildComponent(BaseComponent):
def onJoin(self, details):
logger.debug('Registering methods and listeners for component %s', self.builder_realm)
yield From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
yield From(self.register(self._ping, u'io.quay.buildworker.ping'))
yield From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
yield From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))
yield trollius.From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
yield trollius.From(self.register(self._determine_cache_tag,
u'io.quay.buildworker.determinecachetag'))
yield trollius.From(self.register(self._ping, u'io.quay.buildworker.ping'))
self._set_status(ComponentStatus.WAITING)
yield trollius.From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
yield trollius.From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))
yield trollius.From(self._set_status(ComponentStatus.WAITING))
def is_ready(self):
""" Determines whether a build component is ready to begin a build. """
return self._component_status == ComponentStatus.RUNNING
@trollius.coroutine
def start_build(self, build_job):
""" Starts a build. """
logger.debug('Starting build for component %s (worker version: %s)',
self.builder_realm, self._worker_version)
self._current_job = build_job
self._build_status = StatusHandler(self.build_logs, build_job.repo_build())
self._build_status = StatusHandler(self.build_logs, build_job.repo_build.uuid)
self._image_info = {}
self._set_status(ComponentStatus.BUILDING)
yield trollius.From(self._set_status(ComponentStatus.BUILDING))
# Retrieve the job's buildpack.
buildpack_url = self.user_files.get_file_url(build_job.repo_build().resource_key,
# Send the notification that the build has started.
build_job.send_notification('build_start')
# Parse the build configuration.
try:
build_config = build_job.build_config
except BuildJobLoadException as irbe:
self._build_failure('Could not load build job information', irbe)
base_image_information = {}
buildpack_url = self.user_files.get_file_url(build_job.repo_build.resource_key,
requires_cors=False)
logger.debug('Retreiving build package: %s', buildpack_url)
buildpack = None
try:
buildpack = BuildPackage.from_url(buildpack_url)
except BuildPackageException as bpe:
self._build_failure('Could not retrieve build package', bpe)
return
# Extract the base image information from the Dockerfile.
parsed_dockerfile = None
logger.debug('Parsing dockerfile')
build_config = build_job.build_config()
try:
parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
except BuildPackageException as bpe:
self._build_failure('Could not find Dockerfile in build package', bpe)
return
image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
if image_and_tag_tuple is None or image_and_tag_tuple[0] is None:
self._build_failure('Missing FROM line in Dockerfile')
return
base_image_information = {
'repository': image_and_tag_tuple[0],
'tag': image_and_tag_tuple[1]
}
# Extract the number of steps from the Dockerfile.
with self._build_status as status_dict:
status_dict['total_commands'] = len(parsed_dockerfile.commands)
# Add the pull robot information, if any.
if build_config.get('pull_credentials') is not None:
base_image_information['username'] = build_config['pull_credentials'].get('username', '')
base_image_information['password'] = build_config['pull_credentials'].get('password', '')
if build_job.pull_credentials:
base_image_information['username'] = build_job.pull_credentials.get('username', '')
base_image_information['password'] = build_job.pull_credentials.get('password', '')
# Retrieve the repository's fully qualified name.
repo = build_job.repo_build().repository
repo = build_job.repo_build.repository
repository_name = repo.namespace_user.username + '/' + repo.name
# Parse the build queue item into build arguments.
@ -131,29 +114,26 @@ class BuildComponent(BaseComponent):
# push_token: The token to use to push the built image.
# tag_names: The name(s) of the tag(s) for the newly built image.
# base_image: The image name and credentials to use to conduct the base image pull.
# repository: The repository to pull.
# tag: The tag to pull.
# repository: The repository to pull (DEPRECATED 0.2)
# tag: The tag to pull (DEPRECATED in 0.2)
# username: The username for pulling the base image (if any).
# password: The password for pulling the base image (if any).
build_arguments = {
'build_package': buildpack_url,
'sub_directory': build_config.get('build_subdir', ''),
'repository': repository_name,
'registry': self.server_hostname,
'pull_token': build_job.repo_build().access_token.code,
'push_token': build_job.repo_build().access_token.code,
'registry': self.registry_hostname,
'pull_token': build_job.repo_build.access_token.code,
'push_token': build_job.repo_build.access_token.code,
'tag_names': build_config.get('docker_tags', ['latest']),
'base_image': base_image_information,
'cached_tag': build_job.determine_cached_tag() or ''
'base_image': base_image_information
}
# Invoke the build.
logger.debug('Invoking build: %s', self.builder_realm)
logger.debug('With Arguments: %s', build_arguments)
return (self
.call("io.quay.builder.build", **build_arguments)
.add_done_callback(self._build_complete))
self.call("io.quay.builder.build", **build_arguments).add_done_callback(self._build_complete)
@staticmethod
def _total_completion(statuses, total_images):
@ -240,18 +220,28 @@ class BuildComponent(BaseComponent):
elif phase == BUILD_PHASE.BUILDING:
self._build_status.append_log(current_status_string)
@trollius.coroutine
def _determine_cache_tag(self, command_comments, base_image_name, base_image_tag, base_image_id):
with self._build_status as status_dict:
status_dict['total_commands'] = len(command_comments) + 1
logger.debug('Checking cache on realm %s. Base image: %s:%s (%s)', self.builder_realm,
base_image_name, base_image_tag, base_image_id)
tag_found = self._current_job.determine_cached_tag(base_image_id, command_comments)
raise trollius.Return(tag_found or '')
def _build_failure(self, error_message, exception=None):
""" Handles and logs a failed build. """
self._build_status.set_error(error_message, {
'internal_error': exception.message if exception else None
'internal_error': str(exception) if exception else None
})
build_id = self._current_job.repo_build().uuid
build_id = self._current_job.repo_build.uuid
logger.warning('Build %s failed with message: %s', build_id, error_message)
# Mark that the build has finished (in an error state)
self._build_finished(BuildJobResult.ERROR)
trollius.async(self._build_finished(BuildJobResult.ERROR))
def _build_complete(self, result):
""" Wraps up a completed build. Handles any errors and calls self._build_finished. """
@ -259,60 +249,78 @@ class BuildComponent(BaseComponent):
# Retrieve the result. This will raise an ApplicationError on any error that occurred.
result.result()
self._build_status.set_phase(BUILD_PHASE.COMPLETE)
self._build_finished(BuildJobResult.COMPLETE)
trollius.async(self._build_finished(BuildJobResult.COMPLETE))
# Send the notification that the build has completed successfully.
self._current_job.send_notification('build_success')
except ApplicationError as aex:
worker_error = WorkerError(aex.error, aex.kwargs.get('base_error'))
# Write the error to the log.
self._build_status.set_error(worker_error.public_message(), worker_error.extra_data(),
internal_error=worker_error.is_internal_error())
internal_error=worker_error.is_internal_error(),
requeued=self._current_job.has_retries_remaining())
# Send the notification that the build has failed.
self._current_job.send_notification('build_failure',
error_message=worker_error.public_message())
# Mark the build as completed.
if worker_error.is_internal_error():
self._build_finished(BuildJobResult.INCOMPLETE)
trollius.async(self._build_finished(BuildJobResult.INCOMPLETE))
else:
self._build_finished(BuildJobResult.ERROR)
trollius.async(self._build_finished(BuildJobResult.ERROR))
@trollius.coroutine
def _build_finished(self, job_status):
""" Alerts the parent that a build has completed and sets the status back to running. """
self.parent_manager.job_completed(self._current_job, job_status, self)
yield trollius.From(self.parent_manager.job_completed(self._current_job, job_status, self))
self._current_job = None
# Set the component back to a running state.
self._set_status(ComponentStatus.RUNNING)
yield trollius.From(self._set_status(ComponentStatus.RUNNING))
@staticmethod
def _ping():
""" Ping pong. """
return 'pong'
@trollius.coroutine
def _on_ready(self, token, version):
if not version in SUPPORTED_WORKER_VERSIONS:
logger.warning('Build component (token "%s") is running an out-of-date version: %s', version)
return False
self._worker_version = version
if self._component_status != 'waiting':
if not version in SUPPORTED_WORKER_VERSIONS:
logger.warning('Build component (token "%s") is running an out-of-date version: %s', token,
version)
raise trollius.Return(False)
if self._component_status != ComponentStatus.WAITING:
logger.warning('Build component (token "%s") is already connected', self.expected_token)
return False
raise trollius.Return(False)
if token != self.expected_token:
logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token, token)
return False
logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token,
token)
raise trollius.Return(False)
self._set_status(ComponentStatus.RUNNING)
yield trollius.From(self._set_status(ComponentStatus.RUNNING))
# Start the heartbeat check and updating loop.
loop = trollius.get_event_loop()
loop.create_task(self._heartbeat())
logger.debug('Build worker %s is connected and ready', self.builder_realm)
return True
raise trollius.Return(True)
@trollius.coroutine
def _set_status(self, phase):
if phase == ComponentStatus.RUNNING:
yield trollius.From(self.parent_manager.build_component_ready(self))
self._component_status = phase
def _on_heartbeat(self):
""" Updates the last known heartbeat. """
self._last_heartbeat = datetime.datetime.now()
self._last_heartbeat = datetime.datetime.utcnow()
@trollius.coroutine
def _heartbeat(self):
@ -320,13 +328,13 @@ class BuildComponent(BaseComponent):
and updating the heartbeat in the build status dictionary (if applicable). This allows
the build system to catch crashes from either end.
"""
yield From(trollius.sleep(INITIAL_TIMEOUT))
yield trollius.From(trollius.sleep(INITIAL_TIMEOUT))
while True:
# If the component is no longer running or actively building, nothing more to do.
if (self._component_status != ComponentStatus.RUNNING and
self._component_status != ComponentStatus.BUILDING):
return
raise trollius.Return()
# If there is an active build, write the heartbeat to its status.
build_status = self._build_status
@ -334,35 +342,37 @@ class BuildComponent(BaseComponent):
with build_status as status_dict:
status_dict['heartbeat'] = int(time.time())
# Mark the build item.
current_job = self._current_job
if current_job is not None:
self.parent_manager.job_heartbeat(current_job)
yield trollius.From(self.parent_manager.job_heartbeat(current_job))
# Check the heartbeat from the worker.
logger.debug('Checking heartbeat on realm %s', self.builder_realm)
if self._last_heartbeat and self._last_heartbeat < datetime.datetime.now() - HEARTBEAT_DELTA:
self._timeout()
return
if (self._last_heartbeat and
self._last_heartbeat < datetime.datetime.utcnow() - HEARTBEAT_DELTA):
yield trollius.From(self._timeout())
raise trollius.Return()
yield From(trollius.sleep(HEARTBEAT_TIMEOUT))
yield trollius.From(trollius.sleep(HEARTBEAT_TIMEOUT))
@trollius.coroutine
def _timeout(self):
self._set_status(ComponentStatus.TIMED_OUT)
logger.warning('Build component with realm %s has timed out', self.builder_realm)
self._dispose(timed_out=True)
if self._component_status == ComponentStatus.TIMED_OUT:
raise trollius.Return()
yield trollius.From(self._set_status(ComponentStatus.TIMED_OUT))
logger.warning('Build component with realm %s has timed out', self.builder_realm)
def _dispose(self, timed_out=False):
# If we still have a running job, then it has not completed and we need to tell the parent
# manager.
if self._current_job is not None:
if timed_out:
self._build_status.set_error('Build worker timed out', internal_error=True)
self._build_status.set_error('Build worker timed out', internal_error=True,
requeued=self._current_job.has_retries_remaining())
self.parent_manager.job_completed(self._current_job, BuildJobResult.INCOMPLETE, self)
self._build_status = None
self._current_job = None
# Unregister the current component so that it cannot be invoked again.
self.parent_manager.build_component_disposed(self, timed_out)
self.parent_manager.build_component_disposed(self, True)