Merge branch 'master' into quark
This commit is contained in:
commit
fbdbc21eb1
137 changed files with 8691 additions and 2414 deletions
|
@ -6,11 +6,10 @@ import trollius
|
|||
import re
|
||||
|
||||
from autobahn.wamp.exception import ApplicationError
|
||||
from trollius.coroutines import From
|
||||
|
||||
from buildman.server import BuildJobResult
|
||||
from buildman.component.basecomponent import BaseComponent
|
||||
from buildman.jobutil.buildpack import BuildPackage, BuildPackageException
|
||||
from buildman.jobutil.buildjob import BuildJobLoadException
|
||||
from buildman.jobutil.buildstatus import StatusHandler
|
||||
from buildman.jobutil.workererror import WorkerError
|
||||
|
||||
|
@ -20,7 +19,7 @@ HEARTBEAT_DELTA = datetime.timedelta(seconds=30)
|
|||
HEARTBEAT_TIMEOUT = 10
|
||||
INITIAL_TIMEOUT = 25
|
||||
|
||||
SUPPORTED_WORKER_VERSIONS = ['0.1-beta']
|
||||
SUPPORTED_WORKER_VERSIONS = ['0.3']
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -39,13 +38,14 @@ class BuildComponent(BaseComponent):
|
|||
self.builder_realm = realm
|
||||
|
||||
self.parent_manager = None
|
||||
self.server_hostname = None
|
||||
self.registry_hostname = None
|
||||
|
||||
self._component_status = ComponentStatus.JOINING
|
||||
self._last_heartbeat = None
|
||||
self._current_job = None
|
||||
self._build_status = None
|
||||
self._image_info = None
|
||||
self._worker_version = None
|
||||
|
||||
BaseComponent.__init__(self, config, **kwargs)
|
||||
|
||||
|
@ -57,69 +57,52 @@ class BuildComponent(BaseComponent):
|
|||
|
||||
def onJoin(self, details):
|
||||
logger.debug('Registering methods and listeners for component %s', self.builder_realm)
|
||||
yield From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
|
||||
yield From(self.register(self._ping, u'io.quay.buildworker.ping'))
|
||||
yield From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
|
||||
yield From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))
|
||||
yield trollius.From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
|
||||
yield trollius.From(self.register(self._determine_cache_tag,
|
||||
u'io.quay.buildworker.determinecachetag'))
|
||||
yield trollius.From(self.register(self._ping, u'io.quay.buildworker.ping'))
|
||||
|
||||
self._set_status(ComponentStatus.WAITING)
|
||||
yield trollius.From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
|
||||
yield trollius.From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))
|
||||
|
||||
yield trollius.From(self._set_status(ComponentStatus.WAITING))
|
||||
|
||||
def is_ready(self):
|
||||
""" Determines whether a build component is ready to begin a build. """
|
||||
return self._component_status == ComponentStatus.RUNNING
|
||||
|
||||
@trollius.coroutine
|
||||
def start_build(self, build_job):
|
||||
""" Starts a build. """
|
||||
logger.debug('Starting build for component %s (worker version: %s)',
|
||||
self.builder_realm, self._worker_version)
|
||||
|
||||
self._current_job = build_job
|
||||
self._build_status = StatusHandler(self.build_logs, build_job.repo_build())
|
||||
self._build_status = StatusHandler(self.build_logs, build_job.repo_build.uuid)
|
||||
self._image_info = {}
|
||||
|
||||
self._set_status(ComponentStatus.BUILDING)
|
||||
yield trollius.From(self._set_status(ComponentStatus.BUILDING))
|
||||
|
||||
# Retrieve the job's buildpack.
|
||||
buildpack_url = self.user_files.get_file_url(build_job.repo_build().resource_key,
|
||||
# Send the notification that the build has started.
|
||||
build_job.send_notification('build_start')
|
||||
|
||||
# Parse the build configuration.
|
||||
try:
|
||||
build_config = build_job.build_config
|
||||
except BuildJobLoadException as irbe:
|
||||
self._build_failure('Could not load build job information', irbe)
|
||||
|
||||
base_image_information = {}
|
||||
buildpack_url = self.user_files.get_file_url(build_job.repo_build.resource_key,
|
||||
requires_cors=False)
|
||||
|
||||
logger.debug('Retreiving build package: %s', buildpack_url)
|
||||
buildpack = None
|
||||
try:
|
||||
buildpack = BuildPackage.from_url(buildpack_url)
|
||||
except BuildPackageException as bpe:
|
||||
self._build_failure('Could not retrieve build package', bpe)
|
||||
return
|
||||
|
||||
# Extract the base image information from the Dockerfile.
|
||||
parsed_dockerfile = None
|
||||
logger.debug('Parsing dockerfile')
|
||||
|
||||
build_config = build_job.build_config()
|
||||
try:
|
||||
parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
|
||||
except BuildPackageException as bpe:
|
||||
self._build_failure('Could not find Dockerfile in build package', bpe)
|
||||
return
|
||||
|
||||
image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
|
||||
if image_and_tag_tuple is None or image_and_tag_tuple[0] is None:
|
||||
self._build_failure('Missing FROM line in Dockerfile')
|
||||
return
|
||||
|
||||
base_image_information = {
|
||||
'repository': image_and_tag_tuple[0],
|
||||
'tag': image_and_tag_tuple[1]
|
||||
}
|
||||
|
||||
# Extract the number of steps from the Dockerfile.
|
||||
with self._build_status as status_dict:
|
||||
status_dict['total_commands'] = len(parsed_dockerfile.commands)
|
||||
|
||||
# Add the pull robot information, if any.
|
||||
if build_config.get('pull_credentials') is not None:
|
||||
base_image_information['username'] = build_config['pull_credentials'].get('username', '')
|
||||
base_image_information['password'] = build_config['pull_credentials'].get('password', '')
|
||||
if build_job.pull_credentials:
|
||||
base_image_information['username'] = build_job.pull_credentials.get('username', '')
|
||||
base_image_information['password'] = build_job.pull_credentials.get('password', '')
|
||||
|
||||
# Retrieve the repository's fully qualified name.
|
||||
repo = build_job.repo_build().repository
|
||||
repo = build_job.repo_build.repository
|
||||
repository_name = repo.namespace_user.username + '/' + repo.name
|
||||
|
||||
# Parse the build queue item into build arguments.
|
||||
|
@ -131,29 +114,26 @@ class BuildComponent(BaseComponent):
|
|||
# push_token: The token to use to push the built image.
|
||||
# tag_names: The name(s) of the tag(s) for the newly built image.
|
||||
# base_image: The image name and credentials to use to conduct the base image pull.
|
||||
# repository: The repository to pull.
|
||||
# tag: The tag to pull.
|
||||
# repository: The repository to pull (DEPRECATED 0.2)
|
||||
# tag: The tag to pull (DEPRECATED in 0.2)
|
||||
# username: The username for pulling the base image (if any).
|
||||
# password: The password for pulling the base image (if any).
|
||||
build_arguments = {
|
||||
'build_package': buildpack_url,
|
||||
'sub_directory': build_config.get('build_subdir', ''),
|
||||
'repository': repository_name,
|
||||
'registry': self.server_hostname,
|
||||
'pull_token': build_job.repo_build().access_token.code,
|
||||
'push_token': build_job.repo_build().access_token.code,
|
||||
'registry': self.registry_hostname,
|
||||
'pull_token': build_job.repo_build.access_token.code,
|
||||
'push_token': build_job.repo_build.access_token.code,
|
||||
'tag_names': build_config.get('docker_tags', ['latest']),
|
||||
'base_image': base_image_information,
|
||||
'cached_tag': build_job.determine_cached_tag() or ''
|
||||
'base_image': base_image_information
|
||||
}
|
||||
|
||||
# Invoke the build.
|
||||
logger.debug('Invoking build: %s', self.builder_realm)
|
||||
logger.debug('With Arguments: %s', build_arguments)
|
||||
|
||||
return (self
|
||||
.call("io.quay.builder.build", **build_arguments)
|
||||
.add_done_callback(self._build_complete))
|
||||
self.call("io.quay.builder.build", **build_arguments).add_done_callback(self._build_complete)
|
||||
|
||||
@staticmethod
|
||||
def _total_completion(statuses, total_images):
|
||||
|
@ -240,18 +220,28 @@ class BuildComponent(BaseComponent):
|
|||
elif phase == BUILD_PHASE.BUILDING:
|
||||
self._build_status.append_log(current_status_string)
|
||||
|
||||
@trollius.coroutine
|
||||
def _determine_cache_tag(self, command_comments, base_image_name, base_image_tag, base_image_id):
|
||||
with self._build_status as status_dict:
|
||||
status_dict['total_commands'] = len(command_comments) + 1
|
||||
|
||||
logger.debug('Checking cache on realm %s. Base image: %s:%s (%s)', self.builder_realm,
|
||||
base_image_name, base_image_tag, base_image_id)
|
||||
|
||||
tag_found = self._current_job.determine_cached_tag(base_image_id, command_comments)
|
||||
raise trollius.Return(tag_found or '')
|
||||
|
||||
def _build_failure(self, error_message, exception=None):
|
||||
""" Handles and logs a failed build. """
|
||||
self._build_status.set_error(error_message, {
|
||||
'internal_error': exception.message if exception else None
|
||||
'internal_error': str(exception) if exception else None
|
||||
})
|
||||
|
||||
build_id = self._current_job.repo_build().uuid
|
||||
build_id = self._current_job.repo_build.uuid
|
||||
logger.warning('Build %s failed with message: %s', build_id, error_message)
|
||||
|
||||
# Mark that the build has finished (in an error state)
|
||||
self._build_finished(BuildJobResult.ERROR)
|
||||
trollius.async(self._build_finished(BuildJobResult.ERROR))
|
||||
|
||||
def _build_complete(self, result):
|
||||
""" Wraps up a completed build. Handles any errors and calls self._build_finished. """
|
||||
|
@ -259,60 +249,78 @@ class BuildComponent(BaseComponent):
|
|||
# Retrieve the result. This will raise an ApplicationError on any error that occurred.
|
||||
result.result()
|
||||
self._build_status.set_phase(BUILD_PHASE.COMPLETE)
|
||||
self._build_finished(BuildJobResult.COMPLETE)
|
||||
trollius.async(self._build_finished(BuildJobResult.COMPLETE))
|
||||
|
||||
# Send the notification that the build has completed successfully.
|
||||
self._current_job.send_notification('build_success')
|
||||
except ApplicationError as aex:
|
||||
worker_error = WorkerError(aex.error, aex.kwargs.get('base_error'))
|
||||
|
||||
# Write the error to the log.
|
||||
self._build_status.set_error(worker_error.public_message(), worker_error.extra_data(),
|
||||
internal_error=worker_error.is_internal_error())
|
||||
internal_error=worker_error.is_internal_error(),
|
||||
requeued=self._current_job.has_retries_remaining())
|
||||
|
||||
# Send the notification that the build has failed.
|
||||
self._current_job.send_notification('build_failure',
|
||||
error_message=worker_error.public_message())
|
||||
|
||||
# Mark the build as completed.
|
||||
if worker_error.is_internal_error():
|
||||
self._build_finished(BuildJobResult.INCOMPLETE)
|
||||
trollius.async(self._build_finished(BuildJobResult.INCOMPLETE))
|
||||
else:
|
||||
self._build_finished(BuildJobResult.ERROR)
|
||||
trollius.async(self._build_finished(BuildJobResult.ERROR))
|
||||
|
||||
@trollius.coroutine
|
||||
def _build_finished(self, job_status):
|
||||
""" Alerts the parent that a build has completed and sets the status back to running. """
|
||||
self.parent_manager.job_completed(self._current_job, job_status, self)
|
||||
yield trollius.From(self.parent_manager.job_completed(self._current_job, job_status, self))
|
||||
self._current_job = None
|
||||
|
||||
# Set the component back to a running state.
|
||||
self._set_status(ComponentStatus.RUNNING)
|
||||
yield trollius.From(self._set_status(ComponentStatus.RUNNING))
|
||||
|
||||
@staticmethod
|
||||
def _ping():
|
||||
""" Ping pong. """
|
||||
return 'pong'
|
||||
|
||||
@trollius.coroutine
|
||||
def _on_ready(self, token, version):
|
||||
if not version in SUPPORTED_WORKER_VERSIONS:
|
||||
logger.warning('Build component (token "%s") is running an out-of-date version: %s', version)
|
||||
return False
|
||||
self._worker_version = version
|
||||
|
||||
if self._component_status != 'waiting':
|
||||
if not version in SUPPORTED_WORKER_VERSIONS:
|
||||
logger.warning('Build component (token "%s") is running an out-of-date version: %s', token,
|
||||
version)
|
||||
raise trollius.Return(False)
|
||||
|
||||
if self._component_status != ComponentStatus.WAITING:
|
||||
logger.warning('Build component (token "%s") is already connected', self.expected_token)
|
||||
return False
|
||||
raise trollius.Return(False)
|
||||
|
||||
if token != self.expected_token:
|
||||
logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token, token)
|
||||
return False
|
||||
logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token,
|
||||
token)
|
||||
raise trollius.Return(False)
|
||||
|
||||
self._set_status(ComponentStatus.RUNNING)
|
||||
yield trollius.From(self._set_status(ComponentStatus.RUNNING))
|
||||
|
||||
# Start the heartbeat check and updating loop.
|
||||
loop = trollius.get_event_loop()
|
||||
loop.create_task(self._heartbeat())
|
||||
logger.debug('Build worker %s is connected and ready', self.builder_realm)
|
||||
return True
|
||||
raise trollius.Return(True)
|
||||
|
||||
@trollius.coroutine
|
||||
def _set_status(self, phase):
|
||||
if phase == ComponentStatus.RUNNING:
|
||||
yield trollius.From(self.parent_manager.build_component_ready(self))
|
||||
|
||||
self._component_status = phase
|
||||
|
||||
def _on_heartbeat(self):
|
||||
""" Updates the last known heartbeat. """
|
||||
self._last_heartbeat = datetime.datetime.now()
|
||||
self._last_heartbeat = datetime.datetime.utcnow()
|
||||
|
||||
@trollius.coroutine
|
||||
def _heartbeat(self):
|
||||
|
@ -320,13 +328,13 @@ class BuildComponent(BaseComponent):
|
|||
and updating the heartbeat in the build status dictionary (if applicable). This allows
|
||||
the build system to catch crashes from either end.
|
||||
"""
|
||||
yield From(trollius.sleep(INITIAL_TIMEOUT))
|
||||
yield trollius.From(trollius.sleep(INITIAL_TIMEOUT))
|
||||
|
||||
while True:
|
||||
# If the component is no longer running or actively building, nothing more to do.
|
||||
if (self._component_status != ComponentStatus.RUNNING and
|
||||
self._component_status != ComponentStatus.BUILDING):
|
||||
return
|
||||
raise trollius.Return()
|
||||
|
||||
# If there is an active build, write the heartbeat to its status.
|
||||
build_status = self._build_status
|
||||
|
@ -334,35 +342,37 @@ class BuildComponent(BaseComponent):
|
|||
with build_status as status_dict:
|
||||
status_dict['heartbeat'] = int(time.time())
|
||||
|
||||
|
||||
# Mark the build item.
|
||||
current_job = self._current_job
|
||||
if current_job is not None:
|
||||
self.parent_manager.job_heartbeat(current_job)
|
||||
yield trollius.From(self.parent_manager.job_heartbeat(current_job))
|
||||
|
||||
# Check the heartbeat from the worker.
|
||||
logger.debug('Checking heartbeat on realm %s', self.builder_realm)
|
||||
if self._last_heartbeat and self._last_heartbeat < datetime.datetime.now() - HEARTBEAT_DELTA:
|
||||
self._timeout()
|
||||
return
|
||||
if (self._last_heartbeat and
|
||||
self._last_heartbeat < datetime.datetime.utcnow() - HEARTBEAT_DELTA):
|
||||
yield trollius.From(self._timeout())
|
||||
raise trollius.Return()
|
||||
|
||||
yield From(trollius.sleep(HEARTBEAT_TIMEOUT))
|
||||
yield trollius.From(trollius.sleep(HEARTBEAT_TIMEOUT))
|
||||
|
||||
@trollius.coroutine
|
||||
def _timeout(self):
|
||||
self._set_status(ComponentStatus.TIMED_OUT)
|
||||
logger.warning('Build component with realm %s has timed out', self.builder_realm)
|
||||
self._dispose(timed_out=True)
|
||||
if self._component_status == ComponentStatus.TIMED_OUT:
|
||||
raise trollius.Return()
|
||||
|
||||
yield trollius.From(self._set_status(ComponentStatus.TIMED_OUT))
|
||||
logger.warning('Build component with realm %s has timed out', self.builder_realm)
|
||||
|
||||
def _dispose(self, timed_out=False):
|
||||
# If we still have a running job, then it has not completed and we need to tell the parent
|
||||
# manager.
|
||||
if self._current_job is not None:
|
||||
if timed_out:
|
||||
self._build_status.set_error('Build worker timed out', internal_error=True)
|
||||
self._build_status.set_error('Build worker timed out', internal_error=True,
|
||||
requeued=self._current_job.has_retries_remaining())
|
||||
|
||||
self.parent_manager.job_completed(self._current_job, BuildJobResult.INCOMPLETE, self)
|
||||
self._build_status = None
|
||||
self._current_job = None
|
||||
|
||||
# Unregister the current component so that it cannot be invoked again.
|
||||
self.parent_manager.build_component_disposed(self, timed_out)
|
||||
self.parent_manager.build_component_disposed(self, True)
|
||||
|
|
Reference in a new issue