diff --git a/app.py b/app.py
index cf4a055cb..c6c7eecbf 100644
--- a/app.py
+++ b/app.py
@@ -15,6 +15,7 @@ from werkzeug.routing import BaseConverter
 
 import features
 from avatars.avatars import Avatar
+from buildman.manager.buildcanceller import BuildCanceller
 from data import database
 from data import model
 from data.archivedlogs import LogArchive
@@ -196,6 +197,7 @@ superusers = SuperUserManager(app)
 signer = Signer(app, config_provider)
 instance_keys = InstanceKeys(app)
 label_validator = LabelValidator(app)
+build_canceller = BuildCanceller(app)
 license_validator = LicenseValidator(config_provider)
 license_validator.start()
 
diff --git a/buildman/component/buildcomponent.py b/buildman/component/buildcomponent.py
index 59ebf4b67..2b88205df 100644
--- a/buildman/component/buildcomponent.py
+++ b/buildman/component/buildcomponent.py
@@ -246,6 +246,10 @@ class BuildComponent(BaseComponent):
       try:
         if self._build_status.set_phase(phase, log_data.get('status_data')):
           logger.debug('Build %s has entered a new phase: %s', self.builder_realm, phase)
+        elif self._current_job.repo_build.phase == BUILD_PHASE.CANCELLED:
+          build_id = self._current_job.repo_build.uuid
+          logger.debug('Trying to move cancelled build into phase: %s with id: %s', phase, build_id)
+          return False
       except InvalidRepositoryBuildException:
         build_id = self._current_job.repo_build.uuid
         logger.info('Build %s was not found; repo was probably deleted', build_id)
diff --git a/buildman/manager/buildcanceller.py b/buildman/manager/buildcanceller.py
new file mode 100644
index 000000000..87a7996c0
--- /dev/null
+++ b/buildman/manager/buildcanceller.py
@@ -0,0 +1,28 @@
+import logging
+
+from buildman.manager.etcd_canceller import EtcdCanceller
+from buildman.manager.noop_canceller import NoopCanceller
+
+logger = logging.getLogger(__name__)
+
+# Maps the build manager type named in the BUILD_MANAGER config to its canceller class.
+CANCELLERS = {'ephemeral': EtcdCanceller}
+
+
+class BuildCanceller(object):
+  """ A class to manage cancelling a build """
+
+  def __init__(self, app=None):
+    # app defaults to None, so only touch app.config once we know an app was passed in.
+    build_manager_config = None if app is None else app.config.get('BUILD_MANAGER')
+    if build_manager_config is None:
+      self.handler = NoopCanceller()
+      return
+
+    # Element 0 selects the canceller class; element 1 is handed to it as its config.
+    canceller = CANCELLERS.get(build_manager_config[0], NoopCanceller)
+    self.handler = canceller(build_manager_config[1])
+
+  def try_cancel_build(self, uuid):
+    """ A method to kill a running build """
+    return self.handler.try_cancel_build(uuid)
diff --git a/buildman/manager/etcd_canceller.py b/buildman/manager/etcd_canceller.py
new file mode 100644
index 000000000..dccde1949
--- /dev/null
+++ b/buildman/manager/etcd_canceller.py
@@ -0,0 +1,39 @@
+import logging
+import etcd
+
+logger = logging.getLogger(__name__)
+
+
+class EtcdCanceller(object):
+  """ A class that sends a message to etcd to cancel a build """
+
+  def __init__(self, config):
+    etcd_host = config.get('ETCD_HOST', '127.0.0.1')
+    etcd_port = config.get('ETCD_PORT', 2379)
+    etcd_ca_cert = config.get('ETCD_CA_CERT', None)
+    etcd_auth = config.get('ETCD_CERT_AND_KEY', None)
+    if etcd_auth is not None:
+      etcd_auth = tuple(etcd_auth)
+
+    # When ETCD_CERT_AND_KEY is configured the client talks https; otherwise plain http.
+    etcd_protocol = 'http' if etcd_auth is None else 'https'
+    logger.debug('Connecting to etcd on %s:%s', etcd_host, etcd_port)
+    self._cancel_prefix = config.get('ETCD_CANCEL_PREFIX', 'cancel/')
+    self._etcd_client = etcd.Client(
+      host=etcd_host,
+      port=etcd_port,
+      cert=etcd_auth,
+      ca_cert=etcd_ca_cert,
+      protocol=etcd_protocol,
+      read_timeout=5)
+
+  def try_cancel_build(self, build_uuid):
+    """ Writes etcd message to cancel build_uuid. Returns True if the write succeeded. """
+    # Pass the uuid as a logging argument; str.format() leaves a '%s' placeholder untouched.
+    logger.info("Cancelling build %s", build_uuid)
+    try:
+      self._etcd_client.write("{}{}".format(self._cancel_prefix, build_uuid), build_uuid)
+      return True
+    except etcd.EtcdException:
+      logger.exception("Failed to write to etcd client %s", build_uuid)
+      return False
diff --git a/buildman/manager/noop_canceller.py b/buildman/manager/noop_canceller.py
new file mode 100644
index 000000000..2adf17ad7
--- /dev/null
+++ b/buildman/manager/noop_canceller.py
@@ -0,0 +1,8 @@
+class NoopCanceller(object):
+  """ A class that can not cancel a build """
+  def __init__(self, config=None):
+    pass
+
+  def try_cancel_build(self, uuid):
+    """ Does nothing and fails to cancel build. """
+    return False
diff --git a/data/database.py b/data/database.py
index 45aa0daf7..afc1573b4 100644
--- a/data/database.py
+++ b/data/database.py
@@ -741,6 +741,7 @@ class BUILD_PHASE(object):
   PUSHING = 'pushing'
   WAITING = 'waiting'
   COMPLETE = 'complete'
+  CANCELLED = 'cancelled'
 
   @classmethod
   def is_terminal_phase(cls, phase):
diff --git a/data/model/build.py b/data/model/build.py
index 2a22f2f77..13e727edc 100644
--- a/data/model/build.py
+++ b/data/model/build.py
@@ -10,6 +10,9 @@ from data.model import (InvalidBuildTriggerException, InvalidRepositoryBuildExce
 
 
 PRESUMED_DEAD_BUILD_AGE = timedelta(days=15)
+# Phases from which create_cancel_build_in_manager refuses to cancel a build.
+PHASES_NOT_ALLOWED_TO_CANCEL_FROM = (BUILD_PHASE.PUSHING, BUILD_PHASE.COMPLETE,
+                                     BUILD_PHASE.ERROR, BUILD_PHASE.INTERNAL_ERROR)
 
 
 def update_build_trigger(trigger, config, auth_token=None):
@@ -143,54 +146,83 @@ def get_pull_robot_name(trigger):
   return trigger.pull_robot.username
 
 
+def _get_build_row_for_update(build_uuid):
+  """ Returns the build row with the given uuid, selected through db_for_update """
+  return db_for_update(RepositoryBuild.select().where(RepositoryBuild.uuid == build_uuid)).get()
+
+
 def update_phase(build_uuid, phase):
   """ A function to change the phase of a build """
   with db_transaction():
+    # Keep the for-update read and the save in one transaction so the cancelled check cannot race.
     try:
-      build = get_repository_build(build_uuid)
-      build.phase = phase
-      build.save()
-      return True
-    except InvalidRepositoryBuildException:
+      build = _get_build_row_for_update(build_uuid)
+    except RepositoryBuild.DoesNotExist:
       return False
 
+    # Can't update a cancelled build
+    if build.phase == BUILD_PHASE.CANCELLED:
+      return False
+
+    build.phase = phase
+    build.save()
+    return True
+
 
 def create_cancel_build_in_queue(build, build_queue):
   """ A function to cancel a build before it leaves the queue """
   def cancel_build():
-    if build.phase != BUILD_PHASE.WAITING or not build.queue_id:
+    cancelled = False
+
+    if build.queue_id is not None:
+      cancelled = build_queue.cancel(build.queue_id)
+
+    if build.phase != BUILD_PHASE.WAITING:
       return False
 
-    cancelled = build_queue.cancel(build.queue_id)
-    if cancelled:
-      # Delete the build row.
-      build.delete_instance()
     return cancelled
 
   return cancel_build
 
 
-def create_cancel_build_in_manager(build):
+def create_cancel_build_in_manager(build, build_canceller):
   """ A function to cancel the build before it starts to push """
   def cancel_build():
-    return False
+    original_phase = build.phase
+    if build.phase in PHASES_NOT_ALLOWED_TO_CANCEL_FROM:
+      return False
+
+    build.phase = BUILD_PHASE.CANCELLED
+    build.save()
+
+    if not build_canceller.try_cancel_build(build.uuid):
+      # Restore the previous phase when the cancel request could not be delivered.
+      build.phase = original_phase
+      build.save()
+      return False
+
+    return True
 
   return cancel_build
 
 
 def cancel_repository_build(build, build_queue):
   """ This tries to cancel the build returns true if request is successful false if it can't be cancelled """
   with db_transaction():
+    from app import build_canceller
     # Reload the build for update.
     # We are loading the build for update so checks should be as quick as possible.
     try:
-      build = db_for_update(RepositoryBuild.select().where(RepositoryBuild.id == build.id)).get()
+      build = _get_build_row_for_update(build.uuid)
     except RepositoryBuild.DoesNotExist:
       return False
 
     cancel_builds = [create_cancel_build_in_queue(build, build_queue),
-                     create_cancel_build_in_manager(build), ]
+                     create_cancel_build_in_manager(build, build_canceller), ]
     for cancelled in cancel_builds:
       if cancelled():
+        # Delete the build row.
+        # TODO Charlie 2016-11-11 Add in message that says build was cancelled and remove the delete build.
+        build.delete_instance()
         return True
     return False