From 0f37e66cc861fd7cb4925bf754d16d1cee1062bd Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Tue, 6 Oct 2015 17:45:02 -0400
Subject: [PATCH] Better error handling for the build manager

Fixes #604
---
Review note: in EC2Executor.stop_builder, boto exposes the EC2 error <Code>
string on EC2ResponseError.error_code (the HTTP status is on .status), so
the "instance already terminated" check compares against the EC2 error code
'InvalidInstanceID.NotFound' instead of the integer 404, which could never
match and left that branch unreachable.

 buildman/manager/ephemeral.py |  6 +++++-
 buildman/manager/executor.py  | 13 +++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/buildman/manager/ephemeral.py b/buildman/manager/ephemeral.py
index 931f2e373..eb5dd13f7 100644
--- a/buildman/manager/ephemeral.py
+++ b/buildman/manager/ephemeral.py
@@ -99,6 +99,10 @@ class EphemeralBuilderManager(BaseManager):
           if restarter is not None:
             async(restarter())
 
+        except (KeyError, etcd.EtcdKeyError):
+          logger.debug('Etcd key already cleared: %s', etcd_key)
+          return
+
         except etcd.EtcdException as eex:
           # TODO(jschorr): This is a quick and dirty hack and should be replaced
           # with a proper exception check.
@@ -401,7 +405,7 @@ class EphemeralBuilderManager(BaseManager):
     try:
       yield From(self._etcd_client.delete(job_key))
     except (KeyError, etcd.EtcdKeyError):
-      logger.exception('Builder is asking for job to be removed, but work already completed')
+      logger.debug('Builder is asking for job to be removed, but work already completed')
 
     self.job_complete_callback(build_job, job_status)
 
diff --git a/buildman/manager/executor.py b/buildman/manager/executor.py
index 449d66ed3..e4f9fb7bb 100644
--- a/buildman/manager/executor.py
+++ b/buildman/manager/executor.py
@@ -160,8 +160,17 @@ class EC2Executor(BuilderExecutor):
 
   @coroutine
   def stop_builder(self, builder_id):
-    ec2_conn = self._get_conn()
-    terminated_instances = yield From(ec2_conn.terminate_instances([builder_id]))
+    try:
+      ec2_conn = self._get_conn()
+      terminated_instances = yield From(ec2_conn.terminate_instances([builder_id]))
+    except boto.exception.EC2ResponseError as ec2e:
+      if ec2e.error_code == 'InvalidInstanceID.NotFound':
+        logger.debug('Instance %s already terminated', builder_id)
+        return
+
+      logger.exception('Exception when trying to terminate instance %s', builder_id)
+      raise
+
     if builder_id not in [si.id for si in terminated_instances]:
       raise ExecutorException('Unable to terminate instance: %s' % builder_id)
 