From 946e5fabc02d1d0afbea115c9341551bbacfd645 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 30 Nov 2015 14:37:15 -0500 Subject: [PATCH] Add timeout and failure if an EC2 instance could not be found when tagging Fixes #994 --- buildman/manager/executor.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/buildman/manager/executor.py b/buildman/manager/executor.py index 54b689611..e030419a5 100644 --- a/buildman/manager/executor.py +++ b/buildman/manager/executor.py @@ -5,6 +5,8 @@ import threading import boto.ec2 import requests import cachetools +import trollius + from jinja2 import FileSystemLoader, Environment from trollius import coroutine, From, Return, get_event_loop @@ -19,6 +21,9 @@ logger = logging.getLogger(__name__) ONE_HOUR = 60*60 +_TAG_RETRY_COUNT = 3 # Number of times to retry adding tags. +_TAG_RETRY_SLEEP = 2 # Number of seconds to wait between tag retries. + ENV = Environment(loader=FileSystemLoader('buildman/templates')) TEMPLATE = ENV.get_template('cloudconfig.yaml') CloudConfigContext().populate_jinja_environment(ENV) @@ -147,7 +152,7 @@ class EC2Executor(BuilderExecutor): launched = AsyncWrapper(reservation.instances[0]) - for i in range(0, 2): + for i in range(0, _TAG_RETRY_COUNT): try: yield From(launched.add_tags({ 'Name': 'Quay Ephemeral Builder', @@ -155,7 +160,15 @@ class EC2Executor(BuilderExecutor): 'Token': token, 'BuildUUID': build_uuid, })) - except boto.exception.EC2ResponseError: + except boto.exception.EC2ResponseError as ec2e: + if ec2e.error_code == 404: + if i < _TAG_RETRY_COUNT - 1: + logger.warning('Failed to write EC2 tags (attempt #%s)', i) + yield From(trollius.sleep(_TAG_RETRY_SLEEP)) + continue + + raise ExecutorException('Unable to find builder instance.') + logger.exception('Failed to write EC2 tags (attempt #%s)', i) raise Return(launched.id)