Revert "Merge pull request #1620 from coreos-inc/build-cleanup-logs"

This reverts commit e02891ef0c, reversing
changes made to 65bbdda545.
This commit is contained in:
Jimmy Zelinskie 2016-07-15 16:32:23 -04:00
parent 9f6b47ad1f
commit 30fa1b5906
3 changed files with 14 additions and 82 deletions

View file

@ -25,7 +25,6 @@ logger = logging.getLogger(__name__)
ETCD_MAX_WATCH_TIMEOUT = 30 ETCD_MAX_WATCH_TIMEOUT = 30
RETRY_IMMEDIATELY_TIMEOUT = 0 RETRY_IMMEDIATELY_TIMEOUT = 0
NO_WORKER_AVAILABLE_TIMEOUT = 10
DEFAULT_EPHEMERAL_API_TIMEOUT = 20 DEFAULT_EPHEMERAL_API_TIMEOUT = 20
EXECUTORS = { EXECUTORS = {
@ -318,12 +317,12 @@ class EphemeralBuilderManager(BaseManager):
logger.exception('Exception when reading job count from etcd for job: %s', build_uuid) logger.exception('Exception when reading job count from etcd for job: %s', build_uuid)
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT) raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
logger.debug('Total jobs (scheduling job %s): %s', build_uuid, workers_alive) logger.debug('Total jobs: %s', workers_alive)
if workers_alive >= allowed_worker_count: if workers_alive >= allowed_worker_count:
logger.info('Too many workers alive, unable to start new worker for build job: %s. %s >= %s', logger.info('Too many workers alive, unable to start new worker for build job: %s. %s >= %s',
build_uuid, workers_alive, allowed_worker_count) build_uuid, workers_alive, allowed_worker_count)
raise Return(False, NO_WORKER_AVAILABLE_TIMEOUT) raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
job_key = self._etcd_job_key(build_job) job_key = self._etcd_job_key(build_job)
@ -359,9 +358,8 @@ class EphemeralBuilderManager(BaseManager):
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT) raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
started_with_executor = None started_with_executor = None
builder_id = None logger.debug("executors are: %s", self._executors)
logger.debug("Registered executors are: %s", [ex.__class__.__name__ for ex in self._executors])
for executor in self._executors: for executor in self._executors:
executor_type = executor.__class__.__name__ executor_type = executor.__class__.__name__
@ -374,37 +372,27 @@ class EphemeralBuilderManager(BaseManager):
# Check if we can use this executor based on the retries remaining. # Check if we can use this executor based on the retries remaining.
if executor.minimum_retry_threshold > build_job.retries_remaining: if executor.minimum_retry_threshold > build_job.retries_remaining:
logger.debug('Job %s cannot use executor %s as it is below retry threshold (retry #: %s)', logger.debug('Job %s cannot use executor %s due to not meeting retry threshold', build_uuid,
build_uuid, executor_type, build_job.retries_remaining) executor_type)
continue continue
logger.debug('Starting builder for job %s with selected executor: %s', build_uuid, logger.debug('Starting builder for job: %s with executor: %s', build_uuid, executor_type)
executor_type)
try: try:
builder_id = yield From(executor.start_builder(realm, token, build_uuid)) builder_id = yield From(executor.start_builder(realm, token, build_uuid))
metric_queue.put_deprecated('EphemeralBuilderStarted', 1, unit='Count')
metric_queue.ephemeral_build_workers.Inc(labelvalues=[builder_id, build_uuid])
started_with_executor = executor
break
except: except:
logger.exception('Exception when starting builder for job: %s', build_uuid) logger.exception('Exception when starting builder for job: %s', build_uuid)
continue continue
try:
metric_queue.put_deprecated('EphemeralBuilderStarted', 1, unit='Count')
metric_queue.ephemeral_build_workers.Inc(labelvalues=[builder_id, build_uuid])
except:
logger.exception('Exception when writing start metrics for builder %s for job %s',
builder_id, build_uuid)
started_with_executor = executor
# Break out of the loop now that we've started a builder successfully.
break
if started_with_executor is None: if started_with_executor is None:
logger.error('Could not start ephemeral worker for build %s', build_uuid) logger.error('Could not start ephemeral worker for build %s', build_uuid)
raise Return(False, self._ephemeral_api_timeout) raise Return(False, self._ephemeral_api_timeout)
logger.debug('Started builder with ID %s for job: %s with executor: %s', builder_id, build_uuid, logger.debug('Started builder for job: %s with executor: %s', build_uuid, executor_type)
started_with_executor.__class__.__name__)
# Store the builder in etcd associated with the job id # Store the builder in etcd associated with the job id
try: try:
@ -435,9 +423,6 @@ class EphemeralBuilderManager(BaseManager):
raise Return(False, setup_time) raise Return(False, setup_time)
self._job_to_executor[builder_id] = started_with_executor self._job_to_executor[builder_id] = started_with_executor
logger.debug('Builder spawn complete for job %s using executor %s with ID %s ', build_uuid,
started_with_executor.__class__.__name__, builder_id)
raise Return(True, None) raise Return(True, None)
@coroutine @coroutine

View file

@ -136,7 +136,7 @@ class EC2Executor(BuilderExecutor):
coreos_ami = yield From(self._loop.run_in_executor(None, get_ami_callable)) coreos_ami = yield From(self._loop.run_in_executor(None, get_ami_callable))
user_data = self.generate_cloud_config(realm, token, channel, self.manager_hostname) user_data = self.generate_cloud_config(realm, token, channel, self.manager_hostname)
logger.debug('Generated cloud config for build %s: %s', build_uuid, user_data) logger.debug('Generated cloud config: %s', user_data)
ec2_conn = self._get_conn() ec2_conn = self._get_conn()
@ -180,10 +180,6 @@ class EC2Executor(BuilderExecutor):
launched = AsyncWrapper(reservation.instances[0]) launched = AsyncWrapper(reservation.instances[0])
# Sleep a few seconds to wait for AWS to spawn the instance.
yield From(trollius.sleep(_TAG_RETRY_SLEEP))
# Tag the instance with its metadata.
for i in range(0, _TAG_RETRY_COUNT): for i in range(0, _TAG_RETRY_COUNT):
try: try:
yield From(launched.add_tags({ yield From(launched.add_tags({
@ -195,8 +191,7 @@ class EC2Executor(BuilderExecutor):
except boto.exception.EC2ResponseError as ec2e: except boto.exception.EC2ResponseError as ec2e:
if ec2e.error_code == 'InvalidInstanceID.NotFound': if ec2e.error_code == 'InvalidInstanceID.NotFound':
if i < _TAG_RETRY_COUNT - 1: if i < _TAG_RETRY_COUNT - 1:
logger.warning('Failed to write EC2 tags for instance %s for build %s (attempt #%s)', logger.warning('Failed to write EC2 tags (attempt #%s)', i)
launched.id, build_uuid, i)
yield From(trollius.sleep(_TAG_RETRY_SLEEP)) yield From(trollius.sleep(_TAG_RETRY_SLEEP))
continue continue
@ -204,7 +199,6 @@ class EC2Executor(BuilderExecutor):
logger.exception('Failed to write EC2 tags (attempt #%s)', i) logger.exception('Failed to write EC2 tags (attempt #%s)', i)
logger.debug('Machine with ID %s started for build %s', launched.id, build_uuid)
raise Return(launched.id) raise Return(launched.id)
@coroutine @coroutine

View file

@ -8,7 +8,7 @@ from trollius import coroutine, get_event_loop, From, Future, Return
from mock import Mock from mock import Mock
from threading import Event from threading import Event
from buildman.manager.executor import BuilderExecutor, ExecutorException from buildman.manager.executor import BuilderExecutor
from buildman.manager.ephemeral import EphemeralBuilderManager, EXECUTORS from buildman.manager.ephemeral import EphemeralBuilderManager, EXECUTORS
@ -34,13 +34,6 @@ class TestExecutor(BuilderExecutor):
raise Return(str(uuid.uuid4)) raise Return(str(uuid.uuid4))
class BadExecutor(BuilderExecutor):
@coroutine
def start_builder(self, realm, token, build_uuid):
raise ExecutorException('raised on purpose!')
class TestEphemeral(unittest.TestCase): class TestEphemeral(unittest.TestCase):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.etcd_client_mock = None self.etcd_client_mock = None
@ -271,46 +264,6 @@ class TestEphemeral(unittest.TestCase):
self.manager.registered_executors[0].job_started = None self.manager.registered_executors[0].job_started = None
self.manager.registered_executors[1].job_started = None self.manager.registered_executors[1].job_started = None
@async_test
def test_schedule_job_single_executor(self):
EXECUTORS['test'] = TestExecutor
self.manager.initialize({
'EXECUTOR': 'test',
'EXECUTOR_CONFIG': {},
})
build_job = self._create_build_job(namespace='something', retries=3)
result = yield From(self.manager.schedule(build_job))
self.assertTrue(result[0])
self.assertIsNotNone(self.manager.registered_executors[0].job_started)
self.manager.registered_executors[0].job_started = None
build_job = self._create_build_job(namespace='something', retries=0)
result = yield From(self.manager.schedule(build_job))
self.assertTrue(result[0])
self.assertIsNotNone(self.manager.registered_executors[0].job_started)
self.manager.registered_executors[0].job_started = None
@async_test
def test_executor_exception(self):
EXECUTORS['bad'] = BadExecutor
self.manager.initialize({
'EXECUTOR': 'bad',
'EXECUTOR_CONFIG': {},
})
build_job = self._create_build_job(namespace='something', retries=3)
result = yield From(self.manager.schedule(build_job))
self.assertFalse(result[0])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()