Improve the log messages in the buildman.

This commit is contained in:
Jake Moshenko 2015-06-10 14:18:12 -04:00
parent d31e25d5cd
commit 884fedd229
2 changed files with 16 additions and 12 deletions

View file

@ -215,14 +215,14 @@ class EphemeralBuilderManager(BaseManager):
except (KeyError, etcd.EtcdKeyError): except (KeyError, etcd.EtcdKeyError):
workers_alive = 0 workers_alive = 0
except etcd.EtcdException: except etcd.EtcdException:
logger.exception('Exception when reading job count from etcd') logger.exception('Exception when reading job count from etcd for job: %s', build_uuid)
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT) raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
logger.debug('Total jobs: %s', workers_alive) logger.debug('Total jobs: %s', workers_alive)
if workers_alive >= allowed_worker_count: if workers_alive >= allowed_worker_count:
logger.info('Too many workers alive, unable to start new worker. %s >= %s', workers_alive, logger.info('Too many workers alive, unable to start new worker for build job: %s. %s >= %s',
allowed_worker_count) build_uuid, workers_alive, allowed_worker_count)
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT) raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
job_key = self._etcd_job_key(build_job) job_key = self._etcd_job_key(build_job)
@ -249,13 +249,14 @@ class EphemeralBuilderManager(BaseManager):
ttl=EC2_API_TIMEOUT)) ttl=EC2_API_TIMEOUT))
except (KeyError, etcd.EtcdKeyError): except (KeyError, etcd.EtcdKeyError):
# The job was already taken by someone else, we are probably a retry # The job was already taken by someone else, we are probably a retry
logger.exception('Job already exists in etcd, are timeouts misconfigured or is the queue broken?') logger.error('Job: %s already exists in etcd, timeout may be misconfigured', build_uuid)
raise Return(False) raise Return(False, EC2_API_TIMEOUT)
except etcd.EtcdException: except etcd.EtcdException:
logger.exception('Exception when writing job %s to etcd', build_uuid) logger.exception('Exception when writing job %s to etcd', build_uuid)
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT) raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
logger.debug('Starting builder with executor: %s', self._executor) executor_type = self._executor.__class__.__name__
logger.debug('Starting builder for job: %s with executor: %s', build_uuid, executor_type)
try: try:
builder_id = yield From(self._executor.start_builder(realm, token, build_uuid)) builder_id = yield From(self._executor.start_builder(realm, token, build_uuid))
@ -284,11 +285,12 @@ class EphemeralBuilderManager(BaseManager):
yield From(self._etcd_client.write(self._etcd_realm_key(realm), realm_spec, prevExist=False, yield From(self._etcd_client.write(self._etcd_realm_key(realm), realm_spec, prevExist=False,
ttl=setup_time)) ttl=setup_time))
except (KeyError, etcd.EtcdKeyError): except (KeyError, etcd.EtcdKeyError):
logger.error('Realm already exists in etcd. UUID collision or something is very very wrong.') logger.error('Realm %s already exists in etcd for job %s ' +
'UUID collision or something is very very wrong.', realm, build_uuid)
raise Return(False, setup_time) raise Return(False, setup_time)
except etcd.EtcdException: except etcd.EtcdException:
logger.exception('Exception when writing realm %s to etcd', realm) logger.exception('Exception when writing realm %s to etcd for job %s', realm, build_uuid)
raise Return(False) raise Return(False, setup_time)
raise Return(True, None) raise Return(True, None)

View file

@ -169,12 +169,13 @@ class BuilderServer(object):
self._queue.incomplete(job_item, restore_retry=False) self._queue.incomplete(job_item, restore_retry=False)
continue continue
logger.debug('Build job found. Checking for an avaliable worker.') logger.debug('Checking for an avaliable worker for build job %s',
build_job.repo_build.uuid)
try: try:
schedule_success, retry_timeout = yield From(self._lifecycle_manager.schedule(build_job)) schedule_success, retry_timeout = yield From(self._lifecycle_manager.schedule(build_job))
except: except:
logger.exception('Exception when scheduling job') logger.exception('Exception when scheduling job: %s', build_job.repo_build.uuid)
self._current_status = BuildServerStatus.EXCEPTION self._current_status = BuildServerStatus.EXCEPTION
return return
@ -187,7 +188,8 @@ class BuilderServer(object):
logger.debug('Build job %s scheduled. Running: %s', build_job.repo_build.uuid, logger.debug('Build job %s scheduled. Running: %s', build_job.repo_build.uuid,
self._job_count) self._job_count)
else: else:
logger.debug('All workers are busy. Requeuing.') logger.debug('All workers are busy for job %s Requeuing after %s seconds.',
build_job.repo_build.uuid, retry_timeout)
self._queue.incomplete(job_item, restore_retry=True, retry_after=retry_timeout) self._queue.incomplete(job_item, restore_retry=True, retry_after=retry_timeout)
@trollius.coroutine @trollius.coroutine