Improve the log messages in the buildman.
This commit is contained in:
parent
d31e25d5cd
commit
884fedd229
2 changed files with 16 additions and 12 deletions
|
@ -215,14 +215,14 @@ class EphemeralBuilderManager(BaseManager):
|
||||||
except (KeyError, etcd.EtcdKeyError):
|
except (KeyError, etcd.EtcdKeyError):
|
||||||
workers_alive = 0
|
workers_alive = 0
|
||||||
except etcd.EtcdException:
|
except etcd.EtcdException:
|
||||||
logger.exception('Exception when reading job count from etcd')
|
logger.exception('Exception when reading job count from etcd for job: %s', build_uuid)
|
||||||
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
||||||
|
|
||||||
logger.debug('Total jobs: %s', workers_alive)
|
logger.debug('Total jobs: %s', workers_alive)
|
||||||
|
|
||||||
if workers_alive >= allowed_worker_count:
|
if workers_alive >= allowed_worker_count:
|
||||||
logger.info('Too many workers alive, unable to start new worker. %s >= %s', workers_alive,
|
logger.info('Too many workers alive, unable to start new worker for build job: %s. %s >= %s',
|
||||||
allowed_worker_count)
|
build_uuid, workers_alive, allowed_worker_count)
|
||||||
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
||||||
|
|
||||||
job_key = self._etcd_job_key(build_job)
|
job_key = self._etcd_job_key(build_job)
|
||||||
|
@ -249,13 +249,14 @@ class EphemeralBuilderManager(BaseManager):
|
||||||
ttl=EC2_API_TIMEOUT))
|
ttl=EC2_API_TIMEOUT))
|
||||||
except (KeyError, etcd.EtcdKeyError):
|
except (KeyError, etcd.EtcdKeyError):
|
||||||
# The job was already taken by someone else, we are probably a retry
|
# The job was already taken by someone else, we are probably a retry
|
||||||
logger.exception('Job already exists in etcd, are timeouts misconfigured or is the queue broken?')
|
logger.error('Job: %s already exists in etcd, timeout may be misconfigured', build_uuid)
|
||||||
raise Return(False)
|
raise Return(False, EC2_API_TIMEOUT)
|
||||||
except etcd.EtcdException:
|
except etcd.EtcdException:
|
||||||
logger.exception('Exception when writing job %s to etcd', build_uuid)
|
logger.exception('Exception when writing job %s to etcd', build_uuid)
|
||||||
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
||||||
|
|
||||||
logger.debug('Starting builder with executor: %s', self._executor)
|
executor_type = self._executor.__class__.__name__
|
||||||
|
logger.debug('Starting builder for job: %s with executor: %s', build_uuid, executor_type)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
builder_id = yield From(self._executor.start_builder(realm, token, build_uuid))
|
builder_id = yield From(self._executor.start_builder(realm, token, build_uuid))
|
||||||
|
@ -284,11 +285,12 @@ class EphemeralBuilderManager(BaseManager):
|
||||||
yield From(self._etcd_client.write(self._etcd_realm_key(realm), realm_spec, prevExist=False,
|
yield From(self._etcd_client.write(self._etcd_realm_key(realm), realm_spec, prevExist=False,
|
||||||
ttl=setup_time))
|
ttl=setup_time))
|
||||||
except (KeyError, etcd.EtcdKeyError):
|
except (KeyError, etcd.EtcdKeyError):
|
||||||
logger.error('Realm already exists in etcd. UUID collision or something is very very wrong.')
|
logger.error('Realm %s already exists in etcd for job %s ' +
|
||||||
|
'UUID collision or something is very very wrong.', realm, build_uuid)
|
||||||
raise Return(False, setup_time)
|
raise Return(False, setup_time)
|
||||||
except etcd.EtcdException:
|
except etcd.EtcdException:
|
||||||
logger.exception('Exception when writing realm %s to etcd', realm)
|
logger.exception('Exception when writing realm %s to etcd for job %s', realm, build_uuid)
|
||||||
raise Return(False)
|
raise Return(False, setup_time)
|
||||||
|
|
||||||
raise Return(True, None)
|
raise Return(True, None)
|
||||||
|
|
||||||
|
|
|
@ -169,12 +169,13 @@ class BuilderServer(object):
|
||||||
self._queue.incomplete(job_item, restore_retry=False)
|
self._queue.incomplete(job_item, restore_retry=False)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logger.debug('Build job found. Checking for an avaliable worker.')
|
logger.debug('Checking for an avaliable worker for build job %s',
|
||||||
|
build_job.repo_build.uuid)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
schedule_success, retry_timeout = yield From(self._lifecycle_manager.schedule(build_job))
|
schedule_success, retry_timeout = yield From(self._lifecycle_manager.schedule(build_job))
|
||||||
except:
|
except:
|
||||||
logger.exception('Exception when scheduling job')
|
logger.exception('Exception when scheduling job: %s', build_job.repo_build.uuid)
|
||||||
self._current_status = BuildServerStatus.EXCEPTION
|
self._current_status = BuildServerStatus.EXCEPTION
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -187,7 +188,8 @@ class BuilderServer(object):
|
||||||
logger.debug('Build job %s scheduled. Running: %s', build_job.repo_build.uuid,
|
logger.debug('Build job %s scheduled. Running: %s', build_job.repo_build.uuid,
|
||||||
self._job_count)
|
self._job_count)
|
||||||
else:
|
else:
|
||||||
logger.debug('All workers are busy. Requeuing.')
|
logger.debug('All workers are busy for job %s Requeuing after %s seconds.',
|
||||||
|
build_job.repo_build.uuid, retry_timeout)
|
||||||
self._queue.incomplete(job_item, restore_retry=True, retry_after=retry_timeout)
|
self._queue.incomplete(job_item, restore_retry=True, retry_after=retry_timeout)
|
||||||
|
|
||||||
@trollius.coroutine
|
@trollius.coroutine
|
||||||
|
|
Reference in a new issue