Debug log all cases where we mark a build as incomplete in the queue

This should help us narrow down why some builds are falling back.
This commit is contained in:
Joseph Schorr 2016-11-07 15:49:45 -05:00
parent ef41e57aad
commit c98472e9f3
3 changed files with 10 additions and 2 deletions

View file

@ -17,7 +17,7 @@ class BaseManager(object):
every few minutes. """ every few minutes. """
self.job_heartbeat_callback(build_job) self.job_heartbeat_callback(build_job)
def setup_time(self): def overall_setup_time(self):
""" Returns the number of seconds that the build system should wait before allowing the job """ Returns the number of seconds that the build system should wait before allowing the job
to be picked up again after called 'schedule'. to be picked up again after called 'schedule'.
""" """

View file

@ -45,7 +45,7 @@ class EnterpriseManager(BaseManager):
# production, build workers in enterprise are long-lived and register dynamically. # production, build workers in enterprise are long-lived and register dynamically.
self.register_component(REGISTRATION_REALM, DynamicRegistrationComponent) self.register_component(REGISTRATION_REALM, DynamicRegistrationComponent)
def setup_time(self): def overall_setup_time(self):
# Builders are already registered, so the setup time should be essentially instant. We therefore # Builders are already registered, so the setup time should be essentially instant. We therefore
# only return a minute here. # only return a minute here.
return 60 return 60

View file

@ -143,6 +143,8 @@ class BuilderServer(object):
def _job_complete(self, build_job, job_status, executor_name=None, update_phase=False): def _job_complete(self, build_job, job_status, executor_name=None, update_phase=False):
if job_status == BuildJobResult.INCOMPLETE: if job_status == BuildJobResult.INCOMPLETE:
logger.debug('[BUILD INCOMPLETE: job complete] Build ID: %s. No retry restore.',
build_job.repo_build.uuid)
self._queue.incomplete(build_job.job_item, restore_retry=False, retry_after=30) self._queue.incomplete(build_job.job_item, restore_retry=False, retry_after=30)
else: else:
self._queue.complete(build_job.job_item) self._queue.complete(build_job.job_item)
@ -177,6 +179,8 @@ class BuilderServer(object):
try: try:
build_job = BuildJob(job_item) build_job = BuildJob(job_item)
except BuildJobLoadException as irbe: except BuildJobLoadException as irbe:
logger.debug('[BUILD INCOMPLETE: job load exception] Jon data: %s. No retry restore.',
job_item.body)
logger.exception(irbe) logger.exception(irbe)
self._queue.incomplete(job_item, restore_retry=False) self._queue.incomplete(job_item, restore_retry=False)
continue continue
@ -187,6 +191,8 @@ class BuilderServer(object):
try: try:
schedule_success, retry_timeout = yield From(self._lifecycle_manager.schedule(build_job)) schedule_success, retry_timeout = yield From(self._lifecycle_manager.schedule(build_job))
except: except:
logger.debug('[BUILD INCOMPLETE: scheduling] Build ID: %s. Retry restored.',
build_job.repo_build.uuid)
logger.exception('Exception when scheduling job: %s', build_job.repo_build.uuid) logger.exception('Exception when scheduling job: %s', build_job.repo_build.uuid)
self._current_status = BuildServerStatus.EXCEPTION self._current_status = BuildServerStatus.EXCEPTION
self._queue.incomplete(job_item, restore_retry=True, retry_after=WORK_CHECK_TIMEOUT) self._queue.incomplete(job_item, restore_retry=True, retry_after=WORK_CHECK_TIMEOUT)
@ -201,6 +207,8 @@ class BuilderServer(object):
logger.debug('Build job %s scheduled. Running: %s', build_job.repo_build.uuid, logger.debug('Build job %s scheduled. Running: %s', build_job.repo_build.uuid,
self._job_count) self._job_count)
else: else:
logger.debug('[BUILD INCOMPLETE: no schedule] Build ID: %s. Retry restored.',
build_job.repo_build.uuid)
logger.debug('All workers are busy for job %s Requeuing after %s seconds.', logger.debug('All workers are busy for job %s Requeuing after %s seconds.',
build_job.repo_build.uuid, retry_timeout) build_job.repo_build.uuid, retry_timeout)
self._queue.incomplete(job_item, restore_retry=True, retry_after=retry_timeout) self._queue.incomplete(job_item, restore_retry=True, retry_after=retry_timeout)