From 2fe896ba6a02e09f785b0f6f22c6814153bfd014 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 30 Aug 2016 13:57:26 -0400 Subject: [PATCH] Restore retries of jobs not started and add some leeway to the processing time --- buildman/server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/buildman/server.py b/buildman/server.py index fc8dd54e2..f94ef3a69 100644 --- a/buildman/server.py +++ b/buildman/server.py @@ -24,6 +24,7 @@ logger = logging.getLogger(__name__) WORK_CHECK_TIMEOUT = 10 TIMEOUT_PERIOD_MINUTES = 20 JOB_TIMEOUT_SECONDS = 300 +SETUP_LEEWAY_SECONDS = 10 MINIMUM_JOB_EXTENSION = timedelta(minutes=2) HEARTBEAT_PERIOD_SEC = 30 @@ -164,8 +165,8 @@ class BuilderServer(object): logger.debug('Checking for more work for %d active workers', self._lifecycle_manager.num_workers()) - job_item = self._queue.get(processing_time=self._lifecycle_manager.setup_time(), - ordering_required=True) + processing_time = self._lifecycle_manager.setup_time() + SETUP_LEEWAY_SECONDS + job_item = self._queue.get(processing_time=processing_time, ordering_required=True) if job_item is None: logger.debug('No additional work found. Going to sleep for %s seconds', WORK_CHECK_TIMEOUT) continue @@ -185,6 +186,7 @@ class BuilderServer(object): except: logger.exception('Exception when scheduling job: %s', build_job.repo_build.uuid) self._current_status = BuildServerStatus.EXCEPTION + self._queue.incomplete(job_item, restore_retry=True, retry_after=WORK_CHECK_TIMEOUT) return if schedule_success: