Build manager cleanup and more logging
This commit is contained in:
parent
65bbdda545
commit
74b87fa813
3 changed files with 82 additions and 14 deletions
|
@ -25,6 +25,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
ETCD_MAX_WATCH_TIMEOUT = 30
|
||||
RETRY_IMMEDIATELY_TIMEOUT = 0
|
||||
NO_WORKER_AVAILABLE_TIMEOUT = 10
|
||||
DEFAULT_EPHEMERAL_API_TIMEOUT = 20
|
||||
|
||||
EXECUTORS = {
|
||||
|
@ -317,12 +318,12 @@ class EphemeralBuilderManager(BaseManager):
|
|||
logger.exception('Exception when reading job count from etcd for job: %s', build_uuid)
|
||||
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
||||
|
||||
logger.debug('Total jobs: %s', workers_alive)
|
||||
logger.debug('Total jobs (scheduling job %s): %s', build_uuid, workers_alive)
|
||||
|
||||
if workers_alive >= allowed_worker_count:
|
||||
logger.info('Too many workers alive, unable to start new worker for build job: %s. %s >= %s',
|
||||
build_uuid, workers_alive, allowed_worker_count)
|
||||
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
||||
raise Return(False, NO_WORKER_AVAILABLE_TIMEOUT)
|
||||
|
||||
job_key = self._etcd_job_key(build_job)
|
||||
|
||||
|
@ -358,8 +359,9 @@ class EphemeralBuilderManager(BaseManager):
|
|||
raise Return(False, RETRY_IMMEDIATELY_TIMEOUT)
|
||||
|
||||
started_with_executor = None
|
||||
logger.debug("executors are: %s", self._executors)
|
||||
builder_id = None
|
||||
|
||||
logger.debug("Registered executors are: %s", [ex.__class__.__name__ for ex in self._executors])
|
||||
for executor in self._executors:
|
||||
executor_type = executor.__class__.__name__
|
||||
|
||||
|
@ -372,27 +374,37 @@ class EphemeralBuilderManager(BaseManager):
|
|||
|
||||
# Check if we can use this executor based on the retries remaining.
|
||||
if executor.minimum_retry_threshold > build_job.retries_remaining:
|
||||
logger.debug('Job %s cannot use executor %s due to not meeting retry threshold', build_uuid,
|
||||
executor_type)
|
||||
logger.debug('Job %s cannot use executor %s as it is below retry threshold (retry #: %s)',
|
||||
build_uuid, executor_type, build_job.retries_remaining)
|
||||
continue
|
||||
|
||||
logger.debug('Starting builder for job: %s with executor: %s', build_uuid, executor_type)
|
||||
logger.debug('Starting builder for job %s with selected executor: %s', build_uuid,
|
||||
executor_type)
|
||||
|
||||
try:
|
||||
builder_id = yield From(executor.start_builder(realm, token, build_uuid))
|
||||
metric_queue.put_deprecated('EphemeralBuilderStarted', 1, unit='Count')
|
||||
metric_queue.ephemeral_build_workers.Inc(labelvalues=[builder_id, build_uuid])
|
||||
started_with_executor = executor
|
||||
break
|
||||
except:
|
||||
logger.exception('Exception when starting builder for job: %s', build_uuid)
|
||||
continue
|
||||
|
||||
try:
|
||||
metric_queue.put_deprecated('EphemeralBuilderStarted', 1, unit='Count')
|
||||
metric_queue.ephemeral_build_workers.Inc(labelvalues=[builder_id, build_uuid])
|
||||
except:
|
||||
logger.exception('Exception when writing start metrics for builder %s for job %s',
|
||||
builder_id, build_uuid)
|
||||
|
||||
started_with_executor = executor
|
||||
|
||||
# Break out of the loop now that we've started a builder successfully.
|
||||
break
|
||||
|
||||
if started_with_executor is None:
|
||||
logger.error('Could not start ephemeral worker for build %s', build_uuid)
|
||||
raise Return(False, self._ephemeral_api_timeout)
|
||||
|
||||
logger.debug('Started builder for job: %s with executor: %s', build_uuid, executor_type)
|
||||
logger.debug('Started builder with ID %s for job: %s with executor: %s', builder_id, build_uuid,
|
||||
started_with_executor.__class__.__name__)
|
||||
|
||||
# Store the builder in etcd associated with the job id
|
||||
try:
|
||||
|
@ -423,6 +435,9 @@ class EphemeralBuilderManager(BaseManager):
|
|||
raise Return(False, setup_time)
|
||||
|
||||
self._job_to_executor[builder_id] = started_with_executor
|
||||
|
||||
logger.debug('Builder spawn complete for job %s using executor %s with ID %s ', build_uuid,
|
||||
started_with_executor.__class__.__name__, builder_id)
|
||||
raise Return(True, None)
|
||||
|
||||
@coroutine
|
||||
|
|
Reference in a new issue