Handle internal errors in a better fashion: If a build would be marked as internal error, only do so if there are retries remaining. Otherwise, we mark it as failed (since it won't be rebuilt anyway)
This commit is contained in:
parent
e8458267fd
commit
f84d1bad45
8 changed files with 70 additions and 19 deletions
|
@ -255,7 +255,8 @@ class BuildComponent(BaseComponent):
|
|||
|
||||
# Write the error to the log.
|
||||
self._build_status.set_error(worker_error.public_message(), worker_error.extra_data(),
|
||||
internal_error=worker_error.is_internal_error())
|
||||
internal_error=worker_error.is_internal_error(),
|
||||
requeued=self._current_job.has_retries_remaining())
|
||||
|
||||
# Send the notification that the build has failed.
|
||||
self._current_job.send_notification('build_failure',
|
||||
|
@ -363,7 +364,8 @@ class BuildComponent(BaseComponent):
|
|||
# If we still have a running job, then it has not completed and we need to tell the parent
|
||||
# manager.
|
||||
if self._current_job is not None:
|
||||
self._build_status.set_error('Build worker timed out', internal_error=True)
|
||||
self._build_status.set_error('Build worker timed out', internal_error=True,
|
||||
requeued=self._current_job.has_retries_remaining())
|
||||
|
||||
self.parent_manager.job_completed(self._current_job, BuildJobResult.INCOMPLETE, self)
|
||||
self._build_status = None
|
||||
|
|
|
@ -25,6 +25,9 @@ class BuildJob(object):
|
|||
'Could not parse build queue item config with ID %s' % self.job_details['build_uuid']
|
||||
)
|
||||
|
||||
def has_retries_remaining(self):
|
||||
return self.job_item.retries_remaining > 0
|
||||
|
||||
def send_notification(self, kind, error_message=None):
|
||||
tags = self.build_config.get('docker_tags', ['latest'])
|
||||
event_data = {
|
||||
|
|
|
@ -39,8 +39,8 @@ class StatusHandler(object):
|
|||
self._current_command = command
|
||||
self._append_log_message(command, self._build_logs.COMMAND, extra_data)
|
||||
|
||||
def set_error(self, error_message, extra_data=None, internal_error=False):
|
||||
self.set_phase(BUILD_PHASE.INTERNAL_ERROR if internal_error else BUILD_PHASE.ERROR)
|
||||
def set_error(self, error_message, extra_data=None, internal_error=False, requeued=False):
|
||||
self.set_phase(BUILD_PHASE.INTERNAL_ERROR if internal_error and requeued else BUILD_PHASE.ERROR)
|
||||
|
||||
extra_data = extra_data or {}
|
||||
extra_data['internal_error'] = internal_error
|
||||
|
|
|
@ -476,6 +476,15 @@ class BUILD_PHASE(object):
|
|||
COMPLETE = 'complete'
|
||||
|
||||
|
||||
class QueueItem(BaseModel):
|
||||
queue_name = CharField(index=True, max_length=1024)
|
||||
body = TextField()
|
||||
available_after = DateTimeField(default=datetime.utcnow, index=True)
|
||||
available = BooleanField(default=True, index=True)
|
||||
processing_expires = DateTimeField(null=True, index=True)
|
||||
retries_remaining = IntegerField(default=5)
|
||||
|
||||
|
||||
class RepositoryBuild(BaseModel):
|
||||
uuid = CharField(default=uuid_generator, index=True)
|
||||
repository = ForeignKeyField(Repository, index=True)
|
||||
|
@ -488,15 +497,7 @@ class RepositoryBuild(BaseModel):
|
|||
trigger = ForeignKeyField(RepositoryBuildTrigger, null=True, index=True)
|
||||
pull_robot = QuayUserField(null=True, related_name='buildpullrobot')
|
||||
logs_archived = BooleanField(default=False)
|
||||
|
||||
|
||||
class QueueItem(BaseModel):
|
||||
queue_name = CharField(index=True, max_length=1024)
|
||||
body = TextField()
|
||||
available_after = DateTimeField(default=datetime.utcnow, index=True)
|
||||
available = BooleanField(default=True, index=True)
|
||||
processing_expires = DateTimeField(null=True, index=True)
|
||||
retries_remaining = IntegerField(default=5)
|
||||
queue_item = ForeignKeyField(QueueItem, null=True, index=True)
|
||||
|
||||
|
||||
class LogEntryKind(BaseModel):
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
"""Add build queue item reference to the repositorybuild table
|
||||
|
||||
Revision ID: 14fe12ade3df
|
||||
Revises: 5ad999136045
|
||||
Create Date: 2015-02-12 16:11:57.814645
|
||||
|
||||
"""
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '14fe12ade3df'
|
||||
down_revision = '5ad999136045'
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import mysql
|
||||
|
||||
def upgrade(tables):
|
||||
### commands auto generated by Alembic - please adjust! ###
|
||||
op.add_column('repositorybuild', sa.Column('queue_item_id', sa.Integer(), nullable=True))
|
||||
op.create_index('repositorybuild_queue_item_id', 'repositorybuild', ['queue_item_id'], unique=False)
|
||||
op.create_foreign_key(op.f('fk_repositorybuild_queue_item_id_queueitem'), 'repositorybuild', 'queueitem', ['queue_item_id'], ['id'])
|
||||
### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade(tables):
|
||||
### commands auto generated by Alembic - please adjust! ###
|
||||
op.drop_constraint(op.f('fk_repositorybuild_queue_item_id_queueitem'), 'repositorybuild', type_='foreignkey')
|
||||
op.drop_index('repositorybuild_queue_item_id', table_name='repositorybuild')
|
||||
op.drop_column('repositorybuild', 'queue_item_id')
|
||||
### end Alembic commands ###
|
|
@ -60,8 +60,8 @@ class WorkQueue(object):
|
|||
running_query = self._running_jobs(now, name_match_query)
|
||||
running_count = running_query.distinct().count()
|
||||
|
||||
avialable_query = self._available_jobs_not_running(now, name_match_query, running_query)
|
||||
available_count = avialable_query.select(QueueItem.queue_name).distinct().count()
|
||||
available_query = self._available_jobs_not_running(now, name_match_query, running_query)
|
||||
available_count = available_query.select(QueueItem.queue_name).distinct().count()
|
||||
|
||||
self._reporter(self._currently_processing, running_count, running_count + available_count)
|
||||
|
||||
|
@ -81,7 +81,7 @@ class WorkQueue(object):
|
|||
params['available_after'] = available_date
|
||||
|
||||
with self._transaction_factory(db):
|
||||
QueueItem.create(**params)
|
||||
return QueueItem.create(**params)
|
||||
|
||||
def get(self, processing_time=300):
|
||||
"""
|
||||
|
@ -114,6 +114,7 @@ class WorkQueue(object):
|
|||
item = AttrDict({
|
||||
'id': db_item.id,
|
||||
'body': db_item.body,
|
||||
'retries_remaining': db_item.retries_remaining
|
||||
})
|
||||
|
||||
self._currently_processing = True
|
||||
|
@ -141,6 +142,7 @@ class WorkQueue(object):
|
|||
|
||||
incomplete_item_obj.save()
|
||||
self._currently_processing = False
|
||||
return incomplete_item_obj.retries_remaining > 0
|
||||
|
||||
def extend_processing(self, item, seconds_from_now, minimum_extension=MINIMUM_EXTENSION):
|
||||
with self._transaction_factory(db):
|
||||
|
|
|
@ -76,6 +76,12 @@ def build_status_view(build_obj, can_write=False):
|
|||
if datetime.datetime.utcnow() - heartbeat > datetime.timedelta(minutes=1):
|
||||
phase = database.BUILD_PHASE.INTERNAL_ERROR
|
||||
|
||||
# If the phase is internal error, return 'error' instead of the number if retries
|
||||
# on the queue item is 0.
|
||||
if phase == database.BUILD_PHASE.INTERNAL_ERROR:
|
||||
if build_obj.queue_item is None or build_obj.queue_item.retries_remaining == 0:
|
||||
phase = database.BUILD_PHASE.ERROR
|
||||
|
||||
logger.debug('Can write: %s job_config: %s', can_write, build_obj.job_config)
|
||||
resp = {
|
||||
'id': build_obj.uuid,
|
||||
|
@ -87,7 +93,7 @@ def build_status_view(build_obj, can_write=False):
|
|||
'is_writer': can_write,
|
||||
'trigger': trigger_view(build_obj.trigger),
|
||||
'resource_key': build_obj.resource_key,
|
||||
'pull_robot': user_view(build_obj.pull_robot) if build_obj.pull_robot else None,
|
||||
'pull_robot': user_view(build_obj.pull_robot) if build_obj.pull_robot else None
|
||||
}
|
||||
|
||||
if can_write:
|
||||
|
|
|
@ -230,10 +230,17 @@ def start_build(repository, dockerfile_id, tags, build_name, subdir, manual,
|
|||
dockerfile_id, build_name,
|
||||
trigger, pull_robot_name=pull_robot_name)
|
||||
|
||||
dockerfile_build_queue.put([repository.namespace_user.username, repository.name], json.dumps({
|
||||
json_data = json.dumps({
|
||||
'build_uuid': build_request.uuid,
|
||||
'pull_credentials': model.get_pull_credentials(pull_robot_name) if pull_robot_name else None
|
||||
}), retries_remaining=3)
|
||||
})
|
||||
|
||||
queue_item = dockerfile_build_queue.put([repository.namespace_user.username, repository.name],
|
||||
json_data,
|
||||
retries_remaining=3)
|
||||
|
||||
build_request.queue_item = queue_item
|
||||
build_request.save()
|
||||
|
||||
# Add the build to the repo's log.
|
||||
metadata = {
|
||||
|
|
Reference in a new issue