Automatically disable build triggers with successive failures or internal errors

We allow users to reenable them manually once disabled
This commit is contained in:
Joseph Schorr 2017-10-18 16:03:38 -04:00 committed by Joseph Schorr
parent c35eec0615
commit 93d79e777e
9 changed files with 166 additions and 11 deletions

View file

@ -14,7 +14,7 @@ from flask import Flask
from buildman.enums import BuildJobResult, BuildServerStatus, RESULT_PHASES
from buildman.jobutil.buildstatus import StatusHandler
from buildman.jobutil.buildjob import BuildJob, BuildJobLoadException
from data import database
from data import database, model
from app import app, metric_queue
logger = logging.getLogger(__name__)
@ -151,6 +151,11 @@ class BuilderServer(object):
else:
self._queue.complete(build_job.job_item)
# Update the trigger failure tracking (if applicable).
if build_job.repo_build.trigger is not None:
model.build.update_trigger_disable_status(build_job.repo_build.trigger,
RESULT_PHASES[job_status])
if update_phase:
status_handler = StatusHandler(self._build_logs, build_job.repo_build.uuid)
yield From(status_handler.set_phase(RESULT_PHASES[job_status]))

View file

@ -519,3 +519,11 @@ class DefaultConfig(ImmutableConfig):
'engine': 'memcached',
'endpoint': ('127.0.0.1', 18080),
}
# Defines the number of successive failures of a build trigger's build before the trigger is
# automatically disabled.
SUCCESSIVE_TRIGGER_FAILURE_DISABLE_THRESHOLD = 100
# Defines the number of successive internal errors of a build trigger's build before the
# trigger is automatically disabled.
SUCCESSIVE_TRIGGER_INTERNAL_ERROR_DISABLE_THRESHOLD = 5

View file

@ -710,7 +710,9 @@ class RepositoryBuildTrigger(BaseModel):
pull_robot = QuayUserField(allows_robots=True, null=True, related_name='triggerpullrobot',
robot_null_delete=True)
enabled = BooleanField(default=True)
disabled_reason = ForeignKeyField(DisableReason, null=True)
disabled_reason = EnumField(DisableReason, null=True)
successive_failure_count = IntegerField(default=0)
successive_internal_error_count = IntegerField(default=0)
class EmailConfirmation(BaseModel):

View file

@ -0,0 +1,45 @@
"""Add automatic disable of build triggers
Revision ID: 17aff2e1354e
Revises: 61cadbacb9fc
Create Date: 2017-10-18 15:58:03.971526
"""
# revision identifiers, used by Alembic.
revision = '17aff2e1354e'
down_revision = '61cadbacb9fc'
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables):
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('repositorybuildtrigger', sa.Column('successive_failure_count', sa.Integer(), nullable=False))
op.add_column('repositorybuildtrigger', sa.Column('successive_internal_error_count', sa.Integer(), nullable=False))
# ### end Alembic commands ###
op.bulk_insert(
tables.disablereason,
[
{'id': 2, 'name': 'successive_build_failures'},
{'id': 3, 'name': 'successive_build_internal_errors'},
],
)
def downgrade(tables):
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('repositorybuildtrigger', 'successive_internal_error_count')
op.drop_column('repositorybuildtrigger', 'successive_failure_count')
# ### end Alembic commands ###
op.execute(tables
.disablereason
.delete()
.where(tables.disablereason.c.name == op.inline_literal('successive_internal_error_count')))
op.execute(tables
.disablereason
.delete()
.where(tables.disablereason.c.name == op.inline_literal('successive_failure_count')))

View file

@ -263,6 +263,45 @@ def toggle_build_trigger(trigger, enabled, reason='user_toggled'):
trigger.enabled = enabled
if not enabled:
trigger.disabled_reason = DisableReason.get(name=reason)
trigger.disabled_reason = RepositoryBuildTrigger.disabled_reason.get_id(reason)
trigger.save()
def update_trigger_disable_status(trigger, final_phase):
""" Updates the disable status of the given build trigger. If the build trigger had a
failure, then the counter is increased and, if we've reached the limit, the trigger is
automatically disabled. Otherwise, if the trigger succeeded, it's counter is reset. This
ensures that triggers that continue to error are eventually automatically disabled.
"""
with db_transaction():
# If the build completed successfully, then reset the successive counters.
if final_phase == BUILD_PHASE.COMPLETE:
trigger.successive_failure_count = 0
trigger.successive_internal_error_count = 0
trigger.save()
return
# Otherwise, increment the counters and check for trigger disable.
if final_phase == BUILD_PHASE.ERROR:
trigger.successive_failure_count = trigger.successive_failure_count + 1
trigger.successive_internal_error_count = 0
elif final_phase == BUILD_PHASE.INTERNAL_ERROR:
trigger.successive_failure_count = 0
trigger.successive_internal_error_count = trigger.successive_internal_error_count + 1
# Check if we need to disable the trigger.
failure_threshold = config.app_config.get('SUCCESSIVE_TRIGGER_FAILURE_DISABLE_THRESHOLD')
error_threshold = config.app_config.get('SUCCESSIVE_TRIGGER_INTERNAL_ERROR_DISABLE_THRESHOLD')
if failure_threshold and trigger.successive_failure_count >= failure_threshold:
trigger.enabled = False
trigger.disabled_reason = RepositoryBuildTrigger.disabled_reason.get_id('successive_build_failures')
elif (error_threshold and
trigger.successive_internal_error_count >= error_threshold):
trigger.enabled = False
trigger.disabled_reason = RepositoryBuildTrigger.disabled_reason.get_id('successive_build_internal_errors')
# Save the trigger changes.
trigger.save()

View file

@ -0,0 +1,46 @@
import pytest
from mock import patch
from data.database import BUILD_PHASE, RepositoryBuildTrigger
from data.model.build import update_trigger_disable_status
from test.fixtures import *
TEST_FAIL_THRESHOLD = 5
TEST_INTERNAL_ERROR_THRESHOLD = 2
@pytest.mark.parametrize('starting_failure_count, starting_error_count, status, expected_reason', [
(0, 0, BUILD_PHASE.COMPLETE, None),
(10, 10, BUILD_PHASE.COMPLETE, None),
(TEST_FAIL_THRESHOLD - 1, TEST_INTERNAL_ERROR_THRESHOLD - 1, BUILD_PHASE.COMPLETE, None),
(TEST_FAIL_THRESHOLD - 1, 0, BUILD_PHASE.ERROR, 'successive_build_failures'),
(0, TEST_INTERNAL_ERROR_THRESHOLD - 1, BUILD_PHASE.INTERNAL_ERROR,
'successive_build_internal_errors'),
])
def test_update_trigger_disable_status(starting_failure_count, starting_error_count, status,
expected_reason, initialized_db):
test_config = {
'SUCCESSIVE_TRIGGER_FAILURE_DISABLE_THRESHOLD': TEST_FAIL_THRESHOLD,
'SUCCESSIVE_TRIGGER_INTERNAL_ERROR_DISABLE_THRESHOLD': TEST_INTERNAL_ERROR_THRESHOLD,
}
trigger = model.build.list_build_triggers('devtable', 'building')[0]
trigger.successive_failure_count = starting_failure_count
trigger.successive_internal_error_count = starting_error_count
trigger.enabled = True
trigger.save()
with patch('data.model.config.app_config', test_config):
update_trigger_disable_status(trigger, status)
updated_trigger = RepositoryBuildTrigger.get(uuid=trigger.uuid)
assert updated_trigger.enabled == (expected_reason is None)
if expected_reason is not None:
assert updated_trigger.disabled_reason.name == expected_reason
else:
assert updated_trigger.disabled_reason is None
assert updated_trigger.successive_failure_count == 0
assert updated_trigger.successive_internal_error_count == 0

View file

@ -13,11 +13,8 @@ from endpoints.api.signing import RepositorySignatures
from endpoints.api.search import ConductRepositorySearch
from endpoints.api.superuser import SuperUserRepositoryBuildLogs, SuperUserRepositoryBuildResource
from endpoints.api.superuser import SuperUserRepositoryBuildStatus
<<<<<<< HEAD
from endpoints.api.appspecifictokens import AppTokens, AppToken
=======
from endpoints.api.trigger import BuildTrigger
>>>>>>> Add ability for triggers to be disabled
from endpoints.test.shared import client_with_identity, toggle_feature
from test.fixtures import *

View file

@ -436,6 +436,8 @@ def initialize_database():
TagKind.create(name='channel')
DisableReason.create(name='user_toggled')
DisableReason.create(name='successive_build_failures')
DisableReason.create(name='successive_build_internal_errors')
def wipe_database():

View file

@ -169,16 +169,27 @@
</td>
</tr>
<tr class="trigger-disabled-message" ng-if="!trigger.enabled">
<td colspan="5" style="text-align: center">
<td colspan="5" style="text-align: center"
ng-if="trigger.disabled_reason == 'user_toggled'">
<i class="fa fa-exclamation-triangle"></i>
This build trigger is currently disabled and will not build:
This build trigger is user disabled and will not build:
<a ng-click="askToggleTrigger(trigger)">Re-enable this trigger</a>
</td>
<td colspan="5" style="text-align: center"
ng-if="trigger.disabled_reason == 'successive_build_failures'">
<i class="fa fa-exclamation-triangle"></i>
This build trigger was automatically disabled due to successive failures:
<a ng-click="askToggleTrigger(trigger)">Re-enable this trigger</a>
</td>
<td colspan="5" style="text-align: center"
ng-if="trigger.disabled_reason == 'successive_build_internal_errors'">
<i class="fa fa-exclamation-triangle"></i>
This build trigger was automatically disabled due to successive internal errors:
<a ng-click="askToggleTrigger(trigger)">Re-enable this trigger</a>
</td>
</tr>
</tbody>
</tbody>
</table>
</div>
</div>
</div> <!-- /Build Triggers -->