Automatically disable build triggers with successive failures or internal errors
We allow users to reenable them manually once disabled
This commit is contained in:
parent
c35eec0615
commit
93d79e777e
9 changed files with 166 additions and 11 deletions
|
@ -14,7 +14,7 @@ from flask import Flask
|
|||
from buildman.enums import BuildJobResult, BuildServerStatus, RESULT_PHASES
|
||||
from buildman.jobutil.buildstatus import StatusHandler
|
||||
from buildman.jobutil.buildjob import BuildJob, BuildJobLoadException
|
||||
from data import database
|
||||
from data import database, model
|
||||
from app import app, metric_queue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -151,6 +151,11 @@ class BuilderServer(object):
|
|||
else:
|
||||
self._queue.complete(build_job.job_item)
|
||||
|
||||
# Update the trigger failure tracking (if applicable).
|
||||
if build_job.repo_build.trigger is not None:
|
||||
model.build.update_trigger_disable_status(build_job.repo_build.trigger,
|
||||
RESULT_PHASES[job_status])
|
||||
|
||||
if update_phase:
|
||||
status_handler = StatusHandler(self._build_logs, build_job.repo_build.uuid)
|
||||
yield From(status_handler.set_phase(RESULT_PHASES[job_status]))
|
||||
|
|
|
@ -519,3 +519,11 @@ class DefaultConfig(ImmutableConfig):
|
|||
'engine': 'memcached',
|
||||
'endpoint': ('127.0.0.1', 18080),
|
||||
}
|
||||
|
||||
# Defines the number of successive failures of a build trigger's build before the trigger is
|
||||
# automatically disabled.
|
||||
SUCCESSIVE_TRIGGER_FAILURE_DISABLE_THRESHOLD = 100
|
||||
|
||||
# Defines the number of successive internal errors of a build trigger's build before the
|
||||
# trigger is automatically disabled.
|
||||
SUCCESSIVE_TRIGGER_INTERNAL_ERROR_DISABLE_THRESHOLD = 5
|
||||
|
|
|
@ -710,7 +710,9 @@ class RepositoryBuildTrigger(BaseModel):
|
|||
pull_robot = QuayUserField(allows_robots=True, null=True, related_name='triggerpullrobot',
|
||||
robot_null_delete=True)
|
||||
enabled = BooleanField(default=True)
|
||||
disabled_reason = ForeignKeyField(DisableReason, null=True)
|
||||
disabled_reason = EnumField(DisableReason, null=True)
|
||||
successive_failure_count = IntegerField(default=0)
|
||||
successive_internal_error_count = IntegerField(default=0)
|
||||
|
||||
|
||||
class EmailConfirmation(BaseModel):
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
"""Add automatic disable of build triggers
|
||||
|
||||
Revision ID: 17aff2e1354e
|
||||
Revises: 61cadbacb9fc
|
||||
Create Date: 2017-10-18 15:58:03.971526
|
||||
|
||||
"""
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '17aff2e1354e'
|
||||
down_revision = '61cadbacb9fc'
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import mysql
|
||||
|
||||
def upgrade(tables):
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.add_column('repositorybuildtrigger', sa.Column('successive_failure_count', sa.Integer(), nullable=False))
|
||||
op.add_column('repositorybuildtrigger', sa.Column('successive_internal_error_count', sa.Integer(), nullable=False))
|
||||
# ### end Alembic commands ###
|
||||
|
||||
op.bulk_insert(
|
||||
tables.disablereason,
|
||||
[
|
||||
{'id': 2, 'name': 'successive_build_failures'},
|
||||
{'id': 3, 'name': 'successive_build_internal_errors'},
|
||||
],
|
||||
)
|
||||
|
||||
def downgrade(tables):
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.drop_column('repositorybuildtrigger', 'successive_internal_error_count')
|
||||
op.drop_column('repositorybuildtrigger', 'successive_failure_count')
|
||||
# ### end Alembic commands ###
|
||||
|
||||
op.execute(tables
|
||||
.disablereason
|
||||
.delete()
|
||||
.where(tables.disablereason.c.name == op.inline_literal('successive_internal_error_count')))
|
||||
|
||||
op.execute(tables
|
||||
.disablereason
|
||||
.delete()
|
||||
.where(tables.disablereason.c.name == op.inline_literal('successive_failure_count')))
|
|
@ -263,6 +263,45 @@ def toggle_build_trigger(trigger, enabled, reason='user_toggled'):
|
|||
trigger.enabled = enabled
|
||||
|
||||
if not enabled:
|
||||
trigger.disabled_reason = DisableReason.get(name=reason)
|
||||
trigger.disabled_reason = RepositoryBuildTrigger.disabled_reason.get_id(reason)
|
||||
|
||||
trigger.save()
|
||||
|
||||
|
||||
def update_trigger_disable_status(trigger, final_phase):
|
||||
""" Updates the disable status of the given build trigger. If the build trigger had a
|
||||
failure, then the counter is increased and, if we've reached the limit, the trigger is
|
||||
automatically disabled. Otherwise, if the trigger succeeded, it's counter is reset. This
|
||||
ensures that triggers that continue to error are eventually automatically disabled.
|
||||
"""
|
||||
with db_transaction():
|
||||
# If the build completed successfully, then reset the successive counters.
|
||||
if final_phase == BUILD_PHASE.COMPLETE:
|
||||
trigger.successive_failure_count = 0
|
||||
trigger.successive_internal_error_count = 0
|
||||
trigger.save()
|
||||
return
|
||||
|
||||
# Otherwise, increment the counters and check for trigger disable.
|
||||
if final_phase == BUILD_PHASE.ERROR:
|
||||
trigger.successive_failure_count = trigger.successive_failure_count + 1
|
||||
trigger.successive_internal_error_count = 0
|
||||
elif final_phase == BUILD_PHASE.INTERNAL_ERROR:
|
||||
trigger.successive_failure_count = 0
|
||||
trigger.successive_internal_error_count = trigger.successive_internal_error_count + 1
|
||||
|
||||
# Check if we need to disable the trigger.
|
||||
failure_threshold = config.app_config.get('SUCCESSIVE_TRIGGER_FAILURE_DISABLE_THRESHOLD')
|
||||
error_threshold = config.app_config.get('SUCCESSIVE_TRIGGER_INTERNAL_ERROR_DISABLE_THRESHOLD')
|
||||
|
||||
if failure_threshold and trigger.successive_failure_count >= failure_threshold:
|
||||
trigger.enabled = False
|
||||
trigger.disabled_reason = RepositoryBuildTrigger.disabled_reason.get_id('successive_build_failures')
|
||||
elif (error_threshold and
|
||||
trigger.successive_internal_error_count >= error_threshold):
|
||||
trigger.enabled = False
|
||||
trigger.disabled_reason = RepositoryBuildTrigger.disabled_reason.get_id('successive_build_internal_errors')
|
||||
|
||||
# Save the trigger changes.
|
||||
trigger.save()
|
||||
|
46
data/model/test/test_build.py
Normal file
46
data/model/test/test_build.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
import pytest
|
||||
|
||||
from mock import patch
|
||||
|
||||
from data.database import BUILD_PHASE, RepositoryBuildTrigger
|
||||
from data.model.build import update_trigger_disable_status
|
||||
from test.fixtures import *
|
||||
|
||||
TEST_FAIL_THRESHOLD = 5
|
||||
TEST_INTERNAL_ERROR_THRESHOLD = 2
|
||||
|
||||
@pytest.mark.parametrize('starting_failure_count, starting_error_count, status, expected_reason', [
|
||||
(0, 0, BUILD_PHASE.COMPLETE, None),
|
||||
(10, 10, BUILD_PHASE.COMPLETE, None),
|
||||
|
||||
(TEST_FAIL_THRESHOLD - 1, TEST_INTERNAL_ERROR_THRESHOLD - 1, BUILD_PHASE.COMPLETE, None),
|
||||
(TEST_FAIL_THRESHOLD - 1, 0, BUILD_PHASE.ERROR, 'successive_build_failures'),
|
||||
(0, TEST_INTERNAL_ERROR_THRESHOLD - 1, BUILD_PHASE.INTERNAL_ERROR,
|
||||
'successive_build_internal_errors'),
|
||||
])
|
||||
def test_update_trigger_disable_status(starting_failure_count, starting_error_count, status,
|
||||
expected_reason, initialized_db):
|
||||
test_config = {
|
||||
'SUCCESSIVE_TRIGGER_FAILURE_DISABLE_THRESHOLD': TEST_FAIL_THRESHOLD,
|
||||
'SUCCESSIVE_TRIGGER_INTERNAL_ERROR_DISABLE_THRESHOLD': TEST_INTERNAL_ERROR_THRESHOLD,
|
||||
}
|
||||
|
||||
trigger = model.build.list_build_triggers('devtable', 'building')[0]
|
||||
trigger.successive_failure_count = starting_failure_count
|
||||
trigger.successive_internal_error_count = starting_error_count
|
||||
trigger.enabled = True
|
||||
trigger.save()
|
||||
|
||||
with patch('data.model.config.app_config', test_config):
|
||||
update_trigger_disable_status(trigger, status)
|
||||
updated_trigger = RepositoryBuildTrigger.get(uuid=trigger.uuid)
|
||||
|
||||
assert updated_trigger.enabled == (expected_reason is None)
|
||||
|
||||
if expected_reason is not None:
|
||||
assert updated_trigger.disabled_reason.name == expected_reason
|
||||
else:
|
||||
assert updated_trigger.disabled_reason is None
|
||||
assert updated_trigger.successive_failure_count == 0
|
||||
assert updated_trigger.successive_internal_error_count == 0
|
||||
|
|
@ -13,11 +13,8 @@ from endpoints.api.signing import RepositorySignatures
|
|||
from endpoints.api.search import ConductRepositorySearch
|
||||
from endpoints.api.superuser import SuperUserRepositoryBuildLogs, SuperUserRepositoryBuildResource
|
||||
from endpoints.api.superuser import SuperUserRepositoryBuildStatus
|
||||
<<<<<<< HEAD
|
||||
from endpoints.api.appspecifictokens import AppTokens, AppToken
|
||||
=======
|
||||
from endpoints.api.trigger import BuildTrigger
|
||||
>>>>>>> Add ability for triggers to be disabled
|
||||
from endpoints.test.shared import client_with_identity, toggle_feature
|
||||
|
||||
from test.fixtures import *
|
||||
|
|
|
@ -436,6 +436,8 @@ def initialize_database():
|
|||
TagKind.create(name='channel')
|
||||
|
||||
DisableReason.create(name='user_toggled')
|
||||
DisableReason.create(name='successive_build_failures')
|
||||
DisableReason.create(name='successive_build_internal_errors')
|
||||
|
||||
|
||||
def wipe_database():
|
||||
|
|
|
@ -169,16 +169,27 @@
|
|||
</td>
|
||||
</tr>
|
||||
<tr class="trigger-disabled-message" ng-if="!trigger.enabled">
|
||||
<td colspan="5" style="text-align: center">
|
||||
<td colspan="5" style="text-align: center"
|
||||
ng-if="trigger.disabled_reason == 'user_toggled'">
|
||||
<i class="fa fa-exclamation-triangle"></i>
|
||||
This build trigger is currently disabled and will not build:
|
||||
This build trigger is user disabled and will not build:
|
||||
<a ng-click="askToggleTrigger(trigger)">Re-enable this trigger</a>
|
||||
</td>
|
||||
<td colspan="5" style="text-align: center"
|
||||
ng-if="trigger.disabled_reason == 'successive_build_failures'">
|
||||
<i class="fa fa-exclamation-triangle"></i>
|
||||
This build trigger was automatically disabled due to successive failures:
|
||||
<a ng-click="askToggleTrigger(trigger)">Re-enable this trigger</a>
|
||||
</td>
|
||||
<td colspan="5" style="text-align: center"
|
||||
ng-if="trigger.disabled_reason == 'successive_build_internal_errors'">
|
||||
<i class="fa fa-exclamation-triangle"></i>
|
||||
This build trigger was automatically disabled due to successive internal errors:
|
||||
<a ng-click="askToggleTrigger(trigger)">Re-enable this trigger</a>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div> <!-- /Build Triggers -->
|
||||
|
|
Reference in a new issue