Add monit-based monitoring of build manager
Should catch when the build manager freezes and restart it
This commit is contained in:
parent
a2fbe1d6c0
commit
72fdf93d29
3 changed files with 27 additions and 2 deletions
|
@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive
|
|||
ENV HOME /root
|
||||
|
||||
# Install system packages
|
||||
RUN apt-get update # 07SEP2016
|
||||
RUN apt-get update # 02NOV2016
|
||||
RUN apt-get install -y \
|
||||
g++ \
|
||||
gdebi-core \
|
||||
|
@ -27,6 +27,7 @@ RUN apt-get install -y \
|
|||
libpq5 \
|
||||
libsasl2-dev \
|
||||
libsasl2-modules \
|
||||
monit \
|
||||
nginx \
|
||||
nodejs \
|
||||
npm \
|
||||
|
@ -112,6 +113,9 @@ ADD conf/init/zz_boot.sh /etc/my_init.d/
|
|||
ADD conf/init/service/ /etc/service/
|
||||
RUN rm -rf /etc/service/syslog-forwarder
|
||||
|
||||
ADD conf/monitrc /etc/monit/monitrc
|
||||
RUN chmod 0600 /etc/monit/monitrc
|
||||
|
||||
# remove after phusion/baseimage-docker#338 is fixed
|
||||
ADD conf/init/logrotate.conf /etc/logrotate.conf
|
||||
|
||||
|
|
|
@ -2,7 +2,11 @@
|
|||
|
||||
echo 'Starting internal build manager'
|
||||
|
||||
# Run monit to ensure the build manager is restarted if/when it locks up.
|
||||
monit
|
||||
|
||||
# Run the build manager.
|
||||
cd /
|
||||
TROLLIUSDEBUG=1 venv/bin/python -m buildman.builder 2>&1
|
||||
# Variable assignments are only recognised before the command name; after
# `exec` the word would be taken as the program to run, so set it via env(1).
exec env TROLLIUSDEBUG=1 venv/bin/python -m buildman.builder 2>&1
|
||||
|
||||
echo 'Internal build manager exited'
|
17
conf/monitrc
Normal file
17
conf/monitrc
Normal file
|
@ -0,0 +1,17 @@
|
|||
set daemon 10 with start delay 30
|
||||
|
||||
set httpd port 2812 and
|
||||
use address localhost # only accept connections from localhost
|
||||
allow localhost # allow localhost to connect to the server and
|
||||
allow admin:monit # require user 'admin' with password 'monit'
|
||||
|
||||
check host buildmanager with address localhost
|
||||
start program = "/usr/bin/sv start /etc/service/buildmanager" with timeout 10 seconds
|
||||
stop program = "/usr/bin/sv kill /etc/service/buildmanager"
|
||||
if failed port 8787 protocol http
|
||||
request "/"
|
||||
status = 405
|
||||
with timeout 3 seconds
|
||||
for 3 cycles
|
||||
then restart
|
||||
if 10 restarts within 10 cycles then timeout
|
Reference in a new issue