Add monit-based monitoring of build manager

Monit should detect when the build manager freezes and restart it.
This commit is contained in:
Joseph Schorr 2016-11-02 14:06:07 -04:00
parent a2fbe1d6c0
commit 72fdf93d29
3 changed files with 27 additions and 2 deletions

View file

@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root
# Install system packages
RUN apt-get update # 07SEP2016
RUN apt-get update # 02NOV2016
RUN apt-get install -y \
g++ \
gdebi-core \
@ -27,6 +27,7 @@ RUN apt-get install -y \
libpq5 \
libsasl2-dev \
libsasl2-modules \
monit \
nginx \
nodejs \
npm \
@ -112,6 +113,9 @@ ADD conf/init/zz_boot.sh /etc/my_init.d/
ADD conf/init/service/ /etc/service/
RUN rm -rf /etc/service/syslog-forwarder
ADD conf/monitrc /etc/monit/monitrc
RUN chmod 0600 /etc/monit/monitrc
# remove after phusion/baseimage-docker#338 is fixed
ADD conf/init/logrotate.conf /etc/logrotate.conf

View file

@ -2,7 +2,11 @@
echo 'Starting internal build manager'
# Run monit to ensure the build manager is restarted if/when it locks up.
monit
# Run the build manager.
cd /
TROLLIUSDEBUG=1 venv/bin/python -m buildman.builder 2>&1
# `exec VAR=value cmd` makes the shell try to execute a program literally
# named "VAR=value", so the variable is set through env(1) instead; env then
# replaces itself with the Python process, keeping the same PID.
exec env TROLLIUSDEBUG=1 venv/bin/python -m buildman.builder 2>&1
echo 'Internal build manager exited'

17
conf/monitrc Normal file
View file

@ -0,0 +1,17 @@
# Monit control file for the build manager watchdog.
# Run a check cycle every 10 seconds, delaying the first one by 30 seconds
# after monit starts.
set daemon 10 with start delay 30
# Monit's own HTTP interface, listening on port 2812.
set httpd port 2812 and
use address localhost # only accept connections from localhost
allow localhost # allow localhost to connect to the server and
allow admin:monit # require user 'admin' with password 'monit'
# Health check against localhost named "buildmanager"; a restart runs the
# stop program followed by the start program below.
check host buildmanager with address localhost
start program = "/usr/bin/sv start /etc/service/buildmanager" with timeout 10 seconds
stop program = "/usr/bin/sv kill /etc/service/buildmanager"
# The probe requests "/" on port 8787 and expects HTTP status 405 within
# 3 seconds; the check must fail for 3 cycles before a restart is triggered.
# NOTE(review): presumably 405 is what a responsive build manager returns for
# a plain HTTP request on this port - confirm against the builder's server.
if failed port 8787 protocol http
request "/"
status = 405
with timeout 3 seconds
for 3 cycles
then restart
# Give up on repeated restarts: 10 restarts within 10 cycles triggers monit's
# `timeout` action.
if 10 restarts within 10 cycles then timeout