Merge pull request #2073 from coreos-inc/monit-buildman

Add monit-based monitoring of build manager
This commit is contained in:
josephschorr 2016-11-02 14:16:03 -04:00 committed by GitHub
commit 3c595efbd2
3 changed files with 27 additions and 2 deletions

View file

@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root ENV HOME /root
# Install system packages # Install system packages
RUN apt-get update # 07SEP2016 RUN apt-get update # 02NOV2016
RUN apt-get install -y \ RUN apt-get install -y \
g++ \ g++ \
gdebi-core \ gdebi-core \
@ -27,6 +27,7 @@ RUN apt-get install -y \
libpq5 \ libpq5 \
libsasl2-dev \ libsasl2-dev \
libsasl2-modules \ libsasl2-modules \
monit \
nginx \ nginx \
nodejs \ nodejs \
npm \ npm \
@ -112,6 +113,9 @@ ADD conf/init/zz_boot.sh /etc/my_init.d/
ADD conf/init/service/ /etc/service/ ADD conf/init/service/ /etc/service/
RUN rm -rf /etc/service/syslog-forwarder RUN rm -rf /etc/service/syslog-forwarder
ADD conf/monitrc /etc/monit/monitrc
RUN chmod 0600 /etc/monit/monitrc
# remove after phusion/baseimage-docker#338 is fixed # remove after phusion/baseimage-docker#338 is fixed
ADD conf/init/logrotate.conf /etc/logrotate.conf ADD conf/init/logrotate.conf /etc/logrotate.conf

View file

@ -2,7 +2,11 @@
echo 'Starting internal build manager' echo 'Starting internal build manager'
# Run monit to ensure the build manager is restarted if/when it locks up.
monit
# Run the build manager.
cd / cd /
TROLLIUSDEBUG=1 venv/bin/python -m buildman.builder 2>&1 exec TROLLIUSDEBUG=1 venv/bin/python -m buildman.builder 2>&1
echo 'Internal build manager exited' echo 'Internal build manager exited'

17
conf/monitrc Normal file
View file

@ -0,0 +1,17 @@
# Monit configuration: probe the build manager's HTTP port on localhost and
# restart the service through runit's `sv` when the probe keeps failing.

# Run a check cycle every 10 seconds; wait 30 seconds after monit starts
# before the first cycle.
set daemon 10 with start delay 30

# Monit's own HTTP interface, listening on port 2812.
# NOTE(review): the credentials below are the well-known sample values; they
# are only reachable from localhost because of the address/allow lines.
set httpd port 2812 and
use address localhost # only accept connections from localhost
allow localhost # allow localhost to connect to the server and
allow admin:monit # require user 'admin' with password 'monit'

# Network check named "buildmanager", probing localhost.
check host buildmanager with address localhost
# Start/stop commands act on the /etc/service/buildmanager service directory.
start program = "/usr/bin/sv start /etc/service/buildmanager" with timeout 10 seconds
stop program = "/usr/bin/sv kill /etc/service/buildmanager"
# Probe: HTTP request for "/" on port 8787, expecting status 405 within
# 3 seconds. Presumably the build manager rejects a plain GET on "/", so 405
# means "alive" -- TODO confirm against the build manager's server code.
# The restart fires only after the probe has failed for 3 consecutive cycles
# (about 30 seconds at the 10-second interval set above).
if failed port 8787 protocol http
request "/"
status = 405
with timeout 3 seconds
for 3 cycles
then restart
# Give up after 10 restarts within 10 cycles: the "timeout" action makes monit
# stop monitoring this service instead of restarting it forever.
if 10 restarts within 10 cycles then timeout