Upgrade to the tutum build of docker 0.11.1. Package the build worker as a Dockerfile that runs Docker-in-Docker. Add a status server option to the workers to take advantage of gantry's new termination-signal and status features.

Jake Moshenko 2014-05-16 18:31:24 -04:00
parent 8b5c781f84
commit cc47e77156
12 changed files with 328 additions and 99 deletions
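
The status server gives a deploy tool an HTTP view into each worker. As a minimal sketch of a health probe (assuming a worker started with `start_status_server_port=8000`, as the dockerfilebuild worker is below, and reachable on localhost; how gantry itself probes is not part of this commit):

```
# 200 while the worker is healthy, 503 once it has been asked to stop
curl -s -o /dev/null -w '%{http_code}\n' http://localhost:8000/status
```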

Dockerfile Deleted file

@@ -1,66 +0,0 @@
FROM phusion/baseimage:0.9.10
ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root
# Install the dependencies.
RUN apt-get update
# New ubuntu packages should be added as their own apt-get install lines below the existing install commands
RUN apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1
# PhantomJS
RUN apt-get install -y phantomjs
# Grunt
RUN apt-get install -y nodejs npm
RUN ln -s /usr/bin/nodejs /usr/bin/node
RUN npm install -g grunt-cli
ADD binary_dependencies binary_dependencies
RUN gdebi --n binary_dependencies/*.deb
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
ADD requirements.txt requirements.txt
RUN virtualenv --distribute venv
RUN venv/bin/pip install -r requirements.txt
ADD auth auth
ADD buildstatus buildstatus
ADD conf conf
ADD data data
ADD endpoints endpoints
ADD features features
ADD grunt grunt
ADD screenshots screenshots
ADD static static
ADD storage storage
ADD templates templates
ADD util util
ADD workers workers
ADD app.py app.py
ADD application.py application.py
ADD config.py config.py
ADD initdb.py initdb.py
ADD conf/init/mklogsdir.sh /etc/my_init.d/
ADD conf/init/gunicorn.sh /etc/service/gunicorn/run
ADD conf/init/nginx.sh /etc/service/nginx/run
ADD conf/init/diffsworker.sh /etc/service/diffsworker/run
ADD conf/init/webhookworker.sh /etc/service/webhookworker/run
RUN cd grunt && npm install
RUN cd grunt && grunt
# Add the tests last because they're prone to accidental changes, then run them
ADD test test
RUN TEST=true venv/bin/python -m unittest discover
RUN rm -rf /conf/stack
VOLUME ["/conf/stack", "/mnt/logs"]
EXPOSE 443 80
CMD ["/sbin/my_init"]

Dockerfile Symbolic link

@@ -0,0 +1 @@
Dockerfile.web

Dockerfile.buildworker Normal file

@@ -0,0 +1,46 @@
FROM phusion/baseimage:0.9.10
ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root
RUN apt-get update
RUN apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1
### End common section ###
RUN apt-get install -y lxc
RUN usermod -v 100000-200000 -w 100000-200000 root
ADD binary_dependencies/builder binary_dependencies/builder
RUN gdebi --n binary_dependencies/builder/*.deb
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
ADD requirements.txt requirements.txt
RUN virtualenv --distribute venv
RUN venv/bin/pip install -r requirements.txt
ADD buildstatus buildstatus
ADD data data
ADD features features
ADD storage storage
ADD util util
ADD workers workers
ADD app.py app.py
ADD config.py config.py
# Remove this if we ever stop depending on test data for the default config
ADD test test
ADD conf conf
RUN rm -rf /conf/stack
ADD conf/init/tutumdocker.sh /etc/service/tutumdocker/run
ADD conf/init/dockerfilebuild.sh /etc/service/dockerfilebuild/run
VOLUME ["/var/lib/docker", "/var/lib/lxc", "/conf/stack"]
CMD ["/sbin/my_init"]
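
Since the builder runs its own docker daemon through LXC, the container needs privileged mode, and a config stack has to be mounted at /conf/stack. A hypothetical manual invocation (the host path is illustrative; the README below drives deployment through gantry instead):

```
sudo docker pull quay.io/quay/builder
sudo docker run -d --privileged \
    -v /home/ubuntu/quayconfig/production:/conf/stack \
    quay.io/quay/builder
```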

Dockerfile.web Normal file

@@ -0,0 +1,66 @@
FROM phusion/baseimage:0.9.10
ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root
# Install the dependencies.
RUN apt-get update
# New ubuntu packages should be added as their own apt-get install lines below the existing install commands
RUN apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1
# PhantomJS
RUN apt-get install -y phantomjs
# Grunt
RUN apt-get install -y nodejs npm
RUN ln -s /usr/bin/nodejs /usr/bin/node
RUN npm install -g grunt-cli
ADD binary_dependencies binary_dependencies
RUN gdebi --n binary_dependencies/*.deb
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
ADD requirements.txt requirements.txt
RUN virtualenv --distribute venv
RUN venv/bin/pip install -r requirements.txt
ADD auth auth
ADD buildstatus buildstatus
ADD conf conf
ADD data data
ADD endpoints endpoints
ADD features features
ADD grunt grunt
ADD screenshots screenshots
ADD static static
ADD storage storage
ADD templates templates
ADD util util
ADD workers workers
ADD app.py app.py
ADD application.py application.py
ADD config.py config.py
ADD initdb.py initdb.py
ADD conf/init/mklogsdir.sh /etc/my_init.d/
ADD conf/init/gunicorn.sh /etc/service/gunicorn/run
ADD conf/init/nginx.sh /etc/service/nginx/run
ADD conf/init/diffsworker.sh /etc/service/diffsworker/run
ADD conf/init/webhookworker.sh /etc/service/webhookworker/run
RUN cd grunt && npm install
RUN cd grunt && grunt
# Add the tests last because they're prone to accidental changes, then run them
ADD test test
RUN TEST=true venv/bin/python -m unittest discover
RUN rm -rf /conf/stack
VOLUME ["/conf/stack", "/mnt/logs"]
EXPOSE 443 80
CMD ["/sbin/my_init"]

README.md

@@ -5,6 +5,8 @@ curl -s https://get.docker.io/ubuntu/ | sudo sh
sudo apt-get update && sudo apt-get install -y git
git clone https://bitbucket.org/yackob03/quay.git
cd quay
rm Dockerfile
ln -s Dockerfile.web Dockerfile
sudo docker build -t quay.io/quay/quay .
sudo docker push quay.io/quay/quay
```

conf/init/dockerfilebuild.sh Executable file

@@ -0,0 +1,6 @@
#! /bin/bash
# Start the docker-in-docker daemon first; the build worker needs it.
sv start tutumdocker || exit 1
cd /
venv/bin/python -m workers.dockerfilebuild

conf/init/tutumdocker.sh Executable file

@@ -0,0 +1,97 @@
#!/bin/bash

# First, make sure that cgroups are mounted correctly.
CGROUP=/sys/fs/cgroup

[ -d $CGROUP ] ||
  mkdir $CGROUP

mountpoint -q $CGROUP ||
  mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP || {
    echo "Could not make a tmpfs mount. Did you use -privileged?"
    exit 1
  }

if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
then
  mount -t securityfs none /sys/kernel/security || {
    echo "Could not mount /sys/kernel/security."
    echo "AppArmor detection and -privileged mode might break."
  }
fi

# Mount the cgroup hierarchies exactly as they are in the parent system.
for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
do
  [ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS
  mountpoint -q $CGROUP/$SUBSYS ||
    mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS

  # The two following sections address a bug which manifests itself
  # by a cryptic "lxc-start: no ns_cgroup option specified" when
  # trying to start containers within a container.
  # The bug seems to appear when the cgroup hierarchies are not
  # mounted on the exact same directories in the host, and in the
  # container.

  # Named, control-less cgroups are mounted with "-o name=foo"
  # (and appear as such under /proc/<pid>/cgroup) but are usually
  # mounted on a directory named "foo" (without the "name=" prefix).
  # Systemd and OpenRC (and possibly others) both create such a
  # cgroup. To avoid the aforementioned bug, we symlink "foo" to
  # "name=foo". This shouldn't have any adverse effect.
  echo $SUBSYS | grep -q ^name= && {
    NAME=$(echo $SUBSYS | sed s/^name=//)
    ln -s $SUBSYS $CGROUP/$NAME
  }

  # Likewise, on at least one system, it has been reported that
  # systemd would mount the CPU and CPU accounting controllers
  # (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
  # but on a directory called "cpu,cpuacct" (note the inversion
  # in the order of the groups). This tries to work around it.
  [ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct
done

# Note: as I write those lines, the LXC userland tools cannot setup
# a "sub-container" properly if the "devices" cgroup is not in its
# own hierarchy. Let's detect this and issue a warning.
grep -q :devices: /proc/1/cgroup ||
  echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
grep -qw devices /proc/1/cgroup ||
  echo "WARNING: it looks like the 'devices' cgroup is not mounted."

# Now, close extraneous file descriptors.
pushd /proc/self/fd >/dev/null
for FD in *
do
  case "$FD" in
  # Keep stdin/stdout/stderr
  [012])
    ;;
  # Nuke everything else
  *)
    eval exec "$FD>&-"
    ;;
  esac
done
popd >/dev/null

# If a pidfile is still around (for example after a container restart),
# delete it so that docker can start.
rm -rf /var/run/docker.pid

chmod 777 /var/lib/lxc
chmod 777 /var/lib/docker

# If we were given a PORT environment variable, start as a simple daemon;
# otherwise, spawn a shell as well
if [ "$PORT" ]
then
  exec docker -d -H 0.0.0.0:$PORT
else
  docker -d -D -e lxc
fi
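
If the cgroup setup succeeds and the container was started privileged, the inner daemon should answer on its default socket. A quick sanity check from inside the container (illustrative):

```
# docker 0.11 reports the execution driver here; expect lxc-* since
# the daemon was started with -e lxc
docker info
```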


@@ -1,30 +1,35 @@
to build and upload the builder to quay
```
curl -s https://get.docker.io/ubuntu/ | sudo sh
sudo apt-get update && sudo apt-get install -y git
git clone https://bitbucket.org/yackob03/quay.git
cd quay
rm Dockerfile
ln -s Dockerfile.buildworker Dockerfile
sudo docker build -t quay.io/quay/builder .
sudo docker push quay.io/quay/builder
```

to run the code from a fresh 14.04 server:
```
sudo apt-get update && sudo apt-get install -y git lxc linux-image-extra-`uname -r`
curl -s https://get.docker.io/ubuntu/ | sudo sh
git clone https://github.com/DevTable/gantryd.git
cd gantryd
cat requirements.system | xargs sudo apt-get install -y
virtualenv --distribute venv
venv/bin/pip install -r requirements.txt
sudo docker login -p 9Y1PX7D3IE4KPSGCIALH17EM5V3ZTMP8CNNHJNXAQ2NJGAS48BDH8J1PUOZ869ML -u 'quay+deploy' -e notused quay.io
```

start the worker
```
cd ~
git clone https://bitbucket.org/yackob03/quayconfig.git
sudo docker pull quay.io/quay/builder
cd ~/gantryd
sudo venv/bin/python gantry.py ../quayconfig/production/gantry.json update builder
```

workers/dockerfilebuild.py

@@ -573,4 +573,4 @@ else:
  handler = logging.StreamHandler()
  handler.setFormatter(formatter)
  root_logger.addHandler(handler)
worker.start(start_status_server_port=8000)

workers/worker.py

@@ -1,11 +1,16 @@
import logging
import json
import signal
import sys

from threading import Event
from apscheduler.scheduler import Scheduler
from datetime import datetime, timedelta
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from threading import Thread
from time import sleep

from data.model import db

logger = logging.getLogger(__name__)
@@ -23,6 +28,36 @@ class WorkerUnhealthyException(Exception):
  pass


class WorkerStatusServer(HTTPServer):
  def __init__(self, worker, *args, **kwargs):
    HTTPServer.__init__(self, *args, **kwargs)
    self.worker = worker


class WorkerStatusHandler(BaseHTTPRequestHandler):
  def do_GET(self):
    if self.path == '/status':
      # Return the worker status
      code = 200 if self.server.worker.is_healthy() else 503
      self.send_response(code)
    elif self.path == '/terminate':
      # Return whether it is safe to terminate the worker process
      code = 200 if self.server.worker.is_terminated() else 503
      self.send_response(code)
    else:
      self.send_error(404)

  def do_POST(self):
    if self.path == '/terminate':
      try:
        self.server.worker.join()
        self.send_response(200)
      except:
        self.send_response(500)
    else:
      self.send_error(404)


class Worker(object):
  def __init__(self, queue, poll_period_seconds=30, reservation_seconds=300,
               watchdog_period_seconds=60):
@@ -31,6 +66,7 @@ class Worker(object):
    self._reservation_seconds = reservation_seconds
    self._watchdog_period_seconds = watchdog_period_seconds
    self._stop = Event()
    self._terminated = Event()
    self._queue = queue
    self.current_queue_item = None
@@ -42,6 +78,17 @@ class Worker(object):
    """ Function that gets run once every watchdog_period_seconds. """
    pass

  def _close_db_handle(self):
    if not db.is_closed():
      logger.debug('Disconnecting from database.')
      db.close()

  def is_healthy(self):
    return not self._stop.is_set()

  def is_terminated(self):
    return self._terminated.is_set()

  def extend_processing(self, seconds_from_now):
    if self.current_queue_item is not None:
      self._queue.extend_processing(self.current_queue_item, seconds_from_now)
@@ -51,7 +98,7 @@ class Worker(object):
    self.current_queue_item = self._queue.get()
    while self.current_queue_item:
      logger.debug('Queue gave us some work: %s', self.current_queue_item.body)

      job_details = json.loads(self.current_queue_item.body)
@@ -68,13 +115,24 @@ class Worker(object):
      finally:
        self.current_queue_item = None

        # Close the db handle periodically
        self._close_db_handle()

        if not self._stop.is_set():
          self.current_queue_item = self._queue.get(processing_time=self._reservation_seconds)

    if not self._stop.is_set():
      logger.debug('No more work.')

  def start(self, start_status_server_port=None):
    if start_status_server_port is not None:
      # Start a status server on a thread
      server_address = ('', start_status_server_port)
      httpd = WorkerStatusServer(self, server_address, WorkerStatusHandler)
      server_thread = Thread(target=httpd.serve_forever)
      server_thread.daemon = True
      server_thread.start()

    logger.debug("Scheduling worker.")

    soon = datetime.now() + timedelta(seconds=.001)
@@ -84,8 +142,8 @@ class Worker(object):
                                 start_date=soon)
    self._sched.add_interval_job(self.watchdog, seconds=self._watchdog_period_seconds)

    signal.signal(signal.SIGTERM, self.terminate)
    signal.signal(signal.SIGINT, self.terminate)

    while not self._stop.wait(1):
      pass
@@ -94,11 +152,25 @@ class Worker(object):
    self._sched.shutdown()
    logger.debug('Finished.')

    self._terminated.set()

    # Wait forever if we're running a server
    while start_status_server_port is not None:
      sleep(60)

  def terminate(self, signal_num=None, stack_frame=None, graceful=False):
    if self._terminated.is_set():
      sys.exit(1)
    else:
      logger.debug('Shutting down worker.')
      self._stop.set()

      if not graceful:
        # Give back the retry that we took for this queue item so that if it were down to zero
        # retries it will still be picked up by another worker
        if self.current_queue_item is not None:
          self._queue.incomplete(self.current_queue_item, restore_retry=True)

  def join(self):
    self.terminate(graceful=True)
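
Together with the SIGTERM handler now pointing at terminate(), these endpoints allow a drain-before-stop sequence. A sketch of what a deployment tool could do with them (host and port assumed as above; gantry's actual probing is outside this commit):

```
# Ask the worker to stop taking new work; join() only flags the stop
# event, so this returns quickly
curl -X POST http://localhost:8000/terminate

# Poll until it is safe to kill the process: 503 while the current
# item drains, 200 once the worker has fully terminated
curl -s -o /dev/null -w '%{http_code}\n' http://localhost:8000/terminate
```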