Upgrade to the 0.11.1 tutum version of docker. Package it as a Dockerfile using Docker in Docker. Add a status server option to the workers to utilize the new termination signal and status features of gantry.
This commit is contained in:
parent
8b5c781f84
commit
cc47e77156
12 changed files with 328 additions and 99 deletions
66
Dockerfile
66
Dockerfile
|
@ -1,66 +0,0 @@
|
|||
# NOTE(review): this file is deleted by this commit, replaced by
# Dockerfile.web and Dockerfile.buildworker; shown here as the pre-split image.
FROM phusion/baseimage:0.9.10

# Non-interactive apt during the build; HOME for root-run tooling.
ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root

# Install the dependencies.
RUN apt-get update

# New ubuntu packages should be added as their own apt-get install lines below the existing install commands
RUN apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1

# PhantomJS
RUN apt-get install -y phantomjs

# Grunt
RUN apt-get install -y nodejs npm
RUN ln -s /usr/bin/nodejs /usr/bin/node
RUN npm install -g grunt-cli

# Vendored .deb packages, installed via gdebi so dependencies are resolved.
ADD binary_dependencies binary_dependencies
RUN gdebi --n binary_dependencies/*.deb

# Trim apt caches and temp files out of the image.
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install python dependencies into a virtualenv before adding the source so
# this layer stays cached across code-only changes.
ADD requirements.txt requirements.txt
RUN virtualenv --distribute venv
RUN venv/bin/pip install -r requirements.txt

# Application source.
ADD auth auth
ADD buildstatus buildstatus
ADD conf conf
ADD data data
ADD endpoints endpoints
ADD features features
ADD grunt grunt
ADD screenshots screenshots
ADD static static
ADD storage storage
ADD templates templates
ADD util util
ADD workers workers

ADD app.py app.py
ADD application.py application.py
ADD config.py config.py
ADD initdb.py initdb.py

# my_init startup scripts and runit service definitions.
ADD conf/init/mklogsdir.sh /etc/my_init.d/
ADD conf/init/gunicorn.sh /etc/service/gunicorn/run
ADD conf/init/nginx.sh /etc/service/nginx/run
ADD conf/init/diffsworker.sh /etc/service/diffsworker/run
ADD conf/init/webhookworker.sh /etc/service/webhookworker/run

# Build the frontend assets.
RUN cd grunt && npm install
RUN cd grunt && grunt

# Add the tests last because they're prone to accidental changes, then run them
ADD test test
RUN TEST=true venv/bin/python -m unittest discover

# Runtime config is mounted via the volume below; drop any baked-in copy.
RUN rm -rf /conf/stack
VOLUME ["/conf/stack", "/mnt/logs"]

EXPOSE 443 80

CMD ["/sbin/my_init"]
|
1
Dockerfile
Symbolic link
1
Dockerfile
Symbolic link
|
@ -0,0 +1 @@
|
|||
Dockerfile.web
|
46
Dockerfile.buildworker
Normal file
46
Dockerfile.buildworker
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Build-worker image: runs a nested docker daemon (docker-in-docker, see
# conf/init/tutumdocker.sh) plus the dockerfile build worker under runit/my_init.
FROM phusion/baseimage:0.9.10

ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root

# Run `update` and the first `install` in the same layer: a cached standalone
# `apt-get update` layer can otherwise pair with a newer install line and
# resolve against a stale package index.
RUN apt-get update && apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1

### End common section ###

# lxc is the execution driver used by the nested docker daemon.
RUN apt-get install -y lxc

# Subordinate uid/gid ranges for root, needed by the custom docker/nsexec debs.
RUN usermod -v 100000-200000 -w 100000-200000 root

ADD binary_dependencies/builder binary_dependencies/builder

# Install the vendored lxc-docker/nsexec .debs, resolving their dependencies.
RUN gdebi --n binary_dependencies/builder/*.deb

RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Python dependencies change less often than the source tree; install them
# before adding the code so this layer stays cached on code-only rebuilds.
ADD requirements.txt requirements.txt
RUN virtualenv --distribute venv
RUN venv/bin/pip install -r requirements.txt

ADD buildstatus buildstatus
ADD data data
ADD features features
ADD storage storage
ADD util util
ADD workers workers

ADD app.py app.py
ADD config.py config.py

# Remove this if we ever stop depending on test data for the default config
ADD test test

ADD conf conf
# The stack config is provided at runtime via the VOLUME below; drop any copy
# that came in with `ADD conf conf`.
RUN rm -rf /conf/stack

# runit services: the nested docker daemon, and the build worker that waits on it.
ADD conf/init/tutumdocker.sh /etc/service/tutumdocker/run
ADD conf/init/dockerfilebuild.sh /etc/service/dockerfilebuild/run

VOLUME ["/var/lib/docker", "/var/lib/lxc", "/conf/stack"]

CMD ["/sbin/my_init"]
|
66
Dockerfile.web
Normal file
66
Dockerfile.web
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Web image: nginx + gunicorn + the diffs/webhook workers, supervised by
# runit under phusion's my_init.
FROM phusion/baseimage:0.9.10

ENV DEBIAN_FRONTEND noninteractive
ENV HOME /root

# Install the dependencies.
# `update` and the first `install` share a layer: a cached standalone
# `apt-get update` layer can otherwise pair with a newer install line and
# resolve against a stale package index.
RUN apt-get update && apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1

# New ubuntu packages should be added as their own apt-get install lines below the existing install commands

# PhantomJS
RUN apt-get install -y phantomjs

# Grunt
RUN apt-get install -y nodejs npm
RUN ln -s /usr/bin/nodejs /usr/bin/node
RUN npm install -g grunt-cli

# Vendored .deb packages, installed via gdebi so dependencies are resolved.
ADD binary_dependencies binary_dependencies
RUN gdebi --n binary_dependencies/*.deb

RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Python dependencies change less often than the source tree; install them
# before adding the code so this layer stays cached on code-only rebuilds.
ADD requirements.txt requirements.txt
RUN virtualenv --distribute venv
RUN venv/bin/pip install -r requirements.txt

ADD auth auth
ADD buildstatus buildstatus
ADD conf conf
ADD data data
ADD endpoints endpoints
ADD features features
ADD grunt grunt
ADD screenshots screenshots
ADD static static
ADD storage storage
ADD templates templates
ADD util util
ADD workers workers

ADD app.py app.py
ADD application.py application.py
ADD config.py config.py
ADD initdb.py initdb.py

# my_init startup scripts and runit service definitions.
ADD conf/init/mklogsdir.sh /etc/my_init.d/
ADD conf/init/gunicorn.sh /etc/service/gunicorn/run
ADD conf/init/nginx.sh /etc/service/nginx/run
ADD conf/init/diffsworker.sh /etc/service/diffsworker/run
ADD conf/init/webhookworker.sh /etc/service/webhookworker/run

# Build the frontend assets.
RUN cd grunt && npm install
RUN cd grunt && grunt

# Add the tests last because they're prone to accidental changes, then run them
ADD test test
RUN TEST=true venv/bin/python -m unittest discover

# Runtime config is mounted via the volume below; drop any baked-in copy.
RUN rm -rf /conf/stack
VOLUME ["/conf/stack", "/mnt/logs"]

EXPOSE 443 80

CMD ["/sbin/my_init"]
|
|
@ -5,6 +5,8 @@ curl -s https://get.docker.io/ubuntu/ | sudo sh
|
|||
sudo apt-get update && sudo apt-get install -y git
|
||||
git clone https://bitbucket.org/yackob03/quay.git
|
||||
cd quay
|
||||
rm Dockerfile
|
||||
ln -s Dockerfile.web Dockerfile
|
||||
sudo docker build -t quay.io/quay/quay .
|
||||
sudo docker push quay.io/quay/quay
|
||||
```
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
6
conf/init/dockerfilebuild.sh
Executable file
6
conf/init/dockerfilebuild.sh
Executable file
|
@ -0,0 +1,6 @@
|
|||
#!/bin/bash
# runit service script for the dockerfile build worker.

# The worker needs the nested docker daemon; bail out (runit will retry this
# script) if the tutumdocker service cannot be started.
sv start tutumdocker || exit 1

cd /
# exec so the worker replaces this shell as the runit-supervised process and
# receives termination signals (SIGTERM/SIGINT, used by gantry) directly.
exec venv/bin/python -m workers.dockerfilebuild
|
97
conf/init/tutumdocker.sh
Executable file
97
conf/init/tutumdocker.sh
Executable file
|
@ -0,0 +1,97 @@
|
|||
#!/bin/bash
# runit service script: starts a docker daemon inside this container
# (docker-in-docker). The container must run with -privileged or the
# cgroup/securityfs mounts below will fail.

# First, make sure that cgroups are mounted correctly.
CGROUP=/sys/fs/cgroup

[ -d $CGROUP ] ||
	mkdir $CGROUP

mountpoint -q $CGROUP ||
	mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP || {
		echo "Could not make a tmpfs mount. Did you use -privileged?"
		exit 1
	}

# securityfs is used for AppArmor detection by docker/lxc.
if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
then
	mount -t securityfs none /sys/kernel/security || {
		echo "Could not mount /sys/kernel/security."
		echo "AppArmor detection and -privileged mode might break."
	}
fi

# Mount the cgroup hierarchies exactly as they are in the parent system.
for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
do
	[ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS
	mountpoint -q $CGROUP/$SUBSYS ||
		mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS

	# The two following sections address a bug which manifests itself
	# by a cryptic "lxc-start: no ns_cgroup option specified" when
	# trying to start containers within a container.
	# The bug seems to appear when the cgroup hierarchies are not
	# mounted on the exact same directories in the host, and in the
	# container.

	# Named, control-less cgroups are mounted with "-o name=foo"
	# (and appear as such under /proc/<pid>/cgroup) but are usually
	# mounted on a directory named "foo" (without the "name=" prefix).
	# Systemd and OpenRC (and possibly others) both create such a
	# cgroup. To avoid the aforementioned bug, we symlink "foo" to
	# "name=foo". This shouldn't have any adverse effect.
	echo $SUBSYS | grep -q ^name= && {
		NAME=$(echo $SUBSYS | sed s/^name=//)
		ln -s $SUBSYS $CGROUP/$NAME
	}

	# Likewise, on at least one system, it has been reported that
	# systemd would mount the CPU and CPU accounting controllers
	# (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
	# but on a directory called "cpu,cpuacct" (note the inversion
	# in the order of the groups). This tries to work around it.
	[ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct
done

# Note: as I write those lines, the LXC userland tools cannot setup
# a "sub-container" properly if the "devices" cgroup is not in its
# own hierarchy. Let's detect this and issue a warning.
grep -q :devices: /proc/1/cgroup ||
	echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
grep -qw devices /proc/1/cgroup ||
	echo "WARNING: it looks like the 'devices' cgroup is not mounted."

# Now, close extraneous file descriptors.
pushd /proc/self/fd >/dev/null
for FD in *
do
	case "$FD" in
	# Keep stdin/stdout/stderr
	[012])
		;;
	# Nuke everything else
	*)
		eval exec "$FD>&-"
		;;
	esac
done
popd >/dev/null


# If a pidfile is still around (for example after a container restart),
# delete it so that docker can start.
rm -rf /var/run/docker.pid

# Make the lxc/docker state directories world-writable.
# NOTE(review): 777 is very permissive - presumably needed for the remapped
# subordinate uid range configured in the Dockerfile; confirm.
chmod 777 /var/lib/lxc
chmod 777 /var/lib/docker


# If we were given a PORT environment variable, bind the daemon to TCP on that
# port; otherwise run it in the foreground in debug mode with the lxc
# execution driver (despite the upstream script's comment, no shell is spawned).
if [ "$PORT" ]
then
	exec docker -d -H 0.0.0.0:$PORT
else
	docker -d -D -e lxc
fi
|
|
@ -1,30 +1,35 @@
|
|||
to prepare a new build node host starting from a 14.04 base server:
|
||||
to build and upload the builder to quay
|
||||
|
||||
```
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y git python-virtualenv python-dev phantomjs libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev libevent-dev gdebi-core lxc
|
||||
```
|
||||
|
||||
check out the code, install the kernel, custom docker, nsexec, and reboot:
|
||||
|
||||
```
|
||||
git clone https://bitbucket.org/yackob03/quay.git
|
||||
curl -s https://get.docker.io/ubuntu/ | sudo sh
|
||||
sudo apt-get update && sudo apt-get install -y git
|
||||
git clone https://bitbucket.org/yackob03/quay.git
|
||||
cd quay
|
||||
sudo gdebi --n binary_dependencies/builder/nsexec_1.22ubuntu1trusty1_amd64.deb
|
||||
sudo gdebi --n binary_dependencies/builder/lxc-docker-0.9.0_0.9.0-20140501212101-72572f0-dirty_amd64.deb
|
||||
sudo usermod -v 100000-200000 -w 100000-200000 root
|
||||
sudo chmod +x /var/lib/lxc
|
||||
sudo chmod +x /var/lib/docker
|
||||
cd ~
|
||||
git clone https://bitbucket.org/yackob03/quayconfig.git
|
||||
ln -s ../../quayconfig/production/ quay/conf/stack
|
||||
rm Dockerfile
|
||||
ln -s Dockerfile.buildworker Dockerfile
|
||||
sudo docker build -t quay.io/quay/builder .
|
||||
sudo docker push quay.io/quay/builder
|
||||
```
|
||||
|
||||
to run the code from a fresh 14.04 server:
|
||||
|
||||
```
|
||||
sudo apt-get update && sudo apt-get install -y git lxc linux-image-extra-`uname -r`
|
||||
curl -s https://get.docker.io/ubuntu/ | sudo sh
|
||||
git clone https://github.com/DevTable/gantryd.git
|
||||
cd gantryd
|
||||
cat requirements.system | xargs sudo apt-get install -y
|
||||
virtualenv --distribute venv
|
||||
venv/bin/pip install -r requirements.txt
|
||||
sudo docker login -p 9Y1PX7D3IE4KPSGCIALH17EM5V3ZTMP8CNNHJNXAQ2NJGAS48BDH8J1PUOZ869ML -u 'quay+deploy' -e notused quay.io
|
||||
```
|
||||
|
||||
start the worker
|
||||
|
||||
```
|
||||
cd quay
|
||||
virtualenv --distribute venv
|
||||
venv/bin/pip install -r requirements.txt
|
||||
sudo venv/bin/python -m workers.dockerfilebuild -D
|
||||
cd ~
|
||||
git clone https://bitbucket.org/yackob03/quayconfig.git
|
||||
sudo docker pull quay.io/quay/builder
|
||||
cd ~/gantryd
|
||||
sudo venv/bin/python gantry.py ../quayconfig/production/gantry.json update builder
|
||||
```
|
||||
|
|
|
@ -573,4 +573,4 @@ else:
|
|||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(formatter)
|
||||
root_logger.addHandler(handler)
|
||||
worker.start()
|
||||
worker.start(start_status_server_port=8000)
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
import logging
|
||||
import json
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from threading import Event
|
||||
from apscheduler.scheduler import Scheduler
|
||||
from datetime import datetime, timedelta
|
||||
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
|
||||
from threading import Thread
|
||||
from time import sleep
|
||||
|
||||
from data.model import db
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -23,6 +28,36 @@ class WorkerUnhealthyException(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class WorkerStatusServer(HTTPServer):
  """ HTTPServer that carries a reference to the worker it reports on, so that
      request handlers can reach it through self.server.worker. """
  def __init__(self, worker, *args, **kwargs):
    # Stash the worker before delegating; positional/keyword args are passed
    # through to HTTPServer (server_address, RequestHandlerClass, ...).
    self.worker = worker
    HTTPServer.__init__(self, *args, **kwargs)
|
||||
|
||||
|
||||
class WorkerStatusHandler(BaseHTTPRequestHandler):
  """ Tiny HTTP interface onto a worker's lifecycle state.

  GET  /status    -> 200 if the worker is healthy, 503 otherwise
  GET  /terminate -> 200 if the worker has terminated, 503 otherwise
  POST /terminate -> ask the worker to shut down gracefully
  anything else   -> 404
  """
  def do_GET(self):
    worker = self.server.worker
    if self.path == '/status':
      # Report liveness: 200 while healthy, 503 once a stop was requested.
      healthy = worker.is_healthy()
      self.send_response(200 if healthy else 503)
    elif self.path == '/terminate':
      # Report whether it is safe to terminate the worker process.
      terminated = worker.is_terminated()
      self.send_response(200 if terminated else 503)
    else:
      self.send_error(404)

  def do_POST(self):
    if self.path != '/terminate':
      self.send_error(404)
      return

    # Request a graceful shutdown; any failure is reported as a 500.
    try:
      self.server.worker.join()
      self.send_response(200)
    except:
      self.send_response(500)
|
||||
|
||||
|
||||
class Worker(object):
|
||||
def __init__(self, queue, poll_period_seconds=30, reservation_seconds=300,
|
||||
watchdog_period_seconds=60):
|
||||
|
@ -31,6 +66,7 @@ class Worker(object):
|
|||
self._reservation_seconds = reservation_seconds
|
||||
self._watchdog_period_seconds = watchdog_period_seconds
|
||||
self._stop = Event()
|
||||
self._terminated = Event()
|
||||
self._queue = queue
|
||||
self.current_queue_item = None
|
||||
|
||||
|
@ -42,6 +78,17 @@ class Worker(object):
|
|||
""" Function that gets run once every watchdog_period_seconds. """
|
||||
pass
|
||||
|
||||
def _close_db_handle(self):
  # Close the shared db handle if it is open. Called between queue items
  # ("close the db handle periodically" in the poll loop) so an idle worker
  # does not hold a database connection while waiting for work.
  if not db.is_closed():
    logger.debug('Disconnecting from database.')
    db.close()
|
||||
|
||||
def is_healthy(self):
  # Healthy means no stop/shutdown has been requested yet; the status
  # server's GET /status maps this to 200 (healthy) or 503.
  return not self._stop.is_set()
|
||||
|
||||
def is_terminated(self):
  # True once the worker has been flagged terminated; the status server's
  # GET /terminate maps this to 200 (safe to reap the process) or 503.
  return self._terminated.is_set()
|
||||
|
||||
def extend_processing(self, seconds_from_now):
  # Extend the reservation on the queue item currently being processed so it
  # is not handed to another worker mid-job; no-op when the worker is idle.
  if self.current_queue_item is not None:
    self._queue.extend_processing(self.current_queue_item, seconds_from_now)
|
||||
|
@ -51,7 +98,7 @@ class Worker(object):
|
|||
|
||||
self.current_queue_item = self._queue.get()
|
||||
while self.current_queue_item:
|
||||
logger.debug('Queue gave us some work: %s' % self.current_queue_item.body)
|
||||
logger.debug('Queue gave us some work: %s', self.current_queue_item.body)
|
||||
|
||||
job_details = json.loads(self.current_queue_item.body)
|
||||
|
||||
|
@ -68,13 +115,24 @@ class Worker(object):
|
|||
finally:
|
||||
self.current_queue_item = None
|
||||
|
||||
# Close the db handle periodically
|
||||
self._close_db_handle()
|
||||
|
||||
if not self._stop.is_set():
|
||||
self.current_queue_item = self._queue.get(processing_time=self._reservation_seconds)
|
||||
|
||||
if not self._stop.is_set():
|
||||
logger.debug('No more work.')
|
||||
|
||||
def start(self):
|
||||
def start(self, start_status_server_port=None):
|
||||
if start_status_server_port is not None:
|
||||
# Start a status server on a thread
|
||||
server_address = ('', start_status_server_port)
|
||||
httpd = WorkerStatusServer(self, server_address, WorkerStatusHandler)
|
||||
server_thread = Thread(target=httpd.serve_forever)
|
||||
server_thread.daemon = True
|
||||
server_thread.start()
|
||||
|
||||
logger.debug("Scheduling worker.")
|
||||
|
||||
soon = datetime.now() + timedelta(seconds=.001)
|
||||
|
@ -84,8 +142,8 @@ class Worker(object):
|
|||
start_date=soon)
|
||||
self._sched.add_interval_job(self.watchdog, seconds=self._watchdog_period_seconds)
|
||||
|
||||
signal.signal(signal.SIGTERM, self.join)
|
||||
signal.signal(signal.SIGINT, self.join)
|
||||
signal.signal(signal.SIGTERM, self.terminate)
|
||||
signal.signal(signal.SIGINT, self.terminate)
|
||||
|
||||
while not self._stop.wait(1):
|
||||
pass
|
||||
|
@ -94,11 +152,25 @@ class Worker(object):
|
|||
self._sched.shutdown()
|
||||
logger.debug('Finished.')
|
||||
|
||||
def join(self, signal_num=None, stack_frame=None):
|
||||
logger.debug('Shutting down worker gracefully.')
|
||||
self._stop.set()
|
||||
self._terminated.set()
|
||||
|
||||
# Give back the retry that we took for this queue item so that if it were down to zero
|
||||
# retries it will still be picked up by another worker
|
||||
if self.current_queue_item is not None:
|
||||
self._queue.incomplete(self.current_queue_item, restore_retry=True)
|
||||
# Wait forever if we're running a server
|
||||
while start_status_server_port is not None:
|
||||
sleep(60)
|
||||
|
||||
def terminate(self, signal_num=None, stack_frame=None, graceful=False):
  # Installed as the SIGTERM/SIGINT handler in start(); also invoked by
  # join() with graceful=True. A signal arriving after termination has
  # completed hard-exits the process.
  if self._terminated.is_set():
    sys.exit(1)

  else:
    logger.debug('Shutting down worker.')
    self._stop.set()

    if not graceful:
      # Give back the retry that we took for this queue item so that if it were down to zero
      # retries it will still be picked up by another worker
      if self.current_queue_item is not None:
        self._queue.incomplete(self.current_queue_item, restore_retry=True)
    # NOTE(review): nothing visible in this diff sets self._terminated after
    # the stop completes - confirm it is set elsewhere, otherwise the status
    # server's GET /terminate can never return 200.
|
||||
|
||||
def join(self):
  # Graceful shutdown: request a stop without restoring the in-flight queue
  # item's retry (terminate skips the retry give-back when graceful=True).
  self.terminate(graceful=True)
|
||||
|
|
Reference in a new issue