Kubernetes build worker
This commit is contained in:
parent
3044f8ecbd
commit
bc13333f20
7 changed files with 255 additions and 34 deletions
|
@ -6,7 +6,10 @@ import boto.ec2
|
|||
import requests
|
||||
import cachetools
|
||||
import trollius
|
||||
|
||||
import json
|
||||
import datetime
|
||||
import release
|
||||
import socket
|
||||
|
||||
from jinja2 import FileSystemLoader, Environment
|
||||
from trollius import coroutine, From, Return, get_event_loop
|
||||
|
@ -14,7 +17,7 @@ from functools import partial
|
|||
|
||||
from buildman.asyncutil import AsyncWrapper
|
||||
from container_cloud_config import CloudConfigContext
|
||||
from app import metric_queue
|
||||
from app import metric_queue, app
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -37,12 +40,15 @@ class ExecutorException(Exception):
|
|||
|
||||
class BuilderExecutor(object):
|
||||
def __init__(self, executor_config, manager_hostname):
  """ Interface which can be plugged into the EphemeralNodeManager to provide a strategy for
      starting and stopping builders.

      executor_config: dict-like settings for this executor; read here for the optional
                       WEBSOCKET_SCHEME override.
      manager_hostname: stored as-is on the instance for later use by the executor.
  """
  self.executor_config = executor_config
  self.manager_hostname = manager_hostname

  # Unless explicitly overridden, follow the application's own URL scheme: secure websockets
  # ('wss') when the app prefers https, plain 'ws' otherwise.
  default_websocket_scheme = 'wss' if app.config['PREFERRED_URL_SCHEME'] == 'https' else 'ws'
  self.websocket_scheme = executor_config.get("WEBSOCKET_SCHEME", default_websocket_scheme)
|
||||
|
||||
@coroutine
|
||||
def start_builder(self, realm, token, build_uuid):
|
||||
""" Create a builder with the specified config. Returns a unique id which can be used to manage
|
||||
|
@ -73,6 +79,7 @@ class BuilderExecutor(object):
|
|||
quay_username=quay_username,
|
||||
quay_password=quay_password,
|
||||
manager_hostname=manager_hostname,
|
||||
websocket_scheme=self.websocket_scheme,
|
||||
coreos_channel=coreos_channel,
|
||||
worker_tag=self.executor_config['WORKER_TAG'],
|
||||
logentries_token=self.executor_config.get('LOGENTRIES_TOKEN', None),
|
||||
|
@ -216,10 +223,13 @@ class PopenExecutor(BuilderExecutor):
|
|||
# Now start a machine for this job, adding the machine id to the etcd information
|
||||
logger.debug('Forking process for build')
|
||||
import subprocess
|
||||
|
||||
ws_host = os.environ.get("BUILDMAN_WS_HOST", "localhost")
|
||||
ws_port = os.environ.get("BUILDMAN_WS_PORT", "8787")
|
||||
builder_env = {
|
||||
'TOKEN': token,
|
||||
'REALM': realm,
|
||||
'ENDPOINT': 'ws://localhost:8787',
|
||||
'ENDPOINT': 'ws://%s:%s' % (ws_host,ws_port),
|
||||
'DOCKER_TLS_VERIFY': os.environ.get('DOCKER_TLS_VERIFY', ''),
|
||||
'DOCKER_CERT_PATH': os.environ.get('DOCKER_CERT_PATH', ''),
|
||||
'DOCKER_HOST': os.environ.get('DOCKER_HOST', ''),
|
||||
|
@ -247,6 +257,146 @@ class PopenExecutor(BuilderExecutor):
|
|||
logpipe.close()
|
||||
|
||||
|
||||
class KubernetesExecutor(BuilderExecutor):
  """ Executes build jobs by creating Kubernetes jobs which run a qemu-kvm virtual machine in a pod.

      Settings read from executor_config (all optional, defaults in parentheses):
        BUILDER_NAMESPACE ('builder'), BUILDER_IMAGE ('quay.io/quay/quay-builder-qemu-coreos'),
        COREOS_CHANNEL ('stable'), VM_MEMORY_LIMIT ('8G'),
        CONTAINER_MEMORY_LIMIT / CONTAINER_MEMORY_REQUEST ('8Gi'),
        CONTAINER_CPU_LIMIT / CONTAINER_CPU_REQUEST ('2'),
        K8S_API_SERVER ('localhost:8080'), K8S_API_TIMEOUT (20),
        K8S_API_TLS_CERT, K8S_API_TLS_KEY, K8S_API_TLS_CA (unset).
  """
  def __init__(self, *args, **kwargs):
    self._loop = get_event_loop()
    super(KubernetesExecutor, self).__init__(*args, **kwargs)
    self.namespace = self.executor_config.get('BUILDER_NAMESPACE', 'builder')
    self.image = self.executor_config.get('BUILDER_IMAGE', 'quay.io/quay/quay-builder-qemu-coreos')

  @staticmethod
  def _is_success(response):
    """ Returns whether the given HTTP response carries a 2xx status code. """
    return response.status_code // 100 == 2

  @coroutine
  def _request(self, method, path, **kwargs):
    """ Issues an HTTP request against the Kubernetes API server and returns the response.

        method: HTTP verb, e.g. 'POST' or 'DELETE'.
        path: absolute API path (starting with '/') appended to the server address.
        kwargs: passed through to requests.request; a 'timeout' is added when absent.

        Returns (via Return) the requests response object. Status codes are not checked here;
        callers decide what counts as a failure.
    """
    request_options = dict(kwargs)
    request_options.setdefault('timeout', self.executor_config.get("K8S_API_TIMEOUT", 20))

    tls_cert = self.executor_config.get('K8S_API_TLS_CERT')
    tls_key = self.executor_config.get('K8S_API_TLS_KEY')
    tls_ca = self.executor_config.get('K8S_API_TLS_CA')

    # HTTPS is only used when a client certificate AND key are configured; a CA on its own
    # does not switch the scheme.
    if tls_cert and tls_key:
      scheme = 'https'
      request_options['cert'] = (tls_cert, tls_key)
      if tls_ca:
        request_options['verify'] = tls_ca
    else:
      scheme = 'http'

    server = self.executor_config.get('K8S_API_SERVER', 'localhost:8080')
    url = '%s://%s%s' % (scheme, server, path)

    # The executor config is deliberately not logged: it can hold credentials
    # (e.g. LOGENTRIES_TOKEN).
    logger.debug('Kubernetes request: %s %s: %s', method, url, request_options)

    # NOTE(review): requests.request is a blocking call made directly inside a coroutine, so
    # it blocks for up to `timeout` seconds before control returns to the caller. self._loop
    # is captured in __init__ but never used; consider moving this call off the loop.
    res = requests.request(method, url, **request_options)
    logger.debug('Kubernetes response: %s: %s', res.status_code, res.text)
    raise Return(res)

  def _jobs_path(self):
    """ Returns the API path of the job collection in the configured namespace. """
    return '/apis/batch/v1/namespaces/%s/jobs' % self.namespace

  def _job_path(self, build_uuid):
    """ Returns the API path of a single job, identified by its name. """
    return '%s/%s' % (self._jobs_path(), build_uuid)

  def _job_resource(self, build_uuid, user_data, coreos_channel='stable'):
    """ Builds the batch/v1 Job manifest (as a dict) for a single build.

        build_uuid: used as the job's generateName prefix and as the 'build' label.
        user_data: string handed to the container through the USERDATA environment variable.
        coreos_channel: used as the tag of the builder image.
    """
    vm_memory_limit = self.executor_config.get('VM_MEMORY_LIMIT', '8G')

    # Max values for this container
    container_limits = {
      'memory': self.executor_config.get('CONTAINER_MEMORY_LIMIT', '8Gi'),
      'cpu': self.executor_config.get('CONTAINER_CPU_LIMIT', "2"),
    }

    # Minimum acceptable free resources for this container to "fit" in a quota
    container_requests = {
      'memory': self.executor_config.get('CONTAINER_MEMORY_REQUEST', '8Gi'),
      'cpu': self.executor_config.get('CONTAINER_CPU_REQUEST', "2"),
    }

    # Computed once so the job and its pod template always carry identical labels, even when
    # the clock crosses an hour boundary while the manifest is being assembled.
    labels = {
      'build': build_uuid,
      'time': datetime.datetime.now().strftime('%Y-%m-%d-%H'),
      'worker': socket.gethostname(),
      'quay-sha': release.GIT_HEAD or 'none',
    }

    return {
      'apiVersion': 'batch/v1',
      'kind': 'Job',
      'metadata': {
        'namespace': self.namespace,
        'generateName': build_uuid,
        'labels': dict(labels),
      },
      'spec': {
        'activeDeadlineSeconds': 7200,
        'template': {
          'metadata': {
            'labels': dict(labels),
          },
          'spec': {
            'containers': [
              {
                'name': 'builder',
                'image': '%s:%s' % (self.image, coreos_channel),
                'imagePullPolicy': 'Always',
                'securityContext': {'privileged': True},
                'env': [
                  {'name': 'USERDATA', 'value': user_data},
                  {'name': 'VM_MEMORY', 'value': vm_memory_limit},
                ],
                # Limits and requests belong under the container's `resources` field; as
                # direct container keys they are not part of the container schema.
                'resources': {
                  'limits': container_limits,
                  'requests': container_requests,
                },
              },
            ],
            'imagePullSecrets': [{'name': 'builder'}],
            'restartPolicy': 'Never',
          },
        },
      },
    }

  @coroutine
  def start_builder(self, realm, token, build_uuid):
    """ Creates a Kubernetes job for the given build.

        Returns (via Return) the name of the created job, which is the id to pass to
        stop_builder. Raises ExecutorException when the API answers with a non-2xx status.
    """
    # generate resource
    channel = self.executor_config.get('COREOS_CHANNEL', 'stable')
    user_data = self.generate_cloud_config(realm, token, channel, self.manager_hostname)
    resource = self._job_resource(build_uuid, user_data, channel)
    logger.debug('Generated kubernetes resource:\n%s', resource)

    # schedule
    create_job = yield From(self._request('POST', self._jobs_path(), json=resource))
    if not self._is_success(create_job):
      raise ExecutorException('Failed to create job: %s: %s: %s' % (
        build_uuid, create_job.status_code, create_job.text))

    # The job name is read back from the response because the manifest only supplies
    # generateName, not a fixed name.
    job = create_job.json()
    raise Return(job['metadata']['name'])

  @coroutine
  def stop_builder(self, builder_id):
    """ Deletes the pods labelled with the given job name and then the job itself.

        builder_id: the job name returned by start_builder.

        Exceptions raised while issuing either request are logged and re-raised. Non-2xx
        responses are logged as warnings but do not raise.
    """
    pods_path = '/api/v1/namespaces/%s/pods' % self.namespace
    pod_selector = "job-name=%s" % builder_id

    try:
      delete_pod = yield From(self._request('DELETE', pods_path,
                                            params=dict(labelSelector=pod_selector)))
    except Exception:
      # if the pod does not exist, we will not get an error here. this covers lack of api
      # connectivity, etc
      logger.exception("Failed to delete pod for job %s", builder_id)
      raise

    if self._is_success(delete_pod):
      logger.debug("Got successful delete pod response: %s", delete_pod.text)
    else:
      logger.warning("Unexpected response when deleting pod for job %s: %s: %s", builder_id,
                     delete_pod.status_code, delete_pod.text)

    try:
      delete_job = yield From(self._request('DELETE', self._job_path(builder_id)))
    except Exception:
      logger.exception('Exception when trying to terminate job %s', builder_id)
      raise

    if not self._is_success(delete_job):
      logger.warning("Unexpected response when deleting job %s: %s: %s", builder_id,
                     delete_job.status_code, delete_job.text)
|
||||
|
||||
class LogPipe(threading.Thread):
|
||||
""" Adapted from http://codereview.stackexchange.com/a/17959
|
||||
"""
|
||||
|
|
Reference in a new issue