diff --git a/config.py b/config.py
index a51e2700f..3647c886e 100644
--- a/config.py
+++ b/config.py
@@ -88,16 +88,21 @@ class GitHubProdConfig(GitHubTestConfig):
   GITHUB_CLIENT_SECRET = 'f89d8bb28ea3bd4e1c68808500d185a816be53b1'
 
 
-class DigitalOceanConfig():
+class DigitalOceanConfig(object):
   DO_CLIENT_ID = 'LJ44y2wwYj1MD0BRxS6qHA'
   DO_CLIENT_SECRET = 'b9357a6f6ff45a33bb03f6dbbad135f9'
   DO_SSH_KEY_ID = '46986'
   DO_SSH_PRIVATE_KEY_FILENAME = 'certs/digital_ocean'
+  DO_ALLOWED_REGIONS = {1, 4}
+
+
+class BuildNodeConfig(object):
+  BUILD_NODE_PULL_TOKEN = 'F02O2E86CQLKZUQ0O81J8XDHQ6F0N1V36L9JTOEEK6GKKMT1GI8PTJQT4OU88Y6G'
 
 
 class DebugConfig(FlaskConfig, MailConfig, LocalStorage, SQLiteDB,
                   StripeTestConfig, MixpanelTestConfig, GitHubTestConfig,
-                  DigitalOceanConfig, AWSCredentials):
+                  DigitalOceanConfig, AWSCredentials, BuildNodeConfig):
   REGISTRY_SERVER = 'localhost:5000'
   LOGGING_CONFIG = {
     'level': logging.DEBUG,
@@ -110,7 +115,8 @@
 
 class LocalHostedConfig(FlaskConfig, MailConfig, S3Storage, RDSMySQL,
                         StripeLiveConfig, MixpanelTestConfig,
-                        GitHubProdConfig, DigitalOceanConfig):
+                        GitHubProdConfig, DigitalOceanConfig,
+                        BuildNodeConfig):
   REGISTRY_SERVER = 'localhost:5000'
   LOGGING_CONFIG = {
     'level': logging.DEBUG,
@@ -121,7 +127,7 @@
 
 class ProductionConfig(FlaskConfig, MailConfig, S3Storage, RDSMySQL,
                        StripeLiveConfig, MixpanelProdConfig,
-                       GitHubProdConfig, DigitalOceanConfig):
+                       GitHubProdConfig, DigitalOceanConfig, BuildNodeConfig):
   REGISTRY_SERVER = 'quay.io'
   LOGGING_CONFIG = {
     'stream': sys.stderr,
diff --git a/data/model.py b/data/model.py
index 8c56da380..a46ca1912 100644
--- a/data/model.py
+++ b/data/model.py
@@ -556,7 +556,7 @@ def load_token_data(code):
 
 def get_repository_build(request_dbid):
   try:
-    return RepositoryBuild.get(RepositoryBuild == request_dbid)
+    return RepositoryBuild.get(RepositoryBuild.id == request_dbid)
   except RepositoryBuild.DoesNotExist:
     msg = 'Unable to locate a build by id: %s' % request_dbid
     raise InvalidRepositoryBuildException(msg)
diff --git a/data/userfiles.py b/data/userfiles.py
index 43f75073e..b8ddd0d90 100644
--- a/data/userfiles.py
+++ b/data/userfiles.py
@@ -20,9 +20,8 @@ class S3FileWriteException(Exception):
 
 class UserRequestFiles(object):
   def __init__(self, s3_access_key, s3_secret_key, bucket_name):
-    self._s3_conn = boto.s3.connection.S3Connection(s3_access_key,
-                                                    s3_secret_key,
-                                                    is_secure=False)
+    self._s3_conn = boto.connect_s3(s3_access_key, s3_secret_key,
+                                    is_secure=False)
     self._bucket_name = bucket_name
     self._bucket = self._s3_conn.get_bucket(bucket_name)
     self._access_key = s3_access_key
diff --git a/endpoints/api.py b/endpoints/api.py
index 0d34b6a25..c5592d8b8 100644
--- a/endpoints/api.py
+++ b/endpoints/api.py
@@ -423,7 +423,7 @@ def request_repo_build(namespace, repository):
 
   tag = '%s/%s/%s' % (host, repo.namespace, repo.name)
   build_request = model.create_repository_build(repo, token, dockerfile_id, tag)
-  dockerfile_build_queue.put(json.dumps({'request_id': build_request.id}))
+  dockerfile_build_queue.put(json.dumps({'build_id': build_request.id}))
 
   return jsonify({
     'started': True
diff --git a/workers/dockerfilebuild.py b/workers/dockerfilebuild.py
index cbf8718d9..061de0c03 100644
--- a/workers/dockerfilebuild.py
+++ b/workers/dockerfilebuild.py
@@ -5,9 +5,12 @@ import time
 import argparse
 import digitalocean
 import requests
+import paramiko
 
 from apscheduler.scheduler import Scheduler
 from multiprocessing.pool import ThreadPool
+from base64 import b64encode
+from requests.exceptions import ConnectionError
 
 from data.queue import dockerfile_build_queue
 from data.userfiles import UserRequestFiles
@@ -35,113 +38,169 @@ def try_connection(url, retries=5, period=5):
     raise ex
 
 
+def try_connect_ssh(client, ip_addr, port, user, key_filename, retries=5,
+                    period=5):
+  try:
+    client.connect(ip_addr, port, user, look_for_keys=False,
+                   key_filename=key_filename)
+  except Exception as ex:
+    if retries:
+      logger.debug('Retrying connection to ssh ip: %s:%s after %ss' %
+                   (ip_addr, port, period))
+      time.sleep(period)
+      return try_connect_ssh(client, ip_addr, port, user, key_filename,
+                             retries-1, period)
+    raise ex
+
+
 def get_status(url):
   return requests.get(url).json()['status']
 
 
 def babysit_builder(request):
-  manager = digitalocean.Manager(client_id=app.config['DO_CLIENT_ID'],
-                                 api_key=app.config['DO_CLIENT_SECRET'])
-  repository_build = model.get_repository_build(request['build_id'])
-
-  # check if there is already a DO node for this build job, if so clean it up
-  old_id = repository_build.build_node_id
-  if old_id
-    old_droplet = digitalocean.Droplet(old_id)
-    old_droplet.destroy()
-
-  # start the DO node
-  name = 'dockerfile-build-%s' % repository_build.id
-  droplet = digitalocean.Droplet(client_id=app.config['DO_CLIENT_ID'],
-                                 api_key=app.config['DO_CLIENT_SECRET'],
-                                 name=name,
-                                 region_id=1, # New York,
-                                 image_id=1004145, # Docker on 13.04
-                                 size_id=66, # 512MB,
-                                 backup_active=False)
-  droplet.create(ssh_key_ids=[app.config['DO_SSH_KEY_ID']])
-  repository_build.build_node_id = droplet.id
-  repository_build.phase = 'starting'
-  repository_build.save()
-
-  startup = droplet.get_events()[0]
-  while int(startup.percentage) != 100:
-    logger.debug('Droplet startup percentage: %s' % startup.percentage)
-    time.sleep(5)
-    startup.load()
-
-  droplet.load()
-  logger.debug('Droplet started at ip address: %s' % droplet.ip_address)
-
-  # connect to it with ssh
-  repository_build.phase = 'initializing'
-  repository_build.save()
-
-  ssh_client = paramiko.SSHClient()
-  ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-  ssh_client.connect(self._container_ip, self._config.sshd_port, "root",
-                     look_for_keys=False,
-                     key_filename=app.config['DO_SSH_PRIVATE_KEY_FILENAME'])
-
-  # Pull and run the buildserver
-  pull_cmd = 'docker pull quay.io/quay/buildserver'
-  _, stdout, _ = ssh_client.exec_command(pull_cmd)
-
-  start_cmd = 'sudo docker run -d -privileged quay.io/quay/buildserver'
-  _, stdout, _ = ssh_client.exec_command(start_cmd)
-
-  # wait for the server to be ready
-  logger.debug('Waiting for buildserver to be ready')
-  build_endpoint = 'http://%s:5002/build/' % droplet.ip_address
   try:
-    try_connection()
-  except ConnectionError:
-    #TODO cleanup
-    pass
-  # send it the job
-  logger.debug('Sending build server request')
+    logger.debug('Starting work item: %s' % request)
+    repository_build = model.get_repository_build(request['build_id'])
+    logger.debug('Request details: %s' % repository_build)
 
-  user_files = UserRequestFiles(app.config['AWS_ACCESS_KEY'],
-                                app.config['AWS_SECRET_KEY'],
-                                app.config['REGISTRY_S3_BUCKET'])
+    # Initialize digital ocean API
+    do_client_id = app.config['DO_CLIENT_ID']
+    do_api_key = app.config['DO_CLIENT_SECRET']
+    manager = digitalocean.Manager(client_id=do_client_id, api_key=do_api_key)
 
-  repo = repository_build.repository
-  payload = {
-    'tag': repository_build.tag,
-    'resource_url': user_files.get_file_url(repository_build.resource_key),
-    'token': repository_build.access_token.code,
-  }
-  start_build = requests.post(build_endpoint, data=payload)
+    # check if there is already a DO node for this build, if so clean it up
+    old_id = repository_build.build_node_id
+    if old_id:
+      logger.debug('Cleaning up old DO node: %s' % old_id)
+      old_droplet = digitalocean.Droplet(id=old_id, client_id=do_client_id,
+                                         api_key=do_api_key)
+      old_droplet.destroy()
 
-  # wait for the job to be complete
-  status_url = start_build.headers['Location']
-  repository_build.phase = 'building'
-  repository_build.status_url = status_url
-  repository_build.save()
+    # Pick the region for the new droplet
+    allowed_regions = app.config['DO_ALLOWED_REGIONS']
+    available_regions = {region.id for region in manager.get_all_regions()}
+    regions = available_regions.intersection(allowed_regions)
+    if not regions:
+      logger.error('No droplets in our allowed regions, available: %s' %
+                   available_regions)
+      return False
 
-  logger.debug('Waiting for job to be complete')
-  status = get_status(status_url)
-  while status != 'error' and status != 'complete':
-    logger.debug('Job status is: %s' % status)
-    time.sleep(5)
+    # start the DO node
+    name = 'dockerfile-build-%s' % repository_build.id
+    logger.debug('Starting DO node: %s' % name)
+    droplet = digitalocean.Droplet(client_id=do_client_id,
+                                   api_key=do_api_key,
+                                   name=name,
+                                   region_id=regions.pop(),
+                                   image_id=1004145, # Docker on 13.04
+                                   size_id=66, # 512MB,
+                                   backup_active=False)
+    droplet.create(ssh_key_ids=[app.config['DO_SSH_KEY_ID']])
+    repository_build.build_node_id = droplet.id
+    repository_build.phase = 'starting'
+    repository_build.save()
+
+    startup = droplet.get_events()[0]
+    startup.load()
+    while not startup.percentage or int(startup.percentage) != 100:
+      logger.debug('Droplet startup percentage: %s' % startup.percentage)
+      time.sleep(5)
+      startup.load()
+
+    droplet.load()
+    logger.debug('Droplet started at ip address: %s' % droplet.ip_address)
+
+    # connect to it with ssh
+    repository_build.phase = 'initializing'
+    repository_build.save()
+
+    ssh_client = paramiko.SSHClient()
+    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    try_connect_ssh(ssh_client, droplet.ip_address, 22, 'root',
+                    key_filename=app.config['DO_SSH_PRIVATE_KEY_FILENAME'])
+
+    # Load the node with the pull token
+    token = app.config['BUILD_NODE_PULL_TOKEN']
+    basicauth = b64encode('%s:%s' % ('$token', token))
+    auth_object = {
+      'https://quay.io/v1/': {
+        'auth': basicauth,
+        'email': '',
+      },
+    }
+
+    create_auth_cmd = 'echo \'%s\' > .dockercfg' % json.dumps(auth_object)
+    ssh_client.exec_command(create_auth_cmd)
+
+    # Pull and run the buildserver
+    pull_cmd = 'docker pull quay.io/quay/buildserver'
+    _, stdout, _ = ssh_client.exec_command(pull_cmd)
+    pull_status = stdout.channel.recv_exit_status()
+
+    if pull_status != 0:
+      logger.error('Pull command failed for host: %s' % droplet.ip_address)
+    else:
+      logger.debug('Pull status was: %s' % pull_status)
+
+    start_cmd = 'docker run -d -privileged -lxc-conf="lxc.aa_profile=unconfined" quay.io/quay/buildserver'
+    ssh_client.exec_command(start_cmd)
+
+    # wait for the server to be ready
+    logger.debug('Waiting for buildserver to be ready')
+    build_endpoint = 'http://%s:5002/build/' % droplet.ip_address
+    try:
+      try_connection(build_endpoint)
+    except ConnectionError:
+      #TODO cleanup
+      pass
+
+    # send it the job
+    logger.debug('Sending build server request')
+
+    user_files = UserRequestFiles(app.config['AWS_ACCESS_KEY'],
+                                  app.config['AWS_SECRET_KEY'],
+                                  app.config['REGISTRY_S3_BUCKET'])
+
+    repo = repository_build.repository
+    payload = {
+      'tag': repository_build.tag,
+      'resource_url': user_files.get_file_url(repository_build.resource_key),
+      'token': repository_build.access_token.code,
+    }
+    start_build = requests.post(build_endpoint, data=payload)
+
+    # wait for the job to be complete
+    status_url = start_build.headers['Location']
+    repository_build.phase = 'building'
+    repository_build.status_url = status_url
+    repository_build.save()
+
+    logger.debug('Waiting for job to be complete')
     status = get_status(status_url)
+    while status != 'error' and status != 'complete':
+      logger.debug('Job status is: %s' % status)
+      time.sleep(5)
+      status = get_status(status_url)
 
-  logger.debug('Job complete with status: %s' % status)
-  if status == 'error':
-    repository_build.phase = 'error'
-  else:
-    repository_build.phase = 'complete'
+    logger.debug('Job complete with status: %s' % status)
+    if status == 'error':
+      repository_build.phase = 'error'
+    else:
+      repository_build.phase = 'complete'
 
-  # clean up the DO node
-  logger.debug('Cleaning up DO node.')
-  droplet.destroy()
+    # clean up the DO node
+    logger.debug('Cleaning up DO node.')
+    # droplet.destroy()
 
-  repository_build.status_url = None
-  repository_build.build_node_id = None;
-  repository_build.save()
+    repository_build.status_url = None
+    repository_build.build_node_id = None
+    repository_build.save()
 
-  return True
+    return True
+
+  except Exception as outer_ex:
+    logger.exception('Exception processing job: %s' % outer_ex.message)
 
 
 def process_work_items(pool):
@@ -161,6 +220,7 @@ def process_work_items(pool):
           dockerfile_build_queue.complete(local_item)
       return complete_callback
 
+    logger.debug('Sending work item to thread pool: %s' % pool)
     pool.apply_async(babysit_builder, [request],
                      callback=build_callback(item))
 
@@ -171,12 +231,14 @@ def start_worker():
   pool = ThreadPool(3)
-  logger.debug("Scheduling worker.")
+  logger.debug('Scheduling worker.')
 
   sched = Scheduler()
   sched.start()
-  sched.add_interval_job(process_work_items, args=[pool], seconds=30)
+  # sched.add_interval_job(process_work_items, args=[pool], seconds=30)
+
+  process_work_items(pool)
 
   while True:
     time.sleep(60 * 60 * 24) # sleep one day, basically forever