diff --git a/app.py b/app.py
index f4d900b15..c3b15d7aa 100644
--- a/app.py
+++ b/app.py
@@ -2,7 +2,7 @@ import logging
 import os
 import json

-from flask import Flask, Config, request, Request
+from flask import Flask, Config, request, Request, _request_ctx_stack
 from flask.ext.principal import Principal
 from flask.ext.login import LoginManager, UserMixin
 from flask.ext.mail import Mail
@@ -44,7 +44,6 @@ CONFIG_PROVIDER = FileConfigProvider(OVERRIDE_CONFIG_DIRECTORY, 'config.yaml', '

 app = Flask(__name__)
 logger = logging.getLogger(__name__)
-profile = logging.getLogger('profile')

 # Instantiate the default configuration (for test or for normal operation).
 if 'TEST' in os.environ:
@@ -79,21 +78,24 @@ class RequestWithId(Request):

 @app.before_request
 def _request_start():
-  profile.debug('Starting request: %s', request.path)
+  logger.debug('Starting request: %s', request.path)


 @app.after_request
 def _request_end(r):
-  profile.debug('Ending request: %s', request.path)
+  logger.debug('Ending request: %s', request.path)
   return r


 class InjectingFilter(logging.Filter):
   def filter(self, record):
-    record.msg = '[%s] %s' % (request.request_id, record.msg)
+    if _request_ctx_stack.top is not None:
+      record.msg = '[%s] %s' % (request.request_id, record.msg)
     return True

-profile.addFilter(InjectingFilter())
+# Add the request id filter to all handlers of the root logger
+for handler in logging.getLogger().handlers:
+  handler.addFilter(InjectingFilter())

 app.request_class = RequestWithId

diff --git a/application.py b/application.py
index a9bd0df6e..235a80b16 100644
--- a/application.py
+++ b/application.py
@@ -11,5 +11,5 @@ import registry

 if __name__ == '__main__':
-  logging.config.fileConfig('conf/logging.conf', disable_existing_loggers=False)
+  logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
   application.run(port=5000, debug=True, threaded=True, host='0.0.0.0')
diff --git a/buildman/component/buildcomponent.py b/buildman/component/buildcomponent.py
index c1fb41a02..72caec215 100644
--- a/buildman/component/buildcomponent.py
+++ b/buildman/component/buildcomponent.py
@@ -10,7 +10,6 @@ from autobahn.wamp.exception import ApplicationError
 from buildman.server import BuildJobResult
 from buildman.component.basecomponent import BaseComponent
 from buildman.jobutil.buildjob import BuildJobLoadException
-from buildman.jobutil.buildpack import BuildPackage, BuildPackageException
 from buildman.jobutil.buildstatus import StatusHandler
 from buildman.jobutil.workererror import WorkerError

@@ -20,7 +19,7 @@ HEARTBEAT_DELTA = datetime.timedelta(seconds=30)
 HEARTBEAT_TIMEOUT = 10
 INITIAL_TIMEOUT = 25

-SUPPORTED_WORKER_VERSIONS = ['0.1-beta', '0.2']
+SUPPORTED_WORKER_VERSIONS = ['0.3']

 logger = logging.getLogger(__name__)

@@ -56,7 +55,10 @@ class BuildComponent(BaseComponent):
   def onJoin(self, details):
     logger.debug('Registering methods and listeners for component %s', self.builder_realm)
     yield trollius.From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
+    yield trollius.From(self.register(self._determine_cache_tag,
+                                      u'io.quay.buildworker.determinecachetag'))
     yield trollius.From(self.register(self._ping, u'io.quay.buildworker.ping'))
+    yield trollius.From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
     yield trollius.From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))

@@ -91,46 +93,6 @@ class BuildComponent(BaseComponent):
     buildpack_url = self.user_files.get_file_url(build_job.repo_build.resource_key,
                                                  requires_cors=False)

-    # TODO(jschorr): Remove as soon as the fleet has been transitioned to 0.2.
-    if self._worker_version == '0.1-beta':
-      # Retrieve the job's buildpack.
-      logger.debug('Retrieving build package: %s', buildpack_url)
-      buildpack = None
-      try:
-        buildpack = BuildPackage.from_url(buildpack_url)
-      except BuildPackageException as bpe:
-        self._build_failure('Could not retrieve build package', bpe)
-        raise trollius.Return()
-
-      # Extract the base image information from the Dockerfile.
-      parsed_dockerfile = None
-      logger.debug('Parsing dockerfile')
-
-      try:
-        parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
-      except BuildPackageException as bpe:
-        self._build_failure('Could not find Dockerfile in build package', bpe)
-        raise trollius.Return()
-
-      image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
-      if image_and_tag_tuple is None or image_and_tag_tuple[0] is None:
-        self._build_failure('Missing FROM line in Dockerfile')
-        raise trollius.Return()
-
-      base_image_information = {
-        'repository': image_and_tag_tuple[0],
-        'tag': image_and_tag_tuple[1]
-      }
-
-      # Extract the number of steps from the Dockerfile.
-      with self._build_status as status_dict:
-        status_dict['total_commands'] = len(parsed_dockerfile.commands)
-    else:
-      # TODO(jschorr): This is a HACK to make sure the progress bar (sort of) continues working
-      # until such time as we have the caching code in place.
-      with self._build_status as status_dict:
-        status_dict['total_commands'] = 25
-
     # Add the pull robot information, if any.
     if build_job.pull_credentials:
       base_image_information['username'] = build_job.pull_credentials.get('username', '')
@@ -161,8 +123,7 @@ class BuildComponent(BaseComponent):
       'pull_token': build_job.repo_build.access_token.code,
       'push_token': build_job.repo_build.access_token.code,
       'tag_names': build_config.get('docker_tags', ['latest']),
-      'base_image': base_image_information,
-      'cached_tag': build_job.determine_cached_tag() or ''
+      'base_image': base_image_information
     }

     # Invoke the build.
@@ -256,6 +217,16 @@ class BuildComponent(BaseComponent):
     elif phase == BUILD_PHASE.BUILDING:
       self._build_status.append_log(current_status_string)

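+  # Registered as io.quay.buildworker.determinecachetag in onJoin above: the worker
+  # calls back with the Dockerfile's command comments and base image so the build
+  # can be primed from an existing cached tag.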
+  @trollius.coroutine
+  def _determine_cache_tag(self, command_comments, base_image_name, base_image_tag, base_image_id):
+    with self._build_status as status_dict:
+      status_dict['total_commands'] = len(command_comments) + 1
+
+    logger.debug('Checking cache on realm %s. Base image: %s:%s (%s)', self.builder_realm,
+                 base_image_name, base_image_tag, base_image_id)
+
+    tag_found = self._current_job.determine_cached_tag(base_image_id, command_comments)
+    raise trollius.Return(tag_found or '')

   def _build_failure(self, error_message, exception=None):
     """ Handles and logs a failed build. """
diff --git a/buildman/jobutil/buildjob.py b/buildman/jobutil/buildjob.py
index bb750fc38..1710c3aab 100644
--- a/buildman/jobutil/buildjob.py
+++ b/buildman/jobutil/buildjob.py
@@ -1,8 +1,12 @@
 import json
+import logging

 from cachetools import lru_cache
 from endpoints.notificationhelper import spawn_notification
 from data import model
+from util.imagetree import ImageTree
+
+logger = logging.getLogger(__name__)


 class BuildJobLoadException(Exception):
@@ -65,14 +69,65 @@ class BuildJob(object):
         'Could not parse repository build job config with ID %s' % self.job_details['build_uuid']
       )

-  def determine_cached_tag(self):
+  def determine_cached_tag(self, base_image_id=None, cache_comments=None):
     """ Returns the tag to pull to prime the cache or None if none.
     """
-    # TODO(jschorr): Change this to use the more complicated caching rules, once we have caching
-    # be a pull of things besides the constructed tags.
-    tags = self.build_config.get('docker_tags', ['latest'])
-    existing_tags = model.list_repository_tags(self.repo_build.repository.namespace_user.username,
-                                               self.repo_build.repository.name)
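+    # Try the Docker-style command-comment match first; fall back to matching one
+    # of the tags being built when no cache comments are provided.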
""" - # TODO(jschorr): Change this to use the more complicated caching rules, once we have caching - # be a pull of things besides the constructed tags. - tags = self.build_config.get('docker_tags', ['latest']) - existing_tags = model.list_repository_tags(self.repo_build.repository.namespace_user.username, - self.repo_build.repository.name) + cached_tag = None + if base_image_id and cache_comments: + cached_tag = self._determine_cached_tag_by_comments(base_image_id, cache_comments) + if not cached_tag: + cached_tag = self._determine_cached_tag_by_tag() + + logger.debug('Determined cached tag %s for %s: %s', cached_tag, base_image_id, cache_comments) + + return cached_tag + + def _determine_cached_tag_by_comments(self, base_image_id, cache_commands): + """ Determines the tag to use for priming the cache for this build job, by matching commands + starting at the given base_image_id. This mimics the Docker cache checking, so it should, + in theory, provide "perfect" caching. + """ + # Lookup the base image in the repository. If it doesn't exist, nothing more to do. + repo_build = self.repo_build + repo_namespace = repo_build.repository.namespace_user.username + repo_name = repo_build.repository.name + + base_image = model.get_image(repo_build.repository, base_image_id) + if base_image is None: + return None + + # Build an in-memory tree of the full heirarchy of images in the repository. + all_images = model.get_repository_images(repo_namespace, repo_name) + all_tags = model.list_repository_tags(repo_namespace, repo_name) + tree = ImageTree(all_images, all_tags, base_filter=base_image.id) + + # Find a path in the tree, starting at the base image, that matches the cache comments + # or some subset thereof. + def checker(step, image): + if step >= len(cache_commands): + return False + + full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step] + logger.debug('Checking step #%s: %s, %s == %s', step, image.id, + image.storage.command, full_command) + + return image.storage.command == full_command + + path = tree.find_longest_path(base_image.id, checker) + if not path: + return None + + # Find any tag associated with the last image in the path. + return tree.tag_containing_image(path[-1]) + + + def _determine_cached_tag_by_tag(self): + """ Determines the cached tag by looking for one of the tags being built, and seeing if it + exists in the repository. This is a fallback for when no comment information is available. + """ + tags = self.build_config.get('docker_tags', ['latest']) + repository = self.repo_build.repository + existing_tags = model.list_repository_tags(repository.namespace_user.username, repository.name) cached_tags = set(tags) & set([tag.name for tag in existing_tags]) if cached_tags: return list(cached_tags)[0] diff --git a/buildman/jobutil/buildpack.py b/buildman/jobutil/buildpack.py deleted file mode 100644 index 9892c65d3..000000000 --- a/buildman/jobutil/buildpack.py +++ /dev/null @@ -1,88 +0,0 @@ -import tarfile -import requests -import os - -from tempfile import TemporaryFile, mkdtemp -from zipfile import ZipFile -from util.dockerfileparse import parse_dockerfile -from util.safetar import safe_extractall - -class BuildPackageException(Exception): - """ Exception raised when retrieving or parsing a build package. """ - pass - - -class BuildPackage(object): - """ Helper class for easy reading and updating of a Dockerfile build pack. 
""" - - def __init__(self, requests_file): - self._mime_processors = { - 'application/zip': BuildPackage._prepare_zip, - 'application/x-zip-compressed': BuildPackage._prepare_zip, - 'text/plain': BuildPackage._prepare_dockerfile, - 'application/octet-stream': BuildPackage._prepare_dockerfile, - 'application/x-tar': BuildPackage._prepare_tarball, - 'application/gzip': BuildPackage._prepare_tarball, - 'application/x-gzip': BuildPackage._prepare_tarball, - } - - c_type = requests_file.headers['content-type'] - c_type = c_type.split(';')[0] if ';' in c_type else c_type - - if c_type not in self._mime_processors: - raise BuildPackageException('Unknown build package mime type: %s' % c_type) - - self._package_directory = None - try: - self._package_directory = self._mime_processors[c_type](requests_file) - except Exception as ex: - raise BuildPackageException(ex.message) - - def parse_dockerfile(self, subdirectory): - dockerfile_path = os.path.join(self._package_directory, subdirectory, 'Dockerfile') - if not os.path.exists(dockerfile_path): - if subdirectory: - message = 'Build package did not contain a Dockerfile at sub directory %s.' % subdirectory - else: - message = 'Build package did not contain a Dockerfile at the root directory.' - - raise BuildPackageException(message) - - with open(dockerfile_path, 'r') as dockerfileobj: - return parse_dockerfile(dockerfileobj.read()) - - @staticmethod - def from_url(url): - buildpack_resource = requests.get(url, stream=True) - return BuildPackage(buildpack_resource) - - @staticmethod - def _prepare_zip(request_file): - build_dir = mkdtemp(prefix='docker-build-') - - # Save the zip file to temp somewhere - with TemporaryFile() as zip_file: - zip_file.write(request_file.content) - to_extract = ZipFile(zip_file) - to_extract.extractall(build_dir) - - return build_dir - - @staticmethod - def _prepare_dockerfile(request_file): - build_dir = mkdtemp(prefix='docker-build-') - dockerfile_path = os.path.join(build_dir, "Dockerfile") - with open(dockerfile_path, 'w') as dockerfile: - dockerfile.write(request_file.content) - - return build_dir - - @staticmethod - def _prepare_tarball(request_file): - build_dir = mkdtemp(prefix='docker-build-') - - # Save the zip file to temp somewhere - with tarfile.open(mode='r|*', fileobj=request_file.raw) as tar_stream: - safe_extractall(tar_stream, build_dir) - - return build_dir diff --git a/buildman/jobutil/buildstatus.py b/buildman/jobutil/buildstatus.py index 217e3aa6c..2ae127ee0 100644 --- a/buildman/jobutil/buildstatus.py +++ b/buildman/jobutil/buildstatus.py @@ -7,11 +7,12 @@ class StatusHandler(object): def __init__(self, build_logs, repository_build_uuid): self._current_phase = None + self._current_command = None self._uuid = repository_build_uuid self._build_logs = build_logs self._status = { - 'total_commands': None, + 'total_commands': 0, 'current_command': None, 'push_completion': 0.0, 'pull_completion': 0.0, @@ -26,9 +27,16 @@ class StatusHandler(object): self._build_logs.append_log_message(self._uuid, log_message, log_type, log_data) def append_log(self, log_message, extra_data=None): + if log_message is None: + return + self._append_log_message(log_message, log_data=extra_data) def set_command(self, command, extra_data=None): + if self._current_command == command: + return + + self._current_command = command self._append_log_message(command, self._build_logs.COMMAND, extra_data) def set_error(self, error_message, extra_data=None, internal_error=False): diff --git a/buildman/jobutil/workererror.py 
+    if self._current_command == command:
+      return
+
+    self._current_command = command
     self._append_log_message(command, self._build_logs.COMMAND, extra_data)

   def set_error(self, error_message, extra_data=None, internal_error=False):
diff --git a/buildman/jobutil/workererror.py b/buildman/jobutil/workererror.py
index c7100360b..fdf6503b0 100644
--- a/buildman/jobutil/workererror.py
+++ b/buildman/jobutil/workererror.py
@@ -63,6 +63,11 @@ class WorkerError(object):
       'io.quay.builder.missingorinvalidargument': {
         'message': 'Missing required arguments for builder',
         'is_internal': True
+      },
+
+      'io.quay.builder.cachelookupissue': {
+        'message': 'Error checking for a cached tag',
+        'is_internal': True
       }
     }

diff --git a/buildman/manager/executor.py b/buildman/manager/executor.py
index 92641c6ce..035d5cdf8 100644
--- a/buildman/manager/executor.py
+++ b/buildman/manager/executor.py
@@ -11,6 +11,7 @@ from trollius import coroutine, From, Return, get_event_loop
 from functools import partial

 from buildman.asyncutil import AsyncWrapper
+from container_cloud_config import CloudConfigContext

 logger = logging.getLogger(__name__)

@@ -20,7 +21,7 @@ ONE_HOUR = 60*60

 ENV = Environment(loader=FileSystemLoader('buildman/templates'))
 TEMPLATE = ENV.get_template('cloudconfig.yaml')
-
+CloudConfigContext().populate_jinja_environment(ENV)

 class ExecutorException(Exception):
   """ Exception raised when there is a problem starting or stopping a builder.
diff --git a/buildman/templates/cloudconfig.yaml b/buildman/templates/cloudconfig.yaml
index 13e6894bf..51bb2f090 100644
--- a/buildman/templates/cloudconfig.yaml
+++ b/buildman/templates/cloudconfig.yaml
@@ -19,18 +19,13 @@ coreos:
     group: {{ coreos_channel }}

   units:
-    - name: quay-builder.service
-      command: start
-      content: |
-        [Unit]
-        Description=Quay builder container
-        Author=Jake Moshenko
-        After=docker.service
-
-        [Service]
-        TimeoutStartSec=600
-        TimeoutStopSec=2000
-        ExecStartPre=/usr/bin/docker login -u {{ quay_username }} -p {{ quay_password }} -e unused quay.io
-        ExecStart=/usr/bin/docker run --rm --net=host --name quay-builder --privileged --env-file /root/overrides.list -v /var/run/docker.sock:/var/run/docker.sock -v /usr/share/ca-certificates:/etc/ssl/certs quay.io/coreos/registry-build-worker:{{ worker_tag }}
-        ExecStop=/usr/bin/docker stop quay-builder
-        ExecStopPost=/bin/sh -xc "/bin/sleep 120; /usr/bin/systemctl --no-block poweroff"
+    {{ dockersystemd('quay-builder',
+                     'quay.io/coreos/registry-build-worker',
+                     quay_username,
+                     quay_password,
+                     worker_tag,
+                     extra_args='--net=host --privileged --env-file /root/overrides.list -v /var/run/docker.sock:/var/run/docker.sock -v /usr/share/ca-certificates:/etc/ssl/certs',
+                     exec_stop_post=['/bin/sh -xc "/bin/sleep 120; /usr/bin/systemctl --no-block poweroff"'],
+                     flattened=True,
+                     restart_policy='no'
+    ) | indent(4) }}
diff --git a/conf/gunicorn_local.py b/conf/gunicorn_local.py
index e17be0bb3..6987041be 100644
--- a/conf/gunicorn_local.py
+++ b/conf/gunicorn_local.py
@@ -3,6 +3,6 @@ workers = 2
 worker_class = 'gevent'
 timeout = 2000
 daemon = False
-logconfig = 'conf/logging.conf'
+logconfig = 'conf/logging_debug.conf'
 pythonpath = '.'
 preload_app = True
diff --git a/conf/logging.conf b/conf/logging.conf
index d009f08ee..317803a24 100644
--- a/conf/logging.conf
+++ b/conf/logging.conf
@@ -1,5 +1,5 @@
 [loggers]
-keys=root, gunicorn.error, gunicorn.access, application.profiler, boto, werkzeug
+keys=root

 [handlers]
 keys=console
@@ -7,39 +7,9 @@ keys=console
 [formatters]
 keys=generic

-[logger_application.profiler]
-level=DEBUG
-handlers=console
-propagate=0
-qualname=application.profiler
-
 [logger_root]
-level=DEBUG
-handlers=console
-
-[logger_boto]
 level=INFO
 handlers=console
-propagate=0
-qualname=boto
-
-[logger_werkzeug]
-level=DEBUG
-handlers=console
-propagate=0
-qualname=werkzeug
-
-[logger_gunicorn.error]
-level=INFO
-handlers=console
-propagate=1
-qualname=gunicorn.error
-
-[logger_gunicorn.access]
-level=INFO
-handlers=console
-propagate=0
-qualname=gunicorn.access

 [handler_console]
 class=StreamHandler
diff --git a/conf/logging_debug.conf b/conf/logging_debug.conf
new file mode 100644
index 000000000..01a3c8fbb
--- /dev/null
+++ b/conf/logging_debug.conf
@@ -0,0 +1,21 @@
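+# Same console handler and format as conf/logging.conf, but the root logger runs
+# at DEBUG for local development.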
+[loggers]
+keys=root
+
+[handlers]
+keys=console
+
+[formatters]
+keys=generic
+
+[logger_root]
+level=DEBUG
+handlers=console
+
+[handler_console]
+class=StreamHandler
+formatter=generic
+args=(sys.stdout, )
+
+[formatter_generic]
+format=%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s
+class=logging.Formatter
diff --git a/data/model/legacy.py b/data/model/legacy.py
index 9bae8cd32..ff1a5ba59 100644
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@@ -1096,6 +1096,26 @@ def get_repository(namespace_name, repository_name):
     return None


+def get_image(repo, dockerfile_id):
+  try:
+    return Image.get(Image.docker_image_id == dockerfile_id, Image.repository == repo)
+  except Image.DoesNotExist:
+    return None
+
+
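+# Image.ancestors is a '/'-delimited path of ancestor ids (the root image has
+# ancestors of '/'), so a LIKE match on '%/<parent id>/%' selects descendants.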
+def find_child_image(repo, parent_image, command):
+  try:
+    return (Image.select()
+                 .join(ImageStorage)
+                 .switch(Image)
+                 .where(Image.ancestors % ('%%/%s/%%' % parent_image.id),
+                        ImageStorage.command == command)
+                 .order_by(ImageStorage.created.desc())
+                 .get())
+  except Image.DoesNotExist:
+    return None
+
+
 def get_repo_image(namespace_name, repository_name, docker_image_id):
   def limit_to_image_id(query):
     return query.where(Image.docker_image_id == docker_image_id).limit(1)
@@ -1681,7 +1701,6 @@ def get_tag_image(namespace_name, repository_name, tag_name):
   else:
     return images[0]

-
 def get_image_by_id(namespace_name, repository_name, docker_image_id):
   image = get_repo_image_extended(namespace_name, repository_name, docker_image_id)
   if not image:
diff --git a/endpoints/common.py b/endpoints/common.py
index 3534ee072..9301329e0 100644
--- a/endpoints/common.py
+++ b/endpoints/common.py
@@ -29,7 +29,6 @@ from endpoints.notificationhelper import spawn_notification
 import features

 logger = logging.getLogger(__name__)
-profile = logging.getLogger('application.profiler')

 route_data = None

@@ -253,7 +252,7 @@ def start_build(repository, dockerfile_id, tags, build_name, subdir, manual,
                                           metadata=metadata, repository=repository)

   # Add notifications for the build queue.
-  profile.debug('Adding notifications for repository')
+  logger.debug('Adding notifications for repository')
   event_data = {
     'build_id': build_request.uuid,
     'build_name': build_name,
diff --git a/endpoints/index.py b/endpoints/index.py
index de45f2fde..660ab94aa 100644
--- a/endpoints/index.py
+++ b/endpoints/index.py
@@ -23,7 +23,6 @@ from endpoints.notificationhelper import spawn_notification
 import features

 logger = logging.getLogger(__name__)
-profile = logging.getLogger('application.profiler')

 index = Blueprint('index', __name__)

@@ -120,7 +119,7 @@ def create_user():

   else:
     # New user case
-    profile.debug('Creating user')
+    logger.debug('Creating user')
     new_user = None

     try:
@@ -128,10 +127,10 @@ def create_user():
     except model.TooManyUsersException as ex:
       abort(402, 'Seat limit has been reached for this license', issue='seat-limit')

-    profile.debug('Creating email code for user')
+    logger.debug('Creating email code for user')
     code = model.create_confirm_email_code(new_user)

-    profile.debug('Sending email code to user')
+    logger.debug('Sending email code to user')
     send_confirmation_email(new_user.username, new_user.email, code.code)

     return make_response('Created', 201)
@@ -168,12 +167,12 @@ def update_user(username):
     update_request = request.get_json()

     if 'password' in update_request:
-      profile.debug('Updating user password')
+      logger.debug('Updating user password')
       model.change_password(get_authenticated_user(), update_request['password'])

     if 'email' in update_request:
-      profile.debug('Updating user email')
+      logger.debug('Updating user email')
       model.update_email(get_authenticated_user(), update_request['email'])

     return jsonify({
@@ -189,13 +188,13 @@ def update_user(username):
 @parse_repository_name
 @generate_headers(role='write')
 def create_repository(namespace, repository):
-  profile.debug('Parsing image descriptions')
+  logger.debug('Parsing image descriptions')
   image_descriptions = json.loads(request.data.decode('utf8'))

-  profile.debug('Looking up repository')
+  logger.debug('Looking up repository')
   repo = model.get_repository(namespace, repository)

-  profile.debug('Repository looked up')
+  logger.debug('Repository looked up')
   if not repo and get_authenticated_user() is None:
     logger.debug('Attempt to create new repository without user auth.')
     abort(401,
@@ -219,11 +218,11 @@ def create_repository(namespace, repository):
           issue='no-create-permission',
           namespace=namespace)

-    profile.debug('Creaing repository with owner: %s', get_authenticated_user().username)
+    logger.debug('Creating repository with owner: %s', get_authenticated_user().username)
     repo = model.create_repository(namespace, repository,
                                    get_authenticated_user())

-  profile.debug('Determining already added images')
+  logger.debug('Determining already added images')
   added_images = OrderedDict([(desc['id'], desc) for desc in image_descriptions])
   new_repo_images = dict(added_images)

@@ -239,7 +238,7 @@ def create_repository(namespace, repository):
   for existing in existing_images:
     added_images.pop(existing.docker_image_id)

-  profile.debug('Creating/Linking necessary images')
+  logger.debug('Creating/Linking necessary images')
   username = get_authenticated_user() and get_authenticated_user().username
   translations = {}
   for image_description in added_images.values():
@@ -247,7 +246,7 @@
                            translations, storage.preferred_locations[0])

-  profile.debug('Created images')
+  logger.debug('Created images')
   track_and_log('push_repo', repo)

   return make_response('Created', 201)
@@ -260,14 +259,14 @@ def update_images(namespace, repository):
   permission = ModifyRepositoryPermission(namespace, repository)

   if permission.can():
-    profile.debug('Looking up repository')
+    logger.debug('Looking up repository')
     repo = model.get_repository(namespace, repository)
     if not repo:
       # Make sure the repo actually exists.
       abort(404, message='Unknown repository', issue='unknown-repo')

     if get_authenticated_user():
-      profile.debug('Publishing push event')
+      logger.debug('Publishing push event')
       username = get_authenticated_user().username

       # Mark that the user has pushed the repo.
@@ -280,11 +279,11 @@ def update_images(namespace, repository):
       event = userevents.get_event(username)
       event.publish_event_data('docker-cli', user_data)

-    profile.debug('GCing repository')
+    logger.debug('GCing repository')
     num_removed = model.garbage_collect_repository(namespace, repository)

     # Generate a job for each notification that has been added to this repo
-    profile.debug('Adding notifications for repository')
+    logger.debug('Adding notifications for repository')

     updated_tags = session.get('pushed_tags', {})
     event_data = {
@@ -307,13 +306,13 @@ def get_repository_images(namespace, repository):
   # TODO invalidate token?
   if permission.can() or model.repository_is_public(namespace, repository):
     # We can't rely on permissions to tell us if a repo exists anymore
-    profile.debug('Looking up repository')
+    logger.debug('Looking up repository')
     repo = model.get_repository(namespace, repository)
     if not repo:
       abort(404, message='Unknown repository', issue='unknown-repo')

     all_images = []
-    profile.debug('Retrieving repository images')
+    logger.debug('Retrieving repository images')
     for image in model.get_repository_images(namespace, repository):
       new_image_view = {
         'id': image.docker_image_id,
@@ -321,7 +320,7 @@ def get_repository_images(namespace, repository):
       }
       all_images.append(new_image_view)

-    profile.debug('Building repository image response')
+    logger.debug('Building repository image response')
     resp = make_response(json.dumps(all_images), 200)
     resp.mimetype = 'application/json'

diff --git a/endpoints/registry.py b/endpoints/registry.py
index b4b03334f..5178f3a83 100644
--- a/endpoints/registry.py
+++ b/endpoints/registry.py
@@ -20,7 +20,6 @@ from util import gzipstream

 registry = Blueprint('registry', __name__)
 logger = logging.getLogger(__name__)
-profile = logging.getLogger('application.profiler')

 class SocketReader(object):
   def __init__(self, fp):
@@ -100,12 +99,12 @@ def set_cache_headers(f):
 def head_image_layer(namespace, repository, image_id, headers):
   permission = ReadRepositoryPermission(namespace, repository)

-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   if permission.can() or model.repository_is_public(namespace, repository):
-    profile.debug('Looking up repo image')
+    logger.debug('Looking up repo image')
     repo_image = model.get_repo_image_extended(namespace, repository, image_id)
     if not repo_image:
-      profile.debug('Image not found')
+      logger.debug('Image not found')
       abort(404, 'Image %(image_id)s not found', issue='unknown-image',
             image_id=image_id)

@@ -114,7 +113,7 @@ def head_image_layer(namespace, repository, image_id, headers):

     # Add the Accept-Ranges header if the storage engine supports resumable
     # downloads.
     if store.get_supports_resumable_downloads(repo_image.storage.locations):
-      profile.debug('Storage supports resumable downloads')
+      logger.debug('Storage supports resumable downloads')
       extra_headers['Accept-Ranges'] = 'bytes'

     resp = make_response('')
@@ -133,35 +132,35 @@ def get_image_layer(namespace, repository, image_id, headers):
   permission = ReadRepositoryPermission(namespace, repository)

-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   if permission.can() or model.repository_is_public(namespace, repository):
-    profile.debug('Looking up repo image')
+    logger.debug('Looking up repo image')
     repo_image = model.get_repo_image_extended(namespace, repository, image_id)
     if not repo_image:
-      profile.debug('Image not found')
+      logger.debug('Image not found')
       abort(404, 'Image %(image_id)s not found', issue='unknown-image',
             image_id=image_id)

-    profile.debug('Looking up the layer path')
+    logger.debug('Looking up the layer path')
     try:
       path = store.image_layer_path(repo_image.storage.uuid)

-      profile.debug('Looking up the direct download URL')
+      logger.debug('Looking up the direct download URL')
       direct_download_url = store.get_direct_download_url(repo_image.storage.locations, path)

       if direct_download_url:
-        profile.debug('Returning direct download URL')
+        logger.debug('Returning direct download URL')
         resp = redirect(direct_download_url)
         return resp

-      profile.debug('Streaming layer data')
+      logger.debug('Streaming layer data')

       # Close the database handle here for this process before we send the long download.
       database.close_db_filter(None)

       return Response(store.stream_read(repo_image.storage.locations, path), headers=headers)
     except (IOError, AttributeError):
-      profile.exception('Image layer data not found')
+      logger.exception('Image layer data not found')
       abort(404, 'Image %(image_id)s not found', issue='unknown-image',
             image_id=image_id)

@@ -172,30 +171,30 @@ def get_image_layer(namespace, repository, image_id, headers):
 @process_auth
 @extract_namespace_repo_from_session
 def put_image_layer(namespace, repository, image_id):
-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   permission = ModifyRepositoryPermission(namespace, repository)
   if not permission.can():
     abort(403)

-  profile.debug('Retrieving image')
+  logger.debug('Retrieving image')
   repo_image = model.get_repo_image_extended(namespace, repository, image_id)
   try:
-    profile.debug('Retrieving image data')
+    logger.debug('Retrieving image data')
     uuid = repo_image.storage.uuid
     json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
   except (IOError, AttributeError):
-    profile.exception('Exception when retrieving image data')
+    logger.exception('Exception when retrieving image data')
    abort(404, 'Image %(image_id)s not found', issue='unknown-image',
          image_id=image_id)

-  profile.debug('Retrieving image path info')
+  logger.debug('Retrieving image path info')
   layer_path = store.image_layer_path(uuid)

   if (store.exists(repo_image.storage.locations, layer_path) and not
       image_is_uploading(repo_image)):
     exact_abort(409, 'Image already exists')

-  profile.debug('Storing layer data')
+  logger.debug('Storing layer data')

   input_stream = request.stream
   if request.headers.get('transfer-encoding') == 'chunked':
@@ -262,7 +261,7 @@ def put_image_layer(namespace, repository, image_id):

   # The layer is ready for download, send a job to the work queue to
   # process it.
-  profile.debug('Adding layer to diff queue')
+  logger.debug('Adding layer to diff queue')
   repo = model.get_repository(namespace, repository)
   image_diff_queue.put([repo.namespace_user.username, repository, image_id], json.dumps({
     'namespace_user_id': repo.namespace_user.id,
@@ -277,7 +276,7 @@ def put_image_layer(namespace, repository, image_id):

 @process_auth
 @extract_namespace_repo_from_session
 def put_image_checksum(namespace, repository, image_id):
-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   permission = ModifyRepositoryPermission(namespace, repository)
   if not permission.can():
     abort(403)
@@ -303,23 +302,23 @@ def put_image_checksum(namespace, repository, image_id):
     abort(400, 'Checksum not found in Cookie for image %(image_id)s',
           issue='missing-checksum-cookie', image_id=image_id)

-  profile.debug('Looking up repo image')
+  logger.debug('Looking up repo image')
   repo_image = model.get_repo_image_extended(namespace, repository, image_id)
   if not repo_image or not repo_image.storage:
     abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)

   uuid = repo_image.storage.uuid

-  profile.debug('Looking up repo layer data')
+  logger.debug('Looking up repo layer data')
   if not store.exists(repo_image.storage.locations, store.image_json_path(uuid)):
     abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)

-  profile.debug('Marking image path')
+  logger.debug('Marking image path')
   if not image_is_uploading(repo_image):
     abort(409, 'Cannot set checksum for image %(image_id)s',
           issue='image-write-error', image_id=image_id)

-  profile.debug('Storing image checksum')
+  logger.debug('Storing image checksum')
   err = store_checksum(repo_image.storage, checksum)
   if err:
     abort(400, err)
@@ -336,7 +335,7 @@ def put_image_checksum(namespace, repository, image_id):

   # The layer is ready for download, send a job to the work queue to
   # process it.
-  profile.debug('Adding layer to diff queue')
+  logger.debug('Adding layer to diff queue')
   repo = model.get_repository(namespace, repository)
   image_diff_queue.put([repo.namespace_user.username, repository, image_id], json.dumps({
     'namespace_user_id': repo.namespace_user.id,
@@ -353,23 +352,23 @@ def put_image_checksum(namespace, repository, image_id):
 @require_completion
 @set_cache_headers
 def get_image_json(namespace, repository, image_id, headers):
-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   permission = ReadRepositoryPermission(namespace, repository)
   if not permission.can() and not model.repository_is_public(namespace, repository):
     abort(403)

-  profile.debug('Looking up repo image')
+  logger.debug('Looking up repo image')
   repo_image = model.get_repo_image_extended(namespace, repository, image_id)

-  profile.debug('Looking up repo layer data')
+  logger.debug('Looking up repo layer data')
   try:
     uuid = repo_image.storage.uuid
     data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
   except (IOError, AttributeError):
     flask_abort(404)

-  profile.debug('Looking up repo layer size')
+  logger.debug('Looking up repo layer size')
   size = repo_image.storage.image_size
   headers['X-Docker-Size'] = str(size)

@@ -384,16 +383,16 @@ def get_image_json(namespace, repository, image_id, headers):
 @require_completion
 @set_cache_headers
 def get_image_ancestry(namespace, repository, image_id, headers):
-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   permission = ReadRepositoryPermission(namespace, repository)
   if not permission.can() and not model.repository_is_public(namespace, repository):
     abort(403)

-  profile.debug('Looking up repo image')
+  logger.debug('Looking up repo image')
   repo_image = model.get_repo_image_extended(namespace, repository, image_id)

-  profile.debug('Looking up image data')
+  logger.debug('Looking up image data')
   try:
     uuid = repo_image.storage.uuid
     data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid))
@@ -401,11 +400,11 @@ def get_image_ancestry(namespace, repository, image_id, headers):
     abort(404, 'Image %(image_id)s not found', issue='unknown-image',
           image_id=image_id)

-  profile.debug('Converting to <-> from JSON')
+  logger.debug('Converting to <-> from JSON')
   response = make_response(json.dumps(json.loads(data)), 200)
   response.headers.extend(headers)

-  profile.debug('Done')
+  logger.debug('Done')
   return response

@@ -435,12 +434,12 @@ def store_checksum(image_storage, checksum):
 @process_auth
 @extract_namespace_repo_from_session
 def put_image_json(namespace, repository, image_id):
-  profile.debug('Checking repo permissions')
+  logger.debug('Checking repo permissions')
   permission = ModifyRepositoryPermission(namespace, repository)
   if not permission.can():
     abort(403)

-  profile.debug('Parsing image JSON')
+  logger.debug('Parsing image JSON')
   try:
     data = json.loads(request.data.decode('utf8'))
   except ValueError:
@@ -454,10 +453,10 @@ def put_image_json(namespace, repository, image_id):
     abort(400, 'Missing key `id` in JSON for image: %(image_id)s',
           issue='invalid-request', image_id=image_id)

-  profile.debug('Looking up repo image')
+  logger.debug('Looking up repo image')
   repo_image = model.get_repo_image_extended(namespace, repository, image_id)
   if not repo_image:
-    profile.debug('Image not found')
+    logger.debug('Image not found')
     abort(404, 'Image %(image_id)s not found', issue='unknown-image',
           image_id=image_id)

@@ -471,24 +470,24 @@ def put_image_json(namespace, repository, image_id):
   parent_image = None
   if parent_id:
-    profile.debug('Looking up parent image')
+    logger.debug('Looking up parent image')
     parent_image = model.get_repo_image_extended(namespace, repository, parent_id)

   parent_uuid = parent_image and parent_image.storage.uuid
   parent_locations = parent_image and parent_image.storage.locations

   if parent_id:
-    profile.debug('Looking up parent image data')
+    logger.debug('Looking up parent image data')
     if (parent_id and not
         store.exists(parent_locations, store.image_json_path(parent_uuid))):
       abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s',
             issue='invalid-request', image_id=image_id, parent_id=parent_id)

-  profile.debug('Looking up image storage paths')
+  logger.debug('Looking up image storage paths')
   json_path = store.image_json_path(uuid)

-  profile.debug('Checking if image already exists')
+  logger.debug('Checking if image already exists')
   if (store.exists(repo_image.storage.locations, json_path) and not
       image_is_uploading(repo_image)):
     exact_abort(409, 'Image already exists')
@@ -501,24 +500,24 @@ def put_image_json(namespace, repository, image_id):
   command_list = data.get('container_config', {}).get('Cmd', None)
   command = json.dumps(command_list) if command_list else None

-  profile.debug('Setting image metadata')
+  logger.debug('Setting image metadata')
   model.set_image_metadata(image_id, namespace, repository, data.get('created'),
                            data.get('comment'), command, parent_image)

-  profile.debug('Putting json path')
+  logger.debug('Putting json path')
   store.put_content(repo_image.storage.locations, json_path, request.data)

-  profile.debug('Generating image ancestry')
+  logger.debug('Generating image ancestry')
   try:
     generate_ancestry(image_id, uuid, repo_image.storage.locations, parent_id, parent_uuid,
                       parent_locations)
   except IOError as ioe:
-    profile.debug('Error when generating ancestry: %s' % ioe.message)
+    logger.debug('Error when generating ancestry: %s' % ioe.message)
     abort(404)

-  profile.debug('Done')
+  logger.debug('Done')
   return make_response('true', 200)

diff --git a/endpoints/trackhelper.py b/endpoints/trackhelper.py
index 83b9d4270..fb99a2c2d 100644
--- a/endpoints/trackhelper.py
+++ b/endpoints/trackhelper.py
@@ -6,7 +6,6 @@ from flask import request
 from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token

 logger = logging.getLogger(__name__)
-profile = logging.getLogger('application.profiler')

 def track_and_log(event_name, repo, **kwargs):
   repository = repo.name
@@ -23,7 +22,7 @@ def track_and_log(event_name, repo, **kwargs):
   authenticated_user = get_authenticated_user()
   authenticated_token = get_validated_token() if not authenticated_user else None

-  profile.debug('Logging the %s to Mixpanel and the log system', event_name)
+  logger.debug('Logging the %s to Mixpanel and the log system', event_name)
   if authenticated_oauth_token:
     metadata['oauth_token_id'] = authenticated_oauth_token.id
     metadata['oauth_token_application_id'] = authenticated_oauth_token.application.client_id
@@ -45,9 +44,9 @@ def track_and_log(event_name, repo, **kwargs):
   }

   # Publish the user event (if applicable)
-  profile.debug('Checking publishing %s to the user events system', event_name)
+  logger.debug('Checking publishing %s to the user events system', event_name)
   if authenticated_user:
-    profile.debug('Publishing %s to the user events system', event_name)
+    logger.debug('Publishing %s to the user events system', event_name)
     user_event_data = {
       'action': event_name,
       'repository': repository,
@@ -58,14 +57,14 @@ def track_and_log(event_name, repo, **kwargs):
     event.publish_event_data('docker-cli', user_event_data)

   # Save the action to mixpanel.
-  profile.debug('Logging the %s to Mixpanel', event_name)
+  logger.debug('Logging the %s to Mixpanel', event_name)
   analytics.track(analytics_id, event_name, extra_params)

   # Log the action to the database.
-  profile.debug('Logging the %s to logs system', event_name)
+  logger.debug('Logging the %s to logs system', event_name)
   model.log_action(event_name, namespace, performer=authenticated_user,
                    ip=request.remote_addr, metadata=metadata, repository=repo)

-  profile.debug('Track and log of %s complete', event_name)
+  logger.debug('Track and log of %s complete', event_name)
diff --git a/requirements-nover.txt b/requirements-nover.txt
index f010531a2..9b8707870 100644
--- a/requirements-nover.txt
+++ b/requirements-nover.txt
@@ -40,6 +40,7 @@ git+https://github.com/DevTable/aniso8601-fake.git
 git+https://github.com/DevTable/anunidecode.git
 git+https://github.com/DevTable/avatar-generator.git
 git+https://github.com/DevTable/pygithub.git
+git+https://github.com/DevTable/container-cloud-config.git
 git+https://github.com/jplana/python-etcd.git
 gipc
 pyOpenSSL
diff --git a/requirements.txt b/requirements.txt
index f43c700c8..4e51c6245 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -64,5 +64,6 @@ git+https://github.com/DevTable/aniso8601-fake.git
 git+https://github.com/DevTable/anunidecode.git
 git+https://github.com/DevTable/avatar-generator.git
 git+https://github.com/DevTable/pygithub.git
+git+https://github.com/DevTable/container-cloud-config.git
 git+https://github.com/NateFerrero/oauth2lib.git
 git+https://github.com/jplana/python-etcd.git
diff --git a/test/test_imagetree.py b/test/test_imagetree.py
new file mode 100644
index 000000000..d72eb6505
--- /dev/null
+++ b/test/test_imagetree.py
@@ -0,0 +1,96 @@
+import unittest
+
+from app import app
+from util.imagetree import ImageTree
+from initdb import setup_database_for_testing, finished_database_for_testing
+from data import model
+
+NAMESPACE = 'devtable'
+SIMPLE_REPO = 'simple'
+COMPLEX_REPO = 'complex'
+
+class TestImageTree(unittest.TestCase):
+  def setUp(self):
+    setup_database_for_testing(self)
+    self.app = app.test_client()
+    self.ctx = app.test_request_context()
+    self.ctx.__enter__()
+
+  def tearDown(self):
+    finished_database_for_testing(self)
+    self.ctx.__exit__(True, None, None)
+
+  def _get_base_image(self, all_images):
+    for image in all_images:
+      if image.ancestors == '/':
+        return image
+
+    return None
+
+  def test_longest_path_simple_repo(self):
+    all_images = list(model.get_repository_images(NAMESPACE, SIMPLE_REPO))
+    all_tags = list(model.list_repository_tags(NAMESPACE, SIMPLE_REPO))
+    tree = ImageTree(all_images, all_tags)
+
+    base_image = self._get_base_image(all_images)
+    tag_image = all_tags[0].image
+
+    def checker(index, image):
+      return True
+
+    ancestors = tag_image.ancestors.split('/')[2:-1]  # Skip the first image.
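+    # find_longest_path excludes the starting image itself, so the result should
+    # line up with the tag image's remaining ancestors.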
+    result = tree.find_longest_path(base_image.id, checker)
+    self.assertEquals(3, len(result))
+
+    for index in range(0, 2):
+      self.assertEquals(int(ancestors[index]), result[index].id)
+
+    self.assertEquals('latest', tree.tag_containing_image(result[-1]))
+
+  def test_longest_path_complex_repo(self):
+    all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO))
+    all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO))
+    tree = ImageTree(all_images, all_tags)
+
+    base_image = self._get_base_image(all_images)
+
+    def checker(index, image):
+      return True
+
+    result = tree.find_longest_path(base_image.id, checker)
+    self.assertEquals(4, len(result))
+    self.assertEquals('v2.0', tree.tag_containing_image(result[-1]))
+
+  def test_filtering(self):
+    all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO))
+    all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO))
+    tree = ImageTree(all_images, all_tags, base_filter=1245)
+
+    base_image = self._get_base_image(all_images)
+
+    def checker(index, image):
+      return True
+
+    result = tree.find_longest_path(base_image.id, checker)
+    self.assertEquals(0, len(result))
+
+  def test_find_tag_parent_image(self):
+    all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO))
+    all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO))
+    tree = ImageTree(all_images, all_tags)
+
+    base_image = self._get_base_image(all_images)
+
+    def checker(index, image):
+      return True
+
+    result = tree.find_longest_path(base_image.id, checker)
+    self.assertEquals(4, len(result))
+
+    # Only use the first two images. They don't have tags, but the method should
+    # still return the tag that contains them.
+    self.assertEquals('v2.0', tree.tag_containing_image(result[0]))
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/util/imagetree.py b/util/imagetree.py
new file mode 100644
index 000000000..39cd5c3c9
--- /dev/null
+++ b/util/imagetree.py
@@ -0,0 +1,103 @@
+class ImageTreeNode(object):
+  """ A node in the image tree. """
+  def __init__(self, image):
+    self.image = image
+    self.parent = None
+    self.children = []
+    self.tags = []
+
+  def add_child(self, child):
+    self.children.append(child)
+    child.parent = self
+
+  def add_tag(self, tag):
+    self.tags.append(tag)
+
+
+class ImageTree(object):
+  """ In-memory tree for easy traversal and lookup of images in a repository. """
+
+  def __init__(self, all_images, all_tags, base_filter=None):
+    self._tag_map = {}
+    self._image_map = {}
+
+    self._build(all_images, all_tags, base_filter)
+
+  def _build(self, all_images, all_tags, base_filter=None):
+    # Build nodes for each of the images.
+    for image in all_images:
+      ancestors = image.ancestors.split('/')[1:-1]
+
+      # Filter any unneeded images.
+      if base_filter is not None:
+        if image.id != base_filter and not str(base_filter) in ancestors:
+          continue
+
+      self._image_map[image.id] = ImageTreeNode(image)
+
+    # Connect the nodes to their parents.
+    for image_node in self._image_map.values():
+      image = image_node.image
+      parent_image_id = image.ancestors.split('/')[-2] if image.ancestors else None
+      if not parent_image_id:
+        continue
+
+      parent_node = self._image_map.get(int(parent_image_id))
+      if parent_node is not None:
+        parent_node.add_child(image_node)
+
+    # Build the tag map.
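+    # Record each tag both in the name -> node map and on the image node itself,
+    # so tag_containing_image() can surface the closest tag during path lookups.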
+    for tag in all_tags:
+      image_node = self._image_map.get(tag.image.id)
+      if not image_node:
+        continue
+
+      self._tag_map[tag.name] = image_node
+      image_node.add_tag(tag.name)
+
+
+  def find_longest_path(self, image_id, checker):
+    """ Returns a list of images representing the longest path that matches the given
+        checker function, starting from the given image_id *exclusive*.
+    """
+    start_node = self._image_map.get(image_id)
+    if not start_node:
+      return []
+
+    return self._find_longest_path(start_node, checker, -1)[1:]
+
+
+  def _find_longest_path(self, image_node, checker, index):
+    found_path = []
+
+    for child_node in image_node.children:
+      if not checker(index + 1, child_node.image):
+        continue
+
+      found = self._find_longest_path(child_node, checker, index + 1)
+      if found and len(found) > len(found_path):
+        found_path = found
+
+    return [image_node.image] + found_path
+
+
+  def tag_containing_image(self, image):
+    """ Returns the name of the closest tag containing the given image. """
+    if not image:
+      return None
+
+    # Check the current image for a tag.
+    image_node = self._image_map.get(image.id)
+    if image_node is None:
+      return None
+
+    if image_node.tags:
+      return image_node.tags[0]
+
+    # Check any deriving images for a tag.
+    for child_node in image_node.children:
+      found = self.tag_containing_image(child_node.image)
+      if found is not None:
+        return found
+
+    return None
\ No newline at end of file