diff --git a/buildman/component/buildcomponent.py b/buildman/component/buildcomponent.py
index d518d3453..ca56e6926 100644
--- a/buildman/component/buildcomponent.py
+++ b/buildman/component/buildcomponent.py
@@ -20,7 +20,7 @@
 HEARTBEAT_DELTA = datetime.timedelta(seconds=30)
 HEARTBEAT_TIMEOUT = 10
 INITIAL_TIMEOUT = 25
-SUPPORTED_WORKER_VERSIONS = ['0.1-beta']
+SUPPORTED_WORKER_VERSIONS = ['0.1-beta', '0.2-beta']
 
 logger = logging.getLogger(__name__)
 
@@ -46,6 +46,7 @@ class BuildComponent(BaseComponent):
     self._current_job = None
     self._build_status = None
     self._image_info = None
+    self._worker_version = None
 
     BaseComponent.__init__(self, config, **kwargs)
 
@@ -55,6 +56,7 @@ class BuildComponent(BaseComponent):
   def onJoin(self, details):
     logger.debug('Registering methods and listeners for component %s', self.builder_realm)
     yield From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
+    yield From(self.register(self._check_cache, u'io.quay.buildworker.checkcache'))
     yield From(self.register(self._ping, u'io.quay.buildworker.ping'))
     yield From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
     yield From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))
@@ -73,42 +75,45 @@ class BuildComponent(BaseComponent):
 
     self._set_status(ComponentStatus.BUILDING)
 
-    # Retrieve the job's buildpack.
+    base_image_information = {}
+    build_config = build_job.build_config()
+
+    # Retrieve the job's buildpack URL.
     buildpack_url = self.user_files.get_file_url(build_job.repo_build().resource_key,
                                                  requires_cors=False)
 
-    logger.debug('Retreiving build package: %s', buildpack_url)
-    buildpack = None
-    try:
-      buildpack = BuildPackage.from_url(buildpack_url)
-    except BuildPackageException as bpe:
-      self._build_failure('Could not retrieve build package', bpe)
-      return
+    # TODO(jschorr): Remove this block and the buildpack package once we move everyone over
+    # to version 0.2 or higher.
+    if self._worker_version == '0.1-beta':
+      logger.debug('Retrieving build package: %s', buildpack_url)
+      buildpack = None
+      try:
+        buildpack = BuildPackage.from_url(buildpack_url)
+      except BuildPackageException as bpe:
+        self._build_failure('Could not retrieve build package', bpe)
+        return
 
-    # Extract the base image information from the Dockerfile.
-    parsed_dockerfile = None
-    logger.debug('Parsing dockerfile')
+      # Extract the base image information from the Dockerfile.
+      parsed_dockerfile = None
+      logger.debug('Parsing dockerfile')
 
-    build_config = build_job.build_config()
-    try:
-      parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
-    except BuildPackageException as bpe:
-      self._build_failure('Could not find Dockerfile in build package', bpe)
-      return
+      try:
+        parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
+      except BuildPackageException as bpe:
+        self._build_failure('Could not find Dockerfile in build package', bpe)
+        return
 
-    image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
-    if image_and_tag_tuple is None or image_and_tag_tuple[0] is None:
-      self._build_failure('Missing FROM line in Dockerfile')
-      return
+      image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
+      if image_and_tag_tuple is None or image_and_tag_tuple[0] is None:
+        self._build_failure('Missing FROM line in Dockerfile')
+        return
 
-    base_image_information = {
-      'repository': image_and_tag_tuple[0],
-      'tag': image_and_tag_tuple[1]
-    }
+      base_image_information['repository'] = image_and_tag_tuple[0]
+      base_image_information['tag'] = image_and_tag_tuple[1]
 
-    # Extract the number of steps from the Dockerfile.
-    with self._build_status as status_dict:
-      status_dict['total_commands'] = len(parsed_dockerfile.commands)
+      # Extract the number of steps from the Dockerfile.
+      with self._build_status as status_dict:
+        status_dict['total_commands'] = len(parsed_dockerfile.commands)
 
     # Add the pull robot information, if any.
     if build_config.get('pull_credentials') is not None:
@@ -128,20 +133,20 @@ class BuildComponent(BaseComponent):
     # push_token: The token to use to push the built image.
     # tag_names: The name(s) of the tag(s) for the newly built image.
     # base_image: The image name and credentials to use to conduct the base image pull.
-    # repository: The repository to pull.
-    # tag: The tag to pull.
+    # repository: The repository to pull (DEPRECATED).
+    # tag: The tag to pull (DEPRECATED).
     # username: The username for pulling the base image (if any).
     # password: The password for pulling the base image (if any).
     build_arguments = {
       'build_package': buildpack_url,
       'sub_directory': build_config.get('build_subdir', ''),
       'repository': repository_name,
       'registry': self.server_hostname,
       'pull_token': build_job.repo_build().access_token.code,
       'push_token': build_job.repo_build().access_token.code,
       'tag_names': build_config.get('docker_tags', ['latest']),
       'base_image': base_image_information,
-      'cached_tag': build_job.determine_cached_tag() or ''
+      'cached_tag': build_job.determine_cached_tag() or ''  # Remove after 0.1-beta is deprecated.
     }
 
     # Invoke the build.
@@ -283,6 +288,15 @@ class BuildComponent(BaseComponent):
     """ Ping pong. """
     return 'pong'
 
+  def _check_cache(self, cache_commands, base_image_name, base_image_tag, base_image_id):
+    with self._build_status as status_dict:
+      status_dict['total_commands'] = len(cache_commands) + 1
+
+    logger.debug('Checking cache on realm %s. Base image: %s:%s (%s)', self.builder_realm,
+                 base_image_name, base_image_tag, base_image_id)
+
+    return self._current_job.determine_cached_tag(base_image_id, cache_commands) or ''
+
   def _on_ready(self, token, version):
     if not version in SUPPORTED_WORKER_VERSIONS:
       logger.warning('Build component (token "%s") is running an out-of-date version: %s', version)
@@ -296,6 +310,7 @@ class BuildComponent(BaseComponent):
       logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token, token)
       return False
 
+    self._worker_version = version
     self._set_status(ComponentStatus.RUNNING)
 
     # Start the heartbeat check and updating loop.
diff --git a/buildman/jobutil/buildjob.py b/buildman/jobutil/buildjob.py
index 6ec02a830..63d544790 100644
--- a/buildman/jobutil/buildjob.py
+++ b/buildman/jobutil/buildjob.py
@@ -31,10 +31,58 @@ class BuildJob(object):
         'Could not parse repository build job config with ID %s' % self._job_details['build_uuid']
       )
 
-  def determine_cached_tag(self):
+  def determine_cached_tag(self, base_image_id=None, cache_comments=None):
     """ Returns the tag to pull to prime the cache or None if none. """
-    # TODO(jschorr): Change this to use the more complicated caching rules, once we have caching
-    # be a pull of things besides the constructed tags.
+    cached_tag = None
+    if base_image_id and cache_comments:
+      cached_tag = self._determine_cached_tag_by_comments(base_image_id, cache_comments)
+
+    if not cached_tag:
+      cached_tag = self._determine_cached_tag_by_tag()
+
+    return cached_tag
+
+  def _determine_cached_tag_by_comments(self, base_image_id, cache_commands):
+    """ Determines the tag to use for priming the cache for this build job, by matching commands
+        starting at the given base_image_id. This mimics the Docker cache checking, so it should,
+        in theory, provide "perfect" caching.
+    """
+    # Look up the base image in the repository. If it doesn't exist, nothing more to do.
+    repo_namespace = self._repo_build.repository.namespace_user.username
+    repo_name = self._repo_build.repository.name
+
+    repository = model.get_repository(repo_namespace, repo_name)
+    if repository is None:
+      # Should never happen, but just to be sure.
+      return None
+
+    current_image = model.get_image(repository, base_image_id)
+    if current_image is None:
+      return None
+
+    # For each cache command, find a child image that matches the command.
+    for cache_command in cache_commands:
+      current_image = model.find_child_image(repository, current_image, cache_command)
+      if current_image is None:
+        return None
+
+    # Find a tag associated with the image, if any.
+    # TODO(jschorr): We should just return the image ID instead of a parent tag, OR we should
+    # make this more efficient.
+    for tag in model.list_repository_tags(repo_namespace, repo_name):
+      tag_image = tag.image
+      ancestor_index = '/%s/' % current_image.id
+      if ancestor_index in tag_image.ancestors:
+        return tag.name
+
+    return None
+
+  def _determine_cached_tag_by_tag(self):
+    """ Determines the cached tag by looking for one of the tags being built, and seeing if it
+        exists in the repository. This is a fallback for when no comment information is available.
+    """
     tags = self._build_config.get('docker_tags', ['latest'])
     existing_tags = model.list_repository_tags(self._repo_build.repository.namespace_user.username,
                                                self._repo_build.repository.name)
diff --git a/buildman/jobutil/workererror.py b/buildman/jobutil/workererror.py
index 8271976e4..580d46f4d 100644
--- a/buildman/jobutil/workererror.py
+++ b/buildman/jobutil/workererror.py
@@ -57,6 +57,11 @@ class WorkerError(object):
       'io.quay.builder.missingorinvalidargument': {
         'message': 'Missing required arguments for builder',
         'is_internal': True
+      },
+
+      'io.quay.builder.cachelookupissue': {
+        'message': 'Error checking for a cached tag',
+        'is_internal': True
       }
     }
 
diff --git a/data/model/legacy.py b/data/model/legacy.py
index a5c779871..225b5bf2e 100644
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@@ -1089,6 +1089,25 @@ def get_repository(namespace_name, repository_name):
     return None
 
 
+def get_image(repo, dockerfile_id):
+  try:
+    return Image.get(Image.docker_image_id == dockerfile_id, Image.repository == repo)
+  except Image.DoesNotExist:
+    return None
+
+
+def find_child_image(repo, parent_image, command):
+  try:
+    return (Image.select()
+            .join(ImageStorage)
+            .switch(Image)
+            .where(Image.ancestors % ('%%/%s/%%' % parent_image.id),
+                   ImageStorage.command == command)
+            .get())
+  except Image.DoesNotExist:
+    return None
+
+
 def get_repo_image(namespace_name, repository_name, docker_image_id):
   def limit_to_image_id(query):
     return query.where(Image.docker_image_id == docker_image_id).limit(1)
@@ -1645,7 +1664,6 @@ def get_tag_image(namespace_name, repository_name, tag_name):
   else:
     return images[0]
 
-
 def get_image_by_id(namespace_name, repository_name, docker_image_id):
   image = get_repo_image_extended(namespace_name, repository_name, docker_image_id)
   if not image:
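
For review purposes, below is a rough, self-contained sketch (not part of the change itself) of the cache walk that the new _determine_cached_tag_by_comments / find_child_image pair performs, using plain dictionaries in place of the Image and ImageStorage models. All image IDs, commands, and tag names here are invented for illustration; the real code resolves images through the database and matches tags against the image's ancestors string rather than by direct equality.

# Hypothetical image tree: each entry records its parent and the Dockerfile command that
# created it, standing in for Image rows and their ImageStorage.command values.
IMAGES = {
  'base':  {'parent': None,    'command': None},
  'img-a': {'parent': 'base',  'command': 'RUN apt-get update'},
  'img-b': {'parent': 'img-a', 'command': 'ADD . /app'},
}

# Hypothetical repository tags pointing at image IDs.
TAGS = {'latest': 'img-b', 'staging': 'img-a'}


def find_child_image(parent_id, command):
  # Stand-in for model.find_child_image: a direct child of parent_id built by this command.
  for image_id, info in IMAGES.items():
    if info['parent'] == parent_id and info['command'] == command:
      return image_id
  return None


def determine_cached_tag_by_comments(base_image_id, cache_commands):
  # Walk the image tree one command at a time, mimicking Docker's own cache check, and stop
  # as soon as a command has no matching child image.
  current = base_image_id
  for command in cache_commands:
    current = find_child_image(current, command)
    if current is None:
      return None

  # Return any tag whose image matches the image we reached. The real code instead checks
  # whether '/<image id>/' appears in the tagged image's ancestors string.
  for tag_name, image_id in TAGS.items():
    if image_id == current:
      return tag_name
  return None


# Both commands are already cached, so 'latest' can be pulled to prime the cache.
assert determine_cached_tag_by_comments('base', ['RUN apt-get update', 'ADD . /app']) == 'latest'
# A changed command breaks the chain, so no cached tag is available.
assert determine_cached_tag_by_comments('base', ['RUN apt-get update', 'ADD . /src']) is None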