import logging
import os.path
import base64

from app import app, github_trigger
from jsonschema import validate
from buildtrigger.triggerutil import (RepositoryReadException, TriggerActivationException,
                                      TriggerDeactivationException, TriggerStartException,
                                      EmptyRepositoryException, ValidationRequestException,
                                      SkipRequestException, InvalidPayloadException,
                                      determine_build_ref, raise_if_skipped_build,
                                      find_matching_branches)
from buildtrigger.basehandler import BuildTriggerHandler
from util.security.ssh import generate_ssh_keypair
from util.dict_wrappers import JSONPathDict, SafeDictSetter

from github import (Github, UnknownObjectException, GithubException,
                    BadCredentialsException as GitHubBadCredentialsException)

logger = logging.getLogger(__name__)

# JSON schema that an incoming GitHub push webhook payload must satisfy before
# it is transformed by get_transformed_webhook_payload().
GITHUB_WEBHOOK_PAYLOAD_SCHEMA = {
    'type': 'object',
    'properties': {
        'ref': {
            'type': 'string',
        },
        'head_commit': {
            'type': 'object',
            'properties': {
                'id': {
                    'type': 'string',
                },
                'url': {
                    'type': 'string',
                },
                'message': {
                    'type': 'string',
                },
                'timestamp': {
                    'type': 'string',
                },
                'author': {
                    'type': 'object',
                    'properties': {
                        'username': {
                            'type': 'string'
                        },
                        'html_url': {
                            'type': 'string'
                        },
                        'avatar_url': {
                            'type': 'string'
                        },
                    },
                    'required': ['username'],
                },
                'committer': {
                    'type': 'object',
                    'properties': {
                        'username': {
                            'type': 'string'
                        },
                        'html_url': {
                            'type': 'string'
                        },
                        'avatar_url': {
                            'type': 'string'
                        },
                    },
                    'required': ['username'],
                },
            },
            'required': ['id', 'url', 'message', 'timestamp'],
        },
        'repository': {
            'type': 'object',
            'properties': {
                'ssh_url': {
                    'type': 'string',
                },
            },
            'required': ['ssh_url'],
        },
    },
    'required': ['ref', 'head_commit', 'repository'],
}


def get_transformed_webhook_payload(gh_payload, default_branch=None, lookup_user=None):
    """ Returns the GitHub webhook JSON payload transformed into our own payload format.

        gh_payload is validated against GITHUB_WEBHOOK_PAYLOAD_SCHEMA first. default_branch is
        copied into the result as-is. lookup_user, if given, is a callable taking a username and
        returning a dict with 'html_url' and 'avatar_url' (or a falsy value); it is used to fill
        in author/committer details that the payload itself is missing.

        Raises InvalidPayloadException if gh_payload does not validate.
    """
    try:
        validate(gh_payload, GITHUB_WEBHOOK_PAYLOAD_SCHEMA)
    except Exception as exc:
        # Not every exception carries a `message` attribute; fall back to its string form
        # rather than failing with an AttributeError while reporting the real problem.
        raise InvalidPayloadException(getattr(exc, 'message', None) or str(exc))

    payload = JSONPathDict(gh_payload)

    config = SafeDictSetter()
    config['commit'] = payload['head_commit.id']
    config['ref'] = payload['ref']
    config['default_branch'] = default_branch
    config['git_url'] = payload['repository.ssh_url']

    config['commit_info.url'] = payload['head_commit.url']
    config['commit_info.message'] = payload['head_commit.message']
    config['commit_info.date'] = payload['head_commit.timestamp']

    config['commit_info.author.username'] = payload['head_commit.author.username']
    config['commit_info.author.url'] = payload.get('head_commit.author.html_url')
    config['commit_info.author.avatar_url'] = payload.get('head_commit.author.avatar_url')

    config['commit_info.committer.username'] = payload.get('head_commit.committer.username')
    config['commit_info.committer.url'] = payload.get('head_commit.committer.html_url')
    config['commit_info.committer.avatar_url'] = payload.get('head_commit.committer.avatar_url')

    # Note: GitHub doesn't always return the extra information for users, so we do the lookup
    # manually if possible. Both lookups are skipped when there is no username to look up.
    if (lookup_user and payload.get('head_commit.author.username') and
            not payload.get('head_commit.author.html_url')):
        author_info = lookup_user(payload['head_commit.author.username'])
        if author_info:
            config['commit_info.author.url'] = author_info['html_url']
            config['commit_info.author.avatar_url'] = author_info['avatar_url']

    if (lookup_user and payload.get('head_commit.committer.username') and
            not payload.get('head_commit.committer.html_url')):
        committer_info = lookup_user(payload['head_commit.committer.username'])
        if committer_info:
            config['commit_info.committer.url'] = committer_info['html_url']
            config['commit_info.committer.avatar_url'] = committer_info['avatar_url']

    return config.dict_value()


class GithubBuildTrigger(BuildTriggerHandler):
    """ BuildTrigger for GitHub that uses the archive API and buildpacks. """

    def _get_client(self):
        """ Returns an authenticated client for talking to the GitHub API. """
        return Github(self.auth_token,
                      base_url=github_trigger.api_endpoint(),
                      client_id=github_trigger.client_id(),
                      client_secret=github_trigger.client_secret())

    @classmethod
    def service_name(cls):
        """ Returns the name of this trigger's service: 'github'. """
        return 'github'

    def is_active(self):
        """ Returns whether the trigger's config contains a 'hook_id'. """
        return 'hook_id' in self.config

    def get_repository_url(self):
        """ Returns the public URL for the configured 'build_source'. """
        source = self.config['build_source']
        return github_trigger.get_public_url(source)

    def activate(self, standard_webhook_url):
        """ Adds a deploy key and a webhook (pointing at standard_webhook_url) to the configured
            GitHub repository.

            Returns a tuple of the updated config and a dict holding the generated 'private_key'.
            Raises TriggerActivationException if the repository cannot be found or if the
            deploy key or webhook cannot be created.
        """
        config = self.config
        new_build_source = config['build_source']
        gh_client = self._get_client()

        # Find the GitHub repository.
        try:
            gh_repo = gh_client.get_repo(new_build_source)
        except UnknownObjectException:
            msg = 'Unable to find GitHub repository for source: %s' % new_build_source
            raise TriggerActivationException(msg)

        # Add a deploy key to the GitHub repository.
        public_key, private_key = generate_ssh_keypair()
        config['credentials'] = [
            {
                'name': 'SSH Public Key',
                'value': public_key,
            },
        ]

        try:
            deploy_key = gh_repo.create_key('%s Builder' % app.config['REGISTRY_TITLE'],
                                            public_key)
            config['deploy_key_id'] = deploy_key.id
        except GithubException:
            msg = 'Unable to add deploy key to repository: %s' % new_build_source
            raise TriggerActivationException(msg)

        # Add the webhook to the GitHub repository.
        webhook_config = {
            'url': standard_webhook_url,
            'content_type': 'json',
        }

        try:
            hook = gh_repo.create_hook('web', webhook_config)
            config['hook_id'] = hook.id
            config['master_branch'] = gh_repo.default_branch
        except GithubException:
            msg = 'Unable to create webhook on repository: %s' % new_build_source
            raise TriggerActivationException(msg)

        return config, {'private_key': private_key}

    def deactivate(self):
        """ Removes the deploy key (if any) and the webhook from the configured GitHub
            repository, and drops 'hook_id' from the config.

            Returns the updated config. Raises TriggerDeactivationException if the repository
            cannot be accessed or if the deploy key or webhook cannot be removed.
        """
        config = self.config
        gh_client = self._get_client()

        # Find the GitHub repository.
        try:
            repo = gh_client.get_repo(config['build_source'])
        except UnknownObjectException:
            msg = 'Unable to find GitHub repository for source: %s' % config['build_source']
            raise TriggerDeactivationException(msg)
        except GitHubBadCredentialsException:
            msg = 'Unable to access repository to disable trigger'
            raise TriggerDeactivationException(msg)

        # If the trigger uses a deploy key, remove it.
        try:
            if config['deploy_key_id']:
                deploy_key = repo.get_key(config['deploy_key_id'])
                deploy_key.delete()
        except KeyError:
            # There was no config['deploy_key_id'], thus this is an old trigger without a
            # deploy key.
            pass
        except GithubException:
            msg = 'Unable to remove deploy key: %s' % config['deploy_key_id']
            raise TriggerDeactivationException(msg)

        # Remove the webhook.
        try:
            hook = repo.get_hook(config['hook_id'])
            hook.delete()
        except GithubException:
            msg = 'Unable to remove hook: %s' % config['hook_id']
            raise TriggerDeactivationException(msg)

        config.pop('hook_id', None)
        self.config = config
        return config

    def list_build_sources(self):
        """ Returns the repositories visible to the authenticated user, grouped by owner.

            Each entry is a dict with 'personal', 'repos' (list of full repository names) and
            'info' ('name' and 'avatar_url'); entries are sorted by info name.
            Raises RepositoryReadException if the user's repositories cannot be listed.
        """
        gh_client = self._get_client()
        usr = gh_client.get_user()

        try:
            repos = usr.get_repos()
        except GithubException:
            raise RepositoryReadException('Unable to list user repositories')

        namespaces = {}
        has_non_personal = False

        for repository in repos:
            namespace = repository.owner.login
            if namespace not in namespaces:
                is_personal_repo = namespace == usr.login
                namespaces[namespace] = {
                    'personal': is_personal_repo,
                    'repos': [],
                    'info': {
                        'name': namespace,
                        'avatar_url': repository.owner.avatar_url
                    }
                }

                if not is_personal_repo:
                    has_non_personal = True

            namespaces[namespace]['repos'].append(repository.full_name)

        # In older versions of GitHub Enterprise, the get_repos call above does not
        # return any non-personal repositories. In that case, we need to lookup the
        # repositories manually.
        # TODO: Remove this once we no longer support GHE versions <= 2.1
        if not has_non_personal:
            for org in usr.get_orgs():
                repo_list = [repo.full_name for repo in org.get_repos(type='member')]
                # Key by login, like the loop above: the display name may be missing, which
                # would make several organizations overwrite each other under one key.
                namespaces[org.login] = {
                    'personal': False,
                    'repos': repo_list,
                    'info': {
                        'name': org.name or org.login,
                        'avatar_url': org.avatar_url
                    }
                }

        entries = list(namespaces.values())
        entries.sort(key=lambda e: e['info']['name'])
        return entries

    def list_build_subdirs(self):
        """ Returns the directories of the configured repository that contain a file named
            'Dockerfile', looking at the first branch matching the trigger config (or the
            repository's default branch, or 'master').

            Raises EmptyRepositoryException if GitHub reports 'Branch not found', and
            RepositoryReadException for any other GitHub error.
        """
        config = self.config
        gh_client = self._get_client()
        source = config['build_source']

        try:
            repo = gh_client.get_repo(source)

            # Find the first matching branch.
            repo_branches = self.list_field_values('branch_name') or []
            branches = find_matching_branches(config, repo_branches)
            branches = branches or [repo.default_branch or 'master']
            default_commit = repo.get_branch(branches[0]).commit
            commit_tree = repo.get_git_tree(default_commit.sha, recursive=True)

            return [os.path.dirname(elem.path) for elem in commit_tree.tree
                    if (elem.type == u'blob' and
                        os.path.basename(elem.path) == u'Dockerfile')]
        except GithubException as ghe:
            message = ghe.data.get('message', 'Unable to list contents of repository: %s' % source)
            if message == 'Branch not found':
                raise EmptyRepositoryException()

            raise RepositoryReadException(message)

    def load_dockerfile_contents(self):
        """ Returns the contents of the file at the trigger's Dockerfile path in the configured
            repository (base64-decoded when GitHub reports that encoding), or None if no file
            information is returned.

            Raises RepositoryReadException on a GitHub error.
        """
        config = self.config
        gh_client = self._get_client()
        source = config['build_source']
        path = self.get_dockerfile_path()

        try:
            repo = gh_client.get_repo(source)
            file_info = repo.get_file_contents(path)
            if file_info is None:
                return None

            content = file_info.content
            if file_info.encoding == 'base64':
                content = base64.b64decode(content)
            return content
        except GithubException as ghe:
            message = ghe.data.get('message', 'Unable to read Dockerfile: %s' % source)
            raise RepositoryReadException(message)

    def list_field_values(self, field_name, limit=None):
        """ Returns the possible values for the given trigger field.

            'refs' returns a list of {'kind', 'name'} dicts covering all branches then all tags;
            'tag_name' returns tag names; 'branch_name' returns branch names with the
            repository's default branch first. limit, if truthy, caps how many tags/branches are
            read from GitHub. Any other field name returns None.

            GitHub errors are not raised: tags fall back to [] and branches to ['master'].
        """
        if field_name == 'refs':
            branches = self.list_field_values('branch_name')
            tags = self.list_field_values('tag_name')

            return ([{'kind': 'branch', 'name': b} for b in branches] +
                    [{'kind': 'tag', 'name': tag} for tag in tags])

        config = self.config
        if field_name == 'tag_name':
            try:
                gh_client = self._get_client()
                source = config['build_source']
                repo = gh_client.get_repo(source)
                gh_tags = repo.get_tags()
                if limit:
                    # Slice the listing we already have instead of requesting it again.
                    gh_tags = gh_tags[0:limit]

                return [tag.name for tag in gh_tags]
            except GitHubBadCredentialsException:
                return []
            except GithubException:
                logger.exception("Got GitHub Exception when trying to list tags for trigger %s",
                                 self.trigger.id)
                return []

        if field_name == 'branch_name':
            try:
                gh_client = self._get_client()
                source = config['build_source']
                repo = gh_client.get_repo(source)
                gh_branches = repo.get_branches()
                if limit:
                    # Slice the listing we already have instead of requesting it again.
                    gh_branches = gh_branches[0:limit]

                branches = [branch.name for branch in gh_branches]

                # Make sure the default branch is present and listed first.
                if repo.default_branch not in branches:
                    branches.insert(0, repo.default_branch)

                if branches[0] != repo.default_branch:
                    branches.remove(repo.default_branch)
                    branches.insert(0, repo.default_branch)

                return branches
            except GitHubBadCredentialsException:
                return ['master']
            except GithubException:
                logger.exception("Got GitHub Exception when trying to list branches for trigger %s",
                                 self.trigger.id)
                return ['master']

        return None

    @classmethod
    def _build_metadata_for_commit(cls, commit_sha, ref, repo):
        """ Returns the build metadata dict ('commit', 'ref', 'default_branch', 'git_url',
            'commit_info') for the given commit of the given GitHub repository object, or None
            if the commit cannot be loaded from GitHub.
        """
        try:
            commit = repo.get_commit(commit_sha)
        except GithubException:
            logger.exception('Could not load commit information from GitHub')
            return None

        commit_info = {
            'url': commit.html_url,
            'message': commit.commit.message,
            'date': commit.last_modified
        }

        if commit.author:
            commit_info['author'] = {
                'username': commit.author.login,
                'avatar_url': commit.author.avatar_url,
                'url': commit.author.html_url
            }

        if commit.committer:
            commit_info['committer'] = {
                'username': commit.committer.login,
                'avatar_url': commit.committer.avatar_url,
                'url': commit.committer.html_url
            }

        return {
            'commit': commit_sha,
            'ref': ref,
            'default_branch': repo.default_branch,
            'git_url': repo.ssh_url,
            'commit_info': commit_info
        }

    def manual_start(self, run_parameters=None):
        """ Prepares a manually-started build for the branch or tag selected by run_parameters
            (resolved via determine_build_ref, with the repository default branch as fallback).

            Returns the result of prepare_build. Raises TriggerStartException if the repository
            cannot be loaded or the requested branch or tag cannot be found.
        """
        config = self.config
        source = config['build_source']

        try:
            gh_client = self._get_client()
            repo = gh_client.get_repo(source)
            default_branch = repo.default_branch
        except GithubException as ghe:
            # Use .get with a fallback, as the other methods do, so that an error response
            # without a 'message' does not surface as a KeyError.
            raise TriggerStartException(ghe.data.get('message',
                                                     'Unable to load repository: %s' % source))

        def get_branch_sha(branch_name):
            try:
                branch = repo.get_branch(branch_name)
                return branch.commit.sha
            except GithubException:
                raise TriggerStartException('Could not find branch in repository')

        def get_tag_sha(tag_name):
            tags = {tag.name: tag for tag in repo.get_tags()}
            if tag_name not in tags:
                raise TriggerStartException('Could not find tag in repository')

            return tags[tag_name].commit.sha

        # Find the branch or tag to build.
        (commit_sha, ref) = determine_build_ref(run_parameters, get_branch_sha, get_tag_sha,
                                                default_branch)

        # NOTE(review): metadata is None when the commit could not be loaded; confirm that
        # prepare_build handles that.
        metadata = GithubBuildTrigger._build_metadata_for_commit(commit_sha, ref, repo)
        return self.prepare_build(metadata, is_manual=True)

    def lookup_user(self, username):
        """ Returns a dict with the 'html_url' and 'avatar_url' of the given GitHub user, or
            None if GitHub reports an error.
        """
        try:
            gh_client = self._get_client()
            user = gh_client.get_user(username)
            return {
                'html_url': user.html_url,
                'avatar_url': user.avatar_url
            }
        except GithubException:
            return None

    def handle_trigger_request(self, request):
        """ Handles an incoming webhook request and returns the prepared build.

            Raises InvalidPayloadException if the request has no JSON payload, lacks the
            repository owner/name, or fails schema validation; ValidationRequestException for
            GitHub's 'zen' probe; SkipRequestException if the repository lookup fails with a
            GitHub error; and whatever raise_if_skipped_build raises for skipped builds.
        """
        # Check the payload to see if we should skip it based on the lack of a head_commit.
        payload = request.get_json()
        if payload is None:
            raise InvalidPayloadException('Missing payload')

        # This is for GitHub's probing/testing.
        if 'zen' in payload:
            raise ValidationRequestException()

        # The repository name is needed before schema validation runs, so report a malformed
        # payload as such instead of letting a KeyError/TypeError escape.
        try:
            repo_full_name = '%s/%s' % (payload['repository']['owner']['name'],
                                        payload['repository']['name'])
        except (KeyError, TypeError):
            raise InvalidPayloadException('Missing repository information in payload')

        # Lookup the default branch for the repository.
        default_branch = None
        lookup_user = None
        try:
            gh_client = self._get_client()
            repo = gh_client.get_repo(repo_full_name)
            default_branch = repo.default_branch
            lookup_user = self.lookup_user
        except GitHubBadCredentialsException:
            # Continue without the default branch or user lookups.
            logger.exception('Got GitHub Credentials Exception; Cannot lookup default branch')
        except GithubException:
            logger.exception("Got GitHub Exception when trying to start trigger %s",
                             self.trigger.id)
            raise SkipRequestException()

        logger.debug('GitHub trigger payload %s', payload)
        metadata = get_transformed_webhook_payload(payload, default_branch=default_branch,
                                                   lookup_user=lookup_user)
        prepared = self.prepare_build(metadata)

        # Check if we should skip this build.
        raise_if_skipped_build(prepared)
        return prepared