import logging from peewee import JOIN_LEFT_OUTER, fn from datetime import timedelta, datetime from data.model import (DataModelException, tag, db_transaction, storage, image, permission, _basequery) from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User, Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount, Role, RepositoryAuthorizedEmail, db_for_update) logger = logging.getLogger(__name__) def create_repository(namespace, name, creating_user, visibility='private'): private = Visibility.get(name=visibility) namespace_user = User.get(username=namespace) repo = Repository.create(name=name, visibility=private, namespace_user=namespace_user) admin = Role.get(name='admin') if creating_user and not creating_user.organization: RepositoryPermission.create(user=creating_user, repository=repo, role=admin) if creating_user.username != namespace: # Permission prototypes only work for orgs permission.apply_default_permissions(repo, creating_user) return repo def get_repository(namespace_name, repository_name): try: return _basequery.get_existing_repository(namespace_name, repository_name) except Repository.DoesNotExist: return None def _purge_all_repository_tags(namespace_name, repository_name): """ Immediately purge all repository tags without respecting the lifeline procedure """ try: repo = _basequery.get_existing_repository(namespace_name, repository_name) except Repository.DoesNotExist: raise DataModelException('Invalid repository \'%s/%s\'' % (namespace_name, repository_name)) RepositoryTag.delete().where(RepositoryTag.repository == repo.id).execute() def purge_repository(namespace_name, repository_name): # Delete all tags to allow gc to reclaim storage _purge_all_repository_tags(namespace_name, repository_name) # Gc to remove the images and storage garbage_collect_repository(namespace_name, repository_name) # Delete the rest of the repository metadata fetched = _basequery.get_existing_repository(namespace_name, repository_name) fetched.delete_instance(recursive=True, delete_nullable=False) def garbage_collect_repository(namespace_name, repository_name): storage_id_whitelist = {} tag.garbage_collect_tags(namespace_name, repository_name) with db_transaction(): # TODO (jake): We could probably select this and all the images in a single query using # a different kind of join. # Get a list of all images used by tags in the repository tag_query = (RepositoryTag .select(RepositoryTag, Image, ImageStorage) .join(Image) .join(ImageStorage, JOIN_LEFT_OUTER) .switch(RepositoryTag) .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .where(Repository.name == repository_name, Namespace.username == namespace_name)) referenced_ancestors = set() for one_tag in tag_query: # The ancestor list is in the format '/1/2/3/', extract just the ids ancestor_id_strings = one_tag.image.ancestors.split('/')[1:-1] ancestor_list = [int(img_id_str) for img_id_str in ancestor_id_strings] referenced_ancestors = referenced_ancestors.union(set(ancestor_list)) referenced_ancestors.add(one_tag.image.id) all_repo_images = image.get_repository_images(namespace_name, repository_name) all_images = {int(img.id): img for img in all_repo_images} to_remove = set(all_images.keys()).difference(referenced_ancestors) if len(to_remove) > 0: logger.info('Cleaning up unreferenced images: %s', to_remove) storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove} Image.delete().where(Image.id << list(to_remove)).execute() if len(to_remove) > 0: logger.info('Garbage collecting storage for images: %s', to_remove) storage.garbage_collect_storage(storage_id_whitelist) def star_repository(user, repository): """ Stars a repository. """ star = Star.create(user=user.id, repository=repository.id) star.save() def unstar_repository(user, repository): """ Unstars a repository. """ try: (Star .delete() .where(Star.repository == repository.id, Star.user == user.id) .execute()) except Star.DoesNotExist: raise DataModelException('Star not found.') def get_user_starred_repositories(user, limit=None, page=None): """ Retrieves all of the repositories a user has starred. """ query = (Repository .select(Repository, User, Visibility) .join(Star) .switch(Repository) .join(User) .switch(Repository) .join(Visibility) .where(Star.user == user)) if page and limit: query = query.paginate(page, limit) elif limit: query = query.limit(limit) return query def repository_is_starred(user, repository): """ Determines whether a user has starred a repository or not. """ try: (Star .select() .where(Star.repository == repository.id, Star.user == user.id) .get()) return True except Star.DoesNotExist: return False def get_when_last_modified(repository_ids): tuples = (RepositoryTag .select(RepositoryTag.repository, fn.Max(RepositoryTag.lifetime_start_ts)) .where(RepositoryTag.repository << repository_ids) .group_by(RepositoryTag.repository) .tuples()) last_modified_map = {} for record in tuples: last_modified_map[record[0]] = record[1] return last_modified_map def get_action_counts(repository_ids): # Filter the join to recent entries only. last_week = datetime.now() - timedelta(weeks=1) tuples = (RepositoryActionCount .select(RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count)) .where(RepositoryActionCount.repository << repository_ids) .where(RepositoryActionCount.date >= last_week) .group_by(RepositoryActionCount.repository) .tuples()) action_count_map = {} for record in tuples: action_count_map[record[0]] = record[1] return action_count_map def get_visible_repositories(username=None, include_public=True, page=None, limit=None, namespace=None, namespace_only=False): fields = [Repository.name, Repository.id, Repository.description, Visibility.name, Namespace.username] query = _visible_repository_query(username=username, include_public=include_public, page=page, limit=limit, namespace=namespace, select_models=fields) if limit: query = query.limit(limit) if namespace and namespace_only: query = query.where(Namespace.username == namespace) return TupleSelector(query, fields) def _visible_repository_query(username=None, include_public=True, limit=None, page=None, namespace=None, select_models=[]): query = (Repository .select(*select_models) # MySQL/RDS complains is there are selected models for counts. .distinct() .join(Visibility) .switch(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .switch(Repository) .join(RepositoryPermission, JOIN_LEFT_OUTER)) query = _basequery.filter_to_repos_for_user(query, username, namespace, include_public) if page: query = query.paginate(page, limit) elif limit: query = query.limit(limit) return query def get_sorted_matching_repositories(prefix, only_public, checker, limit=10): """ Returns repositories matching the given prefix string and passing the given checker function. """ last_week = datetime.now() - timedelta(weeks=1) results = [] existing_ids = [] def get_search_results(search_clause, with_count=False): if len(results) >= limit: return select_items = [Repository, Namespace] if with_count: select_items.append(fn.Sum(RepositoryActionCount.count).alias('count')) query = (Repository .select(*select_items) .join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user)) .switch(Repository) .where(search_clause) .group_by(Repository, Namespace)) if only_public: query = query.where(Repository.visibility == _basequery.get_public_repo_visibility()) if existing_ids: query = query.where(~(Repository.id << existing_ids)) if with_count: query = (query .switch(Repository) .join(RepositoryActionCount) .where(RepositoryActionCount.date >= last_week) .order_by(fn.Sum(RepositoryActionCount.count).desc())) for result in query: if len(results) >= limit: return results # Note: We compare IDs here, instead of objects, because calling .visibility on the # Repository will kick off a new SQL query to retrieve that visibility enum value. We don't # join the visibility table in SQL, as well, because it is ungodly slow in MySQL :-/ result.is_public = result.visibility_id == _basequery.get_public_repo_visibility().id result.count = result.count if with_count else 0 if not checker(result): continue results.append(result) existing_ids.append(result.id) # For performance reasons, we conduct the repo name and repo namespace searches on their # own. This also affords us the ability to give higher precedence to repository names matching # over namespaces, which is semantically correct. get_search_results(Repository.name ** (prefix + '%'), with_count=True) get_search_results(Repository.name ** (prefix + '%'), with_count=False) get_search_results(Namespace.username ** (prefix + '%'), with_count=True) get_search_results(Namespace.username ** (prefix + '%'), with_count=False) return results def get_matching_repositories(repo_term, username=None, limit=10, include_public=True): namespace_term = repo_term name_term = repo_term visible = _visible_repository_query(username, include_public=include_public) search_clauses = (Repository.name ** ('%' + name_term + '%') | Namespace.username ** ('%' + namespace_term + '%')) # Handle the case where the user has already entered a namespace path. if repo_term.find('/') > 0: parts = repo_term.split('/', 1) namespace_term = '/'.join(parts[:-1]) name_term = parts[-1] search_clauses = (Repository.name ** ('%' + name_term + '%') & Namespace.username ** ('%' + namespace_term + '%')) return visible.where(search_clauses).limit(limit) def lookup_repository(repo_id): try: return Repository.get(Repository.id == repo_id) except Repository.DoesNotExist: return None def is_repository_public(repository): return repository.visibility == _basequery.get_public_repo_visibility() def repository_is_public(namespace_name, repository_name): try: (Repository .select() .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .switch(Repository) .join(Visibility) .where(Namespace.username == namespace_name, Repository.name == repository_name, Visibility.name == 'public') .get()) return True except Repository.DoesNotExist: return False def set_repository_visibility(repo, visibility): visibility_obj = Visibility.get(name=visibility) if not visibility_obj: return repo.visibility = visibility_obj repo.save() def get_email_authorized_for_repo(namespace, repository, email): try: return (RepositoryAuthorizedEmail .select(RepositoryAuthorizedEmail, Repository, Namespace) .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .where(Namespace.username == namespace, Repository.name == repository, RepositoryAuthorizedEmail.email == email) .get()) except RepositoryAuthorizedEmail.DoesNotExist: return None def create_email_authorization_for_repo(namespace_name, repository_name, email): try: repo = _basequery.get_existing_repository(namespace_name, repository_name) except Repository.DoesNotExist: raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) return RepositoryAuthorizedEmail.create(repository=repo, email=email, confirmed=False) def confirm_email_authorization_for_repo(code): try: found = (RepositoryAuthorizedEmail .select(RepositoryAuthorizedEmail, Repository, Namespace) .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .where(RepositoryAuthorizedEmail.code == code) .get()) except RepositoryAuthorizedEmail.DoesNotExist: raise DataModelException('Invalid confirmation code.') found.confirmed = True found.save() return found