a0c4e72f13
We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified)
387 lines
13 KiB
Python
387 lines
13 KiB
Python
import logging
|
|
|
|
from peewee import JOIN_LEFT_OUTER, fn
|
|
from datetime import timedelta, datetime
|
|
|
|
from data.model import (DataModelException, tag, db_transaction, storage, image, permission,
|
|
_basequery)
|
|
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,
|
|
Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
|
|
Role, RepositoryAuthorizedEmail, db_for_update)
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def create_repository(namespace, name, creating_user, visibility='private'):
  """ Creates the repository `name` under the namespace user `namespace` and returns it.

      The repository gets the Visibility row named `visibility`. If `creating_user` is given and
      is not an organization, that user receives the 'admin' role on the new repository; when
      that user is also not the namespace itself, the default permissions are applied too.
  """
  visibility_obj = Visibility.get(name=visibility)
  owner = User.get(username=namespace)
  new_repo = Repository.create(name=name, visibility=visibility_obj, namespace_user=owner)
  admin_role = Role.get(name='admin')

  if not creating_user or creating_user.organization:
    return new_repo

  RepositoryPermission.create(user=creating_user, repository=new_repo, role=admin_role)

  # Permission prototypes only work for orgs
  if creating_user.username != namespace:
    permission.apply_default_permissions(new_repo, creating_user)

  return new_repo
|
|
|
|
|
|
def get_repository(namespace_name, repository_name):
  """ Returns the repository `namespace_name/repository_name`, or None if it does not exist. """
  try:
    found = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    found = None

  return found
|
|
|
|
|
|
def _purge_all_repository_tags(namespace_name, repository_name):
  """ Deletes all tags of the repository immediately, without respecting the lifeline procedure.

      Raises a DataModelException if the repository does not exist.
  """
  try:
    target = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    message = 'Invalid repository \'%s/%s\'' % (namespace_name, repository_name)
    raise DataModelException(message)

  purge_query = RepositoryTag.delete().where(RepositoryTag.repository == target.id)
  purge_query.execute()
|
|
|
|
|
|
def purge_repository(namespace_name, repository_name):
  """ Purges the repository: its tags first, then a garbage collection pass, then the
      repository row itself.
  """
  # With every tag gone, the garbage collector can reclaim the images and storage.
  _purge_all_repository_tags(namespace_name, repository_name)
  garbage_collect_repository(namespace_name, repository_name)

  # What is left is the repository row and the metadata hanging off of it.
  repo = _basequery.get_existing_repository(namespace_name, repository_name)
  repo.delete_instance(recursive=True, delete_nullable=False)
|
|
|
|
|
|
def garbage_collect_repository(namespace_name, repository_name):
  """ Garbage collects the repository `namespace_name/repository_name`.

      The repository's tags are garbage collected first. Then, within a database transaction,
      every image of the repository that is neither pointed to by a tag nor listed as an
      ancestor of a tagged image is deleted. If any image was deleted, the IDs of the storage
      those images used are handed to the storage garbage collector.
  """
  storage_id_whitelist = set()

  tag.garbage_collect_tags(namespace_name, repository_name)

  with db_transaction():
    # TODO (jake): We could probably select this and all the images in a single query using
    # a different kind of join.

    # Get a list of all images used by tags in the repository
    tag_query = (RepositoryTag
                 .select(RepositoryTag, Image, ImageStorage)
                 .join(Image)
                 .join(ImageStorage, JOIN_LEFT_OUTER)
                 .switch(RepositoryTag)
                 .join(Repository)
                 .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                 .where(Repository.name == repository_name, Namespace.username == namespace_name))

    referenced_ancestors = set()
    for one_tag in tag_query:
      # The ancestor list is in the format '/1/2/3/', extract just the ids
      ancestor_id_strings = one_tag.image.ancestors.split('/')[1:-1]

      # Grow the set in place; rebuilding it with union() for every tag copies the whole set.
      referenced_ancestors.update(int(img_id_str) for img_id_str in ancestor_id_strings)
      referenced_ancestors.add(one_tag.image.id)

    all_repo_images = image.get_repository_images(namespace_name, repository_name)
    all_images = {int(img.id): img for img in all_repo_images}
    to_remove = set(all_images).difference(referenced_ancestors)

    if to_remove:
      logger.info('Cleaning up unreferenced images: %s', to_remove)
      storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
      Image.delete().where(Image.id << list(to_remove)).execute()

  # Storage is only collected after the transaction block above has been left.
  if to_remove:
    logger.info('Garbage collecting storage for images: %s', to_remove)
    storage.garbage_collect_storage(storage_id_whitelist)
|
|
|
|
|
|
def star_repository(user, repository):
  """ Stars a repository.

      Creates a Star row linking `user.id` to `repository.id`.
  """
  # Model.create() already writes the new row, so a follow-up save() would only issue a
  # redundant second write.
  Star.create(user=user.id, repository=repository.id)
|
|
|
|
|
|
def unstar_repository(user, repository):
  """ Unstars a repository.

      Raises a DataModelException if no star by `user` on `repository` was found.
  """
  removed_rows = (Star
                  .delete()
                  .where(Star.repository == repository.id, Star.user == user.id)
                  .execute())

  # A delete query does not raise Star.DoesNotExist when nothing matches; it reports the
  # number of removed rows, so that count is what tells us whether the star existed.
  if not removed_rows:
    raise DataModelException('Star not found.')
|
|
|
|
|
|
def get_user_starred_repositories(user, limit=None, page=None):
  """ Returns a query for the repositories starred by `user`.

      With a `limit` the query is capped to that many rows; with both `limit` and `page` it is
      paginated instead. A `page` without a `limit` has no effect.
  """
  starred = (Repository
             .select(Repository, User, Visibility)
             .join(Star)
             .switch(Repository)
             .join(User)
             .switch(Repository)
             .join(Visibility)
             .where(Star.user == user))

  if not limit:
    return starred

  if page:
    return starred.paginate(page, limit)

  return starred.limit(limit)
|
|
|
|
|
|
def repository_is_starred(user, repository):
  """ Returns True if a Star row exists for `user` on `repository`, False otherwise. """
  star_query = (Star
                .select()
                .where(Star.repository == repository.id, Star.user == user.id))

  try:
    star_query.get()
  except Star.DoesNotExist:
    return False

  return True
|
|
|
|
|
|
def get_when_last_modified(repository_ids):
  """ Returns a dict mapping each of the given repository IDs to the largest
      lifetime_start_ts found among its tags. IDs without any tag row are left out, and an
      empty `repository_ids` yields an empty dict.
  """
  if not repository_ids:
    return {}

  rows = (RepositoryTag
          .select(RepositoryTag.repository, fn.Max(RepositoryTag.lifetime_start_ts))
          .where(RepositoryTag.repository << repository_ids)
          .group_by(RepositoryTag.repository)
          .tuples())

  return {row[0]: row[1] for row in rows}
|
|
|
|
|
|
def get_action_counts(repository_ids):
  """ Returns a dict mapping each of the given repository IDs to the sum of its
      RepositoryActionCount entries dated within the last week. IDs without such entries are
      left out, and an empty `repository_ids` yields an empty dict.
  """
  if not repository_ids:
    return {}

  # Only entries from the past week take part in the sum.
  week_ago = datetime.now() - timedelta(weeks=1)
  rows = (RepositoryActionCount
          .select(RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count))
          .where(RepositoryActionCount.repository << repository_ids)
          .where(RepositoryActionCount.date >= week_ago)
          .group_by(RepositoryActionCount.repository)
          .tuples())

  return {row[0]: row[1] for row in rows}
|
|
|
|
|
|
def get_visible_repositories(username, namespace=None, page=None, limit=None, include_public=False):
  """ Returns the repositories visible to the given user (if any).

      Without a username and with `include_public` disabled there is nothing to show, so an
      empty list is returned. Otherwise a query is returned which selects the repository name,
      ID and description along with the visibility name and the namespace username.
  """
  if not (include_public or username):
    return []

  selected = [Repository.name, Repository.id, Repository.description, Visibility.name,
              Namespace.username]

  visible = _visible_repository_query(username=username, page=page, limit=limit,
                                      namespace=namespace, include_public=include_public,
                                      select_models=selected)

  if limit:
    visible = visible.limit(limit)

  if namespace:
    visible = visible.where(Namespace.username == namespace)

  return visible
|
|
|
|
|
|
def _visible_repository_query(username=None, include_public=True, limit=None,
                              page=None, namespace=None, select_models=None):
  """ Builds the query underlying the repository visibility lookups.

      The query selects `select_models` (nothing explicit when omitted) as distinct rows from
      Repository joined with Visibility, Namespace and, via a left outer join,
      RepositoryPermission. It is then narrowed by `_basequery.filter_to_repos_for_user` using
      `username`, `namespace` and `include_public`. With a `page` the query is paginated using
      `limit` as the page size; with only a `limit` it is capped to that many rows.
  """
  # None stands in for "no explicit selection", so that no mutable list object is shared
  # between calls as the default value.
  if select_models is None:
    select_models = []

  query = (Repository
           .select(*select_models)  # MySQL/RDS complains if there are selected models for counts.
           .distinct()
           .join(Visibility)
           .switch(Repository)
           .join(Namespace, on=(Repository.namespace_user == Namespace.id))
           .switch(Repository)
           .join(RepositoryPermission, JOIN_LEFT_OUTER))

  query = _basequery.filter_to_repos_for_user(query, username, namespace, include_public)
  if page:
    query = query.paginate(page, limit)
  elif limit:
    query = query.limit(limit)

  return query
|
|
|
|
|
|
def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
  """ Returns up to `limit` repositories matching the given prefix string and passing the given
      checker function.

      Repository names are searched before namespace names. Within each of the two, matches
      with action counts in the last week come first (highest summed count first), followed by
      the remaining matches. Every returned repository carries an `is_public` flag and a
      `count` attribute (0 for matches found without counts).
  """
  week_ago = datetime.now() - timedelta(weeks=1)
  matches = []
  seen_ids = []

  def collect_matches(search_clause, with_count=False):
    """ Appends the not-yet-seen repositories matching `search_clause` to `matches`. """
    if len(matches) >= limit:
      return

    columns = [Repository, Namespace]
    if with_count:
      columns.append(fn.Sum(RepositoryActionCount.count).alias('count'))

    search = (Repository
              .select(*columns)
              .join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
              .switch(Repository)
              .where(search_clause)
              .group_by(Repository, Namespace))

    if only_public:
      search = search.where(Repository.visibility == _basequery.get_public_repo_visibility())

    if seen_ids:
      search = search.where(~(Repository.id << seen_ids))

    if with_count:
      search = (search
                .switch(Repository)
                .join(RepositoryActionCount)
                .where(RepositoryActionCount.date >= week_ago)
                .order_by(fn.Sum(RepositoryActionCount.count).desc()))

    for candidate in search:
      if len(matches) >= limit:
        return

      # Note: We compare IDs here, instead of objects, because calling .visibility on the
      # Repository will kick off a new SQL query to retrieve that visibility enum value. We don't
      # join the visibility table in SQL, as well, because it is ungodly slow in MySQL :-/
      candidate.is_public = candidate.visibility_id == _basequery.get_public_repo_visibility().id
      candidate.count = candidate.count if with_count else 0

      if checker(candidate):
        matches.append(candidate)
        seen_ids.append(candidate.id)

  # For performance reasons, we conduct the repo name and repo namespace searches on their
  # own. This also affords us the ability to give higher precedence to repository names matching
  # over namespaces, which is semantically correct.
  for searched_field in (Repository.name, Namespace.username):
    for use_counts in (True, False):
      collect_matches(searched_field ** (prefix + '%'), with_count=use_counts)

  return matches
|
|
|
|
|
|
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):
  """ Returns a query for at most `limit` visible repositories matching `repo_term`.

      A plain term matches repositories whose name or namespace contains it. A term with a
      slash after its first character is split at the first slash into a namespace part and
      a name part, and both have to match.
  """
  visible = _visible_repository_query(username, include_public=include_public)

  if repo_term.find('/') > 0:
    # Handle the case where the user has already entered a namespace path.
    namespace_term, name_term = repo_term.split('/', 1)
    search_clauses = (Repository.name ** ('%' + name_term + '%') &
                      Namespace.username ** ('%' + namespace_term + '%'))
  else:
    search_clauses = (Repository.name ** ('%' + repo_term + '%') |
                      Namespace.username ** ('%' + repo_term + '%'))

  return visible.where(search_clauses).limit(limit)
|
|
|
|
|
|
def lookup_repository(repo_id):
  """ Returns the repository with the given database ID, or None if there is none. """
  try:
    found = Repository.get(Repository.id == repo_id)
  except Repository.DoesNotExist:
    return None

  return found
|
|
|
|
|
|
def is_repository_public(repository):
  """ Returns the result of comparing the repository's visibility to the public visibility. """
  repo_visibility = repository.visibility
  public_visibility = _basequery.get_public_repo_visibility()
  return repo_visibility == public_visibility
|
|
|
|
|
|
def repository_is_public(namespace_name, repository_name):
  """ Returns True if the repository `namespace_name/repository_name` exists and has the
      'public' visibility, False otherwise.
  """
  public_repo_query = (Repository
                       .select()
                       .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                       .switch(Repository)
                       .join(Visibility)
                       .where(Namespace.username == namespace_name,
                              Repository.name == repository_name,
                              Visibility.name == 'public'))

  try:
    public_repo_query.get()
  except Repository.DoesNotExist:
    return False

  return True
|
|
|
|
|
|
def set_repository_visibility(repo, visibility):
  """ Assigns the Visibility row named `visibility` to the repository and saves it. """
  visibility_obj = Visibility.get(name=visibility)

  # NOTE(review): presumably Visibility.get raises for an unknown name instead of returning
  # a falsy value, which would make this guard unreachable - confirm.
  if visibility_obj:
    repo.visibility = visibility_obj
    repo.save()
|
|
|
|
|
|
def get_email_authorized_for_repo(namespace, repository, email):
  """ Returns the RepositoryAuthorizedEmail entry for `email` on the repository
      `namespace/repository`, or None if there is no such entry.
  """
  authorization_query = (RepositoryAuthorizedEmail
                         .select(RepositoryAuthorizedEmail, Repository, Namespace)
                         .join(Repository)
                         .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                         .where(Namespace.username == namespace, Repository.name == repository,
                                RepositoryAuthorizedEmail.email == email))

  try:
    return authorization_query.get()
  except RepositoryAuthorizedEmail.DoesNotExist:
    return None
|
|
|
|
|
|
def create_email_authorization_for_repo(namespace_name, repository_name, email):
  """ Creates and returns an unconfirmed RepositoryAuthorizedEmail entry for `email` on the
      repository `namespace_name/repository_name`.

      Raises a DataModelException if the repository does not exist.
  """
  try:
    target = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    message = 'Invalid repository %s/%s' % (namespace_name, repository_name)
    raise DataModelException(message)

  return RepositoryAuthorizedEmail.create(repository=target, email=email, confirmed=False)
|
|
|
|
|
|
def confirm_email_authorization_for_repo(code):
  """ Marks the RepositoryAuthorizedEmail entry with the given code as confirmed, saves it and
      returns it.

      Raises a DataModelException if no entry carries that code.
  """
  by_code = (RepositoryAuthorizedEmail
             .select(RepositoryAuthorizedEmail, Repository, Namespace)
             .join(Repository)
             .join(Namespace, on=(Repository.namespace_user == Namespace.id))
             .where(RepositoryAuthorizedEmail.code == code))

  try:
    authorization = by_code.get()
  except RepositoryAuthorizedEmail.DoesNotExist:
    raise DataModelException('Invalid confirmation code.')

  authorization.confirmed = True
  authorization.save()

  return authorization
|
|
|
|
|