This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/endpoints/api/repository.py
Joseph Schorr 7a548ea101 Fix queries for repository list popularity and action count
Before this change, we used extremely inefficient outer joins as part of a single lookup query, which was spiking our CPU usage to nearly 100% on the query. We now issue two separate queries for popularity and action count, by doing a lookup of the previously found IDs. Interestingly enough, because of the way the queries are now written, MySQL can actually do both queries *directly from the indices*, which means they each occur in approx 20ms!

Verified by local tests, postgres tests, and testing on staging with monitoring of our CPU usage during lookup
2015-07-17 00:08:27 +03:00

325 lines
11 KiB
Python

""" List, create and manage repositories. """
import logging
import json
import datetime
from datetime import timedelta
from flask import request
from data import model
from data.model import Namespace
from data.database import (Repository as RepositoryTable, Visibility, RepositoryTag,
RepositoryActionCount, fn)
from endpoints.api import (truthy_bool, format_date, nickname, log_action, validate_json_request,
require_repo_read, require_repo_write, require_repo_admin,
RepositoryParamResource, resource, query_param, parse_args, ApiResource,
request_error, require_scope, Unauthorized, NotFound, InvalidRequest,
path_param)
from auth.permissions import (ModifyRepositoryPermission, AdministerRepositoryPermission,
CreateRepositoryPermission, ReadRepositoryPermission)
from auth.auth_context import get_authenticated_user
from auth import scopes
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@resource('/v1/repository')
class RepositoryList(ApiResource):
    """Operations for creating and listing repositories."""

    # JSON schema used by validate_json_request to check the POST body.
    schemas = {
        'NewRepo': {
            'id': 'NewRepo',
            'type': 'object',
            'description': 'Description of a new repository',
            'required': [
                'repository',
                'visibility',
                'description',
            ],
            'properties': {
                'repository': {
                    'type': 'string',
                    'description': 'Repository name',
                },
                'visibility': {
                    'type': 'string',
                    'description': 'Visibility which the repository will start with',
                    'enum': [
                        'public',
                        'private',
                    ],
                },
                'namespace': {
                    'type': 'string',
                    'description': ('Namespace in which the repository should be created. If omitted, the '
                                    'username of the caller is used'),
                },
                'description': {
                    'type': 'string',
                    'description': 'Markdown encoded description for the repository',
                },
            },
        },
    }

    @require_scope(scopes.CREATE_REPO)
    @nickname('createRepo')
    @validate_json_request('NewRepo')
    def post(self):
        """Create a new repository.

        The JSON request body supplies 'repository', 'visibility' and
        'description', plus an optional 'namespace'. When 'namespace' is
        omitted, the authenticated user's username is used instead.

        Returns:
            A ``(body, 201)`` tuple whose body holds the 'namespace' and
            'name' of the created repository.

        Raises:
            InvalidRequest: no user is authenticated and no namespace was given.
            Unauthorized: the caller may not create repositories in the namespace.
            The error built by ``request_error`` when the repository already exists.
        """
        owner = get_authenticated_user()
        req = request.get_json()

        # The membership test must be against the parsed request body; testing
        # against the string literal 'req' is always true and would reject
        # every anonymous request, even those that provide a namespace.
        if owner is None and 'namespace' not in req:
            raise InvalidRequest('Must provide a namespace or must be logged in.')

        # Not req.get(...): owner may be None when a namespace is supplied, so
        # owner.username must only be evaluated when it is actually needed.
        namespace_name = req['namespace'] if 'namespace' in req else owner.username

        permission = CreateRepositoryPermission(namespace_name)
        if not permission.can():
            raise Unauthorized()

        repository_name = req['repository']
        visibility = req['visibility']

        existing = model.get_repository(namespace_name, repository_name)
        if existing:
            raise request_error(message='Repository already exists')

        repo = model.create_repository(namespace_name, repository_name, owner, visibility)
        repo.description = req['description']
        repo.save()

        log_action('create_repo', namespace_name,
                   {'repo': repository_name, 'namespace': namespace_name},
                   repo=repo)
        return {
            'namespace': namespace_name,
            'name': repository_name
        }, 201

    @require_scope(scopes.READ_REPO)
    @nickname('listRepos')
    @parse_args
    @query_param('page', 'Offset page number. (int)', type=int)
    @query_param('limit', 'Limit on the number of results (int)', type=int)
    @query_param('namespace', 'Namespace to use when querying for org repositories.', type=str)
    @query_param('public', 'Whether to include repositories not explicitly visible by the user.',
                 type=truthy_bool, default=True)
    @query_param('private', 'Whether to include private repositories.', type=truthy_bool,
                 default=True)
    @query_param('namespace_only', 'Whether to limit only to the given namespace.',
                 type=truthy_bool, default=False)
    @query_param('last_modified', 'Whether to include when the repository was last modified.',
                 type=truthy_bool, default=False)
    @query_param('popularity', 'Whether to include the repository\'s popularity metric.',
                 type=truthy_bool, default=False)
    def get(self, args):
        """Fetch the list of repositories under a variety of situations.

        Args:
            args: parsed query parameters; the keys read here are 'page',
                'limit', 'namespace', 'public', 'private', 'namespace_only',
                'last_modified' and 'popularity'.

        Returns:
            A dict with a single 'repositories' key holding one view dict per
            repository. Each view has 'namespace', 'name', 'description' and
            'is_public'; 'last_modified' and 'popularity' are added when
            requested, and 'is_starred' when a user is authenticated.
        """
        user = get_authenticated_user()

        # The username is only passed to the visibility lookup when a user is
        # logged in AND private repositories were requested.
        username = None
        star_lookup = set()
        if user:
            starred_repos = model.get_user_starred_repositories(user)
            star_lookup = {repo.id for repo in starred_repos}
            if args['private']:
                username = user.username

        # Find the matching repositories.
        repo_query = model.get_visible_repositories(username,
                                                    limit=args['limit'],
                                                    page=args['page'],
                                                    include_public=args['public'],
                                                    namespace=args['namespace'],
                                                    namespace_only=args['namespace_only'])

        # Materialize once: the rows are walked twice below (ID collection and
        # view building), and both passes must see the same rows.
        repos = list(repo_query)

        # Collect the IDs of the repositories found for subsequent lookup of
        # popularity and/or last modified.
        repository_ids = [repo.get(RepositoryTable.id) for repo in repos]

        last_modified_map = {}
        if args['last_modified']:
            last_modified_map = model.get_when_last_modified(repository_ids)

        action_count_map = {}
        if args['popularity']:
            action_count_map = model.get_action_counts(repository_ids)

        def repo_view(repo_obj):
            """Build the API view dict for a single repository row."""
            repo = {
                'namespace': repo_obj.get(Namespace.username),
                'name': repo_obj.get(RepositoryTable.name),
                'description': repo_obj.get(RepositoryTable.description),
                'is_public': repo_obj.get(Visibility.name) == 'public'
            }

            repo_id = repo_obj.get(RepositoryTable.id)

            if args['last_modified']:
                repo['last_modified'] = last_modified_map.get(repo_id)

            if args['popularity']:
                # Repositories with no recorded actions report a popularity of 0.
                repo['popularity'] = action_count_map.get(repo_id, 0)

            if user:
                repo['is_starred'] = repo_id in star_lookup

            return repo

        return {'repositories': [repo_view(repo) for repo in repos]}
@resource('/v1/repository/<repopath:repository>')
@path_param('repository', 'The full path of the repository. e.g. namespace/name')
class Repository(RepositoryParamResource):
    """Operations for managing a specific repository."""

    # JSON schema used by validate_json_request to check the PUT body.
    schemas = {
        'RepoUpdate': {
            'id': 'RepoUpdate',
            'type': 'object',
            'description': 'Fields which can be updated in a repository.',
            'required': ['description'],
            'properties': {
                'description': {
                    'type': 'string',
                    'description': 'Markdown encoded description for the repository',
                },
            },
        },
    }

    @require_repo_read
    @nickname('getRepo')
    def get(self, namespace, repository):
        """Fetch the specified repository.

        Returns a dict describing the repository: its tags, the caller's
        write/admin permissions, visibility, starred state, status token and
        pull/push statistics for the last day and the last thirty days.

        Raises:
            NotFound: no repository matches ``namespace``/``repository``.
        """
        logger.debug('Get repo: %s/%s' % (namespace, repository))

        def tag_view(tag):
            """Build the API view dict for a single tag."""
            view = {
                'name': tag.name,
                'image_id': tag.image.docker_image_id,
                'size': tag.image.storage.aggregate_size,
            }

            # Only tags with a positive start timestamp get a last-modified date.
            started_ts = tag.lifetime_start_ts
            if started_ts > 0:
                started_at = datetime.datetime.fromtimestamp(started_ts)
                view['last_modified'] = format_date(started_at)

            return view

        repo = model.get_repository(namespace, repository)
        if not repo:
            raise NotFound()

        tag_dict = {}
        for tag in model.list_repository_tags(namespace, repository, include_storage=True):
            tag_dict[tag.name] = tag_view(tag)

        can_write = ModifyRepositoryPermission(namespace, repository).can()
        can_admin = AdministerRepositoryPermission(namespace, repository).can()

        if get_authenticated_user():
            is_starred = model.repository_is_starred(get_authenticated_user(), repo)
        else:
            is_starred = False

        is_public = model.is_repository_public(repo)

        one_day = timedelta(days=1)
        thirty_days = timedelta(days=30)
        pull_today, pull_thirty_day = model.get_repository_pulls(repo, one_day, thirty_days)
        push_today, push_thirty_day = model.get_repository_pushes(repo, one_day, thirty_days)

        return {
            'namespace': namespace,
            'name': repository,
            'description': repo.description,
            'tags': tag_dict,
            'can_write': can_write,
            'can_admin': can_admin,
            'is_public': is_public,
            'is_organization': repo.namespace_user.organization,
            'is_starred': is_starred,
            # The badge token is only exposed for non-public repositories.
            'status_token': '' if is_public else repo.badge_token,
            'stats': {
                'pulls': {'today': pull_today, 'thirty_day': pull_thirty_day},
                'pushes': {'today': push_today, 'thirty_day': push_thirty_day},
            },
        }

    @require_repo_write
    @nickname('updateRepo')
    @validate_json_request('RepoUpdate')
    def put(self, namespace, repository):
        """ Update the description in the specified repository.

        Returns ``{'success': True}`` once the new description is saved and
        logged; raises NotFound when the repository cannot be found.
        """
        repo = model.get_repository(namespace, repository)
        if not repo:
            raise NotFound()

        new_description = request.get_json()['description']
        repo.description = new_description
        repo.save()

        metadata = {'repo': repository, 'description': new_description}
        log_action('set_repo_description', namespace, metadata, repo=repo)
        return {'success': True}

    @require_repo_admin
    @nickname('deleteRepository')
    def delete(self, namespace, repository):
        """ Delete a repository.

        Purges the repository, logs the deletion, and returns a
        ``('Deleted', 204)`` response tuple.
        """
        model.purge_repository(namespace, repository)

        metadata = {'repo': repository, 'namespace': namespace}
        log_action('delete_repo', namespace, metadata)
        return 'Deleted', 204
@resource('/v1/repository/<repopath:repository>/changevisibility')
@path_param('repository', 'The full path of the repository. e.g. namespace/name')
class RepositoryVisibility(RepositoryParamResource):
    """ Custom verb for changing the visibility of the repository. """

    # JSON schema used by validate_json_request to check the POST body.
    schemas = {
        'ChangeVisibility': {
            'id': 'ChangeVisibility',
            'type': 'object',
            'description': 'Change the visibility for the repository.',
            'required': [
                'visibility',
            ],
            'properties': {
                'visibility': {
                    'type': 'string',
                    'description': 'Visibility which the repository will start with',
                    'enum': [
                        'public',
                        'private',
                    ],
                },
            }
        }
    }

    @require_repo_admin
    @nickname('changeRepoVisibility')
    @validate_json_request('ChangeVisibility')
    def post(self, namespace, repository):
        """ Change the visibility of a repository.

        Reads the new 'visibility' value from the JSON request body, applies
        it to the repository, and logs the change.

        Returns:
            ``{'success': True}`` after the visibility has been updated.

        Raises:
            NotFound: no repository matches ``namespace``/``repository``.
        """
        repo = model.get_repository(namespace, repository)
        if not repo:
            # Report a missing repository explicitly instead of falling
            # through and implicitly returning None as a successful response.
            raise NotFound()

        values = request.get_json()
        model.set_repository_visibility(repo, values['visibility'])
        log_action('change_repo_visibility', namespace,
                   {'repo': repository, 'visibility': values['visibility']},
                   repo=repo)
        return {'success': True}