Switch V2 pagination back to using IDs, which should be much faster and easier on the DB

Also adds a test for the tags endpoint
This commit is contained in:
Joseph Schorr 2018-06-18 16:11:26 -04:00
parent b8b2c75822
commit 3161b60522
7 changed files with 105 additions and 31 deletions

View file

@ -40,10 +40,10 @@ def handle_registry_v2_exception(error):
return response
_MAX_RESULTS_PER_PAGE = app.config.get('V2_PAGINATION_SIZE', 50)
_MAX_RESULTS_PER_PAGE = app.config.get('V2_PAGINATION_SIZE', 100)
def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
def paginate(start_id_kwarg_name='start_id', limit_kwarg_name='limit',
callback_kwarg_name='pagination_callback'):
"""
Decorates a handler adding a parsed pagination token and a callback to encode a response token.
@ -61,17 +61,16 @@ def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
next_page_token = request.args.get('next_page', request.args.get('last', None))
# Decrypt the next page token, if any.
offset = 0
start_id = None
page_info = decrypt_page_token(next_page_token)
if page_info is not None:
# Note: we use offset here instead of ID >= n because one of the V2 queries is a UNION.
offset = page_info.get('offset', 0)
start_id = page_info.get('start_id', None)
def callback(num_results, response):
if num_results < limit:
def callback(results, response):
if len(results) <= limit:
return
next_page_token = encrypt_page_token({'offset': limit + offset})
next_page_token = encrypt_page_token({'start_id': max([obj.id for obj in results])})
link_url = os.path.join(get_app_url(), url_for(request.endpoint, **request.view_args))
link_param = urlencode({'n': limit, 'next_page': next_page_token})
@ -79,12 +78,10 @@ def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
response.headers['Link'] = link
kwargs[limit_kwarg_name] = limit
kwargs[offset_kwarg_name] = offset
kwargs[start_id_kwarg_name] = start_id
kwargs[callback_kwarg_name] = callback
return func(*args, **kwargs)
return wrapped
return wrapper

View file

@ -13,7 +13,7 @@ from endpoints.v2.models_pre_oci import data_model as model
@process_registry_jwt_auth()
@anon_protect
@paginate()
def catalog_search(limit, offset, pagination_callback):
def catalog_search(start_id, limit, pagination_callback):
include_public = bool(features.PUBLIC_CATALOG)
if not include_public and not get_authenticated_user():
return jsonify({'repositories': []})
@ -22,11 +22,12 @@ def catalog_search(limit, offset, pagination_callback):
if username and not get_authenticated_user().enabled:
return jsonify({'repositories': []})
visible_repositories = model.get_visible_repositories(username, limit + 1, offset,
visible_repositories = model.get_visible_repositories(username, start_id, limit,
include_public=include_public)
response = jsonify({
'repositories': ['%s/%s' % (repo.namespace_name, repo.name)
for repo in visible_repositories][0:limit],})
for repo in visible_repositories][0:limit],
})
pagination_callback(len(visible_repositories), response)
pagination_callback(visible_repositories, response)
return response

View file

@ -26,7 +26,7 @@ class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type']))
"""
class Tag(namedtuple('Tag', ['name', 'repository'])):
class Tag(namedtuple('Tag', ['id', 'name', 'repository'])):
"""
Tag represents a user-facing alias for referencing a set of Manifests.
"""
@ -167,14 +167,14 @@ class DockerRegistryV2DataInterface(object):
pass
@abstractmethod
def repository_tags(self, namespace_name, repo_name, limit, offset):
def repository_tags(self, namespace_name, repo_name, start_id, limit):
"""
Returns the active tags under the repository with the given name and namespace.
"""
pass
@abstractmethod
def get_visible_repositories(self, username, limit, offset):
def get_visible_repositories(self, username, start_id, limit):
"""
Returns the repositories visible to the user with the given username, if any.
"""

View file

@ -60,7 +60,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
def delete_manifest_by_digest(self, namespace_name, repo_name, digest):
def _tag_view(tag):
return Tag(name=tag.name, repository=RepositoryReference(
return Tag(id=tag.id, name=tag.name, repository=RepositoryReference(
id=tag.repository_id,
name=repo_name,
namespace_name=namespace_name,))
@ -118,24 +118,32 @@ class PreOCIModel(DockerRegistryV2DataInterface):
repository.id, tag_name, leaf_layer_docker_id, manifest_digest, manifest_bytes)
return newly_created
def repository_tags(self, namespace_name, repo_name, limit, offset):
def repository_tags(self, namespace_name, repo_name, start_id, limit):
def _tag_view(tag):
return Tag(name=tag.name, repository=RepositoryReference(
return Tag(id=tag.id, name=tag.name, repository=RepositoryReference(
id=tag.repository_id,
name=repo_name,
namespace_name=namespace_name,))
tags_query = model.tag.list_repository_tags(namespace_name, repo_name)
tags_query = tags_query.limit(limit).offset(offset)
tags_query = (tags_query
.order_by(database.RepositoryTag.id)
.limit(limit + 1))
if start_id is not None:
tags_query = tags_query.where(database.RepositoryTag.id >= start_id)
return [_tag_view(tag) for tag in tags_query]
def get_visible_repositories(self, username, limit, offset, include_public=None):
def get_visible_repositories(self, username, start_id, limit, include_public=None):
if include_public is None:
include_public = (username is None)
query = model.repository.get_visible_repositories(username, kind_filter='image',
include_public=include_public)
query = query.limit(limit).offset(offset)
query = model.repository.get_visible_repositories(username,
kind_filter='image',
include_public=include_public,
start_id=start_id,
limit=limit + 1)
return [_repository_for_repo(repo) for repo in query]
def create_blob_upload(self, namespace_name, repo_name, upload_uuid, location_name,
@ -295,7 +303,7 @@ def _docker_v1_metadata(namespace_name, repo_name, repo_image):
def _repository_for_repo(repo):
""" Returns a Repository object representing the Pre-OCI data model repo instance given. """
return Repository(
id=repo.id,
id=repo.id or repo.rid,
name=repo.name,
namespace_name=repo.namespace_user.username,
description=repo.description,

View file

@ -12,11 +12,11 @@ from endpoints.v2.models_pre_oci import data_model as model
@require_repo_read
@anon_protect
@paginate()
def list_all_tags(namespace_name, repo_name, limit, offset, pagination_callback):
tags = model.repository_tags(namespace_name, repo_name, limit, offset)
def list_all_tags(namespace_name, repo_name, start_id, limit, pagination_callback):
tags = list(model.repository_tags(namespace_name, repo_name, start_id, limit))
response = jsonify({
'name': '{0}/{1}'.format(namespace_name, repo_name),
'tags': [tag.name for tag in tags],})
'tags': [tag.name for tag in tags][0:limit],})
pagination_callback(len(tags), response)
pagination_callback(tags, response)
return response

View file

@ -347,6 +347,48 @@ class V2Protocol(RegistryProtocol):
return PullResult(manifests=manifests, image_ids=image_ids)
def tags(self, session, namespace, repo_name, page_size=2, credentials=None, options=None,
         expected_failure=None):
  """
  Lists the tags of a repository through the V2 `tags/list` endpoint, following pagination.

  The first request is issued against `/v2/<repo>/tags/list`; as long as a response carries a
  `Link` header, the next request is issued against the `/v2/...` portion of that header.

  Args:
    session: session object handed to `ping`, `auth` and `conduct`.
    namespace: namespace of the repository whose tags are listed.
    repo_name: name of the repository whose tags are listed.
    page_size: value sent as the `n` query parameter; when None, no `n` parameter is sent and
               the per-page size check is skipped.
    credentials: if not None, used to retrieve a bearer token before listing.
    options: protocol options; a default `ProtocolOptions()` is used when falsy.
    expected_failure: forwarded to `auth`.

  Returns:
    The list of tag names collected across all pages, or None if credentials were given but
    `auth` returned no token.
  """
  options = options or ProtocolOptions()
  scopes = options.scopes or ['repository:%s:pull' % self.repo_name(namespace, repo_name)]

  # Ping!
  self.ping(session)

  # Perform auth and retrieve a token.
  headers = {}
  if credentials is not None:
    token, _ = self.auth(session, credentials, namespace, repo_name, scopes=scopes,
                         expected_failure=expected_failure)
    if token is None:
      return None

    headers = {
      'Authorization': 'Bearer ' + token,
    }

  results = []
  url = '/v2/%s/tags/list' % (self.repo_name(namespace, repo_name))
  params = {}
  if page_size is not None:
    params['n'] = page_size

  while True:
    response = self.conduct(session, 'GET', url, headers=headers, params=params)
    data = response.json()

    # Only check the page bound when a page size was actually requested: comparing against
    # None raises TypeError on Python 3 and is always False on Python 2.
    if page_size is not None:
      assert len(data['tags']) <= page_size

    results.extend(data['tags'])

    # No Link header means this was the last page; this is the loop's only exit.
    if not response.headers.get('Link'):
      return results

    # Continue from the `/v2/...` part of the Link header value.
    # NOTE(review): everything after `/v2/` is kept verbatim, including any trailing
    # `>; rel="next"` decoration the header may carry -- confirm the server tolerates it.
    link_url = response.headers['Link']
    v2_index = link_url.find('/v2/')
    url = link_url[v2_index:]
def catalog(self, session, page_size=2, credentials=None, options=None, expected_failure=None,
namespace=None, repo_name=None):
options = options or ProtocolOptions()

View file

@ -16,6 +16,7 @@ from test.registry.protocol_fixtures import *
from test.registry.protocols import Failures, Image, layer_bytes_for_contents, ProtocolOptions
from app import instance_keys
from data.model.tag import list_repository_tags
from util.security.registry_jwt import decode_bearer_header
from util.timedeltastring import convert_to_timedelta
@ -718,6 +719,31 @@ def test_catalog(public_catalog, credentials, expected_repos, page_size, v2_prot
assert set(expected_repos).issubset(set(results))
@pytest.mark.parametrize('username, namespace, repository', [
  ('devtable', 'devtable', 'simple'),
  ('devtable', 'devtable', 'gargantuan'),
  ('public', 'public', 'publicrepo'),
  ('devtable', 'buynlarge', 'orgrepo'),
])
@pytest.mark.parametrize('page_size', [
  1,
  2,
  10,
  50,
  100,
])
def test_tags(username, namespace, repository, page_size, v2_protocol, liveserver_session,
              app_reloader, liveserver, registry_server_executor):
  """ Test: Retrieving results from the V2 tags endpoint. """
  credentials = (username, 'password')
  results = v2_protocol.tags(liveserver_session, page_size=page_size, credentials=credentials,
                             namespace=namespace, repo_name=repository)

  # The paginated listing must yield exactly the tags the data model reports: the length
  # check catches duplicates across pages, the set comparison catches missing or extra names.
  expected_tags = [tag.name for tag in list_repository_tags(namespace, repository)]
  assert len(results) == len(expected_tags)
  assert set(results) == set(expected_tags)
def test_pull_torrent(pusher, basic_images, liveserver_session, liveserver,
registry_server_executor, app_reloader):
""" Test: Retrieve a torrent for pulling the image via the Quay CLI. """