diff --git a/data/model/tag.py b/data/model/tag.py index 4e17f7cdb..79f132109 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -54,6 +54,7 @@ def _tag_alive(query, now_ts=None): _MAX_SUB_QUERIES = 100 +_MAX_IMAGE_LOOKUP_COUNT = 500 def get_matching_tags_for_images(image_pairs, filter_query=None, selections=None): """ Returns all tags that contain the images with the given docker_image_id and storage_uuid, @@ -61,27 +62,36 @@ def get_matching_tags_for_images(image_pairs, filter_query=None, selections=None if not image_pairs: return [] - image_pairs = set(image_pairs) + image_pairs_set = set(image_pairs) # Find all possible matching image+storages. - ids = [image_pair[0] for image_pair in image_pairs] - uuids = [image_pair[1] for image_pair in image_pairs] - images_query = (Image - .select(Image.id, Image.docker_image_id, Image.ancestors, ImageStorage.uuid) - .join(ImageStorage) - .where(Image.docker_image_id << ids, ImageStorage.uuid << uuids)) + images = [] + + while image_pairs: + image_pairs_slice = image_pairs[0:_MAX_IMAGE_LOOKUP_COUNT] + + ids = [pair[0] for pair in image_pairs_slice] + uuids = [pair[1] for pair in image_pairs_slice] + + images_query = (Image + .select(Image.id, Image.docker_image_id, Image.ancestors, ImageStorage.uuid) + .join(ImageStorage) + .where(Image.docker_image_id << ids, ImageStorage.uuid << uuids)) + + images.extend(list(images_query)) + image_pairs = image_pairs[_MAX_IMAGE_LOOKUP_COUNT:] # Filter down to those images actually in the pairs set and build the set of queries to run. individual_image_queries = [] - for img in images_query: + for img in images: # Make sure the actual image was requested. pair = (img.docker_image_id, img.storage.uuid) - if pair not in image_pairs: + if pair not in image_pairs_set: continue # Remove the pair so we don't try it again. - image_pairs.remove(pair) + image_pairs_set.remove(pair) ancestors_str = '%s%s/%%' % (img.ancestors, img.id) query = (Image diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py index 407e5813f..fa993eedc 100644 --- a/data/model/test/test_tag.py +++ b/data/model/test/test_tag.py @@ -1,3 +1,7 @@ +import pytest + +from mock import patch + from data.database import Image, RepositoryTag, ImageStorage, Repository from data.model.repository import create_repository from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag, @@ -14,42 +18,62 @@ def _get_expected_tags(image): .where((Image.id == image.id) | (Image.ancestors ** ('%%/%s/%%' % image.id)))) return set([tag.id for tag in _tag_alive(expected_query)]) - -def test_get_matching_tags(initialized_db): - # Test for every image in the test database. - for image in Image.select(Image, ImageStorage).join(ImageStorage): - matching_query = get_matching_tags(image.docker_image_id, image.storage.uuid) - matching_tags = set([tag.id for tag in matching_query]) - expected_tags = _get_expected_tags(image) - assert matching_tags == expected_tags, "mismatch for image %s" % image.id +@pytest.mark.parametrize('max_subqueries,max_image_lookup_count', [ + (1, 1), + (10, 10), + (100, 500), +]) +def test_get_matching_tags(max_subqueries, max_image_lookup_count, initialized_db): + with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries): + with patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count): + # Test for every image in the test database. + for image in Image.select(Image, ImageStorage).join(ImageStorage): + matching_query = get_matching_tags(image.docker_image_id, image.storage.uuid) + matching_tags = set([tag.id for tag in matching_query]) + expected_tags = _get_expected_tags(image) + assert matching_tags == expected_tags, "mismatch for image %s" % image.id -def test_get_matching_tag_ids_for_images(initialized_db): - # Try for various sets of the first N images. - for count in [5, 10, 15]: - pairs = [] - expected_tags_ids = set() - for image in Image.select(Image, ImageStorage).join(ImageStorage): - if len(pairs) >= count: - break +@pytest.mark.parametrize('max_subqueries,max_image_lookup_count', [ + (1, 1), + (10, 10), + (100, 500), +]) +def test_get_matching_tag_ids_for_images(max_subqueries, max_image_lookup_count, initialized_db): + with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries): + with patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count): + # Try for various sets of the first N images. + for count in [5, 10, 15]: + pairs = [] + expected_tags_ids = set() + for image in Image.select(Image, ImageStorage).join(ImageStorage): + if len(pairs) >= count: + break - pairs.append((image.docker_image_id, image.storage.uuid)) - expected_tags_ids.update(_get_expected_tags(image)) + pairs.append((image.docker_image_id, image.storage.uuid)) + expected_tags_ids.update(_get_expected_tags(image)) - matching_tags_ids = set([tag.id for tag in get_matching_tags_for_images(pairs)]) - assert matching_tags_ids == expected_tags_ids + matching_tags_ids = set([tag.id for tag in get_matching_tags_for_images(pairs)]) + assert matching_tags_ids == expected_tags_ids -def test_get_matching_tag_ids_for_all_images(initialized_db): - pairs = [] - for image in Image.select(Image, ImageStorage).join(ImageStorage): - pairs.append((image.docker_image_id, image.storage.uuid)) +@pytest.mark.parametrize('max_subqueries,max_image_lookup_count', [ + (1, 1), + (10, 10), + (100, 500), +]) +def test_get_matching_tag_ids_for_all_images(max_subqueries, max_image_lookup_count, initialized_db): + with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries): + with patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count): + pairs = [] + for image in Image.select(Image, ImageStorage).join(ImageStorage): + pairs.append((image.docker_image_id, image.storage.uuid)) - expected_tags_ids = set([tag.id for tag in _tag_alive(RepositoryTag.select())]) - matching_tags_ids = set([tag.id for tag in get_matching_tags_for_images(pairs)]) + expected_tags_ids = set([tag.id for tag in _tag_alive(RepositoryTag.select())]) + matching_tags_ids = set([tag.id for tag in get_matching_tags_for_images(pairs)]) - # Ensure every alive tag was found. - assert matching_tags_ids == expected_tags_ids + # Ensure every alive tag was found. + assert matching_tags_ids == expected_tags_ids def test_get_matching_tag_ids_images_filtered(initialized_db):