Add a batch get_matching_tags_for_images method

This will be used in the security notification worker to retrieve the needed tags in a set of batch calls, rather than making multiple calls per image.
This commit is contained in:
parent
e583be3914
commit
74dd0ef8e8
4 changed files with 162 additions and 36 deletions
@@ -53,6 +53,73 @@ def _tag_alive(query, now_ts=None):
                     (RepositoryTag.lifetime_end_ts > now_ts))
_MAX_SUB_QUERIES = 100


def get_matching_tags_for_images(image_pairs, filter_query=None, selections=None):
  """ Returns all tags that contain the images with the given docker_image_id and storage_uuid,
      specified as an iterable of (docker_image_id, storage_uuid) pairs. """
  if not image_pairs:
    return []

  image_pairs = set(image_pairs)

  # Find all possible matching image+storages.
  ids = [image_pair[0] for image_pair in image_pairs]
  uuids = [image_pair[1] for image_pair in image_pairs]
  images_query = (Image
                  .select(Image.id, Image.docker_image_id, Image.ancestors, ImageStorage.uuid)
                  .join(ImageStorage)
                  .where(Image.docker_image_id << ids, ImageStorage.uuid << uuids))

  # Filter down to those images actually in the pairs set and build the set of queries to run.
  individual_image_queries = []

  for img in images_query:
    # Make sure the actual image was requested.
    pair = (img.docker_image_id, img.storage.uuid)
    if pair not in image_pairs:
      continue

    # Remove the pair so we don't try it again.
    image_pairs.remove(pair)

    # The ancestors field is a path of IDs (e.g. '/1/2/3/'), so any descendant of this
    # image has an ancestors field prefixed with '<ancestors><id>/'. The ** operator is
    # peewee's (case-insensitive) LIKE.
    ancestors_str = '%s%s/%%' % (img.ancestors, img.id)
    query = (Image
             .select(Image.id)
             .where((Image.id == img.id) | (Image.ancestors ** ancestors_str)))

    individual_image_queries.append(query)

  if not individual_image_queries:
    return []

  # Shard based on the max subquery count. This is used to prevent going over the DB's max query
  # size, as well as to prevent the DB from locking up on a massive query.
  sharded_queries = []
  while individual_image_queries:
    shard = individual_image_queries[0:_MAX_SUB_QUERIES]
    sharded_queries.append(_basequery.reduce_as_tree(shard))
    individual_image_queries = individual_image_queries[_MAX_SUB_QUERIES:]

  # Collect IDs of the tags found for each query.
  tags = {}
  for query in sharded_queries:
    tag_query = (_tag_alive(RepositoryTag
                            .select(*(selections or []))
                            .distinct()
                            .join(Image)
                            .where(RepositoryTag.hidden == False)
                            .where(Image.id << query)))

    if filter_query is not None:
      tag_query = filter_query(tag_query)

    for tag in tag_query:
      tags[tag.id] = tag

  return tags.values()
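The `_basequery.reduce_as_tree` helper used in the sharding step is not part of this diff. As a rough sketch of the idea only (an assumption about the helper's shape, not its actual implementation), it can be read as recursively splitting the query list and unioning the halves, so the compound query forms a balanced tree of depth O(log n) rather than a linear chain of depth O(n):

# Hypothetical sketch of a reduce_as_tree-style helper; the real one lives in
# data.model._basequery and is not shown in this diff.
def reduce_as_tree_sketch(queries):
  if len(queries) == 1:
    return queries[0]

  # Recursively union the two halves of the list.
  mid = len(queries) // 2
  left = reduce_as_tree_sketch(queries[:mid])
  right = reduce_as_tree_sketch(queries[mid:])
  return left | right  # peewee's | combines two selects via UNION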
def get_matching_tags(docker_image_id, storage_uuid, *args):
  """ Returns a query pointing to all tags that contain the image with the
      given docker_image_id and storage_uuid. """
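To tie back to the commit message, a minimal sketch of how a caller such as the security notification worker might use the batch method; `notification_images` and `handle_affected_tag` are hypothetical stand-ins, and only `get_matching_tags_for_images` and the model classes come from this change:

# Hypothetical caller sketch: one batched lookup for every image in a security
# notification, instead of a get_matching_tags call per image.
image_pairs = [(image.docker_image_id, image.storage.uuid)
               for image in notification_images]  # hypothetical source of images

for tag in get_matching_tags_for_images(image_pairs,
                                        selections=[RepositoryTag, Image]):
  handle_affected_tag(tag)  # hypothetical per-tag handler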