Hash v1 uploads for torrent chunks

Jake Moshenko 2016-01-05 12:14:52 -05:00
parent 44fcc7e44b
commit 8f80d7064b
6 changed files with 98 additions and 69 deletions

View file

@@ -1,8 +1,9 @@
from peewee import JOIN_LEFT_OUTER, Clause, SQL
from peewee import Clause, SQL, fn
from cachetools import lru_cache
from data.model import DataModelException
from data.database import (Repository, User, Team, TeamMember, RepositoryPermission, TeamRole,
                           Namespace, Visibility, db_for_update)
                           Namespace, Visibility, ImageStorage, Image, db_for_update)
def prefix_search(field, prefix_query):
@@ -97,3 +98,30 @@ def get_user_organizations(username):
          .join(TeamMember)
          .join(UserAlias, on=(UserAlias.id == TeamMember.user))
          .where(User.organization == True, UserAlias.username == username))


def calculate_image_aggregate_size(ancestors_str, image_size, parent_image):
  ancestors = ancestors_str.split('/')[1:-1]
  if not ancestors:
    return image_size

  if parent_image is None:
    raise DataModelException('Could not load parent image')

  ancestor_size = parent_image.aggregate_size
  if ancestor_size is not None:
    return ancestor_size + image_size

  # Fallback to a slower path if the parent doesn't have an aggregate size saved.
  # TODO: remove this code if/when we do a full backfill.
  ancestor_size = (ImageStorage
                   .select(fn.Sum(ImageStorage.image_size))
                   .join(Image)
                   .where(Image.id << ancestors)
                   .scalar())
  if ancestor_size is None:
    return None

  return ancestor_size + image_size
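
For orientation, the helper added above boils down to simple arithmetic: either the parent row already carries an aggregate size for its whole ancestor chain (fast path), or each ancestor's stored size has to be summed (the fallback query). A minimal standalone sketch of that arithmetic, with made-up numbers and no database involved:

def aggregate_size_sketch(image_size, parent_aggregate_size, ancestor_sizes):
  # Fast path: the parent already stores the aggregate for its whole chain.
  if parent_aggregate_size is not None:
    return parent_aggregate_size + image_size
  # Fallback: sum each ancestor's stored size; None mimics the Sum() query coming back NULL.
  if not ancestor_sizes:
    return None
  return sum(ancestor_sizes) + image_size

# A 10-byte layer on top of a chain totalling 40 bytes aggregates to 50 either way.
assert aggregate_size_sketch(10, 40, [25, 15]) == 50
assert aggregate_size_sketch(10, None, [25, 15]) == 50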

View file

@@ -2,7 +2,7 @@ import logging
import dateutil.parser
import random
from peewee import JOIN_LEFT_OUTER, fn, SQL
from peewee import JOIN_LEFT_OUTER, SQL
from datetime import datetime
from data.model import (DataModelException, db_transaction, _basequery, storage,
@@ -296,6 +296,8 @@ def find_create_or_link_image(docker_image_id, repo_obj, username, translations,
def set_image_metadata(docker_image_id, namespace_name, repository_name, created_date_str, comment,
                       command, v1_json_metadata, parent=None):
  """ Sets metadata that is specific to how a binary piece of storage fits into the layer tree.
  """
  with db_transaction():
    query = (Image
             .select(Image, ImageStorage)
@@ -322,6 +324,7 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
    # We cleanup any old checksum in case it's a retry after a fail
    fetched.v1_checksum = None
    fetched.storage.content_checksum = None
    fetched.storage.save()

    fetched.comment = comment
    fetched.command = command
@@ -335,59 +338,6 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
    return fetched
def set_image_size(docker_image_id, namespace_name, repository_name, image_size, uncompressed_size):
  if image_size is None:
    raise DataModelException('Empty image size field')

  try:
    image = (Image
             .select(Image, ImageStorage)
             .join(Repository)
             .join(Namespace, on=(Repository.namespace_user == Namespace.id))
             .switch(Image)
             .join(ImageStorage, JOIN_LEFT_OUTER)
             .where(Repository.name == repository_name, Namespace.username == namespace_name,
                    Image.docker_image_id == docker_image_id)
             .get())
  except Image.DoesNotExist:
    raise DataModelException('No image with specified id and repository')

  image.storage.image_size = image_size
  image.storage.uncompressed_size = uncompressed_size
  image.storage.save()

  image.aggregate_size = calculate_image_aggregate_size(image.ancestors, image.storage,
                                                        image.parent)
  image.save()

  return image


def calculate_image_aggregate_size(ancestors_str, image_storage, parent_image):
  ancestors = ancestors_str.split('/')[1:-1]
  if not ancestors:
    return image_storage.image_size

  if parent_image is None:
    raise DataModelException('Could not load parent image')

  ancestor_size = parent_image.aggregate_size
  if ancestor_size is not None:
    return ancestor_size + image_storage.image_size

  # Fallback to a slower path if the parent doesn't have an aggregate size saved.
  # TODO: remove this code if/when we do a full backfill.
  ancestor_size = (ImageStorage
                   .select(fn.Sum(ImageStorage.image_size))
                   .join(Image)
                   .where(Image.id << ancestors)
                   .scalar())
  if ancestor_size is None:
    return None

  return ancestor_size + image_storage.image_size
def get_image(repo, docker_image_id):
  try:
    return Image.get(Image.docker_image_id == docker_image_id, Image.repository == repo)
@@ -452,7 +402,8 @@ def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str,
    pass

  # Get the aggregate size for the image.
  aggregate_size = calculate_image_aggregate_size(ancestors, image_storage, parent_image)
  aggregate_size = _basequery.calculate_image_aggregate_size(ancestors, image_storage.image_size,
                                                             parent_image)

  return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
                      command=command, v1_json_metadata=v1_json_metadata, created=created,
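
The ancestors value threaded through these calls is the slash-delimited chain of ancestor image ids stored on each Image row; the split('/')[1:-1] slicing in calculate_image_aggregate_size implies a path with leading and trailing delimiters, and trims those empty segments before summing. A quick illustration of that parsing, with made-up ids:

# Made-up ancestor paths in the '/id/id/.../' shape implied by the slicing above.
assert '/3/7/12/'.split('/')[1:-1] == ['3', '7', '12']
assert '/'.split('/')[1:-1] == []   # a path containing no ids yields an empty ancestor list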

View file

@@ -2,10 +2,12 @@ import logging
from peewee import JOIN_LEFT_OUTER, fn, SQL
from data.model import config, db_transaction, InvalidImageException, TorrentInfoDoesNotExist
from data.database import (ImageStorage, Image, DerivedStorageForImage, ImageStoragePlacement,
                           ImageStorageLocation, ImageStorageTransformation, ImageStorageSignature,
                           ImageStorageSignatureKind, Repository, Namespace, TorrentInfo)
from data.model import (config, db_transaction, InvalidImageException, TorrentInfoDoesNotExist,
                        DataModelException, _basequery)
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
                           ImageStorageTransformation, ImageStorageSignature,
                           ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
                           db_for_update)
logger = logging.getLogger(__name__)
@@ -203,6 +205,40 @@ def _reduce_as_tree(queries_to_reduce):
  return to_reduce_left.union_all(to_reduce_right)
def set_image_storage_metadata(docker_image_id, namespace_name, repository_name, image_size,
                               uncompressed_size):
  """ Sets metadata that is specific to the binary storage of the data, irrespective of how it
      is used in the layer tree.
  """
  if image_size is None:
    raise DataModelException('Empty image size field')

  query = (Image
           .select(Image, ImageStorage)
           .join(Repository)
           .join(Namespace, on=(Repository.namespace_user == Namespace.id))
           .switch(Image)
           .join(ImageStorage, JOIN_LEFT_OUTER)
           .where(Repository.name == repository_name, Namespace.username == namespace_name,
                  Image.docker_image_id == docker_image_id))
  try:
    image = db_for_update(query).get()
  except ImageStorage.DoesNotExist:
    raise InvalidImageException('No image with specified id and repository')

  # We MUST do this here, it can't be done in the corresponding image call because the storage
  # has not yet been pushed
  image.aggregate_size = _basequery.calculate_image_aggregate_size(image.ancestors, image_size,
                                                                   image.parent)
  image.save()

  image.storage.image_size = image_size
  image.storage.uncompressed_size = uncompressed_size
  image.storage.save()
  return image.storage
def get_storage_locations(uuid):
  query = (ImageStoragePlacement
           .select()
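
To make the ordering constraint noted in set_image_storage_metadata above concrete ("the storage has not yet been pushed"): in a v1 push the image JSON arrives before the layer bytes, so the size, and therefore the aggregate size, can only be recorded once the layer upload completes. A toy model of that two-step flow, using plain dicts rather than the real handlers or database models (all names here are illustrative only):

# Hypothetical stand-ins for the two v1 push steps.
images = {}

def handle_image_json(image_id, parent_id):
  # First step (roughly set_image_metadata): tree placement is known, sizes are not.
  images[image_id] = {'parent': parent_id, 'image_size': None, 'aggregate_size': None}

def handle_image_layer(image_id, layer_bytes):
  # Second step (roughly set_image_storage_metadata): the size becomes known only
  # after the layer data has actually been uploaded, so the aggregate is set here.
  image = images[image_id]
  image['image_size'] = len(layer_bytes)
  parent = images.get(image['parent'])
  parent_aggregate = parent['aggregate_size'] if parent else 0
  image['aggregate_size'] = parent_aggregate + image['image_size']

handle_image_json('base', None)
handle_image_layer('base', b'a' * 40)
handle_image_json('child', 'base')
handle_image_layer('child', b'b' * 10)
assert images['child']['aggregate_size'] == 50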