More changes for registry-v2 in Python.
Implement the minimal changes to the local filesystem storage driver and feed them through the distributed storage driver. Create a digest package containing digest_tools and checksums. Fix the tests to use the new v1 endpoint locations. Fix Repository.delete_instance to properly filter the generated queries so that most subquery deletes are avoided, while still generating them when not explicitly filtered.
parent acbcc2e206
commit bea8b9ac53
23 changed files with 397 additions and 179 deletions
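The digest package mentioned in the message is not among the hunks shown below. As a rough sketch of the kind of helper registry-v2 digest handling needs (names hypothetical, not the actual digest_tools API):

import hashlib
import re

# Hypothetical helpers in the spirit of a digest_tools module; the
# actual package added by this commit is not shown in this diff.
DIGEST_PATTERN = re.compile(r'^([a-z0-9]+):([a-f0-9]+)$')

def content_digest(content):
  # Compute a registry-v2 style digest string for a byte payload.
  return 'sha256:' + hashlib.sha256(content).hexdigest()

def parse_digest(digest):
  # Split 'algorithm:hexbytes' into its two components.
  match = DIGEST_PATTERN.match(digest)
  if match is None:
    raise ValueError('Invalid digest: %s' % digest)
  return match.group(1), match.group(2)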
data/database.py

@@ -2,12 +2,14 @@ import string
 import logging
 import uuid
 import time
+import toposort

 from random import SystemRandom
 from datetime import datetime
 from peewee import *
 from data.read_slave import ReadSlaveModel
 from sqlalchemy.engine.url import make_url
+from collections import defaultdict

 from data.read_slave import ReadSlaveModel
 from util.names import urn_generator
@@ -297,23 +299,46 @@ class Repository(BaseModel):
     )

   def delete_instance(self, recursive=False, delete_nullable=False):
-    # Note: peewee generates extra nested deletion statements here that are slow and unnecessary.
-    # Therefore, we define our own deletion order here and use the dependency system to verify it.
-    ordered_dependencies = [RepositoryAuthorizedEmail, RepositoryTag, Image, LogEntry,
-                            RepositoryBuild, RepositoryBuildTrigger, RepositoryNotification,
-                            RepositoryPermission, AccessToken, Star, RepositoryActionCount]
+    if not recursive:
+      raise RuntimeError('Non-recursive delete on repository.')

-    for query, fk in self.dependencies(search_nullable=True):
+    # These models don't need to use transitive deletes, because the referenced objects
+    # are cleaned up directly
+    skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger}
+
+    # We need to sort the ops so that models get cleaned in order of their dependencies
+    ops = reversed(list(self.dependencies(delete_nullable)))
+    filtered_ops = []
+
+    dependencies = defaultdict(set)
+
+    for query, fk in ops:
+      if fk.model_class not in skip_transitive_deletes or query.op != 'in':
+        filtered_ops.append((query, fk))
+
+      if query.op == 'in':
+        dependencies[fk.model_class.__name__].add(query.rhs.model_class.__name__)
+      elif query.op == '=':
+        dependencies[fk.model_class.__name__].add(Repository.__name__)
+      else:
+        raise RuntimeError('Unknown operator in recursive repository delete query')
+
+    sorted_models = list(reversed(toposort.toposort_flatten(dependencies)))
+    def sorted_model_key(query_fk_tuple):
+      cmp_query, cmp_fk = query_fk_tuple
+      if cmp_query.op == 'in':
+        return -1
+      return sorted_models.index(cmp_fk.model_class.__name__)
+    filtered_ops.sort(key=sorted_model_key)
+
+    for query, fk in filtered_ops:
       model = fk.model_class
-      if not model in ordered_dependencies:
-        raise Exception('Missing repository deletion dependency: %s', model)
-
-    for model in ordered_dependencies:
-      model.delete().where(model.repository == self).execute()
-
-    # Delete the repository itself.
-    super(Repository, self).delete_instance(recursive=False, delete_nullable=False)
-
+      if fk.null and not delete_nullable:
+        model.update(**{fk.name: None}).where(query).execute()
+      else:
+        model.delete().where(query).execute()
+
+    return self.delete().where(self._pk_expr()).execute()

 class Star(BaseModel):
   user = ForeignKeyField(User, index=True)
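For clarity, here is a minimal, runnable illustration of the ordering trick used above: toposort_flatten lists dependencies before their dependents, so reversing its output yields a deletion order in which referencing rows are removed before the rows they point at. The model names below are only examples:

import toposort

# Example dependency map in the shape delete_instance builds above:
# each model name maps to the set of model names it references.
dependencies = {
  'RepositoryTag': {'Repository'},
  'Image': {'Repository'},
  'TagManifest': {'RepositoryTag'},
}

# toposort_flatten returns dependencies first; reversing gives a safe
# deletion order for the dependent rows.
deletion_order = list(reversed(toposort.toposort_flatten(dependencies)))
print(deletion_order)  # ['TagManifest', 'RepositoryTag', 'Image', 'Repository']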
@@ -679,4 +704,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission,
                ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification,
                RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage,
                TeamMemberInvite, ImageStorageSignature, ImageStorageSignatureKind,
-               AccessTokenKind, Star, RepositoryActionCount]
+               AccessTokenKind, Star, RepositoryActionCount, TagManifest]
data/model/__init__.py

@@ -1 +1,14 @@
-from data.model.legacy import *
+class DataModelException(Exception):
+  pass
+
+
+class Config(object):
+  def __init__(self):
+    self.app_config = None
+    self.store = None
+
+
+config = Config()
+
+
+from data.model.legacy import *
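Nothing in this commit shows where these Config fields get populated; presumably the application wires them up at startup, along the lines of this hypothetical sketch:

from data import model

def init_model_config(app, storage):
  # Hypothetical startup wiring; the real call site is not in this diff.
  # app is the Flask application, storage the distributed storage driver.
  model.config.app_config = app.config
  model.config.store = storage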
22 data/model/blob.py Normal file
@@ -0,0 +1,22 @@
+from data.model import config, DataModelException
+
+from data.database import ImageStorage, Image, ImageStorageLocation, ImageStoragePlacement
+
+
+class BlobDoesNotExist(DataModelException):
+  pass
+
+
+def get_blob_by_digest(blob_digest):
+  try:
+    return ImageStorage.get(checksum=blob_digest)
+  except ImageStorage.DoesNotExist:
+    raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
+
+
+def store_blob_record(blob_digest, location_name):
+  storage = ImageStorage.create(checksum=blob_digest)
+  location = ImageStorageLocation.get(name=location_name)
+  ImageStoragePlacement.create(location=location, storage=storage)
+  storage.locations = {location_name}
+  return storage
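A usage sketch for the new blob model; the location name here is an assumption (valid names are whatever ImageStorageLocation rows exist):

from data.model import blob

# Record that a blob with this digest now lives in the given location.
storage = blob.store_blob_record('sha256:0123abcd', 'local_us')

# Look a blob up again by digest, handling the miss case.
try:
  found = blob.get_blob_by_digest('sha256:0123abcd')
except blob.BlobDoesNotExist:
  found = None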
data/model/legacy.py

@@ -19,6 +19,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
                           db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
                           ImageStorageSignatureKind, validate_database_url, db_for_update,
                           AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
+from data.model import config as model_config, DataModelException
 from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError
 from util.validation import (validate_username, validate_email, validate_password,
                              INVALID_PASSWORD_MESSAGE)
@@ -36,18 +37,6 @@ Namespace = User.alias()
 logger = logging.getLogger(__name__)


-class Config(object):
-  def __init__(self):
-    self.app_config = None
-    self.store = None
-
-config = Config()
-
-
-class DataModelException(Exception):
-  pass
-
-
 class InvalidEmailAddressException(DataModelException):
   pass

@@ -1211,7 +1200,7 @@ def change_username(user_id, new_username):
   if not username_valid:
     raise InvalidUsernameException('Invalid username %s: %s' % (new_username, username_issue))

-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # Reload the user for update
     user = db_for_update(User.select().where(User.id == user_id)).get()

@@ -1587,7 +1576,7 @@ def _create_storage(location_name):
 def _find_or_link_image(existing_image, repository, username, translations, preferred_location):
   # TODO(jake): This call is currently recursively done under a single transaction. Can we make
   # it instead be done under a set of transactions?
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # Check for an existing image, under the transaction, to make sure it doesn't already exist.
     repo_image = get_repo_image(repository.namespace_user.username, repository.name,
                                 existing_image.docker_image_id)
@@ -1659,7 +1648,7 @@ def find_create_or_link_image(docker_image_id, repository, username, translation
       pass

   # Otherwise, create a new storage directly.
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # Final check for an existing image, under the transaction.
     repo_image = get_repo_image(repository.namespace_user.username, repository.name,
                                 docker_image_id)
@@ -1796,7 +1785,7 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size,

 def set_image_metadata(docker_image_id, namespace_name, repository_name, created_date_str, comment,
                        command, parent=None):
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     query = (Image
              .select(Image, ImageStorage)
              .join(Repository)
@@ -1980,7 +1969,7 @@ def garbage_collect_repository(namespace_name, repository_name):

   _garbage_collect_tags(namespace_name, repository_name)

-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # TODO (jake): We could probably select this and all the images in a single query using
     # a different kind of join.

@@ -2021,7 +2010,7 @@ def _garbage_collect_storage(storage_id_whitelist):
     return

   def placements_query_to_paths_set(placements_query):
-    return {(placement.location.name, config.store.image_path(placement.storage.uuid))
+    return {(placement.location.name, model_config.store.image_path(placement.storage.uuid))
             for placement in placements_query}

   def orphaned_storage_query(select_base_query, candidates, group_by):
@@ -2040,7 +2029,7 @@ def _garbage_collect_storage(storage_id_whitelist):
   # image storage being deleted for an image storage which is later reused during this time,
   # but since these are caches anyway, it isn't terrible and worth the tradeoff (for now).
   logger.debug('Garbage collecting derived storage from candidates: %s', storage_id_whitelist)
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # Find out which derived storages will be removed, and add them to the whitelist
     # The comma after ImageStorage.id is VERY important, it makes it a tuple, which is a sequence
     orphaned_from_candidates = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
@@ -2066,7 +2055,7 @@ def _garbage_collect_storage(storage_id_whitelist):
   # TODO(jake): We might want to allow for null storages on placements, which would allow us to
   # delete the storages, then delete the placements in a non-transaction.
   logger.debug('Garbage collecting storages from candidates: %s', storage_id_whitelist)
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # Track all of the data that should be removed from blob storage
     placements_to_remove = list(orphaned_storage_query(ImageStoragePlacement
                                                        .select(ImageStoragePlacement,
@@ -2107,7 +2096,7 @@ def _garbage_collect_storage(storage_id_whitelist):
   # This may end up producing garbage in s3, trading off for higher availability in the database.
   for location_name, image_path in paths_to_remove:
     logger.debug('Removing %s from %s', image_path, location_name)
-    config.store.remove({location_name}, image_path)
+    model_config.store.remove({location_name}, image_path)


 def get_tag_image(namespace_name, repository_name, tag_name):
@@ -2158,7 +2147,7 @@ def create_or_update_tag(namespace_name, repository_name, tag_name,

   now_ts = get_epoch_timestamp()

-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     try:
       tag = db_for_update(_tag_alive(RepositoryTag
                                      .select()
@@ -2179,7 +2168,7 @@ def create_or_update_tag(namespace_name, repository_name, tag_name,

 def delete_tag(namespace_name, repository_name, tag_name):
   now_ts = get_epoch_timestamp()
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     try:
       query = _tag_alive(RepositoryTag
                          .select(RepositoryTag, Repository)
@@ -2332,7 +2321,7 @@ def purge_repository(namespace_name, repository_name):

   # Delete the rest of the repository metadata
   fetched = _get_repository(namespace_name, repository_name)
-  fetched.delete_instance(recursive=True, delete_nullable=True)
+  fetched.delete_instance(recursive=True, delete_nullable=False)


 def get_private_repo_count(username):
@@ -2502,8 +2491,8 @@ def get_pull_credentials(robotname):
   return {
     'username': robot.username,
     'password': login_info.service_ident,
-    'registry': '%s://%s/v1/' % (config.app_config['PREFERRED_URL_SCHEME'],
-                                 config.app_config['SERVER_HOSTNAME']),
+    'registry': '%s://%s/v1/' % (model_config.app_config['PREFERRED_URL_SCHEME'],
+                                 model_config.app_config['SERVER_HOSTNAME']),
   }


@@ -2649,7 +2638,7 @@ def create_notification(kind_name, target, metadata={}):


 def create_unique_notification(kind_name, target, metadata={}):
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     if list_notifications(target, kind_name, limit=1).count() == 0:
       create_notification(kind_name, target, metadata)

@@ -2897,7 +2886,7 @@ def confirm_team_invite(code, user):
   return (team, inviter)

 def cancel_repository_build(build, work_queue):
-  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+  with model_config.app_config['DB_TRANSACTION_FACTORY'](db):
     # Reload the build for update.
     try:
       build = db_for_update(RepositoryBuild.select().where(RepositoryBuild.id == build.id)).get()