More changes for registry-v2 in Python.
Implement the minimal changes to the local filesystem storage driver and feed them through the distributed storage driver. Create a digest package which contains digest_tools and checksums. Fix the tests to use the new v1 endpoint locations. Fix repository.delete_instance to properly filter the generated queries to avoid most subquery deletes, but still generate them when not explicitly filtered.
This commit is contained in:
		
							parent
							
								
									acbcc2e206
								
							
						
					
					
						commit
						bea8b9ac53
					
				
					 23 changed files with 397 additions and 179 deletions
				
			
		|  | @ -2,12 +2,14 @@ import string | |||
| import logging | ||||
| import uuid | ||||
| import time | ||||
| import toposort | ||||
| 
 | ||||
| from random import SystemRandom | ||||
| from datetime import datetime | ||||
| from peewee import * | ||||
| from data.read_slave import ReadSlaveModel | ||||
| from sqlalchemy.engine.url import make_url | ||||
| from collections import defaultdict | ||||
| 
 | ||||
| from data.read_slave import ReadSlaveModel | ||||
| from util.names import urn_generator | ||||
|  | @ -297,23 +299,46 @@ class Repository(BaseModel): | |||
|     ) | ||||
| 
 | ||||
|   def delete_instance(self, recursive=False, delete_nullable=False): | ||||
|     # Note: peewee generates extra nested deletion statements here that are slow and unnecessary. | ||||
|     # Therefore, we define our own deletion order here and use the dependency system to verify it. | ||||
|     ordered_dependencies = [RepositoryAuthorizedEmail, RepositoryTag, Image, LogEntry, | ||||
|                             RepositoryBuild, RepositoryBuildTrigger, RepositoryNotification, | ||||
|                             RepositoryPermission, AccessToken, Star, RepositoryActionCount] | ||||
|     if not recursive: | ||||
|       raise RuntimeError('Non-recursive delete on repository.') | ||||
| 
 | ||||
|     for query, fk in self.dependencies(search_nullable=True): | ||||
|     # These models don't need to use transitive deletes, because the referenced objects | ||||
|     # are cleaned up directly | ||||
|     skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger} | ||||
| 
 | ||||
|     # We need to sort the ops so that models get cleaned in order of their dependencies | ||||
|     ops = reversed(list(self.dependencies(delete_nullable))) | ||||
|     filtered_ops = [] | ||||
| 
 | ||||
|     dependencies = defaultdict(set) | ||||
| 
 | ||||
|     for query, fk in ops: | ||||
|       if fk.model_class not in skip_transitive_deletes or query.op != 'in': | ||||
|         filtered_ops.append((query, fk)) | ||||
| 
 | ||||
|       if query.op == 'in': | ||||
|         dependencies[fk.model_class.__name__].add(query.rhs.model_class.__name__) | ||||
|       elif query.op == '=': | ||||
|         dependencies[fk.model_class.__name__].add(Repository.__name__) | ||||
|       else: | ||||
|         raise RuntimeError('Unknown operator in recursive repository delete query') | ||||
| 
 | ||||
|     sorted_models = list(reversed(toposort.toposort_flatten(dependencies))) | ||||
|     def sorted_model_key(query_fk_tuple): | ||||
|       cmp_query, cmp_fk = query_fk_tuple | ||||
|       if cmp_query.op == 'in': | ||||
|         return -1 | ||||
|       return sorted_models.index(cmp_fk.model_class.__name__) | ||||
|     filtered_ops.sort(key=sorted_model_key) | ||||
| 
 | ||||
|     for query, fk in filtered_ops: | ||||
|       model = fk.model_class | ||||
|       if not model in ordered_dependencies: | ||||
|         raise Exception('Missing repository deletion dependency: %s', model) | ||||
| 
 | ||||
|     for model in ordered_dependencies: | ||||
|       model.delete().where(model.repository == self).execute() | ||||
| 
 | ||||
|     # Delete the repository itself. | ||||
|     super(Repository, self).delete_instance(recursive=False, delete_nullable=False) | ||||
|       if fk.null and not delete_nullable: | ||||
|         model.update(**{fk.name: None}).where(query).execute() | ||||
|       else: | ||||
|         model.delete().where(query).execute() | ||||
| 
 | ||||
|     return self.delete().where(self._pk_expr()).execute() | ||||
| 
 | ||||
| class Star(BaseModel): | ||||
|   user = ForeignKeyField(User, index=True) | ||||
|  | @ -679,4 +704,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission, | |||
|               ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification, | ||||
|               RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage, | ||||
|               TeamMemberInvite, ImageStorageSignature, ImageStorageSignatureKind, | ||||
|               AccessTokenKind, Star, RepositoryActionCount] | ||||
|               AccessTokenKind, Star, RepositoryActionCount, TagManifest] | ||||
|  |  | |||
|  | @ -1 +1,14 @@ | |||
| from data.model.legacy import * | ||||
| class DataModelException(Exception): | ||||
|   pass | ||||
| 
 | ||||
| 
 | ||||
| class Config(object): | ||||
|   def __init__(self): | ||||
|     self.app_config = None | ||||
|     self.store = None | ||||
| 
 | ||||
| 
 | ||||
| config = Config() | ||||
| 
 | ||||
| 
 | ||||
| from data.model.legacy import * | ||||
|  |  | |||
							
								
								
									
										22
									
								
								data/model/blob.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								data/model/blob.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,22 @@ | |||
| from data.model import config, DataModelException | ||||
| 
 | ||||
| from data.database import ImageStorage, Image, ImageStorageLocation, ImageStoragePlacement | ||||
| 
 | ||||
| 
 | ||||
| class BlobDoesNotExist(DataModelException): | ||||
|   pass | ||||
| 
 | ||||
| 
 | ||||
| def get_blob_by_digest(blob_digest): | ||||
|   try: | ||||
|     return ImageStorage.get(checksum=blob_digest) | ||||
|   except ImageStorage.DoesNotExist: | ||||
|     raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) | ||||
| 
 | ||||
| 
 | ||||
| def store_blob_record(blob_digest, location_name): | ||||
|   storage = ImageStorage.create(checksum=blob_digest) | ||||
|   location = ImageStorageLocation.get(name=location_name) | ||||
|   ImageStoragePlacement.create(location=location, storage=storage) | ||||
|   storage.locations = {location_name} | ||||
|   return storage | ||||
|  | @ -19,6 +19,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor | |||
|                            db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem, | ||||
|                            ImageStorageSignatureKind, validate_database_url, db_for_update, | ||||
|                            AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount) | ||||
| from data.model import config as model_config, DataModelException | ||||
| from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError | ||||
| from util.validation import (validate_username, validate_email, validate_password, | ||||
|                              INVALID_PASSWORD_MESSAGE) | ||||
|  | @ -36,18 +37,6 @@ Namespace = User.alias() | |||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| 
 | ||||
| class Config(object): | ||||
|   def __init__(self): | ||||
|     self.app_config = None | ||||
|     self.store = None | ||||
| 
 | ||||
| config = Config() | ||||
| 
 | ||||
| 
 | ||||
| class DataModelException(Exception): | ||||
|   pass | ||||
| 
 | ||||
| 
 | ||||
| class InvalidEmailAddressException(DataModelException): | ||||
|   pass | ||||
| 
 | ||||
|  | @ -1211,7 +1200,7 @@ def change_username(user_id, new_username): | |||
|   if not username_valid: | ||||
|     raise InvalidUsernameException('Invalid username %s: %s' % (new_username, username_issue)) | ||||
| 
 | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # Reload the user for update | ||||
|     user = db_for_update(User.select().where(User.id == user_id)).get() | ||||
| 
 | ||||
|  | @ -1587,7 +1576,7 @@ def _create_storage(location_name): | |||
| def _find_or_link_image(existing_image, repository, username, translations, preferred_location): | ||||
|   # TODO(jake): This call is currently recursively done under a single transaction. Can we make | ||||
|   # it instead be done under a set of transactions? | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # Check for an existing image, under the transaction, to make sure it doesn't already exist. | ||||
|     repo_image = get_repo_image(repository.namespace_user.username, repository.name, | ||||
|                                 existing_image.docker_image_id) | ||||
|  | @ -1659,7 +1648,7 @@ def find_create_or_link_image(docker_image_id, repository, username, translation | |||
|     pass | ||||
| 
 | ||||
|   # Otherwise, create a new storage directly. | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # Final check for an existing image, under the transaction. | ||||
|     repo_image = get_repo_image(repository.namespace_user.username, repository.name, | ||||
|                                 docker_image_id) | ||||
|  | @ -1796,7 +1785,7 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size, | |||
| 
 | ||||
| def set_image_metadata(docker_image_id, namespace_name, repository_name, created_date_str, comment, | ||||
|                        command, parent=None): | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     query = (Image | ||||
|              .select(Image, ImageStorage) | ||||
|              .join(Repository) | ||||
|  | @ -1980,7 +1969,7 @@ def garbage_collect_repository(namespace_name, repository_name): | |||
| 
 | ||||
|   _garbage_collect_tags(namespace_name, repository_name) | ||||
| 
 | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # TODO (jake): We could probably select this and all the images in a single query using | ||||
|     # a different kind of join. | ||||
| 
 | ||||
|  | @ -2021,7 +2010,7 @@ def _garbage_collect_storage(storage_id_whitelist): | |||
|     return | ||||
| 
 | ||||
|   def placements_query_to_paths_set(placements_query): | ||||
|     return {(placement.location.name, config.store.image_path(placement.storage.uuid)) | ||||
|     return {(placement.location.name, model_config.store.image_path(placement.storage.uuid)) | ||||
|             for placement in placements_query} | ||||
| 
 | ||||
|   def orphaned_storage_query(select_base_query, candidates, group_by): | ||||
|  | @ -2040,7 +2029,7 @@ def _garbage_collect_storage(storage_id_whitelist): | |||
|   # image storage being deleted for an image storage which is later reused during this time, | ||||
|   # but since these are caches anyway, it isn't terrible and worth the tradeoff (for now). | ||||
|   logger.debug('Garbage collecting derived storage from candidates: %s', storage_id_whitelist) | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # Find out which derived storages will be removed, and add them to the whitelist | ||||
|     # The comma after ImageStorage.id is VERY important, it makes it a tuple, which is a sequence | ||||
|     orphaned_from_candidates = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id), | ||||
|  | @ -2066,7 +2055,7 @@ def _garbage_collect_storage(storage_id_whitelist): | |||
|   # TODO(jake): We might want to allow for null storages on placements, which would allow us to | ||||
|   # delete the storages, then delete the placements in a non-transaction. | ||||
|   logger.debug('Garbage collecting storages from candidates: %s', storage_id_whitelist) | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # Track all of the data that should be removed from blob storage | ||||
|     placements_to_remove = list(orphaned_storage_query(ImageStoragePlacement | ||||
|                                                        .select(ImageStoragePlacement, | ||||
|  | @ -2107,7 +2096,7 @@ def _garbage_collect_storage(storage_id_whitelist): | |||
|   # This may end up producing garbage in s3, trading off for higher availability in the database. | ||||
|   for location_name, image_path in paths_to_remove: | ||||
|     logger.debug('Removing %s from %s', image_path, location_name) | ||||
|     config.store.remove({location_name}, image_path) | ||||
|     model_config.store.remove({location_name}, image_path) | ||||
| 
 | ||||
| 
 | ||||
| def get_tag_image(namespace_name, repository_name, tag_name): | ||||
|  | @ -2158,7 +2147,7 @@ def create_or_update_tag(namespace_name, repository_name, tag_name, | |||
| 
 | ||||
|   now_ts = get_epoch_timestamp() | ||||
| 
 | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     try: | ||||
|       tag = db_for_update(_tag_alive(RepositoryTag | ||||
|                                      .select() | ||||
|  | @ -2179,7 +2168,7 @@ def create_or_update_tag(namespace_name, repository_name, tag_name, | |||
| 
 | ||||
| def delete_tag(namespace_name, repository_name, tag_name): | ||||
|   now_ts = get_epoch_timestamp() | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     try: | ||||
|       query = _tag_alive(RepositoryTag | ||||
|                          .select(RepositoryTag, Repository) | ||||
|  | @ -2332,7 +2321,7 @@ def purge_repository(namespace_name, repository_name): | |||
| 
 | ||||
|   # Delete the rest of the repository metadata | ||||
|   fetched = _get_repository(namespace_name, repository_name) | ||||
|   fetched.delete_instance(recursive=True, delete_nullable=True) | ||||
|   fetched.delete_instance(recursive=True, delete_nullable=False) | ||||
| 
 | ||||
| 
 | ||||
| def get_private_repo_count(username): | ||||
|  | @ -2502,8 +2491,8 @@ def get_pull_credentials(robotname): | |||
|   return { | ||||
|     'username': robot.username, | ||||
|     'password': login_info.service_ident, | ||||
|     'registry': '%s://%s/v1/' % (config.app_config['PREFERRED_URL_SCHEME'], | ||||
|                                  config.app_config['SERVER_HOSTNAME']), | ||||
|     'registry': '%s://%s/v1/' % (model_config.app_config['PREFERRED_URL_SCHEME'], | ||||
|                                  model_config.app_config['SERVER_HOSTNAME']), | ||||
|   } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -2649,7 +2638,7 @@ def create_notification(kind_name, target, metadata={}): | |||
| 
 | ||||
| 
 | ||||
| def create_unique_notification(kind_name, target, metadata={}): | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     if list_notifications(target, kind_name, limit=1).count() == 0: | ||||
|       create_notification(kind_name, target, metadata) | ||||
| 
 | ||||
|  | @ -2897,7 +2886,7 @@ def confirm_team_invite(code, user): | |||
|   return (team, inviter) | ||||
| 
 | ||||
| def cancel_repository_build(build, work_queue): | ||||
|   with config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|   with model_config.app_config['DB_TRANSACTION_FACTORY'](db): | ||||
|     # Reload the build for update. | ||||
|     try: | ||||
|       build = db_for_update(RepositoryBuild.select().where(RepositoryBuild.id == build.id)).get() | ||||
|  |  | |||
		Reference in a new issue