initial import for Open Source 🎉

This commit is contained in:
Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

9
data/__init__.py Normal file

@@ -0,0 +1,9 @@
from data.appr_model import (
blob,
channel,
manifest,
manifest_list,
package,
release,
tag,
)

76
data/appr_model/blob.py Normal file

@@ -0,0 +1,76 @@
import logging
from peewee import IntegrityError
from data.model import db_transaction
logger = logging.getLogger(__name__)
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def get_blob(digest, models_ref):
""" Find a blob by its digest. """
Blob = models_ref.Blob
return Blob.select().where(Blob.digest == _ensure_sha256_header(digest)).get()
def get_or_create_blob(digest, size, media_type_name, locations, models_ref):
""" Try to find a blob by its digest or create it. """
Blob = models_ref.Blob
BlobPlacement = models_ref.BlobPlacement
# Get or create the blob entry for the digest.
try:
blob = get_blob(digest, models_ref)
logger.debug('Retrieved blob with digest %s', digest)
except Blob.DoesNotExist:
blob = Blob.create(digest=_ensure_sha256_header(digest),
media_type_id=Blob.media_type.get_id(media_type_name),
size=size)
logger.debug('Created blob with digest %s', digest)
# Add the locations to the blob.
for location_name in locations:
location_id = BlobPlacement.location.get_id(location_name)
try:
BlobPlacement.create(blob=blob, location=location_id)
except IntegrityError:
logger.debug('Location %s already exists for blob %s', location_name, blob.id)
return blob
def get_blob_locations(digest, models_ref):
""" Find all locations names for a blob. """
Blob = models_ref.Blob
BlobPlacement = models_ref.BlobPlacement
BlobPlacementLocation = models_ref.BlobPlacementLocation
return [x.name for x in
BlobPlacementLocation
.select()
.join(BlobPlacement)
.join(Blob)
.where(Blob.digest == _ensure_sha256_header(digest))]
def ensure_blob_locations(models_ref, *names):
BlobPlacementLocation = models_ref.BlobPlacementLocation
with db_transaction():
locations = BlobPlacementLocation.select().where(BlobPlacementLocation.name << names)
insert_names = list(names)
for location in locations:
insert_names.remove(location.name)
if not insert_names:
return
data = [{'name': name} for name in insert_names]
BlobPlacementLocation.insert_many(data).execute()
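
A minimal usage sketch for the blob helpers above, assuming an initialized database and app context; the digest, size, media type name, and location name are illustrative, and NEW_MODELS comes from data/appr_model/models.py later in this commit.

from data.appr_model import blob as blob_model
from data.appr_model.models import NEW_MODELS

# Illustrative values; the media type and location must already exist as rows.
digest = 'sha256:' + 'ab' * 32
blob_model.ensure_blob_locations(NEW_MODELS, 'local_us')
blob = blob_model.get_or_create_blob(digest, 1024, 'application/vnd.cnr.blob.v0.tar+gzip',
                                     ['local_us'], NEW_MODELS)
print(blob_model.get_blob_locations(digest, NEW_MODELS))  # e.g. ['local_us']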


@@ -0,0 +1,64 @@
from data.appr_model import tag as tag_model
def get_channel_releases(repo, channel, models_ref):
""" Return all previously linked tags.
This works based upon Tag lifetimes.
"""
Channel = models_ref.Channel
Tag = models_ref.Tag
tag_kind_id = Channel.tag_kind.get_id('channel')
channel_name = channel.name
return (Tag
.select(Tag, Channel)
.join(Channel, on=(Tag.id == Channel.linked_tag))
.where(Channel.repository == repo,
Channel.name == channel_name,
Channel.tag_kind == tag_kind_id, Channel.lifetime_end != None)
.order_by(Tag.lifetime_end))
def get_channel(repo, channel_name, models_ref):
""" Find a Channel by name. """
channel = tag_model.get_tag(repo, channel_name, models_ref, "channel")
return channel
def get_tag_channels(repo, tag_name, models_ref, active=True):
""" Find the Channels associated with a Tag. """
Tag = models_ref.Tag
tag = tag_model.get_tag(repo, tag_name, models_ref, "release")
query = tag.tag_parents
if active:
query = tag_model.tag_is_alive(query, Tag)
return query
def delete_channel(repo, channel_name, models_ref):
""" Delete a channel by name. """
return tag_model.delete_tag(repo, channel_name, models_ref, "channel")
def create_or_update_channel(repo, channel_name, tag_name, models_ref):
""" Creates or updates a channel to include a particular tag. """
tag = tag_model.get_tag(repo, tag_name, models_ref, 'release')
return tag_model.create_or_update_tag(repo, channel_name, models_ref, linked_tag=tag,
tag_kind="channel")
def get_repo_channels(repo, models_ref):
""" Creates or updates a channel to include a particular tag. """
Channel = models_ref.Channel
Tag = models_ref.Tag
tag_kind_id = Channel.tag_kind.get_id('channel')
query = (Channel
.select(Channel, Tag)
.join(Tag, on=(Tag.id == Channel.linked_tag))
.where(Channel.repository == repo,
Channel.tag_kind == tag_kind_id))
return tag_model.tag_is_alive(query, Channel)
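
A hedged sketch of how these channel helpers compose, assuming repo is an existing application Repository row and that an illustrative '1.0.0' release tag already exists.

from data.appr_model import channel as channel_model
from data.appr_model.models import NEW_MODELS

# repo, 'stable', and '1.0.0' are illustrative; the release tag must already exist.
channel_model.create_or_update_channel(repo, 'stable', '1.0.0', NEW_MODELS)
for chan in channel_model.get_repo_channels(repo, NEW_MODELS):
    print(chan.name)
channel_model.delete_channel(repo, 'stable', NEW_MODELS)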


@@ -0,0 +1,67 @@
import logging
import hashlib
import json
from cnr.models.package_base import get_media_type
from data.database import db_transaction, MediaType
from data.appr_model import tag as tag_model
logger = logging.getLogger(__name__)
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def _digest(manifestjson):
return _ensure_sha256_header(hashlib.sha256(json.dumps(manifestjson, sort_keys=True)).hexdigest())
def get_manifest_query(digest, media_type, models_ref):
Manifest = models_ref.Manifest
return Manifest.select().where(Manifest.digest == _ensure_sha256_header(digest),
Manifest.media_type == Manifest.media_type.get_id(media_type))
def get_manifest_with_blob(digest, media_type, models_ref):
Blob = models_ref.Blob
query = get_manifest_query(digest, media_type, models_ref)
return query.join(Blob).get()
def get_or_create_manifest(manifest_json, media_type_name, models_ref):
Manifest = models_ref.Manifest
digest = _digest(manifest_json)
try:
manifest = get_manifest_query(digest, media_type_name, models_ref).get()
except Manifest.DoesNotExist:
with db_transaction():
manifest = Manifest.create(digest=digest,
manifest_json=manifest_json,
media_type=Manifest.media_type.get_id(media_type_name))
return manifest
def get_manifest_types(repo, models_ref, release=None):
""" Returns an array of MediaTypes.name for a repo, can filter by tag """
Tag = models_ref.Tag
ManifestListManifest = models_ref.ManifestListManifest
query = tag_model.tag_is_alive(Tag
.select(MediaType.name)
.join(ManifestListManifest,
on=(ManifestListManifest.manifest_list == Tag.manifest_list))
.join(MediaType,
on=(ManifestListManifest.media_type == MediaType.id))
.where(Tag.repository == repo,
Tag.tag_kind == Tag.tag_kind.get_id('release')), Tag)
if release:
query = query.where(Tag.name == release)
manifests = set()
for m in query.distinct().tuples():
manifests.add(get_media_type(m[0]))
return manifests
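
A brief sketch of manifest creation and type lookup; the manifest JSON, media type name, and repo are illustrative, and a matching MediaType row is assumed to exist.

from data.appr_model import manifest as manifest_model
from data.appr_model.models import NEW_MODELS

# Illustrative manifest; the digest is derived from the canonical (sort_keys=True) JSON dump.
manifest_json = {'mediaType': 'application/vnd.cnr.package-manifest.helm.v0.json',
                 'content': {'digest': 'sha256:1234', 'urls': []}}
manifest = manifest_model.get_or_create_manifest(manifest_json, manifest_json['mediaType'],
                                                 NEW_MODELS)
print(manifest.digest)
print(manifest_model.get_manifest_types(repo, NEW_MODELS))  # e.g. set(['helm'])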


@@ -0,0 +1,67 @@
import logging
import hashlib
import json
from data.database import db_transaction
logger = logging.getLogger(__name__)
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def _digest(manifestjson):
return _ensure_sha256_header(hashlib.sha256(json.dumps(manifestjson, sort_keys=True)).hexdigest())
def get_manifest_list(digest, models_ref):
ManifestList = models_ref.ManifestList
return ManifestList.select().where(ManifestList.digest == _ensure_sha256_header(digest)).get()
def get_or_create_manifest_list(manifest_list_json, media_type_name, schema_version, models_ref):
ManifestList = models_ref.ManifestList
digest = _digest(manifest_list_json)
media_type_id = ManifestList.media_type.get_id(media_type_name)
try:
return get_manifest_list(digest, models_ref)
except ManifestList.DoesNotExist:
with db_transaction():
manifestlist = ManifestList.create(digest=digest, manifest_list_json=manifest_list_json,
schema_version=schema_version, media_type=media_type_id)
return manifestlist
def create_manifestlistmanifest(manifestlist, manifest_ids, manifest_list_json, models_ref):
""" From a manifestlist, manifests, and the manifest list blob,
create if doesn't exist the manfiestlistmanifest for each manifest """
for pos in xrange(len(manifest_ids)):
manifest_id = manifest_ids[pos]
manifest_json = manifest_list_json[pos]
get_or_create_manifestlistmanifest(manifest=manifest_id,
manifestlist=manifestlist,
media_type_name=manifest_json['mediaType'],
models_ref=models_ref)
def get_or_create_manifestlistmanifest(manifest, manifestlist, media_type_name, models_ref):
ManifestListManifest = models_ref.ManifestListManifest
media_type_id = ManifestListManifest.media_type.get_id(media_type_name)
try:
ml = (ManifestListManifest
.select()
.where(ManifestListManifest.manifest == manifest,
ManifestListManifest.media_type == media_type_id,
ManifestListManifest.manifest_list == manifestlist)).get()
except ManifestListManifest.DoesNotExist:
ml = ManifestListManifest.create(manifest_list=manifestlist, media_type=media_type_id,
manifest=manifest)
return ml
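
A sketch of how a manifest list is assembled from existing manifests; LIST_MEDIA_TYPE and SCHEMA_VERSION are the constants defined in release.py further down, and `manifests` is an illustrative, already-created list of Manifest rows.

from data.appr_model import manifest_list as manifest_list_model
from data.appr_model.models import NEW_MODELS
from data.appr_model.release import LIST_MEDIA_TYPE, SCHEMA_VERSION

# `manifests` is assumed to be a list of Manifest rows, ordered by id (see release.py).
list_json = [m.manifest_json for m in manifests]
manifest_ids = [m.id for m in manifests]
manifestlist = manifest_list_model.get_or_create_manifest_list(list_json, LIST_MEDIA_TYPE,
                                                               SCHEMA_VERSION, NEW_MODELS)
manifest_list_model.create_manifestlistmanifest(manifestlist, manifest_ids, list_json, NEW_MODELS)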

15
data/appr_model/models.py Normal file

@@ -0,0 +1,15 @@
from collections import namedtuple
from data.database import (ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList,
ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest,
ApprBlobPlacement, ApprChannel)
ModelsRef = namedtuple('ModelsRef', ['Tag', 'TagKind', 'BlobPlacementLocation', 'ManifestList',
'ManifestBlob', 'Blob', 'ManifestListManifest', 'Manifest',
'BlobPlacement', 'Channel', 'manifestlistmanifest_set_name',
'tag_set_prefetch_name'])
NEW_MODELS = ModelsRef(ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList,
ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest,
ApprBlobPlacement, ApprChannel, 'apprmanifestlistmanifest_set',
'apprtag_set')
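
Every helper in data.appr_model takes a models_ref argument; passing NEW_MODELS binds them to the Appr* tables. An illustrative example (repo and the tag name are assumptions):

from data.appr_model import tag as tag_model
from data.appr_model.models import NEW_MODELS

# Look up a release tag on an existing repository row.
if tag_model.tag_exists(repo, '1.0.0', NEW_MODELS):
    tag = tag_model.get_tag(repo, '1.0.0', NEW_MODELS)
    print(tag.lifetime_start)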


@@ -0,0 +1,67 @@
from cnr.models.package_base import get_media_type, manifest_media_type
from peewee import prefetch
from data import model
from data.database import Repository, Namespace
from data.appr_model import tag as tag_model
def list_packages_query(models_ref, namespace=None, media_type=None, search_query=None,
username=None):
""" List and filter repository by search query. """
Tag = models_ref.Tag
if username and not search_query:
repositories = model.repository.get_visible_repositories(username,
kind_filter='application',
include_public=True,
namespace=namespace,
limit=50)
if not repositories:
return []
repo_query = (Repository
.select(Repository, Namespace.username)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.id << [repo.rid for repo in repositories]))
if namespace:
repo_query = (repo_query
.where(Namespace.username == namespace))
else:
if search_query is not None:
fields = [model.repository.SEARCH_FIELDS.name.name]
repositories = model.repository.get_app_search(search_query,
username=username,
search_fields=fields,
limit=50)
if not repositories:
return []
repo_query = (Repository
.select(Repository, Namespace.username)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.id << [repo.id for repo in repositories]))
else:
repo_query = (Repository
.select(Repository, Namespace.username)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.visibility == model.repository.get_public_repo_visibility(),
Repository.kind == Repository.kind.get_id('application')))
if namespace:
repo_query = (repo_query
.where(Namespace.username == namespace))
tag_query = (Tag
.select()
.where(Tag.tag_kind == Tag.tag_kind.get_id('release'))
.order_by(Tag.lifetime_start))
if media_type:
tag_query = tag_model.filter_tags_by_media_type(tag_query, media_type, models_ref)
tag_query = tag_model.tag_is_alive(tag_query, Tag)
query = prefetch(repo_query, tag_query)
return query
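
A hedged example of listing packages; the namespace, username, and media type are illustrative.

from data.appr_model import package as package_model
from data.appr_model.models import NEW_MODELS

# Lists application repos visible to 'someuser' in the 'myorg' namespace,
# with their live release tags prefetched on each returned Repository.
for repo in package_model.list_packages_query(NEW_MODELS, namespace='myorg',
                                              media_type='helm', username='someuser'):
    print(repo.name)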

152
data/appr_model/release.py Normal file

@@ -0,0 +1,152 @@
import bisect
from cnr.exception import PackageAlreadyExists
from cnr.models.package_base import manifest_media_type
from data.database import db_transaction, get_epoch_timestamp
from data.appr_model import (blob as blob_model, manifest as manifest_model,
manifest_list as manifest_list_model,
tag as tag_model)
LIST_MEDIA_TYPE = 'application/vnd.cnr.manifest.list.v0.json'
SCHEMA_VERSION = 'v0'
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def get_app_release(repo, tag_name, media_type, models_ref):
""" Returns (tag, manifest, blob) given a repo object, tag_name, and media_type). """
ManifestListManifest = models_ref.ManifestListManifest
Manifest = models_ref.Manifest
Blob = models_ref.Blob
ManifestBlob = models_ref.ManifestBlob
manifestlistmanifest_set_name = models_ref.manifestlistmanifest_set_name
tag = tag_model.get_tag(repo, tag_name, models_ref, tag_kind='release')
media_type_id = ManifestListManifest.media_type.get_id(manifest_media_type(media_type))
manifestlistmanifest = (getattr(tag.manifest_list, manifestlistmanifest_set_name)
.join(Manifest)
.where(ManifestListManifest.media_type == media_type_id).get())
manifest = manifestlistmanifest.manifest
blob = Blob.select().join(ManifestBlob).where(ManifestBlob.manifest == manifest).get()
return (tag, manifest, blob)
def delete_app_release(repo, tag_name, media_type, models_ref):
""" Terminate a Tag/media-type couple
It find the corresponding tag/manifest and remove from the manifestlistmanifest the manifest
1. it terminates the current tag (in all-cases)
2. if the new manifestlist is not empty, it creates a new tag for it
"""
ManifestListManifest = models_ref.ManifestListManifest
manifestlistmanifest_set_name = models_ref.manifestlistmanifest_set_name
media_type_id = ManifestListManifest.media_type.get_id(manifest_media_type(media_type))
with db_transaction():
tag = tag_model.get_tag(repo, tag_name, models_ref)
manifest_list = tag.manifest_list
list_json = manifest_list.manifest_list_json
mlm_query = (ManifestListManifest
.select()
.where(ManifestListManifest.manifest_list == tag.manifest_list))
list_manifest_ids = sorted([mlm.manifest_id for mlm in mlm_query])
manifestlistmanifest = (getattr(tag.manifest_list, manifestlistmanifest_set_name)
.where(ManifestListManifest.media_type == media_type_id).get())
index = list_manifest_ids.index(manifestlistmanifest.manifest_id)
list_manifest_ids.pop(index)
list_json.pop(index)
if not list_json:
tag.lifetime_end = get_epoch_timestamp()
tag.save()
else:
manifestlist = manifest_list_model.get_or_create_manifest_list(list_json, LIST_MEDIA_TYPE,
SCHEMA_VERSION, models_ref)
manifest_list_model.create_manifestlistmanifest(manifestlist, list_manifest_ids,
list_json, models_ref)
tag = tag_model.create_or_update_tag(repo, tag_name, models_ref, manifest_list=manifestlist,
tag_kind="release")
return tag
def create_app_release(repo, tag_name, manifest_data, digest, models_ref, force=False):
""" Create a new application release, it includes creating a new Tag, ManifestList,
ManifestListManifests, Manifest, ManifestBlob.
To deduplicate the ManifestList, the manifestlist_json is kept ordered by the manifest.id.
To find the insert point in the ManifestList it uses bisect on the manifest-ids list.
"""
ManifestList = models_ref.ManifestList
ManifestListManifest = models_ref.ManifestListManifest
Blob = models_ref.Blob
ManifestBlob = models_ref.ManifestBlob
with db_transaction():
# Create/get the package manifest
manifest = manifest_model.get_or_create_manifest(manifest_data, manifest_data['mediaType'],
models_ref)
# get the tag
tag = tag_model.get_or_initialize_tag(repo, tag_name, models_ref)
if tag.manifest_list is None:
tag.manifest_list = ManifestList(media_type=ManifestList.media_type.get_id(LIST_MEDIA_TYPE),
schema_version=SCHEMA_VERSION,
manifest_list_json=[], )
elif tag_model.tag_media_type_exists(tag, manifest.media_type, models_ref):
if force:
delete_app_release(repo, tag_name, manifest.media_type.name, models_ref)
return create_app_release(repo, tag_name, manifest_data, digest, models_ref, force=False)
else:
raise PackageAlreadyExists("package exists already")
list_json = tag.manifest_list.manifest_list_json
mlm_query = (ManifestListManifest
.select()
.where(ManifestListManifest.manifest_list == tag.manifest_list))
list_manifest_ids = sorted([mlm.manifest_id for mlm in mlm_query])
insert_point = bisect.bisect_left(list_manifest_ids, manifest.id)
list_json.insert(insert_point, manifest.manifest_json)
list_manifest_ids.insert(insert_point, manifest.id)
manifestlist = manifest_list_model.get_or_create_manifest_list(list_json, LIST_MEDIA_TYPE,
SCHEMA_VERSION, models_ref)
manifest_list_model.create_manifestlistmanifest(manifestlist, list_manifest_ids, list_json,
models_ref)
tag = tag_model.create_or_update_tag(repo, tag_name, models_ref, manifest_list=manifestlist,
tag_kind="release")
blob_digest = digest
try:
(ManifestBlob
.select()
.join(Blob)
.where(ManifestBlob.manifest == manifest,
Blob.digest == _ensure_sha256_header(blob_digest)).get())
except ManifestBlob.DoesNotExist:
blob = blob_model.get_blob(blob_digest, models_ref)
ManifestBlob.create(manifest=manifest, blob=blob)
return tag
def get_release_objs(repo, models_ref, media_type=None):
""" Returns an array of Tag for a repo, with optional filtering by media_type. """
Tag = models_ref.Tag
release_query = (Tag
.select()
.where(Tag.repository == repo,
Tag.tag_kind == Tag.tag_kind.get_id("release")))
if media_type:
release_query = tag_model.filter_tags_by_media_type(release_query, media_type, models_ref)
return tag_model.tag_is_alive(release_query, Tag)
def get_releases(repo, model_refs, media_type=None):
""" Returns an array of Tag.name for a repo, can filter by media_type. """
return [t.name for t in get_release_objs(repo, model_refs, media_type)]
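
A sketch of the release flow, assuming the manifest's blob was already uploaded via the blob helpers above; manifest_data, the blob digest, repo, and the tag name are illustrative.

from data.appr_model import release as release_model
from data.appr_model.models import NEW_MODELS

# manifest_data is an illustrative CNR manifest dict whose blob (by digest) already exists.
blob_digest = 'sha256:' + 'cd' * 32
tag = release_model.create_app_release(repo, '1.0.0', manifest_data, blob_digest,
                                       NEW_MODELS, force=False)
print(release_model.get_releases(repo, NEW_MODELS))  # e.g. ['1.0.0']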

99
data/appr_model/tag.py Normal file

@@ -0,0 +1,99 @@
import logging
from cnr.models.package_base import manifest_media_type
from peewee import IntegrityError
from data.model import (db_transaction, TagAlreadyCreatedException)
from data.database import get_epoch_timestamp_ms, db_for_update
logger = logging.getLogger(__name__)
def tag_is_alive(query, cls, now_ts=None):
return query.where((cls.lifetime_end >> None) |
(cls.lifetime_end > now_ts))
def tag_media_type_exists(tag, media_type, models_ref):
ManifestListManifest = models_ref.ManifestListManifest
manifestlistmanifest_set_name = models_ref.manifestlistmanifest_set_name
return (getattr(tag.manifest_list, manifestlistmanifest_set_name)
.where(ManifestListManifest.media_type == media_type).count() > 0)
def create_or_update_tag(repo, tag_name, models_ref, manifest_list=None, linked_tag=None,
tag_kind="release"):
Tag = models_ref.Tag
now_ts = get_epoch_timestamp_ms()
tag_kind_id = Tag.tag_kind.get_id(tag_kind)
with db_transaction():
try:
tag = db_for_update(tag_is_alive(Tag
.select()
.where(Tag.repository == repo,
Tag.name == tag_name,
Tag.tag_kind == tag_kind_id), Tag, now_ts)).get()
if tag.manifest_list == manifest_list and tag.linked_tag == linked_tag:
return tag
tag.lifetime_end = now_ts
tag.save()
except Tag.DoesNotExist:
pass
try:
return Tag.create(repository=repo, manifest_list=manifest_list, linked_tag=linked_tag,
name=tag_name, lifetime_start=now_ts, lifetime_end=None,
tag_kind=tag_kind_id)
except IntegrityError:
msg = 'Tag with name %s and lifetime start %s under repository %s/%s already exists'
raise TagAlreadyCreatedException(msg % (tag_name, now_ts, repo.namespace_user, repo.name))
def get_or_initialize_tag(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
try:
return tag_is_alive(Tag.select().where(Tag.repository == repo, Tag.name == tag_name), Tag).get()
except Tag.DoesNotExist:
return Tag(repo=repo, name=tag_name, tag_kind=Tag.tag_kind.get_id(tag_kind))
def get_tag(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
return tag_is_alive(Tag.select()
.where(Tag.repository == repo,
Tag.name == tag_name,
Tag.tag_kind == Tag.tag_kind.get_id(tag_kind)), Tag).get()
def delete_tag(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
tag_kind_id = Tag.tag_kind.get_id(tag_kind)
tag = tag_is_alive(Tag.select()
.where(Tag.repository == repo,
Tag.name == tag_name, Tag.tag_kind == tag_kind_id), Tag).get()
tag.lifetime_end = get_epoch_timestamp_ms()
tag.save()
return tag
def tag_exists(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
try:
get_tag(repo, tag_name, models_ref, tag_kind)
return True
except Tag.DoesNotExist:
return False
def filter_tags_by_media_type(tag_query, media_type, models_ref):
""" Return only available tag for a media_type. """
ManifestListManifest = models_ref.ManifestListManifest
Tag = models_ref.Tag
media_type = manifest_media_type(media_type)
t = (tag_query
.join(ManifestListManifest, on=(ManifestListManifest.manifest_list == Tag.manifest_list))
.where(ManifestListManifest.media_type == ManifestListManifest.media_type.get_id(media_type)))
return t
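
A short illustration of the lifetime-based tag model used above: a tag is "alive" while its lifetime_end is unset, and delete_tag simply stamps lifetime_end rather than removing the row. repo and the tag name are illustrative.

from data.appr_model import tag as tag_model
from data.appr_model.models import NEW_MODELS

Tag = NEW_MODELS.Tag
# Only tags with lifetime_end unset (or beyond now_ts, when given) are returned.
alive = tag_model.tag_is_alive(Tag.select().where(Tag.repository == repo), Tag)
print([t.name for t in alive])
tag_model.delete_tag(repo, '1.0.0', NEW_MODELS)   # sets lifetime_end, keeps history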

37
data/archivedlogs.py Normal file

@@ -0,0 +1,37 @@
import logging
from util.registry.gzipinputstream import GzipInputStream
from flask import send_file, abort
from data.userfiles import DelegateUserfiles, UserfilesHandlers
JSON_MIMETYPE = 'application/json'
logger = logging.getLogger(__name__)
class LogArchive(object):
def __init__(self, app=None, distributed_storage=None):
self.app = app
if app is not None:
self.state = self.init_app(app, distributed_storage)
else:
self.state = None
def init_app(self, app, distributed_storage):
location = app.config.get('LOG_ARCHIVE_LOCATION')
path = app.config.get('LOG_ARCHIVE_PATH', None)
handler_name = 'web.logarchive'
log_archive = DelegateUserfiles(app, distributed_storage, location, path,
handler_name=handler_name)
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['log_archive'] = log_archive
return log_archive
def __getattr__(self, name):
return getattr(self.state, name, None)
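
A minimal wiring sketch for the LogArchive extension, assuming a configured Flask app; the storage location and path are illustrative, and the DistributedStorage instance is stubbed out here.

from flask import Flask
from data.archivedlogs import LogArchive

app = Flask(__name__)
app.config['LOG_ARCHIVE_LOCATION'] = 'local_us'   # illustrative storage location name
app.config['LOG_ARCHIVE_PATH'] = 'logarchive/'
distributed_storage = None                        # stand-in for the app's DistributedStorage
log_archive = LogArchive(app, distributed_storage)
# app.extensions['log_archive'] now holds the underlying DelegateUserfiles instance.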

453
data/billing.py Normal file

@@ -0,0 +1,453 @@
import stripe
from datetime import datetime, timedelta
from calendar import timegm
from util.morecollections import AttrDict
PLANS = [
# Deprecated Plans (2013-2014)
{
'title': 'Micro',
'price': 700,
'privateRepos': 5,
'stripeId': 'micro',
'audience': 'For smaller teams',
'bus_features': False,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'personal-30',
'plans_page_hidden': False,
},
{
'title': 'Basic',
'price': 1200,
'privateRepos': 10,
'stripeId': 'small',
'audience': 'For your basic team',
'bus_features': False,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-micro-30',
'plans_page_hidden': False,
},
{
'title': 'Yacht',
'price': 5000,
'privateRepos': 20,
'stripeId': 'bus-coreos-trial',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 180,
'superseded_by': 'bus-small-30',
'plans_page_hidden': False,
},
{
'title': 'Personal',
'price': 1200,
'privateRepos': 5,
'stripeId': 'personal',
'audience': 'Individuals',
'bus_features': False,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'personal-30',
'plans_page_hidden': False,
},
{
'title': 'Skiff',
'price': 2500,
'privateRepos': 10,
'stripeId': 'bus-micro',
'audience': 'For startups',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-micro-30',
'plans_page_hidden': False,
},
{
'title': 'Yacht',
'price': 5000,
'privateRepos': 20,
'stripeId': 'bus-small',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-small-30',
'plans_page_hidden': False,
},
{
'title': 'Freighter',
'price': 10000,
'privateRepos': 50,
'stripeId': 'bus-medium',
'audience': 'For normal businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-medium-30',
'plans_page_hidden': False,
},
{
'title': 'Tanker',
'price': 20000,
'privateRepos': 125,
'stripeId': 'bus-large',
'audience': 'For large businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-large-30',
'plans_page_hidden': False,
},
# Deprecated plans (2014-2017)
{
'title': 'Personal',
'price': 1200,
'privateRepos': 5,
'stripeId': 'personal-30',
'audience': 'Individuals',
'bus_features': False,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'personal-2018',
'plans_page_hidden': False,
},
{
'title': 'Skiff',
'price': 2500,
'privateRepos': 10,
'stripeId': 'bus-micro-30',
'audience': 'For startups',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-micro-2018',
'plans_page_hidden': False,
},
{
'title': 'Yacht',
'price': 5000,
'privateRepos': 20,
'stripeId': 'bus-small-30',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-small-2018',
'plans_page_hidden': False,
},
{
'title': 'Freighter',
'price': 10000,
'privateRepos': 50,
'stripeId': 'bus-medium-30',
'audience': 'For normal businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-medium-2018',
'plans_page_hidden': False,
},
{
'title': 'Tanker',
'price': 20000,
'privateRepos': 125,
'stripeId': 'bus-large-30',
'audience': 'For large businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-large-2018',
'plans_page_hidden': False,
},
{
'title': 'Carrier',
'price': 35000,
'privateRepos': 250,
'stripeId': 'bus-xlarge-30',
'audience': 'For extra large businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-xlarge-2018',
'plans_page_hidden': False,
},
{
'title': 'Huge',
'price': 65000,
'privateRepos': 500,
'stripeId': 'bus-500-30',
'audience': 'For huge business',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-500-2018',
'plans_page_hidden': False,
},
{
'title': 'Huuge',
'price': 120000,
'privateRepos': 1000,
'stripeId': 'bus-1000-30',
'audience': 'For the SaaS savvy enterprise',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-1000-2018',
'plans_page_hidden': False,
},
# Active plans (as of Dec 2017)
{
'title': 'Open Source',
'price': 0,
'privateRepos': 0,
'stripeId': 'free',
'audience': 'Commitment to FOSS',
'bus_features': False,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Developer',
'price': 1500,
'privateRepos': 5,
'stripeId': 'personal-2018',
'audience': 'Individuals',
'bus_features': False,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Micro',
'price': 3000,
'privateRepos': 10,
'stripeId': 'bus-micro-2018',
'audience': 'For startups',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Small',
'price': 6000,
'privateRepos': 20,
'stripeId': 'bus-small-2018',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Medium',
'price': 12500,
'privateRepos': 50,
'stripeId': 'bus-medium-2018',
'audience': 'For normal businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Large',
'price': 25000,
'privateRepos': 125,
'stripeId': 'bus-large-2018',
'audience': 'For large businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Extra Large',
'price': 45000,
'privateRepos': 250,
'stripeId': 'bus-xlarge-2018',
'audience': 'For extra large businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'XXL',
'price': 85000,
'privateRepos': 500,
'stripeId': 'bus-500-2018',
'audience': 'For huge business',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'XXXL',
'price': 160000,
'privateRepos': 1000,
'stripeId': 'bus-1000-2018',
'audience': 'For the SaaS savvy enterprise',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'XXXXL',
'price': 310000,
'privateRepos': 2000,
'stripeId': 'bus-2000-2018',
'audience': 'For the SaaS savvy big enterprise',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
]
def get_plan(plan_id):
""" Returns the plan with the given ID or None if none. """
for plan in PLANS:
if plan['stripeId'] == plan_id:
return plan
return None
class FakeSubscription(AttrDict):
@classmethod
def build(cls, data, customer):
data = AttrDict.deep_copy(data)
data['customer'] = customer
return cls(data)
def delete(self):
self.customer.subscription = None
class FakeStripe(object):
class Customer(AttrDict):
FAKE_PLAN = AttrDict({
'id': 'bus-small',
})
FAKE_SUBSCRIPTION = AttrDict({
'plan': FAKE_PLAN,
'current_period_start': timegm(datetime.utcnow().utctimetuple()),
'current_period_end': timegm((datetime.utcnow() + timedelta(days=30)).utctimetuple()),
'trial_start': timegm(datetime.utcnow().utctimetuple()),
'trial_end': timegm((datetime.utcnow() + timedelta(days=30)).utctimetuple()),
})
FAKE_CARD = AttrDict({
'id': 'card123',
'name': 'Joe User',
'type': 'Visa',
'last4': '4242',
'exp_month': 5,
'exp_year': 2016,
})
FAKE_CARD_LIST = AttrDict({
'data': [FAKE_CARD],
})
ACTIVE_CUSTOMERS = {}
@property
def card(self):
return self.get('new_card', None)
@card.setter
def card(self, card_token):
self['new_card'] = card_token
@property
def plan(self):
return self.get('new_plan', None)
@plan.setter
def plan(self, plan_name):
self['new_plan'] = plan_name
def save(self):
if self.get('new_card', None) is not None:
raise stripe.error.CardError('Test raising exception on set card.', self.get('new_card'), 402)
if self.get('new_plan', None) is not None:
if self.subscription is None:
self.subscription = FakeSubscription.build(self.FAKE_SUBSCRIPTION, self)
self.subscription.plan.id = self.get('new_plan')
@classmethod
def retrieve(cls, stripe_customer_id):
if stripe_customer_id in cls.ACTIVE_CUSTOMERS:
cls.ACTIVE_CUSTOMERS[stripe_customer_id].pop('new_card', None)
cls.ACTIVE_CUSTOMERS[stripe_customer_id].pop('new_plan', None)
return cls.ACTIVE_CUSTOMERS[stripe_customer_id]
else:
new_customer = cls({
'default_card': 'card123',
'cards': AttrDict.deep_copy(cls.FAKE_CARD_LIST),
'id': stripe_customer_id,
})
new_customer.subscription = FakeSubscription.build(cls.FAKE_SUBSCRIPTION, new_customer)
cls.ACTIVE_CUSTOMERS[stripe_customer_id] = new_customer
return new_customer
class Invoice(AttrDict):
@staticmethod
def list(customer, count):
return AttrDict({
'data': [],
})
class Billing(object):
def __init__(self, app=None):
self.app = app
if app is not None:
self.state = self.init_app(app)
else:
self.state = None
def init_app(self, app):
billing_type = app.config.get('BILLING_TYPE', 'FakeStripe')
if billing_type == 'Stripe':
billing = stripe
stripe.api_key = app.config.get('STRIPE_SECRET_KEY', None)
elif billing_type == 'FakeStripe':
billing = FakeStripe
else:
raise RuntimeError('Unknown billing type: %s' % billing_type)
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['billing'] = billing
return billing
def __getattr__(self, name):
return getattr(self.state, name, None)
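
A small example of the plan table and the Billing extension; the stripeId lookup and customer id are illustrative, and prices appear to be stored in cents (e.g. 6000 for the $60 Small plan). FakeStripe is the backend used whenever BILLING_TYPE is not 'Stripe'.

from flask import Flask
from data.billing import Billing, get_plan

plan = get_plan('bus-small-2018')
print(plan['title'])    # 'Small'
print(plan['price'])    # 6000; prices appear to be in cents

app = Flask(__name__)
app.config['BILLING_TYPE'] = 'FakeStripe'   # 'Stripe' would also require STRIPE_SECRET_KEY
billing = Billing(app)
customer = billing.Customer.retrieve('cus_test')   # FakeStripe returns an in-memory customer
print(customer.subscription.plan.id)               # 'bus-small'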

179
data/buildlogs.py Normal file

@@ -0,0 +1,179 @@
import redis
import json
import time
from contextlib import closing
from util.dynamic import import_class
from datetime import timedelta
ONE_DAY = timedelta(days=1)
SEVEN_DAYS = timedelta(days=7)
class BuildStatusRetrievalError(Exception):
pass
class RedisBuildLogs(object):
ERROR = 'error'
COMMAND = 'command'
PHASE = 'phase'
def __init__(self, redis_config):
self._redis_client = None
self._redis_config = redis_config
@property
def _redis(self):
if self._redis_client is not None:
return self._redis_client
args = dict(self._redis_config)
args.update({'socket_connect_timeout': 1,
'socket_timeout': 2,
'single_connection_client': True})
self._redis_client = redis.StrictRedis(**args)
return self._redis_client
@staticmethod
def _logs_key(build_id):
return 'builds/%s/logs' % build_id
def append_log_entry(self, build_id, log_obj):
"""
Appends the serialized form of log_obj to the end of the log entry list
and returns the new length of the list.
"""
pipeline = self._redis.pipeline(transaction=False)
pipeline.expire(self._logs_key(build_id), SEVEN_DAYS)
pipeline.rpush(self._logs_key(build_id), json.dumps(log_obj))
result = pipeline.execute()
return result[1]
def append_log_message(self, build_id, log_message, log_type=None, log_data=None):
"""
Wraps the message in an envelope, pushes it to the end of the log entry
list, and returns the index at which it was inserted.
"""
log_obj = {
'message': log_message
}
if log_type:
log_obj['type'] = log_type
if log_data:
log_obj['data'] = log_data
return self.append_log_entry(build_id, log_obj) - 1
def get_log_entries(self, build_id, start_index):
"""
Returns a tuple of the current length of the list and an iterable of the
requested log entries.
"""
try:
llen = self._redis.llen(self._logs_key(build_id))
log_entries = self._redis.lrange(self._logs_key(build_id), start_index, -1)
return (llen, (json.loads(entry) for entry in log_entries))
except redis.RedisError as re:
raise BuildStatusRetrievalError('Cannot retrieve build logs: %s' % re)
def expire_status(self, build_id):
"""
Sets the status entry to expire in 1 day.
"""
self._redis.expire(self._status_key(build_id), ONE_DAY)
def expire_log_entries(self, build_id):
"""
Sets the log entry to expire in 1 day.
"""
self._redis.expire(self._logs_key(build_id), ONE_DAY)
def delete_log_entries(self, build_id):
"""
Delete the log entry
"""
self._redis.delete(self._logs_key(build_id))
@staticmethod
def _status_key(build_id):
return 'builds/%s/status' % build_id
def set_status(self, build_id, status_obj):
"""
Sets the status key for this build to json serialized form of the supplied
obj.
"""
self._redis.set(self._status_key(build_id), json.dumps(status_obj), ex=SEVEN_DAYS)
def get_status(self, build_id):
"""
Loads the status information for the specified build id.
"""
try:
fetched = self._redis.get(self._status_key(build_id))
except redis.RedisError as re:
raise BuildStatusRetrievalError('Cannot retrieve build status: %s' % re)
return json.loads(fetched) if fetched else None
@staticmethod
def _health_key():
return '_health'
def check_health(self):
try:
args = dict(self._redis_config)
args.update({'socket_connect_timeout': 1,
'socket_timeout': 1,
'single_connection_client': True})
with closing(redis.StrictRedis(**args)) as connection:
if not connection.ping():
return (False, 'Could not ping redis')
# Ensure we can write and read a key.
connection.set(self._health_key(), time.time())
connection.get(self._health_key())
return (True, None)
except redis.RedisError as re:
return (False, 'Could not connect to redis: %s' % re.message)
class BuildLogs(object):
def __init__(self, app=None):
self.app = app
if app is not None:
self.state = self.init_app(app)
else:
self.state = None
def init_app(self, app):
buildlogs_config = app.config.get('BUILDLOGS_REDIS')
if not buildlogs_config:
# This is the old key name.
buildlogs_config = {
'host': app.config.get('BUILDLOGS_REDIS_HOSTNAME')
}
buildlogs_options = app.config.get('BUILDLOGS_OPTIONS', [])
buildlogs_import = app.config.get('BUILDLOGS_MODULE_AND_CLASS', None)
if buildlogs_import is None:
klass = RedisBuildLogs
else:
klass = import_class(buildlogs_import[0], buildlogs_import[1])
buildlogs = klass(buildlogs_config, *buildlogs_options)
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['buildlogs'] = buildlogs
return buildlogs
def __getattr__(self, name):
return getattr(self.state, name, None)
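
A standalone sketch of the Redis-backed build log store; the Redis address and build id are illustrative, and in the application this is wired up through the BuildLogs Flask extension instead.

from data.buildlogs import RedisBuildLogs

logs = RedisBuildLogs({'host': '127.0.0.1', 'port': 6379})   # illustrative Redis config
build_id = 'hypothetical-build-id'
logs.append_log_message(build_id, 'Step 1/3 : FROM alpine', log_type=RedisBuildLogs.COMMAND)
length, entries = logs.get_log_entries(build_id, 0)
for entry in entries:
    print(entry['message'])
logs.set_status(build_id, {'phase': 'complete'})
print(logs.get_status(build_id))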

23
data/cache/__init__.py vendored Normal file

@@ -0,0 +1,23 @@
from data.cache.impl import NoopDataModelCache, InMemoryDataModelCache, MemcachedModelCache
def get_model_cache(config):
""" Returns a data model cache matching the given configuration. """
cache_config = config.get('DATA_MODEL_CACHE_CONFIG', {})
engine = cache_config.get('engine', 'noop')
if engine == 'noop':
return NoopDataModelCache()
if engine == 'inmemory':
return InMemoryDataModelCache()
if engine == 'memcached':
endpoint = cache_config.get('endpoint', None)
if endpoint is None:
raise Exception('Missing `endpoint` for memcached model cache configuration')
timeout = cache_config.get('timeout')
connect_timeout = cache_config.get('connect_timeout')
return MemcachedModelCache(endpoint, timeout=timeout, connect_timeout=connect_timeout)
raise Exception('Unknown model cache engine `%s`' % engine)
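Illustrative DATA_MODEL_CACHE_CONFIG values for each engine:

from data.cache import get_model_cache

noop_cache = get_model_cache({})   # no config defaults to the no-op engine
memcached_cache = get_model_cache({
    'DATA_MODEL_CACHE_CONFIG': {
        'engine': 'memcached',
        'endpoint': ('127.0.0.1', 11211),   # illustrative endpoint
        'timeout': 1,
        'connect_timeout': 1,
    },
})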

27
data/cache/cache_key.py vendored Normal file

@@ -0,0 +1,27 @@
from collections import namedtuple
class CacheKey(namedtuple('CacheKey', ['key', 'expiration'])):
""" Defines a key into the data model cache. """
pass
def for_repository_blob(namespace_name, repo_name, digest, version):
""" Returns a cache key for a blob in a repository. """
return CacheKey('repo_blob__%s_%s_%s_%s' % (namespace_name, repo_name, digest, version), '60s')
def for_catalog_page(auth_context_key, start_id, limit):
""" Returns a cache key for a single page of a catalog lookup for an authed context. """
params = (auth_context_key or '(anon)', start_id or 0, limit or 0)
return CacheKey('catalog_page__%s_%s_%s' % params, '60s')
def for_namespace_geo_restrictions(namespace_name):
""" Returns a cache key for the geo restrictions for a namespace. """
return CacheKey('geo_restrictions__%s' % (namespace_name), '240s')
def for_active_repo_tags(repository_id, start_pagination_id, limit):
""" Returns a cache key for the active tags in a repository. """
return CacheKey('repo_active_tags__%s_%s_%s' % (repository_id, start_pagination_id, limit),
'120s')
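
For example (the names and digest are illustrative):

from data.cache.cache_key import for_repository_blob

key = for_repository_blob('myorg', 'myrepo', 'sha256:abcd', 2)
print(key.key)          # 'repo_blob__myorg_myrepo_sha256:abcd_2'
print(key.expiration)   # '60s'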

146
data/cache/impl.py vendored Normal file

@@ -0,0 +1,146 @@
import logging
import json
from datetime import datetime
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from pymemcache.client.base import Client
from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta
logger = logging.getLogger(__name__)
def is_not_none(value):
return value is not None
@add_metaclass(ABCMeta)
class DataModelCache(object):
""" Defines an interface for cache storing and returning tuple data model objects. """
@abstractmethod
def retrieve(self, cache_key, loader, should_cache=is_not_none):
""" Checks the cache for the specified cache key and returns the value found (if any). If none
found, the loader is called to get a result and populate the cache.
"""
pass
class NoopDataModelCache(DataModelCache):
""" Implementation of the data model cache which does nothing. """
def retrieve(self, cache_key, loader, should_cache=is_not_none):
return loader()
class InMemoryDataModelCache(DataModelCache):
""" Implementation of the data model cache backed by an in-memory dictionary. """
def __init__(self):
self.cache = ExpiresDict()
def retrieve(self, cache_key, loader, should_cache=is_not_none):
not_found = [None]
logger.debug('Checking cache for key %s', cache_key.key)
result = self.cache.get(cache_key.key, default_value=not_found)
if result != not_found:
logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
return json.loads(result)
logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
result = loader()
logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
if should_cache(result):
logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
self.cache.set(cache_key.key, json.dumps(result), expires=expires)
logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
else:
logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
return result
_DEFAULT_MEMCACHE_TIMEOUT = 1 # second
_DEFAULT_MEMCACHE_CONNECT_TIMEOUT = 1 # second
_STRING_TYPE = 1
_JSON_TYPE = 2
class MemcachedModelCache(DataModelCache):
""" Implementation of the data model cache backed by a memcached. """
def __init__(self, endpoint, timeout=_DEFAULT_MEMCACHE_TIMEOUT,
connect_timeout=_DEFAULT_MEMCACHE_CONNECT_TIMEOUT):
self.endpoint = endpoint
self.timeout = timeout
self.connect_timeout = connect_timeout
self.client = None
def _get_client(self):
client = self.client
if client is not None:
return client
try:
# Copied from the doc comment for Client.
def serialize_json(key, value):
if type(value) == str:
return value, _STRING_TYPE
return json.dumps(value), _JSON_TYPE
def deserialize_json(key, value, flags):
if flags == _STRING_TYPE:
return value
if flags == _JSON_TYPE:
return json.loads(value)
raise Exception("Unknown flags for value: {1}".format(flags))
self.client = Client(self.endpoint, no_delay=True, timeout=self.timeout,
connect_timeout=self.connect_timeout,
key_prefix='data_model_cache__',
serializer=serialize_json,
deserializer=deserialize_json,
ignore_exc=True)
return self.client
except:
logger.exception('Got exception when creating memcached client to %s', self.endpoint)
return None
def retrieve(self, cache_key, loader, should_cache=is_not_none):
not_found = [None]
client = self._get_client()
if client is not None:
logger.debug('Checking cache for key %s', cache_key.key)
try:
result = client.get(cache_key.key, default=not_found)
if result != not_found:
logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
return result
except:
logger.exception('Got exception when trying to retrieve key %s', cache_key.key)
logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
result = loader()
logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
if client is not None and should_cache(result):
try:
logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
expires = convert_to_timedelta(cache_key.expiration) if cache_key.expiration else None
client.set(cache_key.key, result, expire=int(expires.total_seconds()) if expires else None)
logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
except:
logger.exception('Got exception when trying to set key %s to %s', cache_key.key, result)
else:
logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
return result
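
A quick sketch of the retrieve() contract shared by all three implementations; the key and loader are illustrative.

from data.cache.cache_key import CacheKey
from data.cache.impl import InMemoryDataModelCache

cache = InMemoryDataModelCache()
key = CacheKey('example_key', '60s')   # illustrative key and expiration

def loader():
    # Stand-in for a real data model lookup.
    return {'value': 42}

print(cache.retrieve(key, loader))         # cache miss: calls loader and caches the result
print(cache.retrieve(key, lambda: None))   # cache hit until the 60s expiration elapses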

56
data/cache/test/test_cache.py vendored Normal file

@@ -0,0 +1,56 @@
import pytest
from mock import patch
from data.cache import InMemoryDataModelCache, NoopDataModelCache, MemcachedModelCache
from data.cache.cache_key import CacheKey
class MockClient(object):
def __init__(self, server, **kwargs):
self.data = {}
def get(self, key, default=None):
return self.data.get(key, default)
def set(self, key, value, expire=None):
self.data[key] = value
@pytest.mark.parametrize('cache_type', [
(NoopDataModelCache),
(InMemoryDataModelCache),
])
def test_caching(cache_type):
key = CacheKey('foo', '60m')
cache = cache_type()
# Perform two retrievals, and make sure both return.
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
def test_memcache():
key = CacheKey('foo', '60m')
with patch('data.cache.impl.Client', MockClient):
cache = MemcachedModelCache(('127.0.0.1', '-1'))
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
def test_memcache_should_cache():
key = CacheKey('foo', None)
def sc(value):
return value['a'] != 1234
with patch('data.cache.impl.Client', MockClient):
cache = MemcachedModelCache(('127.0.0.1', '-1'))
assert cache.retrieve(key, lambda: {'a': 1234}, should_cache=sc) == {'a': 1234}
# Ensure not cached since it was `1234`.
assert cache._get_client().get(key.key) is None
# Ensure cached.
assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345}
assert cache._get_client().get(key.key) is not None
assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345}

1793
data/database.py Normal file

File diff suppressed because it is too large

82
data/encryption.py Normal file

@@ -0,0 +1,82 @@
import os
import logging
import base64
from collections import namedtuple
from cryptography.hazmat.primitives.ciphers.aead import AESCCM
from util.security.secret import convert_secret_key
class DecryptionFailureException(Exception):
""" Exception raised if a field could not be decrypted. """
EncryptionVersion = namedtuple('EncryptionVersion', ['prefix', 'encrypt', 'decrypt'])
logger = logging.getLogger(__name__)
_SEPARATOR = '$$'
AES_CCM_NONCE_LENGTH = 13
def _encrypt_ccm(secret_key, value, field_max_length=None):
aesccm = AESCCM(secret_key)
nonce = os.urandom(AES_CCM_NONCE_LENGTH)
ct = aesccm.encrypt(nonce, value.encode('utf-8'), None)
encrypted = base64.b64encode(nonce + ct)
if field_max_length:
msg = 'Tried to encode a value too large for this field'
assert (len(encrypted) + _RESERVED_FIELD_SPACE) <= field_max_length, msg
return encrypted
def _decrypt_ccm(secret_key, value):
aesccm = AESCCM(secret_key)
try:
decoded = base64.b64decode(value)
nonce = decoded[:AES_CCM_NONCE_LENGTH]
ct = decoded[AES_CCM_NONCE_LENGTH:]
decrypted = aesccm.decrypt(nonce, ct, None)
return decrypted.decode('utf-8')
except Exception:
logger.exception('Got exception when trying to decrypt value `%s`', value)
raise DecryptionFailureException()
# Defines the versions of encryptions we support. This will allow us to upgrade to newer encryption
# protocols (fairly seamlessly) if need be in the future.
_VERSIONS = {
'v0': EncryptionVersion('v0', _encrypt_ccm, _decrypt_ccm),
}
_RESERVED_FIELD_SPACE = len(_SEPARATOR) + max([len(k) for k in _VERSIONS.keys()])
class FieldEncrypter(object):
""" Helper object for defining how fields are encrypted and decrypted between the database
and the application.
"""
def __init__(self, secret_key, version='v0'):
self._secret_key = convert_secret_key(secret_key)
self._encryption_version = _VERSIONS[version]
def encrypt_value(self, value, field_max_length=None):
""" Encrypts the value using the current version of encryption. """
encrypted_value = self._encryption_version.encrypt(self._secret_key, value, field_max_length)
return '%s%s%s' % (self._encryption_version.prefix, _SEPARATOR, encrypted_value)
def decrypt_value(self, value):
""" Decrypts the value, returning it. If the value cannot be decrypted
raises a DecryptionFailureException.
"""
if _SEPARATOR not in value:
raise DecryptionFailureException('Invalid encrypted value')
version_prefix, data = value.split(_SEPARATOR, 1)
if version_prefix not in _VERSIONS:
raise DecryptionFailureException('Unknown version prefix %s' % version_prefix)
return _VERSIONS[version_prefix].decrypt(self._secret_key, data)
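
A round-trip sketch, assuming convert_secret_key derives a valid AES key from the illustrative secret below.

from data.encryption import FieldEncrypter

encrypter = FieldEncrypter('super-secret-config-key')   # illustrative secret
token = encrypter.encrypt_value('my-robot-token')       # e.g. 'v0$$<base64 nonce+ciphertext>'
assert encrypter.decrypt_value(token) == 'my-robot-token'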

297
data/fields.py Normal file

@@ -0,0 +1,297 @@
import base64
import string
import json
from random import SystemRandom
import bcrypt
import resumablehashlib
from peewee import TextField, CharField, SmallIntegerField
from data.text import prefix_search
def random_string(length=16):
random = SystemRandom()
return ''.join([random.choice(string.ascii_uppercase + string.digits)
for _ in range(length)])
class _ResumableSHAField(TextField):
def _create_sha(self):
raise NotImplementedError
def db_value(self, value):
if value is None:
return None
sha_state = value.state()
# One of the fields is a byte string, let's base64 encode it to make sure
# we can store and fetch it regardless of the default collation.
sha_state[3] = base64.b64encode(sha_state[3])
return json.dumps(sha_state)
def python_value(self, value):
if value is None:
return None
sha_state = json.loads(value)
# We need to base64 decode the data bytestring.
sha_state[3] = base64.b64decode(sha_state[3])
to_resume = self._create_sha()
to_resume.set_state(sha_state)
return to_resume
class ResumableSHA256Field(_ResumableSHAField):
def _create_sha(self):
return resumablehashlib.sha256()
class ResumableSHA1Field(_ResumableSHAField):
def _create_sha(self):
return resumablehashlib.sha1()
class JSONField(TextField):
def db_value(self, value):
return json.dumps(value)
def python_value(self, value):
if value is None or value == "":
return {}
return json.loads(value)
class Base64BinaryField(TextField):
def db_value(self, value):
if value is None:
return None
return base64.b64encode(value)
def python_value(self, value):
if value is None:
return None
return base64.b64decode(value)
class DecryptedValue(object):
""" Wrapper around an already decrypted value to be placed into an encrypted field. """
def __init__(self, decrypted_value):
assert decrypted_value is not None
self.value = decrypted_value
def decrypt(self):
return self.value
def matches(self, unencrypted_value):
""" Returns whether the value of this field matches the unencrypted_value. """
return self.decrypt() == unencrypted_value
class LazyEncryptedValue(object):
""" Wrapper around an encrypted value in an encrypted field. Will decrypt lazily. """
def __init__(self, encrypted_value, field):
self.encrypted_value = encrypted_value
self._field = field
def decrypt(self):
""" Decrypts the value. """
return self._field.model._meta.encrypter.decrypt_value(self.encrypted_value)
def matches(self, unencrypted_value):
""" Returns whether the value of this field matches the unencrypted_value. """
return self.decrypt() == unencrypted_value
def __eq__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __mod__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __pow__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __contains__(self, _):
raise Exception('Disallowed operation; use `matches`')
def contains(self, _):
raise Exception('Disallowed operation; use `matches`')
def startswith(self, _):
raise Exception('Disallowed operation; use `matches`')
def endswith(self, _):
raise Exception('Disallowed operation; use `matches`')
def _add_encryption(field_class, requires_length_check=True):
""" Adds support for encryption and decryption to the given field class. """
class indexed_class(field_class):
def __init__(self, default_token_length=None, *args, **kwargs):
def _generate_default():
return DecryptedValue(random_string(default_token_length))
if default_token_length is not None:
kwargs['default'] = _generate_default
field_class.__init__(self, *args, **kwargs)
assert not self.index
def db_value(self, value):
if value is None:
return None
if isinstance(value, LazyEncryptedValue):
return value.encrypted_value
if isinstance(value, DecryptedValue):
value = value.value
meta = self.model._meta
return meta.encrypter.encrypt_value(value, self.max_length if requires_length_check else None)
def python_value(self, value):
if value is None:
return None
return LazyEncryptedValue(value, self)
def __eq__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __mod__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __pow__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __contains__(self, _):
raise Exception('Disallowed operation; use `matches`')
def contains(self, _):
raise Exception('Disallowed operation; use `matches`')
def startswith(self, _):
raise Exception('Disallowed operation; use `matches`')
def endswith(self, _):
raise Exception('Disallowed operation; use `matches`')
return indexed_class
EncryptedCharField = _add_encryption(CharField)
EncryptedTextField = _add_encryption(TextField, requires_length_check=False)
class EnumField(SmallIntegerField):
def __init__(self, enum_type, *args, **kwargs):
kwargs.pop('index', None)
super(EnumField, self).__init__(index=True, *args, **kwargs)
self.enum_type = enum_type
def db_value(self, value):
"""Convert the python value for storage in the database."""
return int(value.value)
def python_value(self, value):
"""Convert the database value to a pythonic value."""
return self.enum_type(value) if value is not None else None
def clone_base(self, **kwargs):
return super(EnumField, self).clone_base(
enum_type=self.enum_type,
**kwargs)
def _add_fulltext(field_class):
""" Adds support for full text indexing and lookup to the given field class. """
class indexed_class(field_class):
# Marker used by SQLAlchemy translation layer to add the proper index for full text searching.
__fulltext__ = True
def __init__(self, match_function, *args, **kwargs):
field_class.__init__(self, *args, **kwargs)
self.match_function = match_function
def match(self, query):
return self.match_function(self, query)
def match_prefix(self, query):
return prefix_search(self, query)
def __mod__(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def __pow__(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def __contains__(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def contains(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def startswith(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def endswith(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
return indexed_class
FullIndexedCharField = _add_fulltext(CharField)
FullIndexedTextField = _add_fulltext(TextField)
class Credential(object):
""" Credential represents a hashed credential. """
def __init__(self, hashed):
self.hashed = hashed
def matches(self, value):
""" Returns true if this credential matches the unhashed value given. """
return bcrypt.hashpw(value.encode('utf-8'), self.hashed) == self.hashed
@classmethod
def from_string(cls, string_value):
""" Returns a Credential object from an unhashed string value. """
return Credential(bcrypt.hashpw(string_value.encode('utf-8'), bcrypt.gensalt()))
@classmethod
def generate(cls, length=20):
""" Generates a new credential and returns it, along with its unhashed form. """
token = random_string(length)
return Credential.from_string(token), token
class CredentialField(CharField):
""" A character field that stores crytographically hashed credentials that should never be
available to the user in plaintext after initial creation. This field automatically
provides verification.
"""
def __init__(self, *args, **kwargs):
CharField.__init__(self, *args, **kwargs)
assert 'default' not in kwargs
assert not self.index
def db_value(self, value):
if value is None:
return None
if isinstance(value, basestring):
raise Exception('A string cannot be given to a CredentialField; please wrap in a Credential')
return value.hashed
def python_value(self, value):
if value is None:
return None
return Credential(value)
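
A short example of the Credential wrapper used by CredentialField:

from data.fields import Credential

credential, token = Credential.generate()   # token is the one-time plaintext value
assert credential.matches(token)
assert not credential.matches('wrong-value')
# Only the bcrypt hash (credential.hashed) is ever stored by CredentialField.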


@@ -0,0 +1,64 @@
import logging
from data.logs_model.table_logs_model import TableLogsModel
from data.logs_model.document_logs_model import DocumentLogsModel
from data.logs_model.combined_model import CombinedLogsModel
logger = logging.getLogger(__name__)
def _transition_model(*args, **kwargs):
return CombinedLogsModel(
DocumentLogsModel(*args, **kwargs),
TableLogsModel(*args, **kwargs),
)
_LOG_MODELS = {
'database': TableLogsModel,
'transition_reads_both_writes_es': _transition_model,
'elasticsearch': DocumentLogsModel,
}
_PULL_LOG_KINDS = {'pull_repo', 'repo_verb'}
class LogsModelProxy(object):
def __init__(self):
self._model = None
def initialize(self, model):
self._model = model
logger.info('===============================')
logger.info('Using logs model `%s`', self._model)
logger.info('===============================')
def __getattr__(self, attr):
if not self._model:
raise AttributeError("LogsModelProxy is not initialized")
return getattr(self._model, attr)
logs_model = LogsModelProxy()
def configure(app_config):
logger.debug('Configuring log model')
model_name = app_config.get('LOGS_MODEL', 'database')
model_config = app_config.get('LOGS_MODEL_CONFIG', {})
def should_skip_logging(kind_name, namespace_name, is_free_namespace):
if namespace_name and namespace_name in app_config.get('DISABLED_FOR_AUDIT_LOGS', {}):
return True
if kind_name in _PULL_LOG_KINDS:
if namespace_name and namespace_name in app_config.get('DISABLED_FOR_PULL_LOGS', {}):
return True
if app_config.get('FEATURE_DISABLE_PULL_LOGS_FOR_FREE_NAMESPACES'):
if is_free_namespace:
return True
return False
model_config['should_skip_logging'] = should_skip_logging
logs_model.initialize(_LOG_MODELS[model_name](**model_config))
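
A hedged configuration sketch, assuming this module is importable as data.logs_model and that a database-backed deployment is being configured; the log kind and names are illustrative.

from data import logs_model

logs_model.configure({'LOGS_MODEL': 'database', 'LOGS_MODEL_CONFIG': {}})
logs_model.logs_model.log_action('push_repo', namespace_name='myorg',
                                 repository_name='myrepo')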


@@ -0,0 +1,132 @@
import logging
import itertools
from data.logs_model.datatypes import AggregatedLogCount, LogEntriesPage
from data.logs_model.interface import ActionLogsDataInterface
from data.logs_model.shared import SharedModel
logger = logging.getLogger(__name__)
def _merge_aggregated_log_counts(*args):
""" Merge two lists of AggregatedLogCount based on the value of their kind_id and datetime.
"""
matching_keys = {}
aggregated_log_counts_list = itertools.chain.from_iterable(args)
def canonical_key_from_kind_date_tuple(kind_id, dt):
""" Return a comma separated key from an AggregatedLogCount's kind_id and datetime. """
return str(kind_id) + ',' + str(dt)
for kind_id, count, dt in aggregated_log_counts_list:
kind_date_key = canonical_key_from_kind_date_tuple(kind_id, dt)
if kind_date_key in matching_keys:
existing_count = matching_keys[kind_date_key][2]
matching_keys[kind_date_key] = (kind_id, dt, existing_count + count)
else:
matching_keys[kind_date_key] = (kind_id, dt, count)
return [AggregatedLogCount(kind_id, count, dt) for (kind_id, dt, count) in matching_keys.values()]
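# Worked example (illustrative kind ids and dates):
#   rw = [AggregatedLogCount(1, 3, datetime(2019, 11, 12)), AggregatedLogCount(2, 1, datetime(2019, 11, 12))]
#   ro = [AggregatedLogCount(1, 4, datetime(2019, 11, 12))]
#   _merge_aggregated_log_counts(rw, ro)
#   # -> the two kind-1 entries for 2019-11-12 are summed into a count of 7; kind 2 passes through unchanged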
class CombinedLogsModel(SharedModel, ActionLogsDataInterface):
"""
CombinedLogsModel implements the data model that writes to the read/write logs model and reads from
both models.
"""
def __init__(self, read_write_logs_model, read_only_logs_model):
self.read_write_logs_model = read_write_logs_model
self.read_only_logs_model = read_only_logs_model
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
return self.read_write_logs_model.log_action(kind_name, namespace_name, performer, ip, metadata,
repository, repository_name, timestamp,
is_free_namespace)
def count_repository_actions(self, repository, day):
rw_count = self.read_write_logs_model.count_repository_actions(repository, day)
ro_count = self.read_only_logs_model.count_repository_actions(repository, day)
return rw_count + ro_count
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
rw_count = rw_model.get_aggregated_log_counts(start_datetime, end_datetime,
performer_name=performer_name,
repository_name=repository_name,
namespace_name=namespace_name,
filter_kinds=filter_kinds)
ro_count = ro_model.get_aggregated_log_counts(start_datetime, end_datetime,
performer_name=performer_name,
repository_name=repository_name,
namespace_name=namespace_name,
filter_kinds=filter_kinds)
return _merge_aggregated_log_counts(rw_count, ro_count)
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
rw_logs = rw_model.yield_logs_for_export(start_datetime, end_datetime, repository_id,
namespace_id, max_query_time)
ro_logs = ro_model.yield_logs_for_export(start_datetime, end_datetime, repository_id,
namespace_id, max_query_time)
for batch in itertools.chain(rw_logs, ro_logs):
yield batch
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
page_token = page_token or {}
new_page_token = {}
if page_token is None or not page_token.get('under_readonly_model', False):
rw_page_token = page_token.get('readwrite_page_token')
rw_logs = rw_model.lookup_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds,
rw_page_token, max_page_count)
logs, next_page_token = rw_logs
new_page_token['under_readonly_model'] = next_page_token is None
new_page_token['readwrite_page_token'] = next_page_token
return LogEntriesPage(logs, new_page_token)
else:
readonly_page_token = page_token.get('readonly_page_token')
ro_logs = ro_model.lookup_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds,
readonly_page_token, max_page_count)
logs, next_page_token = ro_logs
if next_page_token is None:
return LogEntriesPage(logs, None)
new_page_token['under_readonly_model'] = True
new_page_token['readonly_page_token'] = next_page_token
return LogEntriesPage(logs, new_page_token)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
latest_logs = []
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
rw_logs = rw_model.lookup_latest_logs(performer_name, repository_name, namespace_name,
filter_kinds, size)
latest_logs.extend(rw_logs)
if len(latest_logs) < size:
ro_logs = ro_model.lookup_latest_logs(performer_name, repository_name, namespace_name,
filter_kinds, size - len(latest_logs))
latest_logs.extend(ro_logs)
return latest_logs
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
ro_model = self.read_only_logs_model
rw_model = self.read_write_logs_model
ro_ctx = ro_model.yield_log_rotation_context(cutoff_date, min_logs_per_rotation)
rw_ctx = rw_model.yield_log_rotation_context(cutoff_date, min_logs_per_rotation)
for ctx in itertools.chain(ro_ctx, rw_ctx):
yield ctx
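# Pagination sketch for lookup_logs above (token shapes are illustrative): the first calls page
# through the read/write model and return tokens of the form
#   {'under_readonly_model': False, 'readwrite_page_token': <rw token>}
# until the read/write model is exhausted, at which point 'under_readonly_model' flips to True and
# later calls page through the read-only model with
#   {'under_readonly_model': True, 'readonly_page_token': <ro token>}
# A None token from the read-only model ends the iteration.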

View file

@ -0,0 +1,155 @@
import json
from calendar import timegm
from collections import namedtuple
from email.utils import formatdate
from cachetools.func import lru_cache
from data import model
from util.morecollections import AttrDict
def _format_date(date):
""" Output an RFC822 date format. """
if date is None:
return None
return formatdate(timegm(date.utctimetuple()))
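# Illustrative example: _format_date(datetime(2019, 11, 12)) -> 'Tue, 12 Nov 2019 00:00:00 -0000'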
@lru_cache(maxsize=1)
def _kinds():
return model.log.get_log_entry_kinds()
class LogEntriesPage(namedtuple('LogEntriesPage', ['logs', 'next_page_token'])):
""" Represents a page returned by the lookup_logs call. The `logs` contains the logs
found for the page and `next_page_token`, if not None, contains the token to be
encoded and returned for the followup call.
"""
class Log(namedtuple('Log', [
'metadata_json', 'ip', 'datetime', 'performer_email', 'performer_username', 'performer_robot',
'account_organization', 'account_username', 'account_email', 'account_robot', 'kind_id'])):
""" Represents a single log entry returned by the logs model. """
@classmethod
def for_logentry(cls, log):
account_organization = None
account_username = None
account_email = None
account_robot = None
try:
account_organization = log.account.organization
account_username = log.account.username
account_email = log.account.email
account_robot = log.account.robot
except AttributeError:
pass
performer_robot = None
performer_username = None
performer_email = None
try:
performer_robot = log.performer.robot
performer_username = log.performer.username
performer_email = log.performer.email
except AttributeError:
pass
return Log(log.metadata_json, log.ip, log.datetime, performer_email, performer_username,
performer_robot, account_organization, account_username, account_email,
account_robot, log.kind_id)
@classmethod
def for_elasticsearch_log(cls, log, id_user_map):
account_organization = None
account_username = None
account_email = None
account_robot = None
try:
if log.account_id:
account = id_user_map[log.account_id]
account_organization = account.organization
account_username = account.username
account_email = account.email
account_robot = account.robot
except AttributeError:
pass
performer_robot = None
performer_username = None
performer_email = None
try:
if log.performer_id:
performer = id_user_map[log.performer_id]
performer_robot = performer.robot
performer_username = performer.username
performer_email = performer.email
except AttributeError:
pass
return Log(log.metadata_json, str(log.ip), log.datetime, performer_email, performer_username,
performer_robot, account_organization, account_username, account_email,
account_robot, log.kind_id)
def to_dict(self, avatar, include_namespace=False):
view = {
'kind': _kinds()[self.kind_id],
'metadata': json.loads(self.metadata_json),
'ip': self.ip,
'datetime': _format_date(self.datetime),
}
if self.performer_username:
performer = AttrDict({'username': self.performer_username, 'email': self.performer_email})
performer.robot = None
if self.performer_robot:
performer.robot = self.performer_robot
view['performer'] = {
'kind': 'user',
'name': self.performer_username,
'is_robot': self.performer_robot,
'avatar': avatar.get_data_for_user(performer),
}
if include_namespace:
if self.account_username:
account = AttrDict({'username': self.account_username, 'email': self.account_email})
if self.account_organization:
view['namespace'] = {
'kind': 'org',
'name': self.account_username,
'avatar': avatar.get_data_for_org(account),
}
else:
account.robot = None
if self.account_robot:
account.robot = self.account_robot
view['namespace'] = {
'kind': 'user',
'name': self.account_username,
'avatar': avatar.get_data_for_user(account),
}
return view
class AggregatedLogCount(namedtuple('AggregatedLogCount', ['kind_id', 'count', 'datetime'])):
""" Represents the aggregated count of the number of logs, of a particular kind, on a day. """
def to_dict(self):
view = {
'kind': _kinds()[self.kind_id],
'count': self.count,
'datetime': _format_date(self.datetime),
}
return view

View file

@ -0,0 +1,532 @@
# pylint: disable=protected-access
import json
import logging
import uuid
from time import time
from datetime import timedelta, datetime, date
from dateutil.parser import parse as parse_datetime
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from elasticsearch.exceptions import ConnectionTimeout, NotFoundError
from data import model
from data.database import CloseForLongOperation
from data.model import config
from data.model.log import (_json_serialize, ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING,
DataModelException)
from data.logs_model.elastic_logs import LogEntry, configure_es
from data.logs_model.datatypes import Log, AggregatedLogCount, LogEntriesPage
from data.logs_model.interface import (ActionLogsDataInterface, LogRotationContextInterface,
LogsIterationTimeout)
from data.logs_model.shared import SharedModel, epoch_ms
from data.logs_model.logs_producer import LogProducerProxy, LogSendException
from data.logs_model.logs_producer.kafka_logs_producer import KafkaLogsProducer
from data.logs_model.logs_producer.elasticsearch_logs_producer import ElasticsearchLogsProducer
from data.logs_model.logs_producer.kinesis_stream_logs_producer import KinesisStreamLogsProducer
logger = logging.getLogger(__name__)
PAGE_SIZE = 20
DEFAULT_RESULT_WINDOW = 5000
MAX_RESULT_WINDOW = 10000
# DATE_RANGE_LIMIT is to limit the query date time range to at most 1 month.
DATE_RANGE_LIMIT = 32
# Timeout for count_repository_actions
COUNT_REPOSITORY_ACTION_TIMEOUT = 30
def _date_range_descending(start_datetime, end_datetime, includes_end_datetime=False):
""" Generate the dates between `end_datetime` and `start_datetime`.
If `includes_end_datetime` is set, the generator starts at `end_datetime`,
otherwise, starts the generator at `end_datetime` minus 1 second.
"""
assert end_datetime >= start_datetime
start_date = start_datetime.date()
if includes_end_datetime:
current_date = end_datetime.date()
else:
current_date = (end_datetime - timedelta(seconds=1)).date()
while current_date >= start_date:
yield current_date
current_date = current_date - timedelta(days=1)
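# Illustrative example: with start=2019-11-10 00:00 and end=2019-11-12 00:00 this yields
# 2019-11-11 then 2019-11-10; with includes_end_datetime=True it would yield
# 2019-11-12, 2019-11-11, 2019-11-10.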
def _date_range_in_single_index(dt1, dt2):
""" Determine whether a single index can be searched given a range
of dates or datetimes. If date instances are given, difference should be 1 day.
NOTE: dt2 is exclusive to the search result set.
i.e. The date range is larger or equal to dt1 and strictly smaller than dt2
"""
assert isinstance(dt1, date) and isinstance(dt2, date)
dt = dt2 - dt1
# Check if date or datetime
if not isinstance(dt1, datetime) and not isinstance(dt2, datetime):
return dt == timedelta(days=1)
if dt < timedelta(days=1) and dt >= timedelta(days=0):
return dt2.day == dt1.day
# Check if datetime can be interpreted as a date: hour, minutes, seconds or microseconds set to 0
if dt == timedelta(days=1):
return dt1.hour == 0 and dt1.minute == 0 and dt1.second == 0 and dt1.microsecond == 0
return False
def _for_elasticsearch_logs(logs, repository_id=None, namespace_id=None):
namespace_ids = set()
for log in logs:
namespace_ids.add(log.account_id)
namespace_ids.add(log.performer_id)
assert namespace_id is None or log.account_id == namespace_id
assert repository_id is None or log.repository_id == repository_id
id_user_map = model.user.get_user_map_by_ids(namespace_ids)
return [Log.for_elasticsearch_log(log, id_user_map) for log in logs]
def _random_id():
""" Generates a unique uuid4 string for the random_id field in LogEntry.
It is used as a tie-breaker for sorting logs based on datetime:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-search-after.html
"""
return str(uuid.uuid4())
@add_metaclass(ABCMeta)
class ElasticsearchLogsModelInterface(object):
"""
Interface for Elasticsearch specific operations with the logs model.
These operations are usually index based.
"""
@abstractmethod
def can_delete_index(self, index, cutoff_date):
""" Return whether the given index is older than the given cutoff date. """
@abstractmethod
def list_indices(self):
""" List the logs model's indices. """
class DocumentLogsModel(SharedModel, ActionLogsDataInterface, ElasticsearchLogsModelInterface):
"""
DocumentLogsModel implements the data model for the logs API backed by an
elasticsearch service.
"""
def __init__(self, should_skip_logging=None, elasticsearch_config=None, producer=None, **kwargs):
self._should_skip_logging = should_skip_logging
self._logs_producer = LogProducerProxy()
self._es_client = configure_es(**elasticsearch_config)
if producer == 'kafka':
kafka_config = kwargs['kafka_config']
self._logs_producer.initialize(KafkaLogsProducer(**kafka_config))
elif producer == 'elasticsearch':
self._logs_producer.initialize(ElasticsearchLogsProducer())
elif producer == 'kinesis_stream':
kinesis_stream_config = kwargs['kinesis_stream_config']
self._logs_producer.initialize(KinesisStreamLogsProducer(**kinesis_stream_config))
else:
raise Exception('Invalid log producer: %s' % producer)
@staticmethod
def _get_ids_by_names(repository_name, namespace_name, performer_name):
""" Retrieve repository/namespace/performer ids based on their names.
Raises DataModelException when the namespace_name does not match any
user in the database.
Returns each database ID, or None where the entity does not exist.
"""
repository_id = None
account_id = None
performer_id = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
if repository:
repository_id = repository.id
account_id = repository.namespace_user.id
if namespace_name and account_id is None:
account = model.user.get_user_or_org(namespace_name)
if account is None:
raise DataModelException('Invalid namespace requested')
account_id = account.id
if performer_name:
performer = model.user.get_user(performer_name)
if performer:
performer_id = performer.id
return repository_id, account_id, performer_id
def _base_query(self, performer_id=None, repository_id=None, account_id=None, filter_kinds=None,
index=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
if index is not None:
search = LogEntry.search(index=index)
else:
search = LogEntry.search()
if performer_id is not None:
assert isinstance(performer_id, int)
search = search.filter('term', performer_id=performer_id)
if repository_id is not None:
assert isinstance(repository_id, int)
search = search.filter('term', repository_id=repository_id)
if account_id is not None and repository_id is None:
assert isinstance(account_id, int)
search = search.filter('term', account_id=account_id)
if filter_kinds is not None:
kind_map = model.log.get_log_entry_kinds()
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
search = search.exclude('terms', kind_id=ignore_ids)
return search
def _base_query_date_range(self, start_datetime, end_datetime, performer_id, repository_id,
account_id, filter_kinds, index=None):
skip_datetime_check = False
if _date_range_in_single_index(start_datetime, end_datetime):
index = self._es_client.index_name(start_datetime)
skip_datetime_check = self._es_client.index_exists(index)
if index and (skip_datetime_check or self._es_client.index_exists(index)):
search = self._base_query(performer_id, repository_id, account_id, filter_kinds,
index=index)
else:
search = self._base_query(performer_id, repository_id, account_id, filter_kinds)
if not skip_datetime_check:
search = search.query('range', datetime={'gte': start_datetime, 'lt': end_datetime})
return search
def _load_logs_for_day(self, logs_date, performer_id, repository_id, account_id, filter_kinds,
after_datetime=None, after_random_id=None, size=PAGE_SIZE):
index = self._es_client.index_name(logs_date)
if not self._es_client.index_exists(index):
return []
search = self._base_query(performer_id, repository_id, account_id, filter_kinds,
index=index)
search = search.sort({'datetime': 'desc'}, {'random_id.keyword': 'desc'})
search = search.extra(size=size)
if after_datetime is not None and after_random_id is not None:
after_datetime_epoch_ms = epoch_ms(after_datetime)
search = search.extra(search_after=[after_datetime_epoch_ms, after_random_id])
return search.execute()
def _load_latest_logs(self, performer_id, repository_id, account_id, filter_kinds, size):
""" Return the latest logs from Elasticsearch.
Look at indices up to the set logrotateworker threshold, or up to 30 days if not defined.
"""
# Set the last index to check to be the logrotateworker threshold, or 30 days
end_datetime = datetime.now()
start_datetime = end_datetime - timedelta(days=DATE_RANGE_LIMIT)
latest_logs = []
for day in _date_range_descending(start_datetime, end_datetime, includes_end_datetime=True):
try:
logs = self._load_logs_for_day(day, performer_id, repository_id, account_id, filter_kinds,
size=size)
latest_logs.extend(logs)
except NotFoundError:
continue
if len(latest_logs) >= size:
break
return _for_elasticsearch_logs(latest_logs[:size], repository_id, account_id)
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
assert start_datetime is not None and end_datetime is not None
# Check for a valid combined model token when migrating online from a combined model
if page_token is not None and page_token.get('readwrite_page_token') is not None:
page_token = page_token.get('readwrite_page_token')
if page_token is not None and max_page_count is not None:
page_number = page_token.get('page_number')
if page_number is not None and page_number + 1 > max_page_count:
return LogEntriesPage([], None)
repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
repository_name, namespace_name, performer_name)
after_datetime = None
after_random_id = None
if page_token is not None:
after_datetime = parse_datetime(page_token['datetime'])
after_random_id = page_token['random_id']
if after_datetime is not None:
end_datetime = min(end_datetime, after_datetime)
all_logs = []
with CloseForLongOperation(config.app_config):
for current_date in _date_range_descending(start_datetime, end_datetime):
try:
logs = self._load_logs_for_day(current_date, performer_id, repository_id, account_id,
filter_kinds, after_datetime, after_random_id,
size=PAGE_SIZE+1)
all_logs.extend(logs)
except NotFoundError:
continue
if len(all_logs) > PAGE_SIZE:
break
next_page_token = None
all_logs = all_logs[0:PAGE_SIZE+1]
if len(all_logs) == PAGE_SIZE + 1:
# The last element in the response is only used to check whether there are more elements.
# The second-to-last element is used as the pagination token, because search_after does
# not include the exact match, so the next page will start with the last element returned.
# This keeps the behavior exactly the same as table_logs_model, so that
# the caller can expect when a pagination token is non-empty, there must be
# at least 1 log to be retrieved.
next_page_token = {
'datetime': all_logs[-2].datetime.isoformat(),
'random_id': all_logs[-2].random_id,
'page_number': page_token['page_number'] + 1 if page_token else 1,
}
return LogEntriesPage(_for_elasticsearch_logs(all_logs[:PAGE_SIZE], repository_id, account_id),
next_page_token)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
repository_name, namespace_name, performer_name)
with CloseForLongOperation(config.app_config):
latest_logs = self._load_latest_logs(performer_id, repository_id, account_id, filter_kinds,
size)
return latest_logs
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
if end_datetime - start_datetime >= timedelta(days=DATE_RANGE_LIMIT):
raise Exception('Cannot lookup aggregated logs over a period longer than a month')
repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
repository_name, namespace_name, performer_name)
with CloseForLongOperation(config.app_config):
search = self._base_query_date_range(start_datetime, end_datetime, performer_id,
repository_id, account_id, filter_kinds)
search.aggs.bucket('by_id', 'terms', field='kind_id').bucket('by_date', 'date_histogram',
field='datetime', interval='day')
# es returns all buckets when size=0
search = search.extra(size=0)
resp = search.execute()
if not resp.aggregations:
return []
counts = []
by_id = resp.aggregations['by_id']
for id_bucket in by_id.buckets:
for date_bucket in id_bucket.by_date.buckets:
if date_bucket.doc_count > 0:
counts.append(AggregatedLogCount(id_bucket.key, date_bucket.doc_count, date_bucket.key))
return counts
def count_repository_actions(self, repository, day):
index = self._es_client.index_name(day)
search = self._base_query_date_range(day, day + timedelta(days=1),
None,
repository.id,
None,
None,
index=index)
search = search.params(request_timeout=COUNT_REPOSITORY_ACTION_TIMEOUT)
try:
return search.count()
except NotFoundError:
return 0
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
if self._should_skip_logging and self._should_skip_logging(kind_name, namespace_name,
is_free_namespace):
return
if repository_name is not None:
assert repository is None
assert namespace_name is not None
repository = model.repository.get_repository(namespace_name, repository_name)
if timestamp is None:
timestamp = datetime.today()
account_id = None
performer_id = None
repository_id = None
if namespace_name is not None:
account_id = model.user.get_namespace_user(namespace_name).id
if performer is not None:
performer_id = performer.id
if repository is not None:
repository_id = repository.id
metadata_json = json.dumps(metadata or {}, default=_json_serialize)
kind_id = model.log._get_log_entry_kind(kind_name)
log = LogEntry(random_id=_random_id(), kind_id=kind_id, account_id=account_id,
performer_id=performer_id, ip=ip, metadata_json=metadata_json,
repository_id=repository_id, datetime=timestamp)
try:
self._logs_producer.send(log)
except LogSendException as lse:
strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING')
# dict.update() returns None, so build the extra payload before logging.
extra_info = {'exception': lse}
extra_info.update(log.to_dict())
logger.exception('log_action failed', extra=extra_info)
if not (strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING):
raise
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
max_query_time = max_query_time.total_seconds() if max_query_time is not None else 300
search = self._base_query_date_range(start_datetime, end_datetime, None, repository_id,
namespace_id, None)
def raise_on_timeout(batch_generator):
start = time()
for batch in batch_generator:
elapsed = time() - start
if elapsed > max_query_time:
logger.error('Retrieval of logs `%s/%s` timed out with time of `%s`', namespace_id,
repository_id, elapsed)
raise LogsIterationTimeout()
yield batch
start = time()
def read_batch(scroll):
batch = []
for log in scroll:
batch.append(log)
if len(batch) == DEFAULT_RESULT_WINDOW:
yield _for_elasticsearch_logs(batch, repository_id=repository_id,
namespace_id=namespace_id)
batch = []
if batch:
yield _for_elasticsearch_logs(batch, repository_id=repository_id, namespace_id=namespace_id)
search = search.params(size=DEFAULT_RESULT_WINDOW, request_timeout=max_query_time)
try:
with CloseForLongOperation(config.app_config):
for batch in raise_on_timeout(read_batch(search.scan())):
yield batch
except ConnectionTimeout:
raise LogsIterationTimeout()
def can_delete_index(self, index, cutoff_date):
return self._es_client.can_delete_index(index, cutoff_date)
def list_indices(self):
return self._es_client.list_indices()
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
""" Yield a context manager for a group of outdated logs. """
all_indices = self.list_indices()
for index in all_indices:
if not self.can_delete_index(index, cutoff_date):
continue
context = ElasticsearchLogRotationContext(index, min_logs_per_rotation, self._es_client)
yield context
class ElasticsearchLogRotationContext(LogRotationContextInterface):
"""
ElasticsearchLogRotationContext yields batches of logs from an index.
When completed without exceptions, this context will delete its associated
Elasticsearch index.
"""
def __init__(self, index, min_logs_per_rotation, es_client):
self._es_client = es_client
self.min_logs_per_rotation = min_logs_per_rotation
self.index = index
self.start_pos = 0
self.end_pos = 0
self.scroll = None
def __enter__(self):
search = self._base_query()
self.scroll = search.scan()
return self
def __exit__(self, ex_type, ex_value, ex_traceback):
if ex_type is None and ex_value is None and ex_traceback is None:
logger.debug('Deleting index %s', self.index)
self._es_client.delete_index(self.index)
def yield_logs_batch(self):
def batched_logs(gen, size):
batch = []
for log in gen:
batch.append(log)
if len(batch) == size:
yield batch
batch = []
if batch:
yield batch
for batch in batched_logs(self.scroll, self.min_logs_per_rotation):
self.end_pos = self.start_pos + len(batch) - 1
yield batch, self._generate_filename()
self.start_pos = self.end_pos + 1
def _base_query(self):
search = LogEntry.search(index=self.index)
return search
def _generate_filename(self):
""" Generate the filenames used to archive the action logs. """
filename = '%s_%d-%d' % (self.index, self.start_pos, self.end_pos)
filename = '.'.join((filename, 'txt.gz'))
return filename
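# Illustrative example: rotating the index 'logentry_2019-11-12' with min_logs_per_rotation=1000
# yields archive filenames such as 'logentry_2019-11-12_0-999.txt.gz',
# 'logentry_2019-11-12_1000-1999.txt.gz', ..., and the index itself is deleted once the
# context exits without raising.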

View file

@ -0,0 +1,255 @@
import os
import logging
import re
from datetime import datetime, timedelta
from requests_aws4auth import AWS4Auth
from elasticsearch import RequestsHttpConnection
from elasticsearch.exceptions import NotFoundError, AuthorizationException
from elasticsearch_dsl import Index, Document, Integer, Date, Text, Ip, Keyword
from elasticsearch_dsl.connections import connections
logger = logging.getLogger(__name__)
# Name of the connection used for Elasticsearch's template API
ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS = 'logentry_template'
# Prefix of autogenerated indices
INDEX_NAME_PREFIX = 'logentry_'
# Time-based index date format
INDEX_DATE_FORMAT = '%Y-%m-%d'
# Timeout for default connection
ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT = 15
# Timeout for template api Connection
ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT = 60
# Force an index template update
ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE = os.environ.get('FORCE_INDEX_TEMPLATE_UPDATE', '')
# Valid index prefix pattern
VALID_INDEX_PATTERN = r'^((?!\.$|\.\.$|[-_+])([^A-Z:\/*?\"<>|,# ]){1,255})$'
class LogEntry(Document):
# random_id is the tie-breaker for sorting in pagination.
# random_id is also used for deduplication of records when using an "at-least-once" delivery stream.
# Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-search-after.html
#
# We don't use the _id of a document since `doc_values` is not built for this field:
# An on-disk data structure that stores the same data in a columnar format
# for optimized sorting and aggregations.
# Reference: https://github.com/elastic/elasticsearch/issues/35369
random_id = Text(fields={'keyword': Keyword()})
kind_id = Integer()
account_id = Integer()
performer_id = Integer()
repository_id = Integer()
ip = Ip()
metadata_json = Text()
datetime = Date()
_initialized = False
@classmethod
def init(cls, index_prefix, index_settings=None, skip_template_init=False):
"""
Create the index template, and populate LogEntry's mapping and index settings.
"""
wildcard_index = Index(name=index_prefix + '*')
wildcard_index.settings(**(index_settings or {}))
wildcard_index.document(cls)
cls._index = wildcard_index
cls._index_prefix = index_prefix
if not skip_template_init:
cls.create_or_update_template()
# Since the elasticsearch-dsl API requires the document's index to be defined as an inner class at the class level,
# this function needs to be called before `save` can be used.
cls._initialized = True
@classmethod
def create_or_update_template(cls):
assert cls._index and cls._index_prefix
index_template = cls._index.as_template(cls._index_prefix)
index_template.save(using=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)
def save(self, **kwargs):
# We group the logs based on year, month and day as different indexes, so that
# dropping those indexes based on retention range is easy.
#
# NOTE: This is only used if logging directly to Elasticsearch
# When using Kinesis or Kafka, the consumer of these streams
# will be responsible for the management of the indices' lifecycle.
assert LogEntry._initialized
kwargs['index'] = self.datetime.strftime(self._index_prefix + INDEX_DATE_FORMAT)
return super(LogEntry, self).save(**kwargs)
class ElasticsearchLogs(object):
"""
Model for logs operations stored in an Elasticsearch cluster.
"""
def __init__(self, host=None, port=None, access_key=None, secret_key=None, aws_region=None,
index_settings=None, use_ssl=True, index_prefix=INDEX_NAME_PREFIX):
# For options in index_settings, refer to:
# https://www.elastic.co/guide/en/elasticsearch/guide/master/_index_settings.html
# Some index settings are set at index creation time and therefore should NOT
# be changed once the index has been created.
self._host = host
self._port = port
self._access_key = access_key
self._secret_key = secret_key
self._aws_region = aws_region
self._index_prefix = index_prefix
self._index_settings = index_settings
self._use_ssl = use_ssl
self._client = None
self._initialized = False
def _initialize(self):
"""
Initialize a connection to an ES cluster and
create an index template if it does not exist.
"""
if not self._initialized:
http_auth = None
if self._access_key and self._secret_key and self._aws_region:
http_auth = AWS4Auth(self._access_key, self._secret_key, self._aws_region, 'es')
elif self._access_key and self._secret_key:
http_auth = (self._access_key, self._secret_key)
else:
logger.warn("Connecting to Elasticsearch without HTTP auth")
self._client = connections.create_connection(
hosts=[{
'host': self._host,
'port': self._port
}],
http_auth=http_auth,
use_ssl=self._use_ssl,
verify_certs=True,
connection_class=RequestsHttpConnection,
timeout=ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT,
)
# Create a second connection with a longer timeout (60s vs the 15s default).
# For some reason the PUT template API can take anywhere between
# 10s and 30s on the test cluster.
# This only needs to be done once to initialize the index template
connections.create_connection(
alias=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS,
hosts=[{
'host': self._host,
'port': self._port
}],
http_auth=http_auth,
use_ssl=self._use_ssl,
verify_certs=True,
connection_class=RequestsHttpConnection,
timeout=ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT,
)
try:
force_template_update = ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE.lower() == 'true'
self._client.indices.get_template(self._index_prefix)
LogEntry.init(self._index_prefix, self._index_settings,
skip_template_init=not force_template_update)
except NotFoundError:
LogEntry.init(self._index_prefix, self._index_settings, skip_template_init=False)
finally:
try:
connections.remove_connection(ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)
except KeyError as ke:
logger.exception('Elasticsearch connection not found to remove %s: %s',
ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS, ke)
self._initialized = True
def index_name(self, day):
""" Return an index name for the given day. """
return self._index_prefix + day.strftime(INDEX_DATE_FORMAT)
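# Illustrative example: with the default prefix, index_name(date(2019, 11, 12)) -> 'logentry_2019-11-12'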
def index_exists(self, index):
try:
return index in self._client.indices.get(index)
except NotFoundError:
return False
@staticmethod
def _valid_index_prefix(prefix):
""" Check that the given index prefix is valid with the set of
indices used by this class.
"""
return re.match(VALID_INDEX_PATTERN, prefix) is not None
def _valid_index_name(self, index):
""" Check that the given index name is valid and follows the format:
<index_prefix>YYYY-MM-DD
"""
if not ElasticsearchLogs._valid_index_prefix(index):
return False
if not index.startswith(self._index_prefix) or len(index) > 255:
return False
index_dt_str = index.split(self._index_prefix, 1)[-1]
try:
datetime.strptime(index_dt_str, INDEX_DATE_FORMAT)
return True
except ValueError:
logger.exception('Invalid date format (YYYY-MM-DD) for index: %s', index)
return False
def can_delete_index(self, index, cutoff_date):
""" Check if the given index can be deleted based on the given index's date and cutoff date. """
assert self._valid_index_name(index)
index_dt = datetime.strptime(index[len(self._index_prefix):], INDEX_DATE_FORMAT)
return index_dt < cutoff_date and cutoff_date - index_dt >= timedelta(days=1)
def list_indices(self):
self._initialize()
try:
return self._client.indices.get(self._index_prefix + '*').keys()
except NotFoundError as nfe:
logger.exception('`%s` indices not found: %s', self._index_prefix, nfe.info)
return []
except AuthorizationException as ae:
logger.exception('Unauthorized for indices `%s`: %s', self._index_prefix, ae.info)
return None
def delete_index(self, index):
self._initialize()
assert self._valid_index_name(index)
try:
self._client.indices.delete(index)
return index
except NotFoundError as nfe:
logger.exception('`%s` indices not found: %s', index, nfe.info)
return None
except AuthorizationException as ae:
logger.exception('Unauthorized to delete index `%s`: %s', index, ae.info)
return None
def configure_es(host, port, access_key=None, secret_key=None, aws_region=None,
index_prefix=None, use_ssl=True, index_settings=None):
"""
For options in index_settings, refer to:
https://www.elastic.co/guide/en/elasticsearch/guide/master/_index_settings.html
Some index settings are set at index creation time and therefore should NOT
be changed once the index has been created.
"""
es_client = ElasticsearchLogs(host=host, port=port, access_key=access_key, secret_key=secret_key,
aws_region=aws_region, index_prefix=index_prefix or INDEX_NAME_PREFIX,
use_ssl=use_ssl, index_settings=index_settings)
es_client._initialize()
return es_client
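# A minimal usage sketch (the host, port and prefix below are placeholders; when access_key,
# secret_key and aws_region are all provided, requests are signed with AWS4Auth instead):
#
#   es_client = configure_es('es.example.com', 9200, index_prefix='logentry_', use_ssl=True)
#   index = es_client.index_name(datetime.utcnow().date())
#   es_client.index_exists(index)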

View file

@ -0,0 +1,244 @@
import logging
import json
from collections import namedtuple
from datetime import datetime
from tzlocal import get_localzone
from dateutil.relativedelta import relativedelta
from data import model
from data.logs_model.datatypes import AggregatedLogCount, LogEntriesPage, Log
from data.logs_model.interface import (ActionLogsDataInterface, LogRotationContextInterface,
LogsIterationTimeout)
logger = logging.getLogger(__name__)
LogAndRepository = namedtuple('LogAndRepository', ['log', 'stored_log', 'repository'])
StoredLog = namedtuple('StoredLog', ['kind_id',
'account_id',
'performer_id',
'ip',
'metadata_json',
'repository_id',
'datetime'])
class InMemoryModel(ActionLogsDataInterface):
"""
InMemoryModel implements the data model for logs in-memory. FOR TESTING ONLY.
"""
def __init__(self):
self.logs = []
def _filter_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
for log_and_repo in self.logs:
if log_and_repo.log.datetime < start_datetime or log_and_repo.log.datetime > end_datetime:
continue
if performer_name and log_and_repo.log.performer_username != performer_name:
continue
if (repository_name and
(not log_and_repo.repository or log_and_repo.repository.name != repository_name)):
continue
if namespace_name and log_and_repo.log.account_username != namespace_name:
continue
if filter_kinds:
kind_map = model.log.get_log_entry_kinds()
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
if log_and_repo.log.kind_id in ignore_ids:
continue
yield log_and_repo
def _filter_latest_logs(self, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
for log_and_repo in sorted(self.logs, key=lambda t: t.log.datetime, reverse=True):
if performer_name and log_and_repo.log.performer_username != performer_name:
continue
if (repository_name and
(not log_and_repo.repository or log_and_repo.repository.name != repository_name)):
continue
if namespace_name and log_and_repo.log.account_username != namespace_name:
continue
if filter_kinds:
kind_map = model.log.get_log_entry_kinds()
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
if log_and_repo.log.kind_id in ignore_ids:
continue
yield log_and_repo
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
logs = []
for log_and_repo in self._filter_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds):
logs.append(log_and_repo.log)
return LogEntriesPage(logs, None)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
latest_logs = []
for log_and_repo in self._filter_latest_logs(performer_name, repository_name, namespace_name,
filter_kinds):
if size is not None and len(latest_logs) == size:
break
latest_logs.append(log_and_repo.log)
return latest_logs
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
entries = {}
for log_and_repo in self._filter_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds):
entry = log_and_repo.log
synthetic_date = datetime(start_datetime.year, start_datetime.month, int(entry.datetime.day),
tzinfo=get_localzone())
if synthetic_date.day < start_datetime.day:
synthetic_date = synthetic_date + relativedelta(months=1)
key = '%s-%s' % (entry.kind_id, entry.datetime.day)
if key in entries:
entries[key] = AggregatedLogCount(entry.kind_id, entries[key].count + 1,
synthetic_date)
else:
entries[key] = AggregatedLogCount(entry.kind_id, 1, synthetic_date)
return entries.values()
def count_repository_actions(self, repository, day):
count = 0
for log_and_repo in self.logs:
if log_and_repo.repository != repository:
continue
if log_and_repo.log.datetime.day != day.day:
continue
count += 1
return count
def queue_logs_export(self, start_datetime, end_datetime, export_action_logs_queue,
namespace_name=None, repository_name=None, callback_url=None,
callback_email=None, filter_kinds=None):
raise NotImplementedError
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
timestamp = timestamp or datetime.today()
if not repository and repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
account = None
account_id = None
performer_id = None
repository_id = None
if namespace_name is not None:
account = model.user.get_namespace_user(namespace_name)
account_id = account.id
if performer is not None:
performer_id = performer.id
if repository is not None:
repository_id = repository.id
metadata_json = json.dumps(metadata or {})
kind_id = model.log.get_log_entry_kinds()[kind_name]
stored_log = StoredLog(
kind_id,
account_id,
performer_id,
ip,
metadata_json,
repository_id,
timestamp
)
log = Log(metadata_json=metadata,
ip=ip,
datetime=timestamp,
performer_email=performer.email if performer else None,
performer_username=performer.username if performer else None,
performer_robot=performer.robot if performer else None,
account_organization=account.organization if account else None,
account_username=account.username if account else None,
account_email=account.email if account else None,
account_robot=account.robot if account else None,
kind_id=kind_id)
self.logs.append(LogAndRepository(log, stored_log, repository))
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
# Just for testing.
if max_query_time is not None:
raise LogsIterationTimeout()
logs = []
for log_and_repo in self._filter_logs(start_datetime, end_datetime):
if (repository_id and
(not log_and_repo.repository or log_and_repo.repository.id != repository_id)):
continue
if namespace_id:
if log_and_repo.log.account_username is None:
continue
namespace = model.user.get_namespace_user(log_and_repo.log.account_username)
if namespace.id != namespace_id:
continue
logs.append(log_and_repo.log)
yield logs
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
expired_logs = [log_and_repo for log_and_repo in self.logs
if log_and_repo.log.datetime <= cutoff_date]
while True:
if not expired_logs:
break
context = InMemoryLogRotationContext(expired_logs[:min_logs_per_rotation], self.logs)
expired_logs = expired_logs[min_logs_per_rotation:]
yield context
class InMemoryLogRotationContext(LogRotationContextInterface):
def __init__(self, expired_logs, all_logs):
self.expired_logs = expired_logs
self.all_logs = all_logs
def __enter__(self):
return self
def __exit__(self, ex_type, ex_value, ex_traceback):
if ex_type is None and ex_value is None and ex_traceback is None:
for log in self.expired_logs:
self.all_logs.remove(log)
def yield_logs_batch(self):
""" Yield a batch of logs and a filename for that batch. """
filename = 'inmemory_model_filename_placeholder'
filename = '.'.join((filename, 'txt.gz'))
yield [log_and_repo.stored_log for log_and_repo in self.expired_logs], filename

View file

@ -0,0 +1,95 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
class LogsIterationTimeout(Exception):
""" Exception raised if logs iteration times out. """
@add_metaclass(ABCMeta)
class ActionLogsDataInterface(object):
""" Interface for code to work with the logs data model. The logs data model consists
of all access for reading and writing action logs.
"""
@abstractmethod
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
""" Looks up all logs between the start_datetime and end_datetime, filtered
by performer (a user), repository or namespace. Note that one (and only one) of the three
can be specified. Returns a LogEntriesPage. `filter_kinds`, if specified, is a set/list
of the kinds of logs to filter out.
"""
@abstractmethod
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
""" Looks up latest logs of a specific kind, filtered by performer (a user),
repository or namespace. Note that one (and only one) of the three can be specified.
Returns a list of `Log`.
"""
@abstractmethod
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
""" Returns the aggregated count of logs, by kind, between the start_datetime and end_datetime,
filtered by performer (a user), repository or namespace. Note that one (and only one) of
the three can be specified. Returns a list of AggregatedLogCount.
"""
@abstractmethod
def count_repository_actions(self, repository, day):
""" Returns the total number of repository actions over the given day, in the given repository
or None on error.
"""
@abstractmethod
def queue_logs_export(self, start_datetime, end_datetime, export_action_logs_queue,
namespace_name=None, repository_name=None, callback_url=None,
callback_email=None, filter_kinds=None):
""" Queues logs between the start_datetime and end_time, filtered by a repository or namespace,
for export to the specified URL and/or email address. Returns the ID of the export job
queued or None if error.
"""
@abstractmethod
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
""" Logs a single action as having taken place. """
@abstractmethod
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
""" Returns an iterator that yields bundles of all logs found between the start_datetime and
end_datetime, optionally filtered by the repository or namespace. This function should be
used for any bulk lookup operations, and should be implemented to put
minimal strain on the backing storage for large operations. If there was an error in setting
up, returns None.
If max_query_time is specified, each iteration that yields a log bundle will have its
queries run with a maximum timeout of that specified, and, if any exceed that threshold,
LogsIterationTimeout will be raised instead of returning the logs bundle.
"""
@abstractmethod
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
"""
A generator that yields contexts implementing the LogRotationContextInterface.
Each context represents a set of logs to be archived and deleted once
the context completes without exceptions.
For database logs, the LogRotationContext abstracts over a set of rows. When the context
finishes, its associated rows get deleted.
For Elasticsearch logs, the LogRotationContext abstracts over indices. When the context
finishes, its associated index gets deleted.
"""
@add_metaclass(ABCMeta)
class LogRotationContextInterface(object):
""" Interface for iterating over a set of logs to be archived. """
@abstractmethod
def yield_logs_batch(self):
"""
Generator yielding batch of logs and a filename for that batch.
A batch is a subset of the logs part of the context.
"""

View file

@ -0,0 +1,27 @@
import logging
logger = logging.getLogger(__name__)
class LogSendException(Exception):
""" A generic error when sending the logs to its destination.
e.g. Kinesis, Kafka, Elasticsearch, ...
"""
pass
class LogProducerProxy(object):
def __init__(self):
self._model = None
def initialize(self, model):
self._model = model
logger.info('===============================')
logger.info('Using producer `%s`', self._model)
logger.info('===============================')
def __getattr__(self, attr):
if not self._model:
raise AttributeError("LogsModelProxy is not initialized")
return getattr(self._model, attr)

View file

@ -0,0 +1,25 @@
import logging
from elasticsearch.exceptions import ElasticsearchException
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer import LogSendException
logger = logging.getLogger(__name__)
class ElasticsearchLogsProducer(LogProducerInterface):
""" Log producer writing log entries to Elasticsearch.
This implementation writes directly to Elasticsearch without a streaming/queueing service.
"""
def send(self, logentry):
try:
logentry.save()
except ElasticsearchException as ex:
logger.exception('ElasticsearchLogsProducer error sending log to Elasticsearch: %s', ex)
raise LogSendException('ElasticsearchLogsProducer error sending log to Elasticsearch: %s' % ex)
except Exception as e:
logger.exception('ElasticsearchLogsProducer exception sending log to Elasticsearch: %s', e)
raise LogSendException('ElasticsearchLogsProducer exception sending log to Elasticsearch: %s' % e)

View file

@ -0,0 +1,8 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
@add_metaclass(ABCMeta)
class LogProducerInterface(object):
@abstractmethod
def send(self, logentry):
""" Send a log entry to the configured log infrastructure. """

View file

@ -0,0 +1,45 @@
import logging
from kafka.errors import KafkaError, KafkaTimeoutError
from kafka import KafkaProducer
from data.logs_model.shared import epoch_ms
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.logs_producer import LogSendException
logger = logging.getLogger(__name__)
DEFAULT_MAX_BLOCK_SECONDS = 5
class KafkaLogsProducer(LogProducerInterface):
""" Log producer writing log entries to a Kafka stream. """
def __init__(self, bootstrap_servers=None, topic=None, client_id=None, max_block_seconds=None):
self.bootstrap_servers = bootstrap_servers
self.topic = topic
self.client_id = client_id
self.max_block_ms = (max_block_seconds or DEFAULT_MAX_BLOCK_SECONDS) * 1000
self._producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers,
client_id=self.client_id,
max_block_ms=self.max_block_ms,
value_serializer=logs_json_serializer)
def send(self, logentry):
try:
# send() has a (max_block_ms) timeout and get() has a (max_block_ms) timeout
# for an upper bound of 2x(max_block_ms) before guaranteed delivery
future = self._producer.send(self.topic, logentry.to_dict(), timestamp_ms=epoch_ms(logentry.datetime))
record_metadata = future.get(timeout=self.max_block_ms)
assert future.succeeded()
except KafkaTimeoutError as kte:
logger.exception('KafkaLogsProducer timeout sending log to Kafka: %s', kte)
raise LogSendException('KafkaLogsProducer timeout sending log to Kafka: %s' % kte)
except KafkaError as ke:
logger.exception('KafkaLogsProducer error sending log to Kafka: %s', ke)
raise LogSendException('KafkaLogsProducer error sending log to Kafka: %s' % ke)
except Exception as e:
logger.exception('KafkaLogsProducer exception sending log to Kafka: %s', e)
raise LogSendException('KafkaLogsProducer exception sending log to Kafka: %s' % e)

View file

@ -0,0 +1,75 @@
import logging
import hashlib
import random
import boto3
from botocore.exceptions import ClientError
from botocore.client import Config
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.logs_producer import LogSendException
logger = logging.getLogger(__name__)
KINESIS_PARTITION_KEY_PREFIX = 'logentry_partition_key_'
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_READ_TIMEOUT = 5
MAX_RETRY_ATTEMPTS = 5
DEFAULT_MAX_POOL_CONNECTIONS = 10
def _partition_key(number_of_shards=None):
""" Generate a partition key for AWS Kinesis stream.
If the number of shards is specified, generate keys where the size of the key space is
the number of shards.
"""
key = None
if number_of_shards is not None:
shard_number = random.randrange(0, number_of_shards)
key = hashlib.sha1(KINESIS_PARTITION_KEY_PREFIX + str(shard_number)).hexdigest()
else:
key = hashlib.sha1(KINESIS_PARTITION_KEY_PREFIX + str(random.getrandbits(256))).hexdigest()
return key
class KinesisStreamLogsProducer(LogProducerInterface):
""" Log producer writing log entries to an Amazon Kinesis Data Stream. """
def __init__(self, stream_name, aws_region, aws_access_key=None, aws_secret_key=None,
connect_timeout=None, read_timeout=None, max_retries=None,
max_pool_connections=None):
self._stream_name = stream_name
self._aws_region = aws_region
self._aws_access_key = aws_access_key
self._aws_secret_key = aws_secret_key
self._connect_timeout = connect_timeout or DEFAULT_CONNECT_TIMEOUT
self._read_timeout = read_timeout or DEFAULT_READ_TIMEOUT
self._max_retries = max_retries or MAX_RETRY_ATTEMPTS
self._max_pool_connections = max_pool_connections or DEFAULT_MAX_POOL_CONNECTIONS
client_config = Config(connect_timeout=self._connect_timeout,
read_timeout=self._read_timeout,
retries={'max_attempts': self._max_retries},
max_pool_connections=self._max_pool_connections)
self._producer = boto3.client('kinesis', use_ssl=True,
region_name=self._aws_region,
aws_access_key_id=self._aws_access_key,
aws_secret_access_key=self._aws_secret_key,
config=client_config)
def send(self, logentry):
try:
data = logs_json_serializer(logentry)
self._producer.put_record(
StreamName=self._stream_name,
Data=data,
PartitionKey=_partition_key()
)
except ClientError as ce:
logger.exception('KinesisStreamLogsProducer client error sending log to Kinesis: %s', ce)
raise LogSendException('KinesisStreamLogsProducer client error sending log to Kinesis: %s' % ce)
except Exception as e:
logger.exception('KinesisStreamLogsProducer exception sending log to Kinesis: %s', e)
raise LogSendException('KinesisStreamLogsProducer exception sending log to Kinesis: %s' % e)
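# A minimal usage sketch (stream name, region and credentials are placeholders; `logentry`
# would be a LogEntry document as built in the Elasticsearch logs model's log_action):
#
#   producer = KinesisStreamLogsProducer('example-action-logs', 'us-east-1',
#                                        aws_access_key='...', aws_secret_key='...')
#   producer.send(logentry)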

View file

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import logging
import json
from datetime import datetime
import pytest
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.elastic_logs import LogEntry
logger = logging.getLogger(__name__)
TEST_DATETIME = datetime.utcnow()
TEST_JSON_STRING = '{"a": "b", "c": "d"}'
TEST_JSON_STRING_WITH_UNICODE = u'{"éëê": "îôû"}'
VALID_LOGENTRY = LogEntry(random_id='123-45', ip='0.0.0.0', metadata_json=TEST_JSON_STRING, datetime=TEST_DATETIME)
VALID_LOGENTRY_WITH_UNICODE = LogEntry(random_id='123-45', ip='0.0.0.0', metadata_json=TEST_JSON_STRING_WITH_UNICODE, datetime=TEST_DATETIME)
VALID_LOGENTRY_EXPECTED_OUTPUT = '{"datetime": "%s", "ip": "0.0.0.0", "metadata_json": "{\\"a\\": \\"b\\", \\"c\\": \\"d\\"}", "random_id": "123-45"}' % TEST_DATETIME.isoformat()
VALID_LOGENTRY_WITH_UNICODE_EXPECTED_OUTPUT = '{"datetime": "%s", "ip": "0.0.0.0", "metadata_json": "{\\"\\u00e9\\u00eb\\u00ea\\": \\"\\u00ee\\u00f4\\u00fb\\"}", "random_id": "123-45"}' % TEST_DATETIME.isoformat()
@pytest.mark.parametrize(
'is_valid, given_input, expected_output',
[
# Valid inputs
pytest.param(True, VALID_LOGENTRY, VALID_LOGENTRY_EXPECTED_OUTPUT),
# With unicode
pytest.param(True, VALID_LOGENTRY_WITH_UNICODE, VALID_LOGENTRY_WITH_UNICODE_EXPECTED_OUTPUT),
])
def test_logs_json_serializer(is_valid, given_input, expected_output):
if not is_valid:
with pytest.raises(ValueError) as ve:
data = logs_json_serializer(given_input)
else:
data = logs_json_serializer(given_input, sort_keys=True)
assert data == expected_output
# Make sure the datetime was serialized in the correct ISO8601
datetime_str = json.loads(data)['datetime']
assert datetime_str == TEST_DATETIME.isoformat()

View file

@ -0,0 +1,15 @@
import json
from datetime import datetime
class LogEntryJSONEncoder(json.JSONEncoder):
""" JSON encoder to encode datetimes to ISO8601 format. """
def default(self, obj):
if isinstance(obj, datetime):
return obj.isoformat()
return super(LogEntryJSONEncoder, self).default(obj)
def logs_json_serializer(logentry, sort_keys=False):
""" Serializes a LogEntry to json bytes. """
return json.dumps(logentry.to_dict(), cls=LogEntryJSONEncoder,
ensure_ascii=True, sort_keys=sort_keys).encode('ascii')

53
data/logs_model/shared.py Normal file
View file

@ -0,0 +1,53 @@
import uuid
import json
from calendar import timegm
from data import model
class SharedModel:
def queue_logs_export(self, start_datetime, end_datetime, export_action_logs_queue,
namespace_name=None, repository_name=None, callback_url=None,
callback_email=None, filter_kinds=None):
""" Queues logs between the start_datetime and end_time, filtered by a repository or namespace,
for export to the specified URL and/or email address. Returns the ID of the export job
queued or None if error.
"""
export_id = str(uuid.uuid4())
namespace = model.user.get_namespace_user(namespace_name)
if namespace is None:
return None
repository = None
if repository_name is not None:
repository = model.repository.get_repository(namespace_name, repository_name)
if repository is None:
return None
export_action_logs_queue.put([namespace_name],
json.dumps({
'export_id': export_id,
'repository_id': repository.id if repository else None,
'namespace_id': namespace.id,
'namespace_name': namespace.username,
'repository_name': repository.name if repository else None,
'start_time': start_datetime.strftime('%m/%d/%Y'),
'end_time': end_datetime.strftime('%m/%d/%Y'),
'callback_url': callback_url,
'callback_email': callback_email,
}), retries_remaining=3)
return export_id
def epoch_ms(dt):
return (timegm(dt.timetuple()) * 1000) + (dt.microsecond / 1000)
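# Illustrative example: epoch_ms(datetime(1970, 1, 1)) == 0; dt.microsecond / 1000 contributes the millisecond part.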
def get_kinds_filter(kinds):
""" Given a list of kinds, return the set of kinds not that are not part of that list.
i.e Returns the list of kinds to be filtered out. """
kind_map = model.log.get_log_entry_kinds()
kind_map = {key: kind_map[key] for key in kind_map if not isinstance(key, int)}
return [kind_name for kind_name in kind_map if kind_name not in kinds]

View file

@ -0,0 +1,291 @@
# pylint: disable=protected-access
import logging
from datetime import datetime, timedelta
from tzlocal import get_localzone
from dateutil.relativedelta import relativedelta
from data import model
from data.model import config
from data.database import LogEntry, LogEntry2, LogEntry3, UseThenDisconnect
from data.logs_model.interface import ActionLogsDataInterface, LogsIterationTimeout, \
LogRotationContextInterface
from data.logs_model.datatypes import Log, AggregatedLogCount, LogEntriesPage
from data.logs_model.shared import SharedModel
from data.model.log import get_stale_logs, get_stale_logs_start_id, delete_stale_logs
logger = logging.getLogger(__name__)
MINIMUM_RANGE_SIZE = 1 # second
MAXIMUM_RANGE_SIZE = 60 * 60 * 24 * 30 # seconds ~= 1 month
EXPECTED_ITERATION_LOG_COUNT = 1000
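# The log tables are ordered newest-first: lookups start with LogEntry3 and fall back to the
# older LogEntry2 and LogEntry tables as each table's pages are exhausted.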
LOG_MODELS = [LogEntry3, LogEntry2, LogEntry]
class TableLogsModel(SharedModel, ActionLogsDataInterface):
"""
TableLogsModel implements the data model for the logs API backed by a single table
in the database.
"""
def __init__(self, should_skip_logging=None, **kwargs):
self._should_skip_logging = should_skip_logging
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
assert start_datetime is not None
assert end_datetime is not None
repository = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
assert repository
performer = None
if performer_name:
performer = model.user.get_user(performer_name)
assert performer
def get_logs(m, page_token):
logs_query = model.log.get_logs_query(start_datetime, end_datetime, performer=performer,
repository=repository, namespace=namespace_name,
ignore=filter_kinds, model=m)
logs, next_page_token = model.modelutil.paginate(logs_query, m,
descending=True,
page_token=page_token,
limit=20,
max_page=max_page_count,
sort_field_name='datetime')
return logs, next_page_token
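    # The page token carries the index of the table currently being paginated ('tti'), so
    # callers iterating pages transparently fall through LogEntry3 -> LogEntry2 -> LogEntry.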
TOKEN_TABLE_ID = 'tti'
table_index = 0
logs = []
next_page_token = page_token or None
# Skip empty pages (empty table)
while len(logs) == 0 and table_index < len(LOG_MODELS) - 1:
table_specified = next_page_token is not None and next_page_token.get(TOKEN_TABLE_ID) is not None
if table_specified:
table_index = next_page_token.get(TOKEN_TABLE_ID)
logs_result, next_page_token = get_logs(LOG_MODELS[table_index], next_page_token)
logs.extend(logs_result)
if next_page_token is None and table_index < len(LOG_MODELS) - 1:
next_page_token = {TOKEN_TABLE_ID: table_index + 1}
return LogEntriesPage([Log.for_logentry(log) for log in logs], next_page_token)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
repository = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
assert repository
performer = None
if performer_name:
performer = model.user.get_user(performer_name)
assert performer
def get_latest_logs(m):
logs_query = model.log.get_latest_logs_query(performer=performer, repository=repository,
namespace=namespace_name, ignore=filter_kinds,
model=m, size=size)
logs = list(logs_query)
return [Log.for_logentry(log) for log in logs]
return get_latest_logs(LOG_MODELS[0])
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
if end_datetime - start_datetime >= timedelta(weeks=4):
raise Exception('Cannot lookup aggregated logs over a period longer than a month')
repository = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
performer = None
if performer_name:
performer = model.user.get_user(performer_name)
entries = {}
for log_model in LOG_MODELS:
aggregated = model.log.get_aggregated_logs(start_datetime, end_datetime,
performer=performer,
repository=repository,
namespace=namespace_name,
ignore=filter_kinds,
model=log_model)
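      # The aggregation only reports a day-of-month, so the full date is rebuilt against the
      # start month; a day smaller than the start day must belong to the following month.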
for entry in aggregated:
synthetic_date = datetime(start_datetime.year, start_datetime.month, int(entry.day),
tzinfo=get_localzone())
if synthetic_date.day < start_datetime.day:
synthetic_date = synthetic_date + relativedelta(months=1)
key = '%s-%s' % (entry.kind_id, entry.day)
if key in entries:
entries[key] = AggregatedLogCount(entry.kind_id, entry.count + entries[key].count,
synthetic_date)
else:
entries[key] = AggregatedLogCount(entry.kind_id, entry.count, synthetic_date)
return entries.values()
def count_repository_actions(self, repository, day):
return model.repositoryactioncount.count_repository_actions(repository, day)
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
if self._should_skip_logging and self._should_skip_logging(kind_name, namespace_name,
is_free_namespace):
return
if repository_name is not None:
assert repository is None
assert namespace_name is not None
repository = model.repository.get_repository(namespace_name, repository_name)
model.log.log_action(kind_name, namespace_name, performer=performer, repository=repository,
ip=ip, metadata=metadata or {}, timestamp=timestamp)
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
# Using an adjusting scale, start downloading log rows in batches, starting at
# MINIMUM_RANGE_SIZE and doubling until we've reached EXPECTED_ITERATION_LOG_COUNT or
# the lookup range has reached MAXIMUM_RANGE_SIZE. If at any point this operation takes
    # longer than max_query_time, terminate the batch operation as timed out.
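    # For example (illustrative numbers only): with MINIMUM_RANGE_SIZE = 1 the ranges grow
    # 1s, 2s, 4s, ... while each batch returns fewer than EXPECTED_ITERATION_LOG_COUNT rows,
    # capped at MAXIMUM_RANGE_SIZE (~1 month); once a batch is large enough the range stops
    # growing.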
batch_start_time = datetime.utcnow()
current_start_datetime = start_datetime
current_batch_size = timedelta(seconds=MINIMUM_RANGE_SIZE)
while current_start_datetime < end_datetime:
# Verify we haven't been working for too long.
work_elapsed = datetime.utcnow() - batch_start_time
if max_query_time is not None and work_elapsed > max_query_time:
logger.error('Retrieval of logs `%s/%s` timed out with time of `%s`',
namespace_id, repository_id, work_elapsed)
raise LogsIterationTimeout()
current_end_datetime = current_start_datetime + current_batch_size
current_end_datetime = min(current_end_datetime, end_datetime)
# Load the next set of logs.
def load_logs():
logger.debug('Retrieving logs over range %s -> %s with namespace %s and repository %s',
current_start_datetime, current_end_datetime, namespace_id, repository_id)
logs_query = model.log.get_logs_query(namespace=namespace_id,
repository=repository_id,
start_time=current_start_datetime,
end_time=current_end_datetime)
logs = list(logs_query)
for log in logs:
if namespace_id is not None:
assert log.account_id == namespace_id
if repository_id is not None:
assert log.repository_id == repository_id
logs = [Log.for_logentry(log) for log in logs]
return logs
logs, elapsed = _run_and_time(load_logs)
if max_query_time is not None and elapsed > max_query_time:
logger.error('Retrieval of logs for export `%s/%s` with range `%s-%s` timed out at `%s`',
namespace_id, repository_id, current_start_datetime, current_end_datetime,
elapsed)
raise LogsIterationTimeout()
yield logs
# Move forward.
current_start_datetime = current_end_datetime
# Increase the batch size if necessary.
if len(logs) < EXPECTED_ITERATION_LOG_COUNT:
seconds = min(MAXIMUM_RANGE_SIZE, current_batch_size.total_seconds() * 2)
current_batch_size = timedelta(seconds=seconds)
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
""" Yield a context manager for a group of outdated logs. """
for log_model in LOG_MODELS:
while True:
with UseThenDisconnect(config.app_config):
start_id = get_stale_logs_start_id(log_model)
if start_id is None:
logger.warning('Failed to find start id')
break
logger.debug('Found starting ID %s', start_id)
lookup_end_id = start_id + min_logs_per_rotation
logs = [log for log in get_stale_logs(start_id, lookup_end_id,
log_model, cutoff_date)]
if not logs:
logger.debug('No further logs found')
break
end_id = max([log.id for log in logs])
context = DatabaseLogRotationContext(logs, log_model, start_id, end_id)
yield context
def _run_and_time(fn):
start_time = datetime.utcnow()
result = fn()
return result, datetime.utcnow() - start_time
table_logs_model = TableLogsModel()
class DatabaseLogRotationContext(LogRotationContextInterface):
"""
DatabaseLogRotationContext represents a batch of logs to be archived together.
  i.e. a set of logs to be archived in the same file (based on the number of logs per rotation).
When completed without exceptions, this context will delete the stale logs
from rows `start_id` to `end_id`.
"""
def __init__(self, logs, log_model, start_id, end_id):
self.logs = logs
self.log_model = log_model
self.start_id = start_id
self.end_id = end_id
def __enter__(self):
return self
def __exit__(self, ex_type, ex_value, ex_traceback):
if ex_type is None and ex_value is None and ex_traceback is None:
with UseThenDisconnect(config.app_config):
logger.debug('Deleting logs from IDs %s to %s', self.start_id, self.end_id)
delete_stale_logs(self.start_id, self.end_id, self.log_model)
def yield_logs_batch(self):
""" Yield a batch of logs and a filename for that batch. """
filename = '%d-%d-%s.txt.gz' % (self.start_id, self.end_id,
self.log_model.__name__.lower())
yield self.logs, filename

View file

View file

@ -0,0 +1,390 @@
import json
import uuid
import fnmatch
from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime
import dateutil.parser
from httmock import urlmatch, HTTMock
FAKE_ES_HOST = 'fakees'
EMPTY_RESULT = {
'hits': {'hits': [], 'total': 0},
'_shards': {'successful': 1, 'total': 1},
}
def parse_query(query):
if not query:
return {}
return {s.split('=')[0]: s.split('=')[1] for s in query.split("&")}
@contextmanager
def fake_elasticsearch(allow_wildcard=True):
templates = {}
docs = defaultdict(list)
scrolls = {}
id_counter = [1]
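  # In-memory state for the fake: index templates, documents per index, open scroll ids and
  # a document id counter. The function is meant to be used as a context manager around test
  # code that talks to FAKE_ES_HOST, e.g. `with fake_elasticsearch(): ...`.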
def transform(value, field_name):
# TODO: implement this using a real index template if we ever need more than a few
# fields here.
if field_name == 'datetime':
if isinstance(value, int):
return datetime.utcfromtimestamp(value / 1000)
parsed = dateutil.parser.parse(value)
return parsed
return value
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='GET')
def get_template(url, request):
template_name = url[len('/_template/'):]
if template_name in templates:
return {'status_code': 200}
return {'status_code': 404}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='PUT')
def put_template(url, request):
template_name = url[len('/_template/'):]
templates[template_name] = True
return {'status_code': 201}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_doc', method='POST')
def post_doc(url, request):
index_name, _ = url.path[1:].split('/')
item = json.loads(request.body)
item['_id'] = item['random_id']
id_counter[0] += 1
docs[index_name].append(item)
return {
'status_code': 204,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps({
"result": "created",
}),
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='DELETE')
def index_delete(url, request):
index_name_or_pattern = url.path[1:]
to_delete = []
for index_name in docs.keys():
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
to_delete.append(index_name)
for index in to_delete:
docs.pop(index)
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': {'acknowledged': True}
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='GET')
def index_lookup(url, request):
index_name_or_pattern = url.path[1:]
found = {}
for index_name in docs.keys():
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
found[index_name] = {}
if not found:
return {
'status_code': 404,
}
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(found),
}
def _match_query(index_name_or_pattern, query):
found = []
found_index = False
for index_name in docs.keys():
if not allow_wildcard and index_name_or_pattern.find('*') >= 0:
break
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
found_index = True
def _is_match(doc, current_query):
if current_query is None:
return True
for filter_type, filter_params in current_query.iteritems():
for field_name, filter_props in filter_params.iteritems():
if filter_type == 'range':
lt = transform(filter_props['lt'], field_name)
gte = transform(filter_props['gte'], field_name)
doc_value = transform(doc[field_name], field_name)
if not (doc_value < lt and doc_value >= gte):
return False
elif filter_type == 'term':
doc_value = transform(doc[field_name], field_name)
return doc_value == filter_props
elif filter_type == 'terms':
doc_value = transform(doc[field_name], field_name)
return doc_value in filter_props
elif filter_type == 'bool':
assert not 'should' in filter_params, 'should is unsupported'
must = filter_params.get('must')
must_not = filter_params.get('must_not')
filter_bool = filter_params.get('filter')
if must:
for check in must:
if not _is_match(doc, check):
return False
if must_not:
for check in must_not:
if _is_match(doc, check):
return False
if filter_bool:
for check in filter_bool:
if not _is_match(doc, check):
return False
else:
raise Exception('Unimplemented query %s: %s' % (filter_type, query))
return True
for doc in docs[index_name]:
if not _is_match(doc, query):
continue
found.append({'_source': doc, '_index': index_name})
return found, found_index or (index_name_or_pattern.find('*') >= 0)
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_count$', method='GET')
def count_docs(url, request):
request = json.loads(request.body)
index_name_or_pattern, _ = url.path[1:].split('/')
found, found_index = _match_query(index_name_or_pattern, request['query'])
if not found_index:
return {
'status_code': 404,
}
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps({'count': len(found)}),
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='GET')
def lookup_scroll(url, request):
request_obj = json.loads(request.body)
scroll_id = request_obj['scroll_id']
if scroll_id in scrolls:
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(scrolls[scroll_id]),
}
return {
'status_code': 404,
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='DELETE')
def delete_scroll(url, request):
request = json.loads(request.body)
for scroll_id in request['scroll_id']:
scrolls.pop(scroll_id, None)
return {
'status_code': 404,
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_search$', method='GET')
def lookup_docs(url, request):
query_params = parse_query(url.query)
request = json.loads(request.body)
index_name_or_pattern, _ = url.path[1:].split('/')
# Find matching docs.
query = request.get('query')
found, found_index = _match_query(index_name_or_pattern, query)
if not found_index:
return {
'status_code': 404,
}
# Sort.
sort = request.get('sort')
if sort:
if sort == ['_doc'] or sort == '_doc':
found.sort(key=lambda x: x['_source']['_id'])
else:
def get_sort_key(item):
source = item['_source']
key = ''
for sort_config in sort:
for sort_key, direction in sort_config.iteritems():
assert direction == 'desc'
sort_key = sort_key.replace('.keyword', '')
key += str(transform(source[sort_key], sort_key))
key += '|'
return key
found.sort(key=get_sort_key, reverse=True)
# Search after.
search_after = request.get('search_after')
if search_after:
sort_fields = []
for sort_config in sort:
if isinstance(sort_config, unicode):
sort_fields.append(sort_config)
continue
for sort_key, _ in sort_config.iteritems():
sort_key = sort_key.replace('.keyword', '')
sort_fields.append(sort_key)
for index, search_after_value in enumerate(search_after):
field_name = sort_fields[index]
value = transform(search_after_value, field_name)
if field_name == '_doc':
found = [f for f in found if transform(f['_source']['_id'], field_name) > value]
else:
found = [f for f in found if transform(f['_source'][field_name], field_name) < value]
if len(found) < 2:
break
if field_name == '_doc':
          if found[0]['_source']['_id'] != found[1]['_source']['_id']:
break
else:
          if found[0]['_source'][field_name] != found[1]['_source'][field_name]:
break
# Size.
size = request.get('size')
if size:
found = found[0:size]
# Aggregation.
# {u'query':
# {u'range':
# {u'datetime': {u'lt': u'2019-06-27T15:45:09.768085',
# u'gte': u'2019-06-27T15:35:09.768085'}}},
# u'aggs': {
# u'by_id': {
# u'terms': {u'field': u'kind_id'},
# u'aggs': {
# u'by_date': {u'date_histogram': {u'field': u'datetime', u'interval': u'day'}}}}},
# u'size': 0}
def _by_field(agg_field_params, results):
aggregated_by_field = defaultdict(list)
for agg_means, agg_means_params in agg_field_params.iteritems():
if agg_means == 'terms':
field_name = agg_means_params['field']
for result in results:
value = result['_source'][field_name]
aggregated_by_field[value].append(result)
elif agg_means == 'date_histogram':
field_name = agg_means_params['field']
interval = agg_means_params['interval']
for result in results:
value = transform(result['_source'][field_name], field_name)
aggregated_by_field[getattr(value, interval)].append(result)
elif agg_means == 'aggs':
# Skip. Handled below.
continue
else:
raise Exception('Unsupported aggregation method: %s' % agg_means)
# Invoke the aggregation recursively.
buckets = []
for field_value, field_results in aggregated_by_field.iteritems():
aggregated = _aggregate(agg_field_params, field_results)
if isinstance(aggregated, list):
aggregated = {'doc_count': len(aggregated)}
aggregated['key'] = field_value
buckets.append(aggregated)
return {'buckets': buckets}
def _aggregate(query_config, results):
agg_params = query_config.get(u'aggs')
if not agg_params:
return results
by_field_name = {}
for agg_field_name, agg_field_params in agg_params.iteritems():
by_field_name[agg_field_name] = _by_field(agg_field_params, results)
return by_field_name
final_result = {
'hits': {
'hits': found,
'total': len(found),
},
'_shards': {
'successful': 1,
'total': 1,
},
'aggregations': _aggregate(request, found),
}
if query_params.get('scroll'):
scroll_id = str(uuid.uuid4())
scrolls[scroll_id] = EMPTY_RESULT
final_result['_scroll_id'] = scroll_id
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(final_result),
}
@urlmatch(netloc=FAKE_ES_HOST)
def catchall_handler(url, request):
print "Unsupported URL: %s %s" % (request.method, url, )
return {'status_code': 501}
handlers = [get_template, put_template, index_delete, index_lookup, post_doc, count_docs,
lookup_docs, lookup_scroll, delete_scroll, catchall_handler]
with HTTMock(*handlers):
yield

View file

@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
import json
from datetime import datetime
from dateutil.parser import parse
from data.logs_model.datatypes import LogEntriesPage, Log, AggregatedLogCount
def _status(d, code=200):
return {"status_code": code, "content": json.dumps(d)}
def _shards(d, total=5, failed=0, successful=5):
d.update({"_shards": {"total": total, "failed": failed, "successful": successful}})
return d
def _hits(hits):
return {"hits": {"total": len(hits), "max_score": None, "hits": hits}}
INDEX_LIST_RESPONSE_HIT1_HIT2 = _status({
"logentry_2018-03-08": {},
"logentry_2018-04-02": {}
})
INDEX_LIST_RESPONSE_HIT2 = _status({
"logentry_2018-04-02": {}
})
INDEX_LIST_RESPONSE = _status({
"logentry_2019-01-01": {},
"logentry_2017-03-08": {},
"logentry_2018-03-08": {},
"logentry_2018-04-02": {}
})
DEFAULT_TEMPLATE_RESPONSE = _status({"acknowledged": True})
INDEX_RESPONSE_2019_01_01 = _status(
_shards({
"_index": "logentry_2019-01-01",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 0,
"_primary_term": 1,
"result": "created"
}))
INDEX_RESPONSE_2017_03_08 = _status(
_shards({
"_index": "logentry_2017-03-08",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 0,
"_primary_term": 1,
"result": "created"
}))
FAILURE_400 = _status({}, 400)
INDEX_REQUEST_2019_01_01 = [
"logentry_2019-01-01", {
"account_id":
1,
"repository_id":
1,
"ip":
"192.168.1.1",
"random_id":
233,
"datetime":
"2019-01-01T03:30:00",
"metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
"performer_id":
1,
"kind_id":
1
}
]
INDEX_REQUEST_2017_03_08 = [
"logentry_2017-03-08", {
"repository_id":
1,
"account_id":
1,
"ip":
"192.168.1.1",
"random_id":
233,
"datetime":
"2017-03-08T03:30:00",
"metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
"performer_id":
1,
"kind_id":
2
}
]
_hit1 = {
"_index": "logentry_2018-03-08",
"_type": "doc",
"_id": "1",
"_score": None,
"_source": {
"random_id":
233,
"kind_id":
1,
"account_id":
1,
"performer_id":
1,
"repository_id":
1,
"ip":
"192.168.1.1",
"metadata_json":
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
"datetime":
"2018-03-08T03:30",
},
"sort": [1520479800000, 233]
}
_hit2 = {
"_index": "logentry_2018-04-02",
"_type": "doc",
"_id": "2",
"_score": None,
"_source": {
"random_id":
233,
"kind_id":
2,
"account_id":
1,
"performer_id":
1,
"repository_id":
1,
"ip":
"192.168.1.2",
"metadata_json":
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
"datetime":
"2018-04-02T03:30",
},
"sort": [1522639800000, 233]
}
_log1 = Log(
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
"192.168.1.1", parse("2018-03-08T03:30"), "user1.email", "user1.username", "user1.robot",
"user1.organization", "user1.username", "user1.email", "user1.robot", 1)
_log2 = Log(
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
"192.168.1.2", parse("2018-04-02T03:30"), "user1.email", "user1.username", "user1.robot",
"user1.organization", "user1.username", "user1.email", "user1.robot", 2)
SEARCH_RESPONSE_START = _status(_shards(_hits([_hit1, _hit2])))
SEARCH_RESPONSE_END = _status(_shards(_hits([_hit2])))
SEARCH_REQUEST_START = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}]
}
},
"size": 2
}
SEARCH_REQUEST_END = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}]
}
},
"search_after": [1520479800000, 233],
"size": 2
}
SEARCH_REQUEST_FILTER = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}, {
"bool": {
"must_not": [{
"terms": {
"kind_id": [1]
}
}]
}
}]
}
},
"size": 2
}
SEARCH_PAGE_TOKEN = {
"datetime": datetime(2018, 3, 8, 3, 30).isoformat(),
"random_id": 233,
"page_number": 1
}
SEARCH_PAGE_START = LogEntriesPage(logs=[_log1], next_page_token=SEARCH_PAGE_TOKEN)
SEARCH_PAGE_END = LogEntriesPage(logs=[_log2], next_page_token=None)
SEARCH_PAGE_EMPTY = LogEntriesPage([], None)
AGGS_RESPONSE = _status(
_shards({
"hits": {
"total": 4,
"max_score": None,
"hits": []
},
"aggregations": {
"by_id": {
"doc_count_error_upper_bound":
0,
"sum_other_doc_count":
0,
"buckets": [{
"key": 2,
"doc_count": 3,
"by_date": {
"buckets": [{
"key_as_string": "2009-11-12T00:00:00.000Z",
"key": 1257984000000,
"doc_count": 1
}, {
"key_as_string": "2009-11-13T00:00:00.000Z",
"key": 1258070400000,
"doc_count": 0
}, {
"key_as_string": "2009-11-14T00:00:00.000Z",
"key": 1258156800000,
"doc_count": 2
}]
}
}, {
"key": 1,
"doc_count": 1,
"by_date": {
"buckets": [{
"key_as_string": "2009-11-15T00:00:00.000Z",
"key": 1258243200000,
"doc_count": 1
}]
}
}]
}
}
}))
AGGS_REQUEST = {
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}, {
"bool": {
"must_not": [{
"terms": {
"kind_id": [2]
}
}]
}
}],
"must": [{
"range": {
"datetime": {
"lt": "2018-04-08T03:30:00",
"gte": "2018-03-08T03:30:00"
}
}
}]
}
},
"aggs": {
"by_id": {
"terms": {
"field": "kind_id"
},
"aggs": {
"by_date": {
"date_histogram": {
"field": "datetime",
"interval": "day"
}
}
}
}
},
"size": 0
}
AGGS_COUNT = [
AggregatedLogCount(1, 1, parse("2009-11-15T00:00:00.000")),
AggregatedLogCount(2, 1, parse("2009-11-12T00:00:00.000")),
AggregatedLogCount(2, 2, parse("2009-11-14T00:00:00.000"))
]
COUNT_REQUEST = {
"query": {
"bool": {
"filter": [{
"term": {
"repository_id": 1
}
}]
}
}
}
COUNT_RESPONSE = _status(_shards({
"count": 1,
}))
# Assume there are two pages of scroll results.
_scroll_id = "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAACEmFkk1aGlTRzdSUWllejZmYTlEYTN3SVEAAAAAAAAhJRZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lRAAAAAAAAHtAWLWZpaFZXVzVSTy1OTXA5V3MwcHZrZwAAAAAAAB7RFi1maWhWV1c1Uk8tTk1wOVdzMHB2a2cAAAAAAAAhJxZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lR"
def _scroll(d):
d["_scroll_id"] = _scroll_id
return d
SCROLL_CREATE = _status(_shards(_scroll(_hits([_hit1]))))
SCROLL_GET = _status(_shards(_scroll(_hits([_hit2]))))
SCROLL_GET_2 = _status(_shards(_scroll(_hits([]))))
SCROLL_DELETE = _status({"succeeded": True, "num_freed": 5})
SCROLL_LOGS = [[_log1], [_log2]]
SCROLL_REQUESTS = [
[
"5m", 1, {
"sort": "_doc",
"query": {
"range": {
"datetime": {
"lt": "2018-04-02T00:00:00",
"gte": "2018-03-08T00:00:00"
}
}
}
}
],
[{"scroll": "5m", "scroll_id": _scroll_id}],
[{"scroll":"5m", "scroll_id": _scroll_id}],
[{"scroll_id": [_scroll_id]}],
]
SCROLL_RESPONSES = [SCROLL_CREATE, SCROLL_GET, SCROLL_GET_2, SCROLL_DELETE]

View file

@ -0,0 +1,130 @@
from datetime import date, datetime, timedelta
from freezegun import freeze_time
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import CombinedLogsModel
from test.fixtures import *
@pytest.fixture()
def first_model():
return InMemoryModel()
@pytest.fixture()
def second_model():
return InMemoryModel()
@pytest.fixture()
def combined_model(first_model, second_model, initialized_db):
return CombinedLogsModel(first_model, second_model)
def test_log_action(first_model, second_model, combined_model, initialized_db):
day = date(2019, 1, 1)
# Write to the combined model.
with freeze_time(day):
combined_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
simple_repo = model.repository.get_repository('devtable', 'simple')
# Make sure it is found in the first model but not the second.
assert combined_model.count_repository_actions(simple_repo, day) == 1
assert first_model.count_repository_actions(simple_repo, day) == 1
assert second_model.count_repository_actions(simple_repo, day) == 0
def test_count_repository_actions(first_model, second_model, combined_model, initialized_db):
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Ensure the counts match as expected.
day = datetime.today() - timedelta(minutes=60)
simple_repo = model.repository.get_repository('devtable', 'simple')
assert first_model.count_repository_actions(simple_repo, day) == 3
assert second_model.count_repository_actions(simple_repo, day) == 2
assert combined_model.count_repository_actions(simple_repo, day) == 5
def test_yield_logs_for_export(first_model, second_model, combined_model, initialized_db):
now = datetime.now()
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
later = datetime.now()
# Ensure the full set of logs is yielded.
first_logs = list(first_model.yield_logs_for_export(now, later))[0]
second_logs = list(second_model.yield_logs_for_export(now, later))[0]
combined = list(combined_model.yield_logs_for_export(now, later))
full_combined = []
for subset in combined:
full_combined.extend(subset)
assert len(full_combined) == len(first_logs) + len(second_logs)
assert full_combined == (first_logs + second_logs)
def test_lookup_logs(first_model, second_model, combined_model, initialized_db):
now = datetime.now()
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
later = datetime.now()
def _collect_logs(model):
page_token = None
all_logs = []
while True:
paginated_logs = model.lookup_logs(now, later, page_token=page_token)
page_token = paginated_logs.next_page_token
all_logs.extend(paginated_logs.logs)
if page_token is None:
break
return all_logs
first_logs = _collect_logs(first_model)
second_logs = _collect_logs(second_model)
combined = _collect_logs(combined_model)
assert len(combined) == len(first_logs) + len(second_logs)
assert combined == (first_logs + second_logs)

View file

@ -0,0 +1,529 @@
# -*- coding: utf-8 -*-
# pylint: disable=redefined-outer-name, wildcard-import
import json
from datetime import datetime, timedelta
import pytest
from mock import patch, Mock
from dateutil.parser import parse
from httmock import urlmatch, HTTMock
from data.model.log import _json_serialize
from data.logs_model.elastic_logs import ElasticsearchLogs, INDEX_NAME_PREFIX, INDEX_DATE_FORMAT
from data.logs_model import configure, LogsModelProxy
from mock_elasticsearch import *
FAKE_ES_HOST = 'fakees'
FAKE_ES_HOST_PATTERN = r'fakees.*'
FAKE_ES_PORT = 443
FAKE_AWS_ACCESS_KEY = None
FAKE_AWS_SECRET_KEY = None
FAKE_AWS_REGION = None
@pytest.fixture()
def logs_model_config():
conf = {
'LOGS_MODEL': 'elasticsearch',
'LOGS_MODEL_CONFIG': {
'producer': 'elasticsearch',
'elasticsearch_config': {
'host': FAKE_ES_HOST,
'port': FAKE_ES_PORT,
'access_key': FAKE_AWS_ACCESS_KEY,
'secret_key': FAKE_AWS_SECRET_KEY,
'aws_region': FAKE_AWS_REGION
}
}
}
return conf
FAKE_LOG_ENTRY_KINDS = {'push_repo': 1, 'pull_repo': 2}
FAKE_NAMESPACES = {
'user1':
Mock(id=1, organization="user1.organization", username="user1.username", email="user1.email",
robot="user1.robot"),
'user2':
Mock(id=2, organization="user2.organization", username="user2.username", email="user2.email",
robot="user2.robot")
}
FAKE_REPOSITORIES = {
'user1/repo1': Mock(id=1, namespace_user=FAKE_NAMESPACES['user1']),
'user2/repo2': Mock(id=2, namespace_user=FAKE_NAMESPACES['user2']),
}
@pytest.fixture()
def logs_model():
# prevent logs model from changing
logs_model = LogsModelProxy()
with patch('data.logs_model.logs_model', logs_model):
yield logs_model
@pytest.fixture(scope='function')
def app_config(logs_model_config):
fake_config = {}
fake_config.update(logs_model_config)
with patch("data.logs_model.document_logs_model.config.app_config", fake_config):
yield fake_config
@pytest.fixture()
def mock_page_size():
with patch('data.logs_model.document_logs_model.PAGE_SIZE', 1):
yield
@pytest.fixture()
def mock_max_result_window():
with patch('data.logs_model.document_logs_model.DEFAULT_RESULT_WINDOW', 1):
yield
@pytest.fixture
def mock_random_id():
mock_random = Mock(return_value=233)
with patch('data.logs_model.document_logs_model._random_id', mock_random):
yield
@pytest.fixture()
def mock_db_model():
def get_user_map_by_ids(namespace_ids):
mapping = {}
for i in namespace_ids:
for name in FAKE_NAMESPACES:
if FAKE_NAMESPACES[name].id == i:
mapping[i] = FAKE_NAMESPACES[name]
return mapping
model = Mock(
user=Mock(
get_namespace_user=FAKE_NAMESPACES.get,
get_user_or_org=FAKE_NAMESPACES.get,
get_user=FAKE_NAMESPACES.get,
get_user_map_by_ids=get_user_map_by_ids,
),
repository=Mock(get_repository=lambda user_name, repo_name: FAKE_REPOSITORIES.get(
user_name + '/' + repo_name),
),
log=Mock(
_get_log_entry_kind=lambda name: FAKE_LOG_ENTRY_KINDS[name],
_json_serialize=_json_serialize,
get_log_entry_kinds=Mock(return_value=FAKE_LOG_ENTRY_KINDS),
),
)
with patch('data.logs_model.document_logs_model.model', model), patch(
'data.logs_model.datatypes.model', model):
yield
def parse_query(query):
return {s.split('=')[0]: s.split('=')[1] for s in query.split("&") if s != ""}
@pytest.fixture()
def mock_elasticsearch():
mock = Mock()
mock.template.side_effect = NotImplementedError
mock.index.side_effect = NotImplementedError
mock.count.side_effect = NotImplementedError
mock.scroll_get.side_effect = NotImplementedError
mock.scroll_delete.side_effect = NotImplementedError
mock.search_scroll_create.side_effect = NotImplementedError
mock.search_aggs.side_effect = NotImplementedError
mock.search_after.side_effect = NotImplementedError
mock.list_indices.side_effect = NotImplementedError
@urlmatch(netloc=r'.*', path=r'.*')
def default(url, req):
raise Exception('\nurl={}\nmethod={}\nreq.url={}\nheaders={}\nbody={}'.format(
url, req.method, req.url, req.headers, req.body))
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_template/.*')
def template(url, req):
return mock.template(url.query.split('/')[-1], req.body)
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]+)')
def list_indices(url, req):
return mock.list_indices()
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_[0-9\-]*/_doc')
def index(url, req):
index = url.path.split('/')[1]
body = json.loads(req.body)
body['metadata_json'] = json.loads(body['metadata_json'])
return mock.index(index, body)
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_([0-9\-]*|\*)/_count')
def count(_, req):
return mock.count(json.loads(req.body))
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_search/scroll')
def scroll(url, req):
if req.method == 'DELETE':
return mock.scroll_delete(json.loads(req.body))
elif req.method == 'GET':
request_obj = json.loads(req.body)
return mock.scroll_get(request_obj)
raise NotImplementedError()
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]*)/_search')
def search(url, req):
if "scroll" in url.query:
query = parse_query(url.query)
window_size = query['scroll']
maximum_result_size = int(query['size'])
return mock.search_scroll_create(window_size, maximum_result_size, json.loads(req.body))
elif "aggs" in req.body:
return mock.search_aggs(json.loads(req.body))
else:
return mock.search_after(json.loads(req.body))
with HTTMock(scroll, count, search, index, template, list_indices, default):
yield mock
@pytest.mark.parametrize(
"""
unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
timestamp,
index_response, expected_request, throws
""",
[
# Invalid inputs
pytest.param(
False, 'non-existing', None, None, None,
None,
None, None, True,
id="Invalid Kind"
),
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), 'repo1',
None,
None, None, True,
id="Invalid Parameters"
),
# Remote exceptions
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), None,
None,
FAILURE_400, None, True,
id="Throw on pull log failure"
),
pytest.param(
True, 'pull_repo', 'user1', Mock(id=1), None,
parse("2017-03-08T03:30"),
FAILURE_400, INDEX_REQUEST_2017_03_08, False,
id="Ok on pull log failure"
),
# Success executions
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), None,
parse("2017-03-08T03:30"),
INDEX_RESPONSE_2017_03_08, INDEX_REQUEST_2017_03_08, False,
id="Log with namespace name and repository"
),
pytest.param(
False, 'push_repo', 'user1', None, 'repo1',
parse("2019-01-01T03:30"),
INDEX_RESPONSE_2019_01_01, INDEX_REQUEST_2019_01_01, False,
id="Log with namespace name and repository name"
),
])
def test_log_action(unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
timestamp,
index_response, expected_request, throws,
app_config, logs_model, mock_elasticsearch, mock_db_model, mock_random_id):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.index = Mock(return_value=index_response)
app_config['ALLOW_PULLS_WITHOUT_STRICT_LOGGING'] = unlogged_pulls_ok
configure(app_config)
performer = Mock(id=1)
ip = "192.168.1.1"
metadata = {'key': 'value', 'time': parse("2018-03-08T03:30"), '😂': '😂👌👌👌👌'}
if throws:
with pytest.raises(Exception):
logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
repository_name, timestamp)
else:
logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
repository_name, timestamp)
mock_elasticsearch.index.assert_called_with(*expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds,
page_token,
max_page_count,
search_response,
list_indices_response,
expected_request,
expected_page,
throws
""",
[
# 1st page
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
None,
None,
SEARCH_RESPONSE_START,
INDEX_LIST_RESPONSE_HIT1_HIT2,
SEARCH_REQUEST_START,
SEARCH_PAGE_START,
False,
id="1st page"
),
# Last page
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
SEARCH_PAGE_TOKEN,
None,
SEARCH_RESPONSE_END,
INDEX_LIST_RESPONSE_HIT1_HIT2,
SEARCH_REQUEST_END,
SEARCH_PAGE_END,
False,
id="Search using pagination token"
),
# Filter
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
['push_repo'],
None,
None,
SEARCH_RESPONSE_END,
INDEX_LIST_RESPONSE_HIT2,
SEARCH_REQUEST_FILTER,
SEARCH_PAGE_END,
False,
id="Filtered search"
),
# Max page count
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
SEARCH_PAGE_TOKEN,
1,
AssertionError, # Assert that it should not reach the ES server
None,
None,
SEARCH_PAGE_EMPTY,
False,
id="Page token reaches maximum page count",
),
])
def test_lookup_logs(start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds,
page_token,
max_page_count,
search_response,
list_indices_response,
expected_request,
expected_page,
throws,
logs_model, mock_elasticsearch, mock_db_model, mock_page_size, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_after = Mock(return_value=search_response)
mock_elasticsearch.list_indices = Mock(return_value=list_indices_response)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
namespace_name, filter_kinds, page_token, max_page_count)
else:
page = logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
namespace_name, filter_kinds, page_token, max_page_count)
assert page == expected_page
if expected_request:
mock_elasticsearch.search_after.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds, search_response, expected_request, expected_counts, throws
""",
[
# Valid
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
['pull_repo'], AGGS_RESPONSE, AGGS_REQUEST, AGGS_COUNT, False,
id="Valid Counts"
),
# Invalid case: date range too big
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-09T03:30'),
'user1', 'repo1', 'user1',
[], None, None, None, True,
id="Throw on date range too big"
)
])
def test_get_aggregated_log_counts(start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds, search_response, expected_request, expected_counts, throws,
logs_model, mock_elasticsearch, mock_db_model, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_aggs = Mock(return_value=search_response)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds)
else:
counts = logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds)
assert set(counts) == set(expected_counts)
if expected_request:
mock_elasticsearch.search_aggs.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
repository,
day,
count_response, expected_request, expected_count, throws
""",
[
pytest.param(
FAKE_REPOSITORIES['user1/repo1'],
parse("2018-03-08").date(),
COUNT_RESPONSE, COUNT_REQUEST, 1, False,
id="Valid Count with 1 as result"),
])
def test_count_repository_actions(repository,
day,
count_response, expected_request, expected_count, throws,
logs_model, mock_elasticsearch, mock_db_model, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.count = Mock(return_value=count_response)
mock_elasticsearch.list_indices = Mock(return_value=INDEX_LIST_RESPONSE)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.count_repository_actions(repository, day)
else:
count = logs_model.count_repository_actions(repository, day)
assert count == expected_count
if expected_request:
mock_elasticsearch.count.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
repository_id, namespace_id,
max_query_time, scroll_responses, expected_requests, expected_logs, throws
""",
[
pytest.param(
parse("2018-03-08"), parse("2018-04-02"),
1, 1,
timedelta(seconds=10), SCROLL_RESPONSES, SCROLL_REQUESTS, SCROLL_LOGS, False,
id="Scroll 3 pages with page size = 1"
),
])
def test_yield_logs_for_export(start_datetime, end_datetime,
repository_id, namespace_id,
max_query_time, scroll_responses, expected_requests, expected_logs, throws,
logs_model, mock_elasticsearch, mock_db_model, mock_max_result_window, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_scroll_create = Mock(return_value=scroll_responses[0])
mock_elasticsearch.scroll_get = Mock(side_effect=scroll_responses[1:-1])
mock_elasticsearch.scroll_delete = Mock(return_value=scroll_responses[-1])
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.yield_logs_for_export(start_datetime, end_datetime, max_query_time=max_query_time)
else:
log_generator = logs_model.yield_logs_for_export(start_datetime, end_datetime,
max_query_time=max_query_time)
counter = 0
for logs in log_generator:
if counter == 0:
mock_elasticsearch.search_scroll_create.assert_called_with(*expected_requests[counter])
else:
mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[counter])
assert expected_logs[counter] == logs
counter += 1
    # The last two requests must be:
    #  1. a scroll GET whose response has 0 hits, which indicates the termination condition
    #  2. the scroll DELETE request
mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[-2])
mock_elasticsearch.scroll_delete.assert_called_with(*expected_requests[-1])
@pytest.mark.parametrize('prefix, is_valid', [
pytest.param('..', False, id='Invalid `..`'),
pytest.param('.', False, id='Invalid `.`'),
pytest.param('-prefix', False, id='Invalid prefix start -'),
pytest.param('_prefix', False, id='Invalid prefix start _'),
pytest.param('+prefix', False, id='Invalid prefix start +'),
pytest.param('prefix_with_UPPERCASES', False, id='Invalid uppercase'),
pytest.param('valid_index', True, id='Valid prefix'),
pytest.param('valid_index_with_numbers1234', True, id='Valid prefix with numbers'),
pytest.param('a'*256, False, id='Prefix too long')
])
def test_valid_index_prefix(prefix, is_valid):
assert ElasticsearchLogs._valid_index_prefix(prefix) == is_valid
@pytest.mark.parametrize('index, cutoff_date, expected_result', [
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 8),
True,
id="Index older than cutoff"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 4),
False,
id="Index younger than cutoff"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 6, 23),
False,
id="Index older than cutoff but timedelta less than 1 day"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 7),
True,
id="Index older than cutoff by exactly one day"
),
])
def test_can_delete_index(index, cutoff_date, expected_result):
es = ElasticsearchLogs(index_prefix=INDEX_NAME_PREFIX)
assert datetime.strptime(index.split(es._index_prefix, 1)[-1], INDEX_DATE_FORMAT)
assert es.can_delete_index(index, cutoff_date) == expected_result

View file

@ -0,0 +1,473 @@
from datetime import datetime, timedelta, date
from data.logs_model.datatypes import AggregatedLogCount
from data.logs_model.table_logs_model import TableLogsModel
from data.logs_model.combined_model import CombinedLogsModel
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import _merge_aggregated_log_counts
from data.logs_model.document_logs_model import _date_range_in_single_index, DocumentLogsModel
from data.logs_model.interface import LogsIterationTimeout
from data.logs_model.test.fake_elasticsearch import FAKE_ES_HOST, fake_elasticsearch
from data.database import LogEntry, LogEntry2, LogEntry3, LogEntryKind
from data import model
from test.fixtures import *
@pytest.fixture()
def mock_page_size():
page_size = 2
with patch('data.logs_model.document_logs_model.PAGE_SIZE', page_size):
yield page_size
@pytest.fixture()
def clear_db_logs(initialized_db):
LogEntry.delete().execute()
LogEntry2.delete().execute()
LogEntry3.delete().execute()
def combined_model():
return CombinedLogsModel(TableLogsModel(), InMemoryModel())
def es_model():
return DocumentLogsModel(producer='elasticsearch', elasticsearch_config={
'host': FAKE_ES_HOST,
'port': 12345,
})
@pytest.fixture()
def fake_es():
with fake_elasticsearch():
yield
@pytest.fixture(params=[TableLogsModel, InMemoryModel, es_model, combined_model])
def logs_model(request, clear_db_logs, fake_es):
return request.param()
def _lookup_logs(logs_model, start_time, end_time, **kwargs):
logs_found = []
page_token = None
while True:
found = logs_model.lookup_logs(start_time, end_time, page_token=page_token, **kwargs)
logs_found.extend(found.logs)
page_token = found.next_page_token
if not found.logs or not page_token:
break
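  # Pagination should never return the same log twice; the set() comparison below catches
  # duplicates across pages.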
assert len(logs_found) == len(set(logs_found))
return logs_found
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='Flaky on MySQL')
@pytest.mark.parametrize('namespace_name, repo_name, performer_name, check_args, expect_results', [
pytest.param('devtable', 'simple', 'devtable', {}, True, id='no filters'),
pytest.param('devtable', 'simple', 'devtable', {
'performer_name': 'devtable',
}, True, id='matching performer'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
}, True, id='matching namespace'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
'repository_name': 'simple',
}, True, id='matching repository'),
pytest.param('devtable', 'simple', 'devtable', {
'performer_name': 'public',
}, False, id='different performer'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'public',
}, False, id='different namespace'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
'repository_name': 'complex',
}, False, id='different repository'),
])
def test_logs(namespace_name, repo_name, performer_name, check_args, expect_results, logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user(performer_name)
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for index in range(0, 3):
logs_model.log_action(kind.name, namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
found = _lookup_logs(logs_model, start_timestamp, start_timestamp + timedelta(minutes=10),
**check_args)
if expect_results:
assert len(found) == len(kinds) * 3
else:
assert not found
aggregated_counts = logs_model.get_aggregated_log_counts(start_timestamp,
start_timestamp + timedelta(minutes=10),
**check_args)
if expect_results:
assert len(aggregated_counts) == len(kinds)
for ac in aggregated_counts:
assert ac.count == 3
else:
assert not aggregated_counts
@pytest.mark.parametrize('filter_kinds, expect_results', [
pytest.param(None, True),
pytest.param(['push_repo'], True, id='push_repo filter'),
pytest.param(['pull_repo'], True, id='pull_repo filter'),
pytest.param(['push_repo', 'pull_repo'], False, id='push and pull filters')
])
def test_lookup_latest_logs(filter_kinds, expect_results, logs_model):
kind_map = model.log.get_log_entry_kinds()
if filter_kinds:
    ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
else:
ignore_ids = []
now = datetime.now()
namespace_name = 'devtable'
repo_name = 'simple'
performer_name = 'devtable'
user = model.user.get_user(performer_name)
size = 3
# Log some push actions
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=1, seconds=11))
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=7, seconds=33))
# Log some pull actions
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=0, seconds=3))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=3, seconds=55))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=5, seconds=3))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=11, seconds=11))
# Get the latest logs
latest_logs = logs_model.lookup_latest_logs(performer_name, repo_name, namespace_name,
filter_kinds=filter_kinds, size=size)
# Test max lookup size
assert len(latest_logs) <= size
# Make sure that the latest logs returned are in decreasing order
assert all(x >= y for x, y in zip(latest_logs, latest_logs[1:]))
if expect_results:
assert latest_logs
# Lookup all logs filtered by kinds and sort them in reverse chronological order
all_logs = _lookup_logs(logs_model, now - timedelta(days=30), now + timedelta(days=30),
filter_kinds=filter_kinds, namespace_name=namespace_name,
repository_name=repo_name)
all_logs = sorted(all_logs, key=lambda l: l.datetime, reverse=True)
# Check that querying all logs does not return the filtered kinds
assert all([log.kind_id not in ignore_ids for log in all_logs])
    # Check that the latest logs contain only the most recent ones
assert latest_logs == all_logs[:len(latest_logs)]
def test_count_repository_actions(logs_model):
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different repo.
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
# Count the actions.
day = date.today()
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 3
complex_repo = model.repository.get_repository('devtable', 'complex')
count = logs_model.count_repository_actions(complex_repo, day)
assert count == 2
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
assert count == 0
def test_yield_log_rotation_context(logs_model):
cutoff_date = datetime.now()
min_logs_per_rotation = 3
# Log some actions to be archived
# One day
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
ip='1.2.3.4', timestamp=cutoff_date-timedelta(days=1, seconds=1))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='5.6.7.8', timestamp=cutoff_date-timedelta(days=1, seconds=2))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple3',
ip='9.10.11.12', timestamp=cutoff_date-timedelta(days=1, seconds=3))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple4',
ip='0.0.0.0', timestamp=cutoff_date-timedelta(days=1, seconds=4))
# Another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=1))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=2))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=3))
found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
assert found is not None and len(found) == 7
# Iterate the logs using the log rotation contexts
all_logs = []
for log_rotation_context in logs_model.yield_log_rotation_context(cutoff_date,
min_logs_per_rotation):
with log_rotation_context as context:
for logs, _ in context.yield_logs_batch():
all_logs.extend(logs)
assert len(all_logs) == 7
found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
assert not found
  # Sort by datetime and make sure the datetimes are strictly increasing, which also verifies
  # that no duplicate logs were returned.
all_logs.sort(key=lambda d: d.datetime)
assert all(x.datetime < y.datetime for x, y in zip(all_logs, all_logs[1:]))
def test_count_repository_actions_with_wildcard_disabled(initialized_db):
with fake_elasticsearch(allow_wildcard=False):
logs_model = es_model()
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different repo.
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
# Count the actions.
day = date.today()
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 3
complex_repo = model.repository.get_repository('devtable', 'complex')
count = logs_model.count_repository_actions(complex_repo, day)
assert count == 2
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
assert count == 0
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='Flaky on MySQL')
def test_yield_logs_for_export(logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user('devtable')
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for index in range(0, 10):
logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
# Yield the logs.
simple_repo = model.repository.get_repository('devtable', 'simple')
logs_found = []
for logs in logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=10),
repository_id=simple_repo.id):
logs_found.extend(logs)
# Ensure we found all added logs.
assert len(logs_found) == len(kinds) * 10
def test_yield_logs_for_export_timeout(logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user('devtable')
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for _ in range(0, 2):
logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
  # Yield the logs. Since the max query time is set to zero, the export should immediately time out.
simple_repo = model.repository.get_repository('devtable', 'simple')
with pytest.raises(LogsIterationTimeout):
list(logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=1),
repository_id=simple_repo.id,
max_query_time=timedelta(seconds=0)))
def test_disabled_namespace(clear_db_logs):
logs_model = TableLogsModel(lambda kind, namespace, is_free: namespace == 'devtable')
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different namespace.
logs_model.log_action('push_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
# Count the actions.
day = datetime.today() - timedelta(minutes=60)
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 0
org_repo = model.repository.get_repository('buynlarge', 'orgrepo')
count = logs_model.count_repository_actions(org_repo, day)
assert count == 3
@pytest.mark.parametrize('aggregated_log_counts1, aggregated_log_counts2, expected_result', [
pytest.param(
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 3, datetime(2019, 6, 7, 0, 0)), # 2
],
[
AggregatedLogCount(1, 5, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)), # 3
],
[
AggregatedLogCount(1, 8, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 10, datetime(2019, 6, 7, 0, 0)), # 2
AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)) # 3
]
),
pytest.param(
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
],
[
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
],
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
]
),
pytest.param(
[],
[AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))],
[AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))]
),
])
def test_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2, expected_result):
assert (sorted(_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2)) ==
sorted(expected_result))
@pytest.mark.parametrize('dt1, dt2, expected_result', [
# Valid dates
pytest.param(date(2019, 6, 17), date(2019, 6, 18), True),
# Invalid dates
pytest.param(date(2019, 6, 17), date(2019, 6, 17), False),
pytest.param(date(2019, 6, 17), date(2019, 6, 19), False),
pytest.param(date(2019, 6, 18), date(2019, 6, 17), False),
# Valid datetimes
pytest.param(datetime(2019, 6, 17, 0, 1), datetime(2019, 6, 17, 0, 2), True),
# Invalid datetimes
pytest.param(datetime(2019, 6, 17, 0, 2), datetime(2019, 6, 17, 0, 1), False),
pytest.param(datetime(2019, 6, 17, 11), datetime(2019, 6, 17, 11) + timedelta(hours=14), False),
])
def test_date_range_in_single_index(dt1, dt2, expected_result):
assert _date_range_in_single_index(dt1, dt2) == expected_result
def test_pagination(logs_model, mock_page_size):
"""
Make sure that pagination does not stop if searching through multiple indices by day,
and the current log count matches the page size while there are still indices to be searched.
"""
day1 = datetime.now()
day2 = day1 + timedelta(days=1)
day3 = day2 + timedelta(days=1)
# Log some actions in day indices
# One day
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
ip='1.2.3.4', timestamp=day1)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple1',
ip='5.6.7.8', timestamp=day1)
found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
assert len(found) == mock_page_size
# Another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='1.1.1.1', timestamp=day2)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='0.0.0.0', timestamp=day2)
# Yet another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='1.1.1.1', timestamp=day3)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='0.0.0.0', timestamp=day3)
found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
assert len(found) == 6

View file

@ -0,0 +1,77 @@
import logging
import pytest
from dateutil.parser import parse
from mock import patch, Mock
import botocore
from data.logs_model import configure
from test_elasticsearch import app_config, logs_model_config, logs_model, mock_elasticsearch, mock_db_model
from mock_elasticsearch import *
logger = logging.getLogger(__name__)
FAKE_KAFKA_BROKERS = ['fake_server1', 'fake_server2']
FAKE_KAFKA_TOPIC = 'sometopic'
FAKE_MAX_BLOCK_SECONDS = 1
@pytest.fixture()
def kafka_logs_producer_config(app_config):
producer_config = {}
producer_config.update(app_config)
kafka_config = {
'bootstrap_servers': FAKE_KAFKA_BROKERS,
'topic': FAKE_KAFKA_TOPIC,
'max_block_seconds': FAKE_MAX_BLOCK_SECONDS
}
producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kafka'
producer_config['LOGS_MODEL_CONFIG']['kafka_config'] = kafka_config
return producer_config
@pytest.fixture()
def kinesis_logs_producer_config(app_config):
producer_config = {}
producer_config.update(app_config)
kinesis_stream_config = {
'stream_name': 'test-stream',
'aws_region': 'fake_region',
'aws_access_key': 'some_key',
'aws_secret_key': 'some_secret'
}
producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kinesis_stream'
producer_config['LOGS_MODEL_CONFIG']['kinesis_stream_config'] = kinesis_stream_config
return producer_config
def test_kafka_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kafka_logs_producer_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
producer_config = kafka_logs_producer_config
with patch('kafka.client_async.KafkaClient.check_version'), patch('kafka.KafkaProducer.send') as mock_send:
configure(producer_config)
logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
None, 'repo1', parse("2019-01-01T03:30"))
mock_send.assert_called_once()
def test_kinesis_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kinesis_logs_producer_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
producer_config = kinesis_logs_producer_config
with patch('botocore.endpoint.EndpointCreator.create_endpoint'), \
patch('botocore.client.BaseClient._make_api_call') as mock_send:
configure(producer_config)
logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
None, 'repo1', parse("2019-01-01T03:30"))
# Check that a PutRecord api call is made.
# NOTE: The second arg of _make_api_call uses a randomized PartitionKey
mock_send.assert_called_once_with(u'PutRecord', mock_send.call_args_list[0][0][1])

View file

154
data/migrations/env.py Normal file
View file

@ -0,0 +1,154 @@
import logging
import os
from logging.config import fileConfig
from urllib import unquote
from alembic import context
from alembic.script.revision import ResolutionError
from alembic.util import CommandError
from sqlalchemy import engine_from_config, pool
from peewee import SqliteDatabase
from data.database import all_models, db
from data.migrations.tester import NoopTester, PopulateTestDataTester
from data.model.sqlalchemybridge import gen_sqlalchemy_metadata
from release import GIT_HEAD, REGION, SERVICE
from util.morecollections import AttrDict
from data.migrations.progress import PrometheusReporter, NullReporter
config = context.config
DB_URI = config.get_main_option('db_uri', 'sqlite:///test/data/test.db')
PROM_LABEL_PREFIX = 'DBA_OP_LABEL_'
# This option exists because alembic needs the db proxy to be configured in order
# to perform migrations. Importing the app initializes that proxy, but we don't want
# that in the case of the config app, since we are explicitly connecting to a db the
# user has passed in and cannot have an import dependency on app.
if config.get_main_option('alembic_setup_app', 'True') == 'True':
from app import app
DB_URI = app.config['DB_URI']
config.set_main_option('sqlalchemy.url', unquote(DB_URI))
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name:
fileConfig(config.config_file_name)
logger = logging.getLogger(__name__)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = gen_sqlalchemy_metadata(all_models)
tables = AttrDict(target_metadata.tables)
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def get_tester():
""" Returns the tester to use. We only return the tester that populates data
if the TEST_MIGRATE env var is set to `true` AND we make sure we're not
connecting to a production database.
"""
if os.environ.get('TEST_MIGRATE', '') == 'true':
url = unquote(DB_URI)
if url.find('amazonaws.com') < 0:
return PopulateTestDataTester()
return NoopTester()
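# Illustrative note (added comment, not in the original source): with TEST_MIGRATE=true and a local
# URI such as sqlite:///test/data/test.db, get_tester() returns PopulateTestDataTester(); any URI
# containing 'amazonaws.com' is treated as production and falls back to the no-op tester.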
def get_progress_reporter():
prom_addr = os.environ.get('DBA_OP_PROMETHEUS_PUSH_GATEWAY_ADDR', None)
if prom_addr is not None:
prom_job = os.environ.get('DBA_OP_JOB_ID')
def _process_label_key(label_key):
return label_key[len(PROM_LABEL_PREFIX):].lower()
labels = {_process_label_key(k): v for k, v in os.environ.items()
if k.startswith(PROM_LABEL_PREFIX)}
return PrometheusReporter(prom_addr, prom_job, labels)
else:
return NullReporter()
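# Illustrative example (added comment; the values are hypothetical): with an environment of
#   DBA_OP_PROMETHEUS_PUSH_GATEWAY_ADDR=pushgateway:9091 DBA_OP_JOB_ID=db-migration DBA_OP_LABEL_STAGE=prod
# the reporter is built as PrometheusReporter('pushgateway:9091', 'db-migration', {'stage': 'prod'}),
# since each DBA_OP_LABEL_* variable has its prefix stripped and its key lower-cased.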
def report_success(ctx=None, step=None, heads=None, run_args=None):
progress_reporter = run_args['progress_reporter']
progress_reporter.report_version_complete(success=True)
def run_migrations_offline():
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = unquote(DB_URI)
context.configure(url=url, target_metadata=target_metadata, transactional_ddl=True)
with context.begin_transaction():
context.run_migrations(tables=tables, tester=get_tester(), progress_reporter=NullReporter())
def run_migrations_online():
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
if (isinstance(db.obj, SqliteDatabase) and
not 'GENMIGRATE' in os.environ and
not 'DB_URI' in os.environ):
print 'Skipping Sqlite migration!'
return
progress_reporter = get_progress_reporter()
engine = engine_from_config(config.get_section(config.config_ini_section),
prefix='sqlalchemy.',
poolclass=pool.NullPool)
connection = engine.connect()
context.configure(connection=connection,
target_metadata=target_metadata,
transactional_ddl=False,
on_version_apply=report_success)
try:
with context.begin_transaction():
try:
context.run_migrations(tables=tables, tester=get_tester(),
progress_reporter=progress_reporter)
except (CommandError, ResolutionError) as ex:
if 'No such revision' not in str(ex):
raise
if not REGION or not GIT_HEAD:
raise
from data.model.release import get_recent_releases
# ignore revision error if we're running the previous release
releases = list(get_recent_releases(SERVICE, REGION).offset(1).limit(1))
if releases and releases[0].version == GIT_HEAD:
logger.warn('Skipping database migration because revision not found')
else:
raise
finally:
connection.close()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

147
data/migrations/migration.sh Executable file
View file

@ -0,0 +1,147 @@
set -e
PARSED_DOCKER_HOST=`echo $DOCKER_HOST | sed 's/tcp:\/\///' | sed 's/:.*//'`
DOCKER_IP="${PARSED_DOCKER_HOST:-127.0.0.1}"
MYSQL_CONFIG_OVERRIDE="{\"DB_URI\":\"mysql+pymysql://root:password@$DOCKER_IP/genschema\"}"
PERCONA_CONFIG_OVERRIDE="{\"DB_URI\":\"mysql+pymysql://root:password@$DOCKER_IP/genschema\"}"
PGSQL_CONFIG_OVERRIDE="{\"DB_URI\":\"postgresql://postgres@$DOCKER_IP/genschema\"}"
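# Usage sketch (added comment, inferred from the functions and flow below): running the script with
# no arguments only tests upgrade/downgrade of the existing revisions against MySQL, Postgres,
# MariaDB and Percona; passing a message, e.g. `./data/migrations/migration.sh "add widget table"`,
# additionally autogenerates a new revision against MySQL via gen_migrate before testing.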
up_mysql() {
# Run a SQL database on port 3306 inside of Docker.
docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql:5.7
echo 'Sleeping for 25...'
sleep 25
# Add the database to mysql.
docker run --rm --link mysql:mysql mysql:5.7 sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword'
}
down_mysql() {
docker kill mysql || true
docker rm -v mysql || true
}
up_mariadb() {
# Run a SQL database on port 3306 inside of Docker.
docker run --name mariadb -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mariadb
echo 'Sleeping for 25...'
sleep 25
# Add the database to mysql.
docker run --rm --link mariadb:mariadb mariadb sh -c 'echo "create database genschema" | mysql -h"$MARIADB_PORT_3306_TCP_ADDR" -P"$MARIADB_PORT_3306_TCP_PORT" -uroot -ppassword'
}
down_mariadb() {
docker kill mariadb || true
docker rm -v mariadb || true
}
up_percona() {
# Run a SQL database on port 3306 inside of Docker.
docker run --name percona -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d percona
echo 'Sleeping for 25...'
sleep 25
  # Add the database to mysql.
docker run --rm --link percona:percona percona sh -c 'echo "create database genschema" | mysql -h $PERCONA_PORT_3306_TCP_ADDR -uroot -ppassword'
}
down_percona() {
docker kill percona || true
docker rm -v percona || true
}
up_postgres() {
# Run a SQL database on port 5432 inside of Docker.
docker run --name postgres -p 5432:5432 -d postgres
  # Sleep for 5s to give the SQL server time to start.
echo 'Sleeping for 5...'
sleep 5
# Add the database to postgres.
docker run --rm --link postgres:postgres postgres sh -c 'echo "create database genschema" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres'
docker run --rm --link postgres:postgres postgres sh -c 'echo "CREATE EXTENSION IF NOT EXISTS pg_trgm;" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres -d genschema'
}
down_postgres() {
docker kill postgres || true
docker rm -v postgres || true
}
gen_migrate() {
# Generate a database with the schema as defined by the existing alembic model.
QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic upgrade head
# Generate the migration to the current model.
QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic revision --autogenerate -m "$2"
}
test_migrate() {
# Generate a database with the schema as defined by the existing alembic model.
echo '> Running upgrade'
TEST_MIGRATE=true QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic upgrade head
# Downgrade to verify it works in both directions.
echo '> Running downgrade'
COUNT=`ls data/migrations/versions/*.py | wc -l | tr -d ' '`
TEST_MIGRATE=true QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic downgrade "-$COUNT"
}
down_mysql
down_postgres
down_mariadb
down_percona
# Test (and generate, if requested) via MySQL.
echo '> Starting MySQL'
up_mysql
if [ -n "$*" ]
then
set +e
echo '> Generating Migration'
gen_migrate $MYSQL_CONFIG_OVERRIDE "$@"
set -e
fi
echo '> Testing Migration (mysql)'
set +e
test_migrate $MYSQL_CONFIG_OVERRIDE
set -e
down_mysql
# Test via Postgres.
echo '> Starting Postgres'
up_postgres
echo '> Testing Migration (postgres)'
set +e
test_migrate $PGSQL_CONFIG_OVERRIDE
set -e
down_postgres
# Test via MariaDB.
echo '> Starting MariaDB'
up_mariadb
echo '> Testing Migration (mariadb)'
set +e
test_migrate $MYSQL_CONFIG_OVERRIDE
set -e
down_mariadb
# Test via Percona.
echo '> Starting Percona'
up_percona
echo '> Testing Migration (percona)'
set +e
test_migrate $PERCONA_CONFIG_OVERRIDE
set -e
down_percona

101
data/migrations/progress.py Normal file
View file

@ -0,0 +1,101 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from functools import partial, wraps
from prometheus_client import CollectorRegistry, Gauge, Counter, push_to_gateway
from util.abchelpers import nooper
@add_metaclass(ABCMeta)
class ProgressReporter(object):
""" Implements an interface for reporting progress with the migrations.
"""
@abstractmethod
def report_version_complete(self, success):
""" Called when an entire migration is complete. """
@abstractmethod
def report_step_progress(self):
""" Called when a single step in the migration has been completed. """
@nooper
class NullReporter(ProgressReporter):
""" No-op version of the progress reporter, designed for use when no progress
reporting endpoint is provided. """
class PrometheusReporter(ProgressReporter):
def __init__(self, prom_pushgateway_addr, prom_job, labels, total_steps_num=None):
self._total_steps_num = total_steps_num
self._completed_steps = 0.0
registry = CollectorRegistry()
self._migration_completion_percent = Gauge(
'migration_completion_percent',
'Estimate of the completion percentage of the job',
registry=registry,
)
self._migration_complete_total = Counter(
'migration_complete_total',
'Binary value of whether or not the job is complete',
registry=registry,
)
self._migration_failed_total = Counter(
'migration_failed_total',
'Binary value of whether or not the job has failed',
registry=registry,
)
self._migration_items_completed_total = Counter(
'migration_items_completed_total',
'Number of items this migration has completed',
registry=registry,
)
self._push = partial(push_to_gateway,
prom_pushgateway_addr,
job=prom_job,
registry=registry,
grouping_key=labels,
)
def report_version_complete(self, success=True):
if success:
self._migration_complete_total.inc()
else:
self._migration_failed_total.inc()
self._migration_completion_percent.set(1.0)
self._push()
def report_step_progress(self):
self._migration_items_completed_total.inc()
if self._total_steps_num is not None:
self._completed_steps += 1
      self._migration_completion_percent.set(self._completed_steps / self._total_steps_num)
self._push()
class ProgressWrapper(object):
def __init__(self, delegate_module, progress_monitor):
self._delegate_module = delegate_module
self._progress_monitor = progress_monitor
def __getattr__(self, attr_name):
    # Will raise a proper AttributeError if the delegate does not define the attribute
    maybe_callable = getattr(self._delegate_module, attr_name)
if callable(maybe_callable):
      # Wrap the callable so that each invocation also reports step progress
      # to the progress monitor after the delegate call returns
@wraps(maybe_callable)
def wrapped_method(*args, **kwargs):
result = maybe_callable(*args, **kwargs)
self._progress_monitor.report_step_progress()
return result
return wrapped_method
return maybe_callable
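# Usage sketch (added comment; `reporter` is hypothetical): wrapping the alembic op module as
# `op = ProgressWrapper(original_op, reporter)` lets migration code call op.add_column(...),
# op.create_index(...), etc. unchanged, while every call also invokes
# reporter.report_step_progress() after the delegate returns.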

View file

@ -0,0 +1,27 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision}
Create Date: ${create_date}
"""
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
from alembic import op as original_op
from progress import ProgressWrapper
import sqlalchemy as sa
${imports if imports else ""}
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
${upgrades if upgrades else "pass"}
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,21 @@
import pytest
from mock import patch
from data.runmigration import run_alembic_migration
from alembic.script import ScriptDirectory
from test.fixtures import *
@pytest.mark.parametrize('db_uri, is_valid', [
('postgresql://devtable:password@quay-postgres/registry_database', True),
('postgresql://devtable:password%25@quay-postgres/registry_database', False),
('postgresql://devtable:password%%25@quay-postgres/registry_database', True),
('postgresql://devtable@db:password@quay-postgres/registry_database', True),
])
def test_alembic_db_uri(db_uri, is_valid):
""" Test if the given URI is escaped for string interpolation (Python's configparser). """
with patch('alembic.script.ScriptDirectory.run_env') as m:
if is_valid:
run_alembic_migration(db_uri)
else:
with pytest.raises(ValueError):
run_alembic_migration(db_uri)
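# Note added for clarity (not in the original file): alembic passes the URI through Python's
# configparser, where a literal '%' must be escaped as '%%'; that is why 'password%25' above is
# expected to be rejected while 'password%%25' is accepted.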

132
data/migrations/tester.py Normal file
View file

@ -0,0 +1,132 @@
import json
import logging
import uuid
from abc import ABCMeta, abstractmethod
from datetime import datetime
from six import add_metaclass
from alembic import op
from sqlalchemy import text
from util.abchelpers import nooper
logger = logging.getLogger(__name__)
def escape_table_name(table_name):
if op.get_bind().engine.name == 'postgresql':
# Needed for the `user` table.
return '"%s"' % table_name
return table_name
class DataTypes(object):
@staticmethod
def DateTime():
return datetime.now()
@staticmethod
def Date():
return datetime.now()
@staticmethod
def String():
return 'somestringvalue'
@staticmethod
def Token():
return '%s%s' % ('a' * 60, 'b' * 60)
@staticmethod
def UTF8Char():
return 'some other value'
@staticmethod
def UUID():
return str(uuid.uuid4())
@staticmethod
def JSON():
return json.dumps(dict(foo='bar', baz='meh'))
@staticmethod
def Boolean():
if op.get_bind().engine.name == 'postgresql':
return True
return 1
@staticmethod
def BigInteger():
return 21474836470
@staticmethod
def Integer():
return 42
@staticmethod
def Constant(value):
def get_value():
return value
return get_value
@staticmethod
def Foreign(table_name):
def get_index():
result = op.get_bind().execute("SELECT id FROM %s LIMIT 1" % escape_table_name(table_name))
try:
return list(result)[0][0]
except IndexError:
raise Exception('Could not find row for table %s' % table_name)
finally:
result.close()
return get_index
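# Illustrative note (added comment): DataTypes.Foreign('repository') returns a callable that, when
# invoked during test-data population, looks up an existing repository.id to use as the foreign-key
# value, and raises if the referenced table is empty.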
@add_metaclass(ABCMeta)
class MigrationTester(object):
""" Implements an interface for adding testing capabilities to the
data model migration system in Alembic.
"""
TestDataType = DataTypes
@abstractmethod
def populate_table(self, table_name, fields):
""" Called to populate a table with the given fields filled in with testing data. """
@abstractmethod
def populate_column(self, table_name, col_name, field_type):
""" Called to populate a column in a table to be filled in with testing data. """
@nooper
class NoopTester(MigrationTester):
""" No-op version of the tester, designed for production workloads. """
class PopulateTestDataTester(MigrationTester):
def populate_table(self, table_name, fields):
columns = {field_name: field_type() for field_name, field_type in fields}
field_name_vars = [':' + field_name for field_name, _ in fields]
if op.get_bind().engine.name == 'postgresql':
field_names = ["%s" % field_name for field_name, _ in fields]
else:
field_names = ["`%s`" % field_name for field_name, _ in fields]
table_name = escape_table_name(table_name)
query = text('INSERT INTO %s (%s) VALUES (%s)' % (table_name, ', '.join(field_names),
', '.join(field_name_vars)))
logger.info("Executing test query %s with values %s", query, columns.values())
op.get_bind().execute(query, **columns)
def populate_column(self, table_name, col_name, field_type):
col_value = field_type()
row_id = DataTypes.Foreign(table_name)()
table_name = escape_table_name(table_name)
update_text = text("UPDATE %s SET %s=:col_value where ID=:row_id" % (table_name, col_name))
logger.info("Executing test query %s with value %s on row %s", update_text, col_value, row_id)
op.get_bind().execute(update_text, col_value=col_value, row_id=row_id)
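# Illustrative example (added comment; the values come from DataTypes above): calling
# populate_table('user', [('username', DataTypes.String)]) builds
#   INSERT INTO "user" (username) VALUES (:username)
# on PostgreSQL (backticked column names on other engines) and executes it with
# username='somestringvalue'.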

View file

@ -0,0 +1,33 @@
"""Add creation date to User table
Revision ID: 0cf50323c78b
Revises: 87fbbc224f10
Create Date: 2018-03-09 13:19:41.903196
"""
# revision identifiers, used by Alembic.
revision = '0cf50323c78b'
down_revision = '87fbbc224f10'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('user', sa.Column('creation_date', sa.DateTime(), nullable=True))
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('user', 'creation_date', tester.TestDataType.DateTime)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('user', 'creation_date')
# ### end Alembic commands ###

View file

@ -0,0 +1,100 @@
"""Add Tag, TagKind and ManifestChild tables
Revision ID: 10f45ee2310b
Revises: 13411de1c0ff
Create Date: 2018-10-29 15:22:53.552216
"""
# revision identifiers, used by Alembic.
revision = '10f45ee2310b'
down_revision = '13411de1c0ff'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('tagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagkind'))
)
op.create_index('tagkind_name', 'tagkind', ['name'], unique=True)
op.create_table('manifestchild',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('child_manifest_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['child_manifest_id'], ['manifest.id'], name=op.f('fk_manifestchild_child_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestchild_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestchild_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestchild'))
)
op.create_index('manifestchild_child_manifest_id', 'manifestchild', ['child_manifest_id'], unique=False)
op.create_index('manifestchild_manifest_id', 'manifestchild', ['manifest_id'], unique=False)
op.create_index('manifestchild_manifest_id_child_manifest_id', 'manifestchild', ['manifest_id', 'child_manifest_id'], unique=True)
op.create_index('manifestchild_repository_id', 'manifestchild', ['repository_id'], unique=False)
op.create_index('manifestchild_repository_id_child_manifest_id', 'manifestchild', ['repository_id', 'child_manifest_id'], unique=False)
op.create_index('manifestchild_repository_id_manifest_id', 'manifestchild', ['repository_id', 'manifest_id'], unique=False)
op.create_index('manifestchild_repository_id_manifest_id_child_manifest_id', 'manifestchild', ['repository_id', 'manifest_id', 'child_manifest_id'], unique=False)
op.create_table('tag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start_ms', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end_ms', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.Column('reversion', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['tag.id'], name=op.f('fk_tag_linked_tag_id_tag')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tag_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['tagkind.id'], name=op.f('fk_tag_tag_kind_id_tagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tag'))
)
op.create_index('tag_lifetime_end_ms', 'tag', ['lifetime_end_ms'], unique=False)
op.create_index('tag_linked_tag_id', 'tag', ['linked_tag_id'], unique=False)
op.create_index('tag_manifest_id', 'tag', ['manifest_id'], unique=False)
op.create_index('tag_repository_id', 'tag', ['repository_id'], unique=False)
op.create_index('tag_repository_id_name', 'tag', ['repository_id', 'name'], unique=False)
op.create_index('tag_repository_id_name_hidden', 'tag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('tag_repository_id_name_lifetime_end_ms', 'tag', ['repository_id', 'name', 'lifetime_end_ms'], unique=True)
op.create_index('tag_repository_id_name_tag_kind_id', 'tag', ['repository_id', 'name', 'tag_kind_id'], unique=False)
op.create_index('tag_tag_kind_id', 'tag', ['tag_kind_id'], unique=False)
# ### end Alembic commands ###
op.bulk_insert(tables.tagkind,
[
{'name': 'tag'},
])
# ### population of test data ### #
tester.populate_table('tag', [
('repository_id', tester.TestDataType.Foreign('repository')),
('tag_kind_id', tester.TestDataType.Foreign('tagkind')),
('name', tester.TestDataType.String),
('manifest_id', tester.TestDataType.Foreign('manifest')),
('lifetime_start_ms', tester.TestDataType.BigInteger),
])
tester.populate_table('manifestchild', [
('repository_id', tester.TestDataType.Foreign('repository')),
('manifest_id', tester.TestDataType.Foreign('manifest')),
('child_manifest_id', tester.TestDataType.Foreign('manifest')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tag')
op.drop_table('manifestchild')
op.drop_table('tagkind')
# ### end Alembic commands ###

View file

@ -0,0 +1,46 @@
"""Remove unique from TagManifestToManifest
Revision ID: 13411de1c0ff
Revises: 654e6df88b71
Create Date: 2018-08-19 23:30:24.969549
"""
# revision identifiers, used by Alembic.
revision = '13411de1c0ff'
down_revision = '654e6df88b71'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
  # Note: Because of a restriction in MySQL, we cannot simply remove the index and re-add
  # it with unique=False, nor can we simply alter the index in place. To make it work, we'd have to
  # remove the primary key on the field, so instead we simply drop the table entirely and
  # recreate it with the modified index. The backfill will re-fill this in.
op.drop_table('tagmanifesttomanifest')
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=False)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
pass

View file

@ -0,0 +1,33 @@
"""Add maximum build queue count setting to user table
Revision ID: 152bb29a1bb3
Revises: cbc8177760d9
Create Date: 2018-02-20 13:34:34.902415
"""
# revision identifiers, used by Alembic.
revision = '152bb29a1bb3'
down_revision = 'cbc8177760d9'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('user', sa.Column('maximum_queued_builds_count', sa.Integer(), nullable=True))
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('user', 'maximum_queued_builds_count', tester.TestDataType.Integer)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('user', 'maximum_queued_builds_count')
# ### end Alembic commands ###

View file

@ -0,0 +1,27 @@
"""Make BlodUpload byte_count not nullable
Revision ID: 152edccba18c
Revises: c91c564aad34
Create Date: 2018-02-23 12:41:25.571835
"""
# revision identifiers, used by Alembic.
revision = '152edccba18c'
down_revision = 'c91c564aad34'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('blobupload', 'byte_count', existing_type=sa.BigInteger(),
nullable=False)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('blobupload', 'byte_count', existing_type=sa.BigInteger(),
nullable=True)

View file

@ -0,0 +1,49 @@
"""Add LogEntry2 table - QUAY.IO ONLY
Revision ID: 1783530bee68
Revises: 5b7503aada1b
Create Date: 2018-05-17 16:32:28.532264
"""
# revision identifiers, used by Alembic.
revision = '1783530bee68'
down_revision = '5b7503aada1b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('logentry2',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('kind_id', sa.Integer(), nullable=False),
sa.Column('account_id', sa.Integer(), nullable=False),
sa.Column('performer_id', sa.Integer(), nullable=True),
sa.Column('repository_id', sa.Integer(), nullable=True),
sa.Column('datetime', sa.DateTime(), nullable=False),
sa.Column('ip', sa.String(length=255), nullable=True),
sa.Column('metadata_json', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['kind_id'], ['logentrykind.id'], name=op.f('fk_logentry2_kind_id_logentrykind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry2'))
)
op.create_index('logentry2_account_id', 'logentry2', ['account_id'], unique=False)
op.create_index('logentry2_account_id_datetime', 'logentry2', ['account_id', 'datetime'], unique=False)
op.create_index('logentry2_datetime', 'logentry2', ['datetime'], unique=False)
op.create_index('logentry2_kind_id', 'logentry2', ['kind_id'], unique=False)
op.create_index('logentry2_performer_id', 'logentry2', ['performer_id'], unique=False)
op.create_index('logentry2_performer_id_datetime', 'logentry2', ['performer_id', 'datetime'], unique=False)
op.create_index('logentry2_repository_id', 'logentry2', ['repository_id'], unique=False)
op.create_index('logentry2_repository_id_datetime', 'logentry2', ['repository_id', 'datetime'], unique=False)
op.create_index('logentry2_repository_id_datetime_kind_id', 'logentry2', ['repository_id', 'datetime', 'kind_id'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('logentry2')
# ### end Alembic commands ###

View file

@ -0,0 +1,54 @@
"""Add automatic disable of build triggers
Revision ID: 17aff2e1354e
Revises: 61cadbacb9fc
Create Date: 2017-10-18 15:58:03.971526
"""
# revision identifiers, used by Alembic.
revision = '17aff2e1354e'
down_revision = '61cadbacb9fc'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('repositorybuildtrigger', sa.Column('successive_failure_count', sa.Integer(), server_default='0', nullable=False))
op.add_column('repositorybuildtrigger', sa.Column('successive_internal_error_count', sa.Integer(), server_default='0', nullable=False))
# ### end Alembic commands ###
op.bulk_insert(
tables.disablereason,
[
{'id': 2, 'name': 'successive_build_failures'},
{'id': 3, 'name': 'successive_build_internal_errors'},
],
)
# ### population of test data ### #
tester.populate_column('repositorybuildtrigger', 'successive_failure_count', tester.TestDataType.Integer)
tester.populate_column('repositorybuildtrigger', 'successive_internal_error_count', tester.TestDataType.Integer)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('repositorybuildtrigger', 'successive_internal_error_count')
op.drop_column('repositorybuildtrigger', 'successive_failure_count')
# ### end Alembic commands ###
op.execute(tables
.disablereason
.delete()
             .where(tables.disablereason.c.name == op.inline_literal('successive_build_internal_errors')))
op.execute(tables
.disablereason
.delete()
             .where(tables.disablereason.c.name == op.inline_literal('successive_build_failures')))

View file

@ -0,0 +1,35 @@
"""Add last_accessed field to User table
Revision ID: 224ce4c72c2f
Revises: b547bc139ad8
Create Date: 2018-03-12 22:44:07.070490
"""
# revision identifiers, used by Alembic.
revision = '224ce4c72c2f'
down_revision = 'b547bc139ad8'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('user', sa.Column('last_accessed', sa.DateTime(), nullable=True))
op.create_index('user_last_accessed', 'user', ['last_accessed'], unique=False)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('user', 'last_accessed', tester.TestDataType.DateTime)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('user_last_accessed', table_name='user')
op.drop_column('user', 'last_accessed')
# ### end Alembic commands ###

View file

@ -0,0 +1,125 @@
"""repo mirror columns
Revision ID: 34c8ef052ec9
Revises: cc6778199cdb
Create Date: 2019-10-07 13:11:20.424715
"""
# revision identifiers, used by Alembic.
revision = '34c8ef052ec9'
down_revision = 'cc6778199cdb'
from alembic import op
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
from datetime import datetime
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from peewee import ForeignKeyField, DateTimeField, BooleanField
from data.database import (BaseModel, RepoMirrorType, RepoMirrorStatus, RepoMirrorRule, uuid_generator,
QuayUserField, Repository, IntegerField, JSONField)
from data.fields import EnumField as ClientEnumField, CharField, EncryptedCharField
import logging
logger = logging.getLogger(__name__)
BATCH_SIZE = 10
# Original model
class RepoMirrorConfig(BaseModel):
"""
Represents a repository to be mirrored and any additional configuration
required to perform the mirroring.
"""
repository = ForeignKeyField(Repository, index=True, unique=True, backref='mirror')
creation_date = DateTimeField(default=datetime.utcnow)
is_enabled = BooleanField(default=True)
# Mirror Configuration
mirror_type = ClientEnumField(RepoMirrorType, default=RepoMirrorType.PULL)
internal_robot = QuayUserField(allows_robots=True, null=True, backref='mirrorpullrobot',
robot_null_delete=True)
external_reference = CharField()
external_registry = CharField()
external_namespace = CharField()
external_repository = CharField()
external_registry_username = EncryptedCharField(max_length=2048, null=True)
external_registry_password = EncryptedCharField(max_length=2048, null=True)
external_registry_config = JSONField(default={})
# Worker Queuing
sync_interval = IntegerField() # seconds between syncs
sync_start_date = DateTimeField(null=True) # next start time
sync_expiration_date = DateTimeField(null=True) # max duration
sync_retries_remaining = IntegerField(default=3)
sync_status = ClientEnumField(RepoMirrorStatus, default=RepoMirrorStatus.NEVER_RUN)
sync_transaction_id = CharField(default=uuid_generator, max_length=36)
# Tag-Matching Rules
root_rule = ForeignKeyField(RepoMirrorRule)
def _iterate(model_class, clause):
while True:
has_rows = False
for row in list(model_class.select().where(clause).limit(BATCH_SIZE)):
has_rows = True
yield row
if not has_rows:
break
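# Clarifying comment (added): _iterate re-runs the same filtered query in BATCH_SIZE chunks until no
# rows match; it relies on the caller mutating each yielded row (here, filling in
# external_reference) so that processed rows drop out of the clause and the loop terminates.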
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
logger.info('Migrating to external_reference from existing columns')
op.add_column('repomirrorconfig', sa.Column('external_reference', sa.Text(), nullable=True))
for repo_mirror in _iterate(RepoMirrorConfig, (RepoMirrorConfig.external_reference >> None)):
repo = '%s/%s/%s' % (repo_mirror.external_registry, repo_mirror.external_namespace, repo_mirror.external_repository)
logger.info('migrating %s' % repo)
repo_mirror.external_reference = repo
repo_mirror.save()
op.drop_column('repomirrorconfig', 'external_registry')
op.drop_column('repomirrorconfig', 'external_namespace')
op.drop_column('repomirrorconfig', 'external_repository')
op.alter_column('repomirrorconfig', 'external_reference', nullable=False, existing_type=sa.Text())
tester.populate_column('repomirrorconfig', 'external_reference', tester.TestDataType.String)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
  '''
  This will downgrade existing data but may not exactly match the previous data structure. If the
  external_reference does not have three parts (registry, namespace, repository), the placeholder
  value 'DOWNGRADE-FAILED' is inserted for the missing parts.
  '''
op.add_column('repomirrorconfig', sa.Column('external_registry', sa.String(length=255), nullable=True))
op.add_column('repomirrorconfig', sa.Column('external_namespace', sa.String(length=255), nullable=True))
op.add_column('repomirrorconfig', sa.Column('external_repository', sa.String(length=255), nullable=True))
logger.info('Restoring columns from external_reference')
for repo_mirror in _iterate(RepoMirrorConfig, (RepoMirrorConfig.external_registry >> None)):
logger.info('Restoring %s' % repo_mirror.external_reference)
parts = repo_mirror.external_reference.split('/', 2)
repo_mirror.external_registry = parts[0] if len(parts) >= 1 else 'DOWNGRADE-FAILED'
repo_mirror.external_namespace = parts[1] if len(parts) >= 2 else 'DOWNGRADE-FAILED'
repo_mirror.external_repository = parts[2] if len(parts) >= 3 else 'DOWNGRADE-FAILED'
repo_mirror.save()
op.drop_column('repomirrorconfig', 'external_reference')
op.alter_column('repomirrorconfig', 'external_registry', nullable=False, existing_type=sa.String(length=255))
op.alter_column('repomirrorconfig', 'external_namespace', nullable=False, existing_type=sa.String(length=255))
op.alter_column('repomirrorconfig', 'external_repository', nullable=False, existing_type=sa.String(length=255))

View file

@ -0,0 +1,63 @@
"""Add severity and media_type to global messages
Revision ID: 3e8cc74a1e7b
Revises: fc47c1ec019f
Create Date: 2017-01-17 16:22:28.584237
"""
# revision identifiers, used by Alembic.
revision = '3e8cc74a1e7b'
down_revision = 'fc47c1ec019f'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('messages', sa.Column('media_type_id', sa.Integer(), nullable=False, server_default='1'))
op.add_column('messages', sa.Column('severity', sa.String(length=255), nullable=False, server_default='info'))
op.alter_column('messages', 'uuid',
existing_type=mysql.VARCHAR(length=36),
server_default='',
nullable=False)
op.create_index('messages_media_type_id', 'messages', ['media_type_id'], unique=False)
op.create_index('messages_severity', 'messages', ['severity'], unique=False)
op.create_index('messages_uuid', 'messages', ['uuid'], unique=False)
op.create_foreign_key(op.f('fk_messages_media_type_id_mediatype'), 'messages', 'mediatype', ['media_type_id'], ['id'])
# ### end Alembic commands ###
op.bulk_insert(tables.mediatype,
[
{'name': 'text/markdown'},
])
# ### population of test data ### #
tester.populate_column('messages', 'media_type_id', tester.TestDataType.Foreign('mediatype'))
tester.populate_column('messages', 'severity', lambda: 'info')
tester.populate_column('messages', 'uuid', tester.TestDataType.UUID)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f('fk_messages_media_type_id_mediatype'), 'messages', type_='foreignkey')
op.drop_index('messages_uuid', table_name='messages')
op.drop_index('messages_severity', table_name='messages')
op.drop_index('messages_media_type_id', table_name='messages')
op.alter_column('messages', 'uuid',
existing_type=mysql.VARCHAR(length=36),
nullable=True)
op.drop_column('messages', 'severity')
op.drop_column('messages', 'media_type_id')
# ### end Alembic commands ###
op.execute(tables
.mediatype
.delete()
             .where(tables.mediatype.c.name == op.inline_literal('text/markdown')))

View file

@ -0,0 +1,30 @@
"""add_notification_type
Revision ID: 45fd8b9869d4
Revises: 94836b099894
Create Date: 2016-12-01 12:02:19.724528
"""
# revision identifiers, used by Alembic.
revision = '45fd8b9869d4'
down_revision = '94836b099894'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.bulk_insert(tables.notificationkind,
[
{'name': 'build_cancelled'},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.execute(tables
.notificationkind
.delete()
             .where(tables.notificationkind.c.name == op.inline_literal('build_cancelled')))

View file

@ -0,0 +1,27 @@
"""Add index on logs_archived on repositorybuild
Revision ID: 481623ba00ba
Revises: b9045731c4de
Create Date: 2019-02-15 16:09:47.326805
"""
# revision identifiers, used by Alembic.
revision = '481623ba00ba'
down_revision = 'b9045731c4de'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_index('repositorybuild_logs_archived', 'repositorybuild', ['logs_archived'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('repositorybuild_logs_archived', table_name='repositorybuild')
# ### end Alembic commands ###

View file

@ -0,0 +1,144 @@
"""Repository Mirror
Revision ID: 5248ddf35167
Revises: b918abdbee43
Create Date: 2019-06-25 16:22:36.310532
"""
revision = '5248ddf35167'
down_revision = 'b918abdbee43'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.create_table('repomirrorrule',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=36), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('creation_date', sa.DateTime(), nullable=False),
sa.Column('rule_type', sa.Integer(), nullable=False),
sa.Column('rule_value', sa.Text(), nullable=False),
sa.Column('left_child_id', sa.Integer(), nullable=True),
sa.Column('right_child_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['left_child_id'], ['repomirrorrule.id'], name=op.f('fk_repomirrorrule_left_child_id_repomirrorrule')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_repomirrorrule_repository_id_repository')),
sa.ForeignKeyConstraint(['right_child_id'], ['repomirrorrule.id'], name=op.f('fk_repomirrorrule_right_child_id_repomirrorrule')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_repomirrorrule')))
op.create_index('repomirrorrule_left_child_id', 'repomirrorrule', ['left_child_id'], unique=False)
op.create_index('repomirrorrule_repository_id', 'repomirrorrule', ['repository_id'], unique=False)
op.create_index('repomirrorrule_right_child_id', 'repomirrorrule', ['right_child_id'], unique=False)
op.create_index('repomirrorrule_rule_type', 'repomirrorrule', ['rule_type'], unique=False)
op.create_index('repomirrorrule_uuid', 'repomirrorrule', ['uuid'], unique=True)
op.create_table('repomirrorconfig',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('creation_date', sa.DateTime(), nullable=False),
sa.Column('is_enabled', sa.Boolean(), nullable=False),
sa.Column('mirror_type', sa.Integer(), nullable=False),
sa.Column('internal_robot_id', sa.Integer(), nullable=False),
sa.Column('external_registry', sa.String(length=255), nullable=False),
sa.Column('external_namespace', sa.String(length=255), nullable=False),
sa.Column('external_repository', sa.String(length=255), nullable=False),
sa.Column('external_registry_username', sa.String(length=2048), nullable=True),
sa.Column('external_registry_password', sa.String(length=2048), nullable=True),
sa.Column('external_registry_config', sa.Text(), nullable=False),
sa.Column('sync_interval', sa.Integer(), nullable=False, server_default='60'),
sa.Column('sync_start_date', sa.DateTime(), nullable=True),
sa.Column('sync_expiration_date', sa.DateTime(), nullable=True),
sa.Column('sync_retries_remaining', sa.Integer(), nullable=False, server_default='3'),
sa.Column('sync_status', sa.Integer(), nullable=False),
sa.Column('sync_transaction_id', sa.String(length=36), nullable=True),
sa.Column('root_rule_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_repomirrorconfig_repository_id_repository')),
sa.ForeignKeyConstraint(['root_rule_id'], ['repomirrorrule.id'], name=op.f('fk_repomirrorconfig_root_rule_id_repomirrorrule')),
sa.ForeignKeyConstraint(['internal_robot_id'], ['user.id'], name=op.f('fk_repomirrorconfig_internal_robot_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_repomirrorconfig'))
)
op.create_index('repomirrorconfig_mirror_type', 'repomirrorconfig', ['mirror_type'], unique=False)
op.create_index('repomirrorconfig_repository_id', 'repomirrorconfig', ['repository_id'], unique=True)
op.create_index('repomirrorconfig_root_rule_id', 'repomirrorconfig', ['root_rule_id'], unique=False)
op.create_index('repomirrorconfig_sync_status', 'repomirrorconfig', ['sync_status'], unique=False)
op.create_index('repomirrorconfig_sync_transaction_id', 'repomirrorconfig', ['sync_transaction_id'], unique=False)
op.create_index('repomirrorconfig_internal_robot_id', 'repomirrorconfig', ['internal_robot_id'], unique=False)
op.add_column(u'repository', sa.Column('state', sa.Integer(), nullable=False, server_default='0'))
op.create_index('repository_state', 'repository', ['state'], unique=False)
op.bulk_insert(tables.logentrykind,
[
{'name': 'repo_mirror_enabled'},
{'name': 'repo_mirror_disabled'},
{'name': 'repo_mirror_config_changed'},
{'name': 'repo_mirror_sync_started'},
{'name': 'repo_mirror_sync_failed'},
{'name': 'repo_mirror_sync_success'},
{'name': 'repo_mirror_sync_now_requested'},
{'name': 'repo_mirror_sync_tag_success'},
{'name': 'repo_mirror_sync_tag_failed'},
{'name': 'repo_mirror_sync_test_success'},
{'name': 'repo_mirror_sync_test_failed'},
{'name': 'repo_mirror_sync_test_started'},
{'name': 'change_repo_state'}
])
tester.populate_table('repomirrorrule', [
('uuid', tester.TestDataType.String),
('repository_id', tester.TestDataType.Foreign('repository')),
('creation_date', tester.TestDataType.DateTime),
('rule_type', tester.TestDataType.Integer),
('rule_value', tester.TestDataType.String),
])
tester.populate_table('repomirrorconfig', [
('repository_id', tester.TestDataType.Foreign('repository')),
('creation_date', tester.TestDataType.DateTime),
('is_enabled', tester.TestDataType.Boolean),
('mirror_type', tester.TestDataType.Constant(1)),
('internal_robot_id', tester.TestDataType.Foreign('user')),
('external_registry', tester.TestDataType.String),
('external_namespace', tester.TestDataType.String),
('external_repository', tester.TestDataType.String),
('external_registry_username', tester.TestDataType.String),
('external_registry_password', tester.TestDataType.String),
('external_registry_config', tester.TestDataType.JSON),
('sync_start_date', tester.TestDataType.DateTime),
('sync_expiration_date', tester.TestDataType.DateTime),
('sync_retries_remaining', tester.TestDataType.Integer),
('sync_status', tester.TestDataType.Constant(0)),
('sync_transaction_id', tester.TestDataType.String),
('root_rule_id', tester.TestDataType.Foreign('repomirrorrule')),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_column(u'repository', 'state')
op.drop_table('repomirrorconfig')
op.drop_table('repomirrorrule')
for logentrykind in [
'repo_mirror_enabled',
'repo_mirror_disabled',
'repo_mirror_config_changed',
'repo_mirror_sync_started',
'repo_mirror_sync_failed',
'repo_mirror_sync_success',
'repo_mirror_sync_now_requested',
'repo_mirror_sync_tag_success',
'repo_mirror_sync_tag_failed',
'repo_mirror_sync_test_success',
'repo_mirror_sync_test_failed',
'repo_mirror_sync_test_started',
'change_repo_state'
]:
op.execute(tables.logentrykind.delete()
.where(tables.logentrykind.c.name == op.inline_literal(logentrykind)))

View file

@ -0,0 +1,63 @@
"""Remove reference to subdir
Revision ID: 53e2ac668296
Revises: ed01e313d3cb
Create Date: 2017-03-28 15:01:31.073382
"""
# revision identifiers, used by Alembic.
import json
import logging
from alembic.script.revision import RevisionError
from alembic.util import CommandError
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
revision = '53e2ac668296'
down_revision = 'ed01e313d3cb'
log = logging.getLogger(__name__)
def run_migration(migrate_function, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
conn = op.get_bind()
triggers = conn.execute("SELECT id, config FROM repositorybuildtrigger")
for trigger in triggers:
config = json.dumps(migrate_function(json.loads(trigger[1])))
try:
conn.execute("UPDATE repositorybuildtrigger SET config=%s WHERE id=%s", config, trigger[0])
    except (RevisionError, CommandError) as e:
      log.warning("Failed to update build trigger %s with exception: %s", trigger[0], e)
def upgrade(tables, tester, progress_reporter):
run_migration(delete_subdir, progress_reporter)
def downgrade(tables, tester, progress_reporter):
run_migration(add_subdir, progress_reporter)
def delete_subdir(config):
""" Remove subdir from config """
if not config:
return config
if 'subdir' in config:
del config['subdir']
return config
def add_subdir(config):
""" Add subdir back into config """
if not config:
return config
if 'context' in config:
config['subdir'] = config['context']
return config
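# Illustrative sketch only (not part of the original migration): assuming a
# build-trigger config dict like the ones rewritten by run_migration above,
# the two transforms behave as follows:
#   delete_subdir({'context': '/', 'subdir': 'web'})  ->  {'context': '/'}
#   add_subdir({'context': 'web'})                    ->  {'context': 'web', 'subdir': 'web'}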

View file

@ -0,0 +1,49 @@
"""Add NamespaceGeoRestriction table
Revision ID: 54492a68a3cf
Revises: c00a1f15968b
Create Date: 2018-12-05 15:12:14.201116
"""
# revision identifiers, used by Alembic.
revision = '54492a68a3cf'
down_revision = 'c00a1f15968b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('namespacegeorestriction',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('namespace_id', sa.Integer(), nullable=False),
sa.Column('added', sa.DateTime(), nullable=False),
sa.Column('description', sa.String(length=255), nullable=False),
sa.Column('unstructured_json', sa.Text(), nullable=False),
sa.Column('restricted_region_iso_code', sa.String(length=255), nullable=False),
sa.ForeignKeyConstraint(['namespace_id'], ['user.id'], name=op.f('fk_namespacegeorestriction_namespace_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_namespacegeorestriction'))
)
op.create_index('namespacegeorestriction_namespace_id', 'namespacegeorestriction', ['namespace_id'], unique=False)
op.create_index('namespacegeorestriction_namespace_id_restricted_region_iso_code', 'namespacegeorestriction', ['namespace_id', 'restricted_region_iso_code'], unique=True)
op.create_index('namespacegeorestriction_restricted_region_iso_code', 'namespacegeorestriction', ['restricted_region_iso_code'], unique=False)
# ### end Alembic commands ###
tester.populate_table('namespacegeorestriction', [
('namespace_id', tester.TestDataType.Foreign('user')),
('added', tester.TestDataType.DateTime),
('description', tester.TestDataType.String),
('unstructured_json', tester.TestDataType.JSON),
('restricted_region_iso_code', tester.TestDataType.String),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('namespacegeorestriction')
# ### end Alembic commands ###

View file

@ -0,0 +1,26 @@
"""Cleanup old robots
Revision ID: 5b7503aada1b
Revises: 224ce4c72c2f
Create Date: 2018-05-09 17:18:52.230504
"""
# revision identifiers, used by Alembic.
revision = '5b7503aada1b'
down_revision = '224ce4c72c2f'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate.cleanup_old_robots import cleanup_old_robots
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
cleanup_old_robots()
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Nothing to do.
pass

View file

@ -0,0 +1,170 @@
"""Remove 'oci' tables not used by CNR. The rest will be migrated and renamed.
Revision ID: 5cbbfc95bac7
Revises: 1783530bee68
Create Date: 2018-05-23 17:28:40.114433
"""
# revision identifiers, used by Alembic.
revision = '5cbbfc95bac7'
down_revision = '1783530bee68'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from util.migrate import UTF8LongText, UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('derivedimage')
op.drop_table('manifestlabel')
op.drop_table('blobplacementlocationpreference')
op.drop_table('blobuploading')
op.drop_table('bittorrentpieces')
op.drop_table('manifestlayerdockerv1')
op.drop_table('manifestlayerscan')
op.drop_table('manifestlayer')
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
'manifestlayer',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_index', sa.BigInteger(), nullable=False),
sa.Column('metadata_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestlayer_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlayer_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayer'))
)
op.create_index('manifestlayer_manifest_index', 'manifestlayer', ['manifest_index'], unique=False)
op.create_index('manifestlayer_manifest_id_manifest_index', 'manifestlayer', ['manifest_id', 'manifest_index'], unique=True)
op.create_index('manifestlayer_manifest_id', 'manifestlayer', ['manifest_id'], unique=False)
op.create_index('manifestlayer_blob_id', 'manifestlayer', ['blob_id'], unique=False)
op.create_table(
'manifestlayerscan',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('layer_id', sa.Integer(), nullable=False),
sa.Column('scannable', sa.Boolean(), nullable=False),
sa.Column('scanned_by', UTF8CharField(length=255), nullable=False),
sa.ForeignKeyConstraint(['layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerscan_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerscan'))
)
op.create_index('manifestlayerscan_layer_id', 'manifestlayerscan', ['layer_id'], unique=True)
op.create_table(
'bittorrentpieces',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('pieces', UTF8LongText, nullable=False),
sa.Column('piece_length', sa.BigInteger(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_bittorrentpieces_blob_id_blob')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_bittorrentpieces'))
)
op.create_index('bittorrentpieces_blob_id_piece_length', 'bittorrentpieces', ['blob_id', 'piece_length'], unique=True)
op.create_index('bittorrentpieces_blob_id', 'bittorrentpieces', ['blob_id'], unique=False)
op.create_table(
'blobuploading',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('created', sa.DateTime(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.Column('byte_count', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_byte_count', sa.BigInteger(), nullable=True),
sa.Column('chunk_count', sa.BigInteger(), nullable=False),
sa.Column('storage_metadata', UTF8LongText, nullable=True),
sa.Column('sha_state', UTF8LongText, nullable=True),
sa.Column('piece_sha_state', UTF8LongText, nullable=True),
sa.Column('piece_hashes', UTF8LongText, nullable=True),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobuploading_location_id_blobplacementlocation')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_blobuploading_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobuploading'))
)
op.create_index('blobuploading_uuid', 'blobuploading', ['uuid'], unique=True)
op.create_index('blobuploading_repository_id_uuid', 'blobuploading', ['repository_id', 'uuid'], unique=True)
op.create_index('blobuploading_repository_id', 'blobuploading', ['repository_id'], unique=False)
op.create_index('blobuploading_location_id', 'blobuploading', ['location_id'], unique=False)
op.create_index('blobuploading_created', 'blobuploading', ['created'], unique=False)
op.create_table(
'manifestlayerdockerv1',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_layer_id', sa.Integer(), nullable=False),
sa.Column('image_id', UTF8CharField(length=255), nullable=False),
sa.Column('checksum', UTF8CharField(length=255), nullable=False),
sa.Column('compat_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['manifest_layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerdockerv1_manifest_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerdockerv1'))
)
op.create_index('manifestlayerdockerv1_manifest_layer_id', 'manifestlayerdockerv1', ['manifest_layer_id'], unique=False)
op.create_index('manifestlayerdockerv1_image_id', 'manifestlayerdockerv1', ['image_id'], unique=False)
op.create_table(
'manifestlabel',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('annotated_id', sa.Integer(), nullable=False),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['annotated_id'], ['manifest.id'], name=op.f('fk_manifestlabel_annotated_id_manifest')),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel'))
)
op.create_index('manifestlabel_repository_id_annotated_id_label_id', 'manifestlabel', ['repository_id', 'annotated_id', 'label_id'], unique=True)
op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False)
op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False)
op.create_index('manifestlabel_annotated_id', 'manifestlabel', ['annotated_id'], unique=False)
op.create_table(
'blobplacementlocationpreference',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacementlocpref_locid_blobplacementlocation')),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_blobplacementlocationpreference_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocationpreference'))
)
op.create_index('blobplacementlocationpreference_user_id', 'blobplacementlocationpreference', ['user_id'], unique=False)
op.create_index('blobplacementlocationpreference_location_id', 'blobplacementlocationpreference', ['location_id'], unique=False)
op.create_table(
'derivedimage',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('source_manifest_id', sa.Integer(), nullable=False),
sa.Column('derived_manifest_json', UTF8LongText, nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('uniqueness_hash', sa.String(length=255), nullable=False),
sa.Column('signature_blob_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_derivedimage_blob_id_blob')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_derivedimage_media_type_id_mediatype')),
sa.ForeignKeyConstraint(['signature_blob_id'], ['blob.id'], name=op.f('fk_derivedimage_signature_blob_id_blob')),
sa.ForeignKeyConstraint(['source_manifest_id'], ['manifest.id'], name=op.f('fk_derivedimage_source_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimage'))
)
op.create_index('derivedimage_uuid', 'derivedimage', ['uuid'], unique=True)
op.create_index('derivedimage_uniqueness_hash', 'derivedimage', ['uniqueness_hash'], unique=True)
op.create_index('derivedimage_source_manifest_id_media_type_id_uniqueness_hash', 'derivedimage', ['source_manifest_id', 'media_type_id', 'uniqueness_hash'], unique=True)
op.create_index('derivedimage_source_manifest_id_blob_id', 'derivedimage', ['source_manifest_id', 'blob_id'], unique=True)
op.create_index('derivedimage_source_manifest_id', 'derivedimage', ['source_manifest_id'], unique=False)
op.create_index('derivedimage_signature_blob_id', 'derivedimage', ['signature_blob_id'], unique=False)
op.create_index('derivedimage_media_type_id', 'derivedimage', ['media_type_id'], unique=False)
op.create_index('derivedimage_blob_id', 'derivedimage', ['blob_id'], unique=False)
# ### end Alembic commands ###

View file

@ -0,0 +1,32 @@
"""Backfill new appr tables
Revision ID: 5d463ea1e8a8
Revises: 610320e9dacf
Create Date: 2018-07-08 10:01:19.756126
"""
# revision identifiers, used by Alembic.
revision = '5d463ea1e8a8'
down_revision = '610320e9dacf'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate.table_ops import copy_table_contents
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
conn = op.get_bind()
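  # Copy rows from the legacy CNR tables into the appr-prefixed equivalents
  # created by the previous revision (610320e9dacf); the source tables are read only.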
copy_table_contents('blob', 'apprblob', conn)
copy_table_contents('manifest', 'apprmanifest', conn)
copy_table_contents('manifestlist', 'apprmanifestlist', conn)
copy_table_contents('blobplacement', 'apprblobplacement', conn)
copy_table_contents('manifestblob', 'apprmanifestblob', conn)
copy_table_contents('manifestlistmanifest', 'apprmanifestlistmanifest', conn)
copy_table_contents('tag', 'apprtag', conn)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
pass

View file

@ -0,0 +1,206 @@
"""Add new Appr-specific tables
Revision ID: 610320e9dacf
Revises: 5cbbfc95bac7
Create Date: 2018-05-24 16:46:13.514562
"""
# revision identifiers, used by Alembic.
revision = '610320e9dacf'
down_revision = '5cbbfc95bac7'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate.table_ops import copy_table_contents
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('apprblobplacementlocation',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprblobplacementlocation'))
)
op.create_index('apprblobplacementlocation_name', 'apprblobplacementlocation', ['name'], unique=True)
op.create_table('apprtagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprtagkind'))
)
op.create_index('apprtagkind_name', 'apprtagkind', ['name'], unique=True)
op.create_table('apprblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('size', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_size', sa.BigInteger(), nullable=True),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprblob_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprblob'))
)
op.create_index('apprblob_digest', 'apprblob', ['digest'], unique=True)
op.create_index('apprblob_media_type_id', 'apprblob', ['media_type_id'], unique=False)
op.create_table('apprmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_json', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifest'))
)
op.create_index('apprmanifest_digest', 'apprmanifest', ['digest'], unique=True)
op.create_index('apprmanifest_media_type_id', 'apprmanifest', ['media_type_id'], unique=False)
op.create_table('apprmanifestlist',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('manifest_list_json', sa.Text(), nullable=False),
sa.Column('schema_version', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprmanifestlist_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifestlist'))
)
op.create_index('apprmanifestlist_digest', 'apprmanifestlist', ['digest'], unique=True)
op.create_index('apprmanifestlist_media_type_id', 'apprmanifestlist', ['media_type_id'], unique=False)
op.create_table('apprblobplacement',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['apprblob.id'], name=op.f('fk_apprblobplacement_blob_id_apprblob')),
sa.ForeignKeyConstraint(['location_id'], ['apprblobplacementlocation.id'], name=op.f('fk_apprblobplacement_location_id_apprblobplacementlocation')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprblobplacement'))
)
op.create_index('apprblobplacement_blob_id', 'apprblobplacement', ['blob_id'], unique=False)
op.create_index('apprblobplacement_blob_id_location_id', 'apprblobplacement', ['blob_id', 'location_id'], unique=True)
op.create_index('apprblobplacement_location_id', 'apprblobplacement', ['location_id'], unique=False)
op.create_table('apprmanifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['apprblob.id'], name=op.f('fk_apprmanifestblob_blob_id_apprblob')),
sa.ForeignKeyConstraint(['manifest_id'], ['apprmanifest.id'], name=op.f('fk_apprmanifestblob_manifest_id_apprmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifestblob'))
)
op.create_index('apprmanifestblob_blob_id', 'apprmanifestblob', ['blob_id'], unique=False)
op.create_index('apprmanifestblob_manifest_id', 'apprmanifestblob', ['manifest_id'], unique=False)
op.create_index('apprmanifestblob_manifest_id_blob_id', 'apprmanifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_table('apprmanifestlistmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('operating_system', sa.String(length=255), nullable=True),
sa.Column('architecture', sa.String(length=255), nullable=True),
sa.Column('platform_json', sa.Text(), nullable=True),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['manifest_id'], ['apprmanifest.id'], name=op.f('fk_apprmanifestlistmanifest_manifest_id_apprmanifest')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['apprmanifestlist.id'], name=op.f('fk_apprmanifestlistmanifest_manifest_list_id_apprmanifestlist')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprmanifestlistmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifestlistmanifest'))
)
op.create_index('apprmanifestlistmanifest_manifest_id', 'apprmanifestlistmanifest', ['manifest_id'], unique=False)
op.create_index('apprmanifestlistmanifest_manifest_list_id', 'apprmanifestlistmanifest', ['manifest_list_id'], unique=False)
op.create_index('apprmanifestlistmanifest_manifest_list_id_media_type_id', 'apprmanifestlistmanifest', ['manifest_list_id', 'media_type_id'], unique=False)
op.create_index('apprmanifestlistmanifest_manifest_list_id_operating_system_arch', 'apprmanifestlistmanifest', ['manifest_list_id', 'operating_system', 'architecture', 'media_type_id'], unique=False)
op.create_index('apprmanifestlistmanifest_media_type_id', 'apprmanifestlistmanifest', ['media_type_id'], unique=False)
op.create_table('apprtag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False),
sa.Column('reverted', sa.Boolean(), nullable=False),
sa.Column('protected', sa.Boolean(), nullable=False),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['apprtag.id'], name=op.f('fk_apprtag_linked_tag_id_apprtag')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['apprmanifestlist.id'], name=op.f('fk_apprtag_manifest_list_id_apprmanifestlist')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_apprtag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['apprtagkind.id'], name=op.f('fk_apprtag_tag_kind_id_apprtagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprtag'))
)
op.create_index('apprtag_lifetime_end', 'apprtag', ['lifetime_end'], unique=False)
op.create_index('apprtag_linked_tag_id', 'apprtag', ['linked_tag_id'], unique=False)
op.create_index('apprtag_manifest_list_id', 'apprtag', ['manifest_list_id'], unique=False)
op.create_index('apprtag_repository_id', 'apprtag', ['repository_id'], unique=False)
op.create_index('apprtag_repository_id_name', 'apprtag', ['repository_id', 'name'], unique=False)
op.create_index('apprtag_repository_id_name_hidden', 'apprtag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('apprtag_repository_id_name_lifetime_end', 'apprtag', ['repository_id', 'name', 'lifetime_end'], unique=True)
op.create_index('apprtag_tag_kind_id', 'apprtag', ['tag_kind_id'], unique=False)
# ### end Alembic commands ###
conn = op.get_bind()
copy_table_contents('blobplacementlocation', 'apprblobplacementlocation', conn)
copy_table_contents('tagkind', 'apprtagkind', conn)
# ### population of test data ### #
tester.populate_table('apprmanifest', [
('digest', tester.TestDataType.String),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
('manifest_json', tester.TestDataType.JSON),
])
tester.populate_table('apprmanifestlist', [
('digest', tester.TestDataType.String),
('manifest_list_json', tester.TestDataType.JSON),
('schema_version', tester.TestDataType.String),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
])
tester.populate_table('apprmanifestlistmanifest', [
('manifest_list_id', tester.TestDataType.Foreign('apprmanifestlist')),
('manifest_id', tester.TestDataType.Foreign('apprmanifest')),
('operating_system', tester.TestDataType.String),
('architecture', tester.TestDataType.String),
('platform_json', tester.TestDataType.JSON),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
])
tester.populate_table('apprblob', [
('digest', tester.TestDataType.String),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
('size', tester.TestDataType.BigInteger),
('uncompressed_size', tester.TestDataType.BigInteger),
])
tester.populate_table('apprmanifestblob', [
('manifest_id', tester.TestDataType.Foreign('apprmanifest')),
('blob_id', tester.TestDataType.Foreign('apprblob')),
])
tester.populate_table('apprtag', [
('name', tester.TestDataType.String),
('repository_id', tester.TestDataType.Foreign('repository')),
('manifest_list_id', tester.TestDataType.Foreign('apprmanifestlist')),
('lifetime_start', tester.TestDataType.Integer),
('hidden', tester.TestDataType.Boolean),
('reverted', tester.TestDataType.Boolean),
('protected', tester.TestDataType.Boolean),
('tag_kind_id', tester.TestDataType.Foreign('apprtagkind')),
])
tester.populate_table('apprblobplacement', [
    ('blob_id', tester.TestDataType.Foreign('apprblob')),
('location_id', tester.TestDataType.Foreign('apprblobplacementlocation')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('apprtag')
op.drop_table('apprmanifestlistmanifest')
op.drop_table('apprmanifestblob')
op.drop_table('apprblobplacement')
op.drop_table('apprmanifestlist')
op.drop_table('apprmanifest')
op.drop_table('apprblob')
op.drop_table('apprtagkind')
op.drop_table('apprblobplacementlocation')
# ### end Alembic commands ###

View file

@ -0,0 +1,64 @@
"""Add ability for build triggers to be disabled
Revision ID: 61cadbacb9fc
Revises: b4c2d45bc132
Create Date: 2017-10-18 12:07:26.190901
"""
# revision identifiers, used by Alembic.
revision = '61cadbacb9fc'
down_revision = 'b4c2d45bc132'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('disablereason',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_disablereason'))
)
op.create_index('disablereason_name', 'disablereason', ['name'], unique=True)
op.bulk_insert(
tables.disablereason,
[
{'id': 1, 'name': 'user_toggled'},
],
)
op.bulk_insert(tables.logentrykind, [
{'name': 'toggle_repo_trigger'},
])
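  # The server default below ensures existing build triggers come out of the
  # migration with enabled=True.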
op.add_column(u'repositorybuildtrigger', sa.Column('disabled_reason_id', sa.Integer(), nullable=True))
op.add_column(u'repositorybuildtrigger', sa.Column('enabled', sa.Boolean(), nullable=False, server_default=sa.sql.expression.true()))
op.create_index('repositorybuildtrigger_disabled_reason_id', 'repositorybuildtrigger', ['disabled_reason_id'], unique=False)
op.create_foreign_key(op.f('fk_repositorybuildtrigger_disabled_reason_id_disablereason'), 'repositorybuildtrigger', 'disablereason', ['disabled_reason_id'], ['id'])
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('repositorybuildtrigger', 'disabled_reason_id', tester.TestDataType.Foreign('disablereason'))
tester.populate_column('repositorybuildtrigger', 'enabled', tester.TestDataType.Boolean)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f('fk_repositorybuildtrigger_disabled_reason_id_disablereason'), 'repositorybuildtrigger', type_='foreignkey')
op.drop_index('repositorybuildtrigger_disabled_reason_id', table_name='repositorybuildtrigger')
op.drop_column(u'repositorybuildtrigger', 'enabled')
op.drop_column(u'repositorybuildtrigger', 'disabled_reason_id')
op.drop_table('disablereason')
# ### end Alembic commands ###
op.execute(tables
.logentrykind
.delete()
.where(tables.logentrykind.c.name == op.inline_literal('toggle_repo_trigger')))

View file

@ -0,0 +1,26 @@
"""Change manifest_bytes to a UTF8 text field
Revision ID: 654e6df88b71
Revises: eafdeadcebc7
Create Date: 2018-08-15 09:58:46.109277
"""
# revision identifiers, used by Alembic.
revision = '654e6df88b71'
down_revision = 'eafdeadcebc7'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8LongText
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('manifest', 'manifest_bytes', existing_type=sa.Text(), type_=UTF8LongText())
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('manifest', 'manifest_bytes', existing_type=UTF8LongText(), type_=sa.Text())

View file

@ -0,0 +1,47 @@
"""Add TagToRepositoryTag table
Revision ID: 67f0abd172ae
Revises: 10f45ee2310b
Create Date: 2018-10-30 11:31:06.615488
"""
# revision identifiers, used by Alembic.
revision = '67f0abd172ae'
down_revision = '10f45ee2310b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('tagtorepositorytag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('tag_id', sa.Integer(), nullable=False),
sa.Column('repository_tag_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tagtorepositorytag_repository_id_repository')),
sa.ForeignKeyConstraint(['repository_tag_id'], ['repositorytag.id'], name=op.f('fk_tagtorepositorytag_repository_tag_id_repositorytag')),
sa.ForeignKeyConstraint(['tag_id'], ['tag.id'], name=op.f('fk_tagtorepositorytag_tag_id_tag')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagtorepositorytag'))
)
op.create_index('tagtorepositorytag_repository_id', 'tagtorepositorytag', ['repository_id'], unique=False)
op.create_index('tagtorepositorytag_repository_tag_id', 'tagtorepositorytag', ['repository_tag_id'], unique=True)
op.create_index('tagtorepositorytag_tag_id', 'tagtorepositorytag', ['tag_id'], unique=True)
# ### end Alembic commands ###
tester.populate_table('tagtorepositorytag', [
('repository_id', tester.TestDataType.Foreign('repository')),
('tag_id', tester.TestDataType.Foreign('tag')),
('repository_tag_id', tester.TestDataType.Foreign('repositorytag')),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tagtorepositorytag')
# ### end Alembic commands ###

View file

@ -0,0 +1,36 @@
"""Change LogEntry to use a BigInteger as its primary key
Revision ID: 6c21e2cfb8b6
Revises: d17c695859ea
Create Date: 2018-07-27 16:30:02.877346
"""
# revision identifiers, used by Alembic.
revision = '6c21e2cfb8b6'
down_revision = 'd17c695859ea'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column(
table_name='logentry',
column_name='id',
nullable=False,
autoincrement=True,
type_=sa.BigInteger(),
)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column(
table_name='logentry',
column_name='id',
nullable=False,
autoincrement=True,
type_=sa.Integer(),
)

View file

@ -0,0 +1,56 @@
"""Add user prompt support
Revision ID: 6c7014e84a5e
Revises: c156deb8845d
Create Date: 2016-10-31 16:26:31.447705
"""
# revision identifiers, used by Alembic.
revision = '6c7014e84a5e'
down_revision = 'c156deb8845d'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.create_table('userpromptkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_userpromptkind'))
)
op.create_index('userpromptkind_name', 'userpromptkind', ['name'], unique=False)
op.create_table('userprompt',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('kind_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['kind_id'], ['userpromptkind.id'], name=op.f('fk_userprompt_kind_id_userpromptkind')),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_userprompt_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_userprompt'))
)
op.create_index('userprompt_kind_id', 'userprompt', ['kind_id'], unique=False)
op.create_index('userprompt_user_id', 'userprompt', ['user_id'], unique=False)
op.create_index('userprompt_user_id_kind_id', 'userprompt', ['user_id', 'kind_id'], unique=True)
### end Alembic commands ###
op.bulk_insert(tables.userpromptkind,
[
{'name':'confirm_username'},
])
# ### population of test data ### #
tester.populate_table('userprompt', [
('user_id', tester.TestDataType.Foreign('user')),
('kind_id', tester.TestDataType.Foreign('userpromptkind')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.drop_table('userprompt')
op.drop_table('userpromptkind')
### end Alembic commands ###

View file

@ -0,0 +1,43 @@
"""Add LogEntry3 table
Revision ID: 6ec8726c0ace
Revises: 54492a68a3cf
Create Date: 2019-01-03 13:41:02.897957
"""
# revision identifiers, used by Alembic.
revision = '6ec8726c0ace'
down_revision = '54492a68a3cf'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('logentry3',
sa.Column('id', sa.BigInteger(), nullable=False),
sa.Column('kind_id', sa.Integer(), nullable=False),
sa.Column('account_id', sa.Integer(), nullable=False),
sa.Column('performer_id', sa.Integer(), nullable=True),
sa.Column('repository_id', sa.Integer(), nullable=True),
sa.Column('datetime', sa.DateTime(), nullable=False),
sa.Column('ip', sa.String(length=255), nullable=True),
sa.Column('metadata_json', sa.Text(), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry3'))
)
op.create_index('logentry3_account_id_datetime', 'logentry3', ['account_id', 'datetime'], unique=False)
op.create_index('logentry3_datetime', 'logentry3', ['datetime'], unique=False)
op.create_index('logentry3_performer_id_datetime', 'logentry3', ['performer_id', 'datetime'], unique=False)
op.create_index('logentry3_repository_id_datetime_kind_id', 'logentry3', ['repository_id', 'datetime', 'kind_id'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('logentry3')
# ### end Alembic commands ###

View file

@ -0,0 +1,289 @@
"""Backfill new encrypted fields
Revision ID: 703298a825c2
Revises: c13c8052f7a6
Create Date: 2019-08-19 16:07:48.109889
"""
# revision identifiers, used by Alembic.
revision = '703298a825c2'
down_revision = 'c13c8052f7a6'
import logging
import uuid
from datetime import datetime
from peewee import (JOIN, IntegrityError, DateTimeField, CharField, ForeignKeyField,
BooleanField, TextField, IntegerField)
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from data.database import (BaseModel, User, Repository, AccessTokenKind, Role,
random_string_generator, QuayUserField, BuildTriggerService,
uuid_generator, DisableReason)
from data.fields import Credential, DecryptedValue, EncryptedCharField, EncryptedTextField, EnumField, CredentialField
from data.model.token import ACCESS_TOKEN_NAME_PREFIX_LENGTH
from data.model.appspecifictoken import TOKEN_NAME_PREFIX_LENGTH as AST_TOKEN_NAME_PREFIX_LENGTH
from data.model.oauth import ACCESS_TOKEN_PREFIX_LENGTH as OAUTH_ACCESS_TOKEN_PREFIX_LENGTH
from data.model.oauth import AUTHORIZATION_CODE_PREFIX_LENGTH
BATCH_SIZE = 10
logger = logging.getLogger(__name__)
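# Yield rows matching `clause` in batches of BATCH_SIZE. Callers are expected to
# update each yielded row so that it stops matching the clause; otherwise the
# loop would re-select the same rows forever. Usage sketch (SomeModel is
# hypothetical):
#   for row in _iterate(SomeModel, SomeModel.fully_migrated == False):
#     ...update row...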
def _iterate(model_class, clause):
while True:
has_rows = False
for row in list(model_class.select().where(clause).limit(BATCH_SIZE)):
has_rows = True
yield row
if not has_rows:
break
def _decrypted(value):
if value is None:
return None
assert isinstance(value, basestring)
return DecryptedValue(value)
# NOTE: As per standard migrations involving Peewee models, we copy them here, as they will change
# after this call.
class AccessToken(BaseModel):
code = CharField(default=random_string_generator(length=64), unique=True, index=True)
token_name = CharField(default=random_string_generator(length=32), unique=True, index=True)
token_code = EncryptedCharField(default_token_length=32)
class RobotAccountToken(BaseModel):
robot_account = QuayUserField(index=True, allows_robots=True, unique=True)
token = EncryptedCharField(default_token_length=64)
fully_migrated = BooleanField(default=False)
class RepositoryBuildTrigger(BaseModel):
uuid = CharField(default=uuid_generator, index=True)
auth_token = CharField(null=True)
private_key = TextField(null=True)
secure_auth_token = EncryptedCharField(null=True)
secure_private_key = EncryptedTextField(null=True)
fully_migrated = BooleanField(default=False)
class AppSpecificAuthToken(BaseModel):
token_name = CharField(index=True, unique=True, default=random_string_generator(60))
token_secret = EncryptedCharField(default_token_length=60)
token_code = CharField(default=random_string_generator(length=120), unique=True, index=True)
class OAuthAccessToken(BaseModel):
token_name = CharField(index=True, unique=True)
token_code = CredentialField()
access_token = CharField(index=True)
class OAuthAuthorizationCode(BaseModel):
code = CharField(index=True, unique=True, null=True)
code_name = CharField(index=True, unique=True)
code_credential = CredentialField()
class OAuthApplication(BaseModel):
secure_client_secret = EncryptedCharField(default_token_length=40, null=True)
fully_migrated = BooleanField(default=False)
client_secret = CharField(default=random_string_generator(length=40))
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Empty all access token names to fix the bug where we put the wrong name and code
# in for some tokens.
AccessToken.update(token_name=None).where(AccessToken.token_name >> None).execute()
# AccessToken.
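  # The legacy plaintext `code` column is split at ACCESS_TOKEN_NAME_PREFIX_LENGTH
  # into a public token_name prefix and an encrypted token_code remainder.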
logger.info('Backfilling encrypted credentials for access tokens')
for access_token in _iterate(AccessToken, ((AccessToken.token_name >> None) |
(AccessToken.token_name == ''))):
logger.info('Backfilling encrypted credentials for access token %s', access_token.id)
assert access_token.code is not None
assert access_token.code[:ACCESS_TOKEN_NAME_PREFIX_LENGTH]
assert access_token.code[ACCESS_TOKEN_NAME_PREFIX_LENGTH:]
token_name = access_token.code[:ACCESS_TOKEN_NAME_PREFIX_LENGTH]
token_code = _decrypted(access_token.code[ACCESS_TOKEN_NAME_PREFIX_LENGTH:])
(AccessToken
.update(token_name=token_name, token_code=token_code)
.where(AccessToken.id == access_token.id, AccessToken.code == access_token.code)
.execute())
assert AccessToken.select().where(AccessToken.token_name >> None).count() == 0
# Robots.
logger.info('Backfilling encrypted credentials for robots')
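  # Select robot users that do not yet have a RobotAccountToken row (LEFT OUTER
  # join plus the `RobotAccountToken.id >> None` filter) and create one for each;
  # the legacy token value stored in the robot's `email` column becomes the
  # encrypted token.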
while True:
has_row = False
query = (User
.select()
.join(RobotAccountToken, JOIN.LEFT_OUTER)
.where(User.robot == True, RobotAccountToken.id >> None)
.limit(BATCH_SIZE))
for robot_user in query:
logger.info('Backfilling encrypted credentials for robot %s', robot_user.id)
has_row = True
try:
RobotAccountToken.create(robot_account=robot_user,
token=_decrypted(robot_user.email),
fully_migrated=False)
except IntegrityError:
break
if not has_row:
break
# RepositoryBuildTrigger
logger.info('Backfilling encrypted credentials for repo build triggers')
for repo_build_trigger in _iterate(RepositoryBuildTrigger,
(RepositoryBuildTrigger.fully_migrated == False)):
logger.info('Backfilling encrypted credentials for repo build trigger %s',
repo_build_trigger.id)
(RepositoryBuildTrigger
.update(secure_auth_token=_decrypted(repo_build_trigger.auth_token),
secure_private_key=_decrypted(repo_build_trigger.private_key),
fully_migrated=True)
.where(RepositoryBuildTrigger.id == repo_build_trigger.id,
RepositoryBuildTrigger.uuid == repo_build_trigger.uuid)
.execute())
assert (RepositoryBuildTrigger
.select()
.where(RepositoryBuildTrigger.fully_migrated == False)
.count()) == 0
# AppSpecificAuthToken
logger.info('Backfilling encrypted credentials for app specific auth tokens')
for token in _iterate(AppSpecificAuthToken, ((AppSpecificAuthToken.token_name >> None) |
(AppSpecificAuthToken.token_name == '') |
(AppSpecificAuthToken.token_secret >> None))):
logger.info('Backfilling encrypted credentials for app specific auth %s',
token.id)
assert token.token_code[AST_TOKEN_NAME_PREFIX_LENGTH:]
token_name = token.token_code[:AST_TOKEN_NAME_PREFIX_LENGTH]
token_secret = _decrypted(token.token_code[AST_TOKEN_NAME_PREFIX_LENGTH:])
assert token_name
assert token_secret
(AppSpecificAuthToken
.update(token_name=token_name,
token_secret=token_secret)
.where(AppSpecificAuthToken.id == token.id,
AppSpecificAuthToken.token_code == token.token_code)
.execute())
assert (AppSpecificAuthToken
.select()
.where(AppSpecificAuthToken.token_name >> None)
.count()) == 0
# OAuthAccessToken
logger.info('Backfilling credentials for OAuth access tokens')
for token in _iterate(OAuthAccessToken, ((OAuthAccessToken.token_name >> None) |
(OAuthAccessToken.token_name == ''))):
logger.info('Backfilling credentials for OAuth access token %s', token.id)
token_name = token.access_token[:OAUTH_ACCESS_TOKEN_PREFIX_LENGTH]
token_code = Credential.from_string(token.access_token[OAUTH_ACCESS_TOKEN_PREFIX_LENGTH:])
assert token_name
assert token.access_token[OAUTH_ACCESS_TOKEN_PREFIX_LENGTH:]
(OAuthAccessToken
.update(token_name=token_name,
token_code=token_code)
.where(OAuthAccessToken.id == token.id,
OAuthAccessToken.access_token == token.access_token)
.execute())
assert (OAuthAccessToken
.select()
.where(OAuthAccessToken.token_name >> None)
.count()) == 0
# OAuthAuthorizationCode
logger.info('Backfilling credentials for OAuth auth code')
for code in _iterate(OAuthAuthorizationCode, ((OAuthAuthorizationCode.code_name >> None) |
(OAuthAuthorizationCode.code_name == ''))):
logger.info('Backfilling credentials for OAuth auth code %s', code.id)
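    # Rows with a NULL `code` get a random placeholder of twice the prefix length
    # so it can still be split into a code_name prefix and a code_credential remainder.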
user_code = code.code or random_string_generator(AUTHORIZATION_CODE_PREFIX_LENGTH * 2)()
code_name = user_code[:AUTHORIZATION_CODE_PREFIX_LENGTH]
code_credential = Credential.from_string(user_code[AUTHORIZATION_CODE_PREFIX_LENGTH:])
assert code_name
assert user_code[AUTHORIZATION_CODE_PREFIX_LENGTH:]
(OAuthAuthorizationCode
.update(code_name=code_name, code_credential=code_credential)
.where(OAuthAuthorizationCode.id == code.id)
.execute())
assert (OAuthAuthorizationCode
.select()
.where(OAuthAuthorizationCode.code_name >> None)
.count()) == 0
# OAuthApplication
logger.info('Backfilling secret for OAuth applications')
for app in _iterate(OAuthApplication, OAuthApplication.fully_migrated == False):
logger.info('Backfilling secret for OAuth application %s', app.id)
client_secret = app.client_secret or str(uuid.uuid4())
secure_client_secret = _decrypted(client_secret)
(OAuthApplication
.update(secure_client_secret=secure_client_secret, fully_migrated=True)
.where(OAuthApplication.id == app.id, OAuthApplication.fully_migrated == False)
.execute())
assert (OAuthApplication
.select()
.where(OAuthApplication.fully_migrated == False)
.count()) == 0
# Adjust existing fields to be nullable.
op.alter_column('accesstoken', 'code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'access_token', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_code', nullable=True, existing_type=sa.String(length=255))
# Adjust new fields to be non-nullable.
op.alter_column('accesstoken', 'token_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('accesstoken', 'token_code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_secret', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_credential', nullable=False, existing_type=sa.String(length=255))
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('accesstoken', 'code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'access_token', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('accesstoken', 'token_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('accesstoken', 'token_code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_secret', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_credential', nullable=True, existing_type=sa.String(length=255))

View file

@ -0,0 +1,74 @@
"""Add support for app specific tokens
Revision ID: 7367229b38d9
Revises: d8989249f8f6
Create Date: 2017-12-12 13:15:42.419764
"""
# revision identifiers, used by Alembic.
revision = '7367229b38d9'
down_revision = 'd8989249f8f6'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('appspecificauthtoken',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=36), nullable=False),
sa.Column('title', UTF8CharField(length=255), nullable=False),
sa.Column('token_code', sa.String(length=255), nullable=False),
sa.Column('created', sa.DateTime(), nullable=False),
sa.Column('expiration', sa.DateTime(), nullable=True),
sa.Column('last_accessed', sa.DateTime(), nullable=True),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_appspecificauthtoken_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_appspecificauthtoken'))
)
op.create_index('appspecificauthtoken_token_code', 'appspecificauthtoken', ['token_code'], unique=True)
op.create_index('appspecificauthtoken_user_id', 'appspecificauthtoken', ['user_id'], unique=False)
op.create_index('appspecificauthtoken_user_id_expiration', 'appspecificauthtoken', ['user_id', 'expiration'], unique=False)
op.create_index('appspecificauthtoken_uuid', 'appspecificauthtoken', ['uuid'], unique=False)
# ### end Alembic commands ###
op.bulk_insert(tables.logentrykind, [
{'name': 'create_app_specific_token'},
{'name': 'revoke_app_specific_token'},
])
# ### population of test data ### #
tester.populate_table('appspecificauthtoken', [
('user_id', tester.TestDataType.Foreign('user')),
('uuid', tester.TestDataType.UUID),
('title', tester.TestDataType.UTF8Char),
('token_code', tester.TestDataType.String),
('created', tester.TestDataType.DateTime),
('expiration', tester.TestDataType.DateTime),
('last_accessed', tester.TestDataType.DateTime),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('appspecificauthtoken')
# ### end Alembic commands ###
  op.execute(tables
             .logentrykind
             .delete()
             .where(tables.logentrykind.c.name == op.inline_literal('create_app_specific_token')))
  op.execute(tables
             .logentrykind
             .delete()
             .where(tables.logentrykind.c.name == op.inline_literal('revoke_app_specific_token')))

View file

@ -0,0 +1,340 @@
"""Add OCI/App models
Revision ID: 7a525c68eb13
Revises: e2894a3a3c19
Create Date: 2017-01-24 16:25:52.170277
"""
# revision identifiers, used by Alembic.
revision = '7a525c68eb13'
down_revision = 'e2894a3a3c19'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from sqlalchemy.sql import table, column
from util.migrate import UTF8LongText, UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.create_table(
'tagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagkind'))
)
op.create_index('tagkind_name', 'tagkind', ['name'], unique=True)
op.create_table(
'blobplacementlocation',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocation'))
)
op.create_index('blobplacementlocation_name', 'blobplacementlocation', ['name'], unique=True)
op.create_table(
'blob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('size', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_size', sa.BigInteger(), nullable=True),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_blob_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blob'))
)
op.create_index('blob_digest', 'blob', ['digest'], unique=True)
op.create_index('blob_media_type_id', 'blob', ['media_type_id'], unique=False)
op.create_table(
'blobplacementlocationpreference',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacementlocpref_locid_blobplacementlocation')),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_blobplacementlocationpreference_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocationpreference'))
)
op.create_index('blobplacementlocationpreference_location_id', 'blobplacementlocationpreference', ['location_id'], unique=False)
op.create_index('blobplacementlocationpreference_user_id', 'blobplacementlocationpreference', ['user_id'], unique=False)
op.create_table(
'manifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest'))
)
op.create_index('manifest_digest', 'manifest', ['digest'], unique=True)
op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False)
op.create_table(
'manifestlist',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('manifest_list_json', UTF8LongText, nullable=False),
sa.Column('schema_version', UTF8CharField(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlist_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlist'))
)
op.create_index('manifestlist_digest', 'manifestlist', ['digest'], unique=True)
op.create_index('manifestlist_media_type_id', 'manifestlist', ['media_type_id'], unique=False)
op.create_table(
'bittorrentpieces',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('pieces', UTF8LongText, nullable=False),
sa.Column('piece_length', sa.BigInteger(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_bittorrentpieces_blob_id_blob')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_bittorrentpieces'))
)
op.create_index('bittorrentpieces_blob_id', 'bittorrentpieces', ['blob_id'], unique=False)
op.create_index('bittorrentpieces_blob_id_piece_length', 'bittorrentpieces', ['blob_id', 'piece_length'], unique=True)
op.create_table(
'blobplacement',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_blobplacement_blob_id_blob')),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacement_location_id_blobplacementlocation')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacement'))
)
op.create_index('blobplacement_blob_id', 'blobplacement', ['blob_id'], unique=False)
op.create_index('blobplacement_blob_id_location_id', 'blobplacement', ['blob_id', 'location_id'], unique=True)
op.create_index('blobplacement_location_id', 'blobplacement', ['location_id'], unique=False)
op.create_table(
'blobuploading',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('created', sa.DateTime(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.Column('byte_count', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_byte_count', sa.BigInteger(), nullable=True),
sa.Column('chunk_count', sa.BigInteger(), nullable=False),
sa.Column('storage_metadata', UTF8LongText, nullable=True),
sa.Column('sha_state', UTF8LongText, nullable=True),
sa.Column('piece_sha_state', UTF8LongText, nullable=True),
sa.Column('piece_hashes', UTF8LongText, nullable=True),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobuploading_location_id_blobplacementlocation')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_blobuploading_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobuploading'))
)
op.create_index('blobuploading_created', 'blobuploading', ['created'], unique=False)
op.create_index('blobuploading_location_id', 'blobuploading', ['location_id'], unique=False)
op.create_index('blobuploading_repository_id', 'blobuploading', ['repository_id'], unique=False)
op.create_index('blobuploading_repository_id_uuid', 'blobuploading', ['repository_id', 'uuid'], unique=True)
op.create_index('blobuploading_uuid', 'blobuploading', ['uuid'], unique=True)
op.create_table(
'derivedimage',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('source_manifest_id', sa.Integer(), nullable=False),
sa.Column('derived_manifest_json', UTF8LongText, nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('uniqueness_hash', sa.String(length=255), nullable=False),
sa.Column('signature_blob_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_derivedimage_blob_id_blob')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_derivedimage_media_type_id_mediatype')),
sa.ForeignKeyConstraint(['signature_blob_id'], ['blob.id'], name=op.f('fk_derivedimage_signature_blob_id_blob')),
sa.ForeignKeyConstraint(['source_manifest_id'], ['manifest.id'], name=op.f('fk_derivedimage_source_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimage'))
)
op.create_index('derivedimage_blob_id', 'derivedimage', ['blob_id'], unique=False)
op.create_index('derivedimage_media_type_id', 'derivedimage', ['media_type_id'], unique=False)
op.create_index('derivedimage_signature_blob_id', 'derivedimage', ['signature_blob_id'], unique=False)
op.create_index('derivedimage_source_manifest_id', 'derivedimage', ['source_manifest_id'], unique=False)
op.create_index('derivedimage_source_manifest_id_blob_id', 'derivedimage', ['source_manifest_id', 'blob_id'], unique=True)
op.create_index('derivedimage_source_manifest_id_media_type_id_uniqueness_hash', 'derivedimage', ['source_manifest_id', 'media_type_id', 'uniqueness_hash'], unique=True)
op.create_index('derivedimage_uniqueness_hash', 'derivedimage', ['uniqueness_hash'], unique=True)
op.create_index('derivedimage_uuid', 'derivedimage', ['uuid'], unique=True)
op.create_table(
'manifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestblob_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob'))
)
op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False)
op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False)
op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_table(
'manifestlabel',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('annotated_id', sa.Integer(), nullable=False),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['annotated_id'], ['manifest.id'], name=op.f('fk_manifestlabel_annotated_id_manifest')),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel'))
)
op.create_index('manifestlabel_annotated_id', 'manifestlabel', ['annotated_id'], unique=False)
op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False)
op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False)
op.create_index('manifestlabel_repository_id_annotated_id_label_id', 'manifestlabel', ['repository_id', 'annotated_id', 'label_id'], unique=True)
op.create_table(
'manifestlayer',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_index', sa.BigInteger(), nullable=False),
sa.Column('metadata_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestlayer_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlayer_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayer'))
)
op.create_index('manifestlayer_blob_id', 'manifestlayer', ['blob_id'], unique=False)
op.create_index('manifestlayer_manifest_id', 'manifestlayer', ['manifest_id'], unique=False)
op.create_index('manifestlayer_manifest_id_manifest_index', 'manifestlayer', ['manifest_id', 'manifest_index'], unique=True)
op.create_index('manifestlayer_manifest_index', 'manifestlayer', ['manifest_index'], unique=False)
op.create_table(
'manifestlistmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('operating_system', UTF8CharField(length=255), nullable=True),
sa.Column('architecture', UTF8CharField(length=255), nullable=True),
sa.Column('platform_json', UTF8LongText, nullable=True),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlistmanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_manifestlistmanifest_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlistmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlistmanifest'))
)
op.create_index('manifestlistmanifest_manifest_id', 'manifestlistmanifest', ['manifest_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_list_id', 'manifestlistmanifest', ['manifest_list_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_os_arch_mtid', 'manifestlistmanifest', ['manifest_list_id', 'operating_system', 'architecture', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_mtid', 'manifestlistmanifest', ['manifest_list_id', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_media_type_id', 'manifestlistmanifest', ['media_type_id'], unique=False)
op.create_table(
'tag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', UTF8CharField(length=190), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False),
sa.Column('reverted', sa.Boolean(), nullable=False),
sa.Column('protected', sa.Boolean(), nullable=False),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['tag.id'], name=op.f('fk_tag_linked_tag_id_tag')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_tag_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['tagkind.id'], name=op.f('fk_tag_tag_kind_id_tagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tag'))
)
op.create_index('tag_lifetime_end', 'tag', ['lifetime_end'], unique=False)
op.create_index('tag_linked_tag_id', 'tag', ['linked_tag_id'], unique=False)
op.create_index('tag_manifest_list_id', 'tag', ['manifest_list_id'], unique=False)
op.create_index('tag_repository_id', 'tag', ['repository_id'], unique=False)
op.create_index('tag_repository_id_name_hidden', 'tag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('tag_repository_id_name_lifetime_end', 'tag', ['repository_id', 'name', 'lifetime_end'], unique=True)
op.create_index('tag_repository_id_name', 'tag', ['repository_id', 'name'], unique=False)
op.create_index('tag_tag_kind_id', 'tag', ['tag_kind_id'], unique=False)
op.create_table(
'manifestlayerdockerv1',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_layer_id', sa.Integer(), nullable=False),
sa.Column('image_id', UTF8CharField(length=255), nullable=False),
sa.Column('checksum', UTF8CharField(length=255), nullable=False),
sa.Column('compat_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['manifest_layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerdockerv1_manifest_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerdockerv1'))
)
op.create_index('manifestlayerdockerv1_image_id', 'manifestlayerdockerv1', ['image_id'], unique=False)
op.create_index('manifestlayerdockerv1_manifest_layer_id', 'manifestlayerdockerv1', ['manifest_layer_id'], unique=False)
op.create_table(
'manifestlayerscan',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('layer_id', sa.Integer(), nullable=False),
sa.Column('scannable', sa.Boolean(), nullable=False),
sa.Column('scanned_by', UTF8CharField(length=255), nullable=False),
sa.ForeignKeyConstraint(['layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerscan_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerscan'))
)
op.create_index('manifestlayerscan_layer_id', 'manifestlayerscan', ['layer_id'], unique=True)
blobplacementlocation_table = table('blobplacementlocation',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
blobplacementlocation_table,
[
{'name': 'local_eu'},
{'name': 'local_us'},
],
)
op.bulk_insert(
tables.mediatype,
[
{'name': 'application/vnd.cnr.blob.v0.tar+gzip'},
{'name': 'application/vnd.cnr.package-manifest.helm.v0.json'},
{'name': 'application/vnd.cnr.package-manifest.kpm.v0.json'},
{'name': 'application/vnd.cnr.package-manifest.docker-compose.v0.json'},
{'name': 'application/vnd.cnr.package.kpm.v0.tar+gzip'},
{'name': 'application/vnd.cnr.package.helm.v0.tar+gzip'},
{'name': 'application/vnd.cnr.package.docker-compose.v0.tar+gzip'},
{'name': 'application/vnd.cnr.manifests.v0.json'},
{'name': 'application/vnd.cnr.manifest.list.v0.json'},
],
)
tagkind_table = table('tagkind',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
tagkind_table,
[
{'id': 1, 'name': 'tag'},
{'id': 2, 'name': 'release'},
{'id': 3, 'name': 'channel'},
]
)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
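# Drop the tables created above, removing the tables that hold foreign keys before the tables they reference.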
op.drop_table('manifestlayerscan')
op.drop_table('manifestlayerdockerv1')
op.drop_table('tag')
op.drop_table('manifestlistmanifest')
op.drop_table('manifestlayer')
op.drop_table('manifestlabel')
op.drop_table('manifestblob')
op.drop_table('derivedimage')
op.drop_table('blobuploading')
op.drop_table('blobplacement')
op.drop_table('bittorrentpieces')
op.drop_table('manifestlist')
op.drop_table('manifest')
op.drop_table('blobplacementlocationpreference')
op.drop_table('blob')
op.drop_table('tagkind')
op.drop_table('blobplacementlocation')


@ -0,0 +1,35 @@
"""Add disabled datetime to trigger
Revision ID: 87fbbc224f10
Revises: 17aff2e1354e
Create Date: 2017-10-24 14:06:37.658705
"""
# revision identifiers, used by Alembic.
revision = '87fbbc224f10'
down_revision = '17aff2e1354e'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('repositorybuildtrigger', sa.Column('disabled_datetime', sa.DateTime(), nullable=True))
op.create_index('repositorybuildtrigger_disabled_datetime', 'repositorybuildtrigger', ['disabled_datetime'], unique=False)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('repositorybuildtrigger', 'disabled_datetime', tester.TestDataType.DateTime)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('repositorybuildtrigger_disabled_datetime', table_name='repositorybuildtrigger')
op.drop_column('repositorybuildtrigger', 'disabled_datetime')
# ### end Alembic commands ###


@ -0,0 +1,180 @@
"""Add V2_2 data models for Manifest, ManifestBlob and ManifestLegacyImage
Revision ID: 9093adccc784
Revises: 6c21e2cfb8b6
Create Date: 2018-08-06 16:07:50.222749
"""
# revision identifiers, used by Alembic.
revision = '9093adccc784'
down_revision = '6c21e2cfb8b6'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('manifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_bytes', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifest_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest'))
)
op.create_index('manifest_digest', 'manifest', ['digest'], unique=False)
op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False)
op.create_index('manifest_repository_id', 'manifest', ['repository_id'], unique=False)
op.create_index('manifest_repository_id_digest', 'manifest', ['repository_id', 'digest'], unique=True)
op.create_index('manifest_repository_id_media_type_id', 'manifest', ['repository_id', 'media_type_id'], unique=False)
op.create_table('manifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('blob_index', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['imagestorage.id'], name=op.f('fk_manifestblob_blob_id_imagestorage')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestblob_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob'))
)
op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False)
op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False)
op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_index('manifestblob_manifest_id_blob_index', 'manifestblob', ['manifest_id', 'blob_index'], unique=True)
op.create_index('manifestblob_repository_id', 'manifestblob', ['repository_id'], unique=False)
op.create_table('manifestlabel',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlabel_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel'))
)
op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False)
op.create_index('manifestlabel_manifest_id', 'manifestlabel', ['manifest_id'], unique=False)
op.create_index('manifestlabel_manifest_id_label_id', 'manifestlabel', ['manifest_id', 'label_id'], unique=True)
op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False)
op.create_table('manifestlegacyimage',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('image_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['image_id'], ['image.id'], name=op.f('fk_manifestlegacyimage_image_id_image')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlegacyimage_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlegacyimage_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlegacyimage'))
)
op.create_index('manifestlegacyimage_image_id', 'manifestlegacyimage', ['image_id'], unique=False)
op.create_index('manifestlegacyimage_manifest_id', 'manifestlegacyimage', ['manifest_id'], unique=True)
op.create_index('manifestlegacyimage_repository_id', 'manifestlegacyimage', ['repository_id'], unique=False)
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=True)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
op.create_table('tagmanifestlabelmap',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=True),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_label_id', sa.Integer(), nullable=False),
sa.Column('manifest_label_id', sa.Integer(), nullable=True),
sa.Column('broken_manifest', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_tagmanifestlabelmap_label_id_label')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifestlabelmap_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_label_id'], ['manifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_manifest_label_id_manifestlabel')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_id_tagmanifest')),
sa.ForeignKeyConstraint(['tag_manifest_label_id'], ['tagmanifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_label_id_tagmanifestlabel')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifestlabelmap'))
)
op.create_index('tagmanifestlabelmap_broken_manifest', 'tagmanifestlabelmap', ['broken_manifest'], unique=False)
op.create_index('tagmanifestlabelmap_label_id', 'tagmanifestlabelmap', ['label_id'], unique=False)
op.create_index('tagmanifestlabelmap_manifest_id', 'tagmanifestlabelmap', ['manifest_id'], unique=False)
op.create_index('tagmanifestlabelmap_manifest_label_id', 'tagmanifestlabelmap', ['manifest_label_id'], unique=False)
op.create_index('tagmanifestlabelmap_tag_manifest_id', 'tagmanifestlabelmap', ['tag_manifest_id'], unique=False)
op.create_index('tagmanifestlabelmap_tag_manifest_label_id', 'tagmanifestlabelmap', ['tag_manifest_label_id'], unique=False)
# ### end Alembic commands ###
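# Register every Docker schema 1 manifest content type as a media type row.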
for media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
op.bulk_insert(tables.mediatype,
[
{'name': media_type},
])
# ### population of test data ### #
tester.populate_table('manifest', [
('digest', tester.TestDataType.String),
('manifest_bytes', tester.TestDataType.JSON),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_table('manifestblob', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('repository_id', tester.TestDataType.Foreign('repository')),
('blob_id', tester.TestDataType.Foreign('imagestorage')),
('blob_index', tester.TestDataType.Integer),
])
tester.populate_table('manifestlabel', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('label_id', tester.TestDataType.Foreign('label')),
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_table('manifestlegacyimage', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('image_id', tester.TestDataType.Foreign('image')),
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
tester.populate_table('tagmanifestlabelmap', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
('tag_manifest_label_id', tester.TestDataType.Foreign('tagmanifestlabel')),
('manifest_label_id', tester.TestDataType.Foreign('manifestlabel')),
('label_id', tester.TestDataType.Foreign('label')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
for media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
op.execute(tables
.mediatype
.delete()
.where(tables.
mediatype.c.name == op.inline_literal(media_type)))
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tagmanifestlabelmap')
op.drop_table('tagmanifesttomanifest')
op.drop_table('manifestlegacyimage')
op.drop_table('manifestlabel')
op.drop_table('manifestblob')
op.drop_table('manifest')
# ### end Alembic commands ###


@ -0,0 +1,31 @@
"""Create new notification type
Revision ID: 94836b099894
Revises: faf752bd2e0a
Create Date: 2016-11-30 10:29:51.519278
"""
# revision identifiers, used by Alembic.
revision = '94836b099894'
down_revision = 'faf752bd2e0a'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.bulk_insert(tables.externalnotificationevent,
[
{'name': 'build_cancelled'},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('build_cancelled')))


@ -0,0 +1,101 @@
"""back fill build expand_config
Revision ID: a6c463dfb9fe
Revises: b4df55dea4b3
Create Date: 2017-03-17 10:00:19.739858
"""
# revision identifiers, used by Alembic.
import json
import os
from app import app
from peewee import *
from data.database import BaseModel
revision = 'a6c463dfb9fe'
down_revision = 'b4df55dea4b3'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
class RepositoryBuildTrigger(BaseModel):
config = TextField(default='{}')
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
if not app.config.get('SETUP_COMPLETE', False):
return
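# Rewrite every stored build trigger config so the legacy 'subdir' value also yields
# 'context' and 'dockerfile_path' entries (see get_config_expand below).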
repositoryBuildTriggers = RepositoryBuildTrigger.select()
for repositoryBuildTrigger in repositoryBuildTriggers:
config = json.loads(repositoryBuildTrigger.config)
repositoryBuildTrigger.config = json.dumps(get_config_expand(config))
repositoryBuildTrigger.save()
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
if not app.config.get('SETUP_COMPLETE', False):
return
repositoryBuildTriggers = RepositoryBuildTrigger.select()
for repositoryBuildTrigger in repositoryBuildTriggers:
config = json.loads(repositoryBuildTrigger.config)
repositoryBuildTrigger.config = json.dumps(get_config_contract(config))
repositoryBuildTrigger.save()
def create_context(current_subdir):
if current_subdir == "":
current_subdir = os.path.sep + current_subdir
if current_subdir[len(current_subdir) - 1] != os.path.sep:
current_subdir += os.path.sep
context, _ = os.path.split(current_subdir)
return context
def create_dockerfile_path(current_subdir):
if current_subdir == "":
current_subdir = os.path.sep + current_subdir
if current_subdir[len(current_subdir) - 1] != os.path.sep:
current_subdir += os.path.sep
return current_subdir + "Dockerfile"
def get_config_expand(config):
""" A function to transform old records into new records """
if not config:
return config
# skip records that have been updated
if "context" in config or "dockerfile_path" in config:
return config
config_expand = {}
if "subdir" in config:
config_expand = dict(config)
config_expand["context"] = create_context(config["subdir"])
config_expand["dockerfile_path"] = create_dockerfile_path(config["subdir"])
return config_expand
def get_config_contract(config):
""" A function to delete context and dockerfile_path from config """
if not config:
return config
if "context" in config:
del config["context"]
if "dockerfile_path" in config:
del config["dockerfile_path"]
return config


@ -0,0 +1,53 @@
"""Add deleted namespace table
Revision ID: b4c2d45bc132
Revises: 152edccba18c
Create Date: 2018-02-27 11:43:02.329941
"""
# revision identifiers, used by Alembic.
revision = 'b4c2d45bc132'
down_revision = '152edccba18c'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('deletednamespace',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('namespace_id', sa.Integer(), nullable=False),
sa.Column('marked', sa.DateTime(), nullable=False),
sa.Column('original_username', sa.String(length=255), nullable=False),
sa.Column('original_email', sa.String(length=255), nullable=False),
sa.Column('queue_id', sa.String(length=255), nullable=True),
sa.ForeignKeyConstraint(['namespace_id'], ['user.id'], name=op.f('fk_deletednamespace_namespace_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_deletednamespace'))
)
op.create_index('deletednamespace_namespace_id', 'deletednamespace', ['namespace_id'], unique=True)
op.create_index('deletednamespace_original_email', 'deletednamespace', ['original_email'], unique=False)
op.create_index('deletednamespace_original_username', 'deletednamespace', ['original_username'], unique=False)
op.create_index('deletednamespace_queue_id', 'deletednamespace', ['queue_id'], unique=False)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('deletednamespace', [
('namespace_id', tester.TestDataType.Foreign('user')),
('marked', tester.TestDataType.DateTime),
('original_username', tester.TestDataType.UTF8Char),
('original_email', tester.TestDataType.String),
('queue_id', tester.TestDataType.Foreign('queueitem')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('deletednamespace')
# ### end Alembic commands ###


@ -0,0 +1,51 @@
"""add repository kind
Revision ID: b4df55dea4b3
Revises: b8ae68ad3e52
Create Date: 2017-03-19 12:59:41.484430
"""
# revision identifiers, used by Alembic.
revision = 'b4df55dea4b3'
down_revision = 'b8ae68ad3e52'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.create_table(
'repositorykind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_repositorykind'))
)
op.create_index('repositorykind_name', 'repositorykind', ['name'], unique=True)
op.bulk_insert(
tables.repositorykind,
[
{'id': 1, 'name': 'image'},
{'id': 2, 'name': 'application'},
],
)
op.add_column(u'repository', sa.Column('kind_id', sa.Integer(), nullable=False, server_default='1'))
op.create_index('repository_kind_id', 'repository', ['kind_id'], unique=False)
op.create_foreign_key(op.f('fk_repository_kind_id_repositorykind'), 'repository', 'repositorykind', ['kind_id'], ['id'])
# ### population of test data ### #
tester.populate_column('repository', 'kind_id', tester.TestDataType.Foreign('repositorykind'))
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_constraint(op.f('fk_repository_kind_id_repositorykind'), 'repository', type_='foreignkey')
op.drop_index('repository_kind_id', table_name='repository')
op.drop_column(u'repository', 'kind_id')
op.drop_table('repositorykind')


@ -0,0 +1,46 @@
"""Add RobotAccountMetadata table
Revision ID: b547bc139ad8
Revises: 0cf50323c78b
Create Date: 2018-03-09 15:50:48.298880
"""
# revision identifiers, used by Alembic.
revision = 'b547bc139ad8'
down_revision = '0cf50323c78b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('robotaccountmetadata',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('robot_account_id', sa.Integer(), nullable=False),
sa.Column('description', UTF8CharField(length=255), nullable=False),
sa.Column('unstructured_json', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['robot_account_id'], ['user.id'], name=op.f('fk_robotaccountmetadata_robot_account_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_robotaccountmetadata'))
)
op.create_index('robotaccountmetadata_robot_account_id', 'robotaccountmetadata', ['robot_account_id'], unique=True)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('robotaccountmetadata', [
('robot_account_id', tester.TestDataType.Foreign('user')),
('description', tester.TestDataType.UTF8Char),
('unstructured_json', tester.TestDataType.JSON),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('robotaccountmetadata')
# ### end Alembic commands ###


@ -0,0 +1,37 @@
"""Change BlobUpload fields to BigIntegers to allow layers > 8GB
Revision ID: b8ae68ad3e52
Revises: 7a525c68eb13
Create Date: 2017-02-27 11:26:49.182349
"""
# revision identifiers, used by Alembic.
revision = 'b8ae68ad3e52'
down_revision = '7a525c68eb13'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('blobupload', 'byte_count', existing_type=sa.Integer(), type_=sa.BigInteger())
op.alter_column('blobupload', 'uncompressed_byte_count', existing_type=sa.Integer(), type_=sa.BigInteger())
# ### population of test data ### #
tester.populate_column('blobupload', 'byte_count', tester.TestDataType.BigInteger)
tester.populate_column('blobupload', 'uncompressed_byte_count', tester.TestDataType.BigInteger)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### population of test data ### #
tester.populate_column('blobupload', 'byte_count', tester.TestDataType.Integer)
tester.populate_column('blobupload', 'uncompressed_byte_count', tester.TestDataType.Integer)
# ### end population of test data ### #
op.alter_column('blobupload', 'byte_count', existing_type=sa.BigInteger(), type_=sa.Integer())
op.alter_column('blobupload', 'uncompressed_byte_count', existing_type=sa.BigInteger(), type_=sa.Integer())


@ -0,0 +1,35 @@
"""Add lifetime end indexes to tag tables
Revision ID: b9045731c4de
Revises: e184af42242d
Create Date: 2019-02-14 17:18:40.474310
"""
# revision identifiers, used by Alembic.
revision = 'b9045731c4de'
down_revision = 'e184af42242d'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_index('repositorytag_repository_id_lifetime_end_ts', 'repositorytag', ['repository_id', 'lifetime_end_ts'], unique=False)
op.create_index('tag_repository_id_lifetime_end_ms', 'tag', ['repository_id', 'lifetime_end_ms'], unique=False)
op.create_index('repositorytag_repository_id_lifetime_start_ts', 'repositorytag', ['repository_id', 'lifetime_start_ts'], unique=False)
op.create_index('tag_repository_id_lifetime_start_ms', 'tag', ['repository_id', 'lifetime_start_ms'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('tag_repository_id_lifetime_end_ms', table_name='tag')
op.drop_index('repositorytag_repository_id_lifetime_end_ts', table_name='repositorytag')
op.drop_index('tag_repository_id_lifetime_start_ms', table_name='tag')
op.drop_index('repositorytag_repository_id_lifetime_start_ts', table_name='repositorytag')
# ### end Alembic commands ###


@ -0,0 +1,71 @@
"""Run full tag backfill
Revision ID: b918abdbee43
Revises: 481623ba00ba
Create Date: 2019-03-14 13:38:03.411609
"""
# revision identifiers, used by Alembic.
revision = 'b918abdbee43'
down_revision = '481623ba00ba'
import logging.config
from app import app
from peewee import JOIN, fn
from workers.tagbackfillworker import backfill_tag
from data.database import RepositoryTag, Repository, User, TagToRepositoryTag
from util.log import logfile_path
logger = logging.getLogger(__name__)
def upgrade(tables, tester, progress_reporter):
if not app.config.get('SETUP_COMPLETE', False):
return
start_id = 0
end_id = 1000
size = 1000
max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
if max_id is None:
return
logger.info("Found maximum ID %s" % max_id)
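# Walk RepositoryTag ids in windows of `size`, doubling the window (while it is below 100000)
# whenever a window yields fewer than 1000 rows, and backfill each non-hidden tag that has no
# TagToRepositoryTag row yet.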
while True:
if start_id > max_id:
break
logger.info('Checking tag range %s - %s', start_id, end_id)
r = list(RepositoryTag
.select()
.join(Repository)
.switch(RepositoryTag)
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None)
.where(RepositoryTag.hidden == False,
RepositoryTag.id >= start_id,
RepositoryTag.id < end_id))
if len(r) < 1000 and size < 100000:
size *= 2
start_id = end_id
end_id = start_id + size
if not len(r):
continue
logger.info('Found %s tags to backfill', len(r))
for index, t in enumerate(r):
logger.info("Backfilling tag %s of %s", index, len(r))
backfill_tag(t)
def downgrade(tables, tester, progress_reporter):
# Nothing to do.
pass


@ -0,0 +1,52 @@
"""Add TeamSync table
Revision ID: be8d1c402ce0
Revises: a6c463dfb9fe
Create Date: 2017-02-23 13:34:52.356812
"""
# revision identifiers, used by Alembic.
revision = 'be8d1c402ce0'
down_revision = 'a6c463dfb9fe'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8LongText
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.create_table('teamsync',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('team_id', sa.Integer(), nullable=False),
sa.Column('transaction_id', sa.String(length=255), nullable=False),
sa.Column('last_updated', sa.DateTime(), nullable=True),
sa.Column('service_id', sa.Integer(), nullable=False),
sa.Column('config', UTF8LongText(), nullable=False),
sa.ForeignKeyConstraint(['service_id'], ['loginservice.id'], name=op.f('fk_teamsync_service_id_loginservice')),
sa.ForeignKeyConstraint(['team_id'], ['team.id'], name=op.f('fk_teamsync_team_id_team')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_teamsync'))
)
op.create_index('teamsync_last_updated', 'teamsync', ['last_updated'], unique=False)
op.create_index('teamsync_service_id', 'teamsync', ['service_id'], unique=False)
op.create_index('teamsync_team_id', 'teamsync', ['team_id'], unique=True)
### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('teamsync', [
('team_id', tester.TestDataType.Foreign('team')),
('transaction_id', tester.TestDataType.String),
('last_updated', tester.TestDataType.DateTime),
('service_id', tester.TestDataType.Foreign('loginservice')),
('config', tester.TestDataType.JSON),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.drop_table('teamsync')
### end Alembic commands ###


@ -0,0 +1,34 @@
"""Add schema2 media types
Revision ID: c00a1f15968b
Revises: 67f0abd172ae
Create Date: 2018-11-13 09:20:21.968503
"""
# revision identifiers, used by Alembic.
revision = 'c00a1f15968b'
down_revision = '67f0abd172ae'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
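# Register every Docker schema 2 content type as a media type row.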
for media_type in DOCKER_SCHEMA2_CONTENT_TYPES:
op.bulk_insert(tables.mediatype,
[
{'name': media_type},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
for media_type in DOCKER_SCHEMA2_CONTENT_TYPES:
op.execute(tables
.mediatype
.delete()
.where(tables.
mediatype.c.name == op.inline_literal(media_type)))


@ -0,0 +1,82 @@
"""Remove unencrypted fields and data
Revision ID: c059b952ed76
Revises: 703298a825c2
Create Date: 2019-08-19 16:31:00.952773
"""
# revision identifiers, used by Alembic.
revision = 'c059b952ed76'
down_revision = '703298a825c2'
import uuid
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from data.database import FederatedLogin, User, RobotAccountToken
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('oauthaccesstoken_refresh_token', table_name='oauthaccesstoken')
op.drop_column(u'oauthaccesstoken', 'refresh_token')
op.drop_column('accesstoken', 'code')
op.drop_column('appspecificauthtoken', 'token_code')
op.drop_column('oauthaccesstoken', 'access_token')
op.drop_column('oauthapplication', 'client_secret')
op.drop_column('oauthauthorizationcode', 'code')
op.drop_column('repositorybuildtrigger', 'private_key')
op.drop_column('repositorybuildtrigger', 'auth_token')
# ### end Alembic commands ###
# Overwrite all plaintext robot credentials.
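# Repeatedly pick an un-migrated RobotAccountToken, replace the robot's email and
# federated-login service_ident with values that carry no credential material, and mark the
# token row as fully migrated.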
while True:
try:
robot_account_token = RobotAccountToken.get(fully_migrated=False)
robot_account = robot_account_token.robot_account
robot_account.email = str(uuid.uuid4())
robot_account.save()
federated_login = FederatedLogin.get(user=robot_account)
federated_login.service_ident = 'robot:%s' % robot_account.id
federated_login.save()
robot_account_token.fully_migrated = True
robot_account_token.save()
except RobotAccountToken.DoesNotExist:
break
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(u'oauthaccesstoken', sa.Column('refresh_token', sa.String(length=255), nullable=True))
op.create_index('oauthaccesstoken_refresh_token', 'oauthaccesstoken', ['refresh_token'], unique=False)
op.add_column('repositorybuildtrigger', sa.Column('auth_token', sa.String(length=255), nullable=True))
op.add_column('repositorybuildtrigger', sa.Column('private_key', sa.Text(), nullable=True))
op.add_column('oauthauthorizationcode', sa.Column('code', sa.String(length=255), nullable=True))
op.create_index('oauthauthorizationcode_code', 'oauthauthorizationcode', ['code'], unique=True)
op.add_column('oauthapplication', sa.Column('client_secret', sa.String(length=255), nullable=True))
op.add_column('oauthaccesstoken', sa.Column('access_token', sa.String(length=255), nullable=True))
op.create_index('oauthaccesstoken_access_token', 'oauthaccesstoken', ['access_token'], unique=False)
op.add_column('appspecificauthtoken', sa.Column('token_code', sa.String(length=255), nullable=True))
op.create_index('appspecificauthtoken_token_code', 'appspecificauthtoken', ['token_code'], unique=True)
op.add_column('accesstoken', sa.Column('code', sa.String(length=255), nullable=True))
op.create_index('accesstoken_code', 'accesstoken', ['code'], unique=True)
# ### end Alembic commands ###


@ -0,0 +1,104 @@
"""Add new fields and tables for encrypted tokens
Revision ID: c13c8052f7a6
Revises: 5248ddf35167
Create Date: 2019-08-19 15:59:36.269155
"""
# revision identifiers, used by Alembic.
revision = 'c13c8052f7a6'
down_revision = '5248ddf35167'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
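# The new *_name / *_code / secure_* columns are added alongside the existing plaintext
# credential columns; the plaintext columns are removed in a later migration
# (c059b952ed76, "Remove unencrypted fields and data").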
op.create_table('robotaccounttoken',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('robot_account_id', sa.Integer(), nullable=False),
sa.Column('token', sa.String(length=255), nullable=False),
sa.Column('fully_migrated', sa.Boolean(), nullable=False, server_default='0'),
sa.ForeignKeyConstraint(['robot_account_id'], ['user.id'], name=op.f('fk_robotaccounttoken_robot_account_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_robotaccounttoken'))
)
op.create_index('robotaccounttoken_robot_account_id', 'robotaccounttoken', ['robot_account_id'], unique=True)
op.add_column(u'accesstoken', sa.Column('token_code', sa.String(length=255), nullable=True))
op.add_column(u'accesstoken', sa.Column('token_name', sa.String(length=255), nullable=True))
op.create_index('accesstoken_token_name', 'accesstoken', ['token_name'], unique=True)
op.add_column(u'appspecificauthtoken', sa.Column('token_name', sa.String(length=255), nullable=True))
op.add_column(u'appspecificauthtoken', sa.Column('token_secret', sa.String(length=255), nullable=True))
op.create_index('appspecificauthtoken_token_name', 'appspecificauthtoken', ['token_name'], unique=True)
op.add_column(u'emailconfirmation', sa.Column('verification_code', sa.String(length=255), nullable=True))
op.add_column(u'oauthaccesstoken', sa.Column('token_code', sa.String(length=255), nullable=True))
op.add_column(u'oauthaccesstoken', sa.Column('token_name', sa.String(length=255), nullable=True))
op.create_index('oauthaccesstoken_token_name', 'oauthaccesstoken', ['token_name'], unique=True)
op.add_column(u'oauthapplication', sa.Column('secure_client_secret', sa.String(length=255), nullable=True))
op.add_column(u'oauthapplication', sa.Column('fully_migrated', sa.Boolean(), server_default='0', nullable=False))
op.add_column(u'oauthauthorizationcode', sa.Column('code_credential', sa.String(length=255), nullable=True))
op.add_column(u'oauthauthorizationcode', sa.Column('code_name', sa.String(length=255), nullable=True))
op.create_index('oauthauthorizationcode_code_name', 'oauthauthorizationcode', ['code_name'], unique=True)
op.drop_index('oauthauthorizationcode_code', table_name='oauthauthorizationcode')
op.create_index('oauthauthorizationcode_code', 'oauthauthorizationcode', ['code'], unique=True)
op.add_column(u'repositorybuildtrigger', sa.Column('secure_auth_token', sa.String(length=255), nullable=True))
op.add_column(u'repositorybuildtrigger', sa.Column('secure_private_key', sa.Text(), nullable=True))
op.add_column(u'repositorybuildtrigger', sa.Column('fully_migrated', sa.Boolean(), server_default='0', nullable=False))
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('robotaccounttoken', [
('robot_account_id', tester.TestDataType.Foreign('user')),
('token', tester.TestDataType.Token),
('fully_migrated', tester.TestDataType.Boolean),
])
tester.populate_column('accesstoken', 'code', tester.TestDataType.Token)
tester.populate_column('appspecificauthtoken', 'token_code', tester.TestDataType.Token)
tester.populate_column('emailconfirmation', 'verification_code', tester.TestDataType.Token)
tester.populate_column('oauthaccesstoken', 'token_code', tester.TestDataType.Token)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column(u'repositorybuildtrigger', 'secure_private_key')
op.drop_column(u'repositorybuildtrigger', 'secure_auth_token')
op.drop_index('oauthauthorizationcode_code', table_name='oauthauthorizationcode')
op.create_index('oauthauthorizationcode_code', 'oauthauthorizationcode', ['code'], unique=False)
op.drop_index('oauthauthorizationcode_code_name', table_name='oauthauthorizationcode')
op.drop_column(u'oauthauthorizationcode', 'code_name')
op.drop_column(u'oauthauthorizationcode', 'code_credential')
op.drop_column(u'oauthapplication', 'secure_client_secret')
op.drop_index('oauthaccesstoken_token_name', table_name='oauthaccesstoken')
op.drop_column(u'oauthaccesstoken', 'token_name')
op.drop_column(u'oauthaccesstoken', 'token_code')
op.drop_column(u'emailconfirmation', 'verification_code')
op.drop_index('appspecificauthtoken_token_name', table_name='appspecificauthtoken')
op.drop_column(u'appspecificauthtoken', 'token_secret')
op.drop_column(u'appspecificauthtoken', 'token_name')
op.drop_index('accesstoken_token_name', table_name='accesstoken')
op.drop_column(u'accesstoken', 'token_name')
op.drop_column(u'accesstoken', 'token_code')
op.drop_table('robotaccounttoken')
# ### end Alembic commands ###

File diff suppressed because it is too large


@ -0,0 +1,26 @@
"""Backfill RepositorySearchScore table
Revision ID: c3d4b7ebcdf7
Revises: f30984525c86
Create Date: 2017-04-13 12:01:59.572775
"""
# revision identifiers, used by Alembic.
revision = 'c3d4b7ebcdf7'
down_revision = 'f30984525c86'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Add a 0 entry into the RepositorySearchScore table for each repository that isn't present
conn = op.get_bind()
conn.execute("insert into repositorysearchscore (repository_id, score) SELECT id, 0 FROM " +
"repository WHERE id not in (select repository_id from repositorysearchscore)")
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
pass


@ -0,0 +1,25 @@
"""Drop checksum on ImageStorage
Revision ID: c91c564aad34
Revises: 152bb29a1bb3
Create Date: 2018-02-21 12:17:52.405644
"""
# revision identifiers, used by Alembic.
revision = 'c91c564aad34'
down_revision = '152bb29a1bb3'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_column('imagestorage', 'checksum')
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.add_column('imagestorage', sa.Column('checksum', sa.String(length=255), nullable=True))


@ -0,0 +1,30 @@
"""Add user location field
Revision ID: cbc8177760d9
Revises: 7367229b38d9
Create Date: 2018-02-02 17:39:16.589623
"""
# revision identifiers, used by Alembic.
revision = 'cbc8177760d9'
down_revision = '7367229b38d9'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.add_column('user', sa.Column('location', UTF8CharField(length=255), nullable=True))
# ### population of test data ### #
tester.populate_column('user', 'location', tester.TestDataType.UTF8Char)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_column('user', 'location')


@ -0,0 +1,68 @@
"""repository mirror notification
Revision ID: cc6778199cdb
Revises: c059b952ed76
Create Date: 2019-10-03 17:41:23.316914
"""
# revision identifiers, used by Alembic.
revision = 'cc6778199cdb'
down_revision = 'c059b952ed76'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
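# Register notification kinds and external notification events for the repository mirror
# sync lifecycle (started / success / failed).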
op.bulk_insert(tables.notificationkind,
[
{'name': 'repo_mirror_sync_started'},
{'name': 'repo_mirror_sync_success'},
{'name': 'repo_mirror_sync_failed'},
])
op.bulk_insert(tables.externalnotificationevent,
[
{'name': 'repo_mirror_sync_started'},
{'name': 'repo_mirror_sync_success'},
{'name': 'repo_mirror_sync_failed'},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.execute(tables
.notificationkind
.delete()
.where(tables.
notificationkind.c.name == op.inline_literal('repo_mirror_sync_started')))
op.execute(tables
.notificationkind
.delete()
.where(tables.
notificationkind.c.name == op.inline_literal('repo_mirror_sync_success')))
op.execute(tables
.notificationkind
.delete()
.where(tables.
notificationkind.c.name == op.inline_literal('repo_mirror_sync_failed')))
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('repo_mirror_sync_started')))
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('repo_mirror_sync_success')))
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('repo_mirror_sync_failed')))


@ -0,0 +1,192 @@
"""Delete old Appr tables
Revision ID: d17c695859ea
Revises: 5d463ea1e8a8
Create Date: 2018-07-16 15:21:11.593040
"""
# revision identifiers, used by Alembic.
revision = 'd17c695859ea'
down_revision = '5d463ea1e8a8'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.sql import table, column
from util.migrate import UTF8LongText, UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
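# Drop the legacy Appr (CNR) data model tables; the downgrade below recreates the same schema
# and re-seeds the blobplacementlocation and tagkind rows.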
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tag')
op.drop_table('manifestlistmanifest')
op.drop_table('manifestlist')
op.drop_table('manifestblob')
op.drop_table('manifest')
op.drop_table('blobplacement')
op.drop_table('blob')
op.drop_table('blobplacementlocation')
op.drop_table('tagkind')
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
'tagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagkind'))
)
op.create_index('tagkind_name', 'tagkind', ['name'], unique=True)
op.create_table(
'blobplacementlocation',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocation'))
)
op.create_index('blobplacementlocation_name', 'blobplacementlocation', ['name'], unique=True)
op.create_table(
'blob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('size', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_size', sa.BigInteger(), nullable=True),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_blob_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blob'))
)
op.create_index('blob_digest', 'blob', ['digest'], unique=True)
op.create_index('blob_media_type_id', 'blob', ['media_type_id'], unique=False)
op.create_table(
'manifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest'))
)
op.create_index('manifest_digest', 'manifest', ['digest'], unique=True)
op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False)
op.create_table(
'manifestlist',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('manifest_list_json', UTF8LongText, nullable=False),
sa.Column('schema_version', UTF8CharField(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlist_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlist'))
)
op.create_index('manifestlist_digest', 'manifestlist', ['digest'], unique=True)
op.create_index('manifestlist_media_type_id', 'manifestlist', ['media_type_id'], unique=False)
op.create_table(
'blobplacement',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_blobplacement_blob_id_blob')),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacement_location_id_blobplacementlocation')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacement'))
)
op.create_index('blobplacement_blob_id', 'blobplacement', ['blob_id'], unique=False)
op.create_index('blobplacement_blob_id_location_id', 'blobplacement', ['blob_id', 'location_id'], unique=True)
op.create_index('blobplacement_location_id', 'blobplacement', ['location_id'], unique=False)
op.create_table(
'manifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestblob_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob'))
)
op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False)
op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False)
op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_table(
'manifestlistmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('operating_system', UTF8CharField(length=255), nullable=True),
sa.Column('architecture', UTF8CharField(length=255), nullable=True),
sa.Column('platform_json', UTF8LongText, nullable=True),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlistmanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_manifestlistmanifest_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlistmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlistmanifest'))
)
op.create_index('manifestlistmanifest_manifest_id', 'manifestlistmanifest', ['manifest_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_list_id', 'manifestlistmanifest', ['manifest_list_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_os_arch_mtid', 'manifestlistmanifest', ['manifest_list_id', 'operating_system', 'architecture', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_mtid', 'manifestlistmanifest', ['manifest_list_id', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_media_type_id', 'manifestlistmanifest', ['media_type_id'], unique=False)
op.create_table(
'tag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', UTF8CharField(length=190), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False),
sa.Column('reverted', sa.Boolean(), nullable=False),
sa.Column('protected', sa.Boolean(), nullable=False),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['tag.id'], name=op.f('fk_tag_linked_tag_id_tag')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_tag_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['tagkind.id'], name=op.f('fk_tag_tag_kind_id_tagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tag'))
)
op.create_index('tag_lifetime_end', 'tag', ['lifetime_end'], unique=False)
op.create_index('tag_linked_tag_id', 'tag', ['linked_tag_id'], unique=False)
op.create_index('tag_manifest_list_id', 'tag', ['manifest_list_id'], unique=False)
op.create_index('tag_repository_id', 'tag', ['repository_id'], unique=False)
op.create_index('tag_repository_id_name_hidden', 'tag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('tag_repository_id_name_lifetime_end', 'tag', ['repository_id', 'name', 'lifetime_end'], unique=True)
op.create_index('tag_repository_id_name', 'tag', ['repository_id', 'name'], unique=False)
op.create_index('tag_tag_kind_id', 'tag', ['tag_kind_id'], unique=False)
# ### end Alembic commands ###
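  # Seed the lookup tables created above: the default blob placement locations and
  # the tag kinds referenced by the 'tag' table.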
blobplacementlocation_table = table('blobplacementlocation',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
blobplacementlocation_table,
[
{'name': 'local_eu'},
{'name': 'local_us'},
],
)
tagkind_table = table('tagkind',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
tagkind_table,
[
{'id': 1, 'name': 'tag'},
{'id': 2, 'name': 'release'},
{'id': 3, 'name': 'channel'},
]
)


@@ -0,0 +1,36 @@
"""Backfill state_id and make it unique
Revision ID: d42c175b439a
Revises: 3e8cc74a1e7b
Create Date: 2017-01-18 15:11:01.635632
"""
# revision identifiers, used by Alembic.
revision = 'd42c175b439a'
down_revision = '3e8cc74a1e7b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Backfill the queueitem table's state_id field with unique values for all entries which are
# empty.
conn = op.get_bind()
conn.execute("update queueitem set state_id = id where state_id = ''")
# ### commands auto generated by Alembic - please adjust! ###
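  # Drop the old non-unique index and recreate it as unique now that every row has a
  # distinct state_id.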
op.drop_index('queueitem_state_id', table_name='queueitem')
op.create_index('queueitem_state_id', 'queueitem', ['state_id'], unique=True)
# ### end Alembic commands ###


def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('queueitem_state_id', table_name='queueitem')
op.create_index('queueitem_state_id', 'queueitem', ['state_id'], unique=False)
# ### end Alembic commands ###


@@ -0,0 +1,28 @@
"""Add change_tag_expiration log type
Revision ID: d8989249f8f6
Revises: dc4af11a5f90
Create Date: 2017-06-21 21:18:25.948689
"""
# revision identifiers, used by Alembic.
revision = 'd8989249f8f6'
down_revision = 'dc4af11a5f90'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
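  # Register the new 'change_tag_expiration' log entry kind so changes to tag
  # expiration can be logged.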
op.bulk_insert(tables.logentrykind, [
{'name': 'change_tag_expiration'},
])


def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
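  # Remove the 'change_tag_expiration' log entry kind added in upgrade().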
op.execute(tables
.logentrykind
.delete()
.where(tables.logentrykind.c.name == op.inline_literal('change_tag_expiration')))
