initial import for Open Source 🎉

This commit is contained in:
Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

9
data/__init__.py Normal file

@@ -0,0 +1,9 @@
from data.appr_model import (
blob,
channel,
manifest,
manifest_list,
package,
release,
tag,
)

76
data/appr_model/blob.py Normal file

@@ -0,0 +1,76 @@
import logging
from peewee import IntegrityError
from data.model import db_transaction
logger = logging.getLogger(__name__)
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def get_blob(digest, models_ref):
""" Find a blob by its digest. """
Blob = models_ref.Blob
return Blob.select().where(Blob.digest == _ensure_sha256_header(digest)).get()
def get_or_create_blob(digest, size, media_type_name, locations, models_ref):
""" Try to find a blob by its digest or create it. """
Blob = models_ref.Blob
BlobPlacement = models_ref.BlobPlacement
# Get or create the blob entry for the digest.
try:
blob = get_blob(digest, models_ref)
logger.debug('Retrieved blob with digest %s', digest)
except Blob.DoesNotExist:
blob = Blob.create(digest=_ensure_sha256_header(digest),
media_type_id=Blob.media_type.get_id(media_type_name),
size=size)
logger.debug('Created blob with digest %s', digest)
# Add the locations to the blob.
for location_name in locations:
location_id = BlobPlacement.location.get_id(location_name)
try:
BlobPlacement.create(blob=blob, location=location_id)
except IntegrityError:
logger.debug('Location %s already exists for blob %s', location_name, blob.id)
return blob
def get_blob_locations(digest, models_ref):
""" Find all locations names for a blob. """
Blob = models_ref.Blob
BlobPlacement = models_ref.BlobPlacement
BlobPlacementLocation = models_ref.BlobPlacementLocation
return [x.name for x in
BlobPlacementLocation
.select()
.join(BlobPlacement)
.join(Blob)
.where(Blob.digest == _ensure_sha256_header(digest))]
def ensure_blob_locations(models_ref, *names):
BlobPlacementLocation = models_ref.BlobPlacementLocation
with db_transaction():
locations = BlobPlacementLocation.select().where(BlobPlacementLocation.name << names)
insert_names = list(names)
for location in locations:
insert_names.remove(location.name)
if not insert_names:
return
data = [{'name': name} for name in insert_names]
BlobPlacementLocation.insert_many(data).execute()
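
A minimal usage sketch for the blob helpers above, assuming an initialized database and app context; the digest, size, media type name, and location name are illustrative, and NEW_MODELS comes from data/appr_model/models.py later in this commit.

from data.appr_model import blob as blob_model
from data.appr_model.models import NEW_MODELS

# Illustrative values; the media type and location must already exist as rows.
digest = 'sha256:' + 'ab' * 32
blob_model.ensure_blob_locations(NEW_MODELS, 'local_us')
blob = blob_model.get_or_create_blob(digest, 1024, 'application/vnd.cnr.blob.v0.tar+gzip',
                                     ['local_us'], NEW_MODELS)
print(blob_model.get_blob_locations(digest, NEW_MODELS))  # e.g. ['local_us']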


@@ -0,0 +1,64 @@
from data.appr_model import tag as tag_model
def get_channel_releases(repo, channel, models_ref):
""" Return all previously linked tags.
This works based upon Tag lifetimes.
"""
Channel = models_ref.Channel
Tag = models_ref.Tag
tag_kind_id = Channel.tag_kind.get_id('channel')
channel_name = channel.name
return (Tag
.select(Tag, Channel)
.join(Channel, on=(Tag.id == Channel.linked_tag))
.where(Channel.repository == repo,
Channel.name == channel_name,
Channel.tag_kind == tag_kind_id, Channel.lifetime_end != None)
.order_by(Tag.lifetime_end))
def get_channel(repo, channel_name, models_ref):
""" Find a Channel by name. """
channel = tag_model.get_tag(repo, channel_name, models_ref, "channel")
return channel
def get_tag_channels(repo, tag_name, models_ref, active=True):
""" Find the Channels associated with a Tag. """
Tag = models_ref.Tag
tag = tag_model.get_tag(repo, tag_name, models_ref, "release")
query = tag.tag_parents
if active:
query = tag_model.tag_is_alive(query, Tag)
return query
def delete_channel(repo, channel_name, models_ref):
""" Delete a channel by name. """
return tag_model.delete_tag(repo, channel_name, models_ref, "channel")
def create_or_update_channel(repo, channel_name, tag_name, models_ref):
""" Creates or updates a channel to include a particular tag. """
tag = tag_model.get_tag(repo, tag_name, models_ref, 'release')
return tag_model.create_or_update_tag(repo, channel_name, models_ref, linked_tag=tag,
tag_kind="channel")
def get_repo_channels(repo, models_ref):
""" Creates or updates a channel to include a particular tag. """
Channel = models_ref.Channel
Tag = models_ref.Tag
tag_kind_id = Channel.tag_kind.get_id('channel')
query = (Channel
.select(Channel, Tag)
.join(Tag, on=(Tag.id == Channel.linked_tag))
.where(Channel.repository == repo,
Channel.tag_kind == tag_kind_id))
return tag_model.tag_is_alive(query, Channel)
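
A hedged sketch of how these channel helpers compose, assuming repo is an existing application Repository row and that an illustrative '1.0.0' release tag already exists.

from data.appr_model import channel as channel_model
from data.appr_model.models import NEW_MODELS

# repo, 'stable', and '1.0.0' are illustrative; the release tag must already exist.
channel_model.create_or_update_channel(repo, 'stable', '1.0.0', NEW_MODELS)
for chan in channel_model.get_repo_channels(repo, NEW_MODELS):
    print(chan.name)
channel_model.delete_channel(repo, 'stable', NEW_MODELS)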


@@ -0,0 +1,67 @@
import logging
import hashlib
import json
from cnr.models.package_base import get_media_type
from data.database import db_transaction, MediaType
from data.appr_model import tag as tag_model
logger = logging.getLogger(__name__)
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def _digest(manifestjson):
return _ensure_sha256_header(hashlib.sha256(json.dumps(manifestjson, sort_keys=True)).hexdigest())
def get_manifest_query(digest, media_type, models_ref):
Manifest = models_ref.Manifest
return Manifest.select().where(Manifest.digest == _ensure_sha256_header(digest),
Manifest.media_type == Manifest.media_type.get_id(media_type))
def get_manifest_with_blob(digest, media_type, models_ref):
Blob = models_ref.Blob
query = get_manifest_query(digest, media_type, models_ref)
return query.join(Blob).get()
def get_or_create_manifest(manifest_json, media_type_name, models_ref):
Manifest = models_ref.Manifest
digest = _digest(manifest_json)
try:
manifest = get_manifest_query(digest, media_type_name, models_ref).get()
except Manifest.DoesNotExist:
with db_transaction():
manifest = Manifest.create(digest=digest,
manifest_json=manifest_json,
media_type=Manifest.media_type.get_id(media_type_name))
return manifest
def get_manifest_types(repo, models_ref, release=None):
""" Returns an array of MediaTypes.name for a repo, can filter by tag """
Tag = models_ref.Tag
ManifestListManifest = models_ref.ManifestListManifest
query = tag_model.tag_is_alive(Tag
.select(MediaType.name)
.join(ManifestListManifest,
on=(ManifestListManifest.manifest_list == Tag.manifest_list))
.join(MediaType,
on=(ManifestListManifest.media_type == MediaType.id))
.where(Tag.repository == repo,
Tag.tag_kind == Tag.tag_kind.get_id('release')), Tag)
if release:
query = query.where(Tag.name == release)
manifests = set()
for m in query.distinct().tuples():
manifests.add(get_media_type(m[0]))
return manifests
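
A brief sketch of manifest creation and type lookup; the manifest JSON, media type name, and repo are illustrative, and a matching MediaType row is assumed to exist.

from data.appr_model import manifest as manifest_model
from data.appr_model.models import NEW_MODELS

# Illustrative manifest; the digest is derived from the canonical (sort_keys=True) JSON dump.
manifest_json = {'mediaType': 'application/vnd.cnr.package-manifest.helm.v0.json',
                 'content': {'digest': 'sha256:1234', 'urls': []}}
manifest = manifest_model.get_or_create_manifest(manifest_json, manifest_json['mediaType'],
                                                 NEW_MODELS)
print(manifest.digest)
print(manifest_model.get_manifest_types(repo, NEW_MODELS))  # e.g. set(['helm'])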


@@ -0,0 +1,67 @@
import logging
import hashlib
import json
from data.database import db_transaction
logger = logging.getLogger(__name__)
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def _digest(manifestjson):
return _ensure_sha256_header(hashlib.sha256(json.dumps(manifestjson, sort_keys=True)).hexdigest())
def get_manifest_list(digest, models_ref):
ManifestList = models_ref.ManifestList
return ManifestList.select().where(ManifestList.digest == _ensure_sha256_header(digest)).get()
def get_or_create_manifest_list(manifest_list_json, media_type_name, schema_version, models_ref):
ManifestList = models_ref.ManifestList
digest = _digest(manifest_list_json)
media_type_id = ManifestList.media_type.get_id(media_type_name)
try:
return get_manifest_list(digest, models_ref)
except ManifestList.DoesNotExist:
with db_transaction():
manifestlist = ManifestList.create(digest=digest, manifest_list_json=manifest_list_json,
schema_version=schema_version, media_type=media_type_id)
return manifestlist
def create_manifestlistmanifest(manifestlist, manifest_ids, manifest_list_json, models_ref):
""" From a manifestlist, manifests, and the manifest list blob,
create if doesn't exist the manfiestlistmanifest for each manifest """
for pos in xrange(len(manifest_ids)):
manifest_id = manifest_ids[pos]
manifest_json = manifest_list_json[pos]
get_or_create_manifestlistmanifest(manifest=manifest_id,
manifestlist=manifestlist,
media_type_name=manifest_json['mediaType'],
models_ref=models_ref)
def get_or_create_manifestlistmanifest(manifest, manifestlist, media_type_name, models_ref):
ManifestListManifest = models_ref.ManifestListManifest
media_type_id = ManifestListManifest.media_type.get_id(media_type_name)
try:
ml = (ManifestListManifest
.select()
.where(ManifestListManifest.manifest == manifest,
ManifestListManifest.media_type == media_type_id,
ManifestListManifest.manifest_list == manifestlist)).get()
except ManifestListManifest.DoesNotExist:
ml = ManifestListManifest.create(manifest_list=manifestlist, media_type=media_type_id,
manifest=manifest)
return ml
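
A sketch of how a manifest list is assembled from existing manifests; LIST_MEDIA_TYPE and SCHEMA_VERSION are the constants defined in release.py further down, and `manifests` is an illustrative, already-created list of Manifest rows.

from data.appr_model import manifest_list as manifest_list_model
from data.appr_model.models import NEW_MODELS
from data.appr_model.release import LIST_MEDIA_TYPE, SCHEMA_VERSION

# `manifests` is assumed to be a list of Manifest rows, ordered by id (see release.py).
list_json = [m.manifest_json for m in manifests]
manifest_ids = [m.id for m in manifests]
manifestlist = manifest_list_model.get_or_create_manifest_list(list_json, LIST_MEDIA_TYPE,
                                                               SCHEMA_VERSION, NEW_MODELS)
manifest_list_model.create_manifestlistmanifest(manifestlist, manifest_ids, list_json, NEW_MODELS)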

15
data/appr_model/models.py Normal file

@@ -0,0 +1,15 @@
from collections import namedtuple
from data.database import (ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList,
ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest,
ApprBlobPlacement, ApprChannel)
ModelsRef = namedtuple('ModelsRef', ['Tag', 'TagKind', 'BlobPlacementLocation', 'ManifestList',
'ManifestBlob', 'Blob', 'ManifestListManifest', 'Manifest',
'BlobPlacement', 'Channel', 'manifestlistmanifest_set_name',
'tag_set_prefetch_name'])
NEW_MODELS = ModelsRef(ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList,
ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest,
ApprBlobPlacement, ApprChannel, 'apprmanifestlistmanifest_set',
'apprtag_set')
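
Every helper in data.appr_model takes a models_ref argument; passing NEW_MODELS binds them to the Appr* tables. An illustrative example (repo and the tag name are assumptions):

from data.appr_model import tag as tag_model
from data.appr_model.models import NEW_MODELS

# Look up a release tag on an existing repository row.
if tag_model.tag_exists(repo, '1.0.0', NEW_MODELS):
    tag = tag_model.get_tag(repo, '1.0.0', NEW_MODELS)
    print(tag.lifetime_start)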


@@ -0,0 +1,67 @@
from cnr.models.package_base import get_media_type, manifest_media_type
from peewee import prefetch
from data import model
from data.database import Repository, Namespace
from data.appr_model import tag as tag_model
def list_packages_query(models_ref, namespace=None, media_type=None, search_query=None,
username=None):
""" List and filter repository by search query. """
Tag = models_ref.Tag
if username and not search_query:
repositories = model.repository.get_visible_repositories(username,
kind_filter='application',
include_public=True,
namespace=namespace,
limit=50)
if not repositories:
return []
repo_query = (Repository
.select(Repository, Namespace.username)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.id << [repo.rid for repo in repositories]))
if namespace:
repo_query = (repo_query
.where(Namespace.username == namespace))
else:
if search_query is not None:
fields = [model.repository.SEARCH_FIELDS.name.name]
repositories = model.repository.get_app_search(search_query,
username=username,
search_fields=fields,
limit=50)
if not repositories:
return []
repo_query = (Repository
.select(Repository, Namespace.username)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.id << [repo.id for repo in repositories]))
else:
repo_query = (Repository
.select(Repository, Namespace.username)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.visibility == model.repository.get_public_repo_visibility(),
Repository.kind == Repository.kind.get_id('application')))
if namespace:
repo_query = (repo_query
.where(Namespace.username == namespace))
tag_query = (Tag
.select()
.where(Tag.tag_kind == Tag.tag_kind.get_id('release'))
.order_by(Tag.lifetime_start))
if media_type:
tag_query = tag_model.filter_tags_by_media_type(tag_query, media_type, models_ref)
tag_query = tag_model.tag_is_alive(tag_query, Tag)
query = prefetch(repo_query, tag_query)
return query
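
A hedged example of listing packages; the namespace, username, and media type are illustrative.

from data.appr_model import package as package_model
from data.appr_model.models import NEW_MODELS

# Lists application repos visible to 'someuser' in the 'myorg' namespace,
# with their live release tags prefetched on each returned Repository.
for repo in package_model.list_packages_query(NEW_MODELS, namespace='myorg',
                                              media_type='helm', username='someuser'):
    print(repo.name)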

152
data/appr_model/release.py Normal file

@@ -0,0 +1,152 @@
import bisect
from cnr.exception import PackageAlreadyExists
from cnr.models.package_base import manifest_media_type
from data.database import db_transaction, get_epoch_timestamp
from data.appr_model import (blob as blob_model, manifest as manifest_model,
manifest_list as manifest_list_model,
tag as tag_model)
LIST_MEDIA_TYPE = 'application/vnd.cnr.manifest.list.v0.json'
SCHEMA_VERSION = 'v0'
def _ensure_sha256_header(digest):
if digest.startswith('sha256:'):
return digest
return 'sha256:' + digest
def get_app_release(repo, tag_name, media_type, models_ref):
""" Returns (tag, manifest, blob) given a repo object, tag_name, and media_type). """
ManifestListManifest = models_ref.ManifestListManifest
Manifest = models_ref.Manifest
Blob = models_ref.Blob
ManifestBlob = models_ref.ManifestBlob
manifestlistmanifest_set_name = models_ref.manifestlistmanifest_set_name
tag = tag_model.get_tag(repo, tag_name, models_ref, tag_kind='release')
media_type_id = ManifestListManifest.media_type.get_id(manifest_media_type(media_type))
manifestlistmanifest = (getattr(tag.manifest_list, manifestlistmanifest_set_name)
.join(Manifest)
.where(ManifestListManifest.media_type == media_type_id).get())
manifest = manifestlistmanifest.manifest
blob = Blob.select().join(ManifestBlob).where(ManifestBlob.manifest == manifest).get()
return (tag, manifest, blob)
def delete_app_release(repo, tag_name, media_type, models_ref):
""" Terminate a Tag/media-type couple
It find the corresponding tag/manifest and remove from the manifestlistmanifest the manifest
1. it terminates the current tag (in all-cases)
2. if the new manifestlist is not empty, it creates a new tag for it
"""
ManifestListManifest = models_ref.ManifestListManifest
manifestlistmanifest_set_name = models_ref.manifestlistmanifest_set_name
media_type_id = ManifestListManifest.media_type.get_id(manifest_media_type(media_type))
with db_transaction():
tag = tag_model.get_tag(repo, tag_name, models_ref)
manifest_list = tag.manifest_list
list_json = manifest_list.manifest_list_json
mlm_query = (ManifestListManifest
.select()
.where(ManifestListManifest.manifest_list == tag.manifest_list))
list_manifest_ids = sorted([mlm.manifest_id for mlm in mlm_query])
manifestlistmanifest = (getattr(tag.manifest_list, manifestlistmanifest_set_name)
.where(ManifestListManifest.media_type == media_type_id).get())
index = list_manifest_ids.index(manifestlistmanifest.manifest_id)
list_manifest_ids.pop(index)
list_json.pop(index)
if not list_json:
tag.lifetime_end = get_epoch_timestamp()
tag.save()
else:
manifestlist = manifest_list_model.get_or_create_manifest_list(list_json, LIST_MEDIA_TYPE,
SCHEMA_VERSION, models_ref)
manifest_list_model.create_manifestlistmanifest(manifestlist, list_manifest_ids,
list_json, models_ref)
tag = tag_model.create_or_update_tag(repo, tag_name, models_ref, manifest_list=manifestlist,
tag_kind="release")
return tag
def create_app_release(repo, tag_name, manifest_data, digest, models_ref, force=False):
""" Create a new application release, it includes creating a new Tag, ManifestList,
ManifestListManifests, Manifest, ManifestBlob.
To deduplicate the ManifestList, the manifestlist_json is kept ordered by the manifest.id.
To find the insert point in the ManifestList it uses bisect on the manifest-ids list.
"""
ManifestList = models_ref.ManifestList
ManifestListManifest = models_ref.ManifestListManifest
Blob = models_ref.Blob
ManifestBlob = models_ref.ManifestBlob
with db_transaction():
# Create/get the package manifest
manifest = manifest_model.get_or_create_manifest(manifest_data, manifest_data['mediaType'],
models_ref)
# get the tag
tag = tag_model.get_or_initialize_tag(repo, tag_name, models_ref)
if tag.manifest_list is None:
tag.manifest_list = ManifestList(media_type=ManifestList.media_type.get_id(LIST_MEDIA_TYPE),
schema_version=SCHEMA_VERSION,
manifest_list_json=[], )
elif tag_model.tag_media_type_exists(tag, manifest.media_type, models_ref):
if force:
delete_app_release(repo, tag_name, manifest.media_type.name, models_ref)
return create_app_release(repo, tag_name, manifest_data, digest, models_ref, force=False)
else:
raise PackageAlreadyExists("package exists already")
list_json = tag.manifest_list.manifest_list_json
mlm_query = (ManifestListManifest
.select()
.where(ManifestListManifest.manifest_list == tag.manifest_list))
list_manifest_ids = sorted([mlm.manifest_id for mlm in mlm_query])
insert_point = bisect.bisect_left(list_manifest_ids, manifest.id)
list_json.insert(insert_point, manifest.manifest_json)
list_manifest_ids.insert(insert_point, manifest.id)
manifestlist = manifest_list_model.get_or_create_manifest_list(list_json, LIST_MEDIA_TYPE,
SCHEMA_VERSION, models_ref)
manifest_list_model.create_manifestlistmanifest(manifestlist, list_manifest_ids, list_json,
models_ref)
tag = tag_model.create_or_update_tag(repo, tag_name, models_ref, manifest_list=manifestlist,
tag_kind="release")
blob_digest = digest
try:
(ManifestBlob
.select()
.join(Blob)
.where(ManifestBlob.manifest == manifest,
Blob.digest == _ensure_sha256_header(blob_digest)).get())
except ManifestBlob.DoesNotExist:
blob = blob_model.get_blob(blob_digest, models_ref)
ManifestBlob.create(manifest=manifest, blob=blob)
return tag
def get_release_objs(repo, models_ref, media_type=None):
""" Returns an array of Tag for a repo, with optional filtering by media_type. """
Tag = models_ref.Tag
release_query = (Tag
.select()
.where(Tag.repository == repo,
Tag.tag_kind == Tag.tag_kind.get_id("release")))
if media_type:
release_query = tag_model.filter_tags_by_media_type(release_query, media_type, models_ref)
return tag_model.tag_is_alive(release_query, Tag)
def get_releases(repo, model_refs, media_type=None):
""" Returns an array of Tag.name for a repo, can filter by media_type. """
return [t.name for t in get_release_objs(repo, model_refs, media_type)]
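
A sketch of the release flow, assuming the manifest's blob was already uploaded via the blob helpers above; manifest_data, the blob digest, repo, and the tag name are illustrative.

from data.appr_model import release as release_model
from data.appr_model.models import NEW_MODELS

# manifest_data is an illustrative CNR manifest dict whose blob (by digest) already exists.
blob_digest = 'sha256:' + 'cd' * 32
tag = release_model.create_app_release(repo, '1.0.0', manifest_data, blob_digest,
                                       NEW_MODELS, force=False)
print(release_model.get_releases(repo, NEW_MODELS))  # e.g. ['1.0.0']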

99
data/appr_model/tag.py Normal file

@@ -0,0 +1,99 @@
import logging
from cnr.models.package_base import manifest_media_type
from peewee import IntegrityError
from data.model import (db_transaction, TagAlreadyCreatedException)
from data.database import get_epoch_timestamp_ms, db_for_update
logger = logging.getLogger(__name__)
def tag_is_alive(query, cls, now_ts=None):
return query.where((cls.lifetime_end >> None) |
(cls.lifetime_end > now_ts))
def tag_media_type_exists(tag, media_type, models_ref):
ManifestListManifest = models_ref.ManifestListManifest
manifestlistmanifest_set_name = models_ref.manifestlistmanifest_set_name
return (getattr(tag.manifest_list, manifestlistmanifest_set_name)
.where(ManifestListManifest.media_type == media_type).count() > 0)
def create_or_update_tag(repo, tag_name, models_ref, manifest_list=None, linked_tag=None,
tag_kind="release"):
Tag = models_ref.Tag
now_ts = get_epoch_timestamp_ms()
tag_kind_id = Tag.tag_kind.get_id(tag_kind)
with db_transaction():
try:
tag = db_for_update(tag_is_alive(Tag
.select()
.where(Tag.repository == repo,
Tag.name == tag_name,
Tag.tag_kind == tag_kind_id), Tag, now_ts)).get()
if tag.manifest_list == manifest_list and tag.linked_tag == linked_tag:
return tag
tag.lifetime_end = now_ts
tag.save()
except Tag.DoesNotExist:
pass
try:
return Tag.create(repository=repo, manifest_list=manifest_list, linked_tag=linked_tag,
name=tag_name, lifetime_start=now_ts, lifetime_end=None,
tag_kind=tag_kind_id)
except IntegrityError:
msg = 'Tag with name %s and lifetime start %s under repository %s/%s already exists'
raise TagAlreadyCreatedException(msg % (tag_name, now_ts, repo.namespace_user, repo.name))
def get_or_initialize_tag(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
try:
return tag_is_alive(Tag.select().where(Tag.repository == repo, Tag.name == tag_name), Tag).get()
except Tag.DoesNotExist:
return Tag(repo=repo, name=tag_name, tag_kind=Tag.tag_kind.get_id(tag_kind))
def get_tag(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
return tag_is_alive(Tag.select()
.where(Tag.repository == repo,
Tag.name == tag_name,
Tag.tag_kind == Tag.tag_kind.get_id(tag_kind)), Tag).get()
def delete_tag(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
tag_kind_id = Tag.tag_kind.get_id(tag_kind)
tag = tag_is_alive(Tag.select()
.where(Tag.repository == repo,
Tag.name == tag_name, Tag.tag_kind == tag_kind_id), Tag).get()
tag.lifetime_end = get_epoch_timestamp_ms()
tag.save()
return tag
def tag_exists(repo, tag_name, models_ref, tag_kind="release"):
Tag = models_ref.Tag
try:
get_tag(repo, tag_name, models_ref, tag_kind)
return True
except Tag.DoesNotExist:
return False
def filter_tags_by_media_type(tag_query, media_type, models_ref):
""" Return only available tag for a media_type. """
ManifestListManifest = models_ref.ManifestListManifest
Tag = models_ref.Tag
media_type = manifest_media_type(media_type)
t = (tag_query
.join(ManifestListManifest, on=(ManifestListManifest.manifest_list == Tag.manifest_list))
.where(ManifestListManifest.media_type == ManifestListManifest.media_type.get_id(media_type)))
return t
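
A short illustration of the lifetime-based tag model used above: a tag is "alive" while its lifetime_end is unset, and delete_tag simply stamps lifetime_end rather than removing the row. repo and the tag name are illustrative.

from data.appr_model import tag as tag_model
from data.appr_model.models import NEW_MODELS

Tag = NEW_MODELS.Tag
# Only tags with lifetime_end unset (or beyond now_ts, when given) are returned.
alive = tag_model.tag_is_alive(Tag.select().where(Tag.repository == repo), Tag)
print([t.name for t in alive])
tag_model.delete_tag(repo, '1.0.0', NEW_MODELS)   # sets lifetime_end, keeps history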

37
data/archivedlogs.py Normal file

@@ -0,0 +1,37 @@
import logging
from util.registry.gzipinputstream import GzipInputStream
from flask import send_file, abort
from data.userfiles import DelegateUserfiles, UserfilesHandlers
JSON_MIMETYPE = 'application/json'
logger = logging.getLogger(__name__)
class LogArchive(object):
def __init__(self, app=None, distributed_storage=None):
self.app = app
if app is not None:
self.state = self.init_app(app, distributed_storage)
else:
self.state = None
def init_app(self, app, distributed_storage):
location = app.config.get('LOG_ARCHIVE_LOCATION')
path = app.config.get('LOG_ARCHIVE_PATH', None)
handler_name = 'web.logarchive'
log_archive = DelegateUserfiles(app, distributed_storage, location, path,
handler_name=handler_name)
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['log_archive'] = log_archive
return log_archive
def __getattr__(self, name):
return getattr(self.state, name, None)
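
A minimal wiring sketch for the LogArchive extension, assuming a configured Flask app; the storage location and path are illustrative, and the DistributedStorage instance is stubbed out here.

from flask import Flask
from data.archivedlogs import LogArchive

app = Flask(__name__)
app.config['LOG_ARCHIVE_LOCATION'] = 'local_us'   # illustrative storage location name
app.config['LOG_ARCHIVE_PATH'] = 'logarchive/'
distributed_storage = None                        # stand-in for the app's DistributedStorage
log_archive = LogArchive(app, distributed_storage)
# app.extensions['log_archive'] now holds the underlying DelegateUserfiles instance.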

453
data/billing.py Normal file

@@ -0,0 +1,453 @@
import stripe
from datetime import datetime, timedelta
from calendar import timegm
from util.morecollections import AttrDict
PLANS = [
# Deprecated Plans (2013-2014)
{
'title': 'Micro',
'price': 700,
'privateRepos': 5,
'stripeId': 'micro',
'audience': 'For smaller teams',
'bus_features': False,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'personal-30',
'plans_page_hidden': False,
},
{
'title': 'Basic',
'price': 1200,
'privateRepos': 10,
'stripeId': 'small',
'audience': 'For your basic team',
'bus_features': False,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-micro-30',
'plans_page_hidden': False,
},
{
'title': 'Yacht',
'price': 5000,
'privateRepos': 20,
'stripeId': 'bus-coreos-trial',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 180,
'superseded_by': 'bus-small-30',
'plans_page_hidden': False,
},
{
'title': 'Personal',
'price': 1200,
'privateRepos': 5,
'stripeId': 'personal',
'audience': 'Individuals',
'bus_features': False,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'personal-30',
'plans_page_hidden': False,
},
{
'title': 'Skiff',
'price': 2500,
'privateRepos': 10,
'stripeId': 'bus-micro',
'audience': 'For startups',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-micro-30',
'plans_page_hidden': False,
},
{
'title': 'Yacht',
'price': 5000,
'privateRepos': 20,
'stripeId': 'bus-small',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-small-30',
'plans_page_hidden': False,
},
{
'title': 'Freighter',
'price': 10000,
'privateRepos': 50,
'stripeId': 'bus-medium',
'audience': 'For normal businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-medium-30',
'plans_page_hidden': False,
},
{
'title': 'Tanker',
'price': 20000,
'privateRepos': 125,
'stripeId': 'bus-large',
'audience': 'For large businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 14,
'superseded_by': 'bus-large-30',
'plans_page_hidden': False,
},
# Deprecated plans (2014-2017)
{
'title': 'Personal',
'price': 1200,
'privateRepos': 5,
'stripeId': 'personal-30',
'audience': 'Individuals',
'bus_features': False,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'personal-2018',
'plans_page_hidden': False,
},
{
'title': 'Skiff',
'price': 2500,
'privateRepos': 10,
'stripeId': 'bus-micro-30',
'audience': 'For startups',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-micro-2018',
'plans_page_hidden': False,
},
{
'title': 'Yacht',
'price': 5000,
'privateRepos': 20,
'stripeId': 'bus-small-30',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-small-2018',
'plans_page_hidden': False,
},
{
'title': 'Freighter',
'price': 10000,
'privateRepos': 50,
'stripeId': 'bus-medium-30',
'audience': 'For normal businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-medium-2018',
'plans_page_hidden': False,
},
{
'title': 'Tanker',
'price': 20000,
'privateRepos': 125,
'stripeId': 'bus-large-30',
'audience': 'For large businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-large-2018',
'plans_page_hidden': False,
},
{
'title': 'Carrier',
'price': 35000,
'privateRepos': 250,
'stripeId': 'bus-xlarge-30',
'audience': 'For extra large businesses',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-xlarge-2018',
'plans_page_hidden': False,
},
{
'title': 'Huge',
'price': 65000,
'privateRepos': 500,
'stripeId': 'bus-500-30',
'audience': 'For huge business',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-500-2018',
'plans_page_hidden': False,
},
{
'title': 'Huuge',
'price': 120000,
'privateRepos': 1000,
'stripeId': 'bus-1000-30',
'audience': 'For the SaaS savvy enterprise',
'bus_features': True,
'deprecated': True,
'free_trial_days': 30,
'superseded_by': 'bus-1000-2018',
'plans_page_hidden': False,
},
# Active plans (as of Dec 2017)
{
'title': 'Open Source',
'price': 0,
'privateRepos': 0,
'stripeId': 'free',
'audience': 'Commitment to FOSS',
'bus_features': False,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Developer',
'price': 1500,
'privateRepos': 5,
'stripeId': 'personal-2018',
'audience': 'Individuals',
'bus_features': False,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Micro',
'price': 3000,
'privateRepos': 10,
'stripeId': 'bus-micro-2018',
'audience': 'For startups',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Small',
'price': 6000,
'privateRepos': 20,
'stripeId': 'bus-small-2018',
'audience': 'For small businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Medium',
'price': 12500,
'privateRepos': 50,
'stripeId': 'bus-medium-2018',
'audience': 'For normal businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Large',
'price': 25000,
'privateRepos': 125,
'stripeId': 'bus-large-2018',
'audience': 'For large businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'Extra Large',
'price': 45000,
'privateRepos': 250,
'stripeId': 'bus-xlarge-2018',
'audience': 'For extra large businesses',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'XXL',
'price': 85000,
'privateRepos': 500,
'stripeId': 'bus-500-2018',
'audience': 'For huge business',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'XXXL',
'price': 160000,
'privateRepos': 1000,
'stripeId': 'bus-1000-2018',
'audience': 'For the SaaS savvy enterprise',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
{
'title': 'XXXXL',
'price': 310000,
'privateRepos': 2000,
'stripeId': 'bus-2000-2018',
'audience': 'For the SaaS savvy big enterprise',
'bus_features': True,
'deprecated': False,
'free_trial_days': 30,
'superseded_by': None,
'plans_page_hidden': False,
},
]
def get_plan(plan_id):
""" Returns the plan with the given ID or None if none. """
for plan in PLANS:
if plan['stripeId'] == plan_id:
return plan
return None
class FakeSubscription(AttrDict):
@classmethod
def build(cls, data, customer):
data = AttrDict.deep_copy(data)
data['customer'] = customer
return cls(data)
def delete(self):
self.customer.subscription = None
class FakeStripe(object):
class Customer(AttrDict):
FAKE_PLAN = AttrDict({
'id': 'bus-small',
})
FAKE_SUBSCRIPTION = AttrDict({
'plan': FAKE_PLAN,
'current_period_start': timegm(datetime.utcnow().utctimetuple()),
'current_period_end': timegm((datetime.utcnow() + timedelta(days=30)).utctimetuple()),
'trial_start': timegm(datetime.utcnow().utctimetuple()),
'trial_end': timegm((datetime.utcnow() + timedelta(days=30)).utctimetuple()),
})
FAKE_CARD = AttrDict({
'id': 'card123',
'name': 'Joe User',
'type': 'Visa',
'last4': '4242',
'exp_month': 5,
'exp_year': 2016,
})
FAKE_CARD_LIST = AttrDict({
'data': [FAKE_CARD],
})
ACTIVE_CUSTOMERS = {}
@property
def card(self):
return self.get('new_card', None)
@card.setter
def card(self, card_token):
self['new_card'] = card_token
@property
def plan(self):
return self.get('new_plan', None)
@plan.setter
def plan(self, plan_name):
self['new_plan'] = plan_name
def save(self):
if self.get('new_card', None) is not None:
raise stripe.error.CardError('Test raising exception on set card.', self.get('new_card'), 402)
if self.get('new_plan', None) is not None:
if self.subscription is None:
self.subscription = FakeSubscription.build(self.FAKE_SUBSCRIPTION, self)
self.subscription.plan.id = self.get('new_plan')
@classmethod
def retrieve(cls, stripe_customer_id):
if stripe_customer_id in cls.ACTIVE_CUSTOMERS:
cls.ACTIVE_CUSTOMERS[stripe_customer_id].pop('new_card', None)
cls.ACTIVE_CUSTOMERS[stripe_customer_id].pop('new_plan', None)
return cls.ACTIVE_CUSTOMERS[stripe_customer_id]
else:
new_customer = cls({
'default_card': 'card123',
'cards': AttrDict.deep_copy(cls.FAKE_CARD_LIST),
'id': stripe_customer_id,
})
new_customer.subscription = FakeSubscription.build(cls.FAKE_SUBSCRIPTION, new_customer)
cls.ACTIVE_CUSTOMERS[stripe_customer_id] = new_customer
return new_customer
class Invoice(AttrDict):
@staticmethod
def list(customer, count):
return AttrDict({
'data': [],
})
class Billing(object):
def __init__(self, app=None):
self.app = app
if app is not None:
self.state = self.init_app(app)
else:
self.state = None
def init_app(self, app):
billing_type = app.config.get('BILLING_TYPE', 'FakeStripe')
if billing_type == 'Stripe':
billing = stripe
stripe.api_key = app.config.get('STRIPE_SECRET_KEY', None)
elif billing_type == 'FakeStripe':
billing = FakeStripe
else:
raise RuntimeError('Unknown billing type: %s' % billing_type)
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['billing'] = billing
return billing
def __getattr__(self, name):
return getattr(self.state, name, None)
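
A small example of the plan table and the Billing extension; the stripeId lookup and customer id are illustrative, and prices appear to be stored in cents (e.g. 6000 for the $60 Small plan). FakeStripe is the backend used whenever BILLING_TYPE is not 'Stripe'.

from flask import Flask
from data.billing import Billing, get_plan

plan = get_plan('bus-small-2018')
print(plan['title'])    # 'Small'
print(plan['price'])    # 6000; prices appear to be in cents

app = Flask(__name__)
app.config['BILLING_TYPE'] = 'FakeStripe'   # 'Stripe' would also require STRIPE_SECRET_KEY
billing = Billing(app)
customer = billing.Customer.retrieve('cus_test')   # FakeStripe returns an in-memory customer
print(customer.subscription.plan.id)               # 'bus-small'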

179
data/buildlogs.py Normal file

@@ -0,0 +1,179 @@
import redis
import json
import time
from contextlib import closing
from util.dynamic import import_class
from datetime import timedelta
ONE_DAY = timedelta(days=1)
SEVEN_DAYS = timedelta(days=7)
class BuildStatusRetrievalError(Exception):
pass
class RedisBuildLogs(object):
ERROR = 'error'
COMMAND = 'command'
PHASE = 'phase'
def __init__(self, redis_config):
self._redis_client = None
self._redis_config = redis_config
@property
def _redis(self):
if self._redis_client is not None:
return self._redis_client
args = dict(self._redis_config)
args.update({'socket_connect_timeout': 1,
'socket_timeout': 2,
'single_connection_client': True})
self._redis_client = redis.StrictRedis(**args)
return self._redis_client
@staticmethod
def _logs_key(build_id):
return 'builds/%s/logs' % build_id
def append_log_entry(self, build_id, log_obj):
"""
Appends the serialized form of log_obj to the end of the log entry list
and returns the new length of the list.
"""
pipeline = self._redis.pipeline(transaction=False)
pipeline.expire(self._logs_key(build_id), SEVEN_DAYS)
pipeline.rpush(self._logs_key(build_id), json.dumps(log_obj))
result = pipeline.execute()
return result[1]
def append_log_message(self, build_id, log_message, log_type=None, log_data=None):
"""
Wraps the message in an envelope, pushes it to the end of the log entry
list, and returns the index at which it was inserted.
"""
log_obj = {
'message': log_message
}
if log_type:
log_obj['type'] = log_type
if log_data:
log_obj['data'] = log_data
return self.append_log_entry(build_id, log_obj) - 1
def get_log_entries(self, build_id, start_index):
"""
Returns a tuple of the current length of the list and an iterable of the
requested log entries.
"""
try:
llen = self._redis.llen(self._logs_key(build_id))
log_entries = self._redis.lrange(self._logs_key(build_id), start_index, -1)
return (llen, (json.loads(entry) for entry in log_entries))
except redis.RedisError as re:
raise BuildStatusRetrievalError('Cannot retrieve build logs: %s' % re)
def expire_status(self, build_id):
"""
Sets the status entry to expire in 1 day.
"""
self._redis.expire(self._status_key(build_id), ONE_DAY)
def expire_log_entries(self, build_id):
"""
Sets the log entry to expire in 1 day.
"""
self._redis.expire(self._logs_key(build_id), ONE_DAY)
def delete_log_entries(self, build_id):
"""
Delete the log entry
"""
self._redis.delete(self._logs_key(build_id))
@staticmethod
def _status_key(build_id):
return 'builds/%s/status' % build_id
def set_status(self, build_id, status_obj):
"""
Sets the status key for this build to json serialized form of the supplied
obj.
"""
self._redis.set(self._status_key(build_id), json.dumps(status_obj), ex=SEVEN_DAYS)
def get_status(self, build_id):
"""
Loads the status information for the specified build id.
"""
try:
fetched = self._redis.get(self._status_key(build_id))
except redis.RedisError as re:
raise BuildStatusRetrievalError('Cannot retrieve build status: %s' % re)
return json.loads(fetched) if fetched else None
@staticmethod
def _health_key():
return '_health'
def check_health(self):
try:
args = dict(self._redis_config)
args.update({'socket_connect_timeout': 1,
'socket_timeout': 1,
'single_connection_client': True})
with closing(redis.StrictRedis(**args)) as connection:
if not connection.ping():
return (False, 'Could not ping redis')
# Ensure we can write and read a key.
connection.set(self._health_key(), time.time())
connection.get(self._health_key())
return (True, None)
except redis.RedisError as re:
return (False, 'Could not connect to redis: %s' % re.message)
class BuildLogs(object):
def __init__(self, app=None):
self.app = app
if app is not None:
self.state = self.init_app(app)
else:
self.state = None
def init_app(self, app):
buildlogs_config = app.config.get('BUILDLOGS_REDIS')
if not buildlogs_config:
# This is the old key name.
buildlogs_config = {
'host': app.config.get('BUILDLOGS_REDIS_HOSTNAME')
}
buildlogs_options = app.config.get('BUILDLOGS_OPTIONS', [])
buildlogs_import = app.config.get('BUILDLOGS_MODULE_AND_CLASS', None)
if buildlogs_import is None:
klass = RedisBuildLogs
else:
klass = import_class(buildlogs_import[0], buildlogs_import[1])
buildlogs = klass(buildlogs_config, *buildlogs_options)
# register extension with app
app.extensions = getattr(app, 'extensions', {})
app.extensions['buildlogs'] = buildlogs
return buildlogs
def __getattr__(self, name):
return getattr(self.state, name, None)
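
A standalone sketch of the Redis-backed build log store; the Redis address and build id are illustrative, and in the application this is wired up through the BuildLogs Flask extension instead.

from data.buildlogs import RedisBuildLogs

logs = RedisBuildLogs({'host': '127.0.0.1', 'port': 6379})   # illustrative Redis config
build_id = 'hypothetical-build-id'
logs.append_log_message(build_id, 'Step 1/3 : FROM alpine', log_type=RedisBuildLogs.COMMAND)
length, entries = logs.get_log_entries(build_id, 0)
for entry in entries:
    print(entry['message'])
logs.set_status(build_id, {'phase': 'complete'})
print(logs.get_status(build_id))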

23
data/cache/__init__.py vendored Normal file

@@ -0,0 +1,23 @@
from data.cache.impl import NoopDataModelCache, InMemoryDataModelCache, MemcachedModelCache
def get_model_cache(config):
""" Returns a data model cache matching the given configuration. """
cache_config = config.get('DATA_MODEL_CACHE_CONFIG', {})
engine = cache_config.get('engine', 'noop')
if engine == 'noop':
return NoopDataModelCache()
if engine == 'inmemory':
return InMemoryDataModelCache()
if engine == 'memcached':
endpoint = cache_config.get('endpoint', None)
if endpoint is None:
raise Exception('Missing `endpoint` for memcached model cache configuration')
timeout = cache_config.get('timeout')
connect_timeout = cache_config.get('connect_timeout')
return MemcachedModelCache(endpoint, timeout=timeout, connect_timeout=connect_timeout)
raise Exception('Unknown model cache engine `%s`' % engine)
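Illustrative DATA_MODEL_CACHE_CONFIG values for each engine:

from data.cache import get_model_cache

noop_cache = get_model_cache({})   # no config defaults to the no-op engine
memcached_cache = get_model_cache({
    'DATA_MODEL_CACHE_CONFIG': {
        'engine': 'memcached',
        'endpoint': ('127.0.0.1', 11211),   # illustrative endpoint
        'timeout': 1,
        'connect_timeout': 1,
    },
})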

27
data/cache/cache_key.py vendored Normal file

@@ -0,0 +1,27 @@
from collections import namedtuple
class CacheKey(namedtuple('CacheKey', ['key', 'expiration'])):
""" Defines a key into the data model cache. """
pass
def for_repository_blob(namespace_name, repo_name, digest, version):
""" Returns a cache key for a blob in a repository. """
return CacheKey('repo_blob__%s_%s_%s_%s' % (namespace_name, repo_name, digest, version), '60s')
def for_catalog_page(auth_context_key, start_id, limit):
""" Returns a cache key for a single page of a catalog lookup for an authed context. """
params = (auth_context_key or '(anon)', start_id or 0, limit or 0)
return CacheKey('catalog_page__%s_%s_%s' % params, '60s')
def for_namespace_geo_restrictions(namespace_name):
""" Returns a cache key for the geo restrictions for a namespace. """
return CacheKey('geo_restrictions__%s' % (namespace_name), '240s')
def for_active_repo_tags(repository_id, start_pagination_id, limit):
""" Returns a cache key for the active tags in a repository. """
return CacheKey('repo_active_tags__%s_%s_%s' % (repository_id, start_pagination_id, limit),
'120s')
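
For example (the names and digest are illustrative):

from data.cache.cache_key import for_repository_blob

key = for_repository_blob('myorg', 'myrepo', 'sha256:abcd', 2)
print(key.key)          # 'repo_blob__myorg_myrepo_sha256:abcd_2'
print(key.expiration)   # '60s'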

146
data/cache/impl.py vendored Normal file

@@ -0,0 +1,146 @@
import logging
import json
from datetime import datetime
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from pymemcache.client.base import Client
from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta
logger = logging.getLogger(__name__)
def is_not_none(value):
return value is not None
@add_metaclass(ABCMeta)
class DataModelCache(object):
""" Defines an interface for cache storing and returning tuple data model objects. """
@abstractmethod
def retrieve(self, cache_key, loader, should_cache=is_not_none):
""" Checks the cache for the specified cache key and returns the value found (if any). If none
found, the loader is called to get a result and populate the cache.
"""
pass
class NoopDataModelCache(DataModelCache):
""" Implementation of the data model cache which does nothing. """
def retrieve(self, cache_key, loader, should_cache=is_not_none):
return loader()
class InMemoryDataModelCache(DataModelCache):
""" Implementation of the data model cache backed by an in-memory dictionary. """
def __init__(self):
self.cache = ExpiresDict()
def retrieve(self, cache_key, loader, should_cache=is_not_none):
not_found = [None]
logger.debug('Checking cache for key %s', cache_key.key)
result = self.cache.get(cache_key.key, default_value=not_found)
if result != not_found:
logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
return json.loads(result)
logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
result = loader()
logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
if should_cache(result):
logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
self.cache.set(cache_key.key, json.dumps(result), expires=expires)
logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
else:
logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
return result
_DEFAULT_MEMCACHE_TIMEOUT = 1 # second
_DEFAULT_MEMCACHE_CONNECT_TIMEOUT = 1 # second
_STRING_TYPE = 1
_JSON_TYPE = 2
class MemcachedModelCache(DataModelCache):
""" Implementation of the data model cache backed by a memcached. """
def __init__(self, endpoint, timeout=_DEFAULT_MEMCACHE_TIMEOUT,
connect_timeout=_DEFAULT_MEMCACHE_CONNECT_TIMEOUT):
self.endpoint = endpoint
self.timeout = timeout
self.connect_timeout = connect_timeout
self.client = None
def _get_client(self):
client = self.client
if client is not None:
return client
try:
# Copied from the doc comment for Client.
def serialize_json(key, value):
if type(value) == str:
return value, _STRING_TYPE
return json.dumps(value), _JSON_TYPE
def deserialize_json(key, value, flags):
if flags == _STRING_TYPE:
return value
if flags == _JSON_TYPE:
return json.loads(value)
raise Exception("Unknown flags for value: {1}".format(flags))
self.client = Client(self.endpoint, no_delay=True, timeout=self.timeout,
connect_timeout=self.connect_timeout,
key_prefix='data_model_cache__',
serializer=serialize_json,
deserializer=deserialize_json,
ignore_exc=True)
return self.client
except:
logger.exception('Got exception when creating memcached client to %s', self.endpoint)
return None
def retrieve(self, cache_key, loader, should_cache=is_not_none):
not_found = [None]
client = self._get_client()
if client is not None:
logger.debug('Checking cache for key %s', cache_key.key)
try:
result = client.get(cache_key.key, default=not_found)
if result != not_found:
logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
return result
except:
logger.exception('Got exception when trying to retrieve key %s', cache_key.key)
logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
result = loader()
logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
if client is not None and should_cache(result):
try:
logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
expires = convert_to_timedelta(cache_key.expiration) if cache_key.expiration else None
client.set(cache_key.key, result, expire=int(expires.total_seconds()) if expires else None)
logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
cache_key.expiration, result)
except:
logger.exception('Got exception when trying to set key %s to %s', cache_key.key, result)
else:
logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
return result
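
A quick sketch of the retrieve() contract shared by all three implementations; the key and loader are illustrative.

from data.cache.cache_key import CacheKey
from data.cache.impl import InMemoryDataModelCache

cache = InMemoryDataModelCache()
key = CacheKey('example_key', '60s')   # illustrative key and expiration

def loader():
    # Stand-in for a real data model lookup.
    return {'value': 42}

print(cache.retrieve(key, loader))         # cache miss: calls loader and caches the result
print(cache.retrieve(key, lambda: None))   # cache hit until the 60s expiration elapses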

56
data/cache/test/test_cache.py vendored Normal file

@@ -0,0 +1,56 @@
import pytest
from mock import patch
from data.cache import InMemoryDataModelCache, NoopDataModelCache, MemcachedModelCache
from data.cache.cache_key import CacheKey
class MockClient(object):
def __init__(self, server, **kwargs):
self.data = {}
def get(self, key, default=None):
return self.data.get(key, default)
def set(self, key, value, expire=None):
self.data[key] = value
@pytest.mark.parametrize('cache_type', [
(NoopDataModelCache),
(InMemoryDataModelCache),
])
def test_caching(cache_type):
key = CacheKey('foo', '60m')
cache = cache_type()
# Perform two retrievals, and make sure both return.
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
def test_memcache():
key = CacheKey('foo', '60m')
with patch('data.cache.impl.Client', MockClient):
cache = MemcachedModelCache(('127.0.0.1', '-1'))
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
def test_memcache_should_cache():
key = CacheKey('foo', None)
def sc(value):
return value['a'] != 1234
with patch('data.cache.impl.Client', MockClient):
cache = MemcachedModelCache(('127.0.0.1', '-1'))
assert cache.retrieve(key, lambda: {'a': 1234}, should_cache=sc) == {'a': 1234}
# Ensure not cached since it was `1234`.
assert cache._get_client().get(key.key) is None
# Ensure cached.
assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345}
assert cache._get_client().get(key.key) is not None
assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345}

1793
data/database.py Normal file

File diff suppressed because it is too large

82
data/encryption.py Normal file

@@ -0,0 +1,82 @@
import os
import logging
import base64
from collections import namedtuple
from cryptography.hazmat.primitives.ciphers.aead import AESCCM
from util.security.secret import convert_secret_key
class DecryptionFailureException(Exception):
""" Exception raised if a field could not be decrypted. """
EncryptionVersion = namedtuple('EncryptionVersion', ['prefix', 'encrypt', 'decrypt'])
logger = logging.getLogger(__name__)
_SEPARATOR = '$$'
AES_CCM_NONCE_LENGTH = 13
def _encrypt_ccm(secret_key, value, field_max_length=None):
aesccm = AESCCM(secret_key)
nonce = os.urandom(AES_CCM_NONCE_LENGTH)
ct = aesccm.encrypt(nonce, value.encode('utf-8'), None)
encrypted = base64.b64encode(nonce + ct)
if field_max_length:
msg = 'Tried to encode a value too large for this field'
assert (len(encrypted) + _RESERVED_FIELD_SPACE) <= field_max_length, msg
return encrypted
def _decrypt_ccm(secret_key, value):
aesccm = AESCCM(secret_key)
try:
decoded = base64.b64decode(value)
nonce = decoded[:AES_CCM_NONCE_LENGTH]
ct = decoded[AES_CCM_NONCE_LENGTH:]
decrypted = aesccm.decrypt(nonce, ct, None)
return decrypted.decode('utf-8')
except Exception:
logger.exception('Got exception when trying to decrypt value `%s`', value)
raise DecryptionFailureException()
# Defines the versions of encryptions we support. This will allow us to upgrade to newer encryption
# protocols (fairly seamlessly) if need be in the future.
_VERSIONS = {
'v0': EncryptionVersion('v0', _encrypt_ccm, _decrypt_ccm),
}
_RESERVED_FIELD_SPACE = len(_SEPARATOR) + max([len(k) for k in _VERSIONS.keys()])
class FieldEncrypter(object):
""" Helper object for defining how fields are encrypted and decrypted between the database
and the application.
"""
def __init__(self, secret_key, version='v0'):
self._secret_key = convert_secret_key(secret_key)
self._encryption_version = _VERSIONS[version]
def encrypt_value(self, value, field_max_length=None):
""" Encrypts the value using the current version of encryption. """
encrypted_value = self._encryption_version.encrypt(self._secret_key, value, field_max_length)
return '%s%s%s' % (self._encryption_version.prefix, _SEPARATOR, encrypted_value)
def decrypt_value(self, value):
""" Decrypts the value, returning it. If the value cannot be decrypted
raises a DecryptionFailureException.
"""
if _SEPARATOR not in value:
raise DecryptionFailureException('Invalid encrypted value')
version_prefix, data = value.split(_SEPARATOR, 1)
if version_prefix not in _VERSIONS:
raise DecryptionFailureException('Unknown version prefix %s' % version_prefix)
return _VERSIONS[version_prefix].decrypt(self._secret_key, data)
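
A round-trip sketch, assuming convert_secret_key derives a valid AES key from the illustrative secret below.

from data.encryption import FieldEncrypter

encrypter = FieldEncrypter('super-secret-config-key')   # illustrative secret
token = encrypter.encrypt_value('my-robot-token')       # e.g. 'v0$$<base64 nonce+ciphertext>'
assert encrypter.decrypt_value(token) == 'my-robot-token'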

297
data/fields.py Normal file

@@ -0,0 +1,297 @@
import base64
import string
import json
from random import SystemRandom
import bcrypt
import resumablehashlib
from peewee import TextField, CharField, SmallIntegerField
from data.text import prefix_search
def random_string(length=16):
random = SystemRandom()
return ''.join([random.choice(string.ascii_uppercase + string.digits)
for _ in range(length)])
class _ResumableSHAField(TextField):
def _create_sha(self):
raise NotImplementedError
def db_value(self, value):
if value is None:
return None
sha_state = value.state()
# One of the fields is a byte string, let's base64 encode it to make sure
# we can store and fetch it regardless of the default collation.
sha_state[3] = base64.b64encode(sha_state[3])
return json.dumps(sha_state)
def python_value(self, value):
if value is None:
return None
sha_state = json.loads(value)
# We need to base64 decode the data bytestring.
sha_state[3] = base64.b64decode(sha_state[3])
to_resume = self._create_sha()
to_resume.set_state(sha_state)
return to_resume
class ResumableSHA256Field(_ResumableSHAField):
def _create_sha(self):
return resumablehashlib.sha256()
class ResumableSHA1Field(_ResumableSHAField):
def _create_sha(self):
return resumablehashlib.sha1()
class JSONField(TextField):
def db_value(self, value):
return json.dumps(value)
def python_value(self, value):
if value is None or value == "":
return {}
return json.loads(value)
class Base64BinaryField(TextField):
def db_value(self, value):
if value is None:
return None
return base64.b64encode(value)
def python_value(self, value):
if value is None:
return None
return base64.b64decode(value)
class DecryptedValue(object):
""" Wrapper around an already decrypted value to be placed into an encrypted field. """
def __init__(self, decrypted_value):
assert decrypted_value is not None
self.value = decrypted_value
def decrypt(self):
return self.value
def matches(self, unencrypted_value):
""" Returns whether the value of this field matches the unencrypted_value. """
return self.decrypt() == unencrypted_value
class LazyEncryptedValue(object):
""" Wrapper around an encrypted value in an encrypted field. Will decrypt lazily. """
def __init__(self, encrypted_value, field):
self.encrypted_value = encrypted_value
self._field = field
def decrypt(self):
""" Decrypts the value. """
return self._field.model._meta.encrypter.decrypt_value(self.encrypted_value)
def matches(self, unencrypted_value):
""" Returns whether the value of this field matches the unencrypted_value. """
return self.decrypt() == unencrypted_value
def __eq__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __mod__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __pow__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __contains__(self, _):
raise Exception('Disallowed operation; use `matches`')
def contains(self, _):
raise Exception('Disallowed operation; use `matches`')
def startswith(self, _):
raise Exception('Disallowed operation; use `matches`')
def endswith(self, _):
raise Exception('Disallowed operation; use `matches`')
def _add_encryption(field_class, requires_length_check=True):
""" Adds support for encryption and decryption to the given field class. """
class indexed_class(field_class):
def __init__(self, default_token_length=None, *args, **kwargs):
def _generate_default():
return DecryptedValue(random_string(default_token_length))
if default_token_length is not None:
kwargs['default'] = _generate_default
field_class.__init__(self, *args, **kwargs)
assert not self.index
def db_value(self, value):
if value is None:
return None
if isinstance(value, LazyEncryptedValue):
return value.encrypted_value
if isinstance(value, DecryptedValue):
value = value.value
meta = self.model._meta
return meta.encrypter.encrypt_value(value, self.max_length if requires_length_check else None)
def python_value(self, value):
if value is None:
return None
return LazyEncryptedValue(value, self)
def __eq__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __mod__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __pow__(self, _):
raise Exception('Disallowed operation; use `matches`')
def __contains__(self, _):
raise Exception('Disallowed operation; use `matches`')
def contains(self, _):
raise Exception('Disallowed operation; use `matches`')
def startswith(self, _):
raise Exception('Disallowed operation; use `matches`')
def endswith(self, _):
raise Exception('Disallowed operation; use `matches`')
return indexed_class
EncryptedCharField = _add_encryption(CharField)
EncryptedTextField = _add_encryption(TextField, requires_length_check=False)
class EnumField(SmallIntegerField):
def __init__(self, enum_type, *args, **kwargs):
kwargs.pop('index', None)
super(EnumField, self).__init__(index=True, *args, **kwargs)
self.enum_type = enum_type
def db_value(self, value):
"""Convert the python value for storage in the database."""
return int(value.value)
def python_value(self, value):
"""Convert the database value to a pythonic value."""
return self.enum_type(value) if value is not None else None
def clone_base(self, **kwargs):
return super(EnumField, self).clone_base(
enum_type=self.enum_type,
**kwargs)
def _add_fulltext(field_class):
""" Adds support for full text indexing and lookup to the given field class. """
class indexed_class(field_class):
# Marker used by SQLAlchemy translation layer to add the proper index for full text searching.
__fulltext__ = True
def __init__(self, match_function, *args, **kwargs):
field_class.__init__(self, *args, **kwargs)
self.match_function = match_function
def match(self, query):
return self.match_function(self, query)
def match_prefix(self, query):
return prefix_search(self, query)
def __mod__(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def __pow__(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def __contains__(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def contains(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def startswith(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
def endswith(self, _):
raise Exception('Unsafe operation: Use `match` or `match_prefix`')
return indexed_class
FullIndexedCharField = _add_fulltext(CharField)
FullIndexedTextField = _add_fulltext(TextField)
class Credential(object):
""" Credential represents a hashed credential. """
def __init__(self, hashed):
self.hashed = hashed
def matches(self, value):
""" Returns true if this credential matches the unhashed value given. """
return bcrypt.hashpw(value.encode('utf-8'), self.hashed) == self.hashed
@classmethod
def from_string(cls, string_value):
""" Returns a Credential object from an unhashed string value. """
return Credential(bcrypt.hashpw(string_value.encode('utf-8'), bcrypt.gensalt()))
@classmethod
def generate(cls, length=20):
""" Generates a new credential and returns it, along with its unhashed form. """
token = random_string(length)
return Credential.from_string(token), token
class CredentialField(CharField):
""" A character field that stores crytographically hashed credentials that should never be
available to the user in plaintext after initial creation. This field automatically
provides verification.
"""
def __init__(self, *args, **kwargs):
CharField.__init__(self, *args, **kwargs)
assert 'default' not in kwargs
assert not self.index
def db_value(self, value):
if value is None:
return None
if isinstance(value, basestring):
raise Exception('A string cannot be given to a CredentialField; please wrap in a Credential')
return value.hashed
def python_value(self, value):
if value is None:
return None
return Credential(value)
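
A short example of the Credential wrapper used by CredentialField:

from data.fields import Credential

credential, token = Credential.generate()   # token is the one-time plaintext value
assert credential.matches(token)
assert not credential.matches('wrong-value')
# Only the bcrypt hash (credential.hashed) is ever stored by CredentialField.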


@@ -0,0 +1,64 @@
import logging
from data.logs_model.table_logs_model import TableLogsModel
from data.logs_model.document_logs_model import DocumentLogsModel
from data.logs_model.combined_model import CombinedLogsModel
logger = logging.getLogger(__name__)
def _transition_model(*args, **kwargs):
return CombinedLogsModel(
DocumentLogsModel(*args, **kwargs),
TableLogsModel(*args, **kwargs),
)
_LOG_MODELS = {
'database': TableLogsModel,
'transition_reads_both_writes_es': _transition_model,
'elasticsearch': DocumentLogsModel,
}
_PULL_LOG_KINDS = {'pull_repo', 'repo_verb'}
class LogsModelProxy(object):
def __init__(self):
self._model = None
def initialize(self, model):
self._model = model
logger.info('===============================')
logger.info('Using logs model `%s`', self._model)
logger.info('===============================')
def __getattr__(self, attr):
if not self._model:
raise AttributeError("LogsModelProxy is not initialized")
return getattr(self._model, attr)
logs_model = LogsModelProxy()
def configure(app_config):
logger.debug('Configuring log model')
model_name = app_config.get('LOGS_MODEL', 'database')
model_config = app_config.get('LOGS_MODEL_CONFIG', {})
def should_skip_logging(kind_name, namespace_name, is_free_namespace):
if namespace_name and namespace_name in app_config.get('DISABLED_FOR_AUDIT_LOGS', {}):
return True
if kind_name in _PULL_LOG_KINDS:
if namespace_name and namespace_name in app_config.get('DISABLED_FOR_PULL_LOGS', {}):
return True
if app_config.get('FEATURE_DISABLE_PULL_LOGS_FOR_FREE_NAMESPACES'):
if is_free_namespace:
return True
return False
model_config['should_skip_logging'] = should_skip_logging
logs_model.initialize(_LOG_MODELS[model_name](**model_config))
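
A hedged configuration sketch, assuming this module is importable as data.logs_model and that a database-backed deployment is being configured; the log kind and names are illustrative.

from data import logs_model

logs_model.configure({'LOGS_MODEL': 'database', 'LOGS_MODEL_CONFIG': {}})
logs_model.logs_model.log_action('push_repo', namespace_name='myorg',
                                 repository_name='myrepo')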


@@ -0,0 +1,132 @@
import logging
import itertools
from data.logs_model.datatypes import AggregatedLogCount, LogEntriesPage
from data.logs_model.interface import ActionLogsDataInterface
from data.logs_model.shared import SharedModel
logger = logging.getLogger(__name__)
def _merge_aggregated_log_counts(*args):
""" Merge two lists of AggregatedLogCount based on the value of their kind_id and datetime.
"""
matching_keys = {}
aggregated_log_counts_list = itertools.chain.from_iterable(args)
def canonical_key_from_kind_date_tuple(kind_id, dt):
""" Return a comma separated key from an AggregatedLogCount's kind_id and datetime. """
return str(kind_id) + ',' + str(dt)
for kind_id, count, dt in aggregated_log_counts_list:
kind_date_key = canonical_key_from_kind_date_tuple(kind_id, dt)
if kind_date_key in matching_keys:
existing_count = matching_keys[kind_date_key][2]
matching_keys[kind_date_key] = (kind_id, dt, existing_count + count)
else:
matching_keys[kind_date_key] = (kind_id, dt, count)
return [AggregatedLogCount(kind_id, count, dt) for (kind_id, dt, count) in matching_keys.values()]
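# Worked example (illustrative kind ids and dates):
#   rw = [AggregatedLogCount(1, 3, datetime(2019, 11, 12)), AggregatedLogCount(2, 1, datetime(2019, 11, 12))]
#   ro = [AggregatedLogCount(1, 4, datetime(2019, 11, 12))]
#   _merge_aggregated_log_counts(rw, ro)
#   # -> the two kind-1 entries for 2019-11-12 are summed into a count of 7; kind 2 passes through unchanged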
class CombinedLogsModel(SharedModel, ActionLogsDataInterface):
"""
CombinedLogsModel implements the data model that writes to the read/write logs model and reads from
both models.
"""
def __init__(self, read_write_logs_model, read_only_logs_model):
self.read_write_logs_model = read_write_logs_model
self.read_only_logs_model = read_only_logs_model
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
return self.read_write_logs_model.log_action(kind_name, namespace_name, performer, ip, metadata,
repository, repository_name, timestamp,
is_free_namespace)
def count_repository_actions(self, repository, day):
rw_count = self.read_write_logs_model.count_repository_actions(repository, day)
ro_count = self.read_only_logs_model.count_repository_actions(repository, day)
return rw_count + ro_count
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
rw_count = rw_model.get_aggregated_log_counts(start_datetime, end_datetime,
performer_name=performer_name,
repository_name=repository_name,
namespace_name=namespace_name,
filter_kinds=filter_kinds)
ro_count = ro_model.get_aggregated_log_counts(start_datetime, end_datetime,
performer_name=performer_name,
repository_name=repository_name,
namespace_name=namespace_name,
filter_kinds=filter_kinds)
return _merge_aggregated_log_counts(rw_count, ro_count)
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
rw_logs = rw_model.yield_logs_for_export(start_datetime, end_datetime, repository_id,
namespace_id, max_query_time)
ro_logs = ro_model.yield_logs_for_export(start_datetime, end_datetime, repository_id,
namespace_id, max_query_time)
for batch in itertools.chain(rw_logs, ro_logs):
yield batch
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
page_token = page_token or {}
new_page_token = {}
if page_token is None or not page_token.get('under_readonly_model', False):
rw_page_token = page_token.get('readwrite_page_token')
rw_logs = rw_model.lookup_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds,
rw_page_token, max_page_count)
logs, next_page_token = rw_logs
new_page_token['under_readonly_model'] = next_page_token is None
new_page_token['readwrite_page_token'] = next_page_token
return LogEntriesPage(logs, new_page_token)
else:
readonly_page_token = page_token.get('readonly_page_token')
ro_logs = ro_model.lookup_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds,
readonly_page_token, max_page_count)
logs, next_page_token = ro_logs
if next_page_token is None:
return LogEntriesPage(logs, None)
new_page_token['under_readonly_model'] = True
new_page_token['readonly_page_token'] = next_page_token
return LogEntriesPage(logs, new_page_token)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
latest_logs = []
rw_model = self.read_write_logs_model
ro_model = self.read_only_logs_model
rw_logs = rw_model.lookup_latest_logs(performer_name, repository_name, namespace_name,
filter_kinds, size)
latest_logs.extend(rw_logs)
if len(latest_logs) < size:
ro_logs = ro_model.lookup_latest_logs(performer_name, repository_name, namespace_name,
filter_kinds, size - len(latest_logs))
latest_logs.extend(ro_logs)
return latest_logs
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
ro_model = self.read_only_logs_model
rw_model = self.read_write_logs_model
ro_ctx = ro_model.yield_log_rotation_context(cutoff_date, min_logs_per_rotation)
rw_ctx = rw_model.yield_log_rotation_context(cutoff_date, min_logs_per_rotation)
for ctx in itertools.chain(ro_ctx, rw_ctx):
yield ctx
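# Pagination sketch for lookup_logs above (token shapes are illustrative): the first calls page
# through the read/write model and return tokens of the form
#   {'under_readonly_model': False, 'readwrite_page_token': <rw token>}
# until the read/write model is exhausted, at which point 'under_readonly_model' flips to True and
# later calls page through the read-only model with
#   {'under_readonly_model': True, 'readonly_page_token': <ro token>}
# A None token from the read-only model ends the iteration.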

View file

@ -0,0 +1,155 @@
import json
from calendar import timegm
from collections import namedtuple
from email.utils import formatdate
from cachetools.func import lru_cache
from data import model
from util.morecollections import AttrDict
def _format_date(date):
""" Output an RFC822 date format. """
if date is None:
return None
return formatdate(timegm(date.utctimetuple()))
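# Illustrative example: _format_date(datetime(2019, 11, 12)) -> 'Tue, 12 Nov 2019 00:00:00 -0000'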
@lru_cache(maxsize=1)
def _kinds():
return model.log.get_log_entry_kinds()
class LogEntriesPage(namedtuple('LogEntriesPage', ['logs', 'next_page_token'])):
""" Represents a page returned by the lookup_logs call. The `logs` contains the logs
found for the page and `next_page_token`, if not None, contains the token to be
encoded and returned for the followup call.
"""
class Log(namedtuple('Log', [
'metadata_json', 'ip', 'datetime', 'performer_email', 'performer_username', 'performer_robot',
'account_organization', 'account_username', 'account_email', 'account_robot', 'kind_id'])):
""" Represents a single log entry returned by the logs model. """
@classmethod
def for_logentry(cls, log):
account_organization = None
account_username = None
account_email = None
account_robot = None
try:
account_organization = log.account.organization
account_username = log.account.username
account_email = log.account.email
account_robot = log.account.robot
except AttributeError:
pass
performer_robot = None
performer_username = None
performer_email = None
try:
performer_robot = log.performer.robot
performer_username = log.performer.username
performer_email = log.performer.email
except AttributeError:
pass
return Log(log.metadata_json, log.ip, log.datetime, performer_email, performer_username,
performer_robot, account_organization, account_username, account_email,
account_robot, log.kind_id)
@classmethod
def for_elasticsearch_log(cls, log, id_user_map):
account_organization = None
account_username = None
account_email = None
account_robot = None
try:
if log.account_id:
account = id_user_map[log.account_id]
account_organization = account.organization
account_username = account.username
account_email = account.email
account_robot = account.robot
except AttributeError:
pass
performer_robot = None
performer_username = None
performer_email = None
try:
if log.performer_id:
performer = id_user_map[log.performer_id]
performer_robot = performer.robot
performer_username = performer.username
performer_email = performer.email
except AttributeError:
pass
return Log(log.metadata_json, str(log.ip), log.datetime, performer_email, performer_username,
performer_robot, account_organization, account_username, account_email,
account_robot, log.kind_id)
def to_dict(self, avatar, include_namespace=False):
view = {
'kind': _kinds()[self.kind_id],
'metadata': json.loads(self.metadata_json),
'ip': self.ip,
'datetime': _format_date(self.datetime),
}
if self.performer_username:
performer = AttrDict({'username': self.performer_username, 'email': self.performer_email})
performer.robot = None
if self.performer_robot:
performer.robot = self.performer_robot
view['performer'] = {
'kind': 'user',
'name': self.performer_username,
'is_robot': self.performer_robot,
'avatar': avatar.get_data_for_user(performer),
}
if include_namespace:
if self.account_username:
account = AttrDict({'username': self.account_username, 'email': self.account_email})
if self.account_organization:
view['namespace'] = {
'kind': 'org',
'name': self.account_username,
'avatar': avatar.get_data_for_org(account),
}
else:
account.robot = None
if self.account_robot:
account.robot = self.account_robot
view['namespace'] = {
'kind': 'user',
'name': self.account_username,
'avatar': avatar.get_data_for_user(account),
}
return view
class AggregatedLogCount(namedtuple('AggregatedLogCount', ['kind_id', 'count', 'datetime'])):
""" Represents the aggregated count of the number of logs, of a particular kind, on a day. """
def to_dict(self):
view = {
'kind': _kinds()[self.kind_id],
'count': self.count,
'datetime': _format_date(self.datetime),
}
return view

View file

@ -0,0 +1,532 @@
# pylint: disable=protected-access
import json
import logging
import uuid
from time import time
from datetime import timedelta, datetime, date
from dateutil.parser import parse as parse_datetime
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from elasticsearch.exceptions import ConnectionTimeout, NotFoundError
from data import model
from data.database import CloseForLongOperation
from data.model import config
from data.model.log import (_json_serialize, ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING,
DataModelException)
from data.logs_model.elastic_logs import LogEntry, configure_es
from data.logs_model.datatypes import Log, AggregatedLogCount, LogEntriesPage
from data.logs_model.interface import (ActionLogsDataInterface, LogRotationContextInterface,
LogsIterationTimeout)
from data.logs_model.shared import SharedModel, epoch_ms
from data.logs_model.logs_producer import LogProducerProxy, LogSendException
from data.logs_model.logs_producer.kafka_logs_producer import KafkaLogsProducer
from data.logs_model.logs_producer.elasticsearch_logs_producer import ElasticsearchLogsProducer
from data.logs_model.logs_producer.kinesis_stream_logs_producer import KinesisStreamLogsProducer
logger = logging.getLogger(__name__)
PAGE_SIZE = 20
DEFAULT_RESULT_WINDOW = 5000
MAX_RESULT_WINDOW = 10000
# DATE_RANGE_LIMIT is to limit the query date time range to at most 1 month.
DATE_RANGE_LIMIT = 32
# Timeout for count_repository_actions
COUNT_REPOSITORY_ACTION_TIMEOUT = 30
def _date_range_descending(start_datetime, end_datetime, includes_end_datetime=False):
""" Generate the dates between `end_datetime` and `start_datetime`.
If `includes_end_datetime` is set, the generator starts at `end_datetime`,
otherwise, starts the generator at `end_datetime` minus 1 second.
"""
assert end_datetime >= start_datetime
start_date = start_datetime.date()
if includes_end_datetime:
current_date = end_datetime.date()
else:
current_date = (end_datetime - timedelta(seconds=1)).date()
while current_date >= start_date:
yield current_date
current_date = current_date - timedelta(days=1)
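# Illustrative example: with start=2019-11-10 00:00 and end=2019-11-12 00:00 this yields
# 2019-11-11 then 2019-11-10; with includes_end_datetime=True it would yield
# 2019-11-12, 2019-11-11, 2019-11-10.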
def _date_range_in_single_index(dt1, dt2):
""" Determine whether a single index can be searched given a range
of dates or datetimes. If date instances are given, difference should be 1 day.
NOTE: dt2 is exclusive to the search result set.
i.e. The date range is larger or equal to dt1 and strictly smaller than dt2
"""
assert isinstance(dt1, date) and isinstance(dt2, date)
dt = dt2 - dt1
# Check if date or datetime
if not isinstance(dt1, datetime) and not isinstance(dt2, datetime):
return dt == timedelta(days=1)
if dt < timedelta(days=1) and dt >= timedelta(days=0):
return dt2.day == dt1.day
# Check if datetime can be interpreted as a date: hour, minutes, seconds or microseconds set to 0
if dt == timedelta(days=1):
return dt1.hour == 0 and dt1.minute == 0 and dt1.second == 0 and dt1.microsecond == 0
return False
def _for_elasticsearch_logs(logs, repository_id=None, namespace_id=None):
namespace_ids = set()
for log in logs:
namespace_ids.add(log.account_id)
namespace_ids.add(log.performer_id)
assert namespace_id is None or log.account_id == namespace_id
assert repository_id is None or log.repository_id == repository_id
id_user_map = model.user.get_user_map_by_ids(namespace_ids)
return [Log.for_elasticsearch_log(log, id_user_map) for log in logs]
def _random_id():
""" Generates a unique uuid4 string for the random_id field in LogEntry.
It is used as a tie-breaker for sorting logs based on datetime:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-search-after.html
"""
return str(uuid.uuid4())
@add_metaclass(ABCMeta)
class ElasticsearchLogsModelInterface(object):
"""
Interface for Elasticsearch specific operations with the logs model.
These operations are usually index based.
"""
@abstractmethod
def can_delete_index(self, index, cutoff_date):
""" Return whether the given index is older than the given cutoff date. """
@abstractmethod
def list_indices(self):
""" List the logs model's indices. """
class DocumentLogsModel(SharedModel, ActionLogsDataInterface, ElasticsearchLogsModelInterface):
"""
DocumentLogsModel implements the data model for the logs API backed by an
elasticsearch service.
"""
def __init__(self, should_skip_logging=None, elasticsearch_config=None, producer=None, **kwargs):
self._should_skip_logging = should_skip_logging
self._logs_producer = LogProducerProxy()
self._es_client = configure_es(**elasticsearch_config)
if producer == 'kafka':
kafka_config = kwargs['kafka_config']
self._logs_producer.initialize(KafkaLogsProducer(**kafka_config))
elif producer == 'elasticsearch':
self._logs_producer.initialize(ElasticsearchLogsProducer())
elif producer == 'kinesis_stream':
kinesis_stream_config = kwargs['kinesis_stream_config']
self._logs_producer.initialize(KinesisStreamLogsProducer(**kinesis_stream_config))
else:
raise Exception('Invalid log producer: %s' % producer)
@staticmethod
def _get_ids_by_names(repository_name, namespace_name, performer_name):
""" Retrieve repository/namespace/performer ids based on their names.
Raises DataModelException when the namespace_name does not match any
user in the database.
Returns each database ID, or None where the entity does not exist.
"""
repository_id = None
account_id = None
performer_id = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
if repository:
repository_id = repository.id
account_id = repository.namespace_user.id
if namespace_name and account_id is None:
account = model.user.get_user_or_org(namespace_name)
if account is None:
raise DataModelException('Invalid namespace requested')
account_id = account.id
if performer_name:
performer = model.user.get_user(performer_name)
if performer:
performer_id = performer.id
return repository_id, account_id, performer_id
def _base_query(self, performer_id=None, repository_id=None, account_id=None, filter_kinds=None,
index=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
if index is not None:
search = LogEntry.search(index=index)
else:
search = LogEntry.search()
if performer_id is not None:
assert isinstance(performer_id, int)
search = search.filter('term', performer_id=performer_id)
if repository_id is not None:
assert isinstance(repository_id, int)
search = search.filter('term', repository_id=repository_id)
if account_id is not None and repository_id is None:
assert isinstance(account_id, int)
search = search.filter('term', account_id=account_id)
if filter_kinds is not None:
kind_map = model.log.get_log_entry_kinds()
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
search = search.exclude('terms', kind_id=ignore_ids)
return search
def _base_query_date_range(self, start_datetime, end_datetime, performer_id, repository_id,
account_id, filter_kinds, index=None):
skip_datetime_check = False
if _date_range_in_single_index(start_datetime, end_datetime):
index = self._es_client.index_name(start_datetime)
skip_datetime_check = self._es_client.index_exists(index)
if index and (skip_datetime_check or self._es_client.index_exists(index)):
search = self._base_query(performer_id, repository_id, account_id, filter_kinds,
index=index)
else:
search = self._base_query(performer_id, repository_id, account_id, filter_kinds)
if not skip_datetime_check:
search = search.query('range', datetime={'gte': start_datetime, 'lt': end_datetime})
return search
def _load_logs_for_day(self, logs_date, performer_id, repository_id, account_id, filter_kinds,
after_datetime=None, after_random_id=None, size=PAGE_SIZE):
index = self._es_client.index_name(logs_date)
if not self._es_client.index_exists(index):
return []
search = self._base_query(performer_id, repository_id, account_id, filter_kinds,
index=index)
search = search.sort({'datetime': 'desc'}, {'random_id.keyword': 'desc'})
search = search.extra(size=size)
if after_datetime is not None and after_random_id is not None:
after_datetime_epoch_ms = epoch_ms(after_datetime)
search = search.extra(search_after=[after_datetime_epoch_ms, after_random_id])
return search.execute()
def _load_latest_logs(self, performer_id, repository_id, account_id, filter_kinds, size):
""" Return the latest logs from Elasticsearch.
Look at indices up to the set logrotateworker threshold, or up to 30 days if not defined.
"""
# Set the last index to check to be the logrotateworker threshold, or 30 days
end_datetime = datetime.now()
start_datetime = end_datetime - timedelta(days=DATE_RANGE_LIMIT)
latest_logs = []
for day in _date_range_descending(start_datetime, end_datetime, includes_end_datetime=True):
try:
logs = self._load_logs_for_day(day, performer_id, repository_id, account_id, filter_kinds,
size=size)
latest_logs.extend(logs)
except NotFoundError:
continue
if len(latest_logs) >= size:
break
return _for_elasticsearch_logs(latest_logs[:size], repository_id, account_id)
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
assert start_datetime is not None and end_datetime is not None
# Check for a valid combined model token when migrating online from a combined model
if page_token is not None and page_token.get('readwrite_page_token') is not None:
page_token = page_token.get('readwrite_page_token')
if page_token is not None and max_page_count is not None:
page_number = page_token.get('page_number')
if page_number is not None and page_number + 1 > max_page_count:
return LogEntriesPage([], None)
repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
repository_name, namespace_name, performer_name)
after_datetime = None
after_random_id = None
if page_token is not None:
after_datetime = parse_datetime(page_token['datetime'])
after_random_id = page_token['random_id']
if after_datetime is not None:
end_datetime = min(end_datetime, after_datetime)
all_logs = []
with CloseForLongOperation(config.app_config):
for current_date in _date_range_descending(start_datetime, end_datetime):
try:
logs = self._load_logs_for_day(current_date, performer_id, repository_id, account_id,
filter_kinds, after_datetime, after_random_id,
size=PAGE_SIZE+1)
all_logs.extend(logs)
except NotFoundError:
continue
if len(all_logs) > PAGE_SIZE:
break
next_page_token = None
all_logs = all_logs[0:PAGE_SIZE+1]
if len(all_logs) == PAGE_SIZE + 1:
# The last element in the response is only used to check whether there are more elements.
# The second-to-last element is used as the pagination token, because search_after does
# not include the exact match, so the next page will start with the last element returned.
# This keeps the behavior exactly the same as table_logs_model, so that
# the caller can expect when a pagination token is non-empty, there must be
# at least 1 log to be retrieved.
next_page_token = {
'datetime': all_logs[-2].datetime.isoformat(),
'random_id': all_logs[-2].random_id,
'page_number': page_token['page_number'] + 1 if page_token else 1,
}
return LogEntriesPage(_for_elasticsearch_logs(all_logs[:PAGE_SIZE], repository_id, account_id),
next_page_token)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
repository_name, namespace_name, performer_name)
with CloseForLongOperation(config.app_config):
latest_logs = self._load_latest_logs(performer_id, repository_id, account_id, filter_kinds,
size)
return latest_logs
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
if end_datetime - start_datetime >= timedelta(days=DATE_RANGE_LIMIT):
raise Exception('Cannot lookup aggregated logs over a period longer than a month')
repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
repository_name, namespace_name, performer_name)
with CloseForLongOperation(config.app_config):
search = self._base_query_date_range(start_datetime, end_datetime, performer_id,
repository_id, account_id, filter_kinds)
search.aggs.bucket('by_id', 'terms', field='kind_id').bucket('by_date', 'date_histogram',
field='datetime', interval='day')
# es returns all buckets when size=0
search = search.extra(size=0)
resp = search.execute()
if not resp.aggregations:
return []
counts = []
by_id = resp.aggregations['by_id']
for id_bucket in by_id.buckets:
for date_bucket in id_bucket.by_date.buckets:
if date_bucket.doc_count > 0:
counts.append(AggregatedLogCount(id_bucket.key, date_bucket.doc_count, date_bucket.key))
return counts
def count_repository_actions(self, repository, day):
index = self._es_client.index_name(day)
search = self._base_query_date_range(day, day + timedelta(days=1),
None,
repository.id,
None,
None,
index=index)
search = search.params(request_timeout=COUNT_REPOSITORY_ACTION_TIMEOUT)
try:
return search.count()
except NotFoundError:
return 0
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
if self._should_skip_logging and self._should_skip_logging(kind_name, namespace_name,
is_free_namespace):
return
if repository_name is not None:
assert repository is None
assert namespace_name is not None
repository = model.repository.get_repository(namespace_name, repository_name)
if timestamp is None:
timestamp = datetime.today()
account_id = None
performer_id = None
repository_id = None
if namespace_name is not None:
account_id = model.user.get_namespace_user(namespace_name).id
if performer is not None:
performer_id = performer.id
if repository is not None:
repository_id = repository.id
metadata_json = json.dumps(metadata or {}, default=_json_serialize)
kind_id = model.log._get_log_entry_kind(kind_name)
log = LogEntry(random_id=_random_id(), kind_id=kind_id, account_id=account_id,
performer_id=performer_id, ip=ip, metadata_json=metadata_json,
repository_id=repository_id, datetime=timestamp)
try:
self._logs_producer.send(log)
except LogSendException as lse:
strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING')
# dict.update() returns None, so build the extra payload before logging.
extra_info = {'exception': lse}
extra_info.update(log.to_dict())
logger.exception('log_action failed', extra=extra_info)
if not (strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING):
raise
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
max_query_time = max_query_time.total_seconds() if max_query_time is not None else 300
search = self._base_query_date_range(start_datetime, end_datetime, None, repository_id,
namespace_id, None)
def raise_on_timeout(batch_generator):
start = time()
for batch in batch_generator:
elapsed = time() - start
if elapsed > max_query_time:
logger.error('Retrieval of logs `%s/%s` timed out with time of `%s`', namespace_id,
repository_id, elapsed)
raise LogsIterationTimeout()
yield batch
start = time()
def read_batch(scroll):
batch = []
for log in scroll:
batch.append(log)
if len(batch) == DEFAULT_RESULT_WINDOW:
yield _for_elasticsearch_logs(batch, repository_id=repository_id,
namespace_id=namespace_id)
batch = []
if batch:
yield _for_elasticsearch_logs(batch, repository_id=repository_id, namespace_id=namespace_id)
search = search.params(size=DEFAULT_RESULT_WINDOW, request_timeout=max_query_time)
try:
with CloseForLongOperation(config.app_config):
for batch in raise_on_timeout(read_batch(search.scan())):
yield batch
except ConnectionTimeout:
raise LogsIterationTimeout()
def can_delete_index(self, index, cutoff_date):
return self._es_client.can_delete_index(index, cutoff_date)
def list_indices(self):
return self._es_client.list_indices()
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
""" Yield a context manager for a group of outdated logs. """
all_indices = self.list_indices()
for index in all_indices:
if not self.can_delete_index(index, cutoff_date):
continue
context = ElasticsearchLogRotationContext(index, min_logs_per_rotation, self._es_client)
yield context
class ElasticsearchLogRotationContext(LogRotationContextInterface):
"""
ElasticsearchLogRotationContext yields batches of logs from an index.
When completed without exceptions, this context will delete its associated
Elasticsearch index.
"""
def __init__(self, index, min_logs_per_rotation, es_client):
self._es_client = es_client
self.min_logs_per_rotation = min_logs_per_rotation
self.index = index
self.start_pos = 0
self.end_pos = 0
self.scroll = None
def __enter__(self):
search = self._base_query()
self.scroll = search.scan()
return self
def __exit__(self, ex_type, ex_value, ex_traceback):
if ex_type is None and ex_value is None and ex_traceback is None:
logger.debug('Deleting index %s', self.index)
self._es_client.delete_index(self.index)
def yield_logs_batch(self):
def batched_logs(gen, size):
batch = []
for log in gen:
batch.append(log)
if len(batch) == size:
yield batch
batch = []
if batch:
yield batch
for batch in batched_logs(self.scroll, self.min_logs_per_rotation):
self.end_pos = self.start_pos + len(batch) - 1
yield batch, self._generate_filename()
self.start_pos = self.end_pos + 1
def _base_query(self):
search = LogEntry.search(index=self.index)
return search
def _generate_filename(self):
""" Generate the filenames used to archive the action logs. """
filename = '%s_%d-%d' % (self.index, self.start_pos, self.end_pos)
filename = '.'.join((filename, 'txt.gz'))
return filename
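# Illustrative example: rotating the index 'logentry_2019-11-12' with min_logs_per_rotation=1000
# yields archive filenames such as 'logentry_2019-11-12_0-999.txt.gz',
# 'logentry_2019-11-12_1000-1999.txt.gz', ..., and the index itself is deleted once the
# context exits without raising.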

View file

@ -0,0 +1,255 @@
import os
import logging
import re
from datetime import datetime, timedelta
from requests_aws4auth import AWS4Auth
from elasticsearch import RequestsHttpConnection
from elasticsearch.exceptions import NotFoundError, AuthorizationException
from elasticsearch_dsl import Index, Document, Integer, Date, Text, Ip, Keyword
from elasticsearch_dsl.connections import connections
logger = logging.getLogger(__name__)
# Name of the connection used for Elasticsearch's template API
ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS = 'logentry_template'
# Prefix of autogenerated indices
INDEX_NAME_PREFIX = 'logentry_'
# Time-based index date format
INDEX_DATE_FORMAT = '%Y-%m-%d'
# Timeout for default connection
ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT = 15
# Timeout for template api Connection
ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT = 60
# Force an index template update
ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE = os.environ.get('FORCE_INDEX_TEMPLATE_UPDATE', '')
# Valid index prefix pattern
VALID_INDEX_PATTERN = r'^((?!\.$|\.\.$|[-_+])([^A-Z:\/*?\"<>|,# ]){1,255})$'
class LogEntry(Document):
# random_id is the tie-breaker for sorting in pagination.
# random_id is also used for deduplication of records when using an "at-least-once" delivery stream.
# Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-search-after.html
#
# We don't use the _id of a document since `doc_values` is not built for this field:
# An on-disk data structure that stores the same data in a columnar format
# for optimized sorting and aggregations.
# Reference: https://github.com/elastic/elasticsearch/issues/35369
random_id = Text(fields={'keyword': Keyword()})
kind_id = Integer()
account_id = Integer()
performer_id = Integer()
repository_id = Integer()
ip = Ip()
metadata_json = Text()
datetime = Date()
_initialized = False
@classmethod
def init(cls, index_prefix, index_settings=None, skip_template_init=False):
"""
Create the index template, and populate LogEntry's mapping and index settings.
"""
wildcard_index = Index(name=index_prefix + '*')
wildcard_index.settings(**(index_settings or {}))
wildcard_index.document(cls)
cls._index = wildcard_index
cls._index_prefix = index_prefix
if not skip_template_init:
cls.create_or_update_template()
# Since the elasticsearch-dsl API requires the document's index to be defined as an inner class at the class level,
# this function needs to be called before `save` can be used.
cls._initialized = True
@classmethod
def create_or_update_template(cls):
assert cls._index and cls._index_prefix
index_template = cls._index.as_template(cls._index_prefix)
index_template.save(using=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)
def save(self, **kwargs):
# We group the logs based on year, month and day as different indexes, so that
# dropping those indexes based on retention range is easy.
#
# NOTE: This is only used if logging directly to Elasticsearch
# When using Kinesis or Kafka, the consumer of these streams
# will be responsible for the management of the indices' lifecycle.
assert LogEntry._initialized
kwargs['index'] = self.datetime.strftime(self._index_prefix + INDEX_DATE_FORMAT)
return super(LogEntry, self).save(**kwargs)
class ElasticsearchLogs(object):
"""
Model for logs operations stored in an Elasticsearch cluster.
"""
def __init__(self, host=None, port=None, access_key=None, secret_key=None, aws_region=None,
index_settings=None, use_ssl=True, index_prefix=INDEX_NAME_PREFIX):
# For options in index_settings, refer to:
# https://www.elastic.co/guide/en/elasticsearch/guide/master/_index_settings.html
# Some index settings are set at index creation time and therefore should NOT
# be changed once the index has been created.
self._host = host
self._port = port
self._access_key = access_key
self._secret_key = secret_key
self._aws_region = aws_region
self._index_prefix = index_prefix
self._index_settings = index_settings
self._use_ssl = use_ssl
self._client = None
self._initialized = False
def _initialize(self):
"""
Initialize a connection to an ES cluster and
create an index template if it does not exist.
"""
if not self._initialized:
http_auth = None
if self._access_key and self._secret_key and self._aws_region:
http_auth = AWS4Auth(self._access_key, self._secret_key, self._aws_region, 'es')
elif self._access_key and self._secret_key:
http_auth = (self._access_key, self._secret_key)
else:
logger.warn("Connecting to Elasticsearch without HTTP auth")
self._client = connections.create_connection(
hosts=[{
'host': self._host,
'port': self._port
}],
http_auth=http_auth,
use_ssl=self._use_ssl,
verify_certs=True,
connection_class=RequestsHttpConnection,
timeout=ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT,
)
# Create a second connection with a longer timeout (60s vs the 15s default).
# For some reason the PUT template API can take anywhere between
# 10s and 30s on the test cluster.
# This only needs to be done once to initialize the index template
connections.create_connection(
alias=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS,
hosts=[{
'host': self._host,
'port': self._port
}],
http_auth=http_auth,
use_ssl=self._use_ssl,
verify_certs=True,
connection_class=RequestsHttpConnection,
timeout=ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT,
)
try:
force_template_update = ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE.lower() == 'true'
self._client.indices.get_template(self._index_prefix)
LogEntry.init(self._index_prefix, self._index_settings,
skip_template_init=not force_template_update)
except NotFoundError:
LogEntry.init(self._index_prefix, self._index_settings, skip_template_init=False)
finally:
try:
connections.remove_connection(ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)
except KeyError as ke:
logger.exception('Elasticsearch connection not found to remove %s: %s',
ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS, ke)
self._initialized = True
def index_name(self, day):
""" Return an index name for the given day. """
return self._index_prefix + day.strftime(INDEX_DATE_FORMAT)
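# Illustrative example: with the default prefix, index_name(date(2019, 11, 12)) -> 'logentry_2019-11-12'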
def index_exists(self, index):
try:
return index in self._client.indices.get(index)
except NotFoundError:
return False
@staticmethod
def _valid_index_prefix(prefix):
""" Check that the given index prefix is valid with the set of
indices used by this class.
"""
return re.match(VALID_INDEX_PATTERN, prefix) is not None
def _valid_index_name(self, index):
""" Check that the given index name is valid and follows the format:
<index_prefix>YYYY-MM-DD
"""
if not ElasticsearchLogs._valid_index_prefix(index):
return False
if not index.startswith(self._index_prefix) or len(index) > 255:
return False
index_dt_str = index.split(self._index_prefix, 1)[-1]
try:
datetime.strptime(index_dt_str, INDEX_DATE_FORMAT)
return True
except ValueError:
logger.exception('Invalid date format (YYYY-MM-DD) for index: %s', index)
return False
def can_delete_index(self, index, cutoff_date):
""" Check if the given index can be deleted based on the given index's date and cutoff date. """
assert self._valid_index_name(index)
index_dt = datetime.strptime(index[len(self._index_prefix):], INDEX_DATE_FORMAT)
return index_dt < cutoff_date and cutoff_date - index_dt >= timedelta(days=1)
def list_indices(self):
self._initialize()
try:
return self._client.indices.get(self._index_prefix + '*').keys()
except NotFoundError as nfe:
logger.exception('`%s` indices not found: %s', self._index_prefix, nfe.info)
return []
except AuthorizationException as ae:
logger.exception('Unauthorized for indices `%s`: %s', self._index_prefix, ae.info)
return None
def delete_index(self, index):
self._initialize()
assert self._valid_index_name(index)
try:
self._client.indices.delete(index)
return index
except NotFoundError as nfe:
logger.exception('`%s` indices not found: %s', index, nfe.info)
return None
except AuthorizationException as ae:
logger.exception('Unauthorized to delete index `%s`: %s', index, ae.info)
return None
def configure_es(host, port, access_key=None, secret_key=None, aws_region=None,
index_prefix=None, use_ssl=True, index_settings=None):
"""
For options in index_settings, refer to:
https://www.elastic.co/guide/en/elasticsearch/guide/master/_index_settings.html
Some index settings are set at index creation time and therefore should NOT
be changed once the index has been created.
"""
es_client = ElasticsearchLogs(host=host, port=port, access_key=access_key, secret_key=secret_key,
aws_region=aws_region, index_prefix=index_prefix or INDEX_NAME_PREFIX,
use_ssl=use_ssl, index_settings=index_settings)
es_client._initialize()
return es_client
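# A minimal usage sketch (the host, port and prefix below are placeholders; when access_key,
# secret_key and aws_region are all provided, requests are signed with AWS4Auth instead):
#
#   es_client = configure_es('es.example.com', 9200, index_prefix='logentry_', use_ssl=True)
#   index = es_client.index_name(datetime.utcnow().date())
#   es_client.index_exists(index)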

View file

@ -0,0 +1,244 @@
import logging
import json
from collections import namedtuple
from datetime import datetime
from tzlocal import get_localzone
from dateutil.relativedelta import relativedelta
from data import model
from data.logs_model.datatypes import AggregatedLogCount, LogEntriesPage, Log
from data.logs_model.interface import (ActionLogsDataInterface, LogRotationContextInterface,
LogsIterationTimeout)
logger = logging.getLogger(__name__)
LogAndRepository = namedtuple('LogAndRepository', ['log', 'stored_log', 'repository'])
StoredLog = namedtuple('StoredLog', ['kind_id',
'account_id',
'performer_id',
'ip',
'metadata_json',
'repository_id',
'datetime'])
class InMemoryModel(ActionLogsDataInterface):
"""
InMemoryModel implements the data model for logs in-memory. FOR TESTING ONLY.
"""
def __init__(self):
self.logs = []
def _filter_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
for log_and_repo in self.logs:
if log_and_repo.log.datetime < start_datetime or log_and_repo.log.datetime > end_datetime:
continue
if performer_name and log_and_repo.log.performer_username != performer_name:
continue
if (repository_name and
(not log_and_repo.repository or log_and_repo.repository.name != repository_name)):
continue
if namespace_name and log_and_repo.log.account_username != namespace_name:
continue
if filter_kinds:
kind_map = model.log.get_log_entry_kinds()
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
if log_and_repo.log.kind_id in ignore_ids:
continue
yield log_and_repo
def _filter_latest_logs(self, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
for log_and_repo in sorted(self.logs, key=lambda t: t.log.datetime, reverse=True):
if performer_name and log_and_repo.log.performer_username != performer_name:
continue
if (repository_name and
(not log_and_repo.repository or log_and_repo.repository.name != repository_name)):
continue
if namespace_name and log_and_repo.log.account_username != namespace_name:
continue
if filter_kinds:
kind_map = model.log.get_log_entry_kinds()
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
if log_and_repo.log.kind_id in ignore_ids:
continue
yield log_and_repo
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
logs = []
for log_and_repo in self._filter_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds):
logs.append(log_and_repo.log)
return LogEntriesPage(logs, None)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
latest_logs = []
for log_and_repo in self._filter_latest_logs(performer_name, repository_name, namespace_name,
filter_kinds):
if size is not None and len(latest_logs) == size:
break
latest_logs.append(log_and_repo.log)
return latest_logs
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
entries = {}
for log_and_repo in self._filter_logs(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds):
entry = log_and_repo.log
synthetic_date = datetime(start_datetime.year, start_datetime.month, int(entry.datetime.day),
tzinfo=get_localzone())
if synthetic_date.day < start_datetime.day:
synthetic_date = synthetic_date + relativedelta(months=1)
key = '%s-%s' % (entry.kind_id, entry.datetime.day)
if key in entries:
entries[key] = AggregatedLogCount(entry.kind_id, entries[key].count + 1,
synthetic_date)
else:
entries[key] = AggregatedLogCount(entry.kind_id, 1, synthetic_date)
return entries.values()
def count_repository_actions(self, repository, day):
count = 0
for log_and_repo in self.logs:
if log_and_repo.repository != repository:
continue
if log_and_repo.log.datetime.day != day.day:
continue
count += 1
return count
def queue_logs_export(self, start_datetime, end_datetime, export_action_logs_queue,
namespace_name=None, repository_name=None, callback_url=None,
callback_email=None, filter_kinds=None):
raise NotImplementedError
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
timestamp = timestamp or datetime.today()
if not repository and repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
account = None
account_id = None
performer_id = None
repository_id = None
if namespace_name is not None:
account = model.user.get_namespace_user(namespace_name)
account_id = account.id
if performer is not None:
performer_id = performer.id
if repository is not None:
repository_id = repository.id
metadata_json = json.dumps(metadata or {})
kind_id = model.log.get_log_entry_kinds()[kind_name]
stored_log = StoredLog(
kind_id,
account_id,
performer_id,
ip,
metadata_json,
repository_id,
timestamp
)
log = Log(metadata_json=metadata,
ip=ip,
datetime=timestamp,
performer_email=performer.email if performer else None,
performer_username=performer.username if performer else None,
performer_robot=performer.robot if performer else None,
account_organization=account.organization if account else None,
account_username=account.username if account else None,
account_email=account.email if account else None,
account_robot=account.robot if account else None,
kind_id=kind_id)
self.logs.append(LogAndRepository(log, stored_log, repository))
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
# Just for testing.
if max_query_time is not None:
raise LogsIterationTimeout()
logs = []
for log_and_repo in self._filter_logs(start_datetime, end_datetime):
if (repository_id and
(not log_and_repo.repository or log_and_repo.repository.id != repository_id)):
continue
if namespace_id:
if log_and_repo.log.account_username is None:
continue
namespace = model.user.get_namespace_user(log_and_repo.log.account_username)
if namespace.id != namespace_id:
continue
logs.append(log_and_repo.log)
yield logs
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
expired_logs = [log_and_repo for log_and_repo in self.logs
if log_and_repo.log.datetime <= cutoff_date]
while True:
if not expired_logs:
break
context = InMemoryLogRotationContext(expired_logs[:min_logs_per_rotation], self.logs)
expired_logs = expired_logs[min_logs_per_rotation:]
yield context
class InMemoryLogRotationContext(LogRotationContextInterface):
def __init__(self, expired_logs, all_logs):
self.expired_logs = expired_logs
self.all_logs = all_logs
def __enter__(self):
return self
def __exit__(self, ex_type, ex_value, ex_traceback):
if ex_type is None and ex_value is None and ex_traceback is None:
for log in self.expired_logs:
self.all_logs.remove(log)
def yield_logs_batch(self):
""" Yield a batch of logs and a filename for that batch. """
filename = 'inmemory_model_filename_placeholder'
filename = '.'.join((filename, 'txt.gz'))
yield [log_and_repo.stored_log for log_and_repo in self.expired_logs], filename

View file

@ -0,0 +1,95 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
class LogsIterationTimeout(Exception):
""" Exception raised if logs iteration times out. """
@add_metaclass(ABCMeta)
class ActionLogsDataInterface(object):
""" Interface for code to work with the logs data model. The logs data model consists
of all access for reading and writing action logs.
"""
@abstractmethod
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
""" Looks up all logs between the start_datetime and end_datetime, filtered
by performer (a user), repository or namespace. Note that one (and only one) of the three
can be specified. Returns a LogEntriesPage. `filter_kinds`, if specified, is a set/list
of the kinds of logs to filter out.
"""
@abstractmethod
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
""" Looks up latest logs of a specific kind, filtered by performer (a user),
repository or namespace. Note that one (and only one) of the three can be specified.
Returns a list of `Log`.
"""
@abstractmethod
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
""" Returns the aggregated count of logs, by kind, between the start_datetime and end_datetime,
filtered by performer (a user), repository or namespace. Note that one (and only one) of
the three can be specified. Returns a list of AggregatedLogCount.
"""
@abstractmethod
def count_repository_actions(self, repository, day):
""" Returns the total number of repository actions over the given day, in the given repository
or None on error.
"""
@abstractmethod
def queue_logs_export(self, start_datetime, end_datetime, export_action_logs_queue,
namespace_name=None, repository_name=None, callback_url=None,
callback_email=None, filter_kinds=None):
""" Queues logs between the start_datetime and end_time, filtered by a repository or namespace,
for export to the specified URL and/or email address. Returns the ID of the export job
queued or None if error.
"""
@abstractmethod
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
""" Logs a single action as having taken place. """
@abstractmethod
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
""" Returns an iterator that yields bundles of all logs found between the start_datetime and
end_datetime, optionally filtered by the repository or namespace. This function should be
used for any bulk lookup operations, and should be implemented to put
minimal strain on the backing storage for large operations. If there was an error in setting
up, returns None.
If max_query_time is specified, each iteration that yields a log bundle will have its
queries run with a maximum timeout of that specified, and, if any exceed that threshold,
LogsIterationTimeout will be raised instead of returning the logs bundle.
"""
@abstractmethod
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
"""
A generator that yields contexts implementing the LogRotationContextInterface.
Each context represents a set of logs to be archived and deleted once
the context completes without exceptions.
For database logs, the LogRotationContext abstracts over a set of rows. When the context
finishes, its associated rows get deleted.
For Elasticsearch logs, the LogRotationContext abstracts over indices. When the context
finishes, its associated index gets deleted.
"""
@add_metaclass(ABCMeta)
class LogRotationContextInterface(object):
""" Interface for iterating over a set of logs to be archived. """
@abstractmethod
def yield_logs_batch(self):
"""
Generator yielding batch of logs and a filename for that batch.
A batch is a subset of the logs part of the context.
"""

View file

@ -0,0 +1,27 @@
import logging
logger = logging.getLogger(__name__)
class LogSendException(Exception):
""" A generic error when sending the logs to its destination.
e.g. Kinesis, Kafka, Elasticsearch, ...
"""
pass
class LogProducerProxy(object):
def __init__(self):
self._model = None
def initialize(self, model):
self._model = model
logger.info('===============================')
logger.info('Using producer `%s`', self._model)
logger.info('===============================')
def __getattr__(self, attr):
if not self._model:
raise AttributeError("LogsModelProxy is not initialized")
return getattr(self._model, attr)

View file

@ -0,0 +1,25 @@
import logging
from elasticsearch.exceptions import ElasticsearchException
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer import LogSendException
logger = logging.getLogger(__name__)
class ElasticsearchLogsProducer(LogProducerInterface):
""" Log producer writing log entries to Elasticsearch.
This implementation writes directly to Elasticsearch without a streaming/queueing service.
"""
def send(self, logentry):
try:
logentry.save()
except ElasticsearchException as ex:
logger.exception('ElasticsearchLogsProducer error sending log to Elasticsearch: %s', ex)
raise LogSendException('ElasticsearchLogsProducer error sending log to Elasticsearch: %s' % ex)
except Exception as e:
logger.exception('ElasticsearchLogsProducer exception sending log to Elasticsearch: %s', e)
raise LogSendException('ElasticsearchLogsProducer exception sending log to Elasticsearch: %s' % e)

View file

@ -0,0 +1,8 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
@add_metaclass(ABCMeta)
class LogProducerInterface(object):
@abstractmethod
def send(self, logentry):
""" Send a log entry to the configured log infrastructure. """

View file

@ -0,0 +1,45 @@
import logging
from kafka.errors import KafkaError, KafkaTimeoutError
from kafka import KafkaProducer
from data.logs_model.shared import epoch_ms
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.logs_producer import LogSendException
logger = logging.getLogger(__name__)
DEFAULT_MAX_BLOCK_SECONDS = 5
class KafkaLogsProducer(LogProducerInterface):
""" Log producer writing log entries to a Kafka stream. """
def __init__(self, bootstrap_servers=None, topic=None, client_id=None, max_block_seconds=None):
self.bootstrap_servers = bootstrap_servers
self.topic = topic
self.client_id = client_id
self.max_block_ms = (max_block_seconds or DEFAULT_MAX_BLOCK_SECONDS) * 1000
self._producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers,
client_id=self.client_id,
max_block_ms=self.max_block_ms,
value_serializer=logs_json_serializer)
def send(self, logentry):
try:
# send() has a (max_block_ms) timeout and get() has a (max_block_ms) timeout
# for an upper bound of 2x(max_block_ms) before guaranteed delivery
future = self._producer.send(self.topic, logentry.to_dict(), timestamp_ms=epoch_ms(logentry.datetime))
record_metadata = future.get(timeout=self.max_block_ms)
assert future.succeeded()
except KafkaTimeoutError as kte:
logger.exception('KafkaLogsProducer timeout sending log to Kafka: %s', kte)
raise LogSendException('KafkaLogsProducer timeout sending log to Kafka: %s' % kte)
except KafkaError as ke:
logger.exception('KafkaLogsProducer error sending log to Kafka: %s', ke)
raise LogSendException('KafkaLogsProducer error sending log to Kafka: %s' % ke)
except Exception as e:
logger.exception('KafkaLogsProducer exception sending log to Kafka: %s', e)
raise LogSendException('KafkaLogsProducer exception sending log to Kafka: %s' % e)

View file

@ -0,0 +1,75 @@
import logging
import hashlib
import random
import boto3
from botocore.exceptions import ClientError
from botocore.client import Config
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.logs_producer import LogSendException
logger = logging.getLogger(__name__)
KINESIS_PARTITION_KEY_PREFIX = 'logentry_partition_key_'
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_READ_TIMEOUT = 5
MAX_RETRY_ATTEMPTS = 5
DEFAULT_MAX_POOL_CONNECTIONS = 10
def _partition_key(number_of_shards=None):
""" Generate a partition key for AWS Kinesis stream.
If the number of shards is specified, generate keys where the size of the key space is
the number of shards.
"""
key = None
if number_of_shards is not None:
shard_number = random.randrange(0, number_of_shards)
key = hashlib.sha1(KINESIS_PARTITION_KEY_PREFIX + str(shard_number)).hexdigest()
else:
key = hashlib.sha1(KINESIS_PARTITION_KEY_PREFIX + str(random.getrandbits(256))).hexdigest()
return key
class KinesisStreamLogsProducer(LogProducerInterface):
""" Log producer writing log entries to an Amazon Kinesis Data Stream. """
def __init__(self, stream_name, aws_region, aws_access_key=None, aws_secret_key=None,
connect_timeout=None, read_timeout=None, max_retries=None,
max_pool_connections=None):
self._stream_name = stream_name
self._aws_region = aws_region
self._aws_access_key = aws_access_key
self._aws_secret_key = aws_secret_key
self._connect_timeout = connect_timeout or DEFAULT_CONNECT_TIMEOUT
self._read_timeout = read_timeout or DEFAULT_READ_TIMEOUT
self._max_retries = max_retries or MAX_RETRY_ATTEMPTS
self._max_pool_connections = max_pool_connections or DEFAULT_MAX_POOL_CONNECTIONS
client_config = Config(connect_timeout=self._connect_timeout,
read_timeout=self._read_timeout,
retries={'max_attempts': self._max_retries},
max_pool_connections=self._max_pool_connections)
self._producer = boto3.client('kinesis', use_ssl=True,
region_name=self._aws_region,
aws_access_key_id=self._aws_access_key,
aws_secret_access_key=self._aws_secret_key,
config=client_config)
def send(self, logentry):
try:
data = logs_json_serializer(logentry)
self._producer.put_record(
StreamName=self._stream_name,
Data=data,
PartitionKey=_partition_key()
)
except ClientError as ce:
logger.exception('KinesisStreamLogsProducer client error sending log to Kinesis: %s', ce)
raise LogSendException('KinesisStreamLogsProducer client error sending log to Kinesis: %s' % ce)
except Exception as e:
logger.exception('KinesisStreamLogsProducer exception sending log to Kinesis: %s', e)
raise LogSendException('KinesisStreamLogsProducer exception sending log to Kinesis: %s' % e)
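# A minimal usage sketch (stream name, region and credentials are placeholders; `logentry`
# would be a LogEntry document as built in the Elasticsearch logs model's log_action):
#
#   producer = KinesisStreamLogsProducer('example-action-logs', 'us-east-1',
#                                        aws_access_key='...', aws_secret_key='...')
#   producer.send(logentry)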

View file

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import logging
import json
from datetime import datetime
import pytest
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.elastic_logs import LogEntry
logger = logging.getLogger(__name__)
TEST_DATETIME = datetime.utcnow()
TEST_JSON_STRING = '{"a": "b", "c": "d"}'
TEST_JSON_STRING_WITH_UNICODE = u'{"éëê": "îôû"}'
VALID_LOGENTRY = LogEntry(random_id='123-45', ip='0.0.0.0', metadata_json=TEST_JSON_STRING, datetime=TEST_DATETIME)
VALID_LOGENTRY_WITH_UNICODE = LogEntry(random_id='123-45', ip='0.0.0.0', metadata_json=TEST_JSON_STRING_WITH_UNICODE, datetime=TEST_DATETIME)
VALID_LOGENTRY_EXPECTED_OUTPUT = '{"datetime": "%s", "ip": "0.0.0.0", "metadata_json": "{\\"a\\": \\"b\\", \\"c\\": \\"d\\"}", "random_id": "123-45"}' % TEST_DATETIME.isoformat()
VALID_LOGENTRY_WITH_UNICODE_EXPECTED_OUTPUT = '{"datetime": "%s", "ip": "0.0.0.0", "metadata_json": "{\\"\\u00e9\\u00eb\\u00ea\\": \\"\\u00ee\\u00f4\\u00fb\\"}", "random_id": "123-45"}' % TEST_DATETIME.isoformat()
@pytest.mark.parametrize(
'is_valid, given_input, expected_output',
[
# Valid inputs
pytest.param(True, VALID_LOGENTRY, VALID_LOGENTRY_EXPECTED_OUTPUT),
# With unicode
pytest.param(True, VALID_LOGENTRY_WITH_UNICODE, VALID_LOGENTRY_WITH_UNICODE_EXPECTED_OUTPUT),
])
def test_logs_json_serializer(is_valid, given_input, expected_output):
if not is_valid:
with pytest.raises(ValueError) as ve:
data = logs_json_serializer(given_input)
else:
data = logs_json_serializer(given_input, sort_keys=True)
assert data == expected_output
# Make sure the datetime was serialized in the correct ISO8601
datetime_str = json.loads(data)['datetime']
assert datetime_str == TEST_DATETIME.isoformat()

View file

@ -0,0 +1,15 @@
import json
from datetime import datetime
class LogEntryJSONEncoder(json.JSONEncoder):
""" JSON encoder to encode datetimes to ISO8601 format. """
def default(self, obj):
if isinstance(obj, datetime):
return obj.isoformat()
return super(LogEntryJSONEncoder, self).default(obj)
def logs_json_serializer(logentry, sort_keys=False):
""" Serializes a LogEntry to json bytes. """
return json.dumps(logentry.to_dict(), cls=LogEntryJSONEncoder,
ensure_ascii=True, sort_keys=sort_keys).encode('ascii')

53
data/logs_model/shared.py Normal file
View file

@ -0,0 +1,53 @@
import uuid
import json
from calendar import timegm
from data import model
class SharedModel:
def queue_logs_export(self, start_datetime, end_datetime, export_action_logs_queue,
namespace_name=None, repository_name=None, callback_url=None,
callback_email=None, filter_kinds=None):
""" Queues logs between the start_datetime and end_time, filtered by a repository or namespace,
for export to the specified URL and/or email address. Returns the ID of the export job
queued or None if error.
"""
export_id = str(uuid.uuid4())
namespace = model.user.get_namespace_user(namespace_name)
if namespace is None:
return None
repository = None
if repository_name is not None:
repository = model.repository.get_repository(namespace_name, repository_name)
if repository is None:
return None
export_action_logs_queue.put([namespace_name],
json.dumps({
'export_id': export_id,
'repository_id': repository.id if repository else None,
'namespace_id': namespace.id,
'namespace_name': namespace.username,
'repository_name': repository.name if repository else None,
'start_time': start_datetime.strftime('%m/%d/%Y'),
'end_time': end_datetime.strftime('%m/%d/%Y'),
'callback_url': callback_url,
'callback_email': callback_email,
}), retries_remaining=3)
return export_id
def epoch_ms(dt):
return (timegm(dt.timetuple()) * 1000) + (dt.microsecond / 1000)
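# Illustrative example: epoch_ms(datetime(1970, 1, 1)) == 0; dt.microsecond / 1000 contributes the millisecond part.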
def get_kinds_filter(kinds):
""" Given a list of kinds, return the set of kinds not that are not part of that list.
i.e Returns the list of kinds to be filtered out. """
kind_map = model.log.get_log_entry_kinds()
kind_map = {key: kind_map[key] for key in kind_map if not isinstance(key, int)}
return [kind_name for kind_name in kind_map if kind_name not in kinds]

View file

@ -0,0 +1,291 @@
# pylint: disable=protected-access
import logging
from datetime import datetime, timedelta
from tzlocal import get_localzone
from dateutil.relativedelta import relativedelta
from data import model
from data.model import config
from data.database import LogEntry, LogEntry2, LogEntry3, UseThenDisconnect
from data.logs_model.interface import ActionLogsDataInterface, LogsIterationTimeout, \
LogRotationContextInterface
from data.logs_model.datatypes import Log, AggregatedLogCount, LogEntriesPage
from data.logs_model.shared import SharedModel
from data.model.log import get_stale_logs, get_stale_logs_start_id, delete_stale_logs
logger = logging.getLogger(__name__)
MINIMUM_RANGE_SIZE = 1 # second
MAXIMUM_RANGE_SIZE = 60 * 60 * 24 * 30 # seconds ~= 1 month
EXPECTED_ITERATION_LOG_COUNT = 1000
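# The log tables are ordered newest-first: lookups start with LogEntry3 and fall back to the
# older LogEntry2 and LogEntry tables as each table's pages are exhausted.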
LOG_MODELS = [LogEntry3, LogEntry2, LogEntry]
class TableLogsModel(SharedModel, ActionLogsDataInterface):
"""
TableLogsModel implements the data model for the logs API backed by a single table
in the database.
"""
def __init__(self, should_skip_logging=None, **kwargs):
self._should_skip_logging = should_skip_logging
def lookup_logs(self, start_datetime, end_datetime, performer_name=None, repository_name=None,
namespace_name=None, filter_kinds=None, page_token=None, max_page_count=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
assert start_datetime is not None
assert end_datetime is not None
repository = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
assert repository
performer = None
if performer_name:
performer = model.user.get_user(performer_name)
assert performer
def get_logs(m, page_token):
logs_query = model.log.get_logs_query(start_datetime, end_datetime, performer=performer,
repository=repository, namespace=namespace_name,
ignore=filter_kinds, model=m)
logs, next_page_token = model.modelutil.paginate(logs_query, m,
descending=True,
page_token=page_token,
limit=20,
max_page=max_page_count,
sort_field_name='datetime')
return logs, next_page_token
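    # The page token carries the index of the table currently being paginated ('tti'), so
    # callers iterating pages transparently fall through LogEntry3 -> LogEntry2 -> LogEntry.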
TOKEN_TABLE_ID = 'tti'
table_index = 0
logs = []
next_page_token = page_token or None
# Skip empty pages (empty table)
while len(logs) == 0 and table_index < len(LOG_MODELS) - 1:
table_specified = next_page_token is not None and next_page_token.get(TOKEN_TABLE_ID) is not None
if table_specified:
table_index = next_page_token.get(TOKEN_TABLE_ID)
logs_result, next_page_token = get_logs(LOG_MODELS[table_index], next_page_token)
logs.extend(logs_result)
if next_page_token is None and table_index < len(LOG_MODELS) - 1:
next_page_token = {TOKEN_TABLE_ID: table_index + 1}
return LogEntriesPage([Log.for_logentry(log) for log in logs], next_page_token)
def lookup_latest_logs(self, performer_name=None, repository_name=None, namespace_name=None,
filter_kinds=None, size=20):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
repository = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
assert repository
performer = None
if performer_name:
performer = model.user.get_user(performer_name)
assert performer
def get_latest_logs(m):
logs_query = model.log.get_latest_logs_query(performer=performer, repository=repository,
namespace=namespace_name, ignore=filter_kinds,
model=m, size=size)
logs = list(logs_query)
return [Log.for_logentry(log) for log in logs]
return get_latest_logs(LOG_MODELS[0])
def get_aggregated_log_counts(self, start_datetime, end_datetime, performer_name=None,
repository_name=None, namespace_name=None, filter_kinds=None):
if filter_kinds is not None:
assert all(isinstance(kind_name, str) for kind_name in filter_kinds)
if end_datetime - start_datetime >= timedelta(weeks=4):
raise Exception('Cannot lookup aggregated logs over a period longer than a month')
repository = None
if repository_name and namespace_name:
repository = model.repository.get_repository(namespace_name, repository_name)
performer = None
if performer_name:
performer = model.user.get_user(performer_name)
entries = {}
for log_model in LOG_MODELS:
aggregated = model.log.get_aggregated_logs(start_datetime, end_datetime,
performer=performer,
repository=repository,
namespace=namespace_name,
ignore=filter_kinds,
model=log_model)
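      # The aggregation only reports a day-of-month, so the full date is rebuilt against the
      # start month; a day smaller than the start day must belong to the following month.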
for entry in aggregated:
synthetic_date = datetime(start_datetime.year, start_datetime.month, int(entry.day),
tzinfo=get_localzone())
if synthetic_date.day < start_datetime.day:
synthetic_date = synthetic_date + relativedelta(months=1)
key = '%s-%s' % (entry.kind_id, entry.day)
if key in entries:
entries[key] = AggregatedLogCount(entry.kind_id, entry.count + entries[key].count,
synthetic_date)
else:
entries[key] = AggregatedLogCount(entry.kind_id, entry.count, synthetic_date)
return entries.values()
def count_repository_actions(self, repository, day):
return model.repositoryactioncount.count_repository_actions(repository, day)
def log_action(self, kind_name, namespace_name=None, performer=None, ip=None, metadata=None,
repository=None, repository_name=None, timestamp=None, is_free_namespace=False):
if self._should_skip_logging and self._should_skip_logging(kind_name, namespace_name,
is_free_namespace):
return
if repository_name is not None:
assert repository is None
assert namespace_name is not None
repository = model.repository.get_repository(namespace_name, repository_name)
model.log.log_action(kind_name, namespace_name, performer=performer, repository=repository,
ip=ip, metadata=metadata or {}, timestamp=timestamp)
def yield_logs_for_export(self, start_datetime, end_datetime, repository_id=None,
namespace_id=None, max_query_time=None):
# Using an adjusting scale, start downloading log rows in batches, starting at
# MINIMUM_RANGE_SIZE and doubling until we've reached EXPECTED_ITERATION_LOG_COUNT or
# the lookup range has reached MAXIMUM_RANGE_SIZE. If at any point this operation takes
    # longer than max_query_time, terminate the batch operation as timed out.
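    # For example (illustrative numbers only): with MINIMUM_RANGE_SIZE = 1 the ranges grow
    # 1s, 2s, 4s, ... while each batch returns fewer than EXPECTED_ITERATION_LOG_COUNT rows,
    # capped at MAXIMUM_RANGE_SIZE (~1 month); once a batch is large enough the range stops
    # growing.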
batch_start_time = datetime.utcnow()
current_start_datetime = start_datetime
current_batch_size = timedelta(seconds=MINIMUM_RANGE_SIZE)
while current_start_datetime < end_datetime:
# Verify we haven't been working for too long.
work_elapsed = datetime.utcnow() - batch_start_time
if max_query_time is not None and work_elapsed > max_query_time:
logger.error('Retrieval of logs `%s/%s` timed out with time of `%s`',
namespace_id, repository_id, work_elapsed)
raise LogsIterationTimeout()
current_end_datetime = current_start_datetime + current_batch_size
current_end_datetime = min(current_end_datetime, end_datetime)
# Load the next set of logs.
def load_logs():
logger.debug('Retrieving logs over range %s -> %s with namespace %s and repository %s',
current_start_datetime, current_end_datetime, namespace_id, repository_id)
logs_query = model.log.get_logs_query(namespace=namespace_id,
repository=repository_id,
start_time=current_start_datetime,
end_time=current_end_datetime)
logs = list(logs_query)
for log in logs:
if namespace_id is not None:
assert log.account_id == namespace_id
if repository_id is not None:
assert log.repository_id == repository_id
logs = [Log.for_logentry(log) for log in logs]
return logs
logs, elapsed = _run_and_time(load_logs)
if max_query_time is not None and elapsed > max_query_time:
logger.error('Retrieval of logs for export `%s/%s` with range `%s-%s` timed out at `%s`',
namespace_id, repository_id, current_start_datetime, current_end_datetime,
elapsed)
raise LogsIterationTimeout()
yield logs
# Move forward.
current_start_datetime = current_end_datetime
# Increase the batch size if necessary.
if len(logs) < EXPECTED_ITERATION_LOG_COUNT:
seconds = min(MAXIMUM_RANGE_SIZE, current_batch_size.total_seconds() * 2)
current_batch_size = timedelta(seconds=seconds)
def yield_log_rotation_context(self, cutoff_date, min_logs_per_rotation):
""" Yield a context manager for a group of outdated logs. """
for log_model in LOG_MODELS:
while True:
with UseThenDisconnect(config.app_config):
start_id = get_stale_logs_start_id(log_model)
if start_id is None:
logger.warning('Failed to find start id')
break
logger.debug('Found starting ID %s', start_id)
lookup_end_id = start_id + min_logs_per_rotation
logs = [log for log in get_stale_logs(start_id, lookup_end_id,
log_model, cutoff_date)]
if not logs:
logger.debug('No further logs found')
break
end_id = max([log.id for log in logs])
context = DatabaseLogRotationContext(logs, log_model, start_id, end_id)
yield context
def _run_and_time(fn):
start_time = datetime.utcnow()
result = fn()
return result, datetime.utcnow() - start_time
table_logs_model = TableLogsModel()
class DatabaseLogRotationContext(LogRotationContextInterface):
"""
DatabaseLogRotationContext represents a batch of logs to be archived together.
  i.e. a set of logs to be archived in the same file (based on the number of logs per rotation).
When completed without exceptions, this context will delete the stale logs
from rows `start_id` to `end_id`.
"""
def __init__(self, logs, log_model, start_id, end_id):
self.logs = logs
self.log_model = log_model
self.start_id = start_id
self.end_id = end_id
def __enter__(self):
return self
def __exit__(self, ex_type, ex_value, ex_traceback):
if ex_type is None and ex_value is None and ex_traceback is None:
with UseThenDisconnect(config.app_config):
logger.debug('Deleting logs from IDs %s to %s', self.start_id, self.end_id)
delete_stale_logs(self.start_id, self.end_id, self.log_model)
def yield_logs_batch(self):
""" Yield a batch of logs and a filename for that batch. """
filename = '%d-%d-%s.txt.gz' % (self.start_id, self.end_id,
self.log_model.__name__.lower())
yield self.logs, filename

View file

View file

@ -0,0 +1,390 @@
import json
import uuid
import fnmatch
from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime
import dateutil.parser
from httmock import urlmatch, HTTMock
FAKE_ES_HOST = 'fakees'
EMPTY_RESULT = {
'hits': {'hits': [], 'total': 0},
'_shards': {'successful': 1, 'total': 1},
}
def parse_query(query):
if not query:
return {}
return {s.split('=')[0]: s.split('=')[1] for s in query.split("&")}
@contextmanager
def fake_elasticsearch(allow_wildcard=True):
templates = {}
docs = defaultdict(list)
scrolls = {}
id_counter = [1]
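  # In-memory state for the fake: index templates, documents per index, open scroll ids and
  # a document id counter. The function is meant to be used as a context manager around test
  # code that talks to FAKE_ES_HOST, e.g. `with fake_elasticsearch(): ...`.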
def transform(value, field_name):
# TODO: implement this using a real index template if we ever need more than a few
# fields here.
if field_name == 'datetime':
if isinstance(value, int):
return datetime.utcfromtimestamp(value / 1000)
parsed = dateutil.parser.parse(value)
return parsed
return value
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='GET')
def get_template(url, request):
template_name = url[len('/_template/'):]
if template_name in templates:
return {'status_code': 200}
return {'status_code': 404}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='PUT')
def put_template(url, request):
template_name = url[len('/_template/'):]
templates[template_name] = True
return {'status_code': 201}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_doc', method='POST')
def post_doc(url, request):
index_name, _ = url.path[1:].split('/')
item = json.loads(request.body)
item['_id'] = item['random_id']
id_counter[0] += 1
docs[index_name].append(item)
return {
'status_code': 204,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps({
"result": "created",
}),
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='DELETE')
def index_delete(url, request):
index_name_or_pattern = url.path[1:]
to_delete = []
for index_name in docs.keys():
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
to_delete.append(index_name)
for index in to_delete:
docs.pop(index)
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': {'acknowledged': True}
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='GET')
def index_lookup(url, request):
index_name_or_pattern = url.path[1:]
found = {}
for index_name in docs.keys():
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
found[index_name] = {}
if not found:
return {
'status_code': 404,
}
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(found),
}
def _match_query(index_name_or_pattern, query):
found = []
found_index = False
for index_name in docs.keys():
if not allow_wildcard and index_name_or_pattern.find('*') >= 0:
break
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
found_index = True
def _is_match(doc, current_query):
if current_query is None:
return True
for filter_type, filter_params in current_query.iteritems():
for field_name, filter_props in filter_params.iteritems():
if filter_type == 'range':
lt = transform(filter_props['lt'], field_name)
gte = transform(filter_props['gte'], field_name)
doc_value = transform(doc[field_name], field_name)
if not (doc_value < lt and doc_value >= gte):
return False
elif filter_type == 'term':
doc_value = transform(doc[field_name], field_name)
return doc_value == filter_props
elif filter_type == 'terms':
doc_value = transform(doc[field_name], field_name)
return doc_value in filter_props
elif filter_type == 'bool':
assert not 'should' in filter_params, 'should is unsupported'
must = filter_params.get('must')
must_not = filter_params.get('must_not')
filter_bool = filter_params.get('filter')
if must:
for check in must:
if not _is_match(doc, check):
return False
if must_not:
for check in must_not:
if _is_match(doc, check):
return False
if filter_bool:
for check in filter_bool:
if not _is_match(doc, check):
return False
else:
raise Exception('Unimplemented query %s: %s' % (filter_type, query))
return True
for doc in docs[index_name]:
if not _is_match(doc, query):
continue
found.append({'_source': doc, '_index': index_name})
return found, found_index or (index_name_or_pattern.find('*') >= 0)
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_count$', method='GET')
def count_docs(url, request):
request = json.loads(request.body)
index_name_or_pattern, _ = url.path[1:].split('/')
found, found_index = _match_query(index_name_or_pattern, request['query'])
if not found_index:
return {
'status_code': 404,
}
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps({'count': len(found)}),
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='GET')
def lookup_scroll(url, request):
request_obj = json.loads(request.body)
scroll_id = request_obj['scroll_id']
if scroll_id in scrolls:
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(scrolls[scroll_id]),
}
return {
'status_code': 404,
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='DELETE')
def delete_scroll(url, request):
request = json.loads(request.body)
for scroll_id in request['scroll_id']:
scrolls.pop(scroll_id, None)
return {
'status_code': 404,
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_search$', method='GET')
def lookup_docs(url, request):
query_params = parse_query(url.query)
request = json.loads(request.body)
index_name_or_pattern, _ = url.path[1:].split('/')
# Find matching docs.
query = request.get('query')
found, found_index = _match_query(index_name_or_pattern, query)
if not found_index:
return {
'status_code': 404,
}
# Sort.
sort = request.get('sort')
if sort:
if sort == ['_doc'] or sort == '_doc':
found.sort(key=lambda x: x['_source']['_id'])
else:
def get_sort_key(item):
source = item['_source']
key = ''
for sort_config in sort:
for sort_key, direction in sort_config.iteritems():
assert direction == 'desc'
sort_key = sort_key.replace('.keyword', '')
key += str(transform(source[sort_key], sort_key))
key += '|'
return key
found.sort(key=get_sort_key, reverse=True)
# Search after.
search_after = request.get('search_after')
if search_after:
sort_fields = []
for sort_config in sort:
if isinstance(sort_config, unicode):
sort_fields.append(sort_config)
continue
for sort_key, _ in sort_config.iteritems():
sort_key = sort_key.replace('.keyword', '')
sort_fields.append(sort_key)
for index, search_after_value in enumerate(search_after):
field_name = sort_fields[index]
value = transform(search_after_value, field_name)
if field_name == '_doc':
found = [f for f in found if transform(f['_source']['_id'], field_name) > value]
else:
found = [f for f in found if transform(f['_source'][field_name], field_name) < value]
if len(found) < 2:
break
if field_name == '_doc':
          if found[0]['_source']['_id'] != found[1]['_source']['_id']:
break
else:
          if found[0]['_source'][field_name] != found[1]['_source'][field_name]:
break
# Size.
size = request.get('size')
if size:
found = found[0:size]
# Aggregation.
# {u'query':
# {u'range':
# {u'datetime': {u'lt': u'2019-06-27T15:45:09.768085',
# u'gte': u'2019-06-27T15:35:09.768085'}}},
# u'aggs': {
# u'by_id': {
# u'terms': {u'field': u'kind_id'},
# u'aggs': {
# u'by_date': {u'date_histogram': {u'field': u'datetime', u'interval': u'day'}}}}},
# u'size': 0}
def _by_field(agg_field_params, results):
aggregated_by_field = defaultdict(list)
for agg_means, agg_means_params in agg_field_params.iteritems():
if agg_means == 'terms':
field_name = agg_means_params['field']
for result in results:
value = result['_source'][field_name]
aggregated_by_field[value].append(result)
elif agg_means == 'date_histogram':
field_name = agg_means_params['field']
interval = agg_means_params['interval']
for result in results:
value = transform(result['_source'][field_name], field_name)
aggregated_by_field[getattr(value, interval)].append(result)
elif agg_means == 'aggs':
# Skip. Handled below.
continue
else:
raise Exception('Unsupported aggregation method: %s' % agg_means)
# Invoke the aggregation recursively.
buckets = []
for field_value, field_results in aggregated_by_field.iteritems():
aggregated = _aggregate(agg_field_params, field_results)
if isinstance(aggregated, list):
aggregated = {'doc_count': len(aggregated)}
aggregated['key'] = field_value
buckets.append(aggregated)
return {'buckets': buckets}
def _aggregate(query_config, results):
agg_params = query_config.get(u'aggs')
if not agg_params:
return results
by_field_name = {}
for agg_field_name, agg_field_params in agg_params.iteritems():
by_field_name[agg_field_name] = _by_field(agg_field_params, results)
return by_field_name
final_result = {
'hits': {
'hits': found,
'total': len(found),
},
'_shards': {
'successful': 1,
'total': 1,
},
'aggregations': _aggregate(request, found),
}
if query_params.get('scroll'):
scroll_id = str(uuid.uuid4())
scrolls[scroll_id] = EMPTY_RESULT
final_result['_scroll_id'] = scroll_id
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(final_result),
}
@urlmatch(netloc=FAKE_ES_HOST)
def catchall_handler(url, request):
print "Unsupported URL: %s %s" % (request.method, url, )
return {'status_code': 501}
handlers = [get_template, put_template, index_delete, index_lookup, post_doc, count_docs,
lookup_docs, lookup_scroll, delete_scroll, catchall_handler]
with HTTMock(*handlers):
yield

View file

@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
import json
from datetime import datetime
from dateutil.parser import parse
from data.logs_model.datatypes import LogEntriesPage, Log, AggregatedLogCount
def _status(d, code=200):
return {"status_code": code, "content": json.dumps(d)}
def _shards(d, total=5, failed=0, successful=5):
d.update({"_shards": {"total": total, "failed": failed, "successful": successful}})
return d
def _hits(hits):
return {"hits": {"total": len(hits), "max_score": None, "hits": hits}}
INDEX_LIST_RESPONSE_HIT1_HIT2 = _status({
"logentry_2018-03-08": {},
"logentry_2018-04-02": {}
})
INDEX_LIST_RESPONSE_HIT2 = _status({
"logentry_2018-04-02": {}
})
INDEX_LIST_RESPONSE = _status({
"logentry_2019-01-01": {},
"logentry_2017-03-08": {},
"logentry_2018-03-08": {},
"logentry_2018-04-02": {}
})
DEFAULT_TEMPLATE_RESPONSE = _status({"acknowledged": True})
INDEX_RESPONSE_2019_01_01 = _status(
_shards({
"_index": "logentry_2019-01-01",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 0,
"_primary_term": 1,
"result": "created"
}))
INDEX_RESPONSE_2017_03_08 = _status(
_shards({
"_index": "logentry_2017-03-08",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 0,
"_primary_term": 1,
"result": "created"
}))
FAILURE_400 = _status({}, 400)
INDEX_REQUEST_2019_01_01 = [
"logentry_2019-01-01", {
"account_id":
1,
"repository_id":
1,
"ip":
"192.168.1.1",
"random_id":
233,
"datetime":
"2019-01-01T03:30:00",
"metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
"performer_id":
1,
"kind_id":
1
}
]
INDEX_REQUEST_2017_03_08 = [
"logentry_2017-03-08", {
"repository_id":
1,
"account_id":
1,
"ip":
"192.168.1.1",
"random_id":
233,
"datetime":
"2017-03-08T03:30:00",
"metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
"performer_id":
1,
"kind_id":
2
}
]
_hit1 = {
"_index": "logentry_2018-03-08",
"_type": "doc",
"_id": "1",
"_score": None,
"_source": {
"random_id":
233,
"kind_id":
1,
"account_id":
1,
"performer_id":
1,
"repository_id":
1,
"ip":
"192.168.1.1",
"metadata_json":
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
"datetime":
"2018-03-08T03:30",
},
"sort": [1520479800000, 233]
}
_hit2 = {
"_index": "logentry_2018-04-02",
"_type": "doc",
"_id": "2",
"_score": None,
"_source": {
"random_id":
233,
"kind_id":
2,
"account_id":
1,
"performer_id":
1,
"repository_id":
1,
"ip":
"192.168.1.2",
"metadata_json":
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
"datetime":
"2018-04-02T03:30",
},
"sort": [1522639800000, 233]
}
_log1 = Log(
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
"192.168.1.1", parse("2018-03-08T03:30"), "user1.email", "user1.username", "user1.robot",
"user1.organization", "user1.username", "user1.email", "user1.robot", 1)
_log2 = Log(
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
"192.168.1.2", parse("2018-04-02T03:30"), "user1.email", "user1.username", "user1.robot",
"user1.organization", "user1.username", "user1.email", "user1.robot", 2)
SEARCH_RESPONSE_START = _status(_shards(_hits([_hit1, _hit2])))
SEARCH_RESPONSE_END = _status(_shards(_hits([_hit2])))
SEARCH_REQUEST_START = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}]
}
},
"size": 2
}
SEARCH_REQUEST_END = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}]
}
},
"search_after": [1520479800000, 233],
"size": 2
}
SEARCH_REQUEST_FILTER = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}, {
"bool": {
"must_not": [{
"terms": {
"kind_id": [1]
}
}]
}
}]
}
},
"size": 2
}
SEARCH_PAGE_TOKEN = {
"datetime": datetime(2018, 3, 8, 3, 30).isoformat(),
"random_id": 233,
"page_number": 1
}
SEARCH_PAGE_START = LogEntriesPage(logs=[_log1], next_page_token=SEARCH_PAGE_TOKEN)
SEARCH_PAGE_END = LogEntriesPage(logs=[_log2], next_page_token=None)
SEARCH_PAGE_EMPTY = LogEntriesPage([], None)
AGGS_RESPONSE = _status(
_shards({
"hits": {
"total": 4,
"max_score": None,
"hits": []
},
"aggregations": {
"by_id": {
"doc_count_error_upper_bound":
0,
"sum_other_doc_count":
0,
"buckets": [{
"key": 2,
"doc_count": 3,
"by_date": {
"buckets": [{
"key_as_string": "2009-11-12T00:00:00.000Z",
"key": 1257984000000,
"doc_count": 1
}, {
"key_as_string": "2009-11-13T00:00:00.000Z",
"key": 1258070400000,
"doc_count": 0
}, {
"key_as_string": "2009-11-14T00:00:00.000Z",
"key": 1258156800000,
"doc_count": 2
}]
}
}, {
"key": 1,
"doc_count": 1,
"by_date": {
"buckets": [{
"key_as_string": "2009-11-15T00:00:00.000Z",
"key": 1258243200000,
"doc_count": 1
}]
}
}]
}
}
}))
AGGS_REQUEST = {
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}, {
"bool": {
"must_not": [{
"terms": {
"kind_id": [2]
}
}]
}
}],
"must": [{
"range": {
"datetime": {
"lt": "2018-04-08T03:30:00",
"gte": "2018-03-08T03:30:00"
}
}
}]
}
},
"aggs": {
"by_id": {
"terms": {
"field": "kind_id"
},
"aggs": {
"by_date": {
"date_histogram": {
"field": "datetime",
"interval": "day"
}
}
}
}
},
"size": 0
}
AGGS_COUNT = [
AggregatedLogCount(1, 1, parse("2009-11-15T00:00:00.000")),
AggregatedLogCount(2, 1, parse("2009-11-12T00:00:00.000")),
AggregatedLogCount(2, 2, parse("2009-11-14T00:00:00.000"))
]
COUNT_REQUEST = {
"query": {
"bool": {
"filter": [{
"term": {
"repository_id": 1
}
}]
}
}
}
COUNT_RESPONSE = _status(_shards({
"count": 1,
}))
# Assume there are two pages of scroll results.
_scroll_id = "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAACEmFkk1aGlTRzdSUWllejZmYTlEYTN3SVEAAAAAAAAhJRZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lRAAAAAAAAHtAWLWZpaFZXVzVSTy1OTXA5V3MwcHZrZwAAAAAAAB7RFi1maWhWV1c1Uk8tTk1wOVdzMHB2a2cAAAAAAAAhJxZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lR"
def _scroll(d):
d["_scroll_id"] = _scroll_id
return d
SCROLL_CREATE = _status(_shards(_scroll(_hits([_hit1]))))
SCROLL_GET = _status(_shards(_scroll(_hits([_hit2]))))
SCROLL_GET_2 = _status(_shards(_scroll(_hits([]))))
SCROLL_DELETE = _status({"succeeded": True, "num_freed": 5})
SCROLL_LOGS = [[_log1], [_log2]]
SCROLL_REQUESTS = [
[
"5m", 1, {
"sort": "_doc",
"query": {
"range": {
"datetime": {
"lt": "2018-04-02T00:00:00",
"gte": "2018-03-08T00:00:00"
}
}
}
}
],
[{"scroll": "5m", "scroll_id": _scroll_id}],
[{"scroll":"5m", "scroll_id": _scroll_id}],
[{"scroll_id": [_scroll_id]}],
]
SCROLL_RESPONSES = [SCROLL_CREATE, SCROLL_GET, SCROLL_GET_2, SCROLL_DELETE]

View file

@ -0,0 +1,130 @@
from datetime import date, datetime, timedelta
from freezegun import freeze_time
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import CombinedLogsModel
from test.fixtures import *
@pytest.fixture()
def first_model():
return InMemoryModel()
@pytest.fixture()
def second_model():
return InMemoryModel()
@pytest.fixture()
def combined_model(first_model, second_model, initialized_db):
return CombinedLogsModel(first_model, second_model)
def test_log_action(first_model, second_model, combined_model, initialized_db):
day = date(2019, 1, 1)
# Write to the combined model.
with freeze_time(day):
combined_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
simple_repo = model.repository.get_repository('devtable', 'simple')
# Make sure it is found in the first model but not the second.
assert combined_model.count_repository_actions(simple_repo, day) == 1
assert first_model.count_repository_actions(simple_repo, day) == 1
assert second_model.count_repository_actions(simple_repo, day) == 0
def test_count_repository_actions(first_model, second_model, combined_model, initialized_db):
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Ensure the counts match as expected.
day = datetime.today() - timedelta(minutes=60)
simple_repo = model.repository.get_repository('devtable', 'simple')
assert first_model.count_repository_actions(simple_repo, day) == 3
assert second_model.count_repository_actions(simple_repo, day) == 2
assert combined_model.count_repository_actions(simple_repo, day) == 5
def test_yield_logs_for_export(first_model, second_model, combined_model, initialized_db):
now = datetime.now()
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
later = datetime.now()
# Ensure the full set of logs is yielded.
first_logs = list(first_model.yield_logs_for_export(now, later))[0]
second_logs = list(second_model.yield_logs_for_export(now, later))[0]
combined = list(combined_model.yield_logs_for_export(now, later))
full_combined = []
for subset in combined:
full_combined.extend(subset)
assert len(full_combined) == len(first_logs) + len(second_logs)
assert full_combined == (first_logs + second_logs)
def test_lookup_logs(first_model, second_model, combined_model, initialized_db):
now = datetime.now()
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
later = datetime.now()
def _collect_logs(model):
page_token = None
all_logs = []
while True:
paginated_logs = model.lookup_logs(now, later, page_token=page_token)
page_token = paginated_logs.next_page_token
all_logs.extend(paginated_logs.logs)
if page_token is None:
break
return all_logs
first_logs = _collect_logs(first_model)
second_logs = _collect_logs(second_model)
combined = _collect_logs(combined_model)
assert len(combined) == len(first_logs) + len(second_logs)
assert combined == (first_logs + second_logs)

View file

@ -0,0 +1,529 @@
# -*- coding: utf-8 -*-
# pylint: disable=redefined-outer-name, wildcard-import
import json
from datetime import datetime, timedelta
import pytest
from mock import patch, Mock
from dateutil.parser import parse
from httmock import urlmatch, HTTMock
from data.model.log import _json_serialize
from data.logs_model.elastic_logs import ElasticsearchLogs, INDEX_NAME_PREFIX, INDEX_DATE_FORMAT
from data.logs_model import configure, LogsModelProxy
from mock_elasticsearch import *
FAKE_ES_HOST = 'fakees'
FAKE_ES_HOST_PATTERN = r'fakees.*'
FAKE_ES_PORT = 443
FAKE_AWS_ACCESS_KEY = None
FAKE_AWS_SECRET_KEY = None
FAKE_AWS_REGION = None
@pytest.fixture()
def logs_model_config():
conf = {
'LOGS_MODEL': 'elasticsearch',
'LOGS_MODEL_CONFIG': {
'producer': 'elasticsearch',
'elasticsearch_config': {
'host': FAKE_ES_HOST,
'port': FAKE_ES_PORT,
'access_key': FAKE_AWS_ACCESS_KEY,
'secret_key': FAKE_AWS_SECRET_KEY,
'aws_region': FAKE_AWS_REGION
}
}
}
return conf
FAKE_LOG_ENTRY_KINDS = {'push_repo': 1, 'pull_repo': 2}
FAKE_NAMESPACES = {
'user1':
Mock(id=1, organization="user1.organization", username="user1.username", email="user1.email",
robot="user1.robot"),
'user2':
Mock(id=2, organization="user2.organization", username="user2.username", email="user2.email",
robot="user2.robot")
}
FAKE_REPOSITORIES = {
'user1/repo1': Mock(id=1, namespace_user=FAKE_NAMESPACES['user1']),
'user2/repo2': Mock(id=2, namespace_user=FAKE_NAMESPACES['user2']),
}
@pytest.fixture()
def logs_model():
# prevent logs model from changing
logs_model = LogsModelProxy()
with patch('data.logs_model.logs_model', logs_model):
yield logs_model
@pytest.fixture(scope='function')
def app_config(logs_model_config):
fake_config = {}
fake_config.update(logs_model_config)
with patch("data.logs_model.document_logs_model.config.app_config", fake_config):
yield fake_config
@pytest.fixture()
def mock_page_size():
with patch('data.logs_model.document_logs_model.PAGE_SIZE', 1):
yield
@pytest.fixture()
def mock_max_result_window():
with patch('data.logs_model.document_logs_model.DEFAULT_RESULT_WINDOW', 1):
yield
@pytest.fixture
def mock_random_id():
mock_random = Mock(return_value=233)
with patch('data.logs_model.document_logs_model._random_id', mock_random):
yield
@pytest.fixture()
def mock_db_model():
def get_user_map_by_ids(namespace_ids):
mapping = {}
for i in namespace_ids:
for name in FAKE_NAMESPACES:
if FAKE_NAMESPACES[name].id == i:
mapping[i] = FAKE_NAMESPACES[name]
return mapping
model = Mock(
user=Mock(
get_namespace_user=FAKE_NAMESPACES.get,
get_user_or_org=FAKE_NAMESPACES.get,
get_user=FAKE_NAMESPACES.get,
get_user_map_by_ids=get_user_map_by_ids,
),
repository=Mock(get_repository=lambda user_name, repo_name: FAKE_REPOSITORIES.get(
user_name + '/' + repo_name),
),
log=Mock(
_get_log_entry_kind=lambda name: FAKE_LOG_ENTRY_KINDS[name],
_json_serialize=_json_serialize,
get_log_entry_kinds=Mock(return_value=FAKE_LOG_ENTRY_KINDS),
),
)
with patch('data.logs_model.document_logs_model.model', model), patch(
'data.logs_model.datatypes.model', model):
yield
def parse_query(query):
return {s.split('=')[0]: s.split('=')[1] for s in query.split("&") if s != ""}
@pytest.fixture()
def mock_elasticsearch():
mock = Mock()
mock.template.side_effect = NotImplementedError
mock.index.side_effect = NotImplementedError
mock.count.side_effect = NotImplementedError
mock.scroll_get.side_effect = NotImplementedError
mock.scroll_delete.side_effect = NotImplementedError
mock.search_scroll_create.side_effect = NotImplementedError
mock.search_aggs.side_effect = NotImplementedError
mock.search_after.side_effect = NotImplementedError
mock.list_indices.side_effect = NotImplementedError
@urlmatch(netloc=r'.*', path=r'.*')
def default(url, req):
raise Exception('\nurl={}\nmethod={}\nreq.url={}\nheaders={}\nbody={}'.format(
url, req.method, req.url, req.headers, req.body))
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_template/.*')
def template(url, req):
return mock.template(url.query.split('/')[-1], req.body)
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]+)')
def list_indices(url, req):
return mock.list_indices()
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_[0-9\-]*/_doc')
def index(url, req):
index = url.path.split('/')[1]
body = json.loads(req.body)
body['metadata_json'] = json.loads(body['metadata_json'])
return mock.index(index, body)
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_([0-9\-]*|\*)/_count')
def count(_, req):
return mock.count(json.loads(req.body))
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_search/scroll')
def scroll(url, req):
if req.method == 'DELETE':
return mock.scroll_delete(json.loads(req.body))
elif req.method == 'GET':
request_obj = json.loads(req.body)
return mock.scroll_get(request_obj)
raise NotImplementedError()
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]*)/_search')
def search(url, req):
if "scroll" in url.query:
query = parse_query(url.query)
window_size = query['scroll']
maximum_result_size = int(query['size'])
return mock.search_scroll_create(window_size, maximum_result_size, json.loads(req.body))
elif "aggs" in req.body:
return mock.search_aggs(json.loads(req.body))
else:
return mock.search_after(json.loads(req.body))
with HTTMock(scroll, count, search, index, template, list_indices, default):
yield mock
@pytest.mark.parametrize(
"""
unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
timestamp,
index_response, expected_request, throws
""",
[
# Invalid inputs
pytest.param(
False, 'non-existing', None, None, None,
None,
None, None, True,
id="Invalid Kind"
),
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), 'repo1',
None,
None, None, True,
id="Invalid Parameters"
),
# Remote exceptions
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), None,
None,
FAILURE_400, None, True,
id="Throw on pull log failure"
),
pytest.param(
True, 'pull_repo', 'user1', Mock(id=1), None,
parse("2017-03-08T03:30"),
FAILURE_400, INDEX_REQUEST_2017_03_08, False,
id="Ok on pull log failure"
),
# Success executions
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), None,
parse("2017-03-08T03:30"),
INDEX_RESPONSE_2017_03_08, INDEX_REQUEST_2017_03_08, False,
id="Log with namespace name and repository"
),
pytest.param(
False, 'push_repo', 'user1', None, 'repo1',
parse("2019-01-01T03:30"),
INDEX_RESPONSE_2019_01_01, INDEX_REQUEST_2019_01_01, False,
id="Log with namespace name and repository name"
),
])
def test_log_action(unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
timestamp,
index_response, expected_request, throws,
app_config, logs_model, mock_elasticsearch, mock_db_model, mock_random_id):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.index = Mock(return_value=index_response)
app_config['ALLOW_PULLS_WITHOUT_STRICT_LOGGING'] = unlogged_pulls_ok
configure(app_config)
performer = Mock(id=1)
ip = "192.168.1.1"
metadata = {'key': 'value', 'time': parse("2018-03-08T03:30"), '😂': '😂👌👌👌👌'}
if throws:
with pytest.raises(Exception):
logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
repository_name, timestamp)
else:
logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
repository_name, timestamp)
mock_elasticsearch.index.assert_called_with(*expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds,
page_token,
max_page_count,
search_response,
list_indices_response,
expected_request,
expected_page,
throws
""",
[
# 1st page
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
None,
None,
SEARCH_RESPONSE_START,
INDEX_LIST_RESPONSE_HIT1_HIT2,
SEARCH_REQUEST_START,
SEARCH_PAGE_START,
False,
id="1st page"
),
# Last page
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
SEARCH_PAGE_TOKEN,
None,
SEARCH_RESPONSE_END,
INDEX_LIST_RESPONSE_HIT1_HIT2,
SEARCH_REQUEST_END,
SEARCH_PAGE_END,
False,
id="Search using pagination token"
),
# Filter
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
['push_repo'],
None,
None,
SEARCH_RESPONSE_END,
INDEX_LIST_RESPONSE_HIT2,
SEARCH_REQUEST_FILTER,
SEARCH_PAGE_END,
False,
id="Filtered search"
),
# Max page count
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
SEARCH_PAGE_TOKEN,
1,
AssertionError, # Assert that it should not reach the ES server
None,
None,
SEARCH_PAGE_EMPTY,
False,
id="Page token reaches maximum page count",
),
])
def test_lookup_logs(start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds,
page_token,
max_page_count,
search_response,
list_indices_response,
expected_request,
expected_page,
throws,
logs_model, mock_elasticsearch, mock_db_model, mock_page_size, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_after = Mock(return_value=search_response)
mock_elasticsearch.list_indices = Mock(return_value=list_indices_response)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
namespace_name, filter_kinds, page_token, max_page_count)
else:
page = logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
namespace_name, filter_kinds, page_token, max_page_count)
assert page == expected_page
if expected_request:
mock_elasticsearch.search_after.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds, search_response, expected_request, expected_counts, throws
""",
[
# Valid
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
['pull_repo'], AGGS_RESPONSE, AGGS_REQUEST, AGGS_COUNT, False,
id="Valid Counts"
),
# Invalid case: date range too big
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-09T03:30'),
'user1', 'repo1', 'user1',
[], None, None, None, True,
id="Throw on date range too big"
)
])
def test_get_aggregated_log_counts(start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds, search_response, expected_request, expected_counts, throws,
logs_model, mock_elasticsearch, mock_db_model, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_aggs = Mock(return_value=search_response)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds)
else:
counts = logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds)
assert set(counts) == set(expected_counts)
if expected_request:
mock_elasticsearch.search_aggs.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
repository,
day,
count_response, expected_request, expected_count, throws
""",
[
pytest.param(
FAKE_REPOSITORIES['user1/repo1'],
parse("2018-03-08").date(),
COUNT_RESPONSE, COUNT_REQUEST, 1, False,
id="Valid Count with 1 as result"),
])
def test_count_repository_actions(repository,
day,
count_response, expected_request, expected_count, throws,
logs_model, mock_elasticsearch, mock_db_model, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.count = Mock(return_value=count_response)
mock_elasticsearch.list_indices = Mock(return_value=INDEX_LIST_RESPONSE)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.count_repository_actions(repository, day)
else:
count = logs_model.count_repository_actions(repository, day)
assert count == expected_count
if expected_request:
mock_elasticsearch.count.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
repository_id, namespace_id,
max_query_time, scroll_responses, expected_requests, expected_logs, throws
""",
[
pytest.param(
parse("2018-03-08"), parse("2018-04-02"),
1, 1,
timedelta(seconds=10), SCROLL_RESPONSES, SCROLL_REQUESTS, SCROLL_LOGS, False,
id="Scroll 3 pages with page size = 1"
),
])
def test_yield_logs_for_export(start_datetime, end_datetime,
repository_id, namespace_id,
max_query_time, scroll_responses, expected_requests, expected_logs, throws,
logs_model, mock_elasticsearch, mock_db_model, mock_max_result_window, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_scroll_create = Mock(return_value=scroll_responses[0])
mock_elasticsearch.scroll_get = Mock(side_effect=scroll_responses[1:-1])
mock_elasticsearch.scroll_delete = Mock(return_value=scroll_responses[-1])
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.yield_logs_for_export(start_datetime, end_datetime, max_query_time=max_query_time)
else:
log_generator = logs_model.yield_logs_for_export(start_datetime, end_datetime,
max_query_time=max_query_time)
counter = 0
for logs in log_generator:
if counter == 0:
mock_elasticsearch.search_scroll_create.assert_called_with(*expected_requests[counter])
else:
mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[counter])
assert expected_logs[counter] == logs
counter += 1
    # The last two requests must be:
    #  1. a scroll GET whose response has 0 hits, which indicates the termination condition
    #  2. the scroll DELETE request
mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[-2])
mock_elasticsearch.scroll_delete.assert_called_with(*expected_requests[-1])
@pytest.mark.parametrize('prefix, is_valid', [
pytest.param('..', False, id='Invalid `..`'),
pytest.param('.', False, id='Invalid `.`'),
pytest.param('-prefix', False, id='Invalid prefix start -'),
pytest.param('_prefix', False, id='Invalid prefix start _'),
pytest.param('+prefix', False, id='Invalid prefix start +'),
pytest.param('prefix_with_UPPERCASES', False, id='Invalid uppercase'),
pytest.param('valid_index', True, id='Valid prefix'),
pytest.param('valid_index_with_numbers1234', True, id='Valid prefix with numbers'),
pytest.param('a'*256, False, id='Prefix too long')
])
def test_valid_index_prefix(prefix, is_valid):
assert ElasticsearchLogs._valid_index_prefix(prefix) == is_valid
@pytest.mark.parametrize('index, cutoff_date, expected_result', [
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 8),
True,
id="Index older than cutoff"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 4),
False,
id="Index younger than cutoff"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 6, 23),
False,
id="Index older than cutoff but timedelta less than 1 day"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 7),
True,
id="Index older than cutoff by exactly one day"
),
])
def test_can_delete_index(index, cutoff_date, expected_result):
es = ElasticsearchLogs(index_prefix=INDEX_NAME_PREFIX)
assert datetime.strptime(index.split(es._index_prefix, 1)[-1], INDEX_DATE_FORMAT)
assert es.can_delete_index(index, cutoff_date) == expected_result

View file

@ -0,0 +1,473 @@
from datetime import datetime, timedelta, date
from data.logs_model.datatypes import AggregatedLogCount
from data.logs_model.table_logs_model import TableLogsModel
from data.logs_model.combined_model import CombinedLogsModel
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import _merge_aggregated_log_counts
from data.logs_model.document_logs_model import _date_range_in_single_index, DocumentLogsModel
from data.logs_model.interface import LogsIterationTimeout
from data.logs_model.test.fake_elasticsearch import FAKE_ES_HOST, fake_elasticsearch
from data.database import LogEntry, LogEntry2, LogEntry3, LogEntryKind
from data import model
from test.fixtures import *
@pytest.fixture()
def mock_page_size():
page_size = 2
with patch('data.logs_model.document_logs_model.PAGE_SIZE', page_size):
yield page_size
@pytest.fixture()
def clear_db_logs(initialized_db):
LogEntry.delete().execute()
LogEntry2.delete().execute()
LogEntry3.delete().execute()
def combined_model():
return CombinedLogsModel(TableLogsModel(), InMemoryModel())
def es_model():
return DocumentLogsModel(producer='elasticsearch', elasticsearch_config={
'host': FAKE_ES_HOST,
'port': 12345,
})
@pytest.fixture()
def fake_es():
with fake_elasticsearch():
yield
@pytest.fixture(params=[TableLogsModel, InMemoryModel, es_model, combined_model])
def logs_model(request, clear_db_logs, fake_es):
return request.param()
def _lookup_logs(logs_model, start_time, end_time, **kwargs):
logs_found = []
page_token = None
while True:
found = logs_model.lookup_logs(start_time, end_time, page_token=page_token, **kwargs)
logs_found.extend(found.logs)
page_token = found.next_page_token
if not found.logs or not page_token:
break
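  # Pagination should never return the same log twice; the set() comparison below catches
  # duplicates across pages.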
assert len(logs_found) == len(set(logs_found))
return logs_found
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='Flaky on MySQL')
@pytest.mark.parametrize('namespace_name, repo_name, performer_name, check_args, expect_results', [
pytest.param('devtable', 'simple', 'devtable', {}, True, id='no filters'),
pytest.param('devtable', 'simple', 'devtable', {
'performer_name': 'devtable',
}, True, id='matching performer'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
}, True, id='matching namespace'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
'repository_name': 'simple',
}, True, id='matching repository'),
pytest.param('devtable', 'simple', 'devtable', {
'performer_name': 'public',
}, False, id='different performer'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'public',
}, False, id='different namespace'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
'repository_name': 'complex',
}, False, id='different repository'),
])
def test_logs(namespace_name, repo_name, performer_name, check_args, expect_results, logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user(performer_name)
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for index in range(0, 3):
logs_model.log_action(kind.name, namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
found = _lookup_logs(logs_model, start_timestamp, start_timestamp + timedelta(minutes=10),
**check_args)
if expect_results:
assert len(found) == len(kinds) * 3
else:
assert not found
aggregated_counts = logs_model.get_aggregated_log_counts(start_timestamp,
start_timestamp + timedelta(minutes=10),
**check_args)
if expect_results:
assert len(aggregated_counts) == len(kinds)
for ac in aggregated_counts:
assert ac.count == 3
else:
assert not aggregated_counts
@pytest.mark.parametrize('filter_kinds, expect_results', [
pytest.param(None, True),
pytest.param(['push_repo'], True, id='push_repo filter'),
pytest.param(['pull_repo'], True, id='pull_repo filter'),
pytest.param(['push_repo', 'pull_repo'], False, id='push and pull filters')
])
def test_lookup_latest_logs(filter_kinds, expect_results, logs_model):
kind_map = model.log.get_log_entry_kinds()
if filter_kinds:
    ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds]
else:
ignore_ids = []
now = datetime.now()
namespace_name = 'devtable'
repo_name = 'simple'
performer_name = 'devtable'
user = model.user.get_user(performer_name)
size = 3
# Log some push actions
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=1, seconds=11))
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=7, seconds=33))
# Log some pull actions
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=0, seconds=3))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=3, seconds=55))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=5, seconds=3))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=11, seconds=11))
# Get the latest logs
latest_logs = logs_model.lookup_latest_logs(performer_name, repo_name, namespace_name,
filter_kinds=filter_kinds, size=size)
# Test max lookup size
assert len(latest_logs) <= size
# Make sure that the latest logs returned are in decreasing order
assert all(x >= y for x, y in zip(latest_logs, latest_logs[1:]))
if expect_results:
assert latest_logs
# Lookup all logs filtered by kinds and sort them in reverse chronological order
all_logs = _lookup_logs(logs_model, now - timedelta(days=30), now + timedelta(days=30),
filter_kinds=filter_kinds, namespace_name=namespace_name,
repository_name=repo_name)
all_logs = sorted(all_logs, key=lambda l: l.datetime, reverse=True)
# Check that querying all logs does not return the filtered kinds
assert all([log.kind_id not in ignore_ids for log in all_logs])
    # Check that the latest logs contain only the most recent ones
assert latest_logs == all_logs[:len(latest_logs)]
def test_count_repository_actions(logs_model):
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different repo.
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
# Count the actions.
day = date.today()
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 3
complex_repo = model.repository.get_repository('devtable', 'complex')
count = logs_model.count_repository_actions(complex_repo, day)
assert count == 2
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
assert count == 0
def test_yield_log_rotation_context(logs_model):
cutoff_date = datetime.now()
min_logs_per_rotation = 3
# Log some actions to be archived
# One day
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
ip='1.2.3.4', timestamp=cutoff_date-timedelta(days=1, seconds=1))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='5.6.7.8', timestamp=cutoff_date-timedelta(days=1, seconds=2))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple3',
ip='9.10.11.12', timestamp=cutoff_date-timedelta(days=1, seconds=3))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple4',
ip='0.0.0.0', timestamp=cutoff_date-timedelta(days=1, seconds=4))
# Another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=1))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=2))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=3))
found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
assert found is not None and len(found) == 7
# Iterate the logs using the log rotation contexts
all_logs = []
for log_rotation_context in logs_model.yield_log_rotation_context(cutoff_date,
min_logs_per_rotation):
with log_rotation_context as context:
for logs, _ in context.yield_logs_batch():
all_logs.extend(logs)
assert len(all_logs) == 7
found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
assert not found
  # Sort by datetime and make sure the datetimes are strictly increasing, which also verifies
  # that no duplicate logs were returned.
all_logs.sort(key=lambda d: d.datetime)
assert all(x.datetime < y.datetime for x, y in zip(all_logs, all_logs[1:]))
def test_count_repository_actions_with_wildcard_disabled(initialized_db):
with fake_elasticsearch(allow_wildcard=False):
logs_model = es_model()
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different repo.
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
# Count the actions.
day = date.today()
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 3
complex_repo = model.repository.get_repository('devtable', 'complex')
count = logs_model.count_repository_actions(complex_repo, day)
assert count == 2
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
assert count == 0
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='Flaky on MySQL')
def test_yield_logs_for_export(logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user('devtable')
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for index in range(0, 10):
logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
# Yield the logs.
simple_repo = model.repository.get_repository('devtable', 'simple')
logs_found = []
for logs in logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=10),
repository_id=simple_repo.id):
logs_found.extend(logs)
# Ensure we found all added logs.
assert len(logs_found) == len(kinds) * 10
def test_yield_logs_for_export_timeout(logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user('devtable')
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for _ in range(0, 2):
logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
  # Yield the logs. Since the max query time is set to zero, the export should immediately time out.
simple_repo = model.repository.get_repository('devtable', 'simple')
with pytest.raises(LogsIterationTimeout):
list(logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=1),
repository_id=simple_repo.id,
max_query_time=timedelta(seconds=0)))
def test_disabled_namespace(clear_db_logs):
logs_model = TableLogsModel(lambda kind, namespace, is_free: namespace == 'devtable')
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different namespace.
logs_model.log_action('push_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
# Count the actions.
day = datetime.today() - timedelta(minutes=60)
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 0
org_repo = model.repository.get_repository('buynlarge', 'orgrepo')
count = logs_model.count_repository_actions(org_repo, day)
assert count == 3
@pytest.mark.parametrize('aggregated_log_counts1, aggregated_log_counts2, expected_result', [
pytest.param(
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 3, datetime(2019, 6, 7, 0, 0)), # 2
],
[
AggregatedLogCount(1, 5, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)), # 3
],
[
AggregatedLogCount(1, 8, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 10, datetime(2019, 6, 7, 0, 0)), # 2
AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)) # 3
]
),
pytest.param(
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
],
[
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
],
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
]
),
pytest.param(
[],
[AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))],
[AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))]
),
])
def test_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2, expected_result):
assert (sorted(_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2)) ==
sorted(expected_result))
@pytest.mark.parametrize('dt1, dt2, expected_result', [
# Valid dates
pytest.param(date(2019, 6, 17), date(2019, 6, 18), True),
# Invalid dates
pytest.param(date(2019, 6, 17), date(2019, 6, 17), False),
pytest.param(date(2019, 6, 17), date(2019, 6, 19), False),
pytest.param(date(2019, 6, 18), date(2019, 6, 17), False),
# Valid datetimes
pytest.param(datetime(2019, 6, 17, 0, 1), datetime(2019, 6, 17, 0, 2), True),
# Invalid datetimes
pytest.param(datetime(2019, 6, 17, 0, 2), datetime(2019, 6, 17, 0, 1), False),
pytest.param(datetime(2019, 6, 17, 11), datetime(2019, 6, 17, 11) + timedelta(hours=14), False),
])
def test_date_range_in_single_index(dt1, dt2, expected_result):
assert _date_range_in_single_index(dt1, dt2) == expected_result
def test_pagination(logs_model, mock_page_size):
"""
Make sure that pagination does not stop if searching through multiple indices by day,
and the current log count matches the page size while there are still indices to be searched.
"""
day1 = datetime.now()
day2 = day1 + timedelta(days=1)
day3 = day2 + timedelta(days=1)
# Log some actions in day indices
# One day
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
ip='1.2.3.4', timestamp=day1)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple1',
ip='5.6.7.8', timestamp=day1)
found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
assert len(found) == mock_page_size
# Another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='1.1.1.1', timestamp=day2)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='0.0.0.0', timestamp=day2)
# Yet another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='1.1.1.1', timestamp=day3)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='0.0.0.0', timestamp=day3)
found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
assert len(found) == 6

View file

@ -0,0 +1,77 @@
import logging
import pytest
from dateutil.parser import parse
from mock import patch, Mock
import botocore
from data.logs_model import configure
from test_elasticsearch import app_config, logs_model_config, logs_model, mock_elasticsearch, mock_db_model
from mock_elasticsearch import *
logger = logging.getLogger(__name__)
FAKE_KAFKA_BROKERS = ['fake_server1', 'fake_server2']
FAKE_KAFKA_TOPIC = 'sometopic'
FAKE_MAX_BLOCK_SECONDS = 1
@pytest.fixture()
def kafka_logs_producer_config(app_config):
producer_config = {}
producer_config.update(app_config)
kafka_config = {
'bootstrap_servers': FAKE_KAFKA_BROKERS,
'topic': FAKE_KAFKA_TOPIC,
'max_block_seconds': FAKE_MAX_BLOCK_SECONDS
}
producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kafka'
producer_config['LOGS_MODEL_CONFIG']['kafka_config'] = kafka_config
return producer_config
@pytest.fixture()
def kinesis_logs_producer_config(app_config):
producer_config = {}
producer_config.update(app_config)
kinesis_stream_config = {
'stream_name': 'test-stream',
'aws_region': 'fake_region',
'aws_access_key': 'some_key',
'aws_secret_key': 'some_secret'
}
producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kinesis_stream'
producer_config['LOGS_MODEL_CONFIG']['kinesis_stream_config'] = kinesis_stream_config
return producer_config
def test_kafka_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kafka_logs_producer_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
producer_config = kafka_logs_producer_config
with patch('kafka.client_async.KafkaClient.check_version'), patch('kafka.KafkaProducer.send') as mock_send:
configure(producer_config)
logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
None, 'repo1', parse("2019-01-01T03:30"))
mock_send.assert_called_once()
def test_kinesis_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kinesis_logs_producer_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
producer_config = kinesis_logs_producer_config
with patch('botocore.endpoint.EndpointCreator.create_endpoint'), \
patch('botocore.client.BaseClient._make_api_call') as mock_send:
configure(producer_config)
logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
None, 'repo1', parse("2019-01-01T03:30"))
# Check that a PutRecord api call is made.
# NOTE: The second arg of _make_api_call uses a randomized PartitionKey
mock_send.assert_called_once_with(u'PutRecord', mock_send.call_args_list[0][0][1])

View file

154
data/migrations/env.py Normal file
View file

@ -0,0 +1,154 @@
import logging
import os
from logging.config import fileConfig
from urllib import unquote
from alembic import context
from alembic.script.revision import ResolutionError
from alembic.util import CommandError
from sqlalchemy import engine_from_config, pool
from peewee import SqliteDatabase
from data.database import all_models, db
from data.migrations.tester import NoopTester, PopulateTestDataTester
from data.model.sqlalchemybridge import gen_sqlalchemy_metadata
from release import GIT_HEAD, REGION, SERVICE
from util.morecollections import AttrDict
from data.migrations.progress import PrometheusReporter, NullReporter
config = context.config
DB_URI = config.get_main_option('db_uri', 'sqlite:///test/data/test.db')
PROM_LABEL_PREFIX = 'DBA_OP_LABEL_'
# This option exists because alembic needs the db proxy to be configured in order
# to perform migrations. Importing the app initializes that proxy, but we don't want
# that in the case of the config app, since we are explicitly connecting to a db the
# user has passed in and cannot have an import dependency on app.
if config.get_main_option('alembic_setup_app', 'True') == 'True':
from app import app
DB_URI = app.config['DB_URI']
config.set_main_option('sqlalchemy.url', unquote(DB_URI))
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name:
fileConfig(config.config_file_name)
logger = logging.getLogger(__name__)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = gen_sqlalchemy_metadata(all_models)
tables = AttrDict(target_metadata.tables)
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def get_tester():
""" Returns the tester to use. We only return the tester that populates data
if the TEST_MIGRATE env var is set to `true` AND we make sure we're not
connecting to a production database.
"""
if os.environ.get('TEST_MIGRATE', '') == 'true':
url = unquote(DB_URI)
if url.find('amazonaws.com') < 0:
return PopulateTestDataTester()
return NoopTester()
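# Illustrative note (added comment, not in the original source): with TEST_MIGRATE=true and a local
# URI such as sqlite:///test/data/test.db, get_tester() returns PopulateTestDataTester(); any URI
# containing 'amazonaws.com' is treated as production and falls back to the no-op tester.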
def get_progress_reporter():
prom_addr = os.environ.get('DBA_OP_PROMETHEUS_PUSH_GATEWAY_ADDR', None)
if prom_addr is not None:
prom_job = os.environ.get('DBA_OP_JOB_ID')
def _process_label_key(label_key):
return label_key[len(PROM_LABEL_PREFIX):].lower()
labels = {_process_label_key(k): v for k, v in os.environ.items()
if k.startswith(PROM_LABEL_PREFIX)}
return PrometheusReporter(prom_addr, prom_job, labels)
else:
return NullReporter()
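# Illustrative example (added comment; the values are hypothetical): with an environment of
#   DBA_OP_PROMETHEUS_PUSH_GATEWAY_ADDR=pushgateway:9091 DBA_OP_JOB_ID=db-migration DBA_OP_LABEL_STAGE=prod
# the reporter is built as PrometheusReporter('pushgateway:9091', 'db-migration', {'stage': 'prod'}),
# since each DBA_OP_LABEL_* variable has its prefix stripped and its key lower-cased.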
def report_success(ctx=None, step=None, heads=None, run_args=None):
progress_reporter = run_args['progress_reporter']
progress_reporter.report_version_complete(success=True)
def run_migrations_offline():
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = unquote(DB_URI)
context.configure(url=url, target_metadata=target_metadata, transactional_ddl=True)
with context.begin_transaction():
context.run_migrations(tables=tables, tester=get_tester(), progress_reporter=NullReporter())
def run_migrations_online():
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
if (isinstance(db.obj, SqliteDatabase) and
not 'GENMIGRATE' in os.environ and
not 'DB_URI' in os.environ):
print 'Skipping Sqlite migration!'
return
progress_reporter = get_progress_reporter()
engine = engine_from_config(config.get_section(config.config_ini_section),
prefix='sqlalchemy.',
poolclass=pool.NullPool)
connection = engine.connect()
context.configure(connection=connection,
target_metadata=target_metadata,
transactional_ddl=False,
on_version_apply=report_success)
try:
with context.begin_transaction():
try:
context.run_migrations(tables=tables, tester=get_tester(),
progress_reporter=progress_reporter)
except (CommandError, ResolutionError) as ex:
if 'No such revision' not in str(ex):
raise
if not REGION or not GIT_HEAD:
raise
from data.model.release import get_recent_releases
# ignore revision error if we're running the previous release
releases = list(get_recent_releases(SERVICE, REGION).offset(1).limit(1))
if releases and releases[0].version == GIT_HEAD:
logger.warn('Skipping database migration because revision not found')
else:
raise
finally:
connection.close()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

147
data/migrations/migration.sh Executable file
View file

@ -0,0 +1,147 @@
set -e
PARSED_DOCKER_HOST=`echo $DOCKER_HOST | sed 's/tcp:\/\///' | sed 's/:.*//'`
DOCKER_IP="${PARSED_DOCKER_HOST:-127.0.0.1}"
MYSQL_CONFIG_OVERRIDE="{\"DB_URI\":\"mysql+pymysql://root:password@$DOCKER_IP/genschema\"}"
PERCONA_CONFIG_OVERRIDE="{\"DB_URI\":\"mysql+pymysql://root:password@$DOCKER_IP/genschema\"}"
PGSQL_CONFIG_OVERRIDE="{\"DB_URI\":\"postgresql://postgres@$DOCKER_IP/genschema\"}"
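# Usage sketch (added comment, inferred from the functions and flow below): running the script with
# no arguments only tests upgrade/downgrade of the existing revisions against MySQL, Postgres,
# MariaDB and Percona; passing a message, e.g. `./data/migrations/migration.sh "add widget table"`,
# additionally autogenerates a new revision against MySQL via gen_migrate before testing.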
up_mysql() {
# Run a SQL database on port 3306 inside of Docker.
docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql:5.7
echo 'Sleeping for 25...'
sleep 25
# Add the database to mysql.
docker run --rm --link mysql:mysql mysql:5.7 sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword'
}
down_mysql() {
docker kill mysql || true
docker rm -v mysql || true
}
up_mariadb() {
# Run a SQL database on port 3306 inside of Docker.
docker run --name mariadb -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mariadb
echo 'Sleeping for 25...'
sleep 25
# Add the database to mysql.
docker run --rm --link mariadb:mariadb mariadb sh -c 'echo "create database genschema" | mysql -h"$MARIADB_PORT_3306_TCP_ADDR" -P"$MARIADB_PORT_3306_TCP_PORT" -uroot -ppassword'
}
down_mariadb() {
docker kill mariadb || true
docker rm -v mariadb || true
}
up_percona() {
# Run a SQL database on port 3306 inside of Docker.
docker run --name percona -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d percona
echo 'Sleeping for 25...'
sleep 25
  # Add the database to mysql.
docker run --rm --link percona:percona percona sh -c 'echo "create database genschema" | mysql -h $PERCONA_PORT_3306_TCP_ADDR -uroot -ppassword'
}
down_percona() {
docker kill percona || true
docker rm -v percona || true
}
up_postgres() {
# Run a SQL database on port 5432 inside of Docker.
docker run --name postgres -p 5432:5432 -d postgres
  # Sleep for 5s to give the SQL server time to start.
echo 'Sleeping for 5...'
sleep 5
# Add the database to postgres.
docker run --rm --link postgres:postgres postgres sh -c 'echo "create database genschema" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres'
docker run --rm --link postgres:postgres postgres sh -c 'echo "CREATE EXTENSION IF NOT EXISTS pg_trgm;" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres -d genschema'
}
down_postgres() {
docker kill postgres || true
docker rm -v postgres || true
}
gen_migrate() {
# Generate a database with the schema as defined by the existing alembic model.
QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic upgrade head
# Generate the migration to the current model.
QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic revision --autogenerate -m "$2"
}
test_migrate() {
# Generate a database with the schema as defined by the existing alembic model.
echo '> Running upgrade'
TEST_MIGRATE=true QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic upgrade head
# Downgrade to verify it works in both directions.
echo '> Running downgrade'
COUNT=`ls data/migrations/versions/*.py | wc -l | tr -d ' '`
TEST_MIGRATE=true QUAY_OVERRIDE_CONFIG=$1 PYTHONPATH=. alembic downgrade "-$COUNT"
}
down_mysql
down_postgres
down_mariadb
down_percona
# Test (and generate, if requested) via MySQL.
echo '> Starting MySQL'
up_mysql
if [ -n "$*" ]
then
set +e
echo '> Generating Migration'
gen_migrate $MYSQL_CONFIG_OVERRIDE "$@"
set -e
fi
echo '> Testing Migration (mysql)'
set +e
test_migrate $MYSQL_CONFIG_OVERRIDE
set -e
down_mysql
# Test via Postgres.
echo '> Starting Postgres'
up_postgres
echo '> Testing Migration (postgres)'
set +e
test_migrate $PGSQL_CONFIG_OVERRIDE
set -e
down_postgres
# Test via MariaDB.
echo '> Starting MariaDB'
up_mariadb
echo '> Testing Migration (mariadb)'
set +e
test_migrate $MYSQL_CONFIG_OVERRIDE
set -e
down_mariadb
# Test via Percona.
echo '> Starting Percona'
up_percona
echo '> Testing Migration (percona)'
set +e
test_migrate $PERCONA_CONFIG_OVERRIDE
set -e
down_percona

101
data/migrations/progress.py Normal file
View file

@ -0,0 +1,101 @@
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from functools import partial, wraps
from prometheus_client import CollectorRegistry, Gauge, Counter, push_to_gateway
from util.abchelpers import nooper
@add_metaclass(ABCMeta)
class ProgressReporter(object):
""" Implements an interface for reporting progress with the migrations.
"""
@abstractmethod
def report_version_complete(self, success):
""" Called when an entire migration is complete. """
@abstractmethod
def report_step_progress(self):
""" Called when a single step in the migration has been completed. """
@nooper
class NullReporter(ProgressReporter):
""" No-op version of the progress reporter, designed for use when no progress
reporting endpoint is provided. """
class PrometheusReporter(ProgressReporter):
def __init__(self, prom_pushgateway_addr, prom_job, labels, total_steps_num=None):
self._total_steps_num = total_steps_num
self._completed_steps = 0.0
registry = CollectorRegistry()
self._migration_completion_percent = Gauge(
'migration_completion_percent',
'Estimate of the completion percentage of the job',
registry=registry,
)
self._migration_complete_total = Counter(
'migration_complete_total',
'Binary value of whether or not the job is complete',
registry=registry,
)
self._migration_failed_total = Counter(
'migration_failed_total',
'Binary value of whether or not the job has failed',
registry=registry,
)
self._migration_items_completed_total = Counter(
'migration_items_completed_total',
'Number of items this migration has completed',
registry=registry,
)
self._push = partial(push_to_gateway,
prom_pushgateway_addr,
job=prom_job,
registry=registry,
grouping_key=labels,
)
def report_version_complete(self, success=True):
if success:
self._migration_complete_total.inc()
else:
self._migration_failed_total.inc()
self._migration_completion_percent.set(1.0)
self._push()
def report_step_progress(self):
self._migration_items_completed_total.inc()
if self._total_steps_num is not None:
self._completed_steps += 1
      self._migration_completion_percent.set(self._completed_steps / self._total_steps_num)
self._push()
class ProgressWrapper(object):
def __init__(self, delegate_module, progress_monitor):
self._delegate_module = delegate_module
self._progress_monitor = progress_monitor
def __getattr__(self, attr_name):
    # Will raise a proper AttributeError if the delegate does not define the attribute
    maybe_callable = getattr(self._delegate_module, attr_name)
if callable(maybe_callable):
      # Wrap the callable so that each invocation also reports step progress
      # to the progress monitor after the delegate call returns
@wraps(maybe_callable)
def wrapped_method(*args, **kwargs):
result = maybe_callable(*args, **kwargs)
self._progress_monitor.report_step_progress()
return result
return wrapped_method
return maybe_callable
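# Usage sketch (added comment; `reporter` is hypothetical): wrapping the alembic op module as
# `op = ProgressWrapper(original_op, reporter)` lets migration code call op.add_column(...),
# op.create_index(...), etc. unchanged, while every call also invokes
# reporter.report_step_progress() after the delegate returns.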

View file

@ -0,0 +1,27 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision}
Create Date: ${create_date}
"""
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
from alembic import op as original_op
from progress import ProgressWrapper
import sqlalchemy as sa
${imports if imports else ""}
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
${upgrades if upgrades else "pass"}
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,21 @@
import pytest
from mock import patch
from data.runmigration import run_alembic_migration
from alembic.script import ScriptDirectory
from test.fixtures import *
@pytest.mark.parametrize('db_uri, is_valid', [
('postgresql://devtable:password@quay-postgres/registry_database', True),
('postgresql://devtable:password%25@quay-postgres/registry_database', False),
('postgresql://devtable:password%%25@quay-postgres/registry_database', True),
('postgresql://devtable@db:password@quay-postgres/registry_database', True),
])
def test_alembic_db_uri(db_uri, is_valid):
""" Test if the given URI is escaped for string interpolation (Python's configparser). """
with patch('alembic.script.ScriptDirectory.run_env') as m:
if is_valid:
run_alembic_migration(db_uri)
else:
with pytest.raises(ValueError):
run_alembic_migration(db_uri)
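# Note added for clarity (not in the original file): alembic passes the URI through Python's
# configparser, where a literal '%' must be escaped as '%%'; that is why 'password%25' above is
# expected to be rejected while 'password%%25' is accepted.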

132
data/migrations/tester.py Normal file
View file

@ -0,0 +1,132 @@
import json
import logging
import uuid
from abc import ABCMeta, abstractmethod
from datetime import datetime
from six import add_metaclass
from alembic import op
from sqlalchemy import text
from util.abchelpers import nooper
logger = logging.getLogger(__name__)
def escape_table_name(table_name):
if op.get_bind().engine.name == 'postgresql':
# Needed for the `user` table.
return '"%s"' % table_name
return table_name
class DataTypes(object):
@staticmethod
def DateTime():
return datetime.now()
@staticmethod
def Date():
return datetime.now()
@staticmethod
def String():
return 'somestringvalue'
@staticmethod
def Token():
return '%s%s' % ('a' * 60, 'b' * 60)
@staticmethod
def UTF8Char():
return 'some other value'
@staticmethod
def UUID():
return str(uuid.uuid4())
@staticmethod
def JSON():
return json.dumps(dict(foo='bar', baz='meh'))
@staticmethod
def Boolean():
if op.get_bind().engine.name == 'postgresql':
return True
return 1
@staticmethod
def BigInteger():
return 21474836470
@staticmethod
def Integer():
return 42
@staticmethod
def Constant(value):
def get_value():
return value
return get_value
@staticmethod
def Foreign(table_name):
def get_index():
result = op.get_bind().execute("SELECT id FROM %s LIMIT 1" % escape_table_name(table_name))
try:
return list(result)[0][0]
except IndexError:
raise Exception('Could not find row for table %s' % table_name)
finally:
result.close()
return get_index
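# Illustrative note (added comment): DataTypes.Foreign('repository') returns a callable that, when
# invoked during test-data population, looks up an existing repository.id to use as the foreign-key
# value, and raises if the referenced table is empty.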
@add_metaclass(ABCMeta)
class MigrationTester(object):
""" Implements an interface for adding testing capabilities to the
data model migration system in Alembic.
"""
TestDataType = DataTypes
@abstractmethod
def populate_table(self, table_name, fields):
""" Called to populate a table with the given fields filled in with testing data. """
@abstractmethod
def populate_column(self, table_name, col_name, field_type):
""" Called to populate a column in a table to be filled in with testing data. """
@nooper
class NoopTester(MigrationTester):
""" No-op version of the tester, designed for production workloads. """
class PopulateTestDataTester(MigrationTester):
def populate_table(self, table_name, fields):
columns = {field_name: field_type() for field_name, field_type in fields}
field_name_vars = [':' + field_name for field_name, _ in fields]
if op.get_bind().engine.name == 'postgresql':
field_names = ["%s" % field_name for field_name, _ in fields]
else:
field_names = ["`%s`" % field_name for field_name, _ in fields]
table_name = escape_table_name(table_name)
query = text('INSERT INTO %s (%s) VALUES (%s)' % (table_name, ', '.join(field_names),
', '.join(field_name_vars)))
logger.info("Executing test query %s with values %s", query, columns.values())
op.get_bind().execute(query, **columns)
def populate_column(self, table_name, col_name, field_type):
col_value = field_type()
row_id = DataTypes.Foreign(table_name)()
table_name = escape_table_name(table_name)
update_text = text("UPDATE %s SET %s=:col_value where ID=:row_id" % (table_name, col_name))
logger.info("Executing test query %s with value %s on row %s", update_text, col_value, row_id)
op.get_bind().execute(update_text, col_value=col_value, row_id=row_id)
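# Illustrative example (added comment; the values come from DataTypes above): calling
# populate_table('user', [('username', DataTypes.String)]) builds
#   INSERT INTO "user" (username) VALUES (:username)
# on PostgreSQL (backticked column names on other engines) and executes it with
# username='somestringvalue'.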

View file

@ -0,0 +1,33 @@
"""Add creation date to User table
Revision ID: 0cf50323c78b
Revises: 87fbbc224f10
Create Date: 2018-03-09 13:19:41.903196
"""
# revision identifiers, used by Alembic.
revision = '0cf50323c78b'
down_revision = '87fbbc224f10'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('user', sa.Column('creation_date', sa.DateTime(), nullable=True))
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('user', 'creation_date', tester.TestDataType.DateTime)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('user', 'creation_date')
# ### end Alembic commands ###

View file

@ -0,0 +1,100 @@
"""Add Tag, TagKind and ManifestChild tables
Revision ID: 10f45ee2310b
Revises: 13411de1c0ff
Create Date: 2018-10-29 15:22:53.552216
"""
# revision identifiers, used by Alembic.
revision = '10f45ee2310b'
down_revision = '13411de1c0ff'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('tagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagkind'))
)
op.create_index('tagkind_name', 'tagkind', ['name'], unique=True)
op.create_table('manifestchild',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('child_manifest_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['child_manifest_id'], ['manifest.id'], name=op.f('fk_manifestchild_child_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestchild_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestchild_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestchild'))
)
op.create_index('manifestchild_child_manifest_id', 'manifestchild', ['child_manifest_id'], unique=False)
op.create_index('manifestchild_manifest_id', 'manifestchild', ['manifest_id'], unique=False)
op.create_index('manifestchild_manifest_id_child_manifest_id', 'manifestchild', ['manifest_id', 'child_manifest_id'], unique=True)
op.create_index('manifestchild_repository_id', 'manifestchild', ['repository_id'], unique=False)
op.create_index('manifestchild_repository_id_child_manifest_id', 'manifestchild', ['repository_id', 'child_manifest_id'], unique=False)
op.create_index('manifestchild_repository_id_manifest_id', 'manifestchild', ['repository_id', 'manifest_id'], unique=False)
op.create_index('manifestchild_repository_id_manifest_id_child_manifest_id', 'manifestchild', ['repository_id', 'manifest_id', 'child_manifest_id'], unique=False)
op.create_table('tag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start_ms', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end_ms', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.Column('reversion', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['tag.id'], name=op.f('fk_tag_linked_tag_id_tag')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tag_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['tagkind.id'], name=op.f('fk_tag_tag_kind_id_tagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tag'))
)
op.create_index('tag_lifetime_end_ms', 'tag', ['lifetime_end_ms'], unique=False)
op.create_index('tag_linked_tag_id', 'tag', ['linked_tag_id'], unique=False)
op.create_index('tag_manifest_id', 'tag', ['manifest_id'], unique=False)
op.create_index('tag_repository_id', 'tag', ['repository_id'], unique=False)
op.create_index('tag_repository_id_name', 'tag', ['repository_id', 'name'], unique=False)
op.create_index('tag_repository_id_name_hidden', 'tag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('tag_repository_id_name_lifetime_end_ms', 'tag', ['repository_id', 'name', 'lifetime_end_ms'], unique=True)
op.create_index('tag_repository_id_name_tag_kind_id', 'tag', ['repository_id', 'name', 'tag_kind_id'], unique=False)
op.create_index('tag_tag_kind_id', 'tag', ['tag_kind_id'], unique=False)
# ### end Alembic commands ###
op.bulk_insert(tables.tagkind,
[
{'name': 'tag'},
])
# ### population of test data ### #
tester.populate_table('tag', [
('repository_id', tester.TestDataType.Foreign('repository')),
('tag_kind_id', tester.TestDataType.Foreign('tagkind')),
('name', tester.TestDataType.String),
('manifest_id', tester.TestDataType.Foreign('manifest')),
('lifetime_start_ms', tester.TestDataType.BigInteger),
])
tester.populate_table('manifestchild', [
('repository_id', tester.TestDataType.Foreign('repository')),
('manifest_id', tester.TestDataType.Foreign('manifest')),
('child_manifest_id', tester.TestDataType.Foreign('manifest')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tag')
op.drop_table('manifestchild')
op.drop_table('tagkind')
# ### end Alembic commands ###

View file

@ -0,0 +1,46 @@
"""Remove unique from TagManifestToManifest
Revision ID: 13411de1c0ff
Revises: 654e6df88b71
Create Date: 2018-08-19 23:30:24.969549
"""
# revision identifiers, used by Alembic.
revision = '13411de1c0ff'
down_revision = '654e6df88b71'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
  # Note: Because of a restriction in MySQL, we cannot simply remove the index and re-add
  # it with unique=False, nor can we simply alter the index in place. To make it work, we'd have to
  # remove the primary key on the field, so instead we simply drop the table entirely and
  # recreate it with the modified index. The backfill will re-fill this in.
op.drop_table('tagmanifesttomanifest')
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=False)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
pass

View file

@ -0,0 +1,33 @@
"""Add maximum build queue count setting to user table
Revision ID: 152bb29a1bb3
Revises: cbc8177760d9
Create Date: 2018-02-20 13:34:34.902415
"""
# revision identifiers, used by Alembic.
revision = '152bb29a1bb3'
down_revision = 'cbc8177760d9'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('user', sa.Column('maximum_queued_builds_count', sa.Integer(), nullable=True))
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('user', 'maximum_queued_builds_count', tester.TestDataType.Integer)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('user', 'maximum_queued_builds_count')
# ### end Alembic commands ###

View file

@ -0,0 +1,27 @@
"""Make BlodUpload byte_count not nullable
Revision ID: 152edccba18c
Revises: c91c564aad34
Create Date: 2018-02-23 12:41:25.571835
"""
# revision identifiers, used by Alembic.
revision = '152edccba18c'
down_revision = 'c91c564aad34'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('blobupload', 'byte_count', existing_type=sa.BigInteger(),
nullable=False)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('blobupload', 'byte_count', existing_type=sa.BigInteger(),
nullable=True)

View file

@ -0,0 +1,49 @@
"""Add LogEntry2 table - QUAY.IO ONLY
Revision ID: 1783530bee68
Revises: 5b7503aada1b
Create Date: 2018-05-17 16:32:28.532264
"""
# revision identifiers, used by Alembic.
revision = '1783530bee68'
down_revision = '5b7503aada1b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('logentry2',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('kind_id', sa.Integer(), nullable=False),
sa.Column('account_id', sa.Integer(), nullable=False),
sa.Column('performer_id', sa.Integer(), nullable=True),
sa.Column('repository_id', sa.Integer(), nullable=True),
sa.Column('datetime', sa.DateTime(), nullable=False),
sa.Column('ip', sa.String(length=255), nullable=True),
sa.Column('metadata_json', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['kind_id'], ['logentrykind.id'], name=op.f('fk_logentry2_kind_id_logentrykind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry2'))
)
op.create_index('logentry2_account_id', 'logentry2', ['account_id'], unique=False)
op.create_index('logentry2_account_id_datetime', 'logentry2', ['account_id', 'datetime'], unique=False)
op.create_index('logentry2_datetime', 'logentry2', ['datetime'], unique=False)
op.create_index('logentry2_kind_id', 'logentry2', ['kind_id'], unique=False)
op.create_index('logentry2_performer_id', 'logentry2', ['performer_id'], unique=False)
op.create_index('logentry2_performer_id_datetime', 'logentry2', ['performer_id', 'datetime'], unique=False)
op.create_index('logentry2_repository_id', 'logentry2', ['repository_id'], unique=False)
op.create_index('logentry2_repository_id_datetime', 'logentry2', ['repository_id', 'datetime'], unique=False)
op.create_index('logentry2_repository_id_datetime_kind_id', 'logentry2', ['repository_id', 'datetime', 'kind_id'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('logentry2')
# ### end Alembic commands ###

View file

@ -0,0 +1,54 @@
"""Add automatic disable of build triggers
Revision ID: 17aff2e1354e
Revises: 61cadbacb9fc
Create Date: 2017-10-18 15:58:03.971526
"""
# revision identifiers, used by Alembic.
revision = '17aff2e1354e'
down_revision = '61cadbacb9fc'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('repositorybuildtrigger', sa.Column('successive_failure_count', sa.Integer(), server_default='0', nullable=False))
op.add_column('repositorybuildtrigger', sa.Column('successive_internal_error_count', sa.Integer(), server_default='0', nullable=False))
# ### end Alembic commands ###
op.bulk_insert(
tables.disablereason,
[
{'id': 2, 'name': 'successive_build_failures'},
{'id': 3, 'name': 'successive_build_internal_errors'},
],
)
# ### population of test data ### #
tester.populate_column('repositorybuildtrigger', 'successive_failure_count', tester.TestDataType.Integer)
tester.populate_column('repositorybuildtrigger', 'successive_internal_error_count', tester.TestDataType.Integer)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('repositorybuildtrigger', 'successive_internal_error_count')
op.drop_column('repositorybuildtrigger', 'successive_failure_count')
# ### end Alembic commands ###
op.execute(tables
.disablereason
.delete()
             .where(tables.disablereason.c.name == op.inline_literal('successive_build_internal_errors')))
op.execute(tables
.disablereason
.delete()
             .where(tables.disablereason.c.name == op.inline_literal('successive_build_failures')))

View file

@ -0,0 +1,35 @@
"""Add last_accessed field to User table
Revision ID: 224ce4c72c2f
Revises: b547bc139ad8
Create Date: 2018-03-12 22:44:07.070490
"""
# revision identifiers, used by Alembic.
revision = '224ce4c72c2f'
down_revision = 'b547bc139ad8'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('user', sa.Column('last_accessed', sa.DateTime(), nullable=True))
op.create_index('user_last_accessed', 'user', ['last_accessed'], unique=False)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('user', 'last_accessed', tester.TestDataType.DateTime)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('user_last_accessed', table_name='user')
op.drop_column('user', 'last_accessed')
# ### end Alembic commands ###

View file

@ -0,0 +1,125 @@
"""repo mirror columns
Revision ID: 34c8ef052ec9
Revises: cc6778199cdb
Create Date: 2019-10-07 13:11:20.424715
"""
# revision identifiers, used by Alembic.
revision = '34c8ef052ec9'
down_revision = 'cc6778199cdb'
from alembic import op
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
from datetime import datetime
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from peewee import ForeignKeyField, DateTimeField, BooleanField
from data.database import (BaseModel, RepoMirrorType, RepoMirrorStatus, RepoMirrorRule, uuid_generator,
QuayUserField, Repository, IntegerField, JSONField)
from data.fields import EnumField as ClientEnumField, CharField, EncryptedCharField
import logging
logger = logging.getLogger(__name__)
BATCH_SIZE = 10
# Original model
class RepoMirrorConfig(BaseModel):
"""
Represents a repository to be mirrored and any additional configuration
required to perform the mirroring.
"""
repository = ForeignKeyField(Repository, index=True, unique=True, backref='mirror')
creation_date = DateTimeField(default=datetime.utcnow)
is_enabled = BooleanField(default=True)
# Mirror Configuration
mirror_type = ClientEnumField(RepoMirrorType, default=RepoMirrorType.PULL)
internal_robot = QuayUserField(allows_robots=True, null=True, backref='mirrorpullrobot',
robot_null_delete=True)
external_reference = CharField()
external_registry = CharField()
external_namespace = CharField()
external_repository = CharField()
external_registry_username = EncryptedCharField(max_length=2048, null=True)
external_registry_password = EncryptedCharField(max_length=2048, null=True)
external_registry_config = JSONField(default={})
# Worker Queuing
sync_interval = IntegerField() # seconds between syncs
sync_start_date = DateTimeField(null=True) # next start time
sync_expiration_date = DateTimeField(null=True) # max duration
sync_retries_remaining = IntegerField(default=3)
sync_status = ClientEnumField(RepoMirrorStatus, default=RepoMirrorStatus.NEVER_RUN)
sync_transaction_id = CharField(default=uuid_generator, max_length=36)
# Tag-Matching Rules
root_rule = ForeignKeyField(RepoMirrorRule)
def _iterate(model_class, clause):
while True:
has_rows = False
for row in list(model_class.select().where(clause).limit(BATCH_SIZE)):
has_rows = True
yield row
if not has_rows:
break
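# Clarifying comment (added): _iterate re-runs the same filtered query in BATCH_SIZE chunks until no
# rows match; it relies on the caller mutating each yielded row (here, filling in
# external_reference) so that processed rows drop out of the clause and the loop terminates.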
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
logger.info('Migrating to external_reference from existing columns')
op.add_column('repomirrorconfig', sa.Column('external_reference', sa.Text(), nullable=True))
for repo_mirror in _iterate(RepoMirrorConfig, (RepoMirrorConfig.external_reference >> None)):
repo = '%s/%s/%s' % (repo_mirror.external_registry, repo_mirror.external_namespace, repo_mirror.external_repository)
logger.info('migrating %s' % repo)
repo_mirror.external_reference = repo
repo_mirror.save()
op.drop_column('repomirrorconfig', 'external_registry')
op.drop_column('repomirrorconfig', 'external_namespace')
op.drop_column('repomirrorconfig', 'external_repository')
op.alter_column('repomirrorconfig', 'external_reference', nullable=False, existing_type=sa.Text())
tester.populate_column('repomirrorconfig', 'external_reference', tester.TestDataType.String)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
  '''
  This will downgrade existing data but may not exactly match the previous data structure. If the
  external_reference does not have three parts (registry, namespace, repository), the placeholder
  value 'DOWNGRADE-FAILED' is inserted for the missing parts.
  '''
op.add_column('repomirrorconfig', sa.Column('external_registry', sa.String(length=255), nullable=True))
op.add_column('repomirrorconfig', sa.Column('external_namespace', sa.String(length=255), nullable=True))
op.add_column('repomirrorconfig', sa.Column('external_repository', sa.String(length=255), nullable=True))
logger.info('Restoring columns from external_reference')
for repo_mirror in _iterate(RepoMirrorConfig, (RepoMirrorConfig.external_registry >> None)):
logger.info('Restoring %s' % repo_mirror.external_reference)
parts = repo_mirror.external_reference.split('/', 2)
repo_mirror.external_registry = parts[0] if len(parts) >= 1 else 'DOWNGRADE-FAILED'
repo_mirror.external_namespace = parts[1] if len(parts) >= 2 else 'DOWNGRADE-FAILED'
repo_mirror.external_repository = parts[2] if len(parts) >= 3 else 'DOWNGRADE-FAILED'
repo_mirror.save()
op.drop_column('repomirrorconfig', 'external_reference')
op.alter_column('repomirrorconfig', 'external_registry', nullable=False, existing_type=sa.String(length=255))
op.alter_column('repomirrorconfig', 'external_namespace', nullable=False, existing_type=sa.String(length=255))
op.alter_column('repomirrorconfig', 'external_repository', nullable=False, existing_type=sa.String(length=255))

View file

@ -0,0 +1,63 @@
"""Add severity and media_type to global messages
Revision ID: 3e8cc74a1e7b
Revises: fc47c1ec019f
Create Date: 2017-01-17 16:22:28.584237
"""
# revision identifiers, used by Alembic.
revision = '3e8cc74a1e7b'
down_revision = 'fc47c1ec019f'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('messages', sa.Column('media_type_id', sa.Integer(), nullable=False, server_default='1'))
op.add_column('messages', sa.Column('severity', sa.String(length=255), nullable=False, server_default='info'))
op.alter_column('messages', 'uuid',
existing_type=mysql.VARCHAR(length=36),
server_default='',
nullable=False)
op.create_index('messages_media_type_id', 'messages', ['media_type_id'], unique=False)
op.create_index('messages_severity', 'messages', ['severity'], unique=False)
op.create_index('messages_uuid', 'messages', ['uuid'], unique=False)
op.create_foreign_key(op.f('fk_messages_media_type_id_mediatype'), 'messages', 'mediatype', ['media_type_id'], ['id'])
# ### end Alembic commands ###
op.bulk_insert(tables.mediatype,
[
{'name': 'text/markdown'},
])
# ### population of test data ### #
tester.populate_column('messages', 'media_type_id', tester.TestDataType.Foreign('mediatype'))
tester.populate_column('messages', 'severity', lambda: 'info')
tester.populate_column('messages', 'uuid', tester.TestDataType.UUID)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f('fk_messages_media_type_id_mediatype'), 'messages', type_='foreignkey')
op.drop_index('messages_uuid', table_name='messages')
op.drop_index('messages_severity', table_name='messages')
op.drop_index('messages_media_type_id', table_name='messages')
op.alter_column('messages', 'uuid',
existing_type=mysql.VARCHAR(length=36),
nullable=True)
op.drop_column('messages', 'severity')
op.drop_column('messages', 'media_type_id')
# ### end Alembic commands ###
op.execute(tables
.mediatype
.delete()
             .where(tables.mediatype.c.name == op.inline_literal('text/markdown')))

View file

@ -0,0 +1,30 @@
"""add_notification_type
Revision ID: 45fd8b9869d4
Revises: 94836b099894
Create Date: 2016-12-01 12:02:19.724528
"""
# revision identifiers, used by Alembic.
revision = '45fd8b9869d4'
down_revision = '94836b099894'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.bulk_insert(tables.notificationkind,
[
{'name': 'build_cancelled'},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.execute(tables
.notificationkind
.delete()
             .where(tables.notificationkind.c.name == op.inline_literal('build_cancelled')))

View file

@ -0,0 +1,27 @@
"""Add index on logs_archived on repositorybuild
Revision ID: 481623ba00ba
Revises: b9045731c4de
Create Date: 2019-02-15 16:09:47.326805
"""
# revision identifiers, used by Alembic.
revision = '481623ba00ba'
down_revision = 'b9045731c4de'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_index('repositorybuild_logs_archived', 'repositorybuild', ['logs_archived'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('repositorybuild_logs_archived', table_name='repositorybuild')
# ### end Alembic commands ###

View file

@ -0,0 +1,144 @@
"""Repository Mirror
Revision ID: 5248ddf35167
Revises: b918abdbee43
Create Date: 2019-06-25 16:22:36.310532
"""
revision = '5248ddf35167'
down_revision = 'b918abdbee43'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.create_table('repomirrorrule',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=36), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('creation_date', sa.DateTime(), nullable=False),
sa.Column('rule_type', sa.Integer(), nullable=False),
sa.Column('rule_value', sa.Text(), nullable=False),
sa.Column('left_child_id', sa.Integer(), nullable=True),
sa.Column('right_child_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['left_child_id'], ['repomirrorrule.id'], name=op.f('fk_repomirrorrule_left_child_id_repomirrorrule')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_repomirrorrule_repository_id_repository')),
sa.ForeignKeyConstraint(['right_child_id'], ['repomirrorrule.id'], name=op.f('fk_repomirrorrule_right_child_id_repomirrorrule')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_repomirrorrule')))
op.create_index('repomirrorrule_left_child_id', 'repomirrorrule', ['left_child_id'], unique=False)
op.create_index('repomirrorrule_repository_id', 'repomirrorrule', ['repository_id'], unique=False)
op.create_index('repomirrorrule_right_child_id', 'repomirrorrule', ['right_child_id'], unique=False)
op.create_index('repomirrorrule_rule_type', 'repomirrorrule', ['rule_type'], unique=False)
op.create_index('repomirrorrule_uuid', 'repomirrorrule', ['uuid'], unique=True)
op.create_table('repomirrorconfig',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('creation_date', sa.DateTime(), nullable=False),
sa.Column('is_enabled', sa.Boolean(), nullable=False),
sa.Column('mirror_type', sa.Integer(), nullable=False),
sa.Column('internal_robot_id', sa.Integer(), nullable=False),
sa.Column('external_registry', sa.String(length=255), nullable=False),
sa.Column('external_namespace', sa.String(length=255), nullable=False),
sa.Column('external_repository', sa.String(length=255), nullable=False),
sa.Column('external_registry_username', sa.String(length=2048), nullable=True),
sa.Column('external_registry_password', sa.String(length=2048), nullable=True),
sa.Column('external_registry_config', sa.Text(), nullable=False),
sa.Column('sync_interval', sa.Integer(), nullable=False, server_default='60'),
sa.Column('sync_start_date', sa.DateTime(), nullable=True),
sa.Column('sync_expiration_date', sa.DateTime(), nullable=True),
sa.Column('sync_retries_remaining', sa.Integer(), nullable=False, server_default='3'),
sa.Column('sync_status', sa.Integer(), nullable=False),
sa.Column('sync_transaction_id', sa.String(length=36), nullable=True),
sa.Column('root_rule_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_repomirrorconfig_repository_id_repository')),
sa.ForeignKeyConstraint(['root_rule_id'], ['repomirrorrule.id'], name=op.f('fk_repomirrorconfig_root_rule_id_repomirrorrule')),
sa.ForeignKeyConstraint(['internal_robot_id'], ['user.id'], name=op.f('fk_repomirrorconfig_internal_robot_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_repomirrorconfig'))
)
op.create_index('repomirrorconfig_mirror_type', 'repomirrorconfig', ['mirror_type'], unique=False)
op.create_index('repomirrorconfig_repository_id', 'repomirrorconfig', ['repository_id'], unique=True)
op.create_index('repomirrorconfig_root_rule_id', 'repomirrorconfig', ['root_rule_id'], unique=False)
op.create_index('repomirrorconfig_sync_status', 'repomirrorconfig', ['sync_status'], unique=False)
op.create_index('repomirrorconfig_sync_transaction_id', 'repomirrorconfig', ['sync_transaction_id'], unique=False)
op.create_index('repomirrorconfig_internal_robot_id', 'repomirrorconfig', ['internal_robot_id'], unique=False)
op.add_column(u'repository', sa.Column('state', sa.Integer(), nullable=False, server_default='0'))
op.create_index('repository_state', 'repository', ['state'], unique=False)
op.bulk_insert(tables.logentrykind,
[
{'name': 'repo_mirror_enabled'},
{'name': 'repo_mirror_disabled'},
{'name': 'repo_mirror_config_changed'},
{'name': 'repo_mirror_sync_started'},
{'name': 'repo_mirror_sync_failed'},
{'name': 'repo_mirror_sync_success'},
{'name': 'repo_mirror_sync_now_requested'},
{'name': 'repo_mirror_sync_tag_success'},
{'name': 'repo_mirror_sync_tag_failed'},
{'name': 'repo_mirror_sync_test_success'},
{'name': 'repo_mirror_sync_test_failed'},
{'name': 'repo_mirror_sync_test_started'},
{'name': 'change_repo_state'}
])
tester.populate_table('repomirrorrule', [
('uuid', tester.TestDataType.String),
('repository_id', tester.TestDataType.Foreign('repository')),
('creation_date', tester.TestDataType.DateTime),
('rule_type', tester.TestDataType.Integer),
('rule_value', tester.TestDataType.String),
])
tester.populate_table('repomirrorconfig', [
('repository_id', tester.TestDataType.Foreign('repository')),
('creation_date', tester.TestDataType.DateTime),
('is_enabled', tester.TestDataType.Boolean),
('mirror_type', tester.TestDataType.Constant(1)),
('internal_robot_id', tester.TestDataType.Foreign('user')),
('external_registry', tester.TestDataType.String),
('external_namespace', tester.TestDataType.String),
('external_repository', tester.TestDataType.String),
('external_registry_username', tester.TestDataType.String),
('external_registry_password', tester.TestDataType.String),
('external_registry_config', tester.TestDataType.JSON),
('sync_start_date', tester.TestDataType.DateTime),
('sync_expiration_date', tester.TestDataType.DateTime),
('sync_retries_remaining', tester.TestDataType.Integer),
('sync_status', tester.TestDataType.Constant(0)),
('sync_transaction_id', tester.TestDataType.String),
('root_rule_id', tester.TestDataType.Foreign('repomirrorrule')),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_column(u'repository', 'state')
op.drop_table('repomirrorconfig')
op.drop_table('repomirrorrule')
for logentrykind in [
'repo_mirror_enabled',
'repo_mirror_disabled',
'repo_mirror_config_changed',
'repo_mirror_sync_started',
'repo_mirror_sync_failed',
'repo_mirror_sync_success',
'repo_mirror_sync_now_requested',
'repo_mirror_sync_tag_success',
'repo_mirror_sync_tag_failed',
'repo_mirror_sync_test_success',
'repo_mirror_sync_test_failed',
'repo_mirror_sync_test_started',
'change_repo_state'
]:
op.execute(tables.logentrykind.delete()
.where(tables.logentrykind.c.name == op.inline_literal(logentrykind)))

View file

@ -0,0 +1,63 @@
"""Remove reference to subdir
Revision ID: 53e2ac668296
Revises: ed01e313d3cb
Create Date: 2017-03-28 15:01:31.073382
"""
# revision identifiers, used by Alembic.
import json
import logging
from alembic.script.revision import RevisionError
from alembic.util import CommandError
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
revision = '53e2ac668296'
down_revision = 'ed01e313d3cb'
log = logging.getLogger(__name__)
def run_migration(migrate_function, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
conn = op.get_bind()
triggers = conn.execute("SELECT id, config FROM repositorybuildtrigger")
for trigger in triggers:
config = json.dumps(migrate_function(json.loads(trigger[1])))
try:
conn.execute("UPDATE repositorybuildtrigger SET config=%s WHERE id=%s", config, trigger[0])
    except (RevisionError, CommandError) as e:
      log.warning("Failed to update build trigger %s with exception: %s", trigger[0], e)
def upgrade(tables, tester, progress_reporter):
run_migration(delete_subdir, progress_reporter)
def downgrade(tables, tester, progress_reporter):
run_migration(add_subdir, progress_reporter)
def delete_subdir(config):
""" Remove subdir from config """
if not config:
return config
if 'subdir' in config:
del config['subdir']
return config
def add_subdir(config):
""" Add subdir back into config """
if not config:
return config
if 'context' in config:
config['subdir'] = config['context']
return config
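# Illustrative sketch only (not part of the original migration): assuming a
# build-trigger config dict like the ones rewritten by run_migration above,
# the two transforms behave as follows:
#   delete_subdir({'context': '/', 'subdir': 'web'})  ->  {'context': '/'}
#   add_subdir({'context': 'web'})                    ->  {'context': 'web', 'subdir': 'web'}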

View file

@ -0,0 +1,49 @@
"""Add NamespaceGeoRestriction table
Revision ID: 54492a68a3cf
Revises: c00a1f15968b
Create Date: 2018-12-05 15:12:14.201116
"""
# revision identifiers, used by Alembic.
revision = '54492a68a3cf'
down_revision = 'c00a1f15968b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('namespacegeorestriction',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('namespace_id', sa.Integer(), nullable=False),
sa.Column('added', sa.DateTime(), nullable=False),
sa.Column('description', sa.String(length=255), nullable=False),
sa.Column('unstructured_json', sa.Text(), nullable=False),
sa.Column('restricted_region_iso_code', sa.String(length=255), nullable=False),
sa.ForeignKeyConstraint(['namespace_id'], ['user.id'], name=op.f('fk_namespacegeorestriction_namespace_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_namespacegeorestriction'))
)
op.create_index('namespacegeorestriction_namespace_id', 'namespacegeorestriction', ['namespace_id'], unique=False)
op.create_index('namespacegeorestriction_namespace_id_restricted_region_iso_code', 'namespacegeorestriction', ['namespace_id', 'restricted_region_iso_code'], unique=True)
op.create_index('namespacegeorestriction_restricted_region_iso_code', 'namespacegeorestriction', ['restricted_region_iso_code'], unique=False)
# ### end Alembic commands ###
tester.populate_table('namespacegeorestriction', [
('namespace_id', tester.TestDataType.Foreign('user')),
('added', tester.TestDataType.DateTime),
('description', tester.TestDataType.String),
('unstructured_json', tester.TestDataType.JSON),
('restricted_region_iso_code', tester.TestDataType.String),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('namespacegeorestriction')
# ### end Alembic commands ###

View file

@ -0,0 +1,26 @@
"""Cleanup old robots
Revision ID: 5b7503aada1b
Revises: 224ce4c72c2f
Create Date: 2018-05-09 17:18:52.230504
"""
# revision identifiers, used by Alembic.
revision = '5b7503aada1b'
down_revision = '224ce4c72c2f'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate.cleanup_old_robots import cleanup_old_robots
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
cleanup_old_robots()
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Nothing to do.
pass

View file

@ -0,0 +1,170 @@
"""Remove 'oci' tables not used by CNR. The rest will be migrated and renamed.
Revision ID: 5cbbfc95bac7
Revises: 1783530bee68
Create Date: 2018-05-23 17:28:40.114433
"""
# revision identifiers, used by Alembic.
revision = '5cbbfc95bac7'
down_revision = '1783530bee68'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from util.migrate import UTF8LongText, UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('derivedimage')
op.drop_table('manifestlabel')
op.drop_table('blobplacementlocationpreference')
op.drop_table('blobuploading')
op.drop_table('bittorrentpieces')
op.drop_table('manifestlayerdockerv1')
op.drop_table('manifestlayerscan')
op.drop_table('manifestlayer')
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
'manifestlayer',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_index', sa.BigInteger(), nullable=False),
sa.Column('metadata_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestlayer_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlayer_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayer'))
)
op.create_index('manifestlayer_manifest_index', 'manifestlayer', ['manifest_index'], unique=False)
op.create_index('manifestlayer_manifest_id_manifest_index', 'manifestlayer', ['manifest_id', 'manifest_index'], unique=True)
op.create_index('manifestlayer_manifest_id', 'manifestlayer', ['manifest_id'], unique=False)
op.create_index('manifestlayer_blob_id', 'manifestlayer', ['blob_id'], unique=False)
op.create_table(
'manifestlayerscan',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('layer_id', sa.Integer(), nullable=False),
sa.Column('scannable', sa.Boolean(), nullable=False),
sa.Column('scanned_by', UTF8CharField(length=255), nullable=False),
sa.ForeignKeyConstraint(['layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerscan_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerscan'))
)
op.create_index('manifestlayerscan_layer_id', 'manifestlayerscan', ['layer_id'], unique=True)
op.create_table(
'bittorrentpieces',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('pieces', UTF8LongText, nullable=False),
sa.Column('piece_length', sa.BigInteger(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_bittorrentpieces_blob_id_blob')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_bittorrentpieces'))
)
op.create_index('bittorrentpieces_blob_id_piece_length', 'bittorrentpieces', ['blob_id', 'piece_length'], unique=True)
op.create_index('bittorrentpieces_blob_id', 'bittorrentpieces', ['blob_id'], unique=False)
op.create_table(
'blobuploading',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('created', sa.DateTime(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.Column('byte_count', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_byte_count', sa.BigInteger(), nullable=True),
sa.Column('chunk_count', sa.BigInteger(), nullable=False),
sa.Column('storage_metadata', UTF8LongText, nullable=True),
sa.Column('sha_state', UTF8LongText, nullable=True),
sa.Column('piece_sha_state', UTF8LongText, nullable=True),
sa.Column('piece_hashes', UTF8LongText, nullable=True),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobuploading_location_id_blobplacementlocation')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_blobuploading_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobuploading'))
)
op.create_index('blobuploading_uuid', 'blobuploading', ['uuid'], unique=True)
op.create_index('blobuploading_repository_id_uuid', 'blobuploading', ['repository_id', 'uuid'], unique=True)
op.create_index('blobuploading_repository_id', 'blobuploading', ['repository_id'], unique=False)
op.create_index('blobuploading_location_id', 'blobuploading', ['location_id'], unique=False)
op.create_index('blobuploading_created', 'blobuploading', ['created'], unique=False)
op.create_table(
'manifestlayerdockerv1',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_layer_id', sa.Integer(), nullable=False),
sa.Column('image_id', UTF8CharField(length=255), nullable=False),
sa.Column('checksum', UTF8CharField(length=255), nullable=False),
sa.Column('compat_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['manifest_layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerdockerv1_manifest_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerdockerv1'))
)
op.create_index('manifestlayerdockerv1_manifest_layer_id', 'manifestlayerdockerv1', ['manifest_layer_id'], unique=False)
op.create_index('manifestlayerdockerv1_image_id', 'manifestlayerdockerv1', ['image_id'], unique=False)
op.create_table(
'manifestlabel',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('annotated_id', sa.Integer(), nullable=False),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['annotated_id'], ['manifest.id'], name=op.f('fk_manifestlabel_annotated_id_manifest')),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel'))
)
op.create_index('manifestlabel_repository_id_annotated_id_label_id', 'manifestlabel', ['repository_id', 'annotated_id', 'label_id'], unique=True)
op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False)
op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False)
op.create_index('manifestlabel_annotated_id', 'manifestlabel', ['annotated_id'], unique=False)
op.create_table(
'blobplacementlocationpreference',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacementlocpref_locid_blobplacementlocation')),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_blobplacementlocationpreference_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocationpreference'))
)
op.create_index('blobplacementlocationpreference_user_id', 'blobplacementlocationpreference', ['user_id'], unique=False)
op.create_index('blobplacementlocationpreference_location_id', 'blobplacementlocationpreference', ['location_id'], unique=False)
op.create_table(
'derivedimage',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('source_manifest_id', sa.Integer(), nullable=False),
sa.Column('derived_manifest_json', UTF8LongText, nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('uniqueness_hash', sa.String(length=255), nullable=False),
sa.Column('signature_blob_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_derivedimage_blob_id_blob')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_derivedimage_media_type_id_mediatype')),
sa.ForeignKeyConstraint(['signature_blob_id'], ['blob.id'], name=op.f('fk_derivedimage_signature_blob_id_blob')),
sa.ForeignKeyConstraint(['source_manifest_id'], ['manifest.id'], name=op.f('fk_derivedimage_source_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimage'))
)
op.create_index('derivedimage_uuid', 'derivedimage', ['uuid'], unique=True)
op.create_index('derivedimage_uniqueness_hash', 'derivedimage', ['uniqueness_hash'], unique=True)
op.create_index('derivedimage_source_manifest_id_media_type_id_uniqueness_hash', 'derivedimage', ['source_manifest_id', 'media_type_id', 'uniqueness_hash'], unique=True)
op.create_index('derivedimage_source_manifest_id_blob_id', 'derivedimage', ['source_manifest_id', 'blob_id'], unique=True)
op.create_index('derivedimage_source_manifest_id', 'derivedimage', ['source_manifest_id'], unique=False)
op.create_index('derivedimage_signature_blob_id', 'derivedimage', ['signature_blob_id'], unique=False)
op.create_index('derivedimage_media_type_id', 'derivedimage', ['media_type_id'], unique=False)
op.create_index('derivedimage_blob_id', 'derivedimage', ['blob_id'], unique=False)
# ### end Alembic commands ###

View file

@ -0,0 +1,32 @@
"""Backfill new appr tables
Revision ID: 5d463ea1e8a8
Revises: 610320e9dacf
Create Date: 2018-07-08 10:01:19.756126
"""
# revision identifiers, used by Alembic.
revision = '5d463ea1e8a8'
down_revision = '610320e9dacf'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate.table_ops import copy_table_contents
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
conn = op.get_bind()
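  # Copy rows from the legacy CNR tables into the appr-prefixed equivalents
  # created by the previous revision (610320e9dacf); the source tables are read only.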
copy_table_contents('blob', 'apprblob', conn)
copy_table_contents('manifest', 'apprmanifest', conn)
copy_table_contents('manifestlist', 'apprmanifestlist', conn)
copy_table_contents('blobplacement', 'apprblobplacement', conn)
copy_table_contents('manifestblob', 'apprmanifestblob', conn)
copy_table_contents('manifestlistmanifest', 'apprmanifestlistmanifest', conn)
copy_table_contents('tag', 'apprtag', conn)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
pass

View file

@ -0,0 +1,206 @@
"""Add new Appr-specific tables
Revision ID: 610320e9dacf
Revises: 5cbbfc95bac7
Create Date: 2018-05-24 16:46:13.514562
"""
# revision identifiers, used by Alembic.
revision = '610320e9dacf'
down_revision = '5cbbfc95bac7'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate.table_ops import copy_table_contents
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('apprblobplacementlocation',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprblobplacementlocation'))
)
op.create_index('apprblobplacementlocation_name', 'apprblobplacementlocation', ['name'], unique=True)
op.create_table('apprtagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprtagkind'))
)
op.create_index('apprtagkind_name', 'apprtagkind', ['name'], unique=True)
op.create_table('apprblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('size', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_size', sa.BigInteger(), nullable=True),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprblob_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprblob'))
)
op.create_index('apprblob_digest', 'apprblob', ['digest'], unique=True)
op.create_index('apprblob_media_type_id', 'apprblob', ['media_type_id'], unique=False)
op.create_table('apprmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_json', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifest'))
)
op.create_index('apprmanifest_digest', 'apprmanifest', ['digest'], unique=True)
op.create_index('apprmanifest_media_type_id', 'apprmanifest', ['media_type_id'], unique=False)
op.create_table('apprmanifestlist',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('manifest_list_json', sa.Text(), nullable=False),
sa.Column('schema_version', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprmanifestlist_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifestlist'))
)
op.create_index('apprmanifestlist_digest', 'apprmanifestlist', ['digest'], unique=True)
op.create_index('apprmanifestlist_media_type_id', 'apprmanifestlist', ['media_type_id'], unique=False)
op.create_table('apprblobplacement',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['apprblob.id'], name=op.f('fk_apprblobplacement_blob_id_apprblob')),
sa.ForeignKeyConstraint(['location_id'], ['apprblobplacementlocation.id'], name=op.f('fk_apprblobplacement_location_id_apprblobplacementlocation')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprblobplacement'))
)
op.create_index('apprblobplacement_blob_id', 'apprblobplacement', ['blob_id'], unique=False)
op.create_index('apprblobplacement_blob_id_location_id', 'apprblobplacement', ['blob_id', 'location_id'], unique=True)
op.create_index('apprblobplacement_location_id', 'apprblobplacement', ['location_id'], unique=False)
op.create_table('apprmanifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['apprblob.id'], name=op.f('fk_apprmanifestblob_blob_id_apprblob')),
sa.ForeignKeyConstraint(['manifest_id'], ['apprmanifest.id'], name=op.f('fk_apprmanifestblob_manifest_id_apprmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifestblob'))
)
op.create_index('apprmanifestblob_blob_id', 'apprmanifestblob', ['blob_id'], unique=False)
op.create_index('apprmanifestblob_manifest_id', 'apprmanifestblob', ['manifest_id'], unique=False)
op.create_index('apprmanifestblob_manifest_id_blob_id', 'apprmanifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_table('apprmanifestlistmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('operating_system', sa.String(length=255), nullable=True),
sa.Column('architecture', sa.String(length=255), nullable=True),
sa.Column('platform_json', sa.Text(), nullable=True),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['manifest_id'], ['apprmanifest.id'], name=op.f('fk_apprmanifestlistmanifest_manifest_id_apprmanifest')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['apprmanifestlist.id'], name=op.f('fk_apprmanifestlistmanifest_manifest_list_id_apprmanifestlist')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_apprmanifestlistmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprmanifestlistmanifest'))
)
op.create_index('apprmanifestlistmanifest_manifest_id', 'apprmanifestlistmanifest', ['manifest_id'], unique=False)
op.create_index('apprmanifestlistmanifest_manifest_list_id', 'apprmanifestlistmanifest', ['manifest_list_id'], unique=False)
op.create_index('apprmanifestlistmanifest_manifest_list_id_media_type_id', 'apprmanifestlistmanifest', ['manifest_list_id', 'media_type_id'], unique=False)
op.create_index('apprmanifestlistmanifest_manifest_list_id_operating_system_arch', 'apprmanifestlistmanifest', ['manifest_list_id', 'operating_system', 'architecture', 'media_type_id'], unique=False)
op.create_index('apprmanifestlistmanifest_media_type_id', 'apprmanifestlistmanifest', ['media_type_id'], unique=False)
op.create_table('apprtag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False),
sa.Column('reverted', sa.Boolean(), nullable=False),
sa.Column('protected', sa.Boolean(), nullable=False),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['apprtag.id'], name=op.f('fk_apprtag_linked_tag_id_apprtag')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['apprmanifestlist.id'], name=op.f('fk_apprtag_manifest_list_id_apprmanifestlist')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_apprtag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['apprtagkind.id'], name=op.f('fk_apprtag_tag_kind_id_apprtagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_apprtag'))
)
op.create_index('apprtag_lifetime_end', 'apprtag', ['lifetime_end'], unique=False)
op.create_index('apprtag_linked_tag_id', 'apprtag', ['linked_tag_id'], unique=False)
op.create_index('apprtag_manifest_list_id', 'apprtag', ['manifest_list_id'], unique=False)
op.create_index('apprtag_repository_id', 'apprtag', ['repository_id'], unique=False)
op.create_index('apprtag_repository_id_name', 'apprtag', ['repository_id', 'name'], unique=False)
op.create_index('apprtag_repository_id_name_hidden', 'apprtag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('apprtag_repository_id_name_lifetime_end', 'apprtag', ['repository_id', 'name', 'lifetime_end'], unique=True)
op.create_index('apprtag_tag_kind_id', 'apprtag', ['tag_kind_id'], unique=False)
# ### end Alembic commands ###
conn = op.get_bind()
copy_table_contents('blobplacementlocation', 'apprblobplacementlocation', conn)
copy_table_contents('tagkind', 'apprtagkind', conn)
# ### population of test data ### #
tester.populate_table('apprmanifest', [
('digest', tester.TestDataType.String),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
('manifest_json', tester.TestDataType.JSON),
])
tester.populate_table('apprmanifestlist', [
('digest', tester.TestDataType.String),
('manifest_list_json', tester.TestDataType.JSON),
('schema_version', tester.TestDataType.String),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
])
tester.populate_table('apprmanifestlistmanifest', [
('manifest_list_id', tester.TestDataType.Foreign('apprmanifestlist')),
('manifest_id', tester.TestDataType.Foreign('apprmanifest')),
('operating_system', tester.TestDataType.String),
('architecture', tester.TestDataType.String),
('platform_json', tester.TestDataType.JSON),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
])
tester.populate_table('apprblob', [
('digest', tester.TestDataType.String),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
('size', tester.TestDataType.BigInteger),
('uncompressed_size', tester.TestDataType.BigInteger),
])
tester.populate_table('apprmanifestblob', [
('manifest_id', tester.TestDataType.Foreign('apprmanifest')),
('blob_id', tester.TestDataType.Foreign('apprblob')),
])
tester.populate_table('apprtag', [
('name', tester.TestDataType.String),
('repository_id', tester.TestDataType.Foreign('repository')),
('manifest_list_id', tester.TestDataType.Foreign('apprmanifestlist')),
('lifetime_start', tester.TestDataType.Integer),
('hidden', tester.TestDataType.Boolean),
('reverted', tester.TestDataType.Boolean),
('protected', tester.TestDataType.Boolean),
('tag_kind_id', tester.TestDataType.Foreign('apprtagkind')),
])
tester.populate_table('apprblobplacement', [
    ('blob_id', tester.TestDataType.Foreign('apprblob')),
('location_id', tester.TestDataType.Foreign('apprblobplacementlocation')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('apprtag')
op.drop_table('apprmanifestlistmanifest')
op.drop_table('apprmanifestblob')
op.drop_table('apprblobplacement')
op.drop_table('apprmanifestlist')
op.drop_table('apprmanifest')
op.drop_table('apprblob')
op.drop_table('apprtagkind')
op.drop_table('apprblobplacementlocation')
# ### end Alembic commands ###

View file

@ -0,0 +1,64 @@
"""Add ability for build triggers to be disabled
Revision ID: 61cadbacb9fc
Revises: b4c2d45bc132
Create Date: 2017-10-18 12:07:26.190901
"""
# revision identifiers, used by Alembic.
revision = '61cadbacb9fc'
down_revision = 'b4c2d45bc132'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('disablereason',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_disablereason'))
)
op.create_index('disablereason_name', 'disablereason', ['name'], unique=True)
op.bulk_insert(
tables.disablereason,
[
{'id': 1, 'name': 'user_toggled'},
],
)
op.bulk_insert(tables.logentrykind, [
{'name': 'toggle_repo_trigger'},
])
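  # The server default below ensures existing build triggers come out of the
  # migration with enabled=True.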
op.add_column(u'repositorybuildtrigger', sa.Column('disabled_reason_id', sa.Integer(), nullable=True))
op.add_column(u'repositorybuildtrigger', sa.Column('enabled', sa.Boolean(), nullable=False, server_default=sa.sql.expression.true()))
op.create_index('repositorybuildtrigger_disabled_reason_id', 'repositorybuildtrigger', ['disabled_reason_id'], unique=False)
op.create_foreign_key(op.f('fk_repositorybuildtrigger_disabled_reason_id_disablereason'), 'repositorybuildtrigger', 'disablereason', ['disabled_reason_id'], ['id'])
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('repositorybuildtrigger', 'disabled_reason_id', tester.TestDataType.Foreign('disablereason'))
tester.populate_column('repositorybuildtrigger', 'enabled', tester.TestDataType.Boolean)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f('fk_repositorybuildtrigger_disabled_reason_id_disablereason'), 'repositorybuildtrigger', type_='foreignkey')
op.drop_index('repositorybuildtrigger_disabled_reason_id', table_name='repositorybuildtrigger')
op.drop_column(u'repositorybuildtrigger', 'enabled')
op.drop_column(u'repositorybuildtrigger', 'disabled_reason_id')
op.drop_table('disablereason')
# ### end Alembic commands ###
op.execute(tables
.logentrykind
.delete()
.where(tables.logentrykind.c.name == op.inline_literal('toggle_repo_trigger')))

View file

@ -0,0 +1,26 @@
"""Change manifest_bytes to a UTF8 text field
Revision ID: 654e6df88b71
Revises: eafdeadcebc7
Create Date: 2018-08-15 09:58:46.109277
"""
# revision identifiers, used by Alembic.
revision = '654e6df88b71'
down_revision = 'eafdeadcebc7'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8LongText
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('manifest', 'manifest_bytes', existing_type=sa.Text(), type_=UTF8LongText())
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('manifest', 'manifest_bytes', existing_type=UTF8LongText(), type_=sa.Text())

View file

@ -0,0 +1,47 @@
"""Add TagToRepositoryTag table
Revision ID: 67f0abd172ae
Revises: 10f45ee2310b
Create Date: 2018-10-30 11:31:06.615488
"""
# revision identifiers, used by Alembic.
revision = '67f0abd172ae'
down_revision = '10f45ee2310b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('tagtorepositorytag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('tag_id', sa.Integer(), nullable=False),
sa.Column('repository_tag_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tagtorepositorytag_repository_id_repository')),
sa.ForeignKeyConstraint(['repository_tag_id'], ['repositorytag.id'], name=op.f('fk_tagtorepositorytag_repository_tag_id_repositorytag')),
sa.ForeignKeyConstraint(['tag_id'], ['tag.id'], name=op.f('fk_tagtorepositorytag_tag_id_tag')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagtorepositorytag'))
)
op.create_index('tagtorepositorytag_repository_id', 'tagtorepositorytag', ['repository_id'], unique=False)
op.create_index('tagtorepositorytag_repository_tag_id', 'tagtorepositorytag', ['repository_tag_id'], unique=True)
op.create_index('tagtorepositorytag_tag_id', 'tagtorepositorytag', ['tag_id'], unique=True)
# ### end Alembic commands ###
tester.populate_table('tagtorepositorytag', [
('repository_id', tester.TestDataType.Foreign('repository')),
('tag_id', tester.TestDataType.Foreign('tag')),
('repository_tag_id', tester.TestDataType.Foreign('repositorytag')),
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tagtorepositorytag')
# ### end Alembic commands ###

View file

@ -0,0 +1,36 @@
"""Change LogEntry to use a BigInteger as its primary key
Revision ID: 6c21e2cfb8b6
Revises: d17c695859ea
Create Date: 2018-07-27 16:30:02.877346
"""
# revision identifiers, used by Alembic.
revision = '6c21e2cfb8b6'
down_revision = 'd17c695859ea'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column(
table_name='logentry',
column_name='id',
nullable=False,
autoincrement=True,
type_=sa.BigInteger(),
)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column(
table_name='logentry',
column_name='id',
nullable=False,
autoincrement=True,
type_=sa.Integer(),
)

View file

@ -0,0 +1,56 @@
"""Add user prompt support
Revision ID: 6c7014e84a5e
Revises: c156deb8845d
Create Date: 2016-10-31 16:26:31.447705
"""
# revision identifiers, used by Alembic.
revision = '6c7014e84a5e'
down_revision = 'c156deb8845d'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.create_table('userpromptkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_userpromptkind'))
)
op.create_index('userpromptkind_name', 'userpromptkind', ['name'], unique=False)
op.create_table('userprompt',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('kind_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['kind_id'], ['userpromptkind.id'], name=op.f('fk_userprompt_kind_id_userpromptkind')),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_userprompt_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_userprompt'))
)
op.create_index('userprompt_kind_id', 'userprompt', ['kind_id'], unique=False)
op.create_index('userprompt_user_id', 'userprompt', ['user_id'], unique=False)
op.create_index('userprompt_user_id_kind_id', 'userprompt', ['user_id', 'kind_id'], unique=True)
### end Alembic commands ###
op.bulk_insert(tables.userpromptkind,
[
{'name':'confirm_username'},
])
# ### population of test data ### #
tester.populate_table('userprompt', [
('user_id', tester.TestDataType.Foreign('user')),
('kind_id', tester.TestDataType.Foreign('userpromptkind')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.drop_table('userprompt')
op.drop_table('userpromptkind')
### end Alembic commands ###

View file

@ -0,0 +1,43 @@
"""Add LogEntry3 table
Revision ID: 6ec8726c0ace
Revises: 54492a68a3cf
Create Date: 2019-01-03 13:41:02.897957
"""
# revision identifiers, used by Alembic.
revision = '6ec8726c0ace'
down_revision = '54492a68a3cf'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('logentry3',
sa.Column('id', sa.BigInteger(), nullable=False),
sa.Column('kind_id', sa.Integer(), nullable=False),
sa.Column('account_id', sa.Integer(), nullable=False),
sa.Column('performer_id', sa.Integer(), nullable=True),
sa.Column('repository_id', sa.Integer(), nullable=True),
sa.Column('datetime', sa.DateTime(), nullable=False),
sa.Column('ip', sa.String(length=255), nullable=True),
sa.Column('metadata_json', sa.Text(), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_logentry3'))
)
op.create_index('logentry3_account_id_datetime', 'logentry3', ['account_id', 'datetime'], unique=False)
op.create_index('logentry3_datetime', 'logentry3', ['datetime'], unique=False)
op.create_index('logentry3_performer_id_datetime', 'logentry3', ['performer_id', 'datetime'], unique=False)
op.create_index('logentry3_repository_id_datetime_kind_id', 'logentry3', ['repository_id', 'datetime', 'kind_id'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('logentry3')
# ### end Alembic commands ###

View file

@ -0,0 +1,289 @@
"""Backfill new encrypted fields
Revision ID: 703298a825c2
Revises: c13c8052f7a6
Create Date: 2019-08-19 16:07:48.109889
"""
# revision identifiers, used by Alembic.
revision = '703298a825c2'
down_revision = 'c13c8052f7a6'
import logging
import uuid
from datetime import datetime
from peewee import (JOIN, IntegrityError, DateTimeField, CharField, ForeignKeyField,
BooleanField, TextField, IntegerField)
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from data.database import (BaseModel, User, Repository, AccessTokenKind, Role,
random_string_generator, QuayUserField, BuildTriggerService,
uuid_generator, DisableReason)
from data.fields import Credential, DecryptedValue, EncryptedCharField, EncryptedTextField, EnumField, CredentialField
from data.model.token import ACCESS_TOKEN_NAME_PREFIX_LENGTH
from data.model.appspecifictoken import TOKEN_NAME_PREFIX_LENGTH as AST_TOKEN_NAME_PREFIX_LENGTH
from data.model.oauth import ACCESS_TOKEN_PREFIX_LENGTH as OAUTH_ACCESS_TOKEN_PREFIX_LENGTH
from data.model.oauth import AUTHORIZATION_CODE_PREFIX_LENGTH
BATCH_SIZE = 10
logger = logging.getLogger(__name__)
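# Yield rows matching `clause` in batches of BATCH_SIZE. Callers are expected to
# update each yielded row so that it stops matching the clause; otherwise the
# loop would re-select the same rows forever. Usage sketch (SomeModel is
# hypothetical):
#   for row in _iterate(SomeModel, SomeModel.fully_migrated == False):
#     ...update row...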
def _iterate(model_class, clause):
while True:
has_rows = False
for row in list(model_class.select().where(clause).limit(BATCH_SIZE)):
has_rows = True
yield row
if not has_rows:
break
def _decrypted(value):
if value is None:
return None
assert isinstance(value, basestring)
return DecryptedValue(value)
# NOTE: As per standard migrations involving Peewee models, we copy them here, as they will change
# after this call.
class AccessToken(BaseModel):
code = CharField(default=random_string_generator(length=64), unique=True, index=True)
token_name = CharField(default=random_string_generator(length=32), unique=True, index=True)
token_code = EncryptedCharField(default_token_length=32)
class RobotAccountToken(BaseModel):
robot_account = QuayUserField(index=True, allows_robots=True, unique=True)
token = EncryptedCharField(default_token_length=64)
fully_migrated = BooleanField(default=False)
class RepositoryBuildTrigger(BaseModel):
uuid = CharField(default=uuid_generator, index=True)
auth_token = CharField(null=True)
private_key = TextField(null=True)
secure_auth_token = EncryptedCharField(null=True)
secure_private_key = EncryptedTextField(null=True)
fully_migrated = BooleanField(default=False)
class AppSpecificAuthToken(BaseModel):
token_name = CharField(index=True, unique=True, default=random_string_generator(60))
token_secret = EncryptedCharField(default_token_length=60)
token_code = CharField(default=random_string_generator(length=120), unique=True, index=True)
class OAuthAccessToken(BaseModel):
token_name = CharField(index=True, unique=True)
token_code = CredentialField()
access_token = CharField(index=True)
class OAuthAuthorizationCode(BaseModel):
code = CharField(index=True, unique=True, null=True)
code_name = CharField(index=True, unique=True)
code_credential = CredentialField()
class OAuthApplication(BaseModel):
secure_client_secret = EncryptedCharField(default_token_length=40, null=True)
fully_migrated = BooleanField(default=False)
client_secret = CharField(default=random_string_generator(length=40))
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Empty all access token names to fix the bug where we put the wrong name and code
# in for some tokens.
AccessToken.update(token_name=None).where(AccessToken.token_name >> None).execute()
# AccessToken.
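  # The legacy plaintext `code` column is split at ACCESS_TOKEN_NAME_PREFIX_LENGTH
  # into a public token_name prefix and an encrypted token_code remainder.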
logger.info('Backfilling encrypted credentials for access tokens')
for access_token in _iterate(AccessToken, ((AccessToken.token_name >> None) |
(AccessToken.token_name == ''))):
logger.info('Backfilling encrypted credentials for access token %s', access_token.id)
assert access_token.code is not None
assert access_token.code[:ACCESS_TOKEN_NAME_PREFIX_LENGTH]
assert access_token.code[ACCESS_TOKEN_NAME_PREFIX_LENGTH:]
token_name = access_token.code[:ACCESS_TOKEN_NAME_PREFIX_LENGTH]
token_code = _decrypted(access_token.code[ACCESS_TOKEN_NAME_PREFIX_LENGTH:])
(AccessToken
.update(token_name=token_name, token_code=token_code)
.where(AccessToken.id == access_token.id, AccessToken.code == access_token.code)
.execute())
assert AccessToken.select().where(AccessToken.token_name >> None).count() == 0
# Robots.
logger.info('Backfilling encrypted credentials for robots')
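  # Select robot users that do not yet have a RobotAccountToken row (LEFT OUTER
  # join plus the `RobotAccountToken.id >> None` filter) and create one for each;
  # the legacy token value stored in the robot's `email` column becomes the
  # encrypted token.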
while True:
has_row = False
query = (User
.select()
.join(RobotAccountToken, JOIN.LEFT_OUTER)
.where(User.robot == True, RobotAccountToken.id >> None)
.limit(BATCH_SIZE))
for robot_user in query:
logger.info('Backfilling encrypted credentials for robot %s', robot_user.id)
has_row = True
try:
RobotAccountToken.create(robot_account=robot_user,
token=_decrypted(robot_user.email),
fully_migrated=False)
except IntegrityError:
break
if not has_row:
break
# RepositoryBuildTrigger
logger.info('Backfilling encrypted credentials for repo build triggers')
for repo_build_trigger in _iterate(RepositoryBuildTrigger,
(RepositoryBuildTrigger.fully_migrated == False)):
logger.info('Backfilling encrypted credentials for repo build trigger %s',
repo_build_trigger.id)
(RepositoryBuildTrigger
.update(secure_auth_token=_decrypted(repo_build_trigger.auth_token),
secure_private_key=_decrypted(repo_build_trigger.private_key),
fully_migrated=True)
.where(RepositoryBuildTrigger.id == repo_build_trigger.id,
RepositoryBuildTrigger.uuid == repo_build_trigger.uuid)
.execute())
assert (RepositoryBuildTrigger
.select()
.where(RepositoryBuildTrigger.fully_migrated == False)
.count()) == 0
# AppSpecificAuthToken
logger.info('Backfilling encrypted credentials for app specific auth tokens')
for token in _iterate(AppSpecificAuthToken, ((AppSpecificAuthToken.token_name >> None) |
(AppSpecificAuthToken.token_name == '') |
(AppSpecificAuthToken.token_secret >> None))):
logger.info('Backfilling encrypted credentials for app specific auth %s',
token.id)
assert token.token_code[AST_TOKEN_NAME_PREFIX_LENGTH:]
token_name = token.token_code[:AST_TOKEN_NAME_PREFIX_LENGTH]
token_secret = _decrypted(token.token_code[AST_TOKEN_NAME_PREFIX_LENGTH:])
assert token_name
assert token_secret
(AppSpecificAuthToken
.update(token_name=token_name,
token_secret=token_secret)
.where(AppSpecificAuthToken.id == token.id,
AppSpecificAuthToken.token_code == token.token_code)
.execute())
assert (AppSpecificAuthToken
.select()
.where(AppSpecificAuthToken.token_name >> None)
.count()) == 0
# OAuthAccessToken
logger.info('Backfilling credentials for OAuth access tokens')
for token in _iterate(OAuthAccessToken, ((OAuthAccessToken.token_name >> None) |
(OAuthAccessToken.token_name == ''))):
logger.info('Backfilling credentials for OAuth access token %s', token.id)
token_name = token.access_token[:OAUTH_ACCESS_TOKEN_PREFIX_LENGTH]
token_code = Credential.from_string(token.access_token[OAUTH_ACCESS_TOKEN_PREFIX_LENGTH:])
assert token_name
assert token.access_token[OAUTH_ACCESS_TOKEN_PREFIX_LENGTH:]
(OAuthAccessToken
.update(token_name=token_name,
token_code=token_code)
.where(OAuthAccessToken.id == token.id,
OAuthAccessToken.access_token == token.access_token)
.execute())
assert (OAuthAccessToken
.select()
.where(OAuthAccessToken.token_name >> None)
.count()) == 0
# OAuthAuthorizationCode
logger.info('Backfilling credentials for OAuth auth code')
for code in _iterate(OAuthAuthorizationCode, ((OAuthAuthorizationCode.code_name >> None) |
(OAuthAuthorizationCode.code_name == ''))):
logger.info('Backfilling credentials for OAuth auth code %s', code.id)
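    # Rows with a NULL `code` get a random placeholder of twice the prefix length
    # so it can still be split into a code_name prefix and a code_credential remainder.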
user_code = code.code or random_string_generator(AUTHORIZATION_CODE_PREFIX_LENGTH * 2)()
code_name = user_code[:AUTHORIZATION_CODE_PREFIX_LENGTH]
code_credential = Credential.from_string(user_code[AUTHORIZATION_CODE_PREFIX_LENGTH:])
assert code_name
assert user_code[AUTHORIZATION_CODE_PREFIX_LENGTH:]
(OAuthAuthorizationCode
.update(code_name=code_name, code_credential=code_credential)
.where(OAuthAuthorizationCode.id == code.id)
.execute())
assert (OAuthAuthorizationCode
.select()
.where(OAuthAuthorizationCode.code_name >> None)
.count()) == 0
# OAuthApplication
logger.info('Backfilling secret for OAuth applications')
for app in _iterate(OAuthApplication, OAuthApplication.fully_migrated == False):
logger.info('Backfilling secret for OAuth application %s', app.id)
client_secret = app.client_secret or str(uuid.uuid4())
secure_client_secret = _decrypted(client_secret)
(OAuthApplication
.update(secure_client_secret=secure_client_secret, fully_migrated=True)
.where(OAuthApplication.id == app.id, OAuthApplication.fully_migrated == False)
.execute())
assert (OAuthApplication
.select()
.where(OAuthApplication.fully_migrated == False)
.count()) == 0
# Adjust existing fields to be nullable.
op.alter_column('accesstoken', 'code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'access_token', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_code', nullable=True, existing_type=sa.String(length=255))
# Adjust new fields to be non-nullable.
op.alter_column('accesstoken', 'token_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('accesstoken', 'token_code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_secret', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_name', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_credential', nullable=False, existing_type=sa.String(length=255))
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('accesstoken', 'code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'access_token', nullable=False, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_code', nullable=False, existing_type=sa.String(length=255))
op.alter_column('accesstoken', 'token_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('accesstoken', 'token_code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('appspecificauthtoken', 'token_secret', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthaccesstoken', 'token_code', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_name', nullable=True, existing_type=sa.String(length=255))
op.alter_column('oauthauthorizationcode', 'code_credential', nullable=True, existing_type=sa.String(length=255))

View file

@ -0,0 +1,74 @@
"""Add support for app specific tokens
Revision ID: 7367229b38d9
Revises: d8989249f8f6
Create Date: 2017-12-12 13:15:42.419764
"""
# revision identifiers, used by Alembic.
revision = '7367229b38d9'
down_revision = 'd8989249f8f6'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('appspecificauthtoken',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=36), nullable=False),
sa.Column('title', UTF8CharField(length=255), nullable=False),
sa.Column('token_code', sa.String(length=255), nullable=False),
sa.Column('created', sa.DateTime(), nullable=False),
sa.Column('expiration', sa.DateTime(), nullable=True),
sa.Column('last_accessed', sa.DateTime(), nullable=True),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_appspecificauthtoken_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_appspecificauthtoken'))
)
op.create_index('appspecificauthtoken_token_code', 'appspecificauthtoken', ['token_code'], unique=True)
op.create_index('appspecificauthtoken_user_id', 'appspecificauthtoken', ['user_id'], unique=False)
op.create_index('appspecificauthtoken_user_id_expiration', 'appspecificauthtoken', ['user_id', 'expiration'], unique=False)
op.create_index('appspecificauthtoken_uuid', 'appspecificauthtoken', ['uuid'], unique=False)
# ### end Alembic commands ###
op.bulk_insert(tables.logentrykind, [
{'name': 'create_app_specific_token'},
{'name': 'revoke_app_specific_token'},
])
# ### population of test data ### #
tester.populate_table('appspecificauthtoken', [
('user_id', tester.TestDataType.Foreign('user')),
('uuid', tester.TestDataType.UUID),
('title', tester.TestDataType.UTF8Char),
('token_code', tester.TestDataType.String),
('created', tester.TestDataType.DateTime),
('expiration', tester.TestDataType.DateTime),
('last_accessed', tester.TestDataType.DateTime),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('appspecificauthtoken')
# ### end Alembic commands ###
  op.execute(tables
             .logentrykind
             .delete()
             .where(tables.logentrykind.c.name == op.inline_literal('create_app_specific_token')))
  op.execute(tables
             .logentrykind
             .delete()
             .where(tables.logentrykind.c.name == op.inline_literal('revoke_app_specific_token')))

View file

@ -0,0 +1,340 @@
"""Add OCI/App models
Revision ID: 7a525c68eb13
Revises: e2894a3a3c19
Create Date: 2017-01-24 16:25:52.170277
"""
# revision identifiers, used by Alembic.
revision = '7a525c68eb13'
down_revision = 'e2894a3a3c19'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from sqlalchemy.sql import table, column
from util.migrate import UTF8LongText, UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.create_table(
'tagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagkind'))
)
op.create_index('tagkind_name', 'tagkind', ['name'], unique=True)
op.create_table(
'blobplacementlocation',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocation'))
)
op.create_index('blobplacementlocation_name', 'blobplacementlocation', ['name'], unique=True)
op.create_table(
'blob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('size', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_size', sa.BigInteger(), nullable=True),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_blob_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blob'))
)
op.create_index('blob_digest', 'blob', ['digest'], unique=True)
op.create_index('blob_media_type_id', 'blob', ['media_type_id'], unique=False)
op.create_table(
'blobplacementlocationpreference',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacementlocpref_locid_blobplacementlocation')),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name=op.f('fk_blobplacementlocationpreference_user_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocationpreference'))
)
op.create_index('blobplacementlocationpreference_location_id', 'blobplacementlocationpreference', ['location_id'], unique=False)
op.create_index('blobplacementlocationpreference_user_id', 'blobplacementlocationpreference', ['user_id'], unique=False)
op.create_table(
'manifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest'))
)
op.create_index('manifest_digest', 'manifest', ['digest'], unique=True)
op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False)
op.create_table(
'manifestlist',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('manifest_list_json', UTF8LongText, nullable=False),
sa.Column('schema_version', UTF8CharField(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlist_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlist'))
)
op.create_index('manifestlist_digest', 'manifestlist', ['digest'], unique=True)
op.create_index('manifestlist_media_type_id', 'manifestlist', ['media_type_id'], unique=False)
op.create_table(
'bittorrentpieces',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('pieces', UTF8LongText, nullable=False),
sa.Column('piece_length', sa.BigInteger(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_bittorrentpieces_blob_id_blob')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_bittorrentpieces'))
)
op.create_index('bittorrentpieces_blob_id', 'bittorrentpieces', ['blob_id'], unique=False)
op.create_index('bittorrentpieces_blob_id_piece_length', 'bittorrentpieces', ['blob_id', 'piece_length'], unique=True)
op.create_table(
'blobplacement',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_blobplacement_blob_id_blob')),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacement_location_id_blobplacementlocation')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacement'))
)
op.create_index('blobplacement_blob_id', 'blobplacement', ['blob_id'], unique=False)
op.create_index('blobplacement_blob_id_location_id', 'blobplacement', ['blob_id', 'location_id'], unique=True)
op.create_index('blobplacement_location_id', 'blobplacement', ['location_id'], unique=False)
op.create_table(
'blobuploading',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('created', sa.DateTime(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.Column('byte_count', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_byte_count', sa.BigInteger(), nullable=True),
sa.Column('chunk_count', sa.BigInteger(), nullable=False),
sa.Column('storage_metadata', UTF8LongText, nullable=True),
sa.Column('sha_state', UTF8LongText, nullable=True),
sa.Column('piece_sha_state', UTF8LongText, nullable=True),
sa.Column('piece_hashes', UTF8LongText, nullable=True),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobuploading_location_id_blobplacementlocation')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_blobuploading_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobuploading'))
)
op.create_index('blobuploading_created', 'blobuploading', ['created'], unique=False)
op.create_index('blobuploading_location_id', 'blobuploading', ['location_id'], unique=False)
op.create_index('blobuploading_repository_id', 'blobuploading', ['repository_id'], unique=False)
op.create_index('blobuploading_repository_id_uuid', 'blobuploading', ['repository_id', 'uuid'], unique=True)
op.create_index('blobuploading_uuid', 'blobuploading', ['uuid'], unique=True)
op.create_table(
'derivedimage',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('uuid', sa.String(length=255), nullable=False),
sa.Column('source_manifest_id', sa.Integer(), nullable=False),
sa.Column('derived_manifest_json', UTF8LongText, nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('uniqueness_hash', sa.String(length=255), nullable=False),
sa.Column('signature_blob_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_derivedimage_blob_id_blob')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_derivedimage_media_type_id_mediatype')),
sa.ForeignKeyConstraint(['signature_blob_id'], ['blob.id'], name=op.f('fk_derivedimage_signature_blob_id_blob')),
sa.ForeignKeyConstraint(['source_manifest_id'], ['manifest.id'], name=op.f('fk_derivedimage_source_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimage'))
)
op.create_index('derivedimage_blob_id', 'derivedimage', ['blob_id'], unique=False)
op.create_index('derivedimage_media_type_id', 'derivedimage', ['media_type_id'], unique=False)
op.create_index('derivedimage_signature_blob_id', 'derivedimage', ['signature_blob_id'], unique=False)
op.create_index('derivedimage_source_manifest_id', 'derivedimage', ['source_manifest_id'], unique=False)
op.create_index('derivedimage_source_manifest_id_blob_id', 'derivedimage', ['source_manifest_id', 'blob_id'], unique=True)
op.create_index('derivedimage_source_manifest_id_media_type_id_uniqueness_hash', 'derivedimage', ['source_manifest_id', 'media_type_id', 'uniqueness_hash'], unique=True)
op.create_index('derivedimage_uniqueness_hash', 'derivedimage', ['uniqueness_hash'], unique=True)
op.create_index('derivedimage_uuid', 'derivedimage', ['uuid'], unique=True)
op.create_table(
'manifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestblob_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob'))
)
op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False)
op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False)
op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_table(
'manifestlabel',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('annotated_id', sa.Integer(), nullable=False),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['annotated_id'], ['manifest.id'], name=op.f('fk_manifestlabel_annotated_id_manifest')),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel'))
)
op.create_index('manifestlabel_annotated_id', 'manifestlabel', ['annotated_id'], unique=False)
op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False)
op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False)
op.create_index('manifestlabel_repository_id_annotated_id_label_id', 'manifestlabel', ['repository_id', 'annotated_id', 'label_id'], unique=True)
op.create_table(
'manifestlayer',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_index', sa.BigInteger(), nullable=False),
sa.Column('metadata_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestlayer_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlayer_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayer'))
)
op.create_index('manifestlayer_blob_id', 'manifestlayer', ['blob_id'], unique=False)
op.create_index('manifestlayer_manifest_id', 'manifestlayer', ['manifest_id'], unique=False)
op.create_index('manifestlayer_manifest_id_manifest_index', 'manifestlayer', ['manifest_id', 'manifest_index'], unique=True)
op.create_index('manifestlayer_manifest_index', 'manifestlayer', ['manifest_index'], unique=False)
op.create_table(
'manifestlistmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('operating_system', UTF8CharField(length=255), nullable=True),
sa.Column('architecture', UTF8CharField(length=255), nullable=True),
sa.Column('platform_json', UTF8LongText, nullable=True),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlistmanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_manifestlistmanifest_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlistmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlistmanifest'))
)
op.create_index('manifestlistmanifest_manifest_id', 'manifestlistmanifest', ['manifest_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_list_id', 'manifestlistmanifest', ['manifest_list_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_os_arch_mtid', 'manifestlistmanifest', ['manifest_list_id', 'operating_system', 'architecture', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_mtid', 'manifestlistmanifest', ['manifest_list_id', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_media_type_id', 'manifestlistmanifest', ['media_type_id'], unique=False)
op.create_table(
'tag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', UTF8CharField(length=190), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False),
sa.Column('reverted', sa.Boolean(), nullable=False),
sa.Column('protected', sa.Boolean(), nullable=False),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['tag.id'], name=op.f('fk_tag_linked_tag_id_tag')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_tag_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['tagkind.id'], name=op.f('fk_tag_tag_kind_id_tagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tag'))
)
op.create_index('tag_lifetime_end', 'tag', ['lifetime_end'], unique=False)
op.create_index('tag_linked_tag_id', 'tag', ['linked_tag_id'], unique=False)
op.create_index('tag_manifest_list_id', 'tag', ['manifest_list_id'], unique=False)
op.create_index('tag_repository_id', 'tag', ['repository_id'], unique=False)
op.create_index('tag_repository_id_name_hidden', 'tag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('tag_repository_id_name_lifetime_end', 'tag', ['repository_id', 'name', 'lifetime_end'], unique=True)
op.create_index('tag_repository_id_name', 'tag', ['repository_id', 'name'], unique=False)
op.create_index('tag_tag_kind_id', 'tag', ['tag_kind_id'], unique=False)
op.create_table(
'manifestlayerdockerv1',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_layer_id', sa.Integer(), nullable=False),
sa.Column('image_id', UTF8CharField(length=255), nullable=False),
sa.Column('checksum', UTF8CharField(length=255), nullable=False),
sa.Column('compat_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['manifest_layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerdockerv1_manifest_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerdockerv1'))
)
op.create_index('manifestlayerdockerv1_image_id', 'manifestlayerdockerv1', ['image_id'], unique=False)
op.create_index('manifestlayerdockerv1_manifest_layer_id', 'manifestlayerdockerv1', ['manifest_layer_id'], unique=False)
op.create_table(
'manifestlayerscan',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('layer_id', sa.Integer(), nullable=False),
sa.Column('scannable', sa.Boolean(), nullable=False),
sa.Column('scanned_by', UTF8CharField(length=255), nullable=False),
sa.ForeignKeyConstraint(['layer_id'], ['manifestlayer.id'], name=op.f('fk_manifestlayerscan_layer_id_manifestlayer')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlayerscan'))
)
op.create_index('manifestlayerscan_layer_id', 'manifestlayerscan', ['layer_id'], unique=True)
blobplacementlocation_table = table('blobplacementlocation',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
blobplacementlocation_table,
[
{'name': 'local_eu'},
{'name': 'local_us'},
],
)
op.bulk_insert(
tables.mediatype,
[
{'name': 'application/vnd.cnr.blob.v0.tar+gzip'},
{'name': 'application/vnd.cnr.package-manifest.helm.v0.json'},
{'name': 'application/vnd.cnr.package-manifest.kpm.v0.json'},
{'name': 'application/vnd.cnr.package-manifest.docker-compose.v0.json'},
{'name': 'application/vnd.cnr.package.kpm.v0.tar+gzip'},
{'name': 'application/vnd.cnr.package.helm.v0.tar+gzip'},
{'name': 'application/vnd.cnr.package.docker-compose.v0.tar+gzip'},
{'name': 'application/vnd.cnr.manifests.v0.json'},
{'name': 'application/vnd.cnr.manifest.list.v0.json'},
],
)
tagkind_table = table('tagkind',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
tagkind_table,
[
{'id': 1, 'name': 'tag'},
{'id': 2, 'name': 'release'},
{'id': 3, 'name': 'channel'},
]
)
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
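# Drop the tables created above, removing the tables that hold foreign keys before the tables they reference.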
op.drop_table('manifestlayerscan')
op.drop_table('manifestlayerdockerv1')
op.drop_table('tag')
op.drop_table('manifestlistmanifest')
op.drop_table('manifestlayer')
op.drop_table('manifestlabel')
op.drop_table('manifestblob')
op.drop_table('derivedimage')
op.drop_table('blobuploading')
op.drop_table('blobplacement')
op.drop_table('bittorrentpieces')
op.drop_table('manifestlist')
op.drop_table('manifest')
op.drop_table('blobplacementlocationpreference')
op.drop_table('blob')
op.drop_table('tagkind')
op.drop_table('blobplacementlocation')


@ -0,0 +1,35 @@
"""Add disabled datetime to trigger
Revision ID: 87fbbc224f10
Revises: 17aff2e1354e
Create Date: 2017-10-24 14:06:37.658705
"""
# revision identifiers, used by Alembic.
revision = '87fbbc224f10'
down_revision = '17aff2e1354e'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('repositorybuildtrigger', sa.Column('disabled_datetime', sa.DateTime(), nullable=True))
op.create_index('repositorybuildtrigger_disabled_datetime', 'repositorybuildtrigger', ['disabled_datetime'], unique=False)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_column('repositorybuildtrigger', 'disabled_datetime', tester.TestDataType.DateTime)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('repositorybuildtrigger_disabled_datetime', table_name='repositorybuildtrigger')
op.drop_column('repositorybuildtrigger', 'disabled_datetime')
# ### end Alembic commands ###


@ -0,0 +1,180 @@
"""Add V2_2 data models for Manifest, ManifestBlob and ManifestLegacyImage
Revision ID: 9093adccc784
Revises: 6c21e2cfb8b6
Create Date: 2018-08-06 16:07:50.222749
"""
# revision identifiers, used by Alembic.
revision = '9093adccc784'
down_revision = '6c21e2cfb8b6'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('manifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_bytes', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifest_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest'))
)
op.create_index('manifest_digest', 'manifest', ['digest'], unique=False)
op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False)
op.create_index('manifest_repository_id', 'manifest', ['repository_id'], unique=False)
op.create_index('manifest_repository_id_digest', 'manifest', ['repository_id', 'digest'], unique=True)
op.create_index('manifest_repository_id_media_type_id', 'manifest', ['repository_id', 'media_type_id'], unique=False)
op.create_table('manifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('blob_index', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['imagestorage.id'], name=op.f('fk_manifestblob_blob_id_imagestorage')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestblob_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob'))
)
op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False)
op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False)
op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_index('manifestblob_manifest_id_blob_index', 'manifestblob', ['manifest_id', 'blob_index'], unique=True)
op.create_index('manifestblob_repository_id', 'manifestblob', ['repository_id'], unique=False)
op.create_table('manifestlabel',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlabel_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel'))
)
op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False)
op.create_index('manifestlabel_manifest_id', 'manifestlabel', ['manifest_id'], unique=False)
op.create_index('manifestlabel_manifest_id_label_id', 'manifestlabel', ['manifest_id', 'label_id'], unique=True)
op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False)
op.create_table('manifestlegacyimage',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('image_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['image_id'], ['image.id'], name=op.f('fk_manifestlegacyimage_image_id_image')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlegacyimage_manifest_id_manifest')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlegacyimage_repository_id_repository')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlegacyimage'))
)
op.create_index('manifestlegacyimage_image_id', 'manifestlegacyimage', ['image_id'], unique=False)
op.create_index('manifestlegacyimage_manifest_id', 'manifestlegacyimage', ['manifest_id'], unique=True)
op.create_index('manifestlegacyimage_repository_id', 'manifestlegacyimage', ['repository_id'], unique=False)
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=True)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
op.create_table('tagmanifestlabelmap',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=True),
sa.Column('label_id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_label_id', sa.Integer(), nullable=False),
sa.Column('manifest_label_id', sa.Integer(), nullable=True),
sa.Column('broken_manifest', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_tagmanifestlabelmap_label_id_label')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifestlabelmap_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_label_id'], ['manifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_manifest_label_id_manifestlabel')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_id_tagmanifest')),
sa.ForeignKeyConstraint(['tag_manifest_label_id'], ['tagmanifestlabel.id'], name=op.f('fk_tagmanifestlabelmap_tag_manifest_label_id_tagmanifestlabel')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifestlabelmap'))
)
op.create_index('tagmanifestlabelmap_broken_manifest', 'tagmanifestlabelmap', ['broken_manifest'], unique=False)
op.create_index('tagmanifestlabelmap_label_id', 'tagmanifestlabelmap', ['label_id'], unique=False)
op.create_index('tagmanifestlabelmap_manifest_id', 'tagmanifestlabelmap', ['manifest_id'], unique=False)
op.create_index('tagmanifestlabelmap_manifest_label_id', 'tagmanifestlabelmap', ['manifest_label_id'], unique=False)
op.create_index('tagmanifestlabelmap_tag_manifest_id', 'tagmanifestlabelmap', ['tag_manifest_id'], unique=False)
op.create_index('tagmanifestlabelmap_tag_manifest_label_id', 'tagmanifestlabelmap', ['tag_manifest_label_id'], unique=False)
# ### end Alembic commands ###
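# Register every Docker schema 1 manifest content type as a media type row.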
for media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
op.bulk_insert(tables.mediatype,
[
{'name': media_type},
])
# ### population of test data ### #
tester.populate_table('manifest', [
('digest', tester.TestDataType.String),
('manifest_bytes', tester.TestDataType.JSON),
('media_type_id', tester.TestDataType.Foreign('mediatype')),
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_table('manifestblob', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('repository_id', tester.TestDataType.Foreign('repository')),
('blob_id', tester.TestDataType.Foreign('imagestorage')),
('blob_index', tester.TestDataType.Integer),
])
tester.populate_table('manifestlabel', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('label_id', tester.TestDataType.Foreign('label')),
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_table('manifestlegacyimage', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('image_id', tester.TestDataType.Foreign('image')),
('repository_id', tester.TestDataType.Foreign('repository')),
])
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
tester.populate_table('tagmanifestlabelmap', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
('tag_manifest_label_id', tester.TestDataType.Foreign('tagmanifestlabel')),
('manifest_label_id', tester.TestDataType.Foreign('manifestlabel')),
('label_id', tester.TestDataType.Foreign('label')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
for media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
op.execute(tables
.mediatype
.delete()
.where(tables.
mediatype.c.name == op.inline_literal(media_type)))
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tagmanifestlabelmap')
op.drop_table('tagmanifesttomanifest')
op.drop_table('manifestlegacyimage')
op.drop_table('manifestlabel')
op.drop_table('manifestblob')
op.drop_table('manifest')
# ### end Alembic commands ###


@ -0,0 +1,31 @@
"""Create new notification type
Revision ID: 94836b099894
Revises: faf752bd2e0a
Create Date: 2016-11-30 10:29:51.519278
"""
# revision identifiers, used by Alembic.
revision = '94836b099894'
down_revision = 'faf752bd2e0a'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.bulk_insert(tables.externalnotificationevent,
[
{'name': 'build_cancelled'},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('build_cancelled')))


@ -0,0 +1,101 @@
"""back fill build expand_config
Revision ID: a6c463dfb9fe
Revises: b4df55dea4b3
Create Date: 2017-03-17 10:00:19.739858
"""
# revision identifiers, used by Alembic.
import json
import os
from app import app
from peewee import *
from data.database import BaseModel
revision = 'a6c463dfb9fe'
down_revision = 'b4df55dea4b3'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
class RepositoryBuildTrigger(BaseModel):
config = TextField(default='{}')
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
if not app.config.get('SETUP_COMPLETE', False):
return
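# Rewrite every stored build trigger config so the legacy 'subdir' value also yields
# 'context' and 'dockerfile_path' entries (see get_config_expand below).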
repositoryBuildTriggers = RepositoryBuildTrigger.select()
for repositoryBuildTrigger in repositoryBuildTriggers:
config = json.loads(repositoryBuildTrigger.config)
repositoryBuildTrigger.config = json.dumps(get_config_expand(config))
repositoryBuildTrigger.save()
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
if not app.config.get('SETUP_COMPLETE', False):
return
repositoryBuildTriggers = RepositoryBuildTrigger.select()
for repositoryBuildTrigger in repositoryBuildTriggers:
config = json.loads(repositoryBuildTrigger.config)
repositoryBuildTrigger.config = json.dumps(get_config_contract(config))
repositoryBuildTrigger.save()
def create_context(current_subdir):
if current_subdir == "":
current_subdir = os.path.sep + current_subdir
if current_subdir[len(current_subdir) - 1] != os.path.sep:
current_subdir += os.path.sep
context, _ = os.path.split(current_subdir)
return context
def create_dockerfile_path(current_subdir):
if current_subdir == "":
current_subdir = os.path.sep + current_subdir
if current_subdir[len(current_subdir) - 1] != os.path.sep:
current_subdir += os.path.sep
return current_subdir + "Dockerfile"
def get_config_expand(config):
""" A function to transform old records into new records """
if not config:
return config
# skip records that have been updated
if "context" in config or "dockerfile_path" in config:
return config
config_expand = {}
if "subdir" in config:
config_expand = dict(config)
config_expand["context"] = create_context(config["subdir"])
config_expand["dockerfile_path"] = create_dockerfile_path(config["subdir"])
return config_expand
def get_config_contract(config):
""" A function to delete context and dockerfile_path from config """
if not config:
return config
if "context" in config:
del config["context"]
if "dockerfile_path" in config:
del config["dockerfile_path"]
return config


@ -0,0 +1,53 @@
"""Add deleted namespace table
Revision ID: b4c2d45bc132
Revises: 152edccba18c
Create Date: 2018-02-27 11:43:02.329941
"""
# revision identifiers, used by Alembic.
revision = 'b4c2d45bc132'
down_revision = '152edccba18c'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('deletednamespace',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('namespace_id', sa.Integer(), nullable=False),
sa.Column('marked', sa.DateTime(), nullable=False),
sa.Column('original_username', sa.String(length=255), nullable=False),
sa.Column('original_email', sa.String(length=255), nullable=False),
sa.Column('queue_id', sa.String(length=255), nullable=True),
sa.ForeignKeyConstraint(['namespace_id'], ['user.id'], name=op.f('fk_deletednamespace_namespace_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_deletednamespace'))
)
op.create_index('deletednamespace_namespace_id', 'deletednamespace', ['namespace_id'], unique=True)
op.create_index('deletednamespace_original_email', 'deletednamespace', ['original_email'], unique=False)
op.create_index('deletednamespace_original_username', 'deletednamespace', ['original_username'], unique=False)
op.create_index('deletednamespace_queue_id', 'deletednamespace', ['queue_id'], unique=False)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('deletednamespace', [
('namespace_id', tester.TestDataType.Foreign('user')),
('marked', tester.TestDataType.DateTime),
('original_username', tester.TestDataType.UTF8Char),
('original_email', tester.TestDataType.String),
('queue_id', tester.TestDataType.Foreign('queueitem')),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('deletednamespace')
# ### end Alembic commands ###


@ -0,0 +1,51 @@
"""add repository kind
Revision ID: b4df55dea4b3
Revises: b8ae68ad3e52
Create Date: 2017-03-19 12:59:41.484430
"""
# revision identifiers, used by Alembic.
revision = 'b4df55dea4b3'
down_revision = 'b8ae68ad3e52'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.create_table(
'repositorykind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_repositorykind'))
)
op.create_index('repositorykind_name', 'repositorykind', ['name'], unique=True)
op.bulk_insert(
tables.repositorykind,
[
{'id': 1, 'name': 'image'},
{'id': 2, 'name': 'application'},
],
)
op.add_column(u'repository', sa.Column('kind_id', sa.Integer(), nullable=False, server_default='1'))
op.create_index('repository_kind_id', 'repository', ['kind_id'], unique=False)
op.create_foreign_key(op.f('fk_repository_kind_id_repositorykind'), 'repository', 'repositorykind', ['kind_id'], ['id'])
# ### population of test data ### #
tester.populate_column('repository', 'kind_id', tester.TestDataType.Foreign('repositorykind'))
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_constraint(op.f('fk_repository_kind_id_repositorykind'), 'repository', type_='foreignkey')
op.drop_index('repository_kind_id', table_name='repository')
op.drop_column(u'repository', 'kind_id')
op.drop_table('repositorykind')


@ -0,0 +1,46 @@
"""Add RobotAccountMetadata table
Revision ID: b547bc139ad8
Revises: 0cf50323c78b
Create Date: 2018-03-09 15:50:48.298880
"""
# revision identifiers, used by Alembic.
revision = 'b547bc139ad8'
down_revision = '0cf50323c78b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('robotaccountmetadata',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('robot_account_id', sa.Integer(), nullable=False),
sa.Column('description', UTF8CharField(length=255), nullable=False),
sa.Column('unstructured_json', sa.Text(), nullable=False),
sa.ForeignKeyConstraint(['robot_account_id'], ['user.id'], name=op.f('fk_robotaccountmetadata_robot_account_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_robotaccountmetadata'))
)
op.create_index('robotaccountmetadata_robot_account_id', 'robotaccountmetadata', ['robot_account_id'], unique=True)
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('robotaccountmetadata', [
('robot_account_id', tester.TestDataType.Foreign('user')),
('description', tester.TestDataType.UTF8Char),
('unstructured_json', tester.TestDataType.JSON),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('robotaccountmetadata')
# ### end Alembic commands ###


@ -0,0 +1,37 @@
"""Change BlobUpload fields to BigIntegers to allow layers > 8GB
Revision ID: b8ae68ad3e52
Revises: 7a525c68eb13
Create Date: 2017-02-27 11:26:49.182349
"""
# revision identifiers, used by Alembic.
revision = 'b8ae68ad3e52'
down_revision = '7a525c68eb13'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.alter_column('blobupload', 'byte_count', existing_type=sa.Integer(), type_=sa.BigInteger())
op.alter_column('blobupload', 'uncompressed_byte_count', existing_type=sa.Integer(), type_=sa.BigInteger())
# ### population of test data ### #
tester.populate_column('blobupload', 'byte_count', tester.TestDataType.BigInteger)
tester.populate_column('blobupload', 'uncompressed_byte_count', tester.TestDataType.BigInteger)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### population of test data ### #
tester.populate_column('blobupload', 'byte_count', tester.TestDataType.Integer)
tester.populate_column('blobupload', 'uncompressed_byte_count', tester.TestDataType.Integer)
# ### end population of test data ### #
op.alter_column('blobupload', 'byte_count', existing_type=sa.BigInteger(), type_=sa.Integer())
op.alter_column('blobupload', 'uncompressed_byte_count', existing_type=sa.BigInteger(), type_=sa.Integer())


@ -0,0 +1,35 @@
"""Add lifetime end indexes to tag tables
Revision ID: b9045731c4de
Revises: e184af42242d
Create Date: 2019-02-14 17:18:40.474310
"""
# revision identifiers, used by Alembic.
revision = 'b9045731c4de'
down_revision = 'e184af42242d'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_index('repositorytag_repository_id_lifetime_end_ts', 'repositorytag', ['repository_id', 'lifetime_end_ts'], unique=False)
op.create_index('tag_repository_id_lifetime_end_ms', 'tag', ['repository_id', 'lifetime_end_ms'], unique=False)
op.create_index('repositorytag_repository_id_lifetime_start_ts', 'repositorytag', ['repository_id', 'lifetime_start_ts'], unique=False)
op.create_index('tag_repository_id_lifetime_start_ms', 'tag', ['repository_id', 'lifetime_start_ms'], unique=False)
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('tag_repository_id_lifetime_end_ms', table_name='tag')
op.drop_index('repositorytag_repository_id_lifetime_end_ts', table_name='repositorytag')
op.drop_index('tag_repository_id_lifetime_start_ms', table_name='tag')
op.drop_index('repositorytag_repository_id_lifetime_start_ts', table_name='repositorytag')
# ### end Alembic commands ###


@ -0,0 +1,71 @@
"""Run full tag backfill
Revision ID: b918abdbee43
Revises: 481623ba00ba
Create Date: 2019-03-14 13:38:03.411609
"""
# revision identifiers, used by Alembic.
revision = 'b918abdbee43'
down_revision = '481623ba00ba'
import logging.config
from app import app
from peewee import JOIN, fn
from workers.tagbackfillworker import backfill_tag
from data.database import RepositoryTag, Repository, User, TagToRepositoryTag
from util.log import logfile_path
logger = logging.getLogger(__name__)
def upgrade(tables, tester, progress_reporter):
if not app.config.get('SETUP_COMPLETE', False):
return
start_id = 0
end_id = 1000
size = 1000
max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
if max_id is None:
return
logger.info("Found maximum ID %s" % max_id)
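# Walk RepositoryTag ids in windows of `size`, doubling the window (while it is below 100000)
# whenever a window yields fewer than 1000 rows, and backfill each non-hidden tag that has no
# TagToRepositoryTag row yet.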
while True:
if start_id > max_id:
break
logger.info('Checking tag range %s - %s', start_id, end_id)
r = list(RepositoryTag
.select()
.join(Repository)
.switch(RepositoryTag)
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None)
.where(RepositoryTag.hidden == False,
RepositoryTag.id >= start_id,
RepositoryTag.id < end_id))
if len(r) < 1000 and size < 100000:
size *= 2
start_id = end_id
end_id = start_id + size
if not len(r):
continue
logger.info('Found %s tags to backfill', len(r))
for index, t in enumerate(r):
logger.info("Backfilling tag %s of %s", index, len(r))
backfill_tag(t)
def downgrade(tables, tester, progress_reporter):
# Nothing to do.
pass


@ -0,0 +1,52 @@
"""Add TeamSync table
Revision ID: be8d1c402ce0
Revises: a6c463dfb9fe
Create Date: 2017-02-23 13:34:52.356812
"""
# revision identifiers, used by Alembic.
revision = 'be8d1c402ce0'
down_revision = 'a6c463dfb9fe'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from util.migrate import UTF8LongText
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.create_table('teamsync',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('team_id', sa.Integer(), nullable=False),
sa.Column('transaction_id', sa.String(length=255), nullable=False),
sa.Column('last_updated', sa.DateTime(), nullable=True),
sa.Column('service_id', sa.Integer(), nullable=False),
sa.Column('config', UTF8LongText(), nullable=False),
sa.ForeignKeyConstraint(['service_id'], ['loginservice.id'], name=op.f('fk_teamsync_service_id_loginservice')),
sa.ForeignKeyConstraint(['team_id'], ['team.id'], name=op.f('fk_teamsync_team_id_team')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_teamsync'))
)
op.create_index('teamsync_last_updated', 'teamsync', ['last_updated'], unique=False)
op.create_index('teamsync_service_id', 'teamsync', ['service_id'], unique=False)
op.create_index('teamsync_team_id', 'teamsync', ['team_id'], unique=True)
### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('teamsync', [
('team_id', tester.TestDataType.Foreign('team')),
('transaction_id', tester.TestDataType.String),
('last_updated', tester.TestDataType.DateTime),
('service_id', tester.TestDataType.Foreign('loginservice')),
('config', tester.TestDataType.JSON),
])
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
### commands auto generated by Alembic - please adjust! ###
op.drop_table('teamsync')
### end Alembic commands ###


@ -0,0 +1,34 @@
"""Add schema2 media types
Revision ID: c00a1f15968b
Revises: 67f0abd172ae
Create Date: 2018-11-13 09:20:21.968503
"""
# revision identifiers, used by Alembic.
revision = 'c00a1f15968b'
down_revision = '67f0abd172ae'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
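# Register every Docker schema 2 content type as a media type row.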
for media_type in DOCKER_SCHEMA2_CONTENT_TYPES:
op.bulk_insert(tables.mediatype,
[
{'name': media_type},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
for media_type in DOCKER_SCHEMA2_CONTENT_TYPES:
op.execute(tables
.mediatype
.delete()
.where(tables.
mediatype.c.name == op.inline_literal(media_type)))


@ -0,0 +1,82 @@
"""Remove unencrypted fields and data
Revision ID: c059b952ed76
Revises: 703298a825c2
Create Date: 2019-08-19 16:31:00.952773
"""
# revision identifiers, used by Alembic.
revision = 'c059b952ed76'
down_revision = '703298a825c2'
import uuid
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from data.database import FederatedLogin, User, RobotAccountToken
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('oauthaccesstoken_refresh_token', table_name='oauthaccesstoken')
op.drop_column(u'oauthaccesstoken', 'refresh_token')
op.drop_column('accesstoken', 'code')
op.drop_column('appspecificauthtoken', 'token_code')
op.drop_column('oauthaccesstoken', 'access_token')
op.drop_column('oauthapplication', 'client_secret')
op.drop_column('oauthauthorizationcode', 'code')
op.drop_column('repositorybuildtrigger', 'private_key')
op.drop_column('repositorybuildtrigger', 'auth_token')
# ### end Alembic commands ###
# Overwrite all plaintext robot credentials.
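# Repeatedly pick an un-migrated RobotAccountToken, replace the robot's email and
# federated-login service_ident with values that carry no credential material, and mark the
# token row as fully migrated.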
while True:
try:
robot_account_token = RobotAccountToken.get(fully_migrated=False)
robot_account = robot_account_token.robot_account
robot_account.email = str(uuid.uuid4())
robot_account.save()
federated_login = FederatedLogin.get(user=robot_account)
federated_login.service_ident = 'robot:%s' % robot_account.id
federated_login.save()
robot_account_token.fully_migrated = True
robot_account_token.save()
except RobotAccountToken.DoesNotExist:
break
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(u'oauthaccesstoken', sa.Column('refresh_token', sa.String(length=255), nullable=True))
op.create_index('oauthaccesstoken_refresh_token', 'oauthaccesstoken', ['refresh_token'], unique=False)
op.add_column('repositorybuildtrigger', sa.Column('auth_token', sa.String(length=255), nullable=True))
op.add_column('repositorybuildtrigger', sa.Column('private_key', sa.Text(), nullable=True))
op.add_column('oauthauthorizationcode', sa.Column('code', sa.String(length=255), nullable=True))
op.create_index('oauthauthorizationcode_code', 'oauthauthorizationcode', ['code'], unique=True)
op.add_column('oauthapplication', sa.Column('client_secret', sa.String(length=255), nullable=True))
op.add_column('oauthaccesstoken', sa.Column('access_token', sa.String(length=255), nullable=True))
op.create_index('oauthaccesstoken_access_token', 'oauthaccesstoken', ['access_token'], unique=False)
op.add_column('appspecificauthtoken', sa.Column('token_code', sa.String(length=255), nullable=True))
op.create_index('appspecificauthtoken_token_code', 'appspecificauthtoken', ['token_code'], unique=True)
op.add_column('accesstoken', sa.Column('code', sa.String(length=255), nullable=True))
op.create_index('accesstoken_code', 'accesstoken', ['code'], unique=True)
# ### end Alembic commands ###


@ -0,0 +1,104 @@
"""Add new fields and tables for encrypted tokens
Revision ID: c13c8052f7a6
Revises: 5248ddf35167
Create Date: 2019-08-19 15:59:36.269155
"""
# revision identifiers, used by Alembic.
revision = 'c13c8052f7a6'
down_revision = '5248ddf35167'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
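# The new *_name / *_code / secure_* columns are added alongside the existing plaintext
# credential columns; the plaintext columns are removed in a later migration
# (c059b952ed76, "Remove unencrypted fields and data").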
op.create_table('robotaccounttoken',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('robot_account_id', sa.Integer(), nullable=False),
sa.Column('token', sa.String(length=255), nullable=False),
sa.Column('fully_migrated', sa.Boolean(), nullable=False, server_default='0'),
sa.ForeignKeyConstraint(['robot_account_id'], ['user.id'], name=op.f('fk_robotaccounttoken_robot_account_id_user')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_robotaccounttoken'))
)
op.create_index('robotaccounttoken_robot_account_id', 'robotaccounttoken', ['robot_account_id'], unique=True)
op.add_column(u'accesstoken', sa.Column('token_code', sa.String(length=255), nullable=True))
op.add_column(u'accesstoken', sa.Column('token_name', sa.String(length=255), nullable=True))
op.create_index('accesstoken_token_name', 'accesstoken', ['token_name'], unique=True)
op.add_column(u'appspecificauthtoken', sa.Column('token_name', sa.String(length=255), nullable=True))
op.add_column(u'appspecificauthtoken', sa.Column('token_secret', sa.String(length=255), nullable=True))
op.create_index('appspecificauthtoken_token_name', 'appspecificauthtoken', ['token_name'], unique=True)
op.add_column(u'emailconfirmation', sa.Column('verification_code', sa.String(length=255), nullable=True))
op.add_column(u'oauthaccesstoken', sa.Column('token_code', sa.String(length=255), nullable=True))
op.add_column(u'oauthaccesstoken', sa.Column('token_name', sa.String(length=255), nullable=True))
op.create_index('oauthaccesstoken_token_name', 'oauthaccesstoken', ['token_name'], unique=True)
op.add_column(u'oauthapplication', sa.Column('secure_client_secret', sa.String(length=255), nullable=True))
op.add_column(u'oauthapplication', sa.Column('fully_migrated', sa.Boolean(), server_default='0', nullable=False))
op.add_column(u'oauthauthorizationcode', sa.Column('code_credential', sa.String(length=255), nullable=True))
op.add_column(u'oauthauthorizationcode', sa.Column('code_name', sa.String(length=255), nullable=True))
op.create_index('oauthauthorizationcode_code_name', 'oauthauthorizationcode', ['code_name'], unique=True)
op.drop_index('oauthauthorizationcode_code', table_name='oauthauthorizationcode')
op.create_index('oauthauthorizationcode_code', 'oauthauthorizationcode', ['code'], unique=True)
op.add_column(u'repositorybuildtrigger', sa.Column('secure_auth_token', sa.String(length=255), nullable=True))
op.add_column(u'repositorybuildtrigger', sa.Column('secure_private_key', sa.Text(), nullable=True))
op.add_column(u'repositorybuildtrigger', sa.Column('fully_migrated', sa.Boolean(), server_default='0', nullable=False))
# ### end Alembic commands ###
# ### population of test data ### #
tester.populate_table('robotaccounttoken', [
('robot_account_id', tester.TestDataType.Foreign('user')),
('token', tester.TestDataType.Token),
('fully_migrated', tester.TestDataType.Boolean),
])
tester.populate_column('accesstoken', 'code', tester.TestDataType.Token)
tester.populate_column('appspecificauthtoken', 'token_code', tester.TestDataType.Token)
tester.populate_column('emailconfirmation', 'verification_code', tester.TestDataType.Token)
tester.populate_column('oauthaccesstoken', 'token_code', tester.TestDataType.Token)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column(u'repositorybuildtrigger', 'secure_private_key')
op.drop_column(u'repositorybuildtrigger', 'secure_auth_token')
op.drop_index('oauthauthorizationcode_code', table_name='oauthauthorizationcode')
op.create_index('oauthauthorizationcode_code', 'oauthauthorizationcode', ['code'], unique=False)
op.drop_index('oauthauthorizationcode_code_name', table_name='oauthauthorizationcode')
op.drop_column(u'oauthauthorizationcode', 'code_name')
op.drop_column(u'oauthauthorizationcode', 'code_credential')
op.drop_column(u'oauthapplication', 'secure_client_secret')
op.drop_index('oauthaccesstoken_token_name', table_name='oauthaccesstoken')
op.drop_column(u'oauthaccesstoken', 'token_name')
op.drop_column(u'oauthaccesstoken', 'token_code')
op.drop_column(u'emailconfirmation', 'verification_code')
op.drop_index('appspecificauthtoken_token_name', table_name='appspecificauthtoken')
op.drop_column(u'appspecificauthtoken', 'token_secret')
op.drop_column(u'appspecificauthtoken', 'token_name')
op.drop_index('accesstoken_token_name', table_name='accesstoken')
op.drop_column(u'accesstoken', 'token_name')
op.drop_column(u'accesstoken', 'token_code')
op.drop_table('robotaccounttoken')
# ### end Alembic commands ###

File diff suppressed because it is too large


@ -0,0 +1,26 @@
"""Backfill RepositorySearchScore table
Revision ID: c3d4b7ebcdf7
Revises: f30984525c86
Create Date: 2017-04-13 12:01:59.572775
"""
# revision identifiers, used by Alembic.
revision = 'c3d4b7ebcdf7'
down_revision = 'f30984525c86'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Add a 0 entry into the RepositorySearchScore table for each repository that isn't present
conn = op.get_bind()
conn.execute("insert into repositorysearchscore (repository_id, score) SELECT id, 0 FROM " +
"repository WHERE id not in (select repository_id from repositorysearchscore)")
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
pass


@ -0,0 +1,25 @@
"""Drop checksum on ImageStorage
Revision ID: c91c564aad34
Revises: 152bb29a1bb3
Create Date: 2018-02-21 12:17:52.405644
"""
# revision identifiers, used by Alembic.
revision = 'c91c564aad34'
down_revision = '152bb29a1bb3'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_column('imagestorage', 'checksum')
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.add_column('imagestorage', sa.Column('checksum', sa.String(length=255), nullable=True))


@ -0,0 +1,30 @@
"""Add user location field
Revision ID: cbc8177760d9
Revises: 7367229b38d9
Create Date: 2018-02-02 17:39:16.589623
"""
# revision identifiers, used by Alembic.
revision = 'cbc8177760d9'
down_revision = '7367229b38d9'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from util.migrate import UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.add_column('user', sa.Column('location', UTF8CharField(length=255), nullable=True))
# ### population of test data ### #
tester.populate_column('user', 'location', tester.TestDataType.UTF8Char)
# ### end population of test data ### #
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.drop_column('user', 'location')


@ -0,0 +1,68 @@
"""repository mirror notification
Revision ID: cc6778199cdb
Revises: c059b952ed76
Create Date: 2019-10-03 17:41:23.316914
"""
# revision identifiers, used by Alembic.
revision = 'cc6778199cdb'
down_revision = 'c059b952ed76'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
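# Register notification kinds and external notification events for the repository mirror
# sync lifecycle (started / success / failed).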
op.bulk_insert(tables.notificationkind,
[
{'name': 'repo_mirror_sync_started'},
{'name': 'repo_mirror_sync_success'},
{'name': 'repo_mirror_sync_failed'},
])
op.bulk_insert(tables.externalnotificationevent,
[
{'name': 'repo_mirror_sync_started'},
{'name': 'repo_mirror_sync_success'},
{'name': 'repo_mirror_sync_failed'},
])
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
op.execute(tables
.notificationkind
.delete()
.where(tables.
notificationkind.c.name == op.inline_literal('repo_mirror_sync_started')))
op.execute(tables
.notificationkind
.delete()
.where(tables.
notificationkind.c.name == op.inline_literal('repo_mirror_sync_success')))
op.execute(tables
.notificationkind
.delete()
.where(tables.
notificationkind.c.name == op.inline_literal('repo_mirror_sync_failed')))
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('repo_mirror_sync_started')))
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('repo_mirror_sync_success')))
op.execute(tables
.externalnotificationevent
.delete()
.where(tables.
externalnotificationevent.c.name == op.inline_literal('repo_mirror_sync_failed')))


@ -0,0 +1,192 @@
"""Delete old Appr tables
Revision ID: d17c695859ea
Revises: 5d463ea1e8a8
Create Date: 2018-07-16 15:21:11.593040
"""
# revision identifiers, used by Alembic.
revision = 'd17c695859ea'
down_revision = '5d463ea1e8a8'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.sql import table, column
from util.migrate import UTF8LongText, UTF8CharField
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
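# Drop the legacy Appr (CNR) data model tables; the downgrade below recreates the same schema
# and re-seeds the blobplacementlocation and tagkind rows.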
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('tag')
op.drop_table('manifestlistmanifest')
op.drop_table('manifestlist')
op.drop_table('manifestblob')
op.drop_table('manifest')
op.drop_table('blobplacement')
op.drop_table('blob')
op.drop_table('blobplacementlocation')
op.drop_table('tagkind')
# ### end Alembic commands ###
def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
'tagkind',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagkind'))
)
op.create_index('tagkind_name', 'tagkind', ['name'], unique=True)
op.create_table(
'blobplacementlocation',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacementlocation'))
)
op.create_index('blobplacementlocation_name', 'blobplacementlocation', ['name'], unique=True)
op.create_table(
'blob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('size', sa.BigInteger(), nullable=False),
sa.Column('uncompressed_size', sa.BigInteger(), nullable=True),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_blob_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blob'))
)
op.create_index('blob_digest', 'blob', ['digest'], unique=True)
op.create_index('blob_media_type_id', 'blob', ['media_type_id'], unique=False)
op.create_table(
'manifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.Column('manifest_json', UTF8LongText, nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest'))
)
op.create_index('manifest_digest', 'manifest', ['digest'], unique=True)
op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False)
op.create_table(
'manifestlist',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('digest', sa.String(length=255), nullable=False),
sa.Column('manifest_list_json', UTF8LongText, nullable=False),
sa.Column('schema_version', UTF8CharField(length=255), nullable=False),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlist_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlist'))
)
op.create_index('manifestlist_digest', 'manifestlist', ['digest'], unique=True)
op.create_index('manifestlist_media_type_id', 'manifestlist', ['media_type_id'], unique=False)
op.create_table(
'blobplacement',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.Column('location_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_blobplacement_blob_id_blob')),
sa.ForeignKeyConstraint(['location_id'], ['blobplacementlocation.id'], name=op.f('fk_blobplacement_location_id_blobplacementlocation')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_blobplacement'))
)
op.create_index('blobplacement_blob_id', 'blobplacement', ['blob_id'], unique=False)
op.create_index('blobplacement_blob_id_location_id', 'blobplacement', ['blob_id', 'location_id'], unique=True)
op.create_index('blobplacement_location_id', 'blobplacement', ['location_id'], unique=False)
op.create_table(
'manifestblob',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('blob_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['blob_id'], ['blob.id'], name=op.f('fk_manifestblob_blob_id_blob')),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob'))
)
op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False)
op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False)
op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True)
op.create_table(
'manifestlistmanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('operating_system', UTF8CharField(length=255), nullable=True),
sa.Column('architecture', UTF8CharField(length=255), nullable=True),
sa.Column('platform_json', UTF8LongText, nullable=True),
sa.Column('media_type_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlistmanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_manifestlistmanifest_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifestlistmanifest_media_type_id_mediatype')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlistmanifest'))
)
op.create_index('manifestlistmanifest_manifest_id', 'manifestlistmanifest', ['manifest_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_list_id', 'manifestlistmanifest', ['manifest_list_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_os_arch_mtid', 'manifestlistmanifest', ['manifest_list_id', 'operating_system', 'architecture', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_manifest_listid_mtid', 'manifestlistmanifest', ['manifest_list_id', 'media_type_id'], unique=False)
op.create_index('manifestlistmanifest_media_type_id', 'manifestlistmanifest', ['media_type_id'], unique=False)
op.create_table(
'tag',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', UTF8CharField(length=190), nullable=False),
sa.Column('repository_id', sa.Integer(), nullable=False),
sa.Column('manifest_list_id', sa.Integer(), nullable=True),
sa.Column('lifetime_start', sa.BigInteger(), nullable=False),
sa.Column('lifetime_end', sa.BigInteger(), nullable=True),
sa.Column('hidden', sa.Boolean(), nullable=False),
sa.Column('reverted', sa.Boolean(), nullable=False),
sa.Column('protected', sa.Boolean(), nullable=False),
sa.Column('tag_kind_id', sa.Integer(), nullable=False),
sa.Column('linked_tag_id', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['linked_tag_id'], ['tag.id'], name=op.f('fk_tag_linked_tag_id_tag')),
sa.ForeignKeyConstraint(['manifest_list_id'], ['manifestlist.id'], name=op.f('fk_tag_manifest_list_id_manifestlist')),
sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_tag_repository_id_repository')),
sa.ForeignKeyConstraint(['tag_kind_id'], ['tagkind.id'], name=op.f('fk_tag_tag_kind_id_tagkind')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tag'))
)
op.create_index('tag_lifetime_end', 'tag', ['lifetime_end'], unique=False)
op.create_index('tag_linked_tag_id', 'tag', ['linked_tag_id'], unique=False)
op.create_index('tag_manifest_list_id', 'tag', ['manifest_list_id'], unique=False)
op.create_index('tag_repository_id', 'tag', ['repository_id'], unique=False)
op.create_index('tag_repository_id_name_hidden', 'tag', ['repository_id', 'name', 'hidden'], unique=False)
op.create_index('tag_repository_id_name_lifetime_end', 'tag', ['repository_id', 'name', 'lifetime_end'], unique=True)
op.create_index('tag_repository_id_name', 'tag', ['repository_id', 'name'], unique=False)
op.create_index('tag_tag_kind_id', 'tag', ['tag_kind_id'], unique=False)
# ### end Alembic commands ###
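  # Seed the lookup tables created above: the default blob placement locations and
  # the tag kinds referenced by the 'tag' table.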
blobplacementlocation_table = table('blobplacementlocation',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
blobplacementlocation_table,
[
{'name': 'local_eu'},
{'name': 'local_us'},
],
)
tagkind_table = table('tagkind',
column('id', sa.Integer()),
column('name', sa.String()),
)
op.bulk_insert(
tagkind_table,
[
{'id': 1, 'name': 'tag'},
{'id': 2, 'name': 'release'},
{'id': 3, 'name': 'channel'},
]
)


@@ -0,0 +1,36 @@
"""Backfill state_id and make it unique
Revision ID: d42c175b439a
Revises: 3e8cc74a1e7b
Create Date: 2017-01-18 15:11:01.635632
"""
# revision identifiers, used by Alembic.
revision = 'd42c175b439a'
down_revision = '3e8cc74a1e7b'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# Backfill the queueitem table's state_id field with unique values for all entries which are
# empty.
conn = op.get_bind()
conn.execute("update queueitem set state_id = id where state_id = ''")
# ### commands auto generated by Alembic - please adjust! ###
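  # Drop the old non-unique index and recreate it as unique now that every row has a
  # distinct state_id.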
op.drop_index('queueitem_state_id', table_name='queueitem')
op.create_index('queueitem_state_id', 'queueitem', ['state_id'], unique=True)
# ### end Alembic commands ###


def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('queueitem_state_id', table_name='queueitem')
op.create_index('queueitem_state_id', 'queueitem', ['state_id'], unique=False)
# ### end Alembic commands ###


@@ -0,0 +1,28 @@
"""Add change_tag_expiration log type
Revision ID: d8989249f8f6
Revises: dc4af11a5f90
Create Date: 2017-06-21 21:18:25.948689
"""
# revision identifiers, used by Alembic.
revision = 'd8989249f8f6'
down_revision = 'dc4af11a5f90'
from alembic import op as original_op
from data.migrations.progress import ProgressWrapper
def upgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
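  # Register the new 'change_tag_expiration' log entry kind so changes to tag
  # expiration can be logged.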
op.bulk_insert(tables.logentrykind, [
{'name': 'change_tag_expiration'},
])


def downgrade(tables, tester, progress_reporter):
op = ProgressWrapper(original_op, progress_reporter)
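  # Remove the 'change_tag_expiration' log entry kind added in upgrade().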
op.execute(tables
.logentrykind
.delete()
.where(tables.logentrykind.c.name == op.inline_literal('change_tag_expiration')))
