quay/data/model/repository.py

import logging

from peewee import JOIN_LEFT_OUTER, fn
from datetime import timedelta, datetime

from data.model import (DataModelException, tag, db_transaction, storage, image, permission,
                        _basequery, config)
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,
                           Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,
                           Role, RepositoryAuthorizedEmail, db_for_update, get_epoch_timestamp,
                           db_random_func)


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def create_repository(namespace, name, creating_user, visibility='private'):
  """ Creates the repository `name` under the namespace user `namespace` and returns it.

      When `creating_user` is given and is not an organization, that user is granted the
      'admin' role on the new repository. If that user's username differs from `namespace`,
      the default permissions are applied as well.
  """
  visibility_row = Visibility.get(name=visibility)
  owner = User.get(username=namespace)
  new_repo = Repository.create(name=name, visibility=visibility_row, namespace_user=owner)
  admin_role = Role.get(name='admin')

  # No creator, or the creator is an organization: there is nobody to grant permissions to.
  if not creating_user or creating_user.organization:
    return new_repo

  RepositoryPermission.create(user=creating_user, repository=new_repo, role=admin_role)

  # Permission prototypes only work for orgs
  if creating_user.username != namespace:
    permission.apply_default_permissions(new_repo, creating_user)

  return new_repo


def get_repository(namespace_name, repository_name):
  """ Returns the repository `namespace_name/repository_name`, or None if the lookup raises
      Repository.DoesNotExist.
  """
  try:
    found = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    found = None

  return found


def _purge_all_repository_tags(namespace_name, repository_name):
  """ Deletes every RepositoryTag row of the repository in a single delete query.

      Raises DataModelException if the repository does not exist.
  """
  try:
    target = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    message = 'Invalid repository \'%s/%s\'' % (namespace_name, repository_name)
    raise DataModelException(message)

  purge_query = RepositoryTag.delete().where(RepositoryTag.repository == target.id)
  purge_query.execute()


def purge_repository(namespace_name, repository_name):
  """ Completely removes a repository: its tags, its unreferenced images and their storage,
      and finally the repository row together with its dependent rows.

      Raises DataModelException if the repository does not exist.
  """
  # Delete all tags to allow gc to reclaim storage
  _purge_all_repository_tags(namespace_name, repository_name)

  fetched = _basequery.get_existing_repository(namespace_name, repository_name)

  # Gc to remove the images and storage. This calls garbage_collect_repo directly instead of
  # garbage_collect_repository: the latter returns without doing anything for namespaces in
  # the async GC experiment, and once the tags and the repository row are deleted there is
  # nothing left for find_repository_with_garbage to discover, so the storage would
  # presumably never be reclaimed.
  garbage_collect_repo(fetched)

  # Delete the rest of the repository metadata
  fetched.delete_instance(recursive=True, delete_nullable=False)


def find_repository_with_garbage(filter_list=None):
  """ Returns a randomly chosen repository that has at least one tag whose lifetime ended
      longer ago than its namespace's removed_tag_expiration_s, or None if there is none.

      `filter_list`, when given, restricts the search to namespaces with those usernames; an
      empty (but not None) filter matches nothing.
  """
  # TODO(jschorr): Remove the filter once we have turned the experiment on for everyone.
  filter_is_empty = filter_list is not None and not filter_list
  if filter_is_empty:
    return None

  now_ts = get_epoch_timestamp()

  try:
    # Tags that have ended and whose expiration window has fully elapsed.
    has_ended = ~(RepositoryTag.lifetime_end_ts >> None)
    has_expired = RepositoryTag.lifetime_end_ts <= (now_ts - Namespace.removed_tag_expiration_s)

    candidates = (RepositoryTag
                  .select(RepositoryTag.repository)
                  .join(Repository)
                  .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                  .where(has_ended, has_expired)
                  .limit(500)
                  .alias('candidates'))

    if filter_list:
      candidates = candidates.where(Namespace.username << filter_list)

    # Pick one candidate at random out of the (at most 500) rows of the subquery.
    random_pick = (RepositoryTag
                   .select(candidates.c.repository_id)
                   .from_(candidates)
                   .order_by(db_random_func()))
    found = random_pick.get()

    if found is None:
      return None

    return Repository.get(Repository.id == found.repository_id)
  except (RepositoryTag.DoesNotExist, Repository.DoesNotExist):
    return None


def garbage_collect_repository(namespace_name, repository_name):
  """ Garbage collects the named repository, unless its namespace is listed in the
      EXP_ASYNC_GARBAGE_COLLECTION app config entry or the repository cannot be found.
  """
  # If the namespace is the async experiment, don't perform garbage collection here.
  # TODO(jschorr): Remove this check once we have turned the experiment on for everyone.
  async_namespaces = config.app_config.get('EXP_ASYNC_GARBAGE_COLLECTION', [])
  if namespace_name in async_namespaces:
    return

  repo = get_repository(namespace_name, repository_name)
  if repo is None:
    return

  garbage_collect_repo(repo)


def garbage_collect_repo(repo):
  """ Garbage collects the given repository: first its tags, then every image in the
      repository that is neither tagged nor an ancestor of a tagged image, and finally the
      storage that the removed images pointed at.
  """
  logger.debug('Garbage collecting repository %s', repo.id)
  tag.garbage_collect_tags(repo)

  with db_transaction():
    # All images in the repository that still have a tag pointing at them.
    tagged_images = (Image
                     .select(Image.id, Image.ancestors)
                     .join(RepositoryTag)
                     .where(Image.repository == repo))

    # A tagged image keeps itself and its whole ancestor chain alive.
    referenced_ancestors = set()
    for tagged_image in tagged_images:
      # The ancestor list is in the format '/1/2/3/', extract just the ids
      for ancestor_id in tagged_image.ancestors.split('/')[1:-1]:
        referenced_ancestors.add(int(ancestor_id))
      referenced_ancestors.add(tagged_image.id)

    # Only the image id and its storage are needed, so the rows are fetched as plain
    # (image id, storage) tuples, skipping peewee's model construction, and fed straight
    # into dict(), which accepts an iterable of (key, value) pairs.
    id_storage_pairs = (Image
                        .select(Image.id, Image.storage)
                        .where(Image.repository == repo)
                        .tuples())
    images_to_storages = dict(id_storage_pairs)
    to_remove = set(images_to_storages) - referenced_ancestors

    if to_remove:
      logger.info('Cleaning up unreferenced images: %s', to_remove)
      storage_id_whitelist = {images_to_storages[image_id] for image_id in to_remove}
      Image.delete().where(Image.id << list(to_remove)).execute()

  # Storage collection runs after the transaction block has exited.
  if to_remove:
    logger.info('Garbage collecting storage for images: %s', to_remove)
    storage.garbage_collect_storage(storage_id_whitelist)


def star_repository(user, repository):
  """ Stars a repository.

      Creates a Star row linking `user.id` to `repository.id`.
  """
  # Model.create() already persists the new row, so no separate save() is needed; calling it
  # would only issue a redundant write of the same values.
  Star.create(user=user.id, repository=repository.id)


def unstar_repository(user, repository):
  """ Unstars a repository.

      Raises DataModelException if the user had not starred the repository.
  """
  # A delete query never raises DoesNotExist; executing it reports how many rows were
  # removed, so a missing star has to be detected from that count.
  deleted = (Star
             .delete()
             .where(Star.repository == repository.id, Star.user == user.id)
             .execute())

  if deleted == 0:
    raise DataModelException('Star not found.')


def get_user_starred_repositories(user, limit=None, page=None):
  """ Builds the query for all repositories starred by `user`, selecting the repository
      together with its joined User and Visibility rows.

      With a `limit` the result is capped; with both `page` and `limit` it is paginated.
      A `page` without a `limit` is ignored.
  """
  starred = (Repository
             .select(Repository, User, Visibility)
             .join(Star)
             .switch(Repository)
             .join(User)
             .switch(Repository)
             .join(Visibility)
             .where(Star.user == user))

  if not limit:
    return starred

  if page:
    return starred.paginate(page, limit)

  return starred.limit(limit)


def repository_is_starred(user, repository):
  """ Returns True if a Star row exists for this user and repository, False otherwise. """
  star_lookup = (Star
                 .select()
                 .where(Star.repository == repository.id, Star.user == user.id))

  try:
    star_lookup.get()
  except Star.DoesNotExist:
    return False

  return True


def get_when_last_modified(repository_ids):
  """ Returns a dict mapping each selected RepositoryTag.repository value to the largest
      lifetime_start_ts among that repository's tags, for the given repository ids.

      Returns an empty dict when no ids are given.
  """
  if not repository_ids:
    return {}

  newest_tag_rows = (RepositoryTag
                     .select(RepositoryTag.repository, fn.Max(RepositoryTag.lifetime_start_ts))
                     .where(RepositoryTag.repository << repository_ids)
                     .group_by(RepositoryTag.repository)
                     .tuples())

  # Each row is a (repository, max lifetime_start_ts) tuple.
  return {row[0]: row[1] for row in newest_tag_rows}


def get_action_counts(repository_ids):
  """ Returns a dict mapping each selected RepositoryActionCount.repository value to the sum
      of its action counts dated within the last week, for the given repository ids.

      Returns an empty dict when no ids are given.
  """
  if not repository_ids:
    return {}

  # Only entries from the past seven days are summed.
  week_ago = datetime.now() - timedelta(weeks=1)
  summed_rows = (RepositoryActionCount
                 .select(RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count))
                 .where(RepositoryActionCount.repository << repository_ids,
                        RepositoryActionCount.date >= week_ago)
                 .group_by(RepositoryActionCount.repository)
                 .tuples())

  # Each row is a (repository, summed count) tuple.
  return {row[0]: row[1] for row in summed_rows}


def get_visible_repositories(username, namespace=None, page=None, limit=None, include_public=False):
  """ Returns a query for the repositories visible to the given user (if any), selecting the
      repository name, id and description, the visibility name and the namespace username.

      Returns an empty list when there is no username and public repositories are excluded.
  """
  if not (include_public or username):
    return []

  selected_fields = [Repository.name, Repository.id, Repository.description, Visibility.name,
                     Namespace.username]

  visible = _visible_repository_query(username=username,
                                      page=page,
                                      limit=limit,
                                      namespace=namespace,
                                      include_public=include_public,
                                      select_models=selected_fields)

  if limit:
    visible = visible.limit(limit)

  if namespace:
    visible = visible.where(Namespace.username == namespace)

  return visible


def _visible_repository_query(username=None, include_public=True, limit=None,
                              page=None, namespace=None, select_models=None):
  """ Builds the distinct query of repositories visible to `username`.

      The query joins Visibility, Namespace and (left outer) RepositoryPermission, and is
      then filtered by _basequery.filter_to_repos_for_user.

      `select_models` lists what to select; None or empty selects nothing explicitly.
      `limit` caps the result. `page` is only honoured when a `limit` (the page size) is
      also given, matching get_user_starred_repositories.
  """
  # A None default avoids sharing one mutable list object between calls.
  selections = select_models if select_models is not None else []

  query = (Repository
           .select(*selections)  # MySQL/RDS complains if there are selected models for counts.
           .distinct()
           .join(Visibility)
           .switch(Repository)
           .join(Namespace, on=(Repository.namespace_user == Namespace.id))
           .switch(Repository)
           .join(RepositoryPermission, JOIN_LEFT_OUTER))

  query = _basequery.filter_to_repos_for_user(query, username, namespace, include_public)

  # Paginating needs a page size; without a limit the page number is ignored instead of
  # being passed to paginate() together with a None page size.
  if page and limit:
    query = query.paginate(page, limit)
  elif limit:
    query = query.limit(limit)

  return query


def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
  """ Returns up to `limit` repositories whose name or namespace starts with `prefix` and
      for which `checker(repository)` is truthy.

      Each returned repository carries two extra attributes: `is_public` and `count` (the
      summed action count of the last week, or 0 for results found without counts).
  """
  week_ago = datetime.now() - timedelta(weeks=1)
  results = []
  existing_ids = []

  def collect_matches(search_clause, with_count=False):
    """ Runs one search and appends accepted, not yet seen repositories to `results`. """
    if len(results) >= limit:
      return

    columns = [Repository, Namespace]
    if with_count:
      columns.append(fn.Sum(RepositoryActionCount.count).alias('count'))

    query = (Repository
             .select(*columns)
             .join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
             .switch(Repository)
             .where(search_clause)
             .group_by(Repository, Namespace))

    if only_public:
      query = query.where(Repository.visibility == _basequery.get_public_repo_visibility())

    # Skip repositories that an earlier search already contributed.
    if existing_ids:
      query = query.where(~(Repository.id << existing_ids))

    if with_count:
      query = (query
               .switch(Repository)
               .join(RepositoryActionCount)
               .where(RepositoryActionCount.date >= week_ago)
               .order_by(fn.Sum(RepositoryActionCount.count).desc()))

    for candidate in query:
      if len(results) >= limit:
        return

      # Note: We compare IDs here, instead of objects, because calling .visibility on the
      # Repository will kick off a new SQL query to retrieve that visibility enum value. We don't
      # join the visibility table in SQL, as well, because it is ungodly slow in MySQL :-/
      public_id = _basequery.get_public_repo_visibility().id
      candidate.is_public = candidate.visibility_id == public_id
      candidate.count = candidate.count if with_count else 0

      if checker(candidate):
        results.append(candidate)
        existing_ids.append(candidate.id)

  # For performance reasons, we conduct the repo name and repo namespace searches on their
  # own. This also affords us the ability to give higher precedence to repository names matching
  # over namespaces, which is semantically correct. Within each field, the search with action
  # counts runs before the one without.
  like_pattern = prefix + '%'
  for searched_field in (Repository.name, Namespace.username):
    for counted in (True, False):
      collect_matches(searched_field ** like_pattern, with_count=counted)

  return results


def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):
  """ Returns a query for up to `limit` visible repositories matching `repo_term`.

      A plain term matches as a substring of either the repository name or the namespace.
      A term of the form 'namespace/name' requires both parts to match their own field.
  """
  visible = _visible_repository_query(username, include_public=include_public)

  # Default: the whole term may appear in either the repository name or the namespace.
  wildcard_term = '%' + repo_term + '%'
  search_clauses = ((Repository.name ** wildcard_term) |
                    (Namespace.username ** wildcard_term))

  # Handle the case where the user has already entered a namespace path.
  if repo_term.find('/') > 0:
    namespace_term, name_term = repo_term.split('/', 1)
    search_clauses = ((Repository.name ** ('%' + name_term + '%')) &
                      (Namespace.username ** ('%' + namespace_term + '%')))

  return visible.where(search_clauses).limit(limit)


def lookup_repository(repo_id):
  """ Returns the repository with the given id, or None if there is no such row. """
  try:
    found = Repository.get(Repository.id == repo_id)
  except Repository.DoesNotExist:
    found = None

  return found


def is_repository_public(repository):
  """ Returns whether the repository's visibility equals the public visibility. """
  repo_visibility = repository.visibility
  return repo_visibility == _basequery.get_public_repo_visibility()


def repository_is_public(namespace_name, repository_name):
  """ Returns True if the named repository exists and its visibility is named 'public',
      False otherwise.
  """
  public_match = (Repository
                  .select()
                  .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                  .switch(Repository)
                  .join(Visibility)
                  .where(Namespace.username == namespace_name,
                         Repository.name == repository_name,
                         Visibility.name == 'public'))

  try:
    public_match.get()
  except Repository.DoesNotExist:
    return False

  return True


def set_repository_visibility(repo, visibility):
  """ Sets the visibility of `repo` to the Visibility row named `visibility` and saves it.

      Nothing is changed if the looked-up visibility is falsy.
  """
  # NOTE(review): Visibility.get() presumably raises for an unknown name rather than
  # returning a falsy value, in which case this guard is never the deciding factor - confirm.
  visibility_obj = Visibility.get(name=visibility)
  if visibility_obj:
    repo.visibility = visibility_obj
    repo.save()


def get_email_authorized_for_repo(namespace, repository, email):
  """ Returns the RepositoryAuthorizedEmail row for `email` on the named repository, with its
      Repository and Namespace rows selected alongside, or None if there is no such row.
  """
  authorization_query = (RepositoryAuthorizedEmail
                         .select(RepositoryAuthorizedEmail, Repository, Namespace)
                         .join(Repository)
                         .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                         .where(Namespace.username == namespace,
                                Repository.name == repository,
                                RepositoryAuthorizedEmail.email == email))

  try:
    return authorization_query.get()
  except RepositoryAuthorizedEmail.DoesNotExist:
    return None


def create_email_authorization_for_repo(namespace_name, repository_name, email):
  """ Creates and returns an unconfirmed RepositoryAuthorizedEmail row for `email` on the
      named repository.

      Raises DataModelException if the repository does not exist.
  """
  try:
    repo = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    invalid_message = 'Invalid repository %s/%s' % (namespace_name, repository_name)
    raise DataModelException(invalid_message)

  return RepositoryAuthorizedEmail.create(repository=repo, email=email, confirmed=False)


def confirm_email_authorization_for_repo(code):
  """ Marks the RepositoryAuthorizedEmail row with the given confirmation code as confirmed,
      saves it and returns it (with its Repository and Namespace rows selected alongside).

      Raises DataModelException if no row has that code.
  """
  authorization_query = (RepositoryAuthorizedEmail
                         .select(RepositoryAuthorizedEmail, Repository, Namespace)
                         .join(Repository)
                         .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                         .where(RepositoryAuthorizedEmail.code == code))

  try:
    authorization = authorization_query.get()
  except RepositoryAuthorizedEmail.DoesNotExist:
    raise DataModelException('Invalid confirmation code.')

  authorization.confirmed = True
  authorization.save()

  return authorization
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`import logging`

			`from peewee import JOIN_LEFT_OUTER, fn`
			`from datetime import timedelta, datetime`

			`from data.model import (DataModelException, tag, db_transaction, storage, image, permission,`
Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`_basequery, config)`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,`
			`Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount,`
Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`Role, RepositoryAuthorizedEmail, db_for_update, get_epoch_timestamp,`
			`db_random_func)`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00

			`logger = logging.getLogger(__name__)`


			`def create_repository(namespace, name, creating_user, visibility='private'):`
			`private = Visibility.get(name=visibility)`
			`namespace_user = User.get(username=namespace)`
			`repo = Repository.create(name=name, visibility=private, namespace_user=namespace_user)`
			`admin = Role.get(name='admin')`

			`if creating_user and not creating_user.organization:`
			`RepositoryPermission.create(user=creating_user, repository=repo, role=admin)`

			`if creating_user.username != namespace:`
			`# Permission prototypes only work for orgs`
			`permission.apply_default_permissions(repo, creating_user)`
			`return repo`


			`def get_repository(namespace_name, repository_name):`
			`try:`
			`return _basequery.get_existing_repository(namespace_name, repository_name)`
			`except Repository.DoesNotExist:`
			`return None`


			`def _purge_all_repository_tags(namespace_name, repository_name):`
			`""" Immediately purge all repository tags without respecting the lifeline procedure """`
			`try:`
			`repo = _basequery.get_existing_repository(namespace_name, repository_name)`
			`except Repository.DoesNotExist:`
			`raise DataModelException('Invalid repository \'%s/%s\'' %`
			`(namespace_name, repository_name))`
			`RepositoryTag.delete().where(RepositoryTag.repository == repo.id).execute()`


			`def purge_repository(namespace_name, repository_name):`
			`# Delete all tags to allow gc to reclaim storage`
			`_purge_all_repository_tags(namespace_name, repository_name)`

			`# Gc to remove the images and storage`
			`garbage_collect_repository(namespace_name, repository_name)`

			`# Delete the rest of the repository metadata`
			`fetched = _basequery.get_existing_repository(namespace_name, repository_name)`
			`fetched.delete_instance(recursive=True, delete_nullable=False)`


Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`def find_repository_with_garbage(filter_list=None):`
			`# TODO(jschorr): Remove the filter once we have turned the experiment on for everyone.`
			`if filter_list is not None and not filter_list:`
			`return None`

Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`epoch_timestamp = get_epoch_timestamp()`

			`try:`
			`candidates = (RepositoryTag`
			`.select(RepositoryTag.repository)`
			`.join(Repository)`
			`.join(Namespace, on=(Repository.namespace_user == Namespace.id))`
			`.where(~(RepositoryTag.lifetime_end_ts >> None),`
			`(RepositoryTag.lifetime_end_ts <=`
			`(epoch_timestamp - Namespace.removed_tag_expiration_s)))`
			`.limit(500)`
			`.alias('candidates'))`

Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`if filter_list:`
			`candidates = candidates.where(Namespace.username << filter_list)`

Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`found = (RepositoryTag`
Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`.select(candidates.c.repository_id)`
Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`.from_(candidates)`
			`.order_by(db_random_func())`
			`.get())`

Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`if found is None:`
			`return`
Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00
Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`return Repository.get(Repository.id == found.repository_id)`
Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`except RepositoryTag.DoesNotExist:`
			`return None`
			`except Repository.DoesNotExist:`
			`return None`


Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`def garbage_collect_repository(namespace_name, repository_name):`
Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`# If the namespace is the async experiment, don't perform garbage collection here.`
			`# TODO(jschorr): Remove this check once we have turned the experiment on for everyone.`
			`if namespace_name in config.app_config.get('EXP_ASYNC_GARBAGE_COLLECTION', []):`
			`return`

Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`repo = get_repository(namespace_name, repository_name)`
Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`if repo is not None:`
			`garbage_collect_repo(repo)`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00

Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`def garbage_collect_repo(repo):`
Make GC of repositories fully async for whitelisted namespaces This change adds a worker to conduct GC on repositories with garbage every 10s. Fixes #144 2015-06-19 18:55:44 +00:00			`logger.debug('Garbage collecting repository %s', repo.id)`

Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`storage_id_whitelist = {}`
			`tag.garbage_collect_tags(repo)`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00
Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`with db_transaction():`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`# Get a list of all images used by tags in the repository`
Switch tag deletion to use a single query 2015-06-19 18:55:30 +00:00			`tagged_images = (Image`
			`.select(Image.id, Image.ancestors)`
			`.join(RepositoryTag)`
			`.where(Image.repository == repo))`

Try not to throw any sets of data away when computing images to garbage collect. 2015-07-31 19:03:40 +00:00			`def gen_referenced_ancestors():`
			`for tagged_image in tagged_images:`
			`# The ancestor list is in the format '/1/2/3/', extract just the ids`
			`ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]`
			`for img_id_str in ancestor_id_strings:`
			`yield int(img_id_str)`
			`yield tagged_image.id`

			`referenced_ancestors = set(gen_referenced_ancestors())`

			`# We desire two pieces of information from the database from the following`
			`# query: all of the image ids which are associated with this repository,`
			`# and the storages which are associated with those images. In order to`
			`# fetch just this information, and bypass all of the peewee model parsing`
			`# code, which is overkill for just two fields, we use a tuple query, and`
			`# feed that directly to the dictionary tuple constructor which takes an`
			`# iterable of tuples containing [(k, v), (k, v), ...]`
			`all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()`
			`images_to_storages = dict(all_repo_images)`
			`to_remove = set(images_to_storages.keys()).difference(referenced_ancestors)`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00
			`if len(to_remove) > 0:`
			`logger.info('Cleaning up unreferenced images: %s', to_remove)`
Try not to throw any sets of data away when computing images to garbage collect. 2015-07-31 19:03:40 +00:00			`storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`Image.delete().where(Image.id << list(to_remove)).execute()`

			`if len(to_remove) > 0:`
			`logger.info('Garbage collecting storage for images: %s', to_remove)`
			`storage.garbage_collect_storage(storage_id_whitelist)`


			`def star_repository(user, repository):`
			`""" Stars a repository. """`
			`star = Star.create(user=user.id, repository=repository.id)`
			`star.save()`


			`def unstar_repository(user, repository):`
			`""" Unstars a repository. """`
			`try:`
			`(Star`
			`.delete()`
			`.where(Star.repository == repository.id, Star.user == user.id)`
			`.execute())`
			`except Star.DoesNotExist:`
			`raise DataModelException('Star not found.')`


			`def get_user_starred_repositories(user, limit=None, page=None):`
			`""" Retrieves all of the repositories a user has starred. """`
			`query = (Repository`
			`.select(Repository, User, Visibility)`
			`.join(Star)`
			`.switch(Repository)`
			`.join(User)`
			`.switch(Repository)`
			`.join(Visibility)`
			`.where(Star.user == user))`

			`if page and limit:`
			`query = query.paginate(page, limit)`
			`elif limit:`
			`query = query.limit(limit)`

			`return query`


			`def repository_is_starred(user, repository):`
			`""" Determines whether a user has starred a repository or not. """`
			`try:`
			`(Star`
			`.select()`
			`.where(Star.repository == repository.id, Star.user == user.id)`
			`.get())`
			`return True`
			`except Star.DoesNotExist:`
			`return False`


			`def get_when_last_modified(repository_ids):`
Clean up the repository list API and loads stars with it We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified) 2015-07-21 21:20:24 +00:00			`if not repository_ids:`
			`return {}`

Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`tuples = (RepositoryTag`
			`.select(RepositoryTag.repository, fn.Max(RepositoryTag.lifetime_start_ts))`
			`.where(RepositoryTag.repository << repository_ids)`
			`.group_by(RepositoryTag.repository)`
			`.tuples())`

			`last_modified_map = {}`
			`for record in tuples:`
			`last_modified_map[record[0]] = record[1]`

			`return last_modified_map`


			`def get_action_counts(repository_ids):`
Clean up the repository list API and loads stars with it We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified) 2015-07-21 21:20:24 +00:00			`if not repository_ids:`
			`return {}`

Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`# Filter the join to recent entries only.`
			`last_week = datetime.now() - timedelta(weeks=1)`
			`tuples = (RepositoryActionCount`
			`.select(RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count))`
			`.where(RepositoryActionCount.repository << repository_ids)`
			`.where(RepositoryActionCount.date >= last_week)`
			`.group_by(RepositoryActionCount.repository)`
			`.tuples())`

			`action_count_map = {}`
			`for record in tuples:`
			`action_count_map[record[0]] = record[1]`

			`return action_count_map`


Clean up the repository list API and loads stars with it We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified) 2015-07-21 21:20:24 +00:00			`def get_visible_repositories(username, namespace=None, page=None, limit=None, include_public=False):`
			`""" Returns the repositories visible to the given user (if any).`
			`"""`
Small SQL query fix Fixes #248 2015-07-20 18:17:26 +00:00			`if not include_public and not username:`
			`return []`

Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`fields = [Repository.name, Repository.id, Repository.description, Visibility.name,`
			`Namespace.username]`

Clean up the repository list API and loads stars with it We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified) 2015-07-21 21:20:24 +00:00			`query = _visible_repository_query(username=username, page=page,`
			`limit=limit, namespace=namespace, include_public=include_public,`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`select_models=fields)`

			`if limit:`
			`query = query.limit(limit)`

Clean up the repository list API and loads stars with it We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified) 2015-07-21 21:20:24 +00:00			`if namespace:`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00			`query = query.where(Namespace.username == namespace)`

Clean up the repository list API and loads stars with it We load stars with the same list API now so that we get the extra metadata needed in the repo list (popularity and last modified) 2015-07-21 21:20:24 +00:00			`return query`
Accidental refactor, split out legacy.py into separate sumodules and update all call sites. 2015-07-15 21:25:41 +00:00

			`def _visible_repository_query(username=None, include_public=True, limit=None,`
			`page=None, namespace=None, select_models=[]):`
			`query = (Repository`
			`.select(*select_models) # MySQL/RDS complains is there are selected models for counts.`
			`.distinct()`
			`.join(Visibility)`
			`.switch(Repository)`
			`.join(Namespace, on=(Repository.namespace_user == Namespace.id))`
			`.switch(Repository)`
			`.join(RepositoryPermission, JOIN_LEFT_OUTER))`

			`query = _basequery.filter_to_repos_for_user(query, username, namespace, include_public)`
			`if page:`
			`query = query.paginate(page, limit)`
			`elif limit:`
			`query = query.limit(limit)`

			`return query`


			`def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):`
			`""" Returns repositories matching the given prefix string and passing the given checker`
			`function.`
			`"""`
			`last_week = datetime.now() - timedelta(weeks=1)`
			`results = []`
			`existing_ids = []`

			`def get_search_results(search_clause, with_count=False):`
			`if len(results) >= limit:`
			`return`

			`select_items = [Repository, Namespace]`
			`if with_count:`
			`select_items.append(fn.Sum(RepositoryActionCount.count).alias('count'))`

			`query = (Repository`
			`.select(*select_items)`
			`.join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))`
			`.switch(Repository)`
			`.where(search_clause)`
			`.group_by(Repository, Namespace))`

			`if only_public:`
			`query = query.where(Repository.visibility == _basequery.get_public_repo_visibility())`

			`if existing_ids:`
			`query = query.where(~(Repository.id << existing_ids))`

			`if with_count:`
			`query = (query`
			`.switch(Repository)`
			`.join(RepositoryActionCount)`
			`.where(RepositoryActionCount.date >= last_week)`
			`.order_by(fn.Sum(RepositoryActionCount.count).desc()))`

			`for result in query:`
			`if len(results) >= limit:`
			`return results`

			`# Note: We compare IDs here, instead of objects, because calling .visibility on the`
			`# Repository will kick off a new SQL query to retrieve that visibility enum value. We don't`
			`# join the visibility table in SQL, as well, because it is ungodly slow in MySQL :-/`
			`result.is_public = result.visibility_id == _basequery.get_public_repo_visibility().id`
			`result.count = result.count if with_count else 0`

			`if not checker(result):`
			`continue`

			`results.append(result)`
			`existing_ids.append(result.id)`

			`# For performance reasons, we conduct the repo name and repo namespace searches on their`
			`# own. This also affords us the ability to give higher precedence to repository names matching`
			`# over namespaces, which is semantically correct.`
			`get_search_results(Repository.name ** (prefix + '%'), with_count=True)`
			`get_search_results(Repository.name ** (prefix + '%'), with_count=False)`

			`get_search_results(Namespace.username ** (prefix + '%'), with_count=True)`
			`get_search_results(Namespace.username ** (prefix + '%'), with_count=False)`

			`return results`


			`def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):`
			`namespace_term = repo_term`
			`name_term = repo_term`

			`visible = _visible_repository_query(username, include_public=include_public)`

			`search_clauses = (Repository.name ** ('%' + name_term + '%') \|`
			`Namespace.username ** ('%' + namespace_term + '%'))`

			`# Handle the case where the user has already entered a namespace path.`
			`if repo_term.find('/') > 0:`
			`parts = repo_term.split('/', 1)`
			`namespace_term = '/'.join(parts[:-1])`
			`name_term = parts[-1]`

			`search_clauses = (Repository.name ** ('%' + name_term + '%') &`
			`Namespace.username ** ('%' + namespace_term + '%'))`

			`return visible.where(search_clauses).limit(limit)`


			`def lookup_repository(repo_id):`
			`try:`
			`return Repository.get(Repository.id == repo_id)`
			`except Repository.DoesNotExist:`
			`return None`


			`def is_repository_public(repository):`
			`return repository.visibility == _basequery.get_public_repo_visibility()`


			`def repository_is_public(namespace_name, repository_name):`
			`try:`
			`(Repository`
			`.select()`
			`.join(Namespace, on=(Repository.namespace_user == Namespace.id))`
			`.switch(Repository)`
			`.join(Visibility)`
			`.where(Namespace.username == namespace_name, Repository.name == repository_name,`
			`Visibility.name == 'public')`
			`.get())`
			`return True`
			`except Repository.DoesNotExist:`
			`return False`


			`def set_repository_visibility(repo, visibility):`
			`visibility_obj = Visibility.get(name=visibility)`
			`if not visibility_obj:`
			`return`

			`repo.visibility = visibility_obj`
			`repo.save()`


			`def get_email_authorized_for_repo(namespace, repository, email):`
			`try:`
			`return (RepositoryAuthorizedEmail`
			`.select(RepositoryAuthorizedEmail, Repository, Namespace)`
			`.join(Repository)`
			`.join(Namespace, on=(Repository.namespace_user == Namespace.id))`
			`.where(Namespace.username == namespace, Repository.name == repository,`
			`RepositoryAuthorizedEmail.email == email)`
			`.get())`
			`except RepositoryAuthorizedEmail.DoesNotExist:`
			`return None`


			`def create_email_authorization_for_repo(namespace_name, repository_name, email):`
			`try:`
			`repo = _basequery.get_existing_repository(namespace_name, repository_name)`
			`except Repository.DoesNotExist:`
			`raise DataModelException('Invalid repository %s/%s' %`
			`(namespace_name, repository_name))`

			`return RepositoryAuthorizedEmail.create(repository=repo, email=email, confirmed=False)`


			`def confirm_email_authorization_for_repo(code):`
			`try:`
			`found = (RepositoryAuthorizedEmail`
			`.select(RepositoryAuthorizedEmail, Repository, Namespace)`
			`.join(Repository)`
			`.join(Namespace, on=(Repository.namespace_user == Namespace.id))`
			`.where(RepositoryAuthorizedEmail.code == code)`
			`.get())`
			`except RepositoryAuthorizedEmail.DoesNotExist:`
			`raise DataModelException('Invalid confirmation code.')`

			`found.confirmed = True`
			`found.save()`

			`return found`