import unittest
import time
import hashlib

from contextlib import contextmanager
from playhouse.test_utils import assert_query_count

from app import app, storage
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model, database
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel


# Namespaces used by the tests below; setUp resets the tag expiration policy of both to 0.
ADMIN_ACCESS_USER = 'devtable'
PUBLIC_USER = 'public'

# Default repository name used by createRepository.
REPO = 'somerepo'


class TestGarbageCollection(unittest.TestCase):
  @staticmethod
  def _set_tag_expiration_policy(namespace, expiration_s):
    """ Looks up the user for `namespace` and changes its tag expiration to `expiration_s`. """
    model.user.change_user_tag_expiration(model.user.get_user(namespace), expiration_s)

  def setUp(self):
    """ Prepares the test database, resets the tag expiration of the namespaces used by the
        tests to 0, and enters a test request context that tearDown leaves again.
    """
    setup_database_for_testing(self)

    for namespace in (ADMIN_ACCESS_USER, PUBLIC_USER):
      self._set_tag_expiration_policy(namespace, 0)

    self.app = app.test_client()

    # Entered by hand (rather than through `with`) because it has to outlive this method.
    self.ctx = app.test_request_context()
    self.ctx.__enter__()

  def tearDown(self):
    """ Tears down the test database and leaves the request context entered in setUp. """
    try:
      finished_database_for_testing(self)
    finally:
      # Leave the context even if the database cleanup raised, so it cannot leak into later
      # tests. No exception is being handled here, so all three arguments are None.
      self.ctx.__exit__(None, None, None)

  @staticmethod
  def createImage(docker_image_id, repository_obj, username):
    """ Finds, creates or links the image with the given docker image ID in `repository_obj` on
        behalf of `username`, then decorates its storage with the extra rows GC has to deal
        with: 'squash' and 'aci' derived storages, torrent info and a placement in an
        additional storage location.

        Returns the storage row of the image.
    """
    preferred = storage.preferred_locations[0]
    image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
                                                  preferred)
    image.storage.uploading = False
    image.storage.save()

    # Create derived images as well.
    model.image.find_or_create_derived_storage(image, 'squash', preferred)
    model.image.find_or_create_derived_storage(image, 'aci', preferred)

    # Add some torrent info, unless the storage already has it.
    try:
      database.TorrentInfo.get(storage=image.storage)
    except database.TorrentInfo.DoesNotExist:
      model.storage.save_torrent_info(image.storage, 1, 'helloworld')

    # Add some additional placements to the image. A placement is created only when it is
    # missing: the lookup raising DoesNotExist is the signal to create it, and an existing
    # placement is left alone instead of being created a second time.
    for location_name in ['local_eu']:
      location = database.ImageStorageLocation.get(name=location_name)

      try:
        database.ImageStoragePlacement.get(location=location, storage=image.storage)
      except database.ImageStoragePlacement.DoesNotExist:
        database.ImageStoragePlacement.create(location=location, storage=image.storage)

    return image.storage

  def createRepository(self, namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
    """ Creates the repository `name` under `namespace` and fills it with tags.

        Every extra keyword argument describes one tag: the keyword is the tag name and its value
        is the list of docker image IDs in the tag's image chain, where each image is the parent
        of the next one and the tag points at the last. Each tag manifest gets two labels.

        Returns the created repository.
    """
    owner = model.user.get_user(namespace)
    repo = model.repository.create_repository(namespace, name, owner)

    # Images shared between tags are only created once; this tracks which ones exist already.
    created_storages = {}
    for tag_name, image_ids in kwargs.items():
      parent = None

      for image_id in image_ids:
        if image_id not in created_storages:
          created_storages[image_id] = self.createImage(image_id, repo, namespace)

        v1_metadata = {'id': image_id}
        if parent is not None:
          v1_metadata['parent'] = parent.docker_image_id

        # Writing the metadata also sets the ancestors; the result is the next image's parent.
        parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '', v1_metadata,
                                                parent=parent)

      # Point the tag at the last image of the chain.
      tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1],
                                                     'sha:someshahere', '{}')

      # Attach some labels to the tag's manifest.
      for label_key, label_value in (('foo', 'bar'), ('meh', 'grah')):
        model.label.create_manifest_label(tag_manifest, label_key, label_value, 'manifest')

    return repo

  def gcNow(self, repository):
    """ Garbage collects `repository` and asserts that the call returned a truthy value. """
    gc_result = model.repository.garbage_collect_repo(repository)
    self.assertTrue(gc_result)

  def deleteTag(self, repository, tag, perform_gc=True):
    """ Deletes `tag` from `repository`. Unless `perform_gc` is false, the repository is garbage
        collected right afterwards and the collection is asserted to return a truthy value.
    """
    namespace = repository.namespace_user.username
    model.tag.delete_tag(namespace, repository.name, tag)
    if not perform_gc:
      return

    self.gcNow(repository)

  def moveTag(self, repository, tag, docker_image_id):
    """ Creates `tag` or re-points it at the image `docker_image_id`, then garbage collects the
        repository and asserts that the collection returned a truthy value.
    """
    namespace = repository.namespace_user.username
    model.tag.create_or_update_tag(namespace, repository.name, tag, docker_image_id)
    self.gcNow(repository)

  def assertNotDeleted(self, repository, *args):
    """ Asserts that every docker image ID in `args` can still be looked up in `repository`. """
    for docker_image_id in args:
      namespace = repository.namespace_user.username
      found = model.image.get_image_by_id(namespace, repository.name, docker_image_id)
      self.assertTrue(bool(found))

  def assertDeleted(self, repository, *args):
    """ Asserts that none of the docker image IDs in `args` can be looked up in `repository`.

        Every ID is checked; the test fails on the first image that is still found.
    """
    for docker_image_id in args:
      try:
        # Verify the image is missing when accessed by the repository.
        model.image.get_image_by_id(repository.namespace_user.username, repository.name,
                                    docker_image_id)
      except model.DataModelException:
        # The lookup failed, so this image is gone. Move on to the next one: returning here
        # would leave all remaining images unchecked.
        continue

      self.fail('Expected image %s to be deleted' % docker_image_id)

  @staticmethod
  def _get_dangling_storage_count():
    """ Returns the number of ImageStorage rows that are referenced neither as the storage of an
        Image row nor as the derivative of a DerivedStorageForImage row.
    """
    all_storage_ids = {row.id for row in ImageStorage.select()}
    in_use = {image.storage_id for image in Image.select()}
    in_use |= {derived.derivative_id for derived in DerivedStorageForImage.select()}
    return len(all_storage_ids - in_use)

  @staticmethod
  def _get_dangling_label_count():
    """ Returns the number of Label rows that no TagManifestLabel row refers to. """
    all_label_ids = {row.id for row in Label.select()}
    attached_label_ids = {manifest_label.label_id for manifest_label in TagManifestLabel.select()}
    return len(all_label_ids.difference(attached_label_ids))

  @contextmanager
  def assert_gc_integrity(self, expect_storage_removed=True):
    """ Specialized assertion for ensuring that GC cleans up all dangling storages
        and labels, invokes the callback for images removed and doesn't invoke the
        callback for images *not* removed.

        Used as a context manager around the code that triggers GC. Once the wrapped block has
        finished, it asserts that:
          - the number of dangling storages and of dangling labels is the same as before the
            block ran;
          - every image reported to the cleanup callback, and its storage, is gone from the
            database;
          - the callback reported at least one image if and only if `expect_storage_removed`
            is true;
          - the content of every remaining storage row with a CAS path can be fetched from the
            preferred storage location.
    """
    # TODO: Consider also asserting the number of DB queries being performed.

    # Add a callback for when images are removed. The callback receives an iterable of removed
    # images, which are collected into this list.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = self._get_dangling_storage_count()
    existing_label_count = self._get_dangling_label_count()
    yield

    # Ensure the number of dangling storages and labels has not changed.
    updated_storage_count = self._get_dangling_storage_count()
    self.assertEqual(updated_storage_count, existing_storage_count)

    updated_label_count = self._get_dangling_label_count()
    self.assertEqual(updated_label_count, existing_label_count)

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
      with self.assertRaises(Image.DoesNotExist):
        Image.get(id=removed_image_and_storage.id)

      with self.assertRaises(ImageStorage.DoesNotExist):
        ImageStorage.get(id=removed_image_and_storage.storage_id)

      with self.assertRaises(ImageStorage.DoesNotExist):
        ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # assertEqual rather than the deprecated assertEquals alias, matching the checks above.
    self.assertEqual(expect_storage_removed, bool(removed_image_storages))

    # Ensure all CAS storage is in the storage engine. The fetched content is not inspected;
    # presumably get_content raises when the blob is missing.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
      if storage_row.cas_path:
        storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))

  def test_has_garbage(self):
    """ Remove all existing repositories, then add one without garbage, check, then add one with
        garbage, and check again.

        The garbage check is exercised both with a very long time machine expiration on the
        namespace (a deleted tag is not yet garbage) and with an expiration of 0 (it is).
    """
    # Delete all existing repos.
    for repo in database.Repository.select().order_by(database.Repository.id):
      self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name))

    # Change the time machine expiration on the namespace.
    (database.User
     .update(removed_tag_expiration_s=1000000000)
     .where(database.User.username == ADMIN_ACCESS_USER)
     .execute())

    # Create a repository without any garbage.
    repository = self.createRepository(latest=['i1', 'i2', 'i3'])

    # Ensure that no repositories are returned by the has garbage check.
    self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))

    # Delete a tag.
    self.deleteTag(repository, 'latest', perform_gc=False)

    # There should still not be any repositories with garbage, due to time machine.
    self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))

    # Change the time machine expiration on the namespace.
    (database.User
     .update(removed_tag_expiration_s=0)
     .where(database.User.username == ADMIN_ACCESS_USER)
     .execute())

    # Now we should find the repository for GC.
    repository = model.repository.find_repository_with_garbage(0)
    self.assertIsNotNone(repository)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(REPO, repository.name)

    # GC the repository.
    self.assertTrue(model.repository.garbage_collect_repo(repository))

    # There should now be no repositories with garbage.
    self.assertIsNone(model.repository.find_repository_with_garbage(0))

  def test_find_garbage_policy_functions(self):
    """ Fetches a random GC policy and the list of all GC expiration policies under a query
        count assertion of 1, then checks that the random policy is one of the listed ones.
    """
    with assert_query_count(1):
      random_policy = model.repository.get_random_gc_policy()
      known_policies = model.repository._get_gc_expiration_policies()

    self.assertIn(random_policy, known_policies)

  def test_one_tag(self):
    """ A repository with a single tag: once the tag is deleted and GC has run, none of the
        tag's images can be found in the repository anymore.
    """
    with self.assert_gc_integrity():
      repo = self.createRepository(latest=['i1', 'i2', 'i3'])

      self.deleteTag(repo, 'latest')

      self.assertDeleted(repo, 'i1', 'i2', 'i3')

  def test_two_tags_unshared_images(self):
    """ Two tags in one repository without any image in common: deleting one tag removes its
        images and leaves the images of the other tag alone.
    """
    with self.assert_gc_integrity():
      repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])

      self.deleteTag(repo, 'latest')

      # Only the images of the deleted tag may disappear.
      self.assertDeleted(repo, 'i1', 'i2', 'i3')
      self.assertNotDeleted(repo, 'f1', 'f2')

  def test_two_tags_shared_images(self):
    """ Two tags in one repository that have an image in common: deleting one tag removes only
        the images the other tag does not use.
    """
    with self.assert_gc_integrity():
      repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])

      self.deleteTag(repo, 'latest')

      # 'i1' is still needed by tag 'other', so only 'i2' and 'i3' go away.
      self.assertDeleted(repo, 'i2', 'i3')
      self.assertNotDeleted(repo, 'i1', 'f1')

  def test_unrelated_repositories(self):
    """ Two repositories holding different images: deleting the tag of the first one removes
        its images and does not touch the images of the second one.
    """
    with self.assert_gc_integrity():
      first_repo = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
      second_repo = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')

      self.deleteTag(first_repo, 'latest')

      self.assertDeleted(first_repo, 'i1', 'i2', 'i3')
      self.assertNotDeleted(second_repo, 'j1', 'j2', 'j3')

  def test_related_repositories(self):
    """ Two repositories whose tags have image IDs in common: deleting the tag of the first one
        leaves every image of the second one in place.
    """
    with self.assert_gc_integrity():
      first_repo = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
      second_repo = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')

      self.deleteTag(first_repo, 'latest')

      # Only 'i3', which the second repository does not use, is checked for deletion.
      self.assertDeleted(first_repo, 'i3')
      self.assertNotDeleted(second_repo, 'i1', 'i2', 'j1')

  def test_inaccessible_repositories(self):
    """ The same image IDs live in repositories under two different namespaces: deleting the
        tag in the first namespace removes the images from that repository, while they stay
        available in the repository of the other namespace.
    """
    with self.assert_gc_integrity():
      admin_repo = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
      public_repo = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])

      self.deleteTag(admin_repo, 'latest')

      self.assertDeleted(admin_repo, 'i1', 'i2', 'i3')
      self.assertNotDeleted(public_repo, 'i1', 'i2', 'i3')

  def test_multiple_shared_images(self):
    """ One repository with four tags that partly share images. Tags are deleted and moved one
        step at a time, and after every step the set of surviving images is verified.
    """
    with self.assert_gc_integrity():
      repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
                                   third=['t1', 't2', 't3'], fourth=['i1', 'f1'])

      # Step 1: drop 'other'. Only f2 is exclusive to it, so only f2 goes away.
      self.deleteTag(repo, 'other')
      self.assertDeleted(repo, 'f2')
      self.assertNotDeleted(repo, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')

      # Step 2: re-point 'fourth' at i3. Nothing references f1 afterwards, so it is removed.
      self.moveTag(repo, 'fourth', 'i3')
      self.assertDeleted(repo, 'f1')
      self.assertNotDeleted(repo, 'i1', 'i2', 'i3', 't1', 't2', 't3')

      # Step 3: drop 'latest'. 'fourth' sits on the same branch, so nothing is removed.
      self.deleteTag(repo, 'latest')
      self.assertNotDeleted(repo, 'i1', 'i2', 'i3', 't1', 't2', 't3')

      # Step 4: drop 'third'. This takes t1 through t3 with it.
      self.deleteTag(repo, 'third')
      self.assertDeleted(repo, 't1', 't2', 't3')
      self.assertNotDeleted(repo, 'i1', 'i2', 'i3')

      # Step 5: put a new tag on i1.
      self.moveTag(repo, 'newtag', 'i1')
      self.assertNotDeleted(repo, 'i1', 'i2', 'i3')

      # Step 6: drop 'fourth'. i2 and i3 lose their last reference; i1 is kept by 'newtag'.
      self.deleteTag(repo, 'fourth')
      self.assertDeleted(repo, 'i2', 'i3')
      self.assertNotDeleted(repo, 'i1')

      # Step 7: drop 'newtag'. The last remaining image is removed as well.
      self.deleteTag(repo, 'newtag')
      self.assertDeleted(repo, 'i1')

  def test_empty_gc(self):
    """ Running GC on a repository in which no tag was deleted removes nothing. """
    with self.assert_gc_integrity(expect_storage_removed=False):
      repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
                                   third=['t1', 't2', 't3'], fourth=['i1', 'f1'])

      self.gcNow(repo)

      self.assertNotDeleted(repo, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')

  def test_time_machine_no_gc(self):
    """ Two tags with a shared image in a namespace whose tag expiration was raised to
        60*60*24: deleting a tag and running GC must not remove any image.
    """
    with self.assert_gc_integrity(expect_storage_removed=False):
      repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])

      expiration_s = 60 * 60 * 24
      self._set_tag_expiration_policy(repo.namespace_user.username, expiration_s)

      self.deleteTag(repo, 'latest')

      self.assertNotDeleted(repo, 'i2', 'i3')
      self.assertNotDeleted(repo, 'i1', 'f1')

  def test_time_machine_gc(self):
    """ Two tags with a shared image in a namespace with a tag expiration of 1. The images of
        the first deleted tag survive its own GC run and are collected by the GC run that
        follows the deletion of the second tag two seconds later.
    """
    with self.assert_gc_integrity():
      repo = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])

      self._set_tag_expiration_policy(repo.namespace_user.username, 1)

      # Right after the deletion nothing is removed yet.
      self.deleteTag(repo, 'latest')
      self.assertNotDeleted(repo, 'i2', 'i3')
      self.assertNotDeleted(repo, 'i1', 'f1')

      # Wait for longer than the expiration set above.
      time.sleep(2)

      # The GC run triggered by this deletion collects the images of 'latest'.
      self.deleteTag(repo, 'other')
      self.assertDeleted(repo, 'i2', 'i3')
      self.assertNotDeleted(repo, 'i1', 'f1')

  def test_images_shared_storage(self):
    """ Two tags in one repository, each pointing at its own image, with both images backed by
        one and the same storage row. Deleting the first tag removes the first image but must
        *not* remove the storage.
    """
    with self.assert_gc_integrity(expect_storage_removed=False):
      repo = self.createRepository()

      # A single storage row that both images below are backed by.
      shared_storage = model.storage.create_v1_storage(storage.preferred_locations[0])

      first_image = Image.create(docker_image_id='i1', repository=repo, storage=shared_storage,
                                 ancestors='/')
      second_image = Image.create(docker_image_id='i2', repository=repo, storage=shared_storage,
                                  ancestors='/')

      # Give each image a tag of its own.
      for tag_name, image in (('first', first_image), ('second', second_image)):
        model.tag.store_tag_manifest(repo.namespace_user.username, repo.name, tag_name,
                                     image.docker_image_id, 'sha:someshahere', '{}')

      # Remove the first tag: its image goes away, the other image stays.
      self.deleteTag(repo, 'first')
      self.assertDeleted(repo, 'i1')
      self.assertNotDeleted(repo, 'i2')

  def test_image_with_cas(self):
    """ A tag pointing at an image whose storage is content-addressed. After the tag is deleted
        and GC has run, both the image and the blob in the storage engine are gone.
    """
    with self.assert_gc_integrity(expect_storage_removed=True):
      repo = self.createRepository()

      # Write a blob to the storage engine and create a storage row that refers to it.
      content = 'hello world'
      digest = 'sha256:' + hashlib.sha256(content).hexdigest()
      preferred = storage.preferred_locations[0]
      storage.put_content({preferred}, storage.blob_path(digest), content)

      cas_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)
      location = database.ImageStorageLocation.get(name=preferred)
      database.ImageStoragePlacement.create(location=location, storage=cas_storage)

      # The blob has to be present before GC runs.
      self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))

      # Back an image with the storage and tag it.
      image = Image.create(docker_image_id='i1', repository=repo, storage=cas_storage,
                           ancestors='/')

      model.tag.store_tag_manifest(repo.namespace_user.username, repo.name, 'first',
                                   image.docker_image_id, 'sha:someshahere1', '{}')

      self.assertNotDeleted(repo, 'i1')

      # Remove the tag, which also runs GC.
      self.deleteTag(repo, 'first')
      self.assertDeleted(repo, 'i1')

      # The blob must have been removed from the storage engine as well.
      self.assertFalse(storage.exists({preferred}, storage.blob_path(digest)))

  def test_images_shared_cas(self):
    """ Two tags in one repository, each pointing at its own image. The images are backed by
        *distinct* storage rows that carry the same content checksum and therefore the same
        *CAS path*. Deleting the first tag removes the first image and its storage row, but
        the blob in the storage engine has to stay because the second storage row still uses it.
    """
    with self.assert_gc_integrity(expect_storage_removed=True):
      repo = self.createRepository()

      # Write one blob and create two storage rows that both refer to it.
      content = 'hello world'
      digest = 'sha256:' + hashlib.sha256(content).hexdigest()
      preferred = storage.preferred_locations[0]
      storage.put_content({preferred}, storage.blob_path(digest), content)

      first_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)
      second_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)

      location = database.ImageStorageLocation.get(name=preferred)

      database.ImageStoragePlacement.create(location=location, storage=first_storage)
      database.ImageStoragePlacement.create(location=location, storage=second_storage)

      # The blob has to be present before GC runs.
      self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))

      # One image per storage row, and one tag per image.
      first_image = Image.create(docker_image_id='i1', repository=repo, storage=first_storage,
                                 ancestors='/')
      second_image = Image.create(docker_image_id='i2', repository=repo, storage=second_storage,
                                  ancestors='/')

      tagged_images = (
        ('first', first_image, 'sha:someshahere1'),
        ('second', second_image, 'sha:someshahere2'),
      )
      for tag_name, image, manifest_digest in tagged_images:
        model.tag.store_tag_manifest(repo.namespace_user.username, repo.name, tag_name,
                                     image.docker_image_id, manifest_digest, '{}')

      self.assertNotDeleted(repo, 'i1', 'i2')

      # Remove the first tag, which also runs GC.
      self.deleteTag(repo, 'first')
      self.assertDeleted(repo, 'i1')
      self.assertNotDeleted(repo, 'i2')

      # The blob is still referenced by the second storage row, so it must still exist.
      self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))



# Run the tests of this module when the file is executed directly as a script.
if __name__ == '__main__':
  unittest.main()