import os
import tarfile

import marisa_trie

from util.registry.aufs import is_aufs_metadata, get_deleted_prefix
from util.registry.tarlayerformat import TarLayerFormat

class StreamLayerMerger(TarLayerFormat):
  """ Class which creates a generator of the combined TAR data for a set of Docker layers.

      While layers are processed, two kinds of state are tracked:
        - the normalized paths of entries that were accepted for output, and
        - the path prefixes that AUFS whiteout entries marked as deleted.

      Each kind is collected into a plain set as entries are seen and is only
      compiled into a trie in after_tar_layer(). Lookups go through the tries,
      so entries recorded since the tries were last rebuilt do not affect the
      skip decisions until the next rebuild.
  """
  def __init__(self, get_tar_stream_iterator, path_prefix=None, reporter=None):
    """ Forwards all arguments to TarLayerFormat and starts with empty tracking state. """
    super(StreamLayerMerger, self).__init__(get_tar_stream_iterator, path_prefix, reporter=reporter)

    # Accepted paths: the set is appended to as files are accepted, the trie is
    # the snapshot used for lookups.
    self.path_trie = marisa_trie.Trie()
    self.path_encountered = set()

    # Deleted prefixes: same set/trie split as for the accepted paths.
    self.deleted_prefix_trie = marisa_trie.Trie()
    self.deleted_prefixes_encountered = set()

  def after_tar_layer(self):
    """ Rebuilds both lookup tries from everything recorded so far.

        NOTE(review): presumably invoked by TarLayerFormat once a layer has been
        fully processed -- confirm against the base class.
    """
    # Update the tries.
    self.path_trie = marisa_trie.Trie(self.path_encountered)
    self.deleted_prefix_trie = marisa_trie.Trie(self.deleted_prefixes_encountered)

  @staticmethod
  def _normalize_path(path):
    """ Decodes the UTF-8 encoded path and returns it relative to './'. """
    return os.path.relpath(path.decode('utf-8'), './')

  def _check_deleted(self, absolute):
    """ Returns True if the normalized path is, or lies underneath, a deleted prefix. """
    ubsolute = unicode(absolute)
    parent_prefix = os.pardir + os.sep

    # iter_prefixes yields *string* prefixes, so 'foo/bar' is also yielded for
    # 'foo/barbaz'. The relative path tells whether the candidate is a real
    # ancestor: it escapes upwards exactly when it is '..' or starts with '../'.
    # A bare startswith('..') must not be used here, since it would also match
    # entries whose own name begins with two dots (e.g. 'foo/bar/..data') and
    # wrongly report them as not deleted.
    for prefix in self.deleted_prefix_trie.iter_prefixes(ubsolute):
      relative = os.path.relpath(ubsolute, prefix)
      if relative != os.pardir and not relative.startswith(parent_prefix):
        return True

    return False

  def is_skipped_file(self, filename):
    """ Returns True if the entry with the given (UTF-8 encoded) filename must be skipped.

        An entry is skipped when it is AUFS metadata, when it falls under a deleted
        prefix, or when its path is already present in the path trie.
    """
    absolute = StreamLayerMerger._normalize_path(filename)

    # Skip metadata.
    if is_aufs_metadata(absolute):
      return True

    # Check if the file is under a deleted path.
    if self._check_deleted(absolute):
      return True

    # Check if this file has already been encountered somewhere. If so,
    # skip it.
    ubsolute = unicode(absolute)
    if ubsolute in self.path_trie:
      return True

    return False

  def should_append_file(self, filename):
    """ Returns True if the entry should be written to the merged output.

        Side effects: an entry for which get_deleted_prefix returns a prefix has
        that prefix recorded and is itself not appended; any other non-skipped
        entry has its normalized path recorded as encountered.
    """
    if self.is_skipped_file(filename):
      return False

    absolute = StreamLayerMerger._normalize_path(filename)

    # Add any prefix of deleted paths to the prefix list.
    deleted_prefix = get_deleted_prefix(absolute)
    if deleted_prefix is not None:
      self.deleted_prefixes_encountered.add(deleted_prefix)
      return False

    # Otherwise, add the path to the encountered list and return it.
    self.path_encountered.add(absolute)
    return True