quay/util/bytes.py

class Bytes(object):
  """ Wrapper around strings and unicode objects to ensure we are always using
      the correct encoded or decoded data.
  """
  def __init__(self, data):
    assert isinstance(data, str)
    self._encoded_data = data

  @classmethod
  def for_string_or_unicode(cls, input):
    # If the string is a unicode string, then encode its data as UTF-8. Note that
    # we don't catch any decode exceptions here, as we want those to be raised.
    if isinstance(input, unicode):
      return Bytes(input.encode('utf-8'))

    # Next, try decoding as UTF-8. If we have a utf-8 encoded string, then we have no
    # additional conversion to do.
    try:
      input.decode('utf-8')
      return Bytes(input)
    except UnicodeDecodeError:
      pass

    # Finally, if the data is (somehow) a unicode string inside a `str` type, then
    # re-encoded the data.
    return Bytes(input.encode('utf-8'))

  def as_encoded_str(self):
    return self._encoded_data

  def as_unicode(self):
    return self._encoded_data.decode('utf-8')