33 lines
1 KiB
Python
33 lines
1 KiB
Python
|
class Bytes(object):
    """Wrapper around strings and unicode objects to ensure we are always using
    the correct encoded or decoded data.

    An instance stores one encoded byte string (``str`` on Python 2, ``bytes``
    on Python 3), expected to be UTF-8. Build instances from arbitrary text or
    byte input with ``for_string_or_unicode``; the constructor itself only
    accepts data that is already encoded.
    """

    # Text type of the running interpreter: ``unicode`` on Python 2, ``str`` on
    # Python 3. Derived from a literal so that no Python-2-only builtin name is
    # referenced (which would be a NameError on Python 3).
    _TEXT_TYPE = type(u'')

    def __init__(self, data):
        """Wrap an already-encoded byte string.

        Args:
            data: The encoded byte string to store. It is not checked for
                UTF-8 validity here.

        Raises:
            AssertionError: If ``data`` is not a byte string.
        """
        # `bytes` is an alias of `str` on Python 2, so this is the same check
        # there, while also being the right check on Python 3.
        assert isinstance(data, bytes), (
            'Bytes requires encoded data, got %r' % type(data))
        self._encoded_data = data

    @classmethod
    def for_string_or_unicode(cls, input):
        """Build an instance from either text or an encoded byte string.

        Note: the parameter name shadows the ``input`` builtin; it is kept
        because it is part of the public signature.

        Args:
            input: A text string, which is encoded as UTF-8, or a byte string,
                which is used as-is when it is valid UTF-8.

        Returns:
            A new instance of ``cls`` wrapping the encoded data.

        Raises:
            UnicodeDecodeError: If a byte string is not valid UTF-8 and cannot
                be re-encoded.
        """
        # If the input is a text string, then encode its data as UTF-8. Note
        # that we don't catch any codec exceptions here, as we want those to
        # be raised.
        if isinstance(input, cls._TEXT_TYPE):
            return cls(input.encode('utf-8'))

        # Next, try decoding as UTF-8. If we have a UTF-8 encoded string, then
        # we have no additional conversion to do.
        try:
            input.decode('utf-8')
        except UnicodeDecodeError:
            # Python 3 `bytes` has no `encode`, so the fallback below cannot
            # apply; surface the real decode failure instead of an unrelated
            # AttributeError.
            if not hasattr(input, 'encode'):
                raise
        else:
            return cls(input)

        # Finally, if the data is (somehow) a unicode string inside a `str`
        # type, then re-encode the data. On Python 2 this implicitly decodes
        # with the interpreter's default encoding first, so it may itself
        # raise UnicodeDecodeError.
        return cls(input.encode('utf-8'))

    def as_encoded_str(self):
        """Return the stored encoded byte string unchanged."""
        return self._encoded_data

    def as_unicode(self):
        """Return the stored data decoded from UTF-8 as a text string.

        Raises:
            UnicodeDecodeError: If the stored data is not valid UTF-8.
        """
        return self._encoded_data.decode('utf-8')
|