mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
python-3.6.zip added from Github
README.cosmo contains the necessary links.
This commit is contained in:
parent
75fc601ff5
commit
0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions
62
third_party/python/Lib/email/__init__.py
vendored
Normal file
62
third_party/python/Lib/email/__init__.py
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""A package for parsing, handling, and generating email messages."""
|
||||
|
||||
__all__ = [
|
||||
'base64mime',
|
||||
'charset',
|
||||
'encoders',
|
||||
'errors',
|
||||
'feedparser',
|
||||
'generator',
|
||||
'header',
|
||||
'iterators',
|
||||
'message',
|
||||
'message_from_file',
|
||||
'message_from_binary_file',
|
||||
'message_from_string',
|
||||
'message_from_bytes',
|
||||
'mime',
|
||||
'parser',
|
||||
'quoprimime',
|
||||
'utils',
|
||||
]
|
||||
|
||||
|
||||
|
||||
# Some convenience routines. Don't import Parser and Message as side-effects
|
||||
# of importing email since those cascadingly import most of the rest of the
|
||||
# email package.
|
||||
def message_from_string(s, *args, **kws):
    """Build a Message object tree from the text of an email.

    Any extra positional and keyword arguments are forwarded unchanged to
    the Parser constructor.
    """
    # Imported lazily so that importing the email package stays cheap.
    from email.parser import Parser
    text_parser = Parser(*args, **kws)
    return text_parser.parsestr(s)
|
||||
|
||||
def message_from_bytes(s, *args, **kws):
    """Build a Message object tree from the raw bytes of an email.

    Any extra positional and keyword arguments are forwarded unchanged to
    the BytesParser constructor.
    """
    # Imported lazily so that importing the email package stays cheap.
    from email.parser import BytesParser
    bytes_parser = BytesParser(*args, **kws)
    return bytes_parser.parsebytes(s)
|
||||
|
||||
def message_from_file(fp, *args, **kws):
    """Read a text-mode file object and build a Message object tree from it.

    Any extra positional and keyword arguments are forwarded unchanged to
    the Parser constructor.
    """
    # Imported lazily so that importing the email package stays cheap.
    from email.parser import Parser
    text_parser = Parser(*args, **kws)
    return text_parser.parse(fp)
|
||||
|
||||
def message_from_binary_file(fp, *args, **kws):
    """Read a binary-mode file object and build a Message object tree from it.

    Any extra positional and keyword arguments are forwarded unchanged to
    the BytesParser constructor.
    """
    # Imported lazily so that importing the email package stays cheap.
    from email.parser import BytesParser
    bytes_parser = BytesParser(*args, **kws)
    return bytes_parser.parse(fp)
|
233
third_party/python/Lib/email/_encoded_words.py
vendored
Normal file
233
third_party/python/Lib/email/_encoded_words.py
vendored
Normal file
|
@ -0,0 +1,233 @@
|
|||
""" Routines for manipulating RFC2047 encoded words.
|
||||
|
||||
This is currently a package-private API, but will be considered for promotion
|
||||
to a public API if there is demand.
|
||||
|
||||
"""
|
||||
|
||||
# An encoded word looks like this:
|
||||
#
|
||||
# =?charset[*lang]?cte?encoded_string?=
|
||||
#
|
||||
# for more information about charset see the charset module. Here it is one
|
||||
# of the preferred MIME charset names (hopefully; you never know when parsing).
|
||||
# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
|
||||
# theory other letters could be used for other encodings, but in practice this
|
||||
# (almost?) never happens. There could be a public API for adding entries
|
||||
# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
|
||||
# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
|
||||
# as indicated by the brackets (they are not part of the syntax) but is almost
|
||||
# never encountered in practice.
|
||||
#
|
||||
# The general interface for a CTE decoder is that it takes the encoded_string
|
||||
# as its argument, and returns a tuple (cte_decoded_string, defects). The
|
||||
# cte_decoded_string is the original binary that was encoded using the
|
||||
# specified cte. 'defects' is a list of MessageDefect instances indicating any
|
||||
# problems encountered during conversion. 'charset' and 'lang' are the
|
||||
# corresponding strings extracted from the EW, case preserved.
|
||||
#
|
||||
# The general interface for a CTE encoder is that it takes a binary sequence
|
||||
# as input and returns the cte_encoded_string, which is an ascii-only string.
|
||||
#
|
||||
# Each encoder must also supply a length function that takes the binary
|
||||
# sequence as its argument and returns the length of the resulting encoded
|
||||
# string.
|
||||
#
|
||||
# The main API functions for the module are decode, which calls the decoder
|
||||
# referenced by the cte specifier, and encode, which adds the appropriate
|
||||
# RFC 2047 "chrome" to the encoded string, and can optionally automatically
|
||||
# select the shortest possible encoding. See their docstrings below for
|
||||
# details.
|
||||
|
||||
import re
|
||||
import base64
|
||||
import binascii
|
||||
import functools
|
||||
from string import ascii_letters, digits
|
||||
from email import errors
|
||||
|
||||
__all__ = ['decode_q',
|
||||
'encode_q',
|
||||
'decode_b',
|
||||
'encode_b',
|
||||
'len_q',
|
||||
'len_b',
|
||||
'decode',
|
||||
'encode',
|
||||
]
|
||||
|
||||
#
|
||||
# Quoted Printable
|
||||
#
|
||||
|
||||
# regex based decoder.
|
||||
def _q_unescape(match):
    """Turn one matched '=XX' hex escape into the single byte it denotes."""
    return bytes([int(match.group(1), 16)])


# Callable that replaces every '=XX' hex escape in a bytes object.
_q_byte_subber = functools.partial(
    re.compile(br'=([a-fA-F0-9]{2})').sub, _q_unescape)


def decode_q(encoded):
    """Decode the 'q' (quoted-printable) payload of an encoded word.

    Underscores become spaces and '=XX' escapes become the corresponding
    byte.  Returns a (decoded_bytes, defects) tuple; the defects list is
    always empty for this decoder.
    """
    with_spaces = encoded.replace(b'_', b' ')
    return _q_byte_subber(with_spaces), []
|
||||
|
||||
|
||||
# dict mapping bytes to their encoded form
|
||||
class _QByteMap(dict):
    """Lazily filled table from byte value (int) to its 'q' encoded text."""

    # Byte values that are emitted literally rather than as '=XX'.
    safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')

    def __missing__(self, key):
        # Compute the encoding on first use and cache it in the dict.
        encoded = chr(key) if key in self.safe else "={:02X}".format(key)
        self[key] = encoded
        return self[key]


_q_byte_map = _QByteMap()

# In headers spaces are mapped to '_'.
_q_byte_map[ord(' ')] = '_'


def encode_q(bstring):
    """Return the 'q' encoding of the bytes in bstring as a str."""
    pieces = [_q_byte_map[octet] for octet in bstring]
    return ''.join(pieces)


def len_q(bstring):
    """Return the length encode_q(bstring) would have."""
    total = 0
    for octet in bstring:
        total += len(_q_byte_map[octet])
    return total
|
||||
|
||||
|
||||
#
|
||||
# Base64
|
||||
#
|
||||
|
||||
def decode_b(encoded):
    """Decode the 'b' (base64) payload of an encoded word.

    Returns a (decoded_bytes, defects) tuple.  Progressively more lenient
    decoding attempts are made; the defects list records which kind of
    repair was needed.  If nothing works, the input is returned unchanged
    together with an InvalidBase64LengthDefect.
    """
    remainder = len(encoded) % 4
    padding = b'==='[:4 - remainder] if remainder else b''

    # Attempt 1: strict decoding with the padding repaired if needed.
    # This succeeds only if encoded contains no invalid characters.
    try:
        decoded = base64.b64decode(encoded + padding, validate=True)
    except binascii.Error:
        pass
    else:
        return decoded, ([errors.InvalidBase64PaddingDefect()]
                         if remainder else [])

    # Attempt 2: the padding was correct, so this is likely an invalid
    # character.  Non-alphabet characters are ignored as far as padding
    # goes, but we don't know how many there are, so try the input as-is.
    try:
        decoded = base64.b64decode(encoded, validate=False)
    except binascii.Error:
        pass
    else:
        return decoded, [errors.InvalidBase64CharactersDefect()]

    # Attempt 3: add as much padding as could possibly be necessary
    # (extra padding is ignored).
    try:
        decoded = base64.b64decode(encoded + b'==', validate=False)
    except binascii.Error:
        # This only happens when the encoded string's length is 1 more
        # than a multiple of 4, which is invalid.
        #
        # bpo-27397: Just return the encoded string since there's no
        # way to decode.
        return encoded, [errors.InvalidBase64LengthDefect()]
    return decoded, [errors.InvalidBase64CharactersDefect(),
                     errors.InvalidBase64PaddingDefect()]
|
||||
|
||||
def encode_b(bstring):
    """Return the base64 encoding of bstring as an ASCII str."""
    b64_bytes = base64.b64encode(bstring)
    return b64_bytes.decode('ascii')
|
||||
|
||||
def len_b(bstring):
    """Return the length encode_b(bstring) would have."""
    # Every started group of 3 input bytes yields 4 output characters,
    # i.e. 4 * ceil(len / 3).
    started_groups = (len(bstring) + 2) // 3
    return 4 * started_groups
|
||||
|
||||
|
||||
# Maps the lower-cased cte letter of an encoded word to its decoder.
_cte_decoders = {
    'q': decode_q,
    'b': decode_b,
    }

def decode(ew):
    """Decode encoded word and return (string, charset, lang, defects) tuple.

    An RFC 2047/2231 encoded word has the form:

        =?charset*lang?cte?encoded_string?=

    where '*lang' may be omitted but the other parts may not be.

    This function expects exactly such a string (that is, it does not check
    the syntax and may raise errors if the string is not well formed; for
    example ValueError if it does not split into five '?'-separated parts,
    or KeyError for a cte other than 'q' or 'b').  It returns the
    encoded_string decoded first from its Content Transfer Encoding and
    then from the resulting bytes into unicode using the specified charset.

    If the cte-decoded bytes do not decode using the specified charset, an
    UndecodableBytesDefect is added to the defects list and the bytes are
    decoded again with the 'surrogateescape' error handler, so undecodable
    octets appear as lone surrogates in the result.  If the charset is not
    known, the bytes are decoded as ASCII with 'surrogateescape' and, unless
    the charset is 'unknown-8bit', a CharsetError defect is added.

    The specified charset and language are returned.  The default for
    language, which is rarely if ever encountered, is the empty string.

    """
    _, charset, cte, cte_string, _ = ew.split('?')
    charset, _, lang = charset.partition('*')
    cte = cte.lower()
    # Recover the original bytes and do CTE decoding.
    bstring = cte_string.encode('ascii', 'surrogateescape')
    bstring, defects = _cte_decoders[cte](bstring)
    # Turn the CTE decoded bytes into unicode.
    try:
        string = bstring.decode(charset)
    except (LookupError, UnicodeEncodeError):
        # LookupError: no codec with that name.  UnicodeEncodeError: the
        # charset *name* itself cannot be encoded (e.g. it contains lone
        # surrogates), so retrying with the same charset would only raise
        # again.  Either way the charset is unusable.  This clause must
        # come before the UnicodeError one, since UnicodeEncodeError is a
        # subclass of UnicodeError.
        string = bstring.decode('ascii', 'surrogateescape')
        if charset.lower() != 'unknown-8bit':
            defects.append(errors.CharsetError("Unknown charset {} "
                "in encoded word; decoded as unknown bytes".format(charset)))
    except UnicodeError:
        defects.append(errors.UndecodableBytesDefect("Encoded word "
            "contains bytes not decodable using {} charset".format(charset)))
        string = bstring.decode(charset, 'surrogateescape')
    return string, charset, lang, defects
|
||||
|
||||
|
||||
# Maps a cte letter to the function producing that encoding.
_cte_encoders = {
    'q': encode_q,
    'b': encode_b,
    }

# Maps a cte letter to the function predicting the encoded length.
_cte_encode_length = {
    'q': len_q,
    'b': len_b,
    }

def encode(string, charset='utf-8', encoding=None, lang=''):
    """Wrap string in an encoded word, by default using the shorter CTE.

    The result is an RFC 2047/2231 encoded word of the form:

        =?charset*lang?cte?encoded_string?=

    with '*lang' present only when a non-empty 'lang' is supplied.

    'charset' (default utf-8) is the codec used to turn the string into
    bytes before CTE encoding; for the special value 'unknown-8bit' the
    string is encoded as ASCII with surrogateescape instead.  'encoding' is
    the cte specifier to use ('q' or 'b'); when it is None (the default) the
    shorter of the two encodings is chosen, except that 'q' is preferred
    while it is fewer than five characters longer than 'b'.  'lang'
    (default '') is the language string to place after the charset.

    """
    if charset == 'unknown-8bit':
        raw = string.encode('ascii', 'surrogateescape')
    else:
        raw = string.encode(charset)
    if encoding is None:
        q_size = _cte_encode_length['q'](raw)
        b_size = _cte_encode_length['b'](raw)
        # Bias toward q.  5 is arbitrary.
        if q_size - b_size < 5:
            encoding = 'q'
        else:
            encoding = 'b'
    payload = _cte_encoders[encoding](raw)
    lang_suffix = '*' + lang if lang else lang
    return "=?{}{}?{}?{}?=".format(charset, lang_suffix, encoding, payload)
|
2832
third_party/python/Lib/email/_header_value_parser.py
vendored
Normal file
2832
third_party/python/Lib/email/_header_value_parser.py
vendored
Normal file
File diff suppressed because it is too large
Load diff
549
third_party/python/Lib/email/_parseaddr.py
vendored
Normal file
549
third_party/python/Lib/email/_parseaddr.py
vendored
Normal file
|
@ -0,0 +1,549 @@
|
|||
# Copyright (C) 2002-2007 Python Software Foundation
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Email address parsing code.
|
||||
|
||||
Lifted directly from rfc822.py. This should eventually be rewritten.
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
'mktime_tz',
|
||||
'parsedate',
|
||||
'parsedate_tz',
|
||||
'quote',
|
||||
]
|
||||
|
||||
import time, calendar
|
||||
|
||||
# Shared string constants.  SPACE and EMPTYSTRING serve as join separators
# in the address parser further down this module.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
|
||||
|
||||
# Parse a date field
|
||||
# Abbreviated names first, then full names; an index >= 12 is folded back
# into 1..12 by _parsedate_tz.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple, or None if it cannot be parsed.

    The first nine items have the layout of a time tuple (weekday, yearday
    and DST flag are filled with the placeholders 0, 1 and -1); the tenth is
    the timezone offset from UTC in seconds.  An unknown or '-0000' timezone
    yields an offset of 0.  Of the military timezones only 'Z' is
    recognized.
    """
    res = _parsedate_tz(data)
    if not res:
        return None
    if res[9] is None:
        res[9] = 0
    return tuple(res)


def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-item list, or None if the string cannot be parsed.

    The last (additional) element is the time zone offset in seconds, except
    if the timezone was specified as -0000.  In that case the last element
    is None.  This indicates a UTC timestamp that explicitly declaims
    knowledge of the source timezone, as opposed to a +0000 timestamp that
    indicates the source timezone really was UTC.  The element is also None
    when the timezone is missing or not recognized.

    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input has no tokens to look at.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The timezone may be glued to the time ("12:00:00+0100").
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe the day and month are in the other order.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    # endswith() rather than [-1] so that empty tokens (possible after the
    # RFC 850 '-' split) do not raise IndexError; they are rejected below.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The year and the time are in the other order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy:
        return None
    if not yy[0].isdigit():
        # The year and the timezone are in the other order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Neither hh.mm nor hh.mm.ss; without this the names below
            # would be unbound.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset==0 and tz.startswith('-'):
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
|
||||
|
||||
|
||||
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Whatever non-tuple value parsedate_tz() returns for the input is
    passed through unchanged.
    """
    parsed = parsedate_tz(data)
    # Drop the trailing timezone offset when parsing succeeded.
    return parsed[:9] if isinstance(parsed, tuple) else parsed
|
||||
|
||||
|
||||
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
    offset = data[9]
    if offset is not None:
        # Interpret the fields as UTC, then undo the zone offset.
        return calendar.timegm(data) - offset
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(data[:8] + (-1,))
|
||||
|
||||
|
||||
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes are each preceded by a backslash; these
    are the only characters that need escaping inside a quoted string.
    The surrounding double quotes are not added.
    """
    # Backslashes must be doubled first so that the ones inserted for the
    # double quotes are not escaped again.
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return escaped
|
||||
|
||||
|
||||
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a cursor over self.field: self.pos is the index of the
    next unread character, and every get*() method advances it past the
    construct it parsed.  Comments encountered along the way are collected
    in self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        # Characters that terminate an atom.
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped spaces and tabs joined into one string (CR and
        LF are skipped but not included).  Comments are appended to
        self.commentlist.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                # getcomment() advances self.pos itself.
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Each entry is a
        2-tuple as produced by getaddress(); a ('', '') entry is added
        whenever getaddress() yields nothing.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of 2-tuples: empty if nothing was recognized, one
        entry for a single address, or any number of entries for a group.
        """
        self.commentlist = []
        self.gotonext()

        # Remember where we started so the addr-spec branch can re-parse.
        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            # Collect member addresses until the closing ';' or end of field.
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                # Comments are appended to the phrase in parentheses.
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special character.
                self.pos += 1

        self.gotonext()
        # Consume the comma separating this address from the next one.
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain following an '@' in the route is parsed and
                # discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addr-spec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the addr-spec as a string.  If no '@' follows the local
        part, only the local part is returned; if an '@' is followed by an
        empty domain, the empty string is returned.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace recorded just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # End of the local part; drop trailing whitespace.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if an '@' is found inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # bpo-34155: Don't parse domains with two `@` like
                # `a@malicious.org@important.com`.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the text between the delimiters, with the backslash of each
        backslash-escaped character removed.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True while the previous character was an escaping backslash.
        quote = False
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                # Escaped character: take it literally, even an end char.
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Whitespace between words is skipped.

        Returns the list of words; comments found between them are appended
        to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
|
||||
|
||||
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses supporting set-like arithmetic.

    The parsed entries are kept in the `addresslist' attribute.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false field (None or '') yields an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: everything in self, then entries of other not in self.
        union = AddressList(None)
        extras = [entry for entry in other.addresslist
                  if entry not in self.addresslist]
        union.addresslist = self.addresslist[:] + extras
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference: entries of self that are not in other.
        difference = AddressList(None)
        difference.addresslist = [entry for entry in self.addresslist
                                  if entry not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
|
374
third_party/python/Lib/email/_policybase.py
vendored
Normal file
374
third_party/python/Lib/email/_policybase.py
vendored
Normal file
|
@ -0,0 +1,374 @@
|
|||
"""Policy framework for the email package.
|
||||
|
||||
Allows fine grained feature control of how the package parses and emits data.
|
||||
"""
|
||||
|
||||
import abc
|
||||
from email import header
|
||||
from email import charset as _charset
|
||||
from email.utils import _has_surrogates
|
||||
|
||||
__all__ = [
|
||||
'Policy',
|
||||
'Compat32',
|
||||
'compat32',
|
||||
]
|
||||
|
||||
|
||||
class _PolicyBase:
|
||||
|
||||
"""Policy Object basic framework.
|
||||
|
||||
This class is useless unless subclassed. A subclass should define
|
||||
class attributes with defaults for any values that are to be
|
||||
managed by the Policy object. The constructor will then allow
|
||||
non-default values to be set for these attributes at instance
|
||||
creation time. The instance will be callable, taking these same
|
||||
attributes keyword arguments, and returning a new instance
|
||||
identical to the called instance except for those values changed
|
||||
by the keyword arguments. Instances may be added, yielding new
|
||||
instances with any non-default values from the right hand
|
||||
operand overriding those in the left hand operand. That is,
|
||||
|
||||
A + B == A(<non-default values of B>)
|
||||
|
||||
The repr of an instance can be used to reconstruct the object
|
||||
if and only if the repr of the values can be used to reconstruct
|
||||
those values.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, **kw):
|
||||
"""Create new Policy, possibly overriding some defaults.
|
||||
|
||||
See class docstring for a list of overridable attributes.
|
||||
|
||||
"""
|
||||
for name, value in kw.items():
|
||||
if hasattr(self, name):
|
||||
super(_PolicyBase,self).__setattr__(name, value)
|
||||
else:
|
||||
raise TypeError(
|
||||
"{!r} is an invalid keyword argument for {}".format(
|
||||
name, self.__class__.__name__))
|
||||
|
||||
def __repr__(self):
|
||||
args = [ "{}={!r}".format(name, value)
|
||||
for name, value in self.__dict__.items() ]
|
||||
return "{}({})".format(self.__class__.__name__, ', '.join(args))
|
||||
|
||||
def clone(self, **kw):
|
||||
"""Return a new instance with specified attributes changed.
|
||||
|
||||
The new instance has the same attribute values as the current object,
|
||||
except for the changes passed in as keyword arguments.
|
||||
|
||||
"""
|
||||
newpolicy = self.__class__.__new__(self.__class__)
|
||||
for attr, value in self.__dict__.items():
|
||||
object.__setattr__(newpolicy, attr, value)
|
||||
for attr, value in kw.items():
|
||||
if not hasattr(self, attr):
|
||||
raise TypeError(
|
||||
"{!r} is an invalid keyword argument for {}".format(
|
||||
attr, self.__class__.__name__))
|
||||
object.__setattr__(newpolicy, attr, value)
|
||||
return newpolicy
|
||||
|
||||
def __setattr__(self, name, value):
    """Reject every attribute assignment with AttributeError.

    The message distinguishes an existing (read-only) attribute from a
    name the object does not have at all.

    """
    template = ("{!r} object attribute {!r} is read-only"
                if hasattr(self, name)
                else "{!r} object has no attribute {!r}")
    raise AttributeError(template.format(self.__class__.__name__, name))
|
||||
|
||||
def __add__(self, other):
    """Combine two policies; the right operand's instance values win.

    The result comes from clone(), called on the left operand with the
    right operand's instance attributes as keyword overrides.

    """
    overrides = other.__dict__
    return self.clone(**overrides)
|
||||
|
||||
|
||||
def _append_doc(doc, added_doc):
    """Splice an extension docstring onto an inherited docstring.

    The last line of *doc* and the first line of *added_doc* (the line
    holding the '+' marker) are dropped, and the two remainders are joined
    with a newline.
    """
    doc = doc.rsplit('\n', 1)[0]
    added_doc = added_doc.split('\n', 1)[1]
    return doc + '\n' + added_doc


def _extend_docstrings(cls):
    """Class decorator that expands docstrings starting with '+'.

    A class docstring starting with '+' is appended to the docstring of the
    first base class. For each attribute defined directly on *cls* whose
    docstring starts with '+', the bases' MROs are searched in order for an
    attribute of the same name with a non-empty docstring, and the '+'
    docstring is appended to the first one found. Classes in the search
    path that do not define the attribute at all (for example a mixin
    listed before the documented base) are skipped; if none provides a
    docstring, the '+' docstring is left untouched.

    Returns *cls* so the function can be used as a decorator.
    """
    if cls.__doc__ and cls.__doc__.startswith('+'):
        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
    # Private sentinel: an inherited attribute whose value is None must not
    # be mistaken for "attribute not present".
    missing = object()
    for name, attr in cls.__dict__.items():
        if attr.__doc__ and attr.__doc__.startswith('+'):
            for c in (c for base in cls.__bases__ for c in base.mro()):
                inherited = getattr(c, name, missing)
                if inherited is missing:
                    # This class does not have the attribute; keep looking
                    # instead of failing with AttributeError.
                    continue
                doc = getattr(inherited, '__doc__')
                if doc:
                    attr.__doc__ = _append_doc(doc, attr.__doc__)
                    break
    return cls
|
||||
|
||||
|
||||
class Policy(_PolicyBase, metaclass=abc.ABCMeta):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters. A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether or not an RFC
    violation results in an error being raised or not, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor. Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the clone() method of the Policy instance with keyword arguments.
    Policy objects can also be added, producing a new Policy object in which
    the non-default attributes set in the right hand operand overwrite those
    specified in the left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines. Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit. Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization. None or 0 means no line
                           wrapping is done. Default is 78.

    mangle_from_        -- a flag that, when True escapes From_ lines in the
                           body of the message by putting a `>' in front of
                           them. This is used when the message is being
                           serialized by a generator. Default: False.

    message_factory     -- the class to use to create new message objects.
                           If the value is None, the default is Message.

    """

    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78
    mangle_from_ = False
    message_factory = None

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass. obj is the object on which the defect should be
        registered if it is not raised. If the raise_on_defect is True, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False. This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling. The default implementation calls the append method of
        the defects attribute of obj. The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object. If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it. This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically. (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model. The input lines should retain their terminating linesep
        characters. The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header. The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls. The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser. The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls. The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
|
||||
|
||||
|
||||
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy. It
    replicates the behavior of the email package version 5.1.
    """

    mangle_from_ = True

    def _sanitize_header(self, name, value):
        # Only a str that carries surrogates needs wrapping: it becomes a
        # Header in the unknown-8bit charset so the bytes are emitted as
        # encoded words. A non-str value is assumed to be a header object
        # already and, like a surrogate-free str, is returned as is.
        if isinstance(value, str) and _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        return value

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.

        """
        name, remainder = sourcelines[0].split(':', 1)
        joined = remainder.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, joined.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header object
        using the unknown-8bit charset. Otherwise it is returned unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length. Non-ASCII binary data are CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset. Otherwise the original source
        header is used, with its existing line breaks and/or binary data.

        """
        wants_sanitizing = self.cte_type=='7bit'
        text = self._fold(name, value, sanitize=wants_sanitizing)
        return text.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        # Shared implementation of fold() and fold_binary(): returns
        # "<name>: <folded value><linesep>" as a str.
        pieces = ['%s: ' % name]
        if not isinstance(value, str):
            # Assume it is a Header-like object.
            hdr = value
        elif not _has_surrogates(value):
            hdr = header.Header(value, header_name=name)
        elif sanitize:
            hdr = header.Header(value,
                                charset=_charset.UNKNOWN8BIT,
                                header_name=name)
        else:
            # Raw 8bit data of unknown encoding. It cannot be split safely:
            # an ASCII-style split might cut a multibyte sequence in half,
            # and there is no way to tell. Emitting the value unsplit, at the
            # risk of an over-long line, does the least harm.
            pieces.append(value)
            hdr = None
        if hdr is not None:
            # Header treats maxlinelen=None as "use the default of 78"
            # (RFC 2822's recommendation), so a policy value of None is
            # passed on as 0.
            maxlinelen = 0
            if self.max_line_length is not None:
                maxlinelen = self.max_line_length
            pieces.append(hdr.encode(linesep=self.linesep,
                                     maxlinelen=maxlinelen))
        pieces.append(self.linesep)
        return ''.join(pieces)


compat32 = Compat32()
|
216
third_party/python/Lib/email/architecture.rst
vendored
Normal file
216
third_party/python/Lib/email/architecture.rst
vendored
Normal file
|
@ -0,0 +1,216 @@
|
|||
:mod:`email` Package Architecture
|
||||
=================================
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
The email package consists of three major components:
|
||||
|
||||
Model
|
||||
An object structure that represents an email message, and provides an
|
||||
API for creating, querying, and modifying a message.
|
||||
|
||||
Parser
|
||||
Takes a sequence of characters or bytes and produces a model of the
|
||||
email message represented by those characters or bytes.
|
||||
|
||||
Generator
|
||||
Takes a model and turns it into a sequence of characters or bytes. The
|
||||
sequence can either be intended for human consumption (a printable
|
||||
unicode string) or bytes suitable for transmission over the wire. In
|
||||
the latter case all data is properly encoded using the content transfer
|
||||
encodings specified by the relevant RFCs.
|
||||
|
||||
Conceptually the package is organized around the model. The model provides both
|
||||
"external" APIs intended for use by application programs using the library,
|
||||
and "internal" APIs intended for use by the Parser and Generator components.
|
||||
This division is intentionally a bit fuzzy; the API described by this
|
||||
documentation is all a public, stable API. This allows for an application
|
||||
with special needs to implement its own parser and/or generator.
|
||||
|
||||
In addition to the three major functional components, there is a fourth key
|
||||
component to the architecture:
|
||||
|
||||
Policy
|
||||
An object that specifies various behavioral settings and carries
|
||||
implementations of various behavior-controlling methods.
|
||||
|
||||
The Policy framework provides a simple and convenient way to control the
|
||||
behavior of the library, making it possible for the library to be used in a
|
||||
very flexible fashion while leveraging the common code required to parse,
|
||||
represent, and generate message-like objects. For example, in addition to the
|
||||
default :rfc:`5322` email message policy, we also have a policy that manages
|
||||
HTTP headers in a fashion compliant with :rfc:`2616`. Individual policy
|
||||
controls, such as the maximum line length produced by the generator, can also
|
||||
be controlled individually to meet specialized application requirements.
|
||||
|
||||
|
||||
The Model
|
||||
---------
|
||||
|
||||
The message model is implemented by the :class:`~email.message.Message` class.
|
||||
The model divides a message into the two fundamental parts discussed by the
|
||||
RFC: the header section and the body. The `Message` object acts as a
|
||||
pseudo-dictionary of named headers. Its dictionary interface provides
|
||||
convenient access to individual headers by name. However, all headers are kept
|
||||
internally in an ordered list, so that the information about the order of the
|
||||
headers in the original message is preserved.
|
||||
|
||||
The `Message` object also has a `payload` that holds the body. A `payload` can
|
||||
be one of two things: data, or a list of `Message` objects. The latter is used
|
||||
to represent a multipart MIME message. Lists can be nested arbitrarily deeply
|
||||
in order to represent the message, with all terminal leaves having non-list
|
||||
data payloads.
|
||||
|
||||
|
||||
Message Lifecycle
|
||||
-----------------
|
||||
|
||||
The general lifecycle of a message is:
|
||||
|
||||
Creation
|
||||
A `Message` object can be created by a Parser, or it can be
|
||||
instantiated as an empty message by an application.
|
||||
|
||||
Manipulation
|
||||
The application may examine one or more headers, and/or the
|
||||
payload, and it may modify one or more headers and/or
|
||||
the payload. This may be done on the top level `Message`
|
||||
object, or on any sub-object.
|
||||
|
||||
Finalization
|
||||
The Model is converted into a unicode or binary stream,
|
||||
or the model is discarded.
|
||||
|
||||
|
||||
|
||||
Header Policy Control During Lifecycle
|
||||
--------------------------------------
|
||||
|
||||
One of the major controls exerted by the Policy is the management of headers
|
||||
during the `Message` lifecycle. Most applications don't need to be aware of
|
||||
this.
|
||||
|
||||
A header enters the model in one of two ways: via a Parser, or by being set to
|
||||
a specific value by an application program after the Model already exists.
|
||||
Similarly, a header exits the model in one of two ways: by being serialized by
|
||||
a Generator, or by being retrieved from a Model by an application program. The
|
||||
Policy object provides hooks for all four of these pathways.
|
||||
|
||||
The model storage for headers is a list of (name, value) tuples.
|
||||
|
||||
The Parser identifies headers during parsing, and passes them to the
|
||||
:meth:`~email.policy.Policy.header_source_parse` method of the Policy. The
|
||||
result of that method is the (name, value) tuple to be stored in the model.
|
||||
|
||||
When an application program supplies a header value (for example, through the
|
||||
`Message` object `__setitem__` interface), the name and the value are passed to
|
||||
the :meth:`~email.policy.Policy.header_store_parse` method of the Policy, which
|
||||
returns the (name, value) tuple to be stored in the model.
|
||||
|
||||
When an application program retrieves a header (through any of the dict or list
|
||||
interfaces of `Message`), the name and value are passed to the
|
||||
:meth:`~email.policy.Policy.header_fetch_parse` method of the Policy to
|
||||
obtain the value returned to the application.
|
||||
|
||||
When a Generator requests a header during serialization, the name and value are
|
||||
passed to the :meth:`~email.policy.Policy.fold` method of the Policy, which
|
||||
returns a string containing line breaks in the appropriate places. The
|
||||
:attr:`~email.policy.Policy.cte_type` Policy control determines whether or
|
||||
not Content Transfer Encoding is performed on the data in the header. There is
|
||||
also a :meth:`~email.policy.Policy.fold_binary` method for use by generators
|
||||
that produce binary output, which returns the folded header as binary data,
|
||||
possibly folded at different places than the corresponding string would be.
|
||||
|
||||
|
||||
Handling Binary Data
|
||||
--------------------
|
||||
|
||||
In an ideal world all message data would conform to the RFCs, meaning that the
|
||||
parser could decode the message into the idealized unicode message that the
|
||||
sender originally wrote. In the real world, the email package must also be
|
||||
able to deal with badly formatted messages, including messages containing
|
||||
non-ASCII characters that either have no indicated character set or are not
|
||||
valid characters in the indicated character set.
|
||||
|
||||
Since email messages are *primarily* text data, and operations on message data
|
||||
are primarily text operations (except for binary payloads of course), the model
|
||||
stores all text data as unicode strings. Un-decodable binary inside text
|
||||
data is handled by using the `surrogateescape` error handler of the ASCII
|
||||
codec. As with the binary filenames the error handler was introduced to
|
||||
handle, this allows the email package to "carry" the binary data received
|
||||
during parsing along until the output stage, at which time it is regenerated
|
||||
in its original form.
|
||||
|
||||
This carried binary data is almost entirely an implementation detail. The one
|
||||
place where it is visible in the API is in the "internal" API. A Parser must
|
||||
do the `surrogateescape` encoding of binary input data, and pass that data to
|
||||
the appropriate Policy method. The "internal" interface used by the Generator
|
||||
to access header values preserves the `surrogateescaped` bytes. All other
|
||||
interfaces convert the binary data either back into bytes or into a safe form
|
||||
(losing information in some cases).
|
||||
|
||||
|
||||
Backward Compatibility
|
||||
----------------------
|
||||
|
||||
The :class:`~email.policy.Compat32` Policy provides backward
|
||||
compatibility with version 5.1 of the email package. It does this via the
|
||||
following implementation of the four+1 Policy methods described above:
|
||||
|
||||
header_source_parse
|
||||
Splits the first line on the colon to obtain the name, discards any spaces
|
||||
after the colon, and joins the remainder of the line with all of the
|
||||
remaining lines, preserving the linesep characters to obtain the value.
|
||||
Trailing carriage return and/or linefeed characters are stripped from the
|
||||
resulting value string.
|
||||
|
||||
header_store_parse
|
||||
Returns the name and value exactly as received from the application.
|
||||
|
||||
header_fetch_parse
|
||||
If the value contains any `surrogateescaped` binary data, return the value
|
||||
as a :class:`~email.header.Header` object, using the character set
|
||||
`unknown-8bit`. Otherwise just returns the value.
|
||||
|
||||
fold
|
||||
Uses :class:`~email.header.Header`'s folding to fold headers in the
|
||||
same way the email 5.1 generator did.
|
||||
|
||||
fold_binary
|
||||
Same as fold, but encodes to 'ascii'.
|
||||
|
||||
|
||||
New Algorithm
|
||||
-------------
|
||||
|
||||
header_source_parse
|
||||
Same as legacy behavior.
|
||||
|
||||
header_store_parse
|
||||
Same as legacy behavior.
|
||||
|
||||
header_fetch_parse
|
||||
If the value is already a header object, returns it. Otherwise, parses the
|
||||
value using the new parser, and returns the resulting object as the value.
|
||||
`surrogateescaped` bytes get turned into unicode unknown character code
|
||||
points.
|
||||
|
||||
fold
|
||||
Uses the new header folding algorithm, respecting the policy settings.
|
||||
surrogateescaped bytes are encoded using the ``unknown-8bit`` charset for
|
||||
``cte_type=7bit`` or ``8bit``. Returns a string.
|
||||
|
||||
At some point there will also be a ``cte_type=unicode``, and for that
|
||||
policy fold will serialize the idealized unicode message with RFC-like
|
||||
folding, converting any surrogateescaped bytes into the unicode
|
||||
unknown character glyph.
|
||||
|
||||
fold_binary
|
||||
Uses the new header folding algorithm, respecting the policy settings.
|
||||
surrogateescaped bytes are encoded using the `unknown-8bit` charset for
|
||||
``cte_type=7bit``, and get turned back into bytes for ``cte_type=8bit``.
|
||||
Returns bytes.
|
||||
|
||||
At some point there will also be a ``cte_type=unicode``, and for that
|
||||
policy fold_binary will serialize the message according to :rfc:`5335`.
|
119
third_party/python/Lib/email/base64mime.py
vendored
Normal file
119
third_party/python/Lib/email/base64mime.py
vendored
Normal file
|
@ -0,0 +1,119 @@
|
|||
# Copyright (C) 2002-2007 Python Software Foundation
|
||||
# Author: Ben Gertzfield
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Base64 content transfer encoding per RFCs 2045-2047.
|
||||
|
||||
This module handles the content transfer encoding method defined in RFC 2045
|
||||
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
|
||||
characters encoding known as Base64.
|
||||
|
||||
It is used in the MIME standards for email to attach images, audio, and text
|
||||
using some 8-bit character sets to messages.
|
||||
|
||||
This module provides an interface to encode and decode both headers and bodies
|
||||
with Base64 encoding.
|
||||
|
||||
RFC 2047 defines a method for including character set information in an
|
||||
`encoded-word' in a header. This method is commonly used for 8-bit real names
|
||||
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
|
||||
|
||||
This module does not do the line wrapping or end-of-line character conversion
|
||||
necessary for proper internationalized headers; it only does dumb encoding and
|
||||
decoding. To deal with the various line wrapping issues, use the email.header
|
||||
module.
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
'body_decode',
|
||||
'body_encode',
|
||||
'decode',
|
||||
'decodestring',
|
||||
'header_encode',
|
||||
'header_length',
|
||||
]
|
||||
|
||||
|
||||
from base64 import b64encode
|
||||
from binascii import b2a_base64, a2b_base64
|
||||
|
||||
CRLF = '\r\n'
|
||||
NL = '\n'
|
||||
EMPTYSTRING = ''
|
||||
|
||||
# See also Charset.py
|
||||
MISC_LEN = 7
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
def header_length(bytearray):
    """Return the base64-encoded length of the given byte sequence."""
    # Base64 emits 4 output characters for every group of 3 input bytes,
    # with a trailing partial group padded out to a full 4 characters.
    full_or_partial_groups = (len(bytearray) + 2) // 3
    return 4 * full_or_partial_groups
|
||||
|
||||
|
||||
|
||||
def header_encode(header_bytes, charset='iso-8859-1'):
    """Return header_bytes as a base64 RFC 2047 style encoded word.

    A str argument is first encoded to bytes using charset (default
    iso-8859-1); the same charset name is written into the encoded word.
    An empty argument yields the empty string.
    """
    if not header_bytes:
        return ""
    payload = header_bytes
    if isinstance(payload, str):
        payload = payload.encode(charset)
    b64_text = b64encode(payload).decode("ascii")
    return '=?%s?b?%s?=' % (charset, b64_text)
|
||||
|
||||
|
||||
|
||||
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Base64-encode s, producing lines of at most maxlinelen characters.

    The input is consumed in slices sized so that each encoded line fits in
    maxlinelen characters (76 by default). Every encoded line is terminated
    by eol, which defaults to "\n"; pass "\r\n" when the result goes
    directly into an email. A false-y s is returned unchanged.
    """
    if not s:
        return s

    chunk_size = maxlinelen * 3 // 4
    lines = []
    for start in range(0, len(s), chunk_size):
        # b2a_base64() always appends a newline to its output; swap it for
        # the requested line ending when that is something else.
        line = b2a_base64(s[start:start + chunk_size]).decode("ascii")
        if eol != NL and line.endswith(NL):
            line = line[:-1] + eol
        lines.append(line)
    return EMPTYSTRING.join(lines)
|
||||
|
||||
|
||||
|
||||
def decode(string):
    """Decode raw base64 data and return the result as bytes.

    A str argument is converted to bytes with the raw-unicode-escape codec
    before decoding; any other argument is handed to the decoder as is. An
    empty argument yields empty bytes.

    This does not parse a complete MIME encoded-word header value (such as
    =?iso-8859-1?b?bmloISBuaWgh?=); use the high level email.header class
    for that.
    """
    if not string:
        return bytes()
    raw = string.encode('raw-unicode-escape') if isinstance(string, str) else string
    return a2b_base64(raw)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
|
406
third_party/python/Lib/email/charset.py
vendored
Normal file
406
third_party/python/Lib/email/charset.py
vendored
Normal file
|
@ -0,0 +1,406 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Ben Gertzfield, Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
__all__ = [
|
||||
'Charset',
|
||||
'add_alias',
|
||||
'add_charset',
|
||||
'add_codec',
|
||||
]
|
||||
|
||||
from functools import partial
|
||||
|
||||
import email.base64mime
|
||||
import email.quoprimime
|
||||
|
||||
from email import errors
|
||||
from email.encoders import encode_7or8bit
|
||||
|
||||
|
||||
|
||||
# Flags for types of header encodings
|
||||
QP = 1 # Quoted-Printable
|
||||
BASE64 = 2 # Base64
|
||||
SHORTEST = 3 # the shorter of QP and base64, but only for headers
|
||||
|
||||
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
|
||||
RFC2047_CHROME_LEN = 7
|
||||
|
||||
DEFAULT_CHARSET = 'us-ascii'
|
||||
UNKNOWN8BIT = 'unknown-8bit'
|
||||
EMPTYSTRING = ''
|
||||
|
||||
|
||||
|
||||
# Defaults
|
||||
CHARSETS = {
|
||||
# input header enc body enc output conv
|
||||
'iso-8859-1': (QP, QP, None),
|
||||
'iso-8859-2': (QP, QP, None),
|
||||
'iso-8859-3': (QP, QP, None),
|
||||
'iso-8859-4': (QP, QP, None),
|
||||
# iso-8859-5 is Cyrillic, and not especially used
|
||||
# iso-8859-6 is Arabic, also not particularly used
|
||||
# iso-8859-7 is Greek, QP will not make it readable
|
||||
# iso-8859-8 is Hebrew, QP will not make it readable
|
||||
'iso-8859-9': (QP, QP, None),
|
||||
'iso-8859-10': (QP, QP, None),
|
||||
# iso-8859-11 is Thai, QP will not make it readable
|
||||
'iso-8859-13': (QP, QP, None),
|
||||
'iso-8859-14': (QP, QP, None),
|
||||
'iso-8859-15': (QP, QP, None),
|
||||
'iso-8859-16': (QP, QP, None),
|
||||
'windows-1252':(QP, QP, None),
|
||||
'viscii': (QP, QP, None),
|
||||
'us-ascii': (None, None, None),
|
||||
'big5': (BASE64, BASE64, None),
|
||||
'gb2312': (BASE64, BASE64, None),
|
||||
'euc-jp': (BASE64, None, 'iso-2022-jp'),
|
||||
'shift_jis': (BASE64, None, 'iso-2022-jp'),
|
||||
'iso-2022-jp': (BASE64, None, None),
|
||||
'koi8-r': (BASE64, BASE64, None),
|
||||
'utf-8': (SHORTEST, BASE64, 'utf-8'),
|
||||
}
|
||||
|
||||
# Aliases for other commonly-used names for character sets. Map
|
||||
# them to the real ones used in email.
|
||||
ALIASES = {
|
||||
'latin_1': 'iso-8859-1',
|
||||
'latin-1': 'iso-8859-1',
|
||||
'latin_2': 'iso-8859-2',
|
||||
'latin-2': 'iso-8859-2',
|
||||
'latin_3': 'iso-8859-3',
|
||||
'latin-3': 'iso-8859-3',
|
||||
'latin_4': 'iso-8859-4',
|
||||
'latin-4': 'iso-8859-4',
|
||||
'latin_5': 'iso-8859-9',
|
||||
'latin-5': 'iso-8859-9',
|
||||
'latin_6': 'iso-8859-10',
|
||||
'latin-6': 'iso-8859-10',
|
||||
'latin_7': 'iso-8859-13',
|
||||
'latin-7': 'iso-8859-13',
|
||||
'latin_8': 'iso-8859-14',
|
||||
'latin-8': 'iso-8859-14',
|
||||
'latin_9': 'iso-8859-15',
|
||||
'latin-9': 'iso-8859-15',
|
||||
'latin_10':'iso-8859-16',
|
||||
'latin-10':'iso-8859-16',
|
||||
'cp949': 'ks_c_5601-1987',
|
||||
'euc_jp': 'euc-jp',
|
||||
'euc_kr': 'euc-kr',
|
||||
'ascii': 'us-ascii',
|
||||
}
|
||||
|
||||
|
||||
# Map charsets to their Unicode codec strings.
|
||||
CODEC_MAP = {
|
||||
'gb2312': 'eucgb2312_cn',
|
||||
'big5': 'big5_tw',
|
||||
# Hack: We don't want *any* conversion for stuff marked us-ascii, as all
|
||||
# sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
|
||||
# Let that stuff pass through without conversion to/from Unicode.
|
||||
'us-ascii': None,
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Convenience functions for extending the above mappings
|
||||
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register the email properties of a character set.

    charset is the canonical name of the input character set; its entry in
    the module-level CHARSETS registry is created or replaced.

    header_enc and body_enc say how headers and bodies in that charset are
    to be encoded: QP for quoted-printable, BASE64 for base64, or None for
    no encoding (the default). header_enc may also be SHORTEST, meaning
    whichever of quoted-printable and base64 is shorter; SHORTEST is
    rejected for body_enc with ValueError.

    output_charset names the character set the output should be converted
    to; None (the default) is stored when no separate output charset is
    given.

    The input and output charsets need Unicode codec entries in the
    module's charset-to-codec mapping; register missing ones with
    add_codec(charset, codecname). See the codecs module documentation
    for details.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    properties = (header_enc, body_enc, output_charset)
    CHARSETS[charset] = properties
|
||||
|
||||
|
||||
def add_alias(alias, canonical):
    """Record *alias* as another name for the character set *canonical*.

    alias is the alias name, e.g. latin-1
    canonical is the character set's canonical name, e.g. iso-8859-1

    An existing entry for the same alias is overwritten.
    """
    ALIASES.update({alias: canonical})
|
||||
|
||||
|
||||
def add_codec(charset, codecname):
    """Register the codec that maps characters in *charset* to/from Unicode.

    charset is the canonical name of a character set.  codecname is the name
    of a Python codec, i.e. a value suitable for the encoding argument of
    str.encode() and bytes.decode().

    An existing entry for the same charset is overwritten.
    """
    CODEC_MAP.update({charset: codecname})
|
||||
|
||||
|
||||
|
||||
# Convenience function for encoding strings, taking into account
|
||||
# that they might be unknown-8bit (ie: have surrogate-escaped bytes)
|
||||
def _encode(string, codec):
    """Encode *string* to bytes with *codec*.

    When *codec* is the UNKNOWN8BIT marker the string is encoded as ASCII
    with the surrogateescape handler, so surrogate-escaped bytes are turned
    back into the bytes they stand for.  Any other codec is passed straight
    to str.encode().
    """
    if codec != UNKNOWN8BIT:
        return string.encode(codec)
    return string.encode('ascii', 'surrogateescape')
|
||||
|
||||
|
||||
|
||||
class Charset:
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to provide
    information on how to use that character set in an email in an
    RFC-compliant way.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:

    input_charset: The initial character set specified, lower-cased.  Common
                   aliases are converted to their `official' email names
                   (e.g. latin_1 is converted to iso-8859-1).  Defaults to
                   the module's DEFAULT_CHARSET.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable), Charset.BASE64 (for
                     base64 encoding), or Charset.SHORTEST for the shortest of
                     QP or BASE64 encoding.  Otherwise, it will be None.
                     Charsets missing from the registry get SHORTEST.

    body_encoding: Same as header_encoding, but describes the encoding for the
                   mail message's body, which indeed may be different than the
                   header encoding.  Charset.SHORTEST is not allowed for
                   body_encoding.  Charsets missing from the registry get
                   BASE64.

    output_charset: Some character sets must be converted before they can be
                    used in email headers or bodies.  If the input_charset is
                    one of them, this attribute will contain the name of the
                    charset output will be converted to (after alias
                    resolution).  Otherwise, it is the same as input_charset.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode: the codec-map entry for
                 input_charset, or input_charset itself when there is no
                 entry.  An entry may be None, meaning no conversion.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset, looked up the same way as
                  input_codec.  When no conversion charset applies this has
                  the same value as input_codec.
    """

    def __init__(self, input_charset=DEFAULT_CHARSET):
        """Look up the email properties of *input_charset*.

        input_charset may be a str or an ASCII-decodable bytes-like object.
        Raises errors.CharsetError when the name is not pure ASCII.
        """
        # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to
        # unicode because its .lower() is locale insensitive.  If the argument
        # is already a unicode, we leave it at that, but ensure that the
        # charset is ASCII, as the standard (RFC XXX) requires.
        try:
            if isinstance(input_charset, str):
                input_charset.encode('ascii')
            else:
                input_charset = str(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
        input_charset = input_charset.lower()
        # Set the input charset after filtering through the aliases
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # Guess the encodings and the conversion charset from the CHARSETS
        # registry; unregistered charsets fall back to SHORTEST header
        # encoding, BASE64 body encoding and no conversion.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (SHORTEST, BASE64, None))
        if not conv:
            # No conversion requested: output in the input charset.
            conv = self.input_charset
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't defined for a charset, guess and
        # try a Unicode codec with the same name as the charset.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.output_charset)

    def __str__(self):
        """Return the lower-cased input charset name."""
        return self.input_charset.lower()

    __repr__ = __str__

    def __eq__(self, other):
        """Compare case-insensitively against str(other)."""
        return str(self) == str(other).lower()

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns the encode_7or8bit function otherwise.
        """
        assert self.body_encoding != SHORTEST
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is set (truthy), otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def header_encode(self, string):
        """Header-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        this charset's `header_encoding`.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :return: The encoded string, with RFC 2047 chrome.  When the charset
            has no header encoding the string is returned unchanged.
        """
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        encoder_module = self._get_encoder(header_bytes)
        if encoder_module is None:
            return string
        return encoder_module.header_encode(header_bytes, codec)

    def header_encode_lines(self, string, maxlengths):
        """Header-encode a string by converting it first to bytes.

        This is similar to `header_encode()` except that the string is fit
        into maximum line lengths as given by the argument.  The charset must
        have a header encoding: the encoder module is used unconditionally.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :param maxlengths: Maximum line length iterator.  Each element
            returned from this iterator will provide the next maximum line
            length.  This parameter is used as an argument to built-in next()
            and should never be exhausted.  The maximum line lengths should
            not count the RFC 2047 chrome.  These line lengths are only a
            hint; the splitter does the best it can.
        :return: Lines of encoded strings, each with RFC 2047 chrome.  The
            first element is None when nothing fits on the first line.
        """
        # See which encoding we should use.
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        encoder_module = self._get_encoder(header_bytes)
        encoder = partial(encoder_module.header_encode, charset=codec)
        # Calculate the number of characters that the RFC 2047 chrome will
        # contribute to each line.
        charset = self.get_output_charset()
        extra = len(charset) + RFC2047_CHROME_LEN
        # Now comes the hard part.  We must encode bytes but we can't split on
        # bytes because some character sets are variable length and each
        # encoded word must stand on its own.  So the problem is you have to
        # encode to bytes to figure out this word's length, but you must split
        # on characters.  This causes two problems: first, we don't know how
        # many octets a specific substring of unicode characters will get
        # encoded to, and second, we don't know how many ASCII characters
        # those octets will get encoded to.  Unless we try it.  Which seems
        # inefficient.  In the interest of being correct rather than fast (and
        # in the hope that there will be few encoded headers in any such
        # message), brute force it. :(
        lines = []
        current_line = []
        maxlen = next(maxlengths) - extra
        for character in string:
            current_line.append(character)
            this_line = EMPTYSTRING.join(current_line)
            # NOTE(review): the trial encoding uses the charset name while the
            # emitted line uses `codec`; confirm both always name equivalent
            # codecs.
            length = encoder_module.header_length(_encode(this_line, charset))
            if length > maxlen:
                # This last character doesn't fit so pop it off.
                current_line.pop()
                # Does nothing fit on the first line?
                if not lines and not current_line:
                    lines.append(None)
                else:
                    joined_line = EMPTYSTRING.join(current_line)
                    header_bytes = _encode(joined_line, codec)
                    lines.append(encoder(header_bytes))
                current_line = [character]
                maxlen = next(maxlengths) - extra
        joined_line = EMPTYSTRING.join(current_line)
        header_bytes = _encode(joined_line, codec)
        lines.append(encoder(header_bytes))
        return lines

    def _get_encoder(self, header_bytes):
        """Return the module used to header-encode *header_bytes*.

        Returns email.base64mime or email.quoprimime according to
        self.header_encoding; for SHORTEST the module whose header_length()
        is smaller is chosen (quoted-printable on a tie).  Returns None for
        any other header encoding.
        """
        if self.header_encoding == BASE64:
            return email.base64mime
        elif self.header_encoding == QP:
            return email.quoprimime
        elif self.header_encoding == SHORTEST:
            len64 = email.base64mime.header_length(header_bytes)
            lenqp = email.quoprimime.header_length(header_bytes)
            if len64 < lenqp:
                return email.base64mime
            else:
                return email.quoprimime
        else:
            return None

    def body_encode(self, string):
        """Body-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.  If body_encoding is None, we assume the
        output charset is a 7bit encoding, so re-encoding the decoded
        string using the ascii codec produces the correct string version
        of the content.

        A falsy *string* (empty str/bytes or None) is returned unchanged.
        """
        if not string:
            return string
        # Compare the integer encoding constants by value, consistently with
        # get_body_encoding() and _get_encoder().
        if self.body_encoding == BASE64:
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            return email.base64mime.body_encode(string)
        elif self.body_encoding == QP:
            # quoprimime.body_encode takes a string, but operates on it as if
            # it were a list of byte codes.  For a (minimal) history on why
            # this is so, see changeset 0cf700464177.  To correctly encode a
            # character set, then, we must turn it into pseudo bytes via the
            # latin1 charset, which will encode any byte as a single code point
            # between 0 and 255, which is what body_encode is expecting.
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            string = string.decode('latin1')
            return email.quoprimime.body_encode(string)
        else:
            if isinstance(string, str):
                string = string.encode(self.output_charset).decode('ascii')
            return string
|
250
third_party/python/Lib/email/contentmanager.py
vendored
Normal file
250
third_party/python/Lib/email/contentmanager.py
vendored
Normal file
|
@ -0,0 +1,250 @@
|
|||
import binascii
|
||||
import email.charset
|
||||
import email.message
|
||||
import email.errors
|
||||
from email import quoprimime
|
||||
|
||||
class ContentManager:
    """Registry that dispatches getting and setting of message content.

    Get handlers are keyed by content type strings; set handlers are keyed by
    the type of the object being stored (or by one of its names).
    """

    def __init__(self):
        # content-type key -> callable(msg, *args, **kw)
        self.get_handlers = {}
        # type / type-name key -> callable(msg, obj, *args, **kw)
        self.set_handlers = {}

    def add_get_handler(self, key, handler):
        """Register *handler* as the get handler for *key*."""
        self.get_handlers[key] = handler

    def get_content(self, msg, *args, **kw):
        """Return the result of the get handler selected for *msg*.

        Handlers are looked up by full content type, then by main type, then
        under the empty-string key.  Extra arguments are passed through to
        the handler.  Raises KeyError(content_type) when nothing matches.
        """
        registry = self.get_handlers
        content_type = msg.get_content_type()
        if content_type in registry:
            chosen = registry[content_type]
        else:
            maintype = msg.get_content_maintype()
            if maintype in registry:
                chosen = registry[maintype]
            elif '' in registry:
                chosen = registry['']
            else:
                raise KeyError(content_type)
        return chosen(msg, *args, **kw)

    def add_set_handler(self, typekey, handler):
        """Register *handler* as the set handler for *typekey*."""
        self.set_handlers[typekey] = handler

    def set_content(self, msg, obj, *args, **kw):
        """Store *obj* in *msg* using the set handler matching type(obj).

        Raises TypeError if *msg* has a multipart main type, and KeyError if
        no handler matches.  The message's content is cleared before the
        handler runs.
        """
        if msg.get_content_maintype() == 'multipart':
            # XXX: is this error a good idea or not?  We can remove it later,
            # but we can't add it later, so do it for now.
            raise TypeError("set_content not valid on multipart")
        # Resolve the handler first so a lookup failure leaves msg untouched.
        chosen = self._find_set_handler(msg, obj)
        msg.clear_content()
        chosen(msg, obj, *args, **kw)

    def _find_set_handler(self, msg, obj):
        """Return the set handler for *obj*, walking type(obj).__mro__.

        For each class the keys tried are, in order: the class itself, its
        'module.qualname' path, its qualname, and its name.  A handler
        registered under None is the final fallback.  Raises KeyError with
        the dotted path of type(obj) when nothing matches.
        """
        registry = self.set_handlers
        path_of_first_class = None
        for klass in type(obj).__mro__:
            if klass in registry:
                return registry[klass]
            qualname = klass.__qualname__
            module = getattr(klass, '__module__', '')
            if module:
                dotted = '.'.join((module, qualname))
            else:
                dotted = qualname
            if path_of_first_class is None:
                # Remember the most-derived class for the error message.
                path_of_first_class = dotted
            for key in (dotted, qualname, klass.__name__):
                if key in registry:
                    return registry[key]
        if None in registry:
            return registry[None]
        raise KeyError(path_of_first_class)
|
||||
|
||||
|
||||
raw_data_manager = ContentManager()
|
||||
|
||||
|
||||
def get_text_content(msg, errors='replace'):
    """Return the payload of *msg* decoded to str.

    The transfer-decoded payload is decoded with the message's 'charset'
    parameter ('ASCII' when the parameter is absent), using *errors* as the
    decoding error handler.
    """
    return msg.get_payload(decode=True).decode(
        msg.get_param('charset', 'ASCII'), errors=errors)
|
||||
raw_data_manager.add_get_handler('text', get_text_content)
|
||||
|
||||
|
||||
def get_non_text_content(msg):
    """Return the payload of *msg* as given by get_payload(decode=True)."""
    decoded_payload = msg.get_payload(decode=True)
    return decoded_payload
|
||||
for maintype in 'audio image video application'.split():
|
||||
raw_data_manager.add_get_handler(maintype, get_non_text_content)
|
||||
|
||||
|
||||
def get_message_content(msg):
    """Return the first item of the payload list of *msg*."""
    first_part = msg.get_payload(0)
    return first_part
|
||||
for subtype in 'rfc822 external-body'.split():
|
||||
raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
|
||||
|
||||
|
||||
def get_and_fixup_unknown_message_content(msg):
    """Return the first payload item of *msg* converted with bytes()."""
    # If we don't understand a message subtype, we are supposed to treat it as
    # if it were application/octet-stream, per
    # tools.ietf.org/html/rfc2046#section-5.2.4.  Feedparser doesn't do that,
    # so do our best to fix things up.  Note that it is *not* appropriate to
    # model message/partial content as Message objects, so they are handled
    # here as well.  (How to reassemble them is out of scope for this comment :)
    embedded = msg.get_payload(0)
    return bytes(embedded)
|
||||
raw_data_manager.add_get_handler('message',
|
||||
get_and_fixup_unknown_message_content)
|
||||
|
||||
|
||||
def _prepare_set(msg, maintype, subtype, headers):
    """Set Content-Type on *msg* and add any extra *headers*.

    Content-Type is set to 'maintype/subtype'.  *headers*, when truthy, is a
    sequence of either header objects (anything with a 'name' attribute) or
    header source strings; strings are parsed with the message's policy.
    Only the first item is inspected to decide which form was passed.

    Raises ValueError (chained from the defect) when a header carries a
    defect that is an email.errors.HeaderDefect; any other defect is raised
    as is.
    """
    msg['Content-Type'] = '/'.join((maintype, subtype))
    if not headers:
        return
    if not hasattr(headers[0], 'name'):
        # Source strings were given: build header objects from them.
        policy = msg.policy
        headers = [policy.header_factory(*policy.header_source_parse([source]))
                   for source in headers]
    try:
        for header in headers:
            defects = header.defects
            if defects:
                raise defects[0]
            msg[header.name] = header
    except email.errors.HeaderDefect as exc:
        # `header` is still the header whose defect was just raised.
        rendered = header.fold(policy=msg.policy)
        raise ValueError("Invalid header: {}".format(rendered)) from exc
|
||||
|
||||
|
||||
def _finalize_set(msg, disposition, filename, cid, params):
    """Apply the optional disposition, filename, Content-ID and parameters.

    Each argument is skipped when it is None, except that a filename given
    without a disposition implies the disposition 'attachment'.  *filename*
    becomes the 'filename' parameter of Content-Disposition, *cid* the
    Content-ID header, and each item of the *params* mapping is set with
    msg.set_param().
    """
    has_filename = filename is not None
    if has_filename and disposition is None:
        disposition = 'attachment'
    if disposition is not None:
        msg['Content-Disposition'] = disposition
    if has_filename:
        msg.set_param('filename', filename,
                      header='Content-Disposition', replace=True)
    if cid is not None:
        msg['Content-ID'] = cid
    if params is None:
        return
    for key, value in params.items():
        msg.set_param(key, value)
|
||||
|
||||
|
||||
# XXX: This is a cleaned-up version of base64mime.body_encode (including a bug
|
||||
# fix in the calculation of unencoded_bytes_per_line). It would be nice to
|
||||
# drop both this and quoprimime.body_encode in favor of enhanced binascii
|
||||
# routines that accepted a max_line_length parameter.
|
||||
def _encode_base64(data, max_line_length):
    """Return *data* base64-encoded as a str of newline-terminated lines.

    The input is cut into chunks of (max_line_length // 4 * 3) bytes and
    each chunk is encoded on its own line.  Empty input gives ''.
    """
    chunk_size = max_line_length // 4 * 3
    return ''.join([
        binascii.b2a_base64(data[start:start + chunk_size]).decode('ascii')
        for start in range(0, len(data), chunk_size)
    ])
|
||||
|
||||
|
||||
def _encode_text(string, charset, cte, policy):
    """Encode *string* for use as a text payload.

    The string is encoded with *charset* and split into lines.  When *cte*
    is None a content transfer encoding is chosen heuristically:

    * '7bit' if the encoded bytes are pure ASCII;
    * '8bit' if the policy's cte_type is '8bit' and no line is longer than
      policy.max_line_length;
    * otherwise whichever of quoted-printable and base64 is shorter for the
      first ten lines (quoted-printable on a tie).

    Returns a (cte, payload) tuple where payload is a str.
    Raises ValueError for a *cte* that is not one of '7bit', '8bit',
    'quoted-printable' or 'base64'.
    """
    lines = string.encode(charset).splitlines()
    linesep = policy.linesep.encode('ascii')

    def embedded_body(lines):
        # Lines joined and terminated with the policy's line separator.
        return linesep.join(lines) + linesep

    def normal_body(lines):
        # Lines joined and terminated with a bare newline.
        return b'\n'.join(lines) + b'\n'

    if cte is None:
        # Use heuristics to decide on the "best" encoding.
        # NOTE(review): the 7bit result is returned before any line-length
        # check, so long ASCII lines are passed through as 7bit.
        try:
            return '7bit', normal_body(lines).decode('ascii')
        except UnicodeDecodeError:
            pass
        if (policy.cte_type == '8bit' and
                max(len(x) for x in lines) <= policy.max_line_length):
            return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
        sniff = embedded_body(lines[:10])
        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
                                          policy.max_line_length)
        sniff_base64 = binascii.b2a_base64(sniff)
        # This is a little unfair to qp; it includes lineseps, base64 doesn't.
        if len(sniff_qp) > len(sniff_base64):
            cte = 'base64'
        else:
            cte = 'quoted-printable'
            if len(lines) <= 10:
                # The sample covered the whole text, so it is the payload.
                return cte, sniff_qp
    if cte == '7bit':
        data = normal_body(lines).decode('ascii')
    elif cte == '8bit':
        data = normal_body(lines).decode('ascii', 'surrogateescape')
    elif cte == 'quoted-printable':
        data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
                                      policy.max_line_length)
    elif cte == 'base64':
        data = _encode_base64(embedded_body(lines), policy.max_line_length)
    else:
        raise ValueError("Unknown content transfer encoding {}".format(cte))
    return cte, data
|
||||
|
||||
|
||||
def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
                     disposition=None, filename=None, cid=None,
                     params=None, headers=None):
    """Store the str *string* in *msg* as a text/<subtype> payload.

    The text is encoded with *charset* and the given or heuristically chosen
    *cte* (see _encode_text).  The 'charset' Content-Type parameter is set to
    the email.charset.ALIASES entry for *charset* if there is one, else to
    *charset* itself.  The remaining arguments are applied by _prepare_set
    and _finalize_set.
    """
    _prepare_set(msg, 'text', subtype, headers)
    chosen_cte, encoded = _encode_text(string, charset, cte, msg.policy)
    msg.set_payload(encoded)
    charset_param = email.charset.ALIASES.get(charset, charset)
    msg.set_param('charset', charset_param, replace=True)
    msg['Content-Transfer-Encoding'] = chosen_cte
    _finalize_set(msg, disposition, filename, cid, params)
|
||||
raw_data_manager.add_set_handler(str, set_text_content)
|
||||
|
||||
|
||||
def set_message_content(msg, message, subtype="rfc822", cte=None,
                        disposition=None, filename=None, cid=None,
                        params=None, headers=None):
    """Store *message* in *msg* as a message/<subtype> payload.

    The payload becomes the one-element list [message].  Allowed *cte*
    values depend on the subtype:

    * 'rfc822': None, '7bit', '8bit' or 'binary'; None becomes '8bit'.
    * 'external-body': None or '7bit'; the result is always '7bit'.
    * anything else: used as given, with None becoming '7bit'.

    Raises ValueError for subtype 'partial' and for a disallowed *cte*.
    """
    if subtype == 'partial':
        raise ValueError("message/partial is not supported for Message objects")
    cte_given = cte is not None
    if subtype == 'rfc822':
        if cte_given and cte not in ('7bit', '8bit', 'binary'):
            # http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
            raise ValueError(
                "message/rfc822 parts do not support cte={}".format(cte))
        # 8bit will get coerced on serialization if policy.cte_type='7bit'.  We
        # may end up claiming 8bit when it isn't needed, but the only negative
        # result of that should be a gateway that needs to coerce to 7bit
        # having to look through the whole embedded message to discover whether
        # or not it actually has to do anything.
        if not cte_given:
            cte = '8bit'
    elif subtype == 'external-body':
        if cte_given and cte != '7bit':
            # http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
            raise ValueError(
                "message/external-body parts do not support cte={}".format(cte))
        cte = '7bit'
    elif not cte_given:
        # http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
        # subtypes should be restricted to 7bit, so assume that.
        cte = '7bit'
    _prepare_set(msg, 'message', subtype, headers)
    msg.set_payload([message])
    msg['Content-Transfer-Encoding'] = cte
    _finalize_set(msg, disposition, filename, cid, params)
|
||||
raw_data_manager.add_set_handler(email.message.Message, set_message_content)
|
||||
|
||||
|
||||
def set_bytes_content(msg, data, maintype, subtype, cte='base64',
                     disposition=None, filename=None, cid=None,
                     params=None, headers=None):
    """Store the bytes-like *data* in *msg* as a maintype/subtype payload.

    *cte* selects how the data is turned into the str payload:

    * 'base64' (default): base64 lines limited by the policy's
      max_line_length.
    * 'quoted-printable': binascii.b2a_qp output (max_line_length ignored).
    * '7bit': decoded as ASCII; raises UnicodeDecodeError if the data
      contains non-ASCII bytes.
    * '8bit' / 'binary': decoded as ASCII with surrogateescape.

    Any other *cte* stores *data* unchanged.  The remaining arguments are
    applied by _prepare_set and _finalize_set.
    """
    _prepare_set(msg, maintype, subtype, headers)
    if cte == 'base64':
        data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
    elif cte == 'quoted-printable':
        # XXX: quoprimime.body_encode won't encode newline characters in data,
        # so we can't use it.  This means max_line_length is ignored.  Another
        # bug to fix later.  (Note: encoders.quopri is broken on line ends.)
        data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
        data = data.decode('ascii')
    elif cte == '7bit':
        # Make sure it really is only ASCII.  Bytes-like objects have no
        # .encode(), so decode instead; this both validates the data and
        # gives the str payload.  bytes() lets bytearray and memoryview
        # input through as well.
        data = bytes(data).decode('ascii')
    elif cte in ('8bit', 'binary'):
        data = bytes(data).decode('ascii', 'surrogateescape')
    msg.set_payload(data)
    msg['Content-Transfer-Encoding'] = cte
    _finalize_set(msg, disposition, filename, cid, params)
|
||||
for typ in (bytes, bytearray, memoryview):
|
||||
raw_data_manager.add_set_handler(typ, set_bytes_content)
|
69
third_party/python/Lib/email/encoders.py
vendored
Normal file
69
third_party/python/Lib/email/encoders.py
vendored
Normal file
|
@ -0,0 +1,69 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Encodings and related functions."""
|
||||
|
||||
__all__ = [
|
||||
'encode_7or8bit',
|
||||
'encode_base64',
|
||||
'encode_noop',
|
||||
'encode_quopri',
|
||||
]
|
||||
|
||||
|
||||
from base64 import encodebytes as _bencode
|
||||
from quopri import encodestring as _encodestring
|
||||
|
||||
|
||||
|
||||
def _qencode(s):
    """Return the bytes *s* quoted-printable encoded, with no literal spaces."""
    encoded = _encodestring(s, quotetabs=True)
    # Must encode spaces, which quopri.encodestring() doesn't do
    no_spaces = encoded.replace(b' ', b'=20')
    return no_spaces
|
||||
|
||||
|
||||
def encode_base64(msg):
    """Replace the payload of *msg* with its Base64 encoding.

    The transfer-decoded payload is base64-encoded and stored back as a str.
    A Content-Transfer-Encoding header with the value 'base64' is also added.
    """
    raw = msg.get_payload(decode=True)
    msg.set_payload(_bencode(raw).decode('ascii'))
    msg['Content-Transfer-Encoding'] = 'base64'
|
||||
|
||||
|
||||
|
||||
def encode_quopri(msg):
    """Replace the payload of *msg* with its quoted-printable encoding.

    The transfer-decoded payload is passed through _qencode and stored back.
    A Content-Transfer-Encoding header with the value 'quoted-printable' is
    also added.
    """
    msg.set_payload(_qencode(msg.get_payload(decode=True)))
    msg['Content-Transfer-Encoding'] = 'quoted-printable'
|
||||
|
||||
|
||||
|
||||
def encode_7or8bit(msg):
    """Add a Content-Transfer-Encoding header of '7bit' or '8bit' to *msg*.

    The payload itself is left untouched.  '8bit' is chosen only when the
    transfer-decoded payload cannot be decoded as ASCII.
    """
    raw = msg.get_payload(decode=True)
    # There may be no payload at all; for backwards compatibility that case
    # is reported as 7bit, which is therefore the starting assumption.
    cte = '7bit'
    if raw is not None:
        # We play a trick to make this go fast.  If decoding from ASCII
        # succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
        try:
            raw.decode('ascii')
        except UnicodeError:
            cte = '8bit'
    msg['Content-Transfer-Encoding'] = cte
|
||||
|
||||
|
||||
|
||||
def encode_noop(msg):
    """Leave *msg* exactly as it is; no payload change, no header added."""
    return None
|
110
third_party/python/Lib/email/errors.py
vendored
Normal file
110
third_party/python/Lib/email/errors.py
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""email package exception classes."""
|
||||
|
||||
|
||||
class MessageError(Exception):
    """Root of the email package's exception hierarchy."""


class MessageParseError(MessageError):
    """Root of the errors raised while parsing a message."""


class HeaderParseError(MessageParseError):
    """The headers of a message could not be parsed."""


class BoundaryError(MessageParseError):
    """The terminating boundary could not be found."""


class MultipartConversionError(MessageError, TypeError):
    """Converting to a multipart is not allowed."""


class CharsetError(MessageError):
    """The charset that was given is illegal."""
|
||||
|
||||
|
||||
# These are parsing defects which the parser was able to work around.
|
||||
class MessageDefect(ValueError):
    """Root of the message defect classes."""

    def __init__(self, line=None):
        # Remember the offending line (None when none was supplied); the
        # base initializer only runs when there is a line to pass to it.
        self.line = line
        if line is None:
            return
        super().__init__(line)


class NoBoundaryInMultipartDefect(MessageDefect):
    """A message said it was a multipart but carried no boundary parameter."""


class StartBoundaryNotFoundDefect(MessageDefect):
    """The start boundary that was claimed never showed up."""


class CloseBoundaryNotFoundDefect(MessageDefect):
    """There was a start boundary, but no matching close boundary."""


class FirstHeaderLineIsContinuationDefect(MessageDefect):
    """The first header line of a message was a continuation line."""


class MisplacedEnvelopeHeaderDefect(MessageDefect):
    """A 'Unix-from' header turned up in the middle of a header block."""


class MissingHeaderBodySeparatorDefect(MessageDefect):
    """A line with no leading whitespace and no colon came before the blank line."""


# XXX: backward compatibility, just in case (it was never emitted).
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect


class MultipartInvariantViolationDefect(MessageDefect):
    """A message said it was a multipart but contained no subparts."""


class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
    """The multipart itself carried an invalid content transfer encoding."""


class UndecodableBytesDefect(MessageDefect):
    """A header held bytes that could not be decoded."""


class InvalidBase64PaddingDefect(MessageDefect):
    """A base64 encoded sequence had an incorrect length."""


class InvalidBase64CharactersDefect(MessageDefect):
    """A base64 encoded sequence held characters outside the base64 alphabet."""


class InvalidBase64LengthDefect(MessageDefect):
    """A base64 encoded sequence had an invalid length (1 mod 4)."""


# These errors are specific to header parsing.

class HeaderDefect(MessageDefect):
    """Root of the header defect classes."""

    def __init__(self, *args, **kw):
        # Forward everything unchanged to MessageDefect.
        super().__init__(*args, **kw)


class InvalidHeaderDefect(HeaderDefect):
    """The header is not valid; the message gives the details."""


class HeaderMissingRequiredValue(HeaderDefect):
    """A header that has to have a value had none."""


class NonPrintableDefect(HeaderDefect):
    """ASCII characters outside the ascii-printable range were found."""

    def __init__(self, non_printables):
        super().__init__(non_printables)
        # Kept under its own name for use by __str__.
        self.non_printables = non_printables

    def __str__(self):
        template = "the following ASCII non-printables found in header: {}"
        return template.format(self.non_printables)


class ObsoleteHeaderDefect(HeaderDefect):
    """The header uses syntax that RFC 5322 declares obsolete."""


class NonASCIILocalPartDefect(HeaderDefect):
    """The local_part contains non-ASCII characters."""
    # This defect only occurs during unicode parsing, not when
    # parsing messages decoded from binary.
|
536
third_party/python/Lib/email/feedparser.py
vendored
Normal file
536
third_party/python/Lib/email/feedparser.py
vendored
Normal file
|
@ -0,0 +1,536 @@
|
|||
# Copyright (C) 2004-2006 Python Software Foundation
|
||||
# Authors: Baxter, Wouters and Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""FeedParser - An email feed parser.
|
||||
|
||||
The feed parser implements an interface for incrementally parsing an email
|
||||
message, line by line. This has advantages for certain applications, such as
|
||||
those reading email messages off a socket.
|
||||
|
||||
FeedParser.feed() is the primary interface for pushing new data into the
|
||||
parser. It returns when there's nothing more it can do with the available
|
||||
data. When you have no more data to push into the parser, call .close().
|
||||
This completes the parsing and returns the root message object.
|
||||
|
||||
The other advantage of this parser is that it will never raise a parsing
|
||||
exception. Instead, when it finds something unexpected, it adds a 'defect' to
|
||||
the current message. Defects are just instances that live on the message
|
||||
object's .defects attribute.
|
||||
"""
|
||||
|
||||
__all__ = ['FeedParser', 'BytesFeedParser']
|
||||
|
||||
import re
|
||||
|
||||
from email import errors
|
||||
from email._policybase import compat32
|
||||
from collections import deque
|
||||
from io import StringIO
|
||||
|
||||
NLCRE = re.compile(r'\r\n|\r|\n')
|
||||
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
|
||||
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
|
||||
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
|
||||
# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
|
||||
# except controls, SP, and ":".
|
||||
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
|
||||
EMPTYSTRING = ''
|
||||
NL = '\n'
|
||||
|
||||
NeedMoreData = object()
|
||||
|
||||
|
||||
|
||||
class BufferedSubFile(object):
|
||||
"""A file-ish object that can have new data loaded into it.
|
||||
|
||||
You can also push and pop line-matching predicates onto a stack. When the
|
||||
current predicate matches the current line, a false EOF response
|
||||
(i.e. empty string) is returned instead. This lets the parser adhere to a
|
||||
simple abstraction -- it parses until EOF closes the current message.
|
||||
"""
|
||||
def __init__(self):
|
||||
# Text stream of the last partial line pushed into this object.
|
||||
# See issue 22233 for why this is a text stream and not a list.
|
||||
self._partial = StringIO(newline='')
|
||||
# A deque of full, pushed lines
|
||||
self._lines = deque()
|
||||
# The stack of false-EOF checking predicates.
|
||||
self._eofstack = []
|
||||
# A flag indicating whether the file has been closed or not.
|
||||
self._closed = False
|
||||
|
||||
def push_eof_matcher(self, pred):
|
||||
self._eofstack.append(pred)
|
||||
|
||||
def pop_eof_matcher(self):
|
||||
return self._eofstack.pop()
|
||||
|
||||
def close(self):
|
||||
# Don't forget any trailing partial line.
|
||||
self._partial.seek(0)
|
||||
self.pushlines(self._partial.readlines())
|
||||
self._partial.seek(0)
|
||||
self._partial.truncate()
|
||||
self._closed = True
|
||||
|
||||
def readline(self):
|
||||
if not self._lines:
|
||||
if self._closed:
|
||||
return ''
|
||||
return NeedMoreData
|
||||
# Pop the line off the stack and see if it matches the current
|
||||
# false-EOF predicate.
|
||||
line = self._lines.popleft()
|
||||
# RFC 2046, section 5.1.2 requires us to recognize outer level
|
||||
# boundaries at any level of inner nesting. Do this, but be sure it's
|
||||
# in the order of most to least nested.
|
||||
for ateof in reversed(self._eofstack):
|
||||
if ateof(line):
|
||||
# We're at the false EOF. But push the last line back first.
|
||||
self._lines.appendleft(line)
|
||||
return ''
|
||||
return line
|
||||
|
||||
def unreadline(self, line):
|
||||
# Let the consumer push a line back into the buffer.
|
||||
assert line is not NeedMoreData
|
||||
self._lines.appendleft(line)
|
||||
|
||||
def push(self, data):
|
||||
"""Push some new data into this object."""
|
||||
self._partial.write(data)
|
||||
if '\n' not in data and '\r' not in data:
|
||||
# No new complete lines, wait for more.
|
||||
return
|
||||
|
||||
# Crack into lines, preserving the linesep characters.
|
||||
self._partial.seek(0)
|
||||
parts = self._partial.readlines()
|
||||
self._partial.seek(0)
|
||||
self._partial.truncate()
|
||||
|
||||
# If the last element of the list does not end in a newline, then treat
|
||||
# it as a partial line. We only check for '\n' here because a line
|
||||
# ending with '\r' might be a line that was split in the middle of a
|
||||
# '\r\n' sequence (see bugs 1555570 and 1721862).
|
||||
if not parts[-1].endswith('\n'):
|
||||
self._partial.write(parts.pop())
|
||||
self.pushlines(parts)
|
||||
|
||||
def pushlines(self, lines):
|
||||
self._lines.extend(lines)
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
line = self.readline()
|
||||
if line == '':
|
||||
raise StopIteration
|
||||
return line
|
||||
|
||||
|
||||
|
||||
class FeedParser:
    """A feed-style parser of email.

    Text is handed over in arbitrary chunks through feed(); close() parses
    whatever is left and returns the root message object.  Problems found
    while parsing are recorded as defects on the affected message instead of
    being signalled to the caller of feed().
    """

    def __init__(self, _factory=None, *, policy=compat32):
        """Create a parser that builds its messages with _factory.

        When _factory is None, policy.message_factory is used, and if that is
        None as well, email.message.Message.  An explicitly given _factory is
        probed once by calling it with a ``policy`` keyword argument (the
        resulting object is discarded); if the probe raises TypeError the
        factory is treated as old-style and is later called with no
        arguments.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.

        """
        self.policy = policy
        self._old_style_factory = False
        if _factory is None:
            if policy.message_factory is None:
                # Imported here rather than at module level.
                from email.message import Message
                self._factory = Message
            else:
                self._factory = policy.message_factory
        else:
            self._factory = _factory
            try:
                _factory(policy=self.policy)
            except TypeError:
                # Assume this is an old-style factory
                self._old_style_factory = True
        # Line buffer all parsing reads from.
        self._input = BufferedSubFile()
        # Stack of messages currently being parsed; the root is at index 0.
        self._msgstack = []
        # Each call advances the parser generator up to its next yield.
        self._parse = self._parsegen().__next__
        # Message whose headers/body are being parsed right now.
        self._cur = None
        # Most recently created (or, after a subpart, re-selected) message;
        # used for trailing-newline cleanup before a boundary.
        self._last = None
        self._headersonly = False

    # Non-public interface for supporting Parser's headersonly flag
    def _set_headersonly(self):
        self._headersonly = True

    def feed(self, data):
        """Push more data into the parser."""
        self._input.push(data)
        self._call_parse()

    def _call_parse(self):
        # Advance the parser generator one step.  A finished generator
        # raises StopIteration, which is deliberately ignored.
        try:
            self._parse()
        except StopIteration:
            pass

    def close(self):
        """Parse all remaining data and return the root message object."""
        self._input.close()
        self._call_parse()
        root = self._pop_message()
        assert not self._msgstack
        # Look for final set of defects
        # A message declared as multipart whose payload did not end up as a
        # list of parts violates the multipart invariant.
        if root.get_content_maintype() == 'multipart' \
               and not root.is_multipart():
            defect = errors.MultipartInvariantViolationDefect()
            self.policy.handle_defect(root, defect)
        return root

    def _new_message(self):
        # Create a message with the configured factory, attach it to the
        # message on top of the stack (if any), push it, and make it both
        # the current and the last message.
        if self._old_style_factory:
            msg = self._factory()
        else:
            msg = self._factory(policy=self.policy)
        # Parts of a multipart/digest default to message/rfc822.
        if self._cur and self._cur.get_content_type() == 'multipart/digest':
            msg.set_default_type('message/rfc822')
        if self._msgstack:
            self._msgstack[-1].attach(msg)
        self._msgstack.append(msg)
        self._cur = msg
        self._last = msg

    def _pop_message(self):
        # Pop and return the top of the message stack; the new top (or None
        # when the stack is empty) becomes the current message.
        retval = self._msgstack.pop()
        if self._msgstack:
            self._cur = self._msgstack[-1]
        else:
            self._cur = None
        return retval

    def _parsegen(self):
        """Generator that parses one message and, recursively, its subparts.

        Yields NeedMoreData whenever the input buffer cannot supply a line
        yet; it finishes once the message has been parsed up to the (false
        or real) EOF of the input.  The new message is left on the message
        stack for the caller to pop.
        """
        # Create a new message and start by parsing headers.
        self._new_message()
        headers = []
        # Collect the headers, searching for a line that doesn't match the RFC
        # 2822 header or continuation pattern (including an empty line).
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if not headerRE.match(line):
                # If we saw the RFC defined header/body separator
                # (i.e. newline), just throw it away. Otherwise the line is
                # part of the body so push it back.
                if not NLCRE.match(line):
                    defect = errors.MissingHeaderBodySeparatorDefect()
                    self.policy.handle_defect(self._cur, defect)
                    self._input.unreadline(line)
                break
            headers.append(line)
        # Done with the headers, so parse them and figure out what we're
        # supposed to see in the body of the message.
        self._parse_headers(headers)
        # Headers-only parsing is a backwards compatibility hack, which was
        # necessary in the older parser, which could raise errors.  All
        # remaining lines in the input are thrown into the message body.
        if self._headersonly:
            lines = []
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                lines.append(line)
            self._cur.set_payload(EMPTYSTRING.join(lines))
            return
        if self._cur.get_content_type() == 'message/delivery-status':
            # message/delivery-status contains blocks of headers separated by
            # a blank line.  We'll represent each header block as a separate
            # nested message object, but the processing is a bit different
            # than standard message/* types because there is no body for the
            # nested messages.  A blank line separates the subparts.
            while True:
                self._input.push_eof_matcher(NLCRE.match)
                # Run the nested parse to completion, relaying its
                # NeedMoreData requests.  The break is only reached if the
                # nested generator yields anything else, which the code in
                # this method never does.
                for retval in self._parsegen():
                    if retval is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                # The popped message is not used any further here.
                msg = self._pop_message()
                # We need to pop the EOF matcher in order to tell if we're at
                # the end of the current file, not the end of the last block
                # of message headers.
                self._input.pop_eof_matcher()
                # The input stream must be sitting at the newline or at the
                # EOF.  We want to see if we're at the end of this subpart, so
                # first consume the blank line, then test the next line to see
                # if we're at this subpart's EOF.
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                if line == '':
                    break
                # Not at EOF so this is a line we're going to need.
                self._input.unreadline(line)
            return
        if self._cur.get_content_maintype() == 'message':
            # The message claims to be a message/* type, then what follows is
            # another RFC 2822 message.
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            self._pop_message()
            return
        if self._cur.get_content_maintype() == 'multipart':
            boundary = self._cur.get_boundary()
            if boundary is None:
                # The message /claims/ to be a multipart but it has not
                # defined a boundary.  That's a problem which we'll handle by
                # reading everything until the EOF and marking the message as
                # defective.
                defect = errors.NoBoundaryInMultipartDefect()
                self.policy.handle_defect(self._cur, defect)
                lines = []
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    lines.append(line)
                self._cur.set_payload(EMPTYSTRING.join(lines))
                return
            # Make sure a valid content type was specified per RFC 2045:6.4.
            # (What is checked is the Content-Transfer-Encoding header; a
            # missing header counts as '8bit'.)
            if (self._cur.get('content-transfer-encoding', '8bit').lower()
                    not in ('7bit', '8bit', 'binary')):
                defect = errors.InvalidMultipartContentTransferEncodingDefect()
                self.policy.handle_defect(self._cur, defect)
            # Create a line match predicate which matches the inter-part
            # boundary as well as the end-of-multipart boundary.  Don't push
            # this onto the input stream until we've scanned past the
            # preamble.
            separator = '--' + boundary
            boundaryre = re.compile(
                '(?P<sep>' + re.escape(separator) +
                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
            capturing_preamble = True
            preamble = []
            # Becomes the line ending of the closing boundary (or None) once
            # that boundary has been seen.
            linesep = False
            close_boundary_seen = False
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                mo = boundaryre.match(line)
                if mo:
                    # If we're looking at the end boundary, we're done with
                    # this multipart.  If there was a newline at the end of
                    # the closing boundary, then we need to initialize the
                    # epilogue with the empty string (see below).
                    if mo.group('end'):
                        close_boundary_seen = True
                        linesep = mo.group('linesep')
                        break
                    # We saw an inter-part boundary.  Were we in the preamble?
                    if capturing_preamble:
                        if preamble:
                            # According to RFC 2046, the last newline belongs
                            # to the boundary.
                            lastline = preamble[-1]
                            eolmo = NLCRE_eol.search(lastline)
                            if eolmo:
                                preamble[-1] = lastline[:-len(eolmo.group(0))]
                            self._cur.preamble = EMPTYSTRING.join(preamble)
                        capturing_preamble = False
                        # Re-read the same boundary line on the next pass,
                        # this time as a part separator.
                        self._input.unreadline(line)
                        continue
                    # We saw a boundary separating two parts.  Consume any
                    # multiple boundary lines that may be following.  Our
                    # interpretation of RFC 2046 BNF grammar does not produce
                    # body parts within such double boundaries.
                    while True:
                        line = self._input.readline()
                        if line is NeedMoreData:
                            yield NeedMoreData
                            continue
                        mo = boundaryre.match(line)
                        if not mo:
                            self._input.unreadline(line)
                            break
                    # Recurse to parse this subpart; the input stream points
                    # at the subpart's first line.
                    self._input.push_eof_matcher(boundaryre.match)
                    for retval in self._parsegen():
                        if retval is NeedMoreData:
                            yield NeedMoreData
                            continue
                        break
                    # Because of RFC 2046, the newline preceding the boundary
                    # separator actually belongs to the boundary, not the
                    # previous subpart's payload (or epilogue if the previous
                    # part is a multipart).
                    if self._last.get_content_maintype() == 'multipart':
                        epilogue = self._last.epilogue
                        if epilogue == '':
                            self._last.epilogue = None
                        elif epilogue is not None:
                            mo = NLCRE_eol.search(epilogue)
                            if mo:
                                end = len(mo.group(0))
                                self._last.epilogue = epilogue[:-end]
                    else:
                        payload = self._last._payload
                        if isinstance(payload, str):
                            mo = NLCRE_eol.search(payload)
                            if mo:
                                payload = payload[:-len(mo.group(0))]
                                self._last._payload = payload
                    self._input.pop_eof_matcher()
                    self._pop_message()
                    # Set the multipart up for newline cleansing, which will
                    # happen if we're in a nested multipart.
                    self._last = self._cur
                else:
                    # I think we must be in the preamble
                    assert capturing_preamble
                    preamble.append(line)
            # We've seen either the EOF or the end boundary.  If we're still
            # capturing the preamble, we never saw the start boundary.  Note
            # that as a defect and store the captured text as the payload.
            if capturing_preamble:
                defect = errors.StartBoundaryNotFoundDefect()
                self.policy.handle_defect(self._cur, defect)
                self._cur.set_payload(EMPTYSTRING.join(preamble))
                epilogue = []
                # NOTE(review): this loop drains the rest of the input
                # without storing the lines, so the epilogue assigned below
                # is always the empty string -- confirm that is intended.
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                self._cur.epilogue = EMPTYSTRING.join(epilogue)
                return
            # If we're not processing the preamble, then we might have seen
            # EOF without seeing that end boundary...that is also a defect.
            if not close_boundary_seen:
                defect = errors.CloseBoundaryNotFoundDefect()
                self.policy.handle_defect(self._cur, defect)
                return
            # Everything from here to the EOF is epilogue.  If the end boundary
            # ended in a newline, we'll need to make sure the epilogue isn't
            # None
            if linesep:
                epilogue = ['']
            else:
                epilogue = []
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                epilogue.append(line)
            # Any CRLF at the front of the epilogue is not technically part of
            # the epilogue.  Also, watch out for an empty string epilogue,
            # which means a single newline.
            if epilogue:
                firstline = epilogue[0]
                bolmo = NLCRE_bol.match(firstline)
                if bolmo:
                    epilogue[0] = firstline[len(bolmo.group(0)):]
            self._cur.epilogue = EMPTYSTRING.join(epilogue)
            return
        # Otherwise, it's some non-multipart type, so the entire rest of the
        # file contents becomes the payload.
        lines = []
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            lines.append(line)
        self._cur.set_payload(EMPTYSTRING.join(lines))

    def _parse_headers(self, lines):
        """Store the header lines collected by _parsegen on the current message.

        lines is the list of raw header lines (line endings included).
        Continuation lines are grouped with the header line they follow, and
        each group is passed through policy.header_source_parse() to
        self._cur.set_raw().  A leading "From " line becomes the unix-from.
        """
        # Passed a list of lines that make up the headers for the current msg
        # lastheader: name of the header being accumulated ('' when none);
        # lastvalue: its raw source lines.
        lastheader = ''
        lastvalue = []
        for lineno, line in enumerate(lines):
            # Check for continuation
            if line[0] in ' \t':
                if not lastheader:
                    # The first line of the headers was a continuation.  This
                    # is illegal, so let's note the defect, store the illegal
                    # line, and ignore it for purposes of headers.
                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
                    self.policy.handle_defect(self._cur, defect)
                    continue
                lastvalue.append(line)
                continue
            # A non-continuation line ends the header accumulated so far.
            if lastheader:
                self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
                lastheader, lastvalue = '', []
            # Check for envelope header, i.e. unix-from
            if line.startswith('From '):
                if lineno == 0:
                    # Strip off the trailing newline
                    mo = NLCRE_eol.search(line)
                    if mo:
                        line = line[:-len(mo.group(0))]
                    self._cur.set_unixfrom(line)
                    continue
                elif lineno == len(lines) - 1:
                    # Something looking like a unix-from at the end - it's
                    # probably the first line of the body, so push back the
                    # line and stop.
                    self._input.unreadline(line)
                    return
                else:
                    # Weirdly placed unix-from line.  Note this as a defect
                    # and ignore it.
                    # NOTE(review): appended directly to .defects, unlike the
                    # other defects in this class which go through
                    # policy.handle_defect() -- confirm this is deliberate.
                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
                    self._cur.defects.append(defect)
                    continue
            # Split the line on the colon separating field name from value.
            # There will always be a colon, because if there wasn't the part of
            # the parser that calls us would have started parsing the body.
            i = line.find(':')

            # If the colon is on the start of the line the header is clearly
            # malformed, but we might be able to salvage the rest of the
            # message. Track the error but keep going.
            if i == 0:
                # NOTE(review): also bypasses policy.handle_defect().
                defect = errors.InvalidHeaderDefect("Missing header name.")
                self._cur.defects.append(defect)
                continue

            assert i>0, "_parse_headers fed line with no : and no leading WS"
            lastheader = line[:i]
            lastvalue = [line]
        # Done with all the lines, so handle the last header.
        if lastheader:
            self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
|
||||
|
||||
|
||||
class BytesFeedParser(FeedParser):
    """FeedParser variant whose feed() takes bytes instead of str."""

    def feed(self, data):
        """Decode a chunk of bytes and pass it on to the text parser.

        The bytes are decoded as ASCII with the ``surrogateescape`` error
        handler, so non-ASCII bytes are carried through as lone surrogates
        rather than causing a decoding error.
        """
        text = data.decode('ascii', 'surrogateescape')
        super().feed(text)
|
508
third_party/python/Lib/email/generator.py
vendored
Normal file
508
third_party/python/Lib/email/generator.py
vendored
Normal file
|
@ -0,0 +1,508 @@
|
|||
# Copyright (C) 2001-2010 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Classes to generate plain text from a message object tree."""
|
||||
|
||||
__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
|
||||
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
|
||||
from copy import deepcopy
|
||||
from io import StringIO, BytesIO
|
||||
from email.utils import _has_surrogates
|
||||
|
||||
# Joiner for the main type and subtype when building handler method names.
UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# One line terminator in any convention (CRLF, bare CR, bare LF); used to
# split text so that the configured line separator can be written instead.
NLCRE = re.compile(r'\r\n|\r|\n')
# "From " at the start of any line (MULTILINE); such lines are rewritten with
# a leading ">" when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
|
||||
|
||||
|
||||
|
||||
class Generator:
    """Serialize a Message object tree as text.

    The flattened message is written to the file-like object that was handed
    to the constructor.
    """

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Set up a generator that writes to outfp.

        outfp is the output object; only its write() method is used.

        mangle_from_ selects whether lines starting with "From " in bodies,
        preambles and epilogues are written with a ">" in front.  When it is
        left as None, the value is True if no policy was given and
        policy.mangle_from_ otherwise.

        maxheaderlen, when not None, replaces the policy's max_line_length
        for the duration of each flatten() call.

        policy is the policy object used for flattening.  When it is None,
        flatten() falls back to the policy of the message it is given.
        """
        if mangle_from_ is not None:
            mangle = mangle_from_
        elif policy is None:
            mangle = True
        else:
            mangle = policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Pass s straight through to the output object."""
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        """Write the message tree rooted at msg to the output object.

        unixfrom, when true, makes the output start with an envelope
        ("From ") line: the one stored on msg if it has one, otherwise a
        generated "From nobody <current time>" line.  Subparts never get
        such a line.

        linesep, when given, overrides the line separator of the policy in
        effect (the generator's policy, or msg's policy if the generator has
        none).

        For the duration of the call both the generator and msg carry the
        computed policy; the previous policies are restored afterwards, also
        when an exception is raised.
        """
        # The _XXX constants are for data taken directly from the message,
        # the _encoded_XXX ones for data that has already been converted (to
        # bytes in the BytesGenerator) and placed in a temporary buffer.
        active = self.policy
        if active is None:
            active = msg.policy
        if linesep is not None:
            active = active.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            active = active.clone(max_line_length=self.maxheaderlen)
        self._NL = active.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # clone() hands the computed policy to the generators used for
        # subparts, so submessages are flattened with it as well.
        saved_gen_policy, saved_msg_policy = self.policy, msg.policy
        try:
            self.policy = active
            msg.policy = active
            if unixfrom:
                envelope = (msg.get_unixfrom()
                            or 'From nobody ' + time.ctime(time.time()))
                self.write(envelope + self._NL)
            self._write(msg)
        finally:
            self.policy = saved_gen_policy
            msg.policy = saved_msg_policy

    def clone(self, fp):
        """Return a new generator of the same class and options writing to fp."""
        factory = self.__class__
        # maxheaderlen is passed as None: the policy already carries the
        # adjusted line length.
        return factory(fp, self._mangle_from_, None, policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # self.write is used for text that comes from the source message, and
    # self._fp.write for text that comes out of a buffer (in the Bytes
    # subclass such text has already been transformed by its write method).
    # The split is pragmatic and was arrived at by experiment.

    def _new_buffer(self):
        # Overridden by BytesGenerator to return a BytesIO.
        buffer = StringIO()
        return buffer

    def _encode(self, s):
        # Overridden by BytesGenerator to turn strings into bytes.
        return s

    def _write_lines(self, lines):
        """Write text with every line ending replaced by self._NL."""
        if not lines:
            return
        *terminated, remainder = NLCRE.split(lines)
        for chunk in terminated:
            self.write(chunk)
            self.write(self._NL)
        # Splitting leaves an empty last element exactly when the text ended
        # with a line ending; in that case nothing more is written.
        # XXX by that logic an extra self._NL would seem needed here, but the
        # tests fail with it and pass without it.
        if remainder:
            self.write(remainder)

    def _write(self, msg):
        """Write the headers of msg followed by its body.

        The body is rendered into a temporary buffer first, because
        rendering may still change headers: a multipart may have to pick a
        boundary that does not occur in its parts, which must be known
        before the Content-Type header is written.
        """
        saved_fp = self._fp
        try:
            self._munge_cte = None
            body_buffer = self._new_buffer()
            self._fp = body_buffer
            self._dispatch(msg)
        finally:
            self._fp = saved_fp
            pending_cte = self._munge_cte
            del self._munge_cte
        # A handler re-encoded the body: write the headers from a copy that
        # carries the adjusted transfer encoding and content type.
        if pending_cte:
            msg = deepcopy(msg)
            msg.replace_header('content-transfer-encoding', pending_cte[0])
            msg.replace_header('content-type', pending_cte[1])
        # The message object may supply its own header writer; otherwise the
        # generic one is used.
        header_writer = getattr(msg, '_write_headers', None)
        if header_writer is not None:
            header_writer(self)
        else:
            self._write_headers(msg)
        self._fp.write(body_buffer.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg.

        Tried in order: self._handle_<maintype>_<subtype>, then
        self._handle_<maintype>, then self._writeBody.  Dashes in the type
        names are replaced by underscores.
        """
        maintype = msg.get_content_maintype()
        subtype = msg.get_content_subtype()
        specific = UNDERSCORE.join((maintype, subtype)).replace('-', '_')
        handler = getattr(self, '_handle_' + specific, None)
        if handler is None:
            generic = maintype.replace('-', '_')
            handler = getattr(self, '_handle_' + generic, None)
        if handler is None:
            handler = self._writeBody
        handler(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, plus a blank line."""
        for name, value in msg.raw_items():
            folded = self.policy.fold(name, value)
            self.write(folded)
        # The empty line that separates headers from body.
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload; a None payload writes nothing.

        Raises TypeError when the payload is neither None nor a str.
        """
        body = msg.get_payload()
        if body is None:
            return
        if not isinstance(body, str):
            raise TypeError('string payload expected: %s' % type(body))
        charset = None
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
        if charset is not None:
            # XXX: working on a copy is an ugly hack that keeps the caller's
            # message unmodified.
            msg = deepcopy(msg)
            del msg['content-transfer-encoding']
            msg.set_payload(body, charset)
            body = msg.get_payload()
            # Remembered so that _write() can fix up the headers.
            self._munge_cte = (msg['content-transfer-encoding'],
                               msg['content-type'])
        if self._mangle_from_:
            body = fcre.sub('>From ', body)
        self._write_lines(body)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write preamble, delimited subparts and epilogue of a multipart.

        Every part is flattened on its own first; only then is a boundary
        chosen (when msg does not define one) so that it cannot collide with
        the rendered parts.
        """
        parts = msg.get_payload()
        if isinstance(parts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(parts)
            return
        if parts is None:
            parts = []
        elif not isinstance(parts, list):
            # Scalar payload
            parts = [parts]
        rendered = []
        for part in parts:
            part_buffer = self._new_buffer()
            part_gen = self.clone(part_buffer)
            part_gen.flatten(part, unixfrom=False, linesep=self._NL)
            rendered.append(part_buffer.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Pick a boundary that occurs in none of the rendered parts.
            boundary = self._make_boundary(self._encoded_NL.join(rendered))
            msg.set_boundary(boundary)
        # A preamble, if present, is followed by a line separator.
        if msg.preamble is not None:
            preamble = msg.preamble
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # The first body-part follows directly; each further one is preceded
        # by "delimiter transport-padding CRLF".
        for position, body_part in enumerate(rendered):
            if position:
                self.write(self._NL + '--' + boundary + self._NL)
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        if msg.epilogue is not None:
            epilogue = msg.epilogue
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping switched off.

        Signed content has to stay unmodified for the signature to remain
        valid (RFC 1847, 2.1).  RDM: this does not completely preserve the
        part, but it helps.
        """
        saved_policy = self.policy
        self.policy = saved_policy.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = saved_policy

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload.

        The blocks are rendered separately and joined afterwards; writing
        them one after the other would leave an extra newline between the
        last block and the boundary.
        """
        blocks = []
        for part in msg.get_payload():
            part_buffer = self._new_buffer()
            part_gen = self.clone(part_buffer)
            part_gen.flatten(part, unixfrom=False, linesep=self._NL)
            text = part_buffer.getvalue()
            pieces = text.split(self._encoded_NL)
            # Drop the unnecessary trailing empty line, if there is one.
            if pieces and pieces[-1] == self._encoded_EMPTY:
                text = self._encoded_NL.join(pieces[:-1])
            blocks.append(text)
        # Joining puts an empty line between blocks, but none after the last.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part.

        Normally the payload is a list whose first element is the nested
        Message, which is flattened.  The payload can also be a plain string
        (HeaderParser used on a message/rfc822 part, issue 7970); the string
        is then emitted as it is.
        """
        nested_buffer = self._new_buffer()
        nested_gen = self.clone(nested_buffer)
        raw = msg._payload
        if isinstance(raw, list):
            nested_gen.flatten(msg.get_payload(0), unixfrom=False,
                               linesep=self._NL)
            output = nested_buffer.getvalue()
        else:
            output = self._encode(raw)
        self._fp.write(output)

    # This and _compile_re are classmethods (it used to be a module level
    # function) so that the module can keep offering the function for
    # backward compatibility via
    #     _make_boundary = Generator._make_boundary
    # at its end.  It *is* internal, so that could be dropped...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        When text is given, the result does not occur in it as a delimiter
        line ("--" + boundary, optionally followed by "--").
        """
        token = random.randrange(sys.maxsize)
        base = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return base
        candidate = base
        suffix = 0
        # Append ".0", ".1", ... to the base until there is no collision.
        while cls._compile_re('^--' + re.escape(candidate) + '(--)?$',
                              re.MULTILINE).search(text):
            candidate = base + '.' + str(suffix)
            suffix += 1
        return candidate

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile pattern s with the given flags."""
        pattern = re.compile(s, flags)
        return pattern
|
||||
|
||||
|
||||
class BytesGenerator(Generator):
    """Generator variant that emits bytes instead of str.

    Behaves like the base Generator, but everything written to the output
    is bytes.  Surrogate-escaped characters that stood in for raw bytes in
    the input are turned back into those bytes.  When the policy's
    cte_type is 7bit, non-ASCII content is instead content-transfer
    encoded (charset unknown-8bit) by the base class machinery.

    The outfp object must accept bytes in its write method.
    """

    def write(self, s):
        # Text reaching this point is ASCII plus surrogate-escaped bytes.
        data = s.encode('ascii', 'surrogateescape')
        self._fp.write(data)

    def _new_buffer(self):
        # Scratch buffers must hold bytes, matching the real output.
        return BytesIO()

    def _encode(self, s):
        return s.encode('ascii')

    def _write_headers(self, msg):
        # Same job as the str version, except that the policy produces the
        # folded header as bytes so 8bit data survives.
        for name, value in msg.raw_items():
            self._fp.write(self.policy.fold_binary(name, value))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        # A payload containing surrogates originally came from bytes, so
        # (unless 7bit output is required) it is written straight back out.
        if msg._payload is None:
            return
        if not _has_surrogates(msg._payload) or self.policy.cte_type == '7bit':
            super()._handle_text(msg)
            return
        if self._mangle_from_:
            msg._payload = fcre.sub(">From ", msg._payload)
        self._write_lines(msg._payload)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        # Boundary searches run against bytes, so the pattern must be bytes.
        pattern = s.encode('ascii')
        return re.compile(pattern, flags)
|
||||
|
||||
|
||||
|
||||
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
|
||||
|
||||
class DecodedGenerator(Generator):
|
||||
"""Generates a text representation of a message.
|
||||
|
||||
Like the Generator base class, except that non-text parts are substituted
|
||||
with a format string representing the part.
|
||||
"""
|
||||
def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
|
||||
policy=None):
|
||||
"""Like Generator.__init__() except that an additional optional
|
||||
argument is allowed.
|
||||
|
||||
Walks through all subparts of a message. If the subpart is of main
|
||||
type `text', then it prints the decoded payload of the subpart.
|
||||
|
||||
Otherwise, fmt is a format string that is used instead of the message
|
||||
payload. fmt is expanded with the following keywords (in
|
||||
%(keyword)s format):
|
||||
|
||||
type : Full MIME type of the non-text part
|
||||
maintype : Main MIME type of the non-text part
|
||||
subtype : Sub-MIME type of the non-text part
|
||||
filename : Filename of the non-text part
|
||||
description: Description associated with the non-text part
|
||||
encoding : Content transfer encoding of the non-text part
|
||||
|
||||
The default value for fmt is None, meaning
|
||||
|
||||
[Non-text (%(type)s) part of message omitted, filename %(filename)s]
|
||||
"""
|
||||
Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
|
||||
policy=policy)
|
||||
if fmt is None:
|
||||
self._fmt = _FMT
|
||||
else:
|
||||
self._fmt = fmt
|
||||
|
||||
def _dispatch(self, msg):
|
||||
for part in msg.walk():
|
||||
maintype = part.get_content_maintype()
|
||||
if maintype == 'text':
|
||||
print(part.get_payload(decode=False), file=self)
|
||||
elif maintype == 'multipart':
|
||||
# Just skip this
|
||||
pass
|
||||
else:
|
||||
print(self._fmt % {
|
||||
'type' : part.get_content_type(),
|
||||
'maintype' : part.get_content_maintype(),
|
||||
'subtype' : part.get_content_subtype(),
|
||||
'filename' : part.get_filename('[no filename]'),
|
||||
'description': part.get('Content-Description',
|
||||
'[no description]'),
|
||||
'encoding' : part.get('Content-Transfer-Encoding',
|
||||
'[no encoding]'),
|
||||
}, file=self)
|
||||
|
||||
|
||||
|
||||
# Helper used by Generator._make_boundary
|
||||
# Number of characters in the decimal representation of sys.maxsize - 1.
_width = len(repr(sys.maxsize-1))
# A %-format of the form '%0<width>d': it zero-pads an integer to _width
# digits.  Generator._make_boundary applies it to its random token.
_fmt = '%%0%dd' % _width

# Backward compatibility: keep exposing _make_boundary as a module-level name.
_make_boundary = Generator._make_boundary
|
578
third_party/python/Lib/email/header.py
vendored
Normal file
578
third_party/python/Lib/email/header.py
vendored
Normal file
|
@ -0,0 +1,578 @@
|
|||
# Copyright (C) 2002-2007 Python Software Foundation
|
||||
# Author: Ben Gertzfield, Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Header encoding and decoding functionality."""
|
||||
|
||||
__all__ = [
|
||||
'Header',
|
||||
'decode_header',
|
||||
'make_header',
|
||||
]
|
||||
|
||||
import re
|
||||
import binascii
|
||||
|
||||
import email.quoprimime
|
||||
import email.base64mime
|
||||
|
||||
from email.errors import HeaderParseError
|
||||
from email import charset as _charset
|
||||
# Module-level alias for the Charset class from email.charset.
Charset = _charset.Charset

# Small string/bytes constants used throughout this module.
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
# Default maximum line length used by Header when none is given.
MAXLINELEN = 78
# Characters treated as folding whitespace when splitting ASCII headers.
FWS = ' \t'

# Shared Charset instances: the default charset for Header, and the
# fallback used by Header.append() when text does not fit in us-ascii.
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal
# 041) to tilde (octal 176).
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embedded_header = re.compile(r'\n[^ \t]+:')

# Helpers
_max_append = email.quoprimime._max_append
|
||||
|
||||
|
||||
|
||||
def decode_header(header):
    """Split a header value into decoded (value, charset) pairs.

    No charset conversion is performed.  The result is a list of
    (string, charset) pairs, one per run of the header.  For runs that
    were not RFC 2047 encoded the charset is None; otherwise it is the
    lower-cased charset name taken from the encoded word.

    header is either a string (which may or may not contain encoded
    words) or a Header object.

    An email.errors.HeaderParseError is raised for certain decoding
    failures (e.g. invalid base64 data).
    """
    # A Header object already holds its chunks; just encode each of them.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                for string, charset in header._chunks]
    # Nothing encoded at all: hand the header back untouched.
    if not ecre.search(header):
        return [(header, None)]
    # Pass 1: break every line into (text, encoding, charset) triples;
    # unencoded text has None for the last two fields.  ecre.split()
    # yields plain text at every fourth position, each followed by the
    # charset, encoding and encoded text of one encoded word.
    words = []
    for line in header.splitlines():
        pieces = ecre.split(line)
        for idx in range(0, len(pieces), 4):
            plain = pieces[idx]
            if idx == 0:
                plain = plain.lstrip()
            if plain:
                words.append((plain, None, None))
            if idx + 1 < len(pieces):
                cs, enc, payload = pieces[idx + 1:idx + 4]
                words.append((payload, enc.lower(), cs.lower()))
    # Pass 2: discard whitespace-only words sitting between two encoded
    # words.
    drop = {n - 1 for n, w in enumerate(words)
            if n > 1 and w[1] and words[n-2][1] and words[n-1][0].isspace()}
    words = [w for pos, w in enumerate(words) if pos not in drop]

    # Pass 3: undo the base64 / quoted-printable transformation, giving a
    # list of (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded = encoded_string
        elif encoding == 'q':
            decoded = email.quoprimime.header_decode(encoded_string)
        elif encoding == 'b':
            # Postel's law: add missing padding
            missing = len(encoded_string) % 4
            if missing:
                encoded_string += '==='[:4 - missing]
            try:
                decoded = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
        decoded_words.append((decoded, charset))
    # Pass 4: turn everything into bytes and merge neighbouring words that
    # share a charset (unencoded neighbours are joined with a space).
    merged = []
    current = current_cs = None
    for word, cs in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if current is not None and cs == current_cs:
            current += (BSPACE + word) if current_cs is None else word
            continue
        if current is not None:
            merged.append((current, current_cs))
        current, current_cs = word, cs
    merged.append((current, current_cs))
    return merged
|
||||
|
||||
|
||||
|
||||
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from pairs such as those returned by decode_header().

    decoded_seq is a sequence of (decoded_string, charset) pairs, where
    charset is the character set name, a Charset instance, or None.

    Each pair is appended in order to a new Header, which is returned.
    Optional maxlinelen, header_name, and continuation_ws have the same
    meaning as in the Header constructor.
    """
    header = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for text, cs in decoded_seq:
        # None means us-ascii but it is passed straight on to append();
        # anything else that is not already a Charset gets wrapped in one.
        if not (cs is None or isinstance(cs, Charset)):
            cs = Charset(cs)
        header.append(text, cs)
    return header
|
||||
|
||||
|
||||
|
||||
class Header:
    """A header value stored as a list of (string, Charset) chunks.

    Chunks are added with append().  str() renders them as plain text and
    encode() renders them as a folded, RFC 2047 encoded header value.
    """

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen. For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        # List of (str, Charset) pairs, in the order they were appended.
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                # Recover the escaped bytes, then decode them as ASCII with
                # 'replace' so undecodable bytes become replacement chars.
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        """Return the result of comparing other with str(self)."""
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                # Keep undecodable bytes as surrogate escapes.
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                # Text that does not fit in us-ascii is stored as utf-8
                # instead; failures for any other charset propagate.
                if output_charset!='us-ascii':
                    raise
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        return s.isspace() or s in ('(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping.  This is in very rough support of RFC 2822's `higher level
        syntactic breaks':  split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string.  Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split.  Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        # hasspace is None only until the first chunk has been processed;
        # it is set to False at the end of every iteration, so the
        # transition logic below runs for the second and later chunks.
        hasspace = lastspace = None
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            # Lines after the first start a new output line each.
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    # Keep the line's own leading whitespace as its
                    # folding whitespace.
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embedded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        """Merge adjacent chunks that share a charset, joined by a space."""
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        # Flush the final run, if there was any chunk at all.
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks
|
||||
|
||||
|
||||
|
||||
class _ValueFormatter:
    """Accumulates the folded lines of a header value for Header.encode().

    Finished lines are kept in self._lines; the line under construction is
    an _Accumulator of (fws, text) parts in self._current_line.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        # headerlen is counted as already-used space on the first line.
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Flush the current line and return all lines joined by linesep."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        """Finish the current line and start an empty one."""
        # A trailing (' ', '') part is what add_transition() pushes; it is
        # dropped at end of line, anything else is put back.
        end_of_line = self._current_line.pop()
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            if self._current_line.is_onlyws():
                # Whitespace-only content is attached to the previous line
                # instead of becoming a line of its own.
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Push a single-space separator part onto the current line."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Add string, preceded by fws, encoding it as charset requires."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible. Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        # The last encoded line stays open on the current line so later
        # feeds can extend it.
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the room left on the current line, then on each later line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Split fws+string at whitespace runs and append the pieces."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks".  We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here.  So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS.  (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        # re.split() alternates text, whitespace, text, ...  Make the list
        # start with a whitespace slot so it pairs up as (fws, text).
        if parts[0]:
            parts[:0] = ['']
        else:
            parts.pop(0)
        # zip over the same iterator twice to take items two at a time.
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        """Push (fws, string) and fold the current line if it is too long."""
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        # A whitespace splitchar matches the start of the
                        # part's own folding whitespace.
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    # No split point for this splitchar; try the next one.
                    continue
                break
            else:
                # No splitchar produced a split point.
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            # Split at part i: everything before it becomes a finished
            # line and the rest starts the next line.
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)
|
||||
|
||||
|
||||
class _Accumulator(list):
|
||||
|
||||
def __init__(self, initial_size=0):
|
||||
self._initial_size = initial_size
|
||||
super().__init__()
|
||||
|
||||
def push(self, fws, string):
|
||||
self.append((fws, string))
|
||||
|
||||
def pop_from(self, i=0):
|
||||
popped = self[i:]
|
||||
self[i:] = []
|
||||
return popped
|
||||
|
||||
def pop(self):
|
||||
if self.part_count()==0:
|
||||
return ('', '')
|
||||
return super().pop()
|
||||
|
||||
def __len__(self):
|
||||
return sum((len(fws)+len(part) for fws, part in self),
|
||||
self._initial_size)
|
||||
|
||||
def __str__(self):
|
||||
return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
|
||||
for fws, part in self))
|
||||
|
||||
def reset(self, startval=None):
|
||||
if startval is None:
|
||||
startval = []
|
||||
self[:] = startval
|
||||
self._initial_size = 0
|
||||
|
||||
def is_onlyws(self):
|
||||
return self._initial_size==0 and (not self or str(self).isspace())
|
||||
|
||||
def part_count(self):
|
||||
return super().__len__()
|
594
third_party/python/Lib/email/headerregistry.py
vendored
Normal file
594
third_party/python/Lib/email/headerregistry.py
vendored
Normal file
|
@ -0,0 +1,594 @@
|
|||
"""Representing and manipulating email headers via custom objects.
|
||||
|
||||
This module provides an implementation of the HeaderRegistry API.
|
||||
The implementation is designed to flexibly follow RFC5322 rules.
|
||||
|
||||
Eventually HeaderRegistry will be a public API, but it isn't yet,
|
||||
and will probably change some before that happens.
|
||||
|
||||
"""
|
||||
from types import MappingProxyType
|
||||
|
||||
from email import utils
|
||||
from email import errors
|
||||
from email import _header_value_parser as parser
|
||||
|
||||
class Address:
    """A single email address: display name, username and domain.

    Instances are read-only; the parts are exposed as properties.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be parsed completely; raises TypeError if addr_spec is
        combined with username or domain; re-raises the first defect found
        while parsing addr_spec.
        """

        # Reject line breaks in any part so an address can never smuggle
        # extra header lines.
        inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        # Quote the local part if it contains any character listed in
        # parser.DOT_ATOM_ENDS.
        nameset = set(self.username)
        if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
            lp = parser.quote_string(self.username)
        else:
            lp = self.username
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither username nor domain: the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        # Quote the display name if it contains any character listed in
        # parser.SPECIALS.
        nameset = set(self.display_name)
        if len(nameset) > len(nameset-parser.SPECIALS):
            disp = parser.quote_string(self.display_name)
        else:
            disp = self.display_name
        if disp:
            # Avoid rendering the null address as "<<>>".
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        """Compare field by field with another object of the same type.

        For any other type NotImplemented is returned, so Python can try
        the reflected comparison on the other operand before falling back
        to its default (which yields False for ==).
        """
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
|
||||
|
||||
|
||||
class Group:
    """An address group: a display name plus a tuple of Address objects."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the public 'addresses' view is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None for a bare-address wrapper."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the objects supplied as 'addresses' (possibly empty)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced repr such as "Group(display_name='x', ...".
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return the group in 'name: addr, addr;' form.

        A group with no display name and exactly one address renders as that
        address alone.
        """
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            # Quote the display name if it contains any RFC 5322 special.
            nameset = set(disp)
            if len(nameset) > len(nameset-parser.SPECIALS):
                disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        """Groups are equal when of the same exact type with equal fields."""
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
|
||||
|
||||
|
||||
# Header Classes #
|
||||
|
||||
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: 'parse_tree', set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        # 'parse' fills this dict in place; 'decoded' becomes the str value
        # and everything else is handed to init() as keyword arguments.
        parsed = {'defects': []}
        cls.parse(value, parsed)
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        header = str.__new__(cls, text)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Record the header name, its parse tree and its defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name as passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """A tuple snapshot of the defects collected while parsing."""
        return tuple(self._defects)

    def __reduce__(self):
        # The concrete class is created dynamically, so pickling records its
        # name and bases and rebuilds it through _reconstruct_header.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance holding *value* without running 'parse'."""
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        folded = parser.Header([label])
        tree = self._parse_tree
        if tree:
            # Only a non-empty value gets the separating space after ':'.
            separator = parser.CFWSList(
                [parser.WhiteSpaceTerminal(' ', 'fws')])
            folded.append(separator)
        folded.append(tree)
        return folded.fold(policy=policy)
|
||||
|
||||
|
||||
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header instance from the data saved by BaseHeader.__reduce__.

    A new class named *cls_name* is created from *bases*, then asked to
    construct an instance around *value*.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
|
||||
|
||||
|
||||
class UnstructuredHeader:
    """Header parsing behavior for values treated as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of *value* and its string form in *kwds*."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
|
||||
|
||||
|
||||
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant whose max_count is 1 instead of None."""

    max_count = 1
|
||||
|
||||
|
||||
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If a string value cannot be parsed as a date, a defect is registered,
    'datetime' is None and the header keeps the original text as its value.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with 'datetime', 'decoded' and 'parse_tree'.

        *value* may be a date string or a datetime object.  Problems with the
        value are appended to kwds['defects'] rather than raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                parsed = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to report bad input as a
                # defect, not to raise.  Depending on the Python version an
                # unparseable date surfaces as TypeError or ValueError.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
            value = parsed
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword, then defer to the next init."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime
|
||||
|
||||
|
||||
class UniqueDateHeader(DateHeader):
    """DateHeader variant whose max_count is 1 instead of None."""

    max_count = 1
|
||||
|
||||
|
||||
class AddressHeader:
    """Header parsing behavior for values that hold a list of addresses."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse *value* as an address list and return the parse tree."""
        address_list, value = parser.get_address_list(value)
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with 'groups', 'defects', 'decoded' and 'parse_tree'.

        *value* is either a header string, a single Address/Group-like
        object, or an iterable of such objects.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses
            ]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            groups = []
            for item in items:
                if hasattr(item, 'addresses'):
                    groups.append(item)
                else:
                    # Wrap a bare address in an anonymous group.
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        if 'parse_tree' not in kwds:
            # Object input: derive the parse tree from the rendered text.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword, then defer to the next init."""
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups making up this header."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group (computed once)."""
        if self._addresses is None:
            self._addresses = tuple(
                address
                for group in self._groups
                for address in group.addresses)
        return self._addresses
|
||||
|
||||
|
||||
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant whose max_count is 1 instead of None."""

    max_count = 1
|
||||
|
||||
|
||||
class SingleAddressHeader(AddressHeader):
    """AddressHeader that exposes its one address as 'address'."""

    @property
    def address(self):
        """Return the only address; raise ValueError if there is not one."""
        found = self.addresses
        if len(found) != 1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return found[0]
|
||||
|
||||
|
||||
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant whose max_count is 1."""

    max_count = 1
|
||||
|
||||
|
||||
class MIMEVersionHeader:
    """Header parsing behavior for MIME-Version values."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse tree plus 'major', 'minor', 'version'.

        All three version fields are None when the parse tree has no minor
        number.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        has_minor = tree.minor is not None
        # major is only reported when a minor number was parsed as well.
        kwds['major'] = tree.major if has_minor else None
        kwds['minor'] = tree.minor
        if has_minor:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        """Consume the version keywords, then defer to the next init."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """'major.minor' as a string, or None."""
        return self._version
|
||||
|
||||
|
||||
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse tree, its string form and 'params'.

        Parameter names are sanitized and lower-cased; values are sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        raw_params = tree.params
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for key, val in raw_params:
                collected[utils._sanitize(key).lower()] = utils._sanitize(val)
        kwds['params'] = collected

    def init(self, *args, **kw):
        """Consume the 'params' keyword, then defer to the next init."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
|
||||
|
||||
|
||||
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the inherited init, then cache the sanitized type parts."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The string 'maintype/subtype'."""
        return '/'.join((self.maintype, self.subtype))
|
||||
|
||||
|
||||
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header exposing content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the inherited init, then cache the disposition value."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Sanitized disposition from the parse tree, or None if absent."""
        return self._content_disposition
|
||||
|
||||
|
||||
class ContentTransferEncodingHeader:
    """Header parsing behavior for Content-Transfer-Encoding values."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in *kwds*."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the next init, then cache the sanitized cte value."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        """Sanitized 'cte' attribute of the parse tree."""
        return self._cte
|
||||
|
||||
|
||||
# The header factory #
|
||||
|
||||
# Lower-cased header name -> specialized header class.  HeaderRegistry copies
# this mapping into its registry unless use_default_map is false.
_default_header_map = {
    # Unstructured text and dates
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Address headers
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME headers
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
|
||||
|
||||
class HeaderRegistry:
|
||||
|
||||
"""A header_factory and header registry."""
|
||||
|
||||
def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
|
||||
use_default_map=True):
|
||||
"""Create a header_factory that works with the Policy API.
|
||||
|
||||
base_class is the class that will be the last class in the created
|
||||
header class's __bases__ list. default_class is the class that will be
|
||||
used if "name" (see __call__) does not appear in the registry.
|
||||
use_default_map controls whether or not the default mapping of names to
|
||||
specialized classes is copied in to the registry when the factory is
|
||||
created. The default is True.
|
||||
|
||||
"""
|
||||
self.registry = {}
|
||||
self.base_class = base_class
|
||||
self.default_class = default_class
|
||||
if use_default_map:
|
||||
self.registry.update(_default_header_map)
|
||||
|
||||
def map_to_type(self, name, cls):
|
||||
"""Register cls as the specialized class for handling "name" headers.
|
||||
|
||||
"""
|
||||
self.registry[name.lower()] = cls
|
||||
|
||||
def __getitem__(self, name):
|
||||
cls = self.registry.get(name.lower(), self.default_class)
|
||||
return type('_'+cls.__name__, (cls, self.base_class), {})
|
||||
|
||||
def __call__(self, name, value):
|
||||
"""Create a header instance for header 'name' from 'value'.
|
||||
|
||||
Creates a header instance by creating a specialized class for parsing
|
||||
and representing the specified header by combining the factory
|
||||
base_class with a specialized class from the registry or the
|
||||
default_class, and passing the name and value to the constructed
|
||||
class's constructor.
|
||||
|
||||
"""
|
||||
return self[name](name, value)
|
71
third_party/python/Lib/email/iterators.py
vendored
Normal file
71
third_party/python/Lib/email/iterators.py
vendored
Normal file
|
@ -0,0 +1,71 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Various types of useful iterators and generators."""
|
||||
|
||||
__all__ = [
|
||||
'body_line_iterator',
|
||||
'typed_subpart_iterator',
|
||||
'walk',
|
||||
# Do not include _structure() since it's part of the debugging API.
|
||||
]
|
||||
|
||||
import sys
|
||||
from io import StringIO
|
||||
|
||||
|
||||
|
||||
# This function will become a method of the Message class
|
||||
def walk(self):
    """Walk over the message tree, yielding each subpart.

    The message itself is yielded first, followed by everything yielded by
    each subpart's own walk(), i.e. in depth-first order.  This method is a
    generator.
    """
    yield self
    if not self.is_multipart():
        return
    for child in self.get_payload():
        yield from child.walk()
|
||||
|
||||
|
||||
|
||||
# These two functions are imported into the Iterators.py interface module.
|
||||
def body_line_iterator(msg, decode=False):
    """Iterate over the parts, returning string payloads line-by-line.

    Every part produced by msg.walk() is inspected; parts whose payload is
    not a str are skipped.  Optional decode (default False) is passed
    through to .get_payload().
    """
    for part in msg.walk():
        body = part.get_payload(decode=decode)
        if not isinstance(body, str):
            continue
        yield from StringIO(body)
|
||||
|
||||
|
||||
def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Iterate over the subparts with a given MIME type.

    Use `maintype' as the main MIME type to match against; this defaults to
    "text".  Optional `subtype' is the MIME subtype to match against; if
    omitted, only the main type is matched.
    """
    for part in msg.walk():
        if part.get_content_maintype() != maintype:
            continue
        if subtype is not None and part.get_content_subtype() != subtype:
            continue
        yield part
|
||||
|
||||
|
||||
|
||||
def _structure(msg, fp=None, level=0, include_default=False):
    """A handy debugging aid: print the content-type tree of a message.

    One line is written per part, indented four spaces per nesting level,
    to fp (sys.stdout when fp is None).  With include_default true, each
    line also shows the part's default type in square brackets.
    """
    out = sys.stdout if fp is None else fp
    indent = ' ' * (level * 4)
    print(indent + msg.get_content_type(), end='', file=out)
    if include_default:
        print(' [%s]' % msg.get_default_type(), file=out)
    else:
        print(file=out)
    if not msg.is_multipart():
        return
    for child in msg.get_payload():
        _structure(child, out, level+1, include_default)
|
1164
third_party/python/Lib/email/message.py
vendored
Normal file
1164
third_party/python/Lib/email/message.py
vendored
Normal file
File diff suppressed because it is too large
Load diff
0
third_party/python/Lib/email/mime/__init__.py
vendored
Normal file
0
third_party/python/Lib/email/mime/__init__.py
vendored
Normal file
37
third_party/python/Lib/email/mime/application.py
vendored
Normal file
37
third_party/python/Lib/email/mime/application.py
vendored
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Keith Dart
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Class representing application/* type MIME documents."""
|
||||
|
||||
__all__ = ["MIMEApplication"]
|
||||
|
||||
from email import encoders
|
||||
from email.mime.nonmultipart import MIMENonMultipart
|
||||
|
||||
|
||||
class MIMEApplication(MIMENonMultipart):
    """Class for generating application/* MIME documents."""

    def __init__(self, _data, _subtype='octet-stream',
                 _encoder=encoders.encode_base64, *, policy=None, **_params):
        """Create an application/* type MIME document.

        _data holds the raw application data and becomes the payload.

        _subtype is the MIME content type subtype, defaulting to
        'octet-stream'.  Passing None raises TypeError.

        _encoder is a function which will perform the actual encoding for
        transport of the application data, defaulting to base64 encoding.
        It is called with this message once the payload has been set.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        if _subtype is None:
            raise TypeError('Invalid application MIME subtype')

        MIMENonMultipart.__init__(
            self, 'application', _subtype, policy=policy, **_params)

        # The payload must be in place before the encoder runs.
        self.set_payload(_data)
        _encoder(self)
|
74
third_party/python/Lib/email/mime/audio.py
vendored
Normal file
74
third_party/python/Lib/email/mime/audio.py
vendored
Normal file
|
@ -0,0 +1,74 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Anthony Baxter
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Class representing audio/* type MIME documents."""
|
||||
|
||||
__all__ = ['MIMEAudio']
|
||||
|
||||
import sndhdr
|
||||
|
||||
from io import BytesIO
|
||||
from email import encoders
|
||||
from email.mime.nonmultipart import MIMENonMultipart
|
||||
|
||||
|
||||
|
||||
# sndhdr type name -> audio/* MIME subtype.
_sndhdr_MIMEmap = {
    'au': 'basic',
    'wav': 'x-wav',
    'aiff': 'x-aiff',
    'aifc': 'x-aiff',
}

# There are others in sndhdr that don't have MIME types. :(
# Additional ones to be added to sndhdr?  midi, mp3, realaudio, wma??
def _whatsnd(data):
    """Try to identify a sound file type.

    sndhdr.what() has a pretty cruddy interface, unfortunately.  This is why
    we re-do it here.  It would be easier to reverse engineer the Unix 'file'
    command and use the standard 'magic' file, as shipped with a modern Unix.

    Only the first 512 bytes of data are examined.  Returns the MIME subtype
    mapped to the first sndhdr test that matches, or None when nothing
    matches or the matched type has no mapping.
    """
    header = data[:512]
    stream = BytesIO(header)
    for probe in sndhdr.tests:
        match = probe(header, stream)
        if match is None:
            continue
        return _sndhdr_MIMEmap.get(match[0])
    return None
|
||||
|
||||
|
||||
|
||||
class MIMEAudio(MIMENonMultipart):
    """Class for generating audio/* MIME documents."""

    def __init__(self, _audiodata, _subtype=None,
                 _encoder=encoders.encode_base64, *, policy=None, **_params):
        """Create an audio/* type MIME document.

        _audiodata holds the raw audio data and becomes the payload.  When
        _subtype is not given, the subtype is guessed from the data with
        _whatsnd(); if no subtype can be guessed, a TypeError is raised.
        Otherwise the subtype given via _subtype is used as-is.

        _encoder is a function which will perform the actual encoding for
        transport of the audio data.  It takes one argument, which is this
        MIMEAudio instance.  It should use get_payload() and set_payload() to
        change the payload to the encoded form.  It should also add any
        Content-Transfer-Encoding or other headers to the message as
        necessary.  The default encoding is Base64.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        subtype = _whatsnd(_audiodata) if _subtype is None else _subtype
        if subtype is None:
            raise TypeError('Could not find audio MIME subtype')

        MIMENonMultipart.__init__(
            self, 'audio', subtype, policy=policy, **_params)

        # The payload must be in place before the encoder runs.
        self.set_payload(_audiodata)
        _encoder(self)
|
30
third_party/python/Lib/email/mime/base.py
vendored
Normal file
30
third_party/python/Lib/email/mime/base.py
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Base class for MIME specializations."""
|
||||
|
||||
__all__ = ['MIMEBase']
|
||||
|
||||
import email.policy
|
||||
|
||||
from email import message
|
||||
|
||||
|
||||
|
||||
class MIMEBase(message.Message):
    """Base class for MIME specializations."""

    def __init__(self, _maintype, _subtype, *, policy=None, **_params):
        """This constructor adds a Content-Type: and a MIME-Version: header.

        The Content-Type: header is taken from the _maintype and _subtype
        arguments.  Additional parameters for this header are taken from the
        keyword arguments.  When policy is None, email.policy.compat32 is
        used.
        """
        chosen_policy = email.policy.compat32 if policy is None else policy
        message.Message.__init__(self, policy=chosen_policy)
        self.add_header(
            'Content-Type', '%s/%s' % (_maintype, _subtype), **_params)
        self['MIME-Version'] = '1.0'
|
47
third_party/python/Lib/email/mime/image.py
vendored
Normal file
47
third_party/python/Lib/email/mime/image.py
vendored
Normal file
|
@ -0,0 +1,47 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Class representing image/* type MIME documents."""
|
||||
|
||||
__all__ = ['MIMEImage']
|
||||
|
||||
import imghdr
|
||||
|
||||
from email import encoders
|
||||
from email.mime.nonmultipart import MIMENonMultipart
|
||||
|
||||
|
||||
|
||||
class MIMEImage(MIMENonMultipart):
    """Class for generating image/* type MIME documents."""

    def __init__(self, _imagedata, _subtype=None,
                 _encoder=encoders.encode_base64, *, policy=None, **_params):
        """Create an image/* type MIME document.

        _imagedata holds the raw image data and becomes the payload.  When
        _subtype is not given, the subtype is guessed from the data with
        imghdr.what(); if no subtype can be guessed, a TypeError is raised.
        Otherwise the subtype given via _subtype is used as-is.

        _encoder is a function which will perform the actual encoding for
        transport of the image data.  It takes one argument, which is this
        Image instance.  It should use get_payload() and set_payload() to
        change the payload to the encoded form.  It should also add any
        Content-Transfer-Encoding or other headers to the message as
        necessary.  The default encoding is Base64.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        subtype = _subtype
        if subtype is None:
            subtype = imghdr.what(None, _imagedata)
        if subtype is None:
            raise TypeError('Could not guess image MIME subtype')

        MIMENonMultipart.__init__(
            self, 'image', subtype, policy=policy, **_params)

        # The payload must be in place before the encoder runs.
        self.set_payload(_imagedata)
        _encoder(self)
|
34
third_party/python/Lib/email/mime/message.py
vendored
Normal file
34
third_party/python/Lib/email/mime/message.py
vendored
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Class representing message/* MIME documents."""
|
||||
|
||||
__all__ = ['MIMEMessage']
|
||||
|
||||
from email import message
|
||||
from email.mime.nonmultipart import MIMENonMultipart
|
||||
|
||||
|
||||
|
||||
class MIMEMessage(MIMENonMultipart):
    """Class representing message/* MIME documents."""

    def __init__(self, _msg, _subtype='rfc822', *, policy=None):
        """Create a message/* type MIME document.

        _msg is a message object and must be an instance of Message, or a
        derived class of Message, otherwise a TypeError is raised.

        Optional _subtype defines the subtype of the contained message.  The
        default is "rfc822" (this is defined by the MIME standard, even though
        the term "rfc822" is technically outdated by RFC 2822).
        """
        MIMENonMultipart.__init__(self, 'message', _subtype, policy=policy)
        if not isinstance(_msg, message.Message):
            raise TypeError('Argument is not an instance of Message')
        # Go through the Message base class directly: this class's own
        # attach() (from MIMENonMultipart) always raises.
        message.Message.attach(self, _msg)
        # And be sure our default type is set correctly
        self.set_default_type('message/rfc822')
|
48
third_party/python/Lib/email/mime/multipart.py
vendored
Normal file
48
third_party/python/Lib/email/mime/multipart.py
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
# Copyright (C) 2002-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Base class for MIME multipart/* type messages."""
|
||||
|
||||
__all__ = ['MIMEMultipart']
|
||||
|
||||
from email.mime.base import MIMEBase
|
||||
|
||||
|
||||
|
||||
class MIMEMultipart(MIMEBase):
    """Base class for MIME multipart/* type messages."""

    def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
                 *, policy=None,
                 **_params):
        """Creates a multipart/* type message.

        By default, creates a multipart/mixed message, with proper
        Content-Type and MIME-Version headers.

        _subtype is the subtype of the multipart content type, defaulting to
        `mixed'.

        boundary is the multipart boundary string.  By default it is
        calculated as needed.

        _subparts is a sequence of initial subparts for the payload.  It
        must be an iterable object, such as a list.  You can always
        attach new subparts to the message by using the attach() method.

        Additional parameters for the Content-Type header are taken from the
        keyword arguments (or passed into the _params argument).
        """
        MIMEBase.__init__(self, 'multipart', _subtype, policy=policy, **_params)

        # Initialise _payload to an empty list as the Message superclass's
        # implementation of is_multipart assumes that _payload is a list for
        # multipart messages.
        self._payload = []

        for part in (_subparts or ()):
            self.attach(part)
        if boundary:
            self.set_boundary(boundary)
|
22
third_party/python/Lib/email/mime/nonmultipart.py
vendored
Normal file
22
third_party/python/Lib/email/mime/nonmultipart.py
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Copyright (C) 2002-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Base class for MIME type messages that are not multipart."""
|
||||
|
||||
__all__ = ['MIMENonMultipart']
|
||||
|
||||
from email import errors
|
||||
from email.mime.base import MIMEBase
|
||||
|
||||
|
||||
|
||||
class MIMENonMultipart(MIMEBase):
    """Base class for MIME non-multipart type messages."""

    def attach(self, payload):
        """Always raise MultipartConversionError; subparts are not allowed."""
        # The public API prohibits attaching multiple subparts to MIMEBase
        # derived subtypes since none of them are, by definition, of content
        # type multipart/*
        reason = 'Cannot attach additional subparts to non-multipart/*'
        raise errors.MultipartConversionError(reason)
|
42
third_party/python/Lib/email/mime/text.py
vendored
Normal file
42
third_party/python/Lib/email/mime/text.py
vendored
Normal file
|
@ -0,0 +1,42 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Class representing text/* type MIME documents."""
|
||||
|
||||
__all__ = ['MIMEText']
|
||||
|
||||
from email.charset import Charset
|
||||
from email.mime.nonmultipart import MIMENonMultipart
|
||||
|
||||
|
||||
|
||||
class MIMEText(MIMENonMultipart):
    """A text/* MIME document."""

    def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None):
        """Build a text/* message whose payload is _text.

        _text is the string that becomes the payload.

        _subtype is the minor content type; it defaults to "plain".

        _charset names the character set placed in the Content-Type
        header.  When it is None, "us-ascii" is chosen if _text is pure
        ASCII and "utf-8" otherwise.  Setting the payload with a charset
        also sets the Content-Transfer-Encoding header as a side effect.
        """
        # Probe the text to pick a default charset.
        # XXX: This can be removed once #7304 is fixed.
        if _charset is None:
            try:
                _text.encode('us-ascii')
            except UnicodeEncodeError:
                _charset = 'utf-8'
            else:
                _charset = 'us-ascii'

        MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy,
                                  charset=str(_charset))

        self.set_payload(_text, _charset)
|
131
third_party/python/Lib/email/parser.py
vendored
Normal file
131
third_party/python/Lib/email/parser.py
vendored
Normal file
|
@ -0,0 +1,131 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""A parser of RFC 2822 and MIME email messages."""
|
||||
|
||||
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
|
||||
'FeedParser', 'BytesFeedParser']
|
||||
|
||||
from io import StringIO, TextIOWrapper
|
||||
|
||||
from email.feedparser import FeedParser, BytesFeedParser
|
||||
from email._policybase import compat32
|
||||
|
||||
|
||||
class Parser:
    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        The parser builds an in-memory object tree for a message.  That
        tree can be modified and later handed to a Generator to produce
        the message text again.

        Input text must be a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.
        The header block ends at the first blank line or at the end of
        the input.

        _class is the class instantiated whenever a new message object is
        needed; it must be constructible with zero arguments.  It is
        passed unchanged to FeedParser.

        policy is a policy object controlling aspects of the parser's
        operation.  The default, compat32, maintains backward
        compatibility.
        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Build a message structure from a text file object.

        The file is read to exhaustion in 8192-character chunks and the
        root of the resulting message structure is returned.  When
        headersonly is true, parsing stops after the headers; by default
        the whole file is parsed.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        chunk = fp.read(8192)
        while chunk:
            feedparser.feed(chunk)
            chunk = fp.read(8192)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Build a message structure from a string.

        The string is wrapped in a StringIO and handed to parse(); the
        root of the message structure is returned.  headersonly has the
        same meaning as for parse().
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
|
||||
|
||||
|
||||
|
||||
class HeaderParser(Parser):
    """Parser variant that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        # The headersonly argument is accepted for signature compatibility
        # but ignored: header-only parsing is always requested.
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        # Same as parse(): the caller's headersonly value is ignored.
        return Parser.parsestr(self, text, headersonly=True)
|
||||
|
||||
|
||||
class BytesParser:
|
||||
|
||||
def __init__(self, *args, **kw):
|
||||
"""Parser of binary RFC 2822 and MIME email messages.
|
||||
|
||||
Creates an in-memory object tree representing the email message, which
|
||||
can then be manipulated and turned over to a Generator to return the
|
||||
textual representation of the message.
|
||||
|
||||
The input must be formatted as a block of RFC 2822 headers and header
|
||||
continuation lines, optionally preceded by a `Unix-from' header. The
|
||||
header block is terminated either by the end of the input or by a
|
||||
blank line.
|
||||
|
||||
_class is the class to instantiate for new message objects when they
|
||||
must be created. This class must have a constructor that can take
|
||||
zero arguments. Default is Message.Message.
|
||||
"""
|
||||
self.parser = Parser(*args, **kw)
|
||||
|
||||
def parse(self, fp, headersonly=False):
|
||||
"""Create a message structure from the data in a binary file.
|
||||
|
||||
Reads all the data from the file and returns the root of the message
|
||||
structure. Optional headersonly is a flag specifying whether to stop
|
||||
parsing after reading the headers or not. The default is False,
|
||||
meaning it parses the entire contents of the file.
|
||||
"""
|
||||
fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
|
||||
try:
|
||||
return self.parser.parse(fp, headersonly)
|
||||
finally:
|
||||
fp.detach()
|
||||
|
||||
|
||||
def parsebytes(self, text, headersonly=False):
|
||||
"""Create a message structure from a byte string.
|
||||
|
||||
Returns the root of the message structure. Optional headersonly is a
|
||||
flag specifying whether to stop parsing after reading the headers or
|
||||
not. The default is False, meaning it parses the entire contents of
|
||||
the file.
|
||||
"""
|
||||
text = text.decode('ASCII', errors='surrogateescape')
|
||||
return self.parser.parsestr(text, headersonly)
|
||||
|
||||
|
||||
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        # The caller's headersonly value is ignored; header-only parsing
        # is always requested.
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        # Same as parse(): headersonly is forced to True.
        return BytesParser.parsebytes(self, text, True)
|
223
third_party/python/Lib/email/policy.py
vendored
Normal file
223
third_party/python/Lib/email/policy.py
vendored
Normal file
|
@ -0,0 +1,223 @@
|
|||
"""This will be the home for the policy that hooks in the new
|
||||
code that adds all the email6 features.
|
||||
"""
|
||||
|
||||
import re
|
||||
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
|
||||
from email.utils import _has_surrogates
|
||||
from email.headerregistry import HeaderRegistry as HeaderRegistry
|
||||
from email.contentmanager import raw_data_manager
|
||||
from email.message import EmailMessage
|
||||
|
||||
# Public names exported by "from email.policy import *".  SMTPUTF8 is a
# public policy instance defined at the bottom of this module alongside
# SMTP and HTTP, so it is exported with them.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
|
||||
|
||||
linesep_splitter = re.compile(r'\n|\r')
|
||||
|
||||
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Headers
    are custom objects, rather than simple strings, and carry custom
    attributes that depend on the type of the field.  The folding
    algorithm fully implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to
                           encode any non-ASCII characters in the source
                           strings.  If True, the message headers will be
                           serialized using utf8 and will not contain
                           encoded words (see RFC 6532 for more on this
                           serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this
                           attribute indicates whether or not a generator
                           should refold that value when transforming the
                           message back into stream form.  The possible
                           values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that
                                    is longer than max_line_length will be
                                    refolded
                           all   -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents
                           that header.  A default header_factory is
                           provided that understands some of the RFC5322
                           header field types.  (Currently address fields
                           and date fields have special treatment, while
                           all other fields are treated as unstructured.
                           This list will be completed before the extension
                           is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is
                           called, it calls the corresponding method of
                           this object, passing it the message object as
                           its first argument, and any arguments or
                           keywords that were passed to it as additional
                           arguments.  The default content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Give each freshly constructed instance its own header factory
        # unless the caller supplied one.  Clones do not go through this
        # path and therefore share the factory of their source policy.
        # object.__setattr__ is used to assign the attribute directly.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute
        of the specialized header class that the header_factory would use
        to construct a header of type 'name'.
        """
        header_class = self.header_factory[name]
        return header_class.max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is everything up to the first ':' and is returned
        unmodified.  The value is built by stripping leading spaces and
        tabs from the remainder of the first line, appending all
        subsequent lines, and stripping any trailing carriage return or
        linefeed characters.  (This is the same as Compat32).

        """
        name, first_value = sourcelines[0].split(':', 1)
        pieces = [first_value.lstrip(' \t')]
        pieces.extend(sourcelines[1:])
        return (name, ''.join(pieces).rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute that matches the name ignoring case, the value is
        returned unchanged.  Otherwise the name and value are passed to
        the header_factory, and the resulting custom header object is
        returned as the value.  In this case a ValueError is raised if
        the input value is a string that splits into more than one line.

        """
        already_header = (hasattr(value, 'name')
                          and value.name.lower() == name.lower())
        if already_header:
            return (name, value)
        if isinstance(value, str) and len(value.splitlines()) > 1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        header = self.header_factory(name, value)
        return (name, header)

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value, with any carriage return and
        linefeed characters removed, are passed to the header_factory,
        and the resulting custom header object is returned.  Any
        surrogateescaped bytes get turned into the unicode
        unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        unfolded = linesep_splitter.sub('', value)
        return self.header_factory(name, unfolded)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.
        A value is considered to be a 'source value' if and only if it
        does not have a 'name' attribute (having a 'name' attribute means
        it is a header object of some sort).  If a source value needs to
        be refolded according to the policy, it is converted into a custom
        header object by passing the name and the value with any linesep
        characters removed to the header_factory.  Folding of a custom
        header object is done by calling its fold method with the current
        policy.

        Source values are split into lines using splitlines.  If the value
        is not to be refolded, the lines are rejoined using the linesep
        from the policy and returned.  The exception is lines containing
        non-ascii binary data.  In that case the value is refolded
        regardless of the refold_source setting, which causes the binary
        data to be CTE encoded using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned
        value is bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of
        the refold_source setting, since there is no way to know whether
        the binary data consists of single byte characters or multibyte
        characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii
        with non-ASCII unicode rendered as encoded words.

        """
        seven_bit = self.cte_type == '7bit'
        folded = self._fold(name, value, refold_binary=seven_bit)
        if self.utf8:
            charset = 'utf8'
        else:
            charset = 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Header objects know how to fold themselves.
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        if self.refold_source == 'all':
            refold = True
        elif self.refold_source == 'long':
            # The first line also has to accommodate "name: ".
            first_too_long = (bool(lines)
                              and len(lines[0]) + len(name) + 2 > maxlen)
            refold = (first_too_long
                      or any(len(x) > maxlen for x in lines[1:]))
        else:
            refold = False
        if refold or (refold_binary and _has_surrogates(value)):
            header = self.header_factory(name, ''.join(lines))
            return header.fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
|
||||
|
||||
|
||||
# Ready-made policy instances.  The statement order matters: the instance
# header_factory is removed from 'default' before the other policies are
# cloned from it.
default = EmailPolicy()
# Make the default policy use the class default header_factory
# (EmailPolicy.__init__ stored a per-instance HeaderRegistry; deleting it
# exposes the class attribute again).
del default.header_factory
# Same as default, but with raise_on_defect=True.
strict = default.clone(raise_on_defect=True)
# Same as default, but with '\r\n' as the line separator.
SMTP = default.clone(linesep='\r\n')
# Like SMTP, with max_line_length=None in addition.
HTTP = default.clone(linesep='\r\n', max_line_length=None)
# Like SMTP, with utf8=True in addition.
SMTPUTF8 = SMTP.clone(utf8=True)
|
299
third_party/python/Lib/email/quoprimime.py
vendored
Normal file
299
third_party/python/Lib/email/quoprimime.py
vendored
Normal file
|
@ -0,0 +1,299 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Ben Gertzfield
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Quoted-printable content transfer encoding per RFCs 2045-2047.
|
||||
|
||||
This module handles the content transfer encoding method defined in RFC 2045
|
||||
to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
|
||||
safely encode text that is in a character set similar to the 7-bit US ASCII
|
||||
character set, but that includes some 8-bit characters that are normally not
|
||||
allowed in email bodies or headers.
|
||||
|
||||
Quoted-printable is very space-inefficient for encoding binary files; use the
|
||||
email.base64mime module for that instead.
|
||||
|
||||
This module provides an interface to encode and decode both headers and bodies
|
||||
with quoted-printable encoding.
|
||||
|
||||
RFC 2045 defines a method for including character set information in an
|
||||
`encoded-word' in a header. This method is commonly used for 8-bit real names
|
||||
in To:/From:/Cc: etc. fields, as well as Subject: lines.
|
||||
|
||||
This module does not do the line wrapping or end-of-line character
|
||||
conversion necessary for proper internationalized headers; it only
|
||||
does dumb encoding and decoding. To deal with the various line
|
||||
wrapping issues, use the email.header module.
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
'body_decode',
|
||||
'body_encode',
|
||||
'body_length',
|
||||
'decode',
|
||||
'decodestring',
|
||||
'header_decode',
|
||||
'header_encode',
|
||||
'header_length',
|
||||
'quote',
|
||||
'unquote',
|
||||
]
|
||||
|
||||
import re
|
||||
|
||||
from string import ascii_letters, digits, hexdigits
|
||||
|
||||
CRLF = '\r\n'
|
||||
NL = '\n'
|
||||
EMPTYSTRING = ''
|
||||
|
||||
# Build a mapping of octets to the expansion of that octet. Since we're only
|
||||
# going to have 256 of these things, this isn't terribly inefficient
|
||||
# space-wise. Remember that headers and bodies have different sets of safe
|
||||
# characters. Initialize both maps with the full expansion, and then override
|
||||
# the safe bytes with the more compact form.
|
||||
# Every octet starts out fully escaped as "=XX"; the header and body maps
# begin as independent copies of that table.
_QUOPRI_MAP = ['=%02X' % c for c in range(256)]
_QUOPRI_HEADER_MAP = list(_QUOPRI_MAP)
_QUOPRI_BODY_MAP = list(_QUOPRI_MAP)

# Header-safe octets (a few punctuation marks, letters and digits) map to
# themselves.
for c in ('-!*+/' + ascii_letters + digits).encode('ascii'):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# In headers a space (0x20) is written as an underscore.
_QUOPRI_HEADER_MAP[0x20] = '_'

# Body-safe octets map to themselves: printable ASCII 0x20-0x7E with the
# exception of '=' (0x3D), followed by the tab character.
for c in bytes(range(0x20, 0x3D)) + bytes(range(0x3E, 0x7F)) + b'\t':
    _QUOPRI_BODY_MAP[c] = chr(c)
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
def header_check(octet):
    """Return True if header quoted-printable would alter this octet."""
    # An octet needs escaping when its map entry is not the octet itself.
    return _QUOPRI_HEADER_MAP[octet] != chr(octet)
|
||||
|
||||
|
||||
def body_check(octet):
    """Return True if body quoted-printable would alter this octet."""
    # An octet needs escaping when its map entry is not the octet itself.
    return _QUOPRI_BODY_MAP[octet] != chr(octet)
|
||||
|
||||
|
||||
def header_length(bytearray):
    """Return the length of bytearray once header quoted-printable encoded.

    The RFC 2047 chrome that `header_encode()` wraps around the encoded
    text is not counted.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for headers.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total
|
||||
|
||||
|
||||
def body_length(bytearray):
    """Return the length of bytearray once body quoted-printable encoded.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for bodies.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_BODY_MAP[octet])
    return total
|
||||
|
||||
|
||||
def _max_append(L, s, maxlen, extra=''):
|
||||
if not isinstance(s, str):
|
||||
s = chr(s)
|
||||
if not L:
|
||||
L.append(s.lstrip())
|
||||
elif len(L[-1]) + len(s) <= maxlen:
|
||||
L[-1] += extra + s
|
||||
else:
|
||||
L.append(s.lstrip())
|
||||
|
||||
|
||||
def unquote(s):
    """Turn a string in the form =AB to the ASCII character with value 0xab"""
    # Characters 1 and 2 are the hex digits; anything after them is ignored.
    hex_pair = s[1:3]
    return chr(int(hex_pair, 16))
|
||||
|
||||
|
||||
def quote(c):
    """Return the "=XX" quoted-printable escape for the single character c."""
    code = ord(c)
    return _QUOPRI_MAP[code]
|
||||
|
||||
|
||||
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line as an RFC 2047 `Q' encoded word.

    The `Q' encoding resembles quoted-printable but is meant for email
    header fields, so that charsets made mostly of 7 bit characters (with
    a few 8 bit ones) stay more or less readable in mail clients that do
    not understand MIME.

    charset names the character set written into the encoded word.  It
    defaults to iso-8859-1.  Empty input yields an empty string.
    """
    if header_bytes:
        # Decoding as latin1 maps each byte to the code point of the same
        # value, so translate() can look each octet up in the header map.
        text = header_bytes.decode('latin1')
        encoded = text.translate(_QUOPRI_HEADER_MAP)
        # Wrap the encoded text in the encoded-word chrome.
        return '=?%s?q?%s?=' % (charset, encoded)
    return ''
|
||||
|
||||
|
||||
# Translation table used by body_encode(): the body map, except that CR
# and LF pass through unescaped.
_QUOPRI_BODY_ENCODE_MAP = list(_QUOPRI_BODY_MAP)
for c in (0x0D, 0x0A):
    _QUOPRI_BODY_ENCODE_MAP[c] = chr(c)
|
||||
|
||||
def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters before the
    eol string (maxlinelen defaults to 76 characters, the maximum value
    permitted by RFC 2045).  Long lines will have the 'soft line break'
    quoted-printable character "=" appended to them, so the decoded text will
    be identical to the original text.

    The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
    followed by a soft line break.  Smaller values will generate a
    ValueError.

    A false body (such as the empty string) is returned unchanged.

    """

    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # quote special characters
    body = body.translate(_QUOPRI_BODY_ENCODE_MAP)

    soft_break = '=' + eol
    # leave space for the '=' at the end of a line
    maxlinelen1 = maxlinelen - 1

    # Output lines are collected here and joined with eol at the end.
    encoded_body = []
    append = encoded_body.append

    for line in body.splitlines():
        # break up the line into pieces no longer than maxlinelen - 1
        start = 0
        laststart = len(line) - 1 - maxlinelen
        while start <= laststart:
            stop = start + maxlinelen1
            # make sure we don't break up an escape sequence
            if line[stop - 2] == '=':
                # The '=' that begins the escape is emitted as the last
                # character of this piece, and the next piece starts at
                # that same '=' so the escape stays in one piece.
                append(line[start:stop - 1])
                start = stop - 2
            elif line[stop - 1] == '=':
                # Same idea when the escape begins one character later.
                append(line[start:stop])
                start = stop - 1
            else:
                # No escape at the cut point: add an explicit soft break.
                append(line[start:stop] + '=')
                start = stop

        # handle rest of line, special case if line ends in whitespace
        if line and line[-1] in ' \t':
            room = start - laststart
            if room >= 3:
                # It's a whitespace character at end-of-line, and we have room
                # for the three-character quoted encoding.
                q = quote(line[-1])
            elif room == 2:
                # There's room for the whitespace character and a soft break.
                q = line[-1] + soft_break
            else:
                # There's room only for a soft break.  The quoted whitespace
                # will be the only content on the subsequent line.
                q = soft_break + quote(line[-1])
            append(line[start:-1] + q)
        else:
            append(line[start:])

    # add back final newline if present
    # (an empty final element makes the join below end with eol)
    if body[-1] in CRLF:
        append('')

    return eol.join(encoded_body)
|
||||
|
||||
|
||||
|
||||
# BAW: I'm not sure if the intent was for the signature of this function to be
|
||||
# the same as base64MIME.decode() or not...
|
||||
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Each input line has trailing whitespace stripped.  "=XX" sequences
    with two hex digits become the character with that value, an "=" at
    the end of a line is a soft line break (the line is joined to the
    next one without eol), and any other "=" is passed through literally.
    A false input (such as the empty string) is returned unchanged.

    If the input does not end with a CR or LF, the eol that would
    otherwise terminate the final line is left off.
    """
    if not encoded:
        return encoded
    # Collect the output in a list and join once at the end; repeated
    # string concatenation is quadratic on long inputs.
    parts = []
    # True when the most recently processed line contributed a trailing
    # eol to parts (i.e. it did not end in a soft line break).
    terminated = False

    for line in encoded.splitlines():
        line = line.rstrip()
        terminated = True
        if not line:
            parts.append(eol)
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                parts.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # this is a soft line break: emit nothing, not even eol.
            elif i+1 == n:
                terminated = False
                break
            # Decode if in form =AB
            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
                parts.append(unquote(line[i:i+3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                parts.append(c)
                i += 1
        else:
            # The line was consumed without hitting a soft break.
            parts.append(eol)

    # Special case if original string did not end with eol: drop the eol
    # added for the final line.  Removing the list element (rather than
    # slicing one character off the result) is correct for an eol of any
    # length, including '' and '\r\n', and never removes decoded data.
    if encoded[-1] not in '\r\n' and terminated:
        parts.pop()
    return ''.join(parts)
|
||||
|
||||
|
||||
# For convenience and backwards compatibility w/ standard base64 module
|
||||
body_decode = decode
|
||||
decodestring = decode
|
||||
|
||||
|
||||
|
||||
def _unquote_match(match):
|
||||
"""Turn a match in the form =AB to the ASCII character with value 0xab"""
|
||||
s = match.group(0)
|
||||
return unquote(s)
|
||||
|
||||
|
||||
# Header decoding is done a bit differently
|
||||
def header_decode(s):
    """Decode a string encoded with RFC 2045 MIME header `Q' encoding.

    Underscores become spaces and every "=XX" hex escape becomes the
    character with that value.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.
    """
    spaced = s.replace('_', ' ')
    escape = re.compile(r'=[a-fA-F0-9]{2}', re.ASCII)
    return escape.sub(_unquote_match, spaced)
|
388
third_party/python/Lib/email/utils.py
vendored
Normal file
388
third_party/python/Lib/email/utils.py
vendored
Normal file
|
@ -0,0 +1,388 @@
|
|||
# Copyright (C) 2001-2010 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Miscellaneous utilities."""
|
||||
|
||||
__all__ = [
|
||||
'collapse_rfc2231_value',
|
||||
'decode_params',
|
||||
'decode_rfc2231',
|
||||
'encode_rfc2231',
|
||||
'formataddr',
|
||||
'formatdate',
|
||||
'format_datetime',
|
||||
'getaddresses',
|
||||
'make_msgid',
|
||||
'mktime_tz',
|
||||
'parseaddr',
|
||||
'parsedate',
|
||||
'parsedate_tz',
|
||||
'parsedate_to_datetime',
|
||||
'unquote',
|
||||
]
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import random
|
||||
import socket
|
||||
import datetime
|
||||
import urllib.parse
|
||||
|
||||
from email._parseaddr import quote
|
||||
from email._parseaddr import AddressList as _AddressList
|
||||
from email._parseaddr import mktime_tz
|
||||
|
||||
from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
|
||||
|
||||
# Intrapackage imports
|
||||
from email.charset import Charset
|
||||
|
||||
COMMASPACE = ', '
|
||||
EMPTYSTRING = ''
|
||||
UEMPTYSTRING = ''
|
||||
CRLF = '\r\n'
|
||||
TICK = "'"
|
||||
|
||||
specialsre = re.compile(r'[][\\()<>@,:;".]')
|
||||
escapesre = re.compile(r'[\\"]')
|
||||
|
||||
def _has_surrogates(s):
|
||||
"""Return True if s contains surrogate-escaped binary data."""
|
||||
# This check is based on the fact that unless there are surrogates, utf8
|
||||
# (Python's default encoding) can encode any string. This is the fastest
|
||||
# way to check for surrogates, see issue 11454 for timings.
|
||||
try:
|
||||
s.encode()
|
||||
return False
|
||||
except UnicodeEncodeError:
|
||||
return True
|
||||
|
||||
# How to deal with a string containing bytes before handing it to the
|
||||
# application through the 'normal' interface.
|
||||
def _sanitize(string):
|
||||
# Turn any escaped bytes into unicode 'unknown' char. If the escaped
|
||||
# bytes happen to be utf-8 they will instead get decoded, even if they
|
||||
# were invalid in the charset the source was supposed to be in. This
|
||||
# seems like it is not a bad thing; a defect was still registered.
|
||||
original_bytes = string.encode('utf-8', 'surrogateescape')
|
||||
return original_bytes.decode('utf-8', 'replace')
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
|
||||
def formataddr(pair, charset='utf-8'):
|
||||
"""The inverse of parseaddr(), this takes a 2-tuple of the form
|
||||
(realname, email_address) and returns the string value suitable
|
||||
for an RFC 2822 From, To or Cc header.
|
||||
|
||||
If the first element of pair is false, then the second element is
|
||||
returned unmodified.
|
||||
|
||||
Optional charset if given is the character set that is used to encode
|
||||
realname in case realname is not ASCII safe. Can be an instance of str or
|
||||
a Charset-like object which has a header_encode method. Default is
|
||||
'utf-8'.
|
||||
"""
|
||||
name, address = pair
|
||||
# The address MUST (per RFC) be ascii, so raise a UnicodeError if it isn't.
|
||||
address.encode('ascii')
|
||||
if name:
|
||||
try:
|
||||
name.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
if isinstance(charset, str):
|
||||
charset = Charset(charset)
|
||||
encoded_name = charset.header_encode(name)
|
||||
return "%s <%s>" % (encoded_name, address)
|
||||
else:
|
||||
quotes = ''
|
||||
if specialsre.search(name):
|
||||
quotes = '"'
|
||||
name = escapesre.sub(r'\\\g<0>', name)
|
||||
return '%s%s%s <%s>' % (quotes, name, quotes, address)
|
||||
return address
|
||||
|
||||
|
||||
|
||||
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # Join all the field values into one comma-separated string and parse
    # that as a single address list.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
|
||||
|
||||
|
||||
|
||||
ecre = re.compile(r'''
|
||||
=\? # literal =?
|
||||
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
|
||||
\? # literal ?
|
||||
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
|
||||
\? # literal ?
|
||||
(?P<atom>.*?) # non-greedy up to the next ?= is the atom
|
||||
\?= # literal ?=
|
||||
''', re.VERBOSE | re.IGNORECASE)
|
||||
|
||||
|
||||
def _format_timetuple_and_zone(timetuple, zone):
    """Render a time tuple plus a zone string as 'Day, DD Mon YYYY HH:MM:SS zone'.

    timetuple is indexed like a time.struct_time: year, month, day, hour,
    minute, second, weekday (0 is Monday).  English abbreviations are always
    used for the day and month names.
    """
    day_name = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]]
    day_of_month = timetuple[2]
    month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1]
    fields = (
        day_name,
        day_of_month,
        month_name,
        timetuple[0],
        timetuple[3],
        timetuple[4],
        timetuple[5],
        zone,
    )
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % fields
|
||||
|
||||
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return an RFC 2822 date string, for example:

        Fri, 09 Nov 2001 01:08:47 -0000

    timeval is a floating point timestamp; when it is None the current
    time is used.

    When localtime is true the timestamp is rendered relative to the local
    timezone rather than UTC.

    When usegmt is true (and localtime is false) the zone is written as the
    string "GMT" instead of a numeric offset, as HTTP requires.
    """
    # strftime() is deliberately avoided: it honours the locale, whereas RFC
    # 2822 requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        # Start from an aware UTC value and convert it to the local zone;
        # usegmt is ignored in this mode.
        utc_dt = datetime.datetime.fromtimestamp(timeval,
                                                 datetime.timezone.utc)
        return format_datetime(utc_dt.astimezone(), False)
    if usegmt:
        # format_datetime() requires an aware UTC datetime for 'GMT'.
        utc_dt = datetime.datetime.fromtimestamp(timeval,
                                                 datetime.timezone.utc)
        return format_datetime(utc_dt, usegmt)
    # Naive UTC datetime: format_datetime() renders the zone as -0000.
    return format_datetime(datetime.datetime.utcfromtimestamp(timeval), usegmt)
|
||||
|
||||
def format_datetime(dt, usegmt=False):
    """Format a datetime as an RFC 2822 date string.

    A naive datetime is rendered with the zone '-0000'; an aware one uses
    its numeric offset.  With usegmt true, dt must be an aware datetime whose
    tzinfo equals datetime.timezone.utc, and the zone is written as 'GMT'
    (useful for HTTP date headers); otherwise ValueError is raised.
    """
    fields = dt.timetuple()
    tzinfo = dt.tzinfo
    if not usegmt:
        zone = '-0000' if tzinfo is None else dt.strftime("%z")
    elif tzinfo is None or tzinfo != datetime.timezone.utc:
        raise ValueError("usegmt option requires a UTC datetime")
    else:
        zone = 'GMT'
    return _format_timetuple_and_zone(fields, zone)
|
||||
|
||||
|
||||
def make_msgid(idstring=None, domain=None):
    """Return a string usable as an RFC 2822 Message-ID, for example:

        <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>

    idstring, when given, is appended (after a dot) to the local part to
    strengthen uniqueness.  domain, when given, is used for the part after
    the '@'; otherwise socket.getfqdn() supplies it.
    """
    # Local part: timestamp in hundredths of a second, process id, and a
    # 64-bit random number.
    stamp = int(time.time()*100)
    process_id = os.getpid()
    nonce = random.getrandbits(64)
    suffix = '' if idstring is None else '.' + idstring
    host = socket.getfqdn() if domain is None else domain
    return '<%d.%d.%d%s@%s>' % (stamp, process_id, nonce, suffix, host)
|
||||
|
||||
|
||||
def parsedate_to_datetime(data):
    """Convert a date string into a datetime.datetime.

    data is handed to _parsedate_tz().  If the parsed timezone offset is
    None, a naive datetime is returned; otherwise the result is an aware
    datetime whose tzinfo is a fixed-offset datetime.timezone built from
    that offset in seconds.

    Raises ValueError if _parsedate_tz() cannot parse data (it returns
    None in that case).
    """
    parsed = _parsedate_tz(data)
    if parsed is None:
        # Without this check the starred unpacking below fails with an
        # opaque "NoneType is not iterable" TypeError.
        raise ValueError('Invalid date value or format "%s"' % str(data))
    *dtuple, tz = parsed
    if tz is None:
        return datetime.datetime(*dtuple[:6])
    return datetime.datetime(*dtuple[:6],
            tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
||||
|
||||
|
||||
def parseaddr(addr):
    """Split addr into a (realname, email address) tuple.

    Only the first address found is returned.  When nothing can be parsed
    the result is the 2-tuple ('', '').
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
|
||||
|
||||
|
||||
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
|
||||
def unquote(str):
    """Strip one layer of surrounding double quotes or angle brackets.

    For a double-quoted string, the backslash escapes for backslash and
    double quote are also undone.  Strings shorter than two characters, or
    without a matching pair of delimiters, are returned unchanged.
    """
    if len(str) <= 1:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
|
||||
|
||||
|
||||
|
||||
# RFC2231-related functions - parameter encoding and decoding
|
||||
def decode_rfc2231(s):
    """Split an RFC 2231 value into its charset, language and text parts.

    s is split on TICK at most twice.  If that yields three pieces they are
    returned as a list; otherwise (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
|
||||
|
||||
|
||||
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-encoded (no characters are treated as safe), using
    charset for the byte encoding, or 'ascii' when charset is not given.

    If neither charset nor language is given, the percent-encoded string is
    returned on its own.  Otherwise the result has the form
    charset'language'encoded-text, where a charset or language that was not
    given is written as an empty field.
    """
    quoted = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return quoted
    # A missing field must be left blank; formatting None directly would
    # emit the literal text "None" as the charset or language name.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, quoted)
|
||||
|
||||
|
||||
# Matches a parameter name made of ASCII word characters followed by '*',
# optionally followed by a decimal section number and another optional '*'
# (for example: name*, name*0, name*0*).  The base name and the section
# number are exposed as the named groups 'name' and 'num'.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)
|
||||
|
||||
def decode_params(params):
    """Decode a parameter list according to RFC 2231.

    params is a list of 2-tuples of (param name, string value).  The first
    entry is passed through unchanged.  Every other entry is unquoted; names
    matching rfc2231_continuation are collected and their segments joined in
    sorted order, while all other entries are re-quoted and kept as they
    are.  Reassembled parameters follow the ordinary ones in the result.

    For a reassembled parameter with at least one '*'-suffixed segment the
    value is a (charset, language, text) 3-tuple; otherwise it is a string.
    """
    # Work on a copy so the caller's list is not modified.
    remaining = params[:]
    first_name, first_value = remaining.pop(0)
    decoded = [(first_name, first_value)]
    # Maps a base parameter name to its segments.  Each segment is a 3-tuple
    # of (section number or None, string value, percent-encoded flag).
    segments_by_name = {}
    for raw_name, raw_value in remaining:
        is_encoded = raw_name.endswith('*')
        text = unquote(raw_value)
        match = rfc2231_continuation.match(raw_name)
        if not match:
            decoded.append((raw_name, '"%s"' % quote(text)))
            continue
        base_name, section = match.group('name', 'num')
        if section is not None:
            section = int(section)
        segments_by_name.setdefault(base_name, []).append(
            (section, text, is_encoded))
    for base_name, segments in segments_by_name.items():
        pieces = []
        extended = False
        # Put the segments in section-number order before joining them.
        segments.sort()
        # If any segment name ended in '*', the joined string is expected
        # to start with the charset and language specifiers.
        for _section, piece, is_encoded in segments:
            if is_encoded:
                # Decode as "latin-1" so that each resulting character
                # stands for exactly one percent-encoded octet;
                # collapse_rfc2231_value treats the text as octets.
                piece = urllib.parse.unquote(piece, encoding="latin-1")
                extended = True
            pieces.append(piece)
        joined = quote(EMPTYSTRING.join(pieces))
        if extended:
            charset, language, joined = decode_rfc2231(joined)
            decoded.append((base_name, (charset, language, '"%s"' % joined)))
        else:
            decoded.append((base_name, '"%s"' % joined))
    return decoded
|
||||
|
||||
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Turn a parameter value into a plain string.

    A value that is not a 3-tuple is simply unquoted.  A
    (charset, language, text) 3-tuple has its text converted to bytes and
    decoded with charset, using errors as the error handler.
    fallback_charset is used when charset is None, and the unquoted text is
    returned if the charset names no known codec.
    """
    is_triple = isinstance(value, tuple) and len(value) == 3
    if not is_triple:
        return unquote(value)
    charset, _language, text = value
    # Issue 17369: a charset of None means decode_rfc2231 could not parse
    # the value, so decode with the fallback instead.
    codec_name = fallback_charset if charset is None else charset
    # The text arrives as a str but has to be read as raw octets, so encode
    # it with 'raw-unicode-escape' rather than as UTF-8.
    octets = bytes(text, 'raw-unicode-escape')
    try:
        return str(octets, codec_name, errors)
    except LookupError:
        # The charset is not a known codec.
        return unquote(text)
|
||||
|
||||
|
||||
#
|
||||
# datetime doesn't provide a localtime function yet, so provide one. Code
|
||||
# adapted from the patch in issue 9527. This may not be perfect, but it is
|
||||
# better than not having it.
|
||||
#
|
||||
|
||||
def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    With no arguments the current time is returned.  An aware *dt* is
    converted to the local time zone.  A naive *dt* is assumed to already
    be in local time and only has a tzinfo attached; in that case *isdst*
    is passed to mktime() as the daylight-saving hint: positive means DST
    is in effect, zero means it is not, and a negative value lets the
    system decide.
    """
    if dt is None:
        utc_now = datetime.datetime.now(datetime.timezone.utc)
        return utc_now.astimezone()
    if dt.tzinfo is not None:
        return dt.astimezone()
    # Naive datetime: build a local time tuple carrying the isdst hint and
    # let mktime() turn it into seconds since the epoch.
    local_fields = dt.timetuple()[:-1] + (isdst,)
    epoch_seconds = time.mktime(local_fields)
    local_struct = time.localtime(epoch_seconds)
    try:
        offset = datetime.timedelta(seconds=local_struct.tm_gmtoff)
        zone = datetime.timezone(offset, local_struct.tm_zone)
    except AttributeError:
        # tm_gmtoff / tm_zone are unavailable.  Compute the UTC offset and
        # compare it with the one implied by tm_isdst; only when they agree
        # is the zone name implied by tm_isdst used.
        utc_naive = datetime.datetime(*time.gmtime(epoch_seconds)[:6])
        offset = dt - utc_naive
        in_dst = time.daylight and local_struct.tm_isdst > 0
        implied_seconds = -(time.altzone if in_dst else time.timezone)
        if offset == datetime.timedelta(seconds=implied_seconds):
            zone = datetime.timezone(offset, time.tzname[in_dst])
        else:
            zone = datetime.timezone(offset)
    return dt.replace(tzinfo=zone)
|
Loading…
Add table
Add a link
Reference in a new issue