mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-24 18:20:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			246 lines
		
	
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			246 lines
		
	
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| """Classes to parse mailer-daemon messages."""
 | |
| 
 | |
| import calendar
 | |
| import email.message
 | |
| import re
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| 
 | |
class Unparseable(Exception):
    """Raised when a message body matches none of the known bounce formats."""
 | |
| 
 | |
| 
 | |
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can classify itself and list its errors.

    Instances are meant to be created by the email parser (for example
    ``email.message_from_file(fp, _class=ErrorMessage)``).
    """

    def __init__(self):
        email.message.Message.__init__(self)
        # Lower-cased subject; filled in by is_warning() for messages that
        # are not warnings and later handed to the parsers in EMPARSERS.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks this as a warning only, else 0.

        For a non-warning with a subject, the lower-cased subject is stored
        in ``self.sub`` as a side effect.
        """
        sub = self.get('Subject')
        if not sub:
            return 0
        # str() guards against header objects that lack str methods.
        sub = str(sub).lower()
        if sub.startswith('waiting mail'):
            return 1
        if 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def getaddr(self, name):
        """Return ``(realname, address)`` parsed from header *name*.

        Returns ``(None, None)`` when the header is absent.
        """
        from email.utils import parseaddr
        value = self.get(name)
        if value is None:
            return None, None
        return parseaddr(str(value))

    def getdate(self, name):
        """Return the time tuple parsed from header *name*, or None.

        None is returned when the header is absent or cannot be parsed.
        """
        from email.utils import parsedate
        value = self.get(name)
        if value is None:
            return None
        return parsedate(str(value))

    def _body_text(self):
        """Return the message body (everything after the headers) as text."""
        if self.is_multipart():
            # Re-serialise the message and drop the top-level header block so
            # the text of every sub-part stays visible to the regex parsers.
            return self.as_string().partition('\n\n')[2]
        payload = self.get_payload()
        return payload if isinstance(payload, str) else ''

    def get_errors(self):
        """Return the error list produced by the first parser that succeeds.

        Each callable in EMPARSERS is invoked as ``parser(fp, self.sub)``
        with a fresh text stream positioned at the start of the body.

        Raises:
            Unparseable: if every parser rejects the message.
        """
        # email.message.Message has no rewindbody()/fp (those belonged to the
        # old rfc822 API), so give each parser its own in-memory stream.
        import io
        body = self._body_text()
        for p in EMPARSERS:
            try:
                return p(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
 | |
| 
 | |
# Address patterns, tried in order by emparse_list().
#
# Each entry is either a single pattern or a 2-tuple of patterns.
#
# Single pattern: must define a group (?P<email>...) capturing the failing
# address and may define (?P<reason>...) capturing the error message.  When
# no reason is captured, emparse_list_reason is consulted instead.
#
# 2-tuple: the first pattern locates a section of the message; the second
# is then matched (not searched) repeatedly, each time starting where the
# previous match ended, to collect one address per repetition.
#
# All entries start out as strings and are compiled just below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Swap every string for its compiled form (tuples become tuples of compiled
# patterns), keeping the same list object.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE)
    if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]

# Reason patterns, consulted in order when the address pattern captured no
# reason.  Plain-string entries are per-address templates: every "<>" is
# replaced by the (escaped) email address before compiling.  Pre-compiled
# entries apply to the whole message; the first one that matches wins and
# ends the search, so the order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Finds the first line beginning with "From:" (any case); emparse_list()
# only looks for addresses before that point.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
 | |
def emparse_list(fp, sub):
    """Extract "address: reason" strings from a bounce-message body.

    fp  -- readable text stream positioned at the start of the body.
    sub -- subject text, used as the reason when nothing better is found
           (a leading 'returned mail: ' is dropped first).

    Addresses come from the first entry of emparse_list_list that matches
    before the first "From:" line.  Each result has its whitespace runs
    collapsed to single spaces.  Raises Unparseable when no address is found.
    """
    text = fp.read()

    # Addresses are only looked for ahead of the first "From:" line.
    from_hit = emparse_list_from.search(text)
    limit = len(text) if from_hit is None else from_hit.start(0)

    addresses = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            locator = pattern[0]
            repeater = pattern[1]
            found = locator.search(text, 0, limit)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # The locator pattern defines no 'reason' group.
                pass
            # Collect one address per back-to-back match of the repeater.
            found = repeater.match(text, found.end(0), limit)
            while found is not None:
                addresses.append(found.group('email'))
                found = repeater.match(text, found.end(0), limit)
            break

        found = pattern.search(text, 0, limit)
        if found is None:
            continue
        addresses.append(found.group('email'))
        try:
            reason = found.group('reason')
        except IndexError:
            # This pattern defines no 'reason' group.
            pass
        break

    if not addresses:
        raise Unparseable

    report = []
    if not reason:
        prefix = 'returned mail: '
        reason = sub
        if reason[:len(prefix)] == prefix:
            reason = reason[len(prefix):]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                # Per-address template: walk backwards so that deleting a
                # handled address does not disturb the remaining indices.
                for idx in reversed(range(len(addresses))):
                    addr = addresses[idx]
                    specific = re.compile(
                        pattern.replace('<>', re.escape(addr)), re.MULTILINE)
                    hit = specific.search(text)
                    if hit is not None:
                        line = addr.strip() + ': ' + hit.group('reason')
                        report.append(' '.join(line.split()))
                        del addresses[idx]
            else:
                hit = pattern.search(text)
                if hit is not None:
                    reason = hit.group('reason')
                    break

    # Whatever addresses are left all share the general reason.
    report.extend(
        ' '.join((addr.strip() + ': ' + reason).split()) for addr in addresses)
    return report
 | |
| 
 | |
# Parsers tried in order by ErrorMessage.get_errors().  Each one is called as
# parser(fp, sub); raising Unparseable hands the message to the next parser.
EMPARSERS = [emparse_list]
 | |
| 
 | |
def sort_numeric(a, b):
    """Three-way compare of *a* and *b* by their integer values.

    Both arguments are converted with int().  Returns -1, 0 or 1 when
    int(a) is respectively less than, equal to, or greater than int(b).
    """
    left = int(a)
    right = int(b)
    return (left > right) - (left < right)
 | |
| 
 | |
def parsedir(dir, modify):
    """Parse every numerically-named message file in *dir* and print a report.

    dir    -- directory to scan; the process changes its working directory
              to it (os.chdir) and stays there.
    modify -- when true, each file that was handled (warning-only or
              successfully parsed) is renamed by prefixing a comma to its
              name.  Unparseable files are left untouched.

    For each file one status line is printed; afterwards a summary of the
    counts and a sorted table "<count> <first> - <last>\\t<error>" is printed,
    where first/last are the "file (date)" of the first and last occurrence.
    """
    from email.utils import parseaddr, parsedate

    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # Find all numeric file names and order them by numeric value.
    files = [fn for fn in os.listdir('.') if pat.match(fn) is not None]
    files.sort(key=int)

    for fn in files:
        # The parser consumes the whole file up front, so the handle can be
        # closed immediately.  errors='replace' keeps stray non-decodable
        # bytes in a mail file from aborting the whole run.
        with open(fn, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = parseaddr(str(m.get('From', '')))
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

        if m.is_warning():
            print('warning only')
            nwarn += 1
            if modify:
                os.rename(fn, ','+fn)
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad += 1
            continue
        print(len(errors), 'errors')

        # The date label is the same for every error of this message.
        parsed = parsedate(str(m.get('Date', '')))
        if parsed is None:
            date = '??????'
        else:
            mm, dd = parsed[1:3]
            date = '%s %02d' % (calendar.month_abbr[mm], dd)
        where = '%s (%s)' % (fn, date)

        # Remember them.
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = where
            else:
                errordict[e] += 1
            errorlast[e] = where

        nok += 1
        if modify:
            os.rename(fn, ','+fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    # Named "summary" rather than "list": a local called "list" would shadow
    # the builtin for the entire function body.
    summary = sorted(
        (count, errorfirst[e], errorlast[e], e)
        for e, count in errordict.items())
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
 | |
| 
 | |
def main():
    """Command-line entry point: [-d] [folder ...].

    A leading '-d' switches on modify mode and is removed from sys.argv.
    Each remaining argument is handed to parsedir(); when there are none,
    the built-in default folder is used.
    """
    argv = sys.argv
    modify = 0
    if len(argv) > 1 and argv[1] == '-d':
        modify = 1
        del argv[1]
    folders = argv[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
 | |
| 
 | |
# Run main() when executed as a script.  The second test also fires when
# sys.argv[0] equals this module's __name__.
# NOTE(review): presumably a legacy way of launching the module -- confirm
# before removing.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
 |