Blame - lib/python2.7/rfc822.py - platform/prebuilts/gdb/darwin-x86

blob: b65d8da0d25babd71cd52720e1e422524e38d921 [file] [log] [blame]

Josh Gao	b85a9f3	2015-09-23 20:40:47 -0700	[diff] [blame]	1	"""RFC 2822 message manipulation.
				2
				3	Note: This is only a very rough sketch of a full RFC-822 parser; in particular
				4	the tokenizing of addresses does not adhere to all the quoting rules.
				5
				6	Note: RFC 2822 is a long awaited update to RFC 822. This module should
				7	conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
				8	effort at RFC 2822 updates has been made, but a thorough audit has not been
				9	performed. Consider any RFC 2822 non-conformance to be a bug.
				10
				11	RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
				12	RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
				13
				14	Directions for use:
				15
				16	To create a Message object: first open a file, e.g.:
				17
				18	fp = open(file, 'r')
				19
				20	You can use any other legal way of getting an open file object, e.g. use
				21	sys.stdin or call os.popen(). Then pass the open file object to the Message()
				22	constructor:
				23
				24	m = Message(fp)
				25
				26	This class can work with any input object that supports a readline method. If
				27	the input object has seek and tell capability, the rewindbody method will
				28	work; also illegal lines will be pushed back onto the input stream. If the
				29	input object lacks seek but has an `unread' method that can push back a line
				30	of input, Message will use that to push back illegal lines. Thus this class
				31	can be used to parse messages coming from a buffered stream.
				32
				33	The optional `seekable' argument is provided as a workaround for certain stdio
				34	libraries in which tell() discards buffered data before discovering that the
				35	lseek() system call doesn't work. For maximum portability, you should set the
				36	seekable argument to zero to prevent that initial tell() when passing in
				37	an unseekable object such as a file object created from a socket object. If
				38	it is 1 on entry -- which it is by default -- the tell() method of the open
				39	file object is called once; if this raises an exception, seekable is reset to
				40	0. For other nonzero values of seekable, this test is not made.
				41
				42	To get the text of a particular header there are several methods:
				43
				44	str = m.getheader(name)
				45	str = m.getrawheader(name)
				46
				47	where name is the name of the header, e.g. 'Subject'. The difference is that
				48	getheader() strips the leading and trailing whitespace, while getrawheader()
				49	doesn't. Both functions retain embedded whitespace (including newlines)
				50	exactly as they are specified in the header, and leave the case of the text
				51	unchanged.
				52
				53	For addresses and address lists there are functions
				54
				55	realname, mailaddress = m.getaddr(name)
				56	list = m.getaddrlist(name)
				57
				58	where the latter returns a list of (realname, mailaddr) tuples.
				59
				60	There is also a method
				61
				62	time = m.getdate(name)
				63
				64	which parses a Date-like field and returns a time-compatible tuple,
				65	i.e. a tuple such as returned by time.localtime() or accepted by
				66	time.mktime().
				67
				68	See the class definition for lower level access methods.
				69
				70	There are also some utility functions here.
				71	"""
				72	# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
				73
				74	import time
				75
				76	from warnings import warnpy3k
				77	warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
				78	stacklevel=2)
				79
				80	__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
				81
				82	_blanklines = ('\r\n', '\n') # Optimization for islast()
				83
				84
				85	class Message:
				86	"""Represents a single RFC 2822-compliant message."""
				87
				88	def __init__(self, fp, seekable = 1):
				89	"""Initialize the class instance and read the headers."""
				90	if seekable == 1:
				91	# Exercise tell() to make sure it works
				92	# (and then assume seek() works, too)
				93	try:
				94	fp.tell()
				95	except (AttributeError, IOError):
				96	seekable = 0
				97	self.fp = fp
				98	self.seekable = seekable
				99	self.startofheaders = None
				100	self.startofbody = None
				101	#
				102	if self.seekable:
				103	try:
				104	self.startofheaders = self.fp.tell()
				105	except IOError:
				106	self.seekable = 0
				107	#
				108	self.readheaders()
				109	#
				110	if self.seekable:
				111	try:
				112	self.startofbody = self.fp.tell()
				113	except IOError:
				114	self.seekable = 0
				115
				116	def rewindbody(self):
				117	"""Rewind the file to the start of the body (if seekable)."""
				118	if not self.seekable:
				119	raise IOError, "unseekable file"
				120	self.fp.seek(self.startofbody)
				121
				122	def readheaders(self):
				123	"""Read header lines.
				124
				125	Read header lines up to the entirely blank line that terminates them.
				126	The (normally blank) line that ends the headers is skipped, but not
				127	included in the returned list. If a non-header line ends the headers,
				128	(which is an error), an attempt is made to backspace over it; it is
				129	never included in the returned list.
				130
				131	The variable self.status is set to the empty string if all went well,
				132	otherwise it is an error message. The variable self.headers is a
				133	completely uninterpreted list of lines contained in the header (so
				134	printing them will reproduce the header exactly as it appears in the
				135	file).
				136	"""
				137	self.dict = {}
				138	self.unixfrom = ''
				139	self.headers = lst = []
				140	self.status = ''
				141	headerseen = ""
				142	firstline = 1
				143	startofline = unread = tell = None
				144	if hasattr(self.fp, 'unread'):
				145	unread = self.fp.unread
				146	elif self.seekable:
				147	tell = self.fp.tell
				148	while 1:
				149	if tell:
				150	try:
				151	startofline = tell()
				152	except IOError:
				153	startofline = tell = None
				154	self.seekable = 0
				155	line = self.fp.readline()
				156	if not line:
				157	self.status = 'EOF in headers'
				158	break
				159	# Skip unix From name time lines
				160	if firstline and line.startswith('From '):
				161	self.unixfrom = self.unixfrom + line
				162	continue
				163	firstline = 0
				164	if headerseen and line[0] in ' \t':
				165	# It's a continuation line.
				166	lst.append(line)
				167	x = (self.dict[headerseen] + "\n " + line.strip())
				168	self.dict[headerseen] = x.strip()
				169	continue
				170	elif self.iscomment(line):
				171	# It's a comment. Ignore it.
				172	continue
				173	elif self.islast(line):
				174	# Note! No pushback here! The delimiter line gets eaten.
				175	break
				176	headerseen = self.isheader(line)
				177	if headerseen:
				178	# It's a legal header line, save it.
				179	lst.append(line)
				180	self.dict[headerseen] = line[len(headerseen)+1:].strip()
				181	continue
				182	else:
				183	# It's not a header line; throw it back and stop here.
				184	if not self.dict:
				185	self.status = 'No headers'
				186	else:
				187	self.status = 'Non-header line where header expected'
				188	# Try to undo the read.
				189	if unread:
				190	unread(line)
				191	elif tell:
				192	self.fp.seek(startofline)
				193	else:
				194	self.status = self.status + '; bad seek'
				195	break
				196
				197	def isheader(self, line):
				198	"""Determine whether a given line is a legal header.
				199
				200	This method should return the header name, suitably canonicalized.
				201	You may override this method in order to use Message parsing on tagged
				202	data in RFC 2822-like formats with special header formats.
				203	"""
				204	i = line.find(':')
				205	if i > 0:
				206	return line[:i].lower()
				207	return None
				208
				209	def islast(self, line):
				210	"""Determine whether a line is a legal end of RFC 2822 headers.
				211
				212	You may override this method if your application wants to bend the
				213	rules, e.g. to strip trailing whitespace, or to recognize MH template
				214	separators ('--------'). For convenience (e.g. for code reading from
				215	sockets) a line consisting of \\r\\n also matches.
				216	"""
				217	return line in _blanklines
				218
				219	def iscomment(self, line):
				220	"""Determine whether a line should be skipped entirely.
				221
				222	You may override this method in order to use Message parsing on tagged
				223	data in RFC 2822-like formats that support embedded comments or
				224	free-text data.
				225	"""
				226	return False
				227
				228	def getallmatchingheaders(self, name):
				229	"""Find all header lines matching a given header name.
				230
				231	Look through the list of headers and find all lines matching a given
				232	header name (and their continuation lines). A list of the lines is
				233	returned, without interpretation. If the header does not occur, an
				234	empty list is returned. If the header occurs multiple times, all
				235	occurrences are returned. Case is not important in the header name.
				236	"""
				237	name = name.lower() + ':'
				238	n = len(name)
				239	lst = []
				240	hit = 0
				241	for line in self.headers:
				242	if line[:n].lower() == name:
				243	hit = 1
				244	elif not line[:1].isspace():
				245	hit = 0
				246	if hit:
				247	lst.append(line)
				248	return lst
				249
				250	def getfirstmatchingheader(self, name):
				251	"""Get the first header line matching name.
				252
				253	This is similar to getallmatchingheaders, but it returns only the
				254	first matching header (and its continuation lines).
				255	"""
				256	name = name.lower() + ':'
				257	n = len(name)
				258	lst = []
				259	hit = 0
				260	for line in self.headers:
				261	if hit:
				262	if not line[:1].isspace():
				263	break
				264	elif line[:n].lower() == name:
				265	hit = 1
				266	if hit:
				267	lst.append(line)
				268	return lst
				269
				270	def getrawheader(self, name):
				271	"""A higher-level interface to getfirstmatchingheader().
				272
				273	Return a string containing the literal text of the header but with the
				274	keyword stripped. All leading, trailing and embedded whitespace is
				275	kept in the string, however. Return None if the header does not
				276	occur.
				277	"""
				278
				279	lst = self.getfirstmatchingheader(name)
				280	if not lst:
				281	return None
				282	lst[0] = lst[0][len(name) + 1:]
				283	return ''.join(lst)
				284
				285	def getheader(self, name, default=None):
				286	"""Get the header value for a name.
				287
				288	This is the normal interface: it returns a stripped version of the
				289	header value for a given header name, or None if it doesn't exist.
				290	This uses the dictionary version which finds the last such header.
				291	"""
				292	return self.dict.get(name.lower(), default)
				293	get = getheader
				294
				295	def getheaders(self, name):
				296	"""Get all values for a header.
				297
				298	This returns a list of values for headers given more than once; each
				299	value in the result list is stripped in the same way as the result of
				300	getheader(). If the header is not given, return an empty list.
				301	"""
				302	result = []
				303	current = ''
				304	have_header = 0
				305	for s in self.getallmatchingheaders(name):
				306	if s[0].isspace():
				307	if current:
				308	current = "%s\n %s" % (current, s.strip())
				309	else:
				310	current = s.strip()
				311	else:
				312	if have_header:
				313	result.append(current)
				314	current = s[s.find(":") + 1:].strip()
				315	have_header = 1
				316	if have_header:
				317	result.append(current)
				318	return result
				319
				320	def getaddr(self, name):
				321	"""Get a single address from a header, as a tuple.
				322
				323	An example return value:
				324	('Guido van Rossum', 'guido@cwi.nl')
				325	"""
				326	# New, by Ben Escoto
				327	alist = self.getaddrlist(name)
				328	if alist:
				329	return alist[0]
				330	else:
				331	return (None, None)
				332
				333	def getaddrlist(self, name):
				334	"""Get a list of addresses from a header.
				335
				336	Retrieves a list of addresses from a header, where each address is a
				337	tuple as returned by getaddr(). Scans all named headers, so it works
				338	properly with multiple To: or Cc: headers for example.
				339	"""
				340	raw = []
				341	for h in self.getallmatchingheaders(name):
				342	if h[0] in ' \t':
				343	raw.append(h)
				344	else:
				345	if raw:
				346	raw.append(', ')
				347	i = h.find(':')
				348	if i > 0:
				349	addr = h[i+1:]
				350	raw.append(addr)
				351	alladdrs = ''.join(raw)
				352	a = AddressList(alladdrs)
				353	return a.addresslist
				354
				355	def getdate(self, name):
				356	"""Retrieve a date field from a header.
				357
				358	Retrieves a date field from the named header, returning a tuple
				359	compatible with time.mktime().
				360	"""
				361	try:
				362	data = self[name]
				363	except KeyError:
				364	return None
				365	return parsedate(data)
				366
				367	def getdate_tz(self, name):
				368	"""Retrieve a date field from a header as a 10-tuple.
				369
				370	The first 9 elements make up a tuple compatible with time.mktime(),
				371	and the 10th is the offset of the poster's time zone from GMT/UTC.
				372	"""
				373	try:
				374	data = self[name]
				375	except KeyError:
				376	return None
				377	return parsedate_tz(data)
				378
				379
				380	# Access as a dictionary (only finds last header of each type):
				381
				382	def __len__(self):
				383	"""Get the number of headers in a message."""
				384	return len(self.dict)
				385
				386	def __getitem__(self, name):
				387	"""Get a specific header, as from a dictionary."""
				388	return self.dict[name.lower()]
				389
				390	def __setitem__(self, name, value):
				391	"""Set the value of a header.
				392
				393	Note: This is not a perfect inversion of __getitem__, because any
				394	changed headers get stuck at the end of the raw-headers list rather
				395	than where the altered header was.
				396	"""
				397	del self[name] # Won't fail if it doesn't exist
				398	self.dict[name.lower()] = value
				399	text = name + ": " + value
				400	for line in text.split("\n"):
				401	self.headers.append(line + "\n")
				402
				403	def __delitem__(self, name):
				404	"""Delete all occurrences of a specific header, if it is present."""
				405	name = name.lower()
				406	if not name in self.dict:
				407	return
				408	del self.dict[name]
				409	name = name + ':'
				410	n = len(name)
				411	lst = []
				412	hit = 0
				413	for i in range(len(self.headers)):
				414	line = self.headers[i]
				415	if line[:n].lower() == name:
				416	hit = 1
				417	elif not line[:1].isspace():
				418	hit = 0
				419	if hit:
				420	lst.append(i)
				421	for i in reversed(lst):
				422	del self.headers[i]
				423
				424	def setdefault(self, name, default=""):
				425	lowername = name.lower()
				426	if lowername in self.dict:
				427	return self.dict[lowername]
				428	else:
				429	text = name + ": " + default
				430	for line in text.split("\n"):
				431	self.headers.append(line + "\n")
				432	self.dict[lowername] = default
				433	return default
				434
				435	def has_key(self, name):
				436	"""Determine whether a message contains the named header."""
				437	return name.lower() in self.dict
				438
				439	def __contains__(self, name):
				440	"""Determine whether a message contains the named header."""
				441	return name.lower() in self.dict
				442
				443	def __iter__(self):
				444	return iter(self.dict)
				445
				446	def keys(self):
				447	"""Get all of a message's header field names."""
				448	return self.dict.keys()
				449
				450	def values(self):
				451	"""Get all of a message's header field values."""
				452	return self.dict.values()
				453
				454	def items(self):
				455	"""Get all of a message's headers.
				456
				457	Returns a list of name, value tuples.
				458	"""
				459	return self.dict.items()
				460
				461	def __str__(self):
				462	return ''.join(self.headers)
				463
				464
				465	# Utility functions
				466	# -----------------
				467
				468	# XXX Should fix unquote() and quote() to be really conformant.
				469	# XXX The inverses of the parse functions may also be useful.
				470
				471
				472	def unquote(s):
				473	"""Remove quotes from a string."""
				474	if len(s) > 1:
				475	if s.startswith('"') and s.endswith('"'):
				476	return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
				477	if s.startswith('<') and s.endswith('>'):
				478	return s[1:-1]
				479	return s
				480
				481
				482	def quote(s):
				483	"""Add quotes around a string."""
				484	return s.replace('\\', '\\\\').replace('"', '\\"')
				485
				486
				487	def parseaddr(address):
				488	"""Parse an address into a (realname, mailaddr) tuple."""
				489	a = AddressList(address)
				490	lst = a.addresslist
				491	if not lst:
				492	return (None, None)
				493	return lst[0]
				494
				495
				496	class AddrlistClass:
				497	"""Address parser class by Ben Escoto.
				498
				499	To understand what this class does, it helps to have a copy of
				500	RFC 2822 in front of you.
				501
				502	http://www.faqs.org/rfcs/rfc2822.html
				503
				504	Note: this class interface is deprecated and may be removed in the future.
				505	Use rfc822.AddressList instead.
				506	"""
				507
				508	def __init__(self, field):
				509	"""Initialize a new instance.
				510
				511	`field' is an unparsed address header field, containing one or more
				512	addresses.
				513	"""
				514	self.specials = '()<>@,:;.\"[]'
				515	self.pos = 0
				516	self.LWS = ' \t'
				517	self.CR = '\r\n'
				518	self.atomends = self.specials + self.LWS + self.CR
				519	# Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
				520	# is obsolete syntax. RFC 2822 requires that we recognize obsolete
				521	# syntax, so allow dots in phrases.
				522	self.phraseends = self.atomends.replace('.', '')
				523	self.field = field
				524	self.commentlist = []
				525
				526	def gotonext(self):
				527	"""Parse up to the start of the next address."""
				528	while self.pos < len(self.field):
				529	if self.field[self.pos] in self.LWS + '\n\r':
				530	self.pos = self.pos + 1
				531	elif self.field[self.pos] == '(':
				532	self.commentlist.append(self.getcomment())
				533	else: break
				534
				535	def getaddrlist(self):
				536	"""Parse all addresses.
				537
				538	Returns a list containing all of the addresses.
				539	"""
				540	result = []
				541	ad = self.getaddress()
				542	while ad:
				543	result += ad
				544	ad = self.getaddress()
				545	return result
				546
				547	def getaddress(self):
				548	"""Parse the next address."""
				549	self.commentlist = []
				550	self.gotonext()
				551
				552	oldpos = self.pos
				553	oldcl = self.commentlist
				554	plist = self.getphraselist()
				555
				556	self.gotonext()
				557	returnlist = []
				558
				559	if self.pos >= len(self.field):
				560	# Bad email address technically, no domain.
				561	if plist:
				562	returnlist = [(' '.join(self.commentlist), plist[0])]
				563
				564	elif self.field[self.pos] in '.@':
				565	# email address is just an addrspec
				566	# this isn't very efficient since we start over
				567	self.pos = oldpos
				568	self.commentlist = oldcl
				569	addrspec = self.getaddrspec()
				570	returnlist = [(' '.join(self.commentlist), addrspec)]
				571
				572	elif self.field[self.pos] == ':':
				573	# address is a group
				574	returnlist = []
				575
				576	fieldlen = len(self.field)
				577	self.pos += 1
				578	while self.pos < len(self.field):
				579	self.gotonext()
				580	if self.pos < fieldlen and self.field[self.pos] == ';':
				581	self.pos += 1
				582	break
				583	returnlist = returnlist + self.getaddress()
				584
				585	elif self.field[self.pos] == '<':
				586	# Address is a phrase then a route addr
				587	routeaddr = self.getrouteaddr()
				588
				589	if self.commentlist:
				590	returnlist = [(' '.join(plist) + ' (' + \
				591	' '.join(self.commentlist) + ')', routeaddr)]
				592	else: returnlist = [(' '.join(plist), routeaddr)]
				593
				594	else:
				595	if plist:
				596	returnlist = [(' '.join(self.commentlist), plist[0])]
				597	elif self.field[self.pos] in self.specials:
				598	self.pos += 1
				599
				600	self.gotonext()
				601	if self.pos < len(self.field) and self.field[self.pos] == ',':
				602	self.pos += 1
				603	return returnlist
				604
				605	def getrouteaddr(self):
				606	"""Parse a route address (Return-path value).
				607
				608	This method just skips all the route stuff and returns the addrspec.
				609	"""
				610	if self.field[self.pos] != '<':
				611	return
				612
				613	expectroute = 0
				614	self.pos += 1
				615	self.gotonext()
				616	adlist = ""
				617	while self.pos < len(self.field):
				618	if expectroute:
				619	self.getdomain()
				620	expectroute = 0
				621	elif self.field[self.pos] == '>':
				622	self.pos += 1
				623	break
				624	elif self.field[self.pos] == '@':
				625	self.pos += 1
				626	expectroute = 1
				627	elif self.field[self.pos] == ':':
				628	self.pos += 1
				629	else:
				630	adlist = self.getaddrspec()
				631	self.pos += 1
				632	break
				633	self.gotonext()
				634
				635	return adlist
				636
				637	def getaddrspec(self):
				638	"""Parse an RFC 2822 addr-spec."""
				639	aslist = []
				640
				641	self.gotonext()
				642	while self.pos < len(self.field):
				643	if self.field[self.pos] == '.':
				644	aslist.append('.')
				645	self.pos += 1
				646	elif self.field[self.pos] == '"':
				647	aslist.append('"%s"' % self.getquote())
				648	elif self.field[self.pos] in self.atomends:
				649	break
				650	else: aslist.append(self.getatom())
				651	self.gotonext()
				652
				653	if self.pos >= len(self.field) or self.field[self.pos] != '@':
				654	return ''.join(aslist)
				655
				656	aslist.append('@')
				657	self.pos += 1
				658	self.gotonext()
				659	return ''.join(aslist) + self.getdomain()
				660
				661	def getdomain(self):
				662	"""Get the complete domain name from an address."""
				663	sdlist = []
				664	while self.pos < len(self.field):
				665	if self.field[self.pos] in self.LWS:
				666	self.pos += 1
				667	elif self.field[self.pos] == '(':
				668	self.commentlist.append(self.getcomment())
				669	elif self.field[self.pos] == '[':
				670	sdlist.append(self.getdomainliteral())
				671	elif self.field[self.pos] == '.':
				672	self.pos += 1
				673	sdlist.append('.')
				674	elif self.field[self.pos] in self.atomends:
				675	break
				676	else: sdlist.append(self.getatom())
				677	return ''.join(sdlist)
				678
				679	def getdelimited(self, beginchar, endchars, allowcomments = 1):
				680	"""Parse a header fragment delimited by special characters.
				681
				682	`beginchar' is the start character for the fragment. If self is not
				683	looking at an instance of `beginchar' then getdelimited returns the
				684	empty string.
				685
				686	`endchars' is a sequence of allowable end-delimiting characters.
				687	Parsing stops when one of these is encountered.
				688
				689	If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
				690	within the parsed fragment.
				691	"""
				692	if self.field[self.pos] != beginchar:
				693	return ''
				694
				695	slist = ['']
				696	quote = 0
				697	self.pos += 1
				698	while self.pos < len(self.field):
				699	if quote == 1:
				700	slist.append(self.field[self.pos])
				701	quote = 0
				702	elif self.field[self.pos] in endchars:
				703	self.pos += 1
				704	break
				705	elif allowcomments and self.field[self.pos] == '(':
				706	slist.append(self.getcomment())
				707	continue # have already advanced pos from getcomment
				708	elif self.field[self.pos] == '\\':
				709	quote = 1
				710	else:
				711	slist.append(self.field[self.pos])
				712	self.pos += 1
				713
				714	return ''.join(slist)
				715
				716	def getquote(self):
				717	"""Get a quote-delimited fragment from self's field."""
				718	return self.getdelimited('"', '"\r', 0)
				719
				720	def getcomment(self):
				721	"""Get a parenthesis-delimited fragment from self's field."""
				722	return self.getdelimited('(', ')\r', 1)
				723
				724	def getdomainliteral(self):
				725	"""Parse an RFC 2822 domain-literal."""
				726	return '[%s]' % self.getdelimited('[', ']\r', 0)
				727
				728	def getatom(self, atomends=None):
				729	"""Parse an RFC 2822 atom.
				730
				731	Optional atomends specifies a different set of end token delimiters
				732	(the default is to use self.atomends). This is used e.g. in
				733	getphraselist() since phrase endings must not include the `.' (which
				734	is legal in phrases)."""
				735	atomlist = ['']
				736	if atomends is None:
				737	atomends = self.atomends
				738
				739	while self.pos < len(self.field):
				740	if self.field[self.pos] in atomends:
				741	break
				742	else: atomlist.append(self.field[self.pos])
				743	self.pos += 1
				744
				745	return ''.join(atomlist)
				746
				747	def getphraselist(self):
				748	"""Parse a sequence of RFC 2822 phrases.
				749
				750	A phrase is a sequence of words, which are in turn either RFC 2822
				751	atoms or quoted-strings. Phrases are canonicalized by squeezing all
				752	runs of continuous whitespace into one space.
				753	"""
				754	plist = []
				755
				756	while self.pos < len(self.field):
				757	if self.field[self.pos] in self.LWS:
				758	self.pos += 1
				759	elif self.field[self.pos] == '"':
				760	plist.append(self.getquote())
				761	elif self.field[self.pos] == '(':
				762	self.commentlist.append(self.getcomment())
				763	elif self.field[self.pos] in self.phraseends:
				764	break
				765	else:
				766	plist.append(self.getatom(self.phraseends))
				767
				768	return plist
				769
				770	class AddressList(AddrlistClass):
				771	"""An AddressList encapsulates a list of parsed RFC 2822 addresses."""
				772	def __init__(self, field):
				773	AddrlistClass.__init__(self, field)
				774	if field:
				775	self.addresslist = self.getaddrlist()
				776	else:
				777	self.addresslist = []
				778
				779	def __len__(self):
				780	return len(self.addresslist)
				781
				782	def __str__(self):
				783	return ", ".join(map(dump_address_pair, self.addresslist))
				784
				785	def __add__(self, other):
				786	# Set union
				787	newaddr = AddressList(None)
				788	newaddr.addresslist = self.addresslist[:]
				789	for x in other.addresslist:
				790	if not x in self.addresslist:
				791	newaddr.addresslist.append(x)
				792	return newaddr
				793
				794	def __iadd__(self, other):
				795	# Set union, in-place
				796	for x in other.addresslist:
				797	if not x in self.addresslist:
				798	self.addresslist.append(x)
				799	return self
				800
				801	def __sub__(self, other):
				802	# Set difference
				803	newaddr = AddressList(None)
				804	for x in self.addresslist:
				805	if not x in other.addresslist:
				806	newaddr.addresslist.append(x)
				807	return newaddr
				808
				809	def __isub__(self, other):
				810	# Set difference, in-place
				811	for x in other.addresslist:
				812	if x in self.addresslist:
				813	self.addresslist.remove(x)
				814	return self
				815
				816	def __getitem__(self, index):
				817	# Make indexing, slices, and 'in' work
				818	return self.addresslist[index]
				819
				820	def dump_address_pair(pair):
				821	"""Dump a (name, address) pair in a canonicalized form."""
				822	if pair[0]:
				823	return '"' + pair[0] + '" <' + pair[1] + '>'
				824	else:
				825	return pair[1]
				826
				827	# Parse a date field
				828
				829	_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
				830	'aug', 'sep', 'oct', 'nov', 'dec',
				831	'january', 'february', 'march', 'april', 'may', 'june', 'july',
				832	'august', 'september', 'october', 'november', 'december']
				833	_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
				834
				835	# The timezone table does not include the military time zones defined
				836	# in RFC822, other than Z. According to RFC1123, the description in
				837	# RFC822 gets the signs wrong, so we can't rely on any such time
				838	# zones. RFC1123 recommends that numeric timezone indicators be used
				839	# instead of timezone names.
				840
				841	_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
				842	'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
				843	'EST': -500, 'EDT': -400, # Eastern
				844	'CST': -600, 'CDT': -500, # Central
				845	'MST': -700, 'MDT': -600, # Mountain
				846	'PST': -800, 'PDT': -700 # Pacific
				847	}
				848
				849
				850	def parsedate_tz(data):
				851	"""Convert a date string to a time tuple.
				852
				853	Accounts for military timezones.
				854	"""
				855	if not data:
				856	return None
				857	data = data.split()
				858	if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
				859	# There's a dayname here. Skip it
				860	del data[0]
				861	else:
				862	# no space after the "weekday,"?
				863	i = data[0].rfind(',')
				864	if i >= 0:
				865	data[0] = data[0][i+1:]
				866	if len(data) == 3: # RFC 850 date, deprecated
				867	stuff = data[0].split('-')
				868	if len(stuff) == 3:
				869	data = stuff + data[1:]
				870	if len(data) == 4:
				871	s = data[3]
				872	i = s.find('+')
				873	if i > 0:
				874	data[3:] = [s[:i], s[i+1:]]
				875	else:
				876	data.append('') # Dummy tz
				877	if len(data) < 5:
				878	return None
				879	data = data[:5]
				880	[dd, mm, yy, tm, tz] = data
				881	mm = mm.lower()
				882	if not mm in _monthnames:
				883	dd, mm = mm, dd.lower()
				884	if not mm in _monthnames:
				885	return None
				886	mm = _monthnames.index(mm)+1
				887	if mm > 12: mm = mm - 12
				888	if dd[-1] == ',':
				889	dd = dd[:-1]
				890	i = yy.find(':')
				891	if i > 0:
				892	yy, tm = tm, yy
				893	if yy[-1] == ',':
				894	yy = yy[:-1]
				895	if not yy[0].isdigit():
				896	yy, tz = tz, yy
				897	if tm[-1] == ',':
				898	tm = tm[:-1]
				899	tm = tm.split(':')
				900	if len(tm) == 2:
				901	[thh, tmm] = tm
				902	tss = '0'
				903	elif len(tm) == 3:
				904	[thh, tmm, tss] = tm
				905	else:
				906	return None
				907	try:
				908	yy = int(yy)
				909	dd = int(dd)
				910	thh = int(thh)
				911	tmm = int(tmm)
				912	tss = int(tss)
				913	except ValueError:
				914	return None
				915	tzoffset = None
				916	tz = tz.upper()
				917	if tz in _timezones:
				918	tzoffset = _timezones[tz]
				919	else:
				920	try:
				921	tzoffset = int(tz)
				922	except ValueError:
				923	pass
				924	# Convert a timezone offset into seconds ; -0500 -> -18000
				925	if tzoffset:
				926	if tzoffset < 0:
				927	tzsign = -1
				928	tzoffset = -tzoffset
				929	else:
				930	tzsign = 1
				931	tzoffset = tzsign * ( (tzoffset//100)3600 + (tzoffset % 100)60)
				932	return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
				933
				934
				935	def parsedate(data):
				936	"""Convert a time string to a time tuple."""
				937	t = parsedate_tz(data)
				938	if t is None:
				939	return t
				940	return t[:9]
				941
				942
				943	def mktime_tz(data):
				944	"""Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
				945	if data[9] is None:
				946	# No zone info, so localtime is better assumption than GMT
				947	return time.mktime(data[:8] + (-1,))
				948	else:
				949	t = time.mktime(data[:8] + (0,))
				950	return t - data[9] - time.timezone
				951
				952	def formatdate(timeval=None):
				953	"""Returns time format preferred for Internet standards.
				954
				955	Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
				956
				957	According to RFC 1123, day and month names must always be in
				958	English. If not for that, this code could use strftime(). It
				959	can't because strftime() honors the locale and could generated
				960	non-English names.
				961	"""
				962	if timeval is None:
				963	timeval = time.time()
				964	timeval = time.gmtime(timeval)
				965	return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
				966	("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
				967	timeval[2],
				968	("Jan", "Feb", "Mar", "Apr", "May", "Jun",
				969	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
				970	timeval[0], timeval[3], timeval[4], timeval[5])
				971
				972
				973	# When used as script, run a small test program.
				974	# The first command line argument must be a filename containing one
				975	# message in RFC-822 format.
				976
				977	if __name__ == '__main__':
				978	import sys, os
				979	file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
				980	if sys.argv[1:]: file = sys.argv[1]
				981	f = open(file, 'r')
				982	m = Message(f)
				983	print 'From:', m.getaddr('from')
				984	print 'To:', m.getaddrlist('to')
				985	print 'Subject:', m.getheader('subject')
				986	print 'Date:', m.getheader('date')
				987	date = m.getdate_tz('date')
				988	tz = date[-1]
				989	date = time.localtime(mktime_tz(date))
				990	if date:
				991	print 'ParsedDate:', time.asctime(date),
				992	hhmmss = tz
				993	hhmm, ss = divmod(hhmmss, 60)
				994	hh, mm = divmod(hhmm, 60)
				995	print "%+03d%02d" % (hh, mm),
				996	if ss: print ".%02d" % ss,
				997	print
				998	else:
				999	print 'ParsedDate:', None
				1000	m.rewindbody()
				1001	n = 0
				1002	while f.readline():
				1003	n += 1
				1004	print 'Lines:', n
				1005	print '-'*70
				1006	print 'len =', len(m)
				1007	if 'Date' in m: print 'Date =', m['Date']
				1008	if 'X-Nonsense' in m: pass
				1009	print 'keys =', m.keys()
				1010	print 'values =', m.values()
				1011	print 'items =', m.items()