Blame - Lib/email/message.py - platform/prebuilts/python/windows-x86

blob: 6752ce0fa138255f7e31320d2840abe5c903278a [file] [log] [blame]

Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	1	# Copyright (C) 2001-2007 Python Software Foundation
				2	# Author: Barry Warsaw
				3	# Contact: email-sig@python.org
				4
				5	"""Basic message object for the email package object model."""
				6
				7	__all__ = ['Message', 'EmailMessage']
				8
				9	import re
				10	import uu
				11	import quopri
				12	from io import BytesIO, StringIO
				13
				14	# Intrapackage imports
				15	from email import utils
				16	from email import errors
				17	from email._policybase import Policy, compat32
				18	from email import charset as _charset
				19	from email._encoded_words import decode_b
# Module-level alias so the rest of this file can refer to the class as
# plain `Charset`.
Charset = _charset.Charset

# Separator used when joining a header's main value and its parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')
				27
				28
				29	def _splitparam(param):
				30	# Split header parameters. BAW: this may be too simple. It isn't
				31	# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
				32	# found in the wild. We may eventually need a full fledged parser.
				33	# RDM: we might have a Header here; for now just stringify it.
				34	a, sep, b = str(param).partition(';')
				35	if not sep:
				36	return a.strip(), None
				37	return a.strip(), b.strip()
				38
				39	def _formatparam(param, value=None, quote=True):
				40	"""Convenience function to format and return a key=value pair.
				41
				42	This will quote the value if needed or if quote is true. If value is a
				43	three tuple (charset, language, value), it will be encoded according
				44	to RFC2231 rules. If it contains non-ascii characters it will likewise
				45	be encoded according to RFC2231 rules, using the utf-8 charset and
				46	a null language.
				47	"""
				48	if value is not None and len(value) > 0:
				49	# A tuple is used for RFC 2231 encoded parameter values where items
				50	# are (charset, language, value). charset is a string, not a Charset
				51	# instance. RFC 2231 encoded values are never quoted, per RFC.
				52	if isinstance(value, tuple):
				53	# Encode as per RFC 2231
				54	param += '*'
				55	value = utils.encode_rfc2231(value[2], value[0], value[1])
				56	return '%s=%s' % (param, value)
				57	else:
				58	try:
				59	value.encode('ascii')
				60	except UnicodeEncodeError:
				61	param += '*'
				62	value = utils.encode_rfc2231(value, 'utf-8', '')
				63	return '%s=%s' % (param, value)
				64	# BAW: Please check this. I think that if quote is set it should
				65	# force quoting even if not necessary.
				66	if quote or tspecials.search(value):
				67	return '%s="%s"' % (param, utils.quote(value))
				68	else:
				69	return '%s=%s' % (param, value)
				70	else:
				71	return param
				72
				73	def _parseparam(s):
				74	# RDM This might be a Header, so for now stringify it.
				75	s = ';' + str(s)
				76	plist = []
				77	while s[:1] == ';':
				78	s = s[1:]
				79	end = s.find(';')
				80	while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
				81	end = s.find(';', end + 1)
				82	if end < 0:
				83	end = len(s)
				84	f = s[:end]
				85	if '=' in f:
				86	i = f.index('=')
				87	f = f[:i].strip().lower() + '=' + f[i+1:].strip()
				88	plist.append(f.strip())
				89	s = s[end:]
				90	return plist
				91
				92
				93	def _unquotevalue(value):
				94	# This is different than utils.collapse_rfc2231_value() because it doesn't
				95	# try to convert the value to a unicode. Message.get_param() and
				96	# Message.get_params() are both currently defined to return the tuple in
				97	# the face of RFC 2231 parameters.
				98	if isinstance(value, tuple):
				99	return value[0], value[1], utils.unquote(value[2])
				100	else:
				101	return utils.unquote(value)
				102
				103
				104
				105	class Message:
				106	"""Basic message object.
				107
				108	A message object is defined as something that has a bunch of RFC 2822
				109	headers and a payload. It may optionally have an envelope header
				110	(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
				111	multipart or a message/rfc822), then the payload is a list of Message
				112	objects, otherwise it is a string.
				113
				114	Message objects implement part of the `mapping' interface, which assumes
				115	there is exactly one occurrence of the header per message. Some headers
				116	do in fact appear multiple times (e.g. Received) and for those headers,
				117	you must use the explicit API to set or get all the headers. Not all of
				118	the mapping methods are implemented.
				119	"""
				120	def __init__(self, policy=compat32):
				121	self.policy = policy
				122	self._headers = []
				123	self._unixfrom = None
				124	self._payload = None
				125	self._charset = None
				126	# Defaults for multipart messages
				127	self.preamble = self.epilogue = None
				128	self.defects = []
				129	# Default content type
				130	self._default_type = 'text/plain'
				131
				132	def __str__(self):
				133	"""Return the entire formatted message as a string.
				134	"""
				135	return self.as_string()
				136
				137	def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
				138	"""Return the entire formatted message as a string.
				139
				140	Optional 'unixfrom', when true, means include the Unix From_ envelope
				141	header. For backward compatibility reasons, if maxheaderlen is
				142	not specified it defaults to 0, so you must override it explicitly
				143	if you want a different maxheaderlen. 'policy' is passed to the
Haibo Huang	5eba2b4	2021-01-22 11:22:02 -0800	[diff] [blame]	144	Generator instance used to serialize the message; if it is not
Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	145	specified the policy associated with the message instance is used.
				146
				147	If the message object contains binary data that is not encoded
				148	according to RFC standards, the non-compliant data will be replaced by
				149	unicode "unknown character" code points.
				150	"""
				151	from email.generator import Generator
				152	policy = self.policy if policy is None else policy
				153	fp = StringIO()
				154	g = Generator(fp,
				155	mangle_from_=False,
				156	maxheaderlen=maxheaderlen,
				157	policy=policy)
				158	g.flatten(self, unixfrom=unixfrom)
				159	return fp.getvalue()
				160
				161	def __bytes__(self):
				162	"""Return the entire formatted message as a bytes object.
				163	"""
				164	return self.as_bytes()
				165
				166	def as_bytes(self, unixfrom=False, policy=None):
				167	"""Return the entire formatted message as a bytes object.
				168
				169	Optional 'unixfrom', when true, means include the Unix From_ envelope
				170	header. 'policy' is passed to the BytesGenerator instance used to
				171	serialize the message; if not specified the policy associated with
				172	the message instance is used.
				173	"""
				174	from email.generator import BytesGenerator
				175	policy = self.policy if policy is None else policy
				176	fp = BytesIO()
				177	g = BytesGenerator(fp, mangle_from_=False, policy=policy)
				178	g.flatten(self, unixfrom=unixfrom)
				179	return fp.getvalue()
				180
				181	def is_multipart(self):
				182	"""Return True if the message consists of multiple parts."""
				183	return isinstance(self._payload, list)
				184
				185	#
				186	# Unix From_ line
				187	#
				188	def set_unixfrom(self, unixfrom):
				189	self._unixfrom = unixfrom
				190
				191	def get_unixfrom(self):
				192	return self._unixfrom
				193
				194	#
				195	# Payload manipulation.
				196	#
				197	def attach(self, payload):
				198	"""Add the given payload to the current payload.
				199
				200	The current payload will always be a list of objects after this method
				201	is called. If you want to set the payload to a scalar object, use
				202	set_payload() instead.
				203	"""
				204	if self._payload is None:
				205	self._payload = [payload]
				206	else:
				207	try:
				208	self._payload.append(payload)
				209	except AttributeError:
				210	raise TypeError("Attach is not valid on a message with a"
				211	" non-multipart payload")
				212
    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or
        one of the uuencode names handled below.  If some other encoding is
        used, or the header is missing, or if the payload has bogus
        uuencoded data, the payload is returned as-is (as bytes when the
        stored payload is a string).  Defects found while decoding base64
        are reported through the policy's handle_defect().

        If the message is a multipart and the decode flag is True, then None
        is returned.

        Raises TypeError if i is given and the payload is not a list.
        """
        # Here is the logic table for this code, based on the email5.0.0 code:
        #   i     decode  is_multipart  result
        # ------  ------  ------------  ------------------------------
        #  None   True    True          None
        #   i     True    True          None
        #  None   False   True          _payload (a list)
        #   i     False   True          _payload element i (a Message)
        #   i     False   False         error (not a list)
        #   i     True    False         error (not a list)
        #  None   False   False         _payload
        #  None   True    False         _payload decoded (bytes)
        # Note that Barry planned to factor out the 'decode' case, but that
        # isn't so easy now that we handle the 8 bit data, which needs to be
        # converted in both the decode and non-decode path.
        if self.is_multipart():
            if decode:
                return None
            if i is None:
                return self._payload
            else:
                return self._payload[i]
        # For backward compatibility, Use isinstance and this error message
        # instead of the more logical is_multipart test.
        if i is not None and not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        payload = self._payload
        # cte might be a Header, so for now stringify it.
        cte = str(self.get('content-transfer-encoding', '')).lower()
        # payload may be bytes here.
        if isinstance(payload, str):
            if utils._has_surrogates(payload):
                # Turn the surrogate-escaped string back into bytes (the
                # inverse of the surrogateescape decode in set_payload()).
                bpayload = payload.encode('ascii', 'surrogateescape')
                if not decode:
                    # Re-decode with the declared charset; fall back to
                    # ascii when the charset name is unknown to Python.
                    try:
                        payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
                    except LookupError:
                        payload = bpayload.decode('ascii', 'replace')
            elif decode:
                try:
                    bpayload = payload.encode('ascii')
                except UnicodeError:
                    # This won't happen for RFC compliant messages (messages
                    # containing only ASCII code points in the unicode input).
                    # If it does happen, turn the string into bytes in a way
                    # guaranteed not to fail.
                    bpayload = payload.encode('raw-unicode-escape')
        if not decode:
            return payload
        # NOTE(review): bpayload is only bound above when payload is a str.
        # A non-str payload combined with decode=True and one of the CTE
        # values below would reach an unbound local -- confirm that callers
        # never store such a payload.
        if cte == 'quoted-printable':
            return quopri.decodestring(bpayload)
        elif cte == 'base64':
            # XXX: this is a bit of a hack; decode_b should probably be factored
            # out somewhere, but I haven't figured out where yet.
            value, defects = decode_b(b''.join(bpayload.splitlines()))
            for defect in defects:
                self.policy.handle_defect(self, defect)
            return value
        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            in_file = BytesIO(bpayload)
            out_file = BytesIO()
            try:
                uu.decode(in_file, out_file, quiet=True)
                return out_file.getvalue()
            except uu.Error:
                # Some decoding problem
                return bpayload
        # Unrecognized or absent CTE: hand back the raw bytes form for a
        # string payload, or the payload object itself otherwise.
        if isinstance(payload, str):
            return bpayload
        return payload
				302
				303	def set_payload(self, payload, charset=None):
				304	"""Set the payload to the given value.
				305
				306	Optional charset sets the message's default character set. See
				307	set_charset() for details.
				308	"""
				309	if hasattr(payload, 'encode'):
				310	if charset is None:
				311	self._payload = payload
				312	return
				313	if not isinstance(charset, Charset):
				314	charset = Charset(charset)
				315	payload = payload.encode(charset.output_charset)
				316	if hasattr(payload, 'decode'):
				317	self._payload = payload.decode('ascii', 'surrogateescape')
				318	else:
				319	self._payload = payload
				320	if charset is not None:
				321	self.set_charset(charset)
				322
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            # Removing the charset: drop the parameter and forget the object.
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        # Re-encode the payload when the charset differs from its own
        # output charset (comparison uses Charset's equality with a string).
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # cte is first tried as a callable taking the message; the
            # TypeError branch handles the case where it is not callable
            # (presumably a CTE name string -- see Charset.get_body_encoding).
            try:
                cte(self)
            except TypeError:
                # This 'if' is for backward compatibility, it allows unicode
                # through even though that won't work correctly if the
                # message is serialized.
                payload = self._payload
                if payload:
                    try:
                        payload = payload.encode('ascii', 'surrogateescape')
                    except UnicodeError:
                        payload = payload.encode(charset.output_charset)
                self._payload = charset.body_encode(payload)
                self.add_header('Content-Transfer-Encoding', cte)
				369
				370	def get_charset(self):
				371	"""Return the Charset instance associated with the message's payload.
				372	"""
				373	return self._charset
				374
				375	#
				376	# MAPPING INTERFACE (partial)
				377	#
				378	def __len__(self):
				379	"""Return the total number of headers, including duplicates."""
				380	return len(self._headers)
				381
				382	def __getitem__(self, name):
				383	"""Get a header value.
				384
				385	Return None if the header is missing instead of raising an exception.
				386
				387	Note that if the header appeared multiple times, exactly which
				388	occurrence gets returned is undefined. Use get_all() to get all
				389	the values matching a header field name.
				390	"""
				391	return self.get(name)
				392
				393	def __setitem__(self, name, val):
				394	"""Set the value of a header.
				395
				396	Note: this does not overwrite an existing header with the same field
				397	name. Use __delitem__() first to delete any existing headers.
				398	"""
				399	max_count = self.policy.header_max_count(name)
				400	if max_count:
				401	lname = name.lower()
				402	found = 0
				403	for k, v in self._headers:
				404	if k.lower() == lname:
				405	found += 1
				406	if found >= max_count:
				407	raise ValueError("There may be at most {} {} headers "
				408	"in a message".format(max_count, name))
				409	self._headers.append(self.policy.header_store_parse(name, val))
				410
				411	def __delitem__(self, name):
				412	"""Delete all occurrences of a header, if present.
				413
				414	Does not raise an exception if the header is missing.
				415	"""
				416	name = name.lower()
				417	newheaders = []
				418	for k, v in self._headers:
				419	if k.lower() != name:
				420	newheaders.append((k, v))
				421	self._headers = newheaders
				422
				423	def __contains__(self, name):
				424	return name.lower() in [k.lower() for k, v in self._headers]
				425
				426	def __iter__(self):
				427	for field, value in self._headers:
				428	yield field
				429
				430	def keys(self):
				431	"""Return a list of all the message's header field names.
				432
				433	These will be sorted in the order they appeared in the original
				434	message, or were added to the message, and may contain duplicates.
				435	Any fields deleted and re-inserted are always appended to the header
				436	list.
				437	"""
				438	return [k for k, v in self._headers]
				439
				440	def values(self):
				441	"""Return a list of all the message's header values.
				442
				443	These will be sorted in the order they appeared in the original
				444	message, or were added to the message, and may contain duplicates.
				445	Any fields deleted and re-inserted are always appended to the header
				446	list.
				447	"""
				448	return [self.policy.header_fetch_parse(k, v)
				449	for k, v in self._headers]
				450
				451	def items(self):
				452	"""Get all the message's header fields and values.
				453
				454	These will be sorted in the order they appeared in the original
				455	message, or were added to the message, and may contain duplicates.
				456	Any fields deleted and re-inserted are always appended to the header
				457	list.
				458	"""
				459	return [(k, self.policy.header_fetch_parse(k, v))
				460	for k, v in self._headers]
				461
				462	def get(self, name, failobj=None):
				463	"""Get a header value.
				464
				465	Like __getitem__() but return failobj instead of None when the field
				466	is missing.
				467	"""
				468	name = name.lower()
				469	for k, v in self._headers:
				470	if k.lower() == name:
				471	return self.policy.header_fetch_parse(k, v)
				472	return failobj
				473
				474	#
				475	# "Internal" methods (public API, but only intended for use by a parser
				476	# or generator, not normal application code.
				477	#
				478
				479	def set_raw(self, name, value):
				480	"""Store name and value in the model without modification.
				481
				482	This is an "internal" API, intended only for use by a parser.
				483	"""
				484	self._headers.append((name, value))
				485
				486	def raw_items(self):
				487	"""Return the (name, value) header pairs without modification.
				488
				489	This is an "internal" API, intended only for use by a generator.
				490	"""
				491	return iter(self._headers.copy())
				492
				493	#
				494	# Additional useful stuff
				495	#
				496
				497	def get_all(self, name, failobj=None):
				498	"""Return a list of all the values for the named field.
				499
				500	These will be sorted in the order they appeared in the original
				501	message, and may contain duplicates. Any fields deleted and
				502	re-inserted are always appended to the header list.
				503
				504	If no such fields exist, failobj is returned (defaults to None).
				505	"""
				506	values = []
				507	name = name.lower()
				508	for k, v in self._headers:
				509	if k.lower() == name:
				510	values.append(self.policy.header_fetch_parse(k, v))
				511	if not values:
				512	return failobj
				513	return values
				514
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add and _value its main value (left
        out of the header when None).  Keyword arguments become header
        parameters, with underscores in their names converted to dashes.
        A parameter is normally added as key="value"; when its value is
        None only the key is added.  A value containing non-ASCII
        characters may be given as a three-tuple of (charset, language,
        value), in which case it is encoded according to RFC 2231 rules;
        otherwise it is encoded using the utf-8 charset and a language
        of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        pieces = [] if _value is None else [_value]
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            pieces.append(dashed if val is None else _formatparam(dashed, val))
        self[_name] = SEMISPACE.join(pieces)
				544
				545	def replace_header(self, _name, _value):
				546	"""Replace a header.
				547
				548	Replace the first matching header found in the message, retaining
				549	header order and case. If no matching header was found, a KeyError is
				550	raised.
				551	"""
				552	_name = _name.lower()
				553	for i, (k, v) in zip(range(len(self._headers)), self._headers):
				554	if k.lower() == _name:
				555	self._headers[i] = self.policy.header_store_parse(k, _value)
				556	break
				557	else:
				558	raise KeyError(_name)
				559
				560	#
				561	# Use these three methods instead of the three above.
				562	#
				563
				564	def get_content_type(self):
				565	"""Return the message's content type.
				566
				567	The returned string is coerced to lower case of the form
				568	`maintype/subtype'. If there was no Content-Type header in the
				569	message, the default type as given by get_default_type() will be
				570	returned. Since according to RFC 2045, messages always have a default
				571	type this will always return a value.
				572
				573	RFC 2045 defines a message's default type to be text/plain unless it
				574	appears inside a multipart/digest container, in which case it would be
				575	message/rfc822.
				576	"""
				577	missing = object()
				578	value = self.get('content-type', missing)
				579	if value is missing:
				580	# This should have no parameters
				581	return self.get_default_type()
				582	ctype = _splitparam(value)[0].lower()
				583	# RFC 2045, section 5.2 says if its invalid, use text/plain
				584	if ctype.count('/') != 1:
				585	return 'text/plain'
				586	return ctype
				587
				588	def get_content_maintype(self):
				589	"""Return the message's main content type.
				590
				591	This is the `maintype' part of the string returned by
				592	get_content_type().
				593	"""
				594	ctype = self.get_content_type()
				595	return ctype.split('/')[0]
				596
				597	def get_content_subtype(self):
				598	"""Returns the message's sub-content type.
				599
				600	This is the `subtype' part of the string returned by
				601	get_content_type().
				602	"""
				603	ctype = self.get_content_type()
				604	return ctype.split('/')[1]
				605
				606	def get_default_type(self):
				607	"""Return the `default' content type.
				608
				609	Most messages have a default content type of text/plain, except for
				610	messages that are subparts of multipart/digest containers. Such
				611	subparts have a default content type of message/rfc822.
				612	"""
				613	return self._default_type
				614
				615	def set_default_type(self, ctype):
				616	"""Set the `default' content type.
				617
				618	ctype should be either "text/plain" or "message/rfc822", although this
				619	is not enforced. The default content type is not stored in the
				620	Content-Type header.
				621	"""
				622	self._default_type = ctype
				623
				624	def _get_params_preserve(self, failobj, header):
				625	# Like get_params() but preserves the quoting of values. BAW:
				626	# should this be part of the public interface?
				627	missing = object()
				628	value = self.get(header, missing)
				629	if value is missing:
				630	return failobj
				631	params = []
				632	for p in _parseparam(value):
				633	try:
				634	name, val = p.split('=', 1)
				635	name = name.strip()
				636	val = val.strip()
				637	except ValueError:
				638	# Must have been a bare attribute
				639	name = p.strip()
				640	val = ''
				641	params.append((name, val))
				642	params = utils.decode_params(params)
				643	return params
				644
				645	def get_params(self, failobj=None, header='content-type', unquote=True):
				646	"""Return the message's Content-Type parameters, as a list.
				647
				648	The elements of the returned list are 2-tuples of key/value pairs, as
				649	split on the `=' sign. The left hand side of the `=' is the key,
				650	while the right hand side is the value. If there is no `=' sign in
				651	the parameter the value is the empty string. The value is as
				652	described in the get_param() method.
				653
				654	Optional failobj is the object to return if there is no Content-Type
				655	header. Optional header is the header to search instead of
				656	Content-Type. If unquote is True, the value is unquoted.
				657	"""
				658	missing = object()
				659	params = self._get_params_preserve(missing, header)
				660	if params is missing:
				661	return failobj
				662	if unquote:
				663	return [(k, _unquotevalue(v)) for k, v in params]
				664	else:
				665	return params
				666
				667	def get_param(self, param, failobj=None, header='content-type',
				668	unquote=True):
				669	"""Return the parameter value if found in the Content-Type header.
				670
				671	Optional failobj is the object to return if there is no Content-Type
				672	header, or the Content-Type header has no such parameter. Optional
				673	header is the header to search instead of Content-Type.
				674
				675	Parameter keys are always compared case insensitively. The return
				676	value can either be a string, or a 3-tuple if the parameter was RFC
				677	2231 encoded. When it's a 3-tuple, the elements of the value are of
				678	the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
				679	LANGUAGE can be None, in which case you should consider VALUE to be
				680	encoded in the us-ascii charset. You can usually ignore LANGUAGE.
				681	The parameter value (either the returned string, or the VALUE item in
				682	the 3-tuple) is always unquoted, unless unquote is set to False.
				683
				684	If your application doesn't care whether the parameter was RFC 2231
				685	encoded, it can turn the return value into a string as follows:
				686
				687	rawparam = msg.get_param('foo')
				688	param = email.utils.collapse_rfc2231_value(rawparam)
				689
				690	"""
				691	if header not in self:
				692	return failobj
				693	for k, v in self._get_params_preserve(failobj, header):
				694	if k.lower() == param.lower():
				695	if unquote:
				696	return _unquotevalue(v)
				697	else:
				698	return v
				699	return failobj
				700
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language='', replace=False):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and
        all parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to
        RFC 2231.  Optional language specifies the RFC 2231 language,
        defaulting to the empty string.  Both charset and language should
        be strings.

        When `replace` is true the header is rewritten in place via
        replace_header(); otherwise it is deleted and appended again.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            current = 'text/plain'
        else:
            current = self.get(header)

        if self.get_param(param, header=header):
            # The parameter exists: rebuild the whole value, swapping in
            # the new parameter where the old one was.
            rebuilt = ''
            for old_name, old_val in self.get_params(header=header,
                                                     unquote=requote):
                if old_name.lower() == param.lower():
                    piece = _formatparam(param, value, requote)
                else:
                    piece = _formatparam(old_name, old_val, requote)
                rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
            current = rebuilt
        elif not current:
            current = _formatparam(param, value, requote)
        else:
            current = SEMISPACE.join(
                [current, _formatparam(param, value, requote)])

        if current != self.get(header):
            if replace:
                self.replace_header(header, current)
            else:
                del self[header]
                self[header] = current
				751
				752	def del_param(self, param, header='content-type', requote=True):
				753	"""Remove the given parameter completely from the Content-Type header.
				754
				755	The header will be re-written in place without the parameter or its
				756	value. All values will be quoted as necessary unless requote is
				757	False. Optional header specifies an alternative to the Content-Type
				758	header.
				759	"""
				760	if header not in self:
				761	return
				762	new_ctype = ''
				763	for p, v in self.get_params(header=header, unquote=requote):
				764	if p.lower() != param.lower():
				765	if not new_ctype:
				766	new_ctype = _formatparam(p, v, requote)
				767	else:
				768	new_ctype = SEMISPACE.join([new_ctype,
				769	_formatparam(p, v, requote)])
				770	if new_ctype != self.get(header):
				771	del self[header]
				772	self[header] = new_ctype
				773
				774	def set_type(self, type, header='Content-Type', requote=True):
				775	"""Set the main type and subtype for the Content-Type header.
				776
				777	type must be a string in the form "maintype/subtype", otherwise a
				778	ValueError is raised.
				779
				780	This method replaces the Content-Type header, keeping all the
				781	parameters in place. If requote is False, this leaves the existing
				782	header's quoting as is. Otherwise, the parameters will be quoted (the
				783	default).
				784
				785	An alternative header can be specified in the header argument. When
				786	the Content-Type header is set, we'll always also add a MIME-Version
				787	header.
				788	"""
				789	# BAW: should we be strict?
				790	if not type.count('/') == 1:
				791	raise ValueError
				792	# Set the Content-Type, you get a MIME-Version
				793	if header.lower() == 'content-type':
				794	del self['mime-version']
				795	self['MIME-Version'] = '1.0'
				796	if header not in self:
				797	self[header] = type
				798	return
				799	params = self.get_params(header=header, unquote=requote)
				800	del self[header]
				801	self[header] = type
				802	# Skip the first param; it's the old type.
				803	for p, v in params[1:]:
				804	self.set_param(p, v, header, requote)
				805
    def get_filename(self, failobj=None):
        """Return the payload's filename, or failobj when none is recorded.

        The `filename' parameter of the Content-Disposition header is tried
        first; if it is absent, the `name' parameter of the Content-Type
        header is used instead.  The value found is RFC 2231-collapsed and
        stripped of surrounding whitespace.
        """
        sentinel = object()
        lookups = (('filename', 'content-disposition'),
                   ('name', 'content-type'))
        for param, header in lookups:
            raw = self.get_param(param, sentinel, header)
            if raw is not sentinel:
                return utils.collapse_rfc2231_value(raw).strip()
        return failobj
				821
    def get_boundary(self, failobj=None):
        """Return the Content-Type `boundary' parameter, or failobj if absent.

        The value is RFC 2231-collapsed (and therefore unquoted) and has any
        trailing whitespace removed.
        """
        sentinel = object()
        raw = self.get_param('boundary', sentinel)
        if raw is not sentinel:
            # RFC 2046 says that boundaries may begin but not end in w/s
            return utils.collapse_rfc2231_value(raw).rstrip()
        return failobj
				834
    def set_boundary(self, boundary):
        """Store boundary as the `boundary' parameter of Content-Type.

        Unlike deleting the header and adding a fresh one, this rewrites the
        Content-Type header in place, so its position among the message's
        headers is preserved.  An existing boundary parameter is replaced;
        if there is none, one is appended after the other parameters.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        sentinel = object()
        current = self._get_params_preserve(sentinel, 'content-type')
        if current is sentinel:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted = '"%s"' % boundary
        updated = []
        replaced = False
        for key, value in current:
            if key.lower() == 'boundary':
                updated.append(('boundary', quoted))
                replaced = True
            else:
                updated.append((key, value))
        if not replaced:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            updated.append(('boundary', quoted))
        # A parameter with an empty value is emitted as a bare token.
        new_value = SEMISPACE.join(
            key if value == '' else '%s=%s' % (key, value)
            for key, value in updated)
        # Swap the new value in wherever a Content-Type header occurs,
        # leaving every other header untouched and in its original slot.
        self._headers = [
            (self.policy.header_store_parse(name, new_value)
             if name.lower() == 'content-type' else (name, old_value))
            for name, old_value in self._headers]
				880
    def get_content_charset(self, failobj=None):
        """Return the lower-cased `charset' parameter of Content-Type.

        failobj is returned when there is no Content-Type header, when the
        header carries no charset parameter, or when the charset name
        contains characters outside the us-ascii range.
        """
        sentinel = object()
        value = self.get_param('charset', sentinel)
        if value is sentinel:
            return failobj
        if isinstance(value, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            declared = value[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                value = str(value[2].encode('raw-unicode-escape'), declared)
            except (LookupError, UnicodeError):
                # Decoding failed: fall back to the raw text as given.
                value = value[2]
        # charset characters must be in us-ascii range
        try:
            value.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return value.lower()
				910
    def get_charsets(self, failobj=None):
        """Return the charsets of this message and all of its subparts.

        The result has one entry per part visited by walk(), starting with
        this message itself, so a non-multipart message produces a list of
        length 1.  Each entry is whatever get_content_charset(failobj)
        returns for that part: the charset name, or failobj when the part
        does not declare one.
        """
        collected = []
        for subpart in self.walk():
            collected.append(subpart.get_content_charset(failobj))
        return collected
				928
    def get_content_disposition(self):
        """Return the lower-cased Content-Disposition value, or None.

        Only the part of the header before the first ';' is returned, so
        parameters such as filename are not included.  Per RFC 2183 the
        expected values are 'inline' and 'attachment'; None means the header
        is absent.
        """
        raw = self.get('content-disposition')
        return None if raw is None else _splitparam(raw)[0].lower()
				940
				941	# I.e. def walk(self): ...
				942	from email.iterators import walk
				943
				944
				945	class MIMEPart(Message):
				946
				947	def __init__(self, policy=None):
				948	if policy is None:
				949	from email.policy import default
				950	policy = default
Yi Kong	7119932	2022-08-30 15:53:45 +0800	[diff] [blame]	951	super().__init__(policy)
Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	952
				953
				954	def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
				955	"""Return the entire formatted message as a string.
				956
				957	Optional 'unixfrom', when true, means include the Unix From_ envelope
				958	header. maxheaderlen is retained for backward compatibility with the
				959	base Message class, but defaults to None, meaning that the policy value
				960	for max_line_length controls the header maximum length. 'policy' is
Haibo Huang	5eba2b4	2021-01-22 11:22:02 -0800	[diff] [blame]	961	passed to the Generator instance used to serialize the message; if it
Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	962	is not specified the policy associated with the message instance is
				963	used.
				964	"""
				965	policy = self.policy if policy is None else policy
				966	if maxheaderlen is None:
				967	maxheaderlen = policy.max_line_length
Yi Kong	7119932	2022-08-30 15:53:45 +0800	[diff] [blame]	968	return super().as_string(unixfrom, maxheaderlen, policy)
Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	969
				970	def __str__(self):
				971	return self.as_string(policy=self.policy.clone(utf8=True))
				972
				973	def is_attachment(self):
				974	c_d = self.get('content-disposition')
				975	return False if c_d is None else c_d.content_disposition == 'attachment'
				976
				977	def _find_body(self, part, preferencelist):
				978	if part.is_attachment():
				979	return
				980	maintype, subtype = part.get_content_type().split('/')
				981	if maintype == 'text':
				982	if subtype in preferencelist:
				983	yield (preferencelist.index(subtype), part)
				984	return
Yi Kong	7119932	2022-08-30 15:53:45 +0800	[diff] [blame]	985	if maintype != 'multipart' or not self.is_multipart():
Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	986	return
				987	if subtype != 'related':
				988	for subpart in part.iter_parts():
				989	yield from self._find_body(subpart, preferencelist)
				990	return
				991	if 'related' in preferencelist:
				992	yield (preferencelist.index('related'), part)
				993	candidate = None
				994	start = part.get_param('start')
				995	if start:
				996	for subpart in part.iter_parts():
				997	if subpart['content-id'] == start:
				998	candidate = subpart
				999	break
				1000	if candidate is None:
				1001	subparts = part.get_payload()
				1002	candidate = subparts[0] if subparts else None
				1003	if candidate is not None:
				1004	yield from self._find_body(candidate, preferencelist)
				1005
				1006	def get_body(self, preferencelist=('related', 'html', 'plain')):
				1007	"""Return best candidate mime part for display as 'body' of message.
				1008
				1009	Do a depth first search, starting with self, looking for the first part
				1010	matching each of the items in preferencelist, and return the part
				1011	corresponding to the first item that has a match, or None if no items
				1012	have a match. If 'related' is not included in preferencelist, consider
				1013	the root part of any multipart/related encountered as a candidate
				1014	match. Ignore parts with 'Content-Disposition: attachment'.
				1015	"""
				1016	best_prio = len(preferencelist)
				1017	body = None
				1018	for prio, part in self._find_body(self, preferencelist):
				1019	if prio < best_prio:
				1020	best_prio = prio
				1021	body = part
				1022	if prio == 0:
				1023	break
				1024	return body
				1025
				1026	_body_types = {('text', 'plain'),
				1027	('text', 'html'),
				1028	('multipart', 'related'),
				1029	('multipart', 'alternative')}
				1030	def iter_attachments(self):
				1031	"""Return an iterator over the non-main parts of a multipart.
				1032
				1033	Skip the first of each occurrence of text/plain, text/html,
				1034	multipart/related, or multipart/alternative in the multipart (unless
				1035	they have a 'Content-Disposition: attachment' header) and include all
				1036	remaining subparts in the returned iterator. When applied to a
				1037	multipart/related, return all parts except the root part. Return an
				1038	empty iterator when applied to a multipart/alternative or a
				1039	non-multipart.
				1040	"""
				1041	maintype, subtype = self.get_content_type().split('/')
				1042	if maintype != 'multipart' or subtype == 'alternative':
				1043	return
				1044	payload = self.get_payload()
				1045	# Certain malformed messages can have content type set to `multipart/*`
				1046	# but still have single part body, in which case payload.copy() can
				1047	# fail with AttributeError.
				1048	try:
				1049	parts = payload.copy()
				1050	except AttributeError:
				1051	# payload is not a list, it is most probably a string.
				1052	return
				1053
				1054	if maintype == 'multipart' and subtype == 'related':
				1055	# For related, we treat everything but the root as an attachment.
				1056	# The root may be indicated by 'start'; if there's no start or we
				1057	# can't find the named start, treat the first subpart as the root.
				1058	start = self.get_param('start')
				1059	if start:
				1060	found = False
				1061	attachments = []
				1062	for part in parts:
				1063	if part.get('content-id') == start:
				1064	found = True
				1065	else:
				1066	attachments.append(part)
				1067	if found:
				1068	yield from attachments
				1069	return
				1070	parts.pop(0)
				1071	yield from parts
				1072	return
				1073	# Otherwise we more or less invert the remaining logic in get_body.
				1074	# This only really works in edge cases (ex: non-text related or
				1075	# alternatives) if the sending agent sets content-disposition.
				1076	seen = [] # Only skip the first example of each candidate type.
				1077	for part in parts:
				1078	maintype, subtype = part.get_content_type().split('/')
				1079	if ((maintype, subtype) in self._body_types and
				1080	not part.is_attachment() and subtype not in seen):
				1081	seen.append(subtype)
				1082	continue
				1083	yield part
				1084
				1085	def iter_parts(self):
				1086	"""Return an iterator over all immediate subparts of a multipart.
				1087
				1088	Return an empty iterator for a non-multipart.
				1089	"""
Yi Kong	7119932	2022-08-30 15:53:45 +0800	[diff] [blame]	1090	if self.is_multipart():
Haibo Huang	d883030	2020-03-03 10:09:46 -0800	[diff] [blame]	1091	yield from self.get_payload()
				1092
				1093	def get_content(self, args, content_manager=None, *kw):
				1094	if content_manager is None:
				1095	content_manager = self.policy.content_manager
				1096	return content_manager.get_content(self, args, *kw)
				1097
				1098	def set_content(self, args, content_manager=None, *kw):
				1099	if content_manager is None:
				1100	content_manager = self.policy.content_manager
				1101	content_manager.set_content(self, args, *kw)
				1102
				1103	def _make_multipart(self, subtype, disallowed_subtypes, boundary):
				1104	if self.get_content_maintype() == 'multipart':
				1105	existing_subtype = self.get_content_subtype()
				1106	disallowed_subtypes = disallowed_subtypes + (subtype,)
				1107	if existing_subtype in disallowed_subtypes:
				1108	raise ValueError("Cannot convert {} to {}".format(
				1109	existing_subtype, subtype))
				1110	keep_headers = []
				1111	part_headers = []
				1112	for name, value in self._headers:
				1113	if name.lower().startswith('content-'):
				1114	part_headers.append((name, value))
				1115	else:
				1116	keep_headers.append((name, value))
				1117	if part_headers:
				1118	# There is existing content, move it to the first subpart.
				1119	part = type(self)(policy=self.policy)
				1120	part._headers = part_headers
				1121	part._payload = self._payload
				1122	self._payload = [part]
				1123	else:
				1124	self._payload = []
				1125	self._headers = keep_headers
				1126	self['Content-Type'] = 'multipart/' + subtype
				1127	if boundary is not None:
				1128	self.set_param('boundary', boundary)
				1129
				1130	def make_related(self, boundary=None):
				1131	self._make_multipart('related', ('alternative', 'mixed'), boundary)
				1132
				1133	def make_alternative(self, boundary=None):
				1134	self._make_multipart('alternative', ('mixed',), boundary)
				1135
				1136	def make_mixed(self, boundary=None):
				1137	self._make_multipart('mixed', (), boundary)
				1138
				1139	def _add_multipart(self, _subtype, args, _disp=None, *kw):
				1140	if (self.get_content_maintype() != 'multipart' or
				1141	self.get_content_subtype() != _subtype):
				1142	getattr(self, 'make_' + _subtype)()
				1143	part = type(self)(policy=self.policy)
				1144	part.set_content(args, *kw)
				1145	if _disp and 'content-disposition' not in part:
				1146	part['Content-Disposition'] = _disp
				1147	self.attach(part)
				1148
				1149	def add_related(self, args, *kw):
				1150	self._add_multipart('related', args, _disp='inline', *kw)
				1151
				1152	def add_alternative(self, args, *kw):
				1153	self._add_multipart('alternative', args, *kw)
				1154
				1155	def add_attachment(self, args, *kw):
				1156	self._add_multipart('mixed', args, _disp='attachment', *kw)
				1157
				1158	def clear(self):
				1159	self._headers = []
				1160	self._payload = None
				1161
				1162	def clear_content(self):
				1163	self._headers = [(n, v) for n, v in self._headers
				1164	if not n.lower().startswith('content-')]
				1165	self._payload = None
				1166
				1167
				1168	class EmailMessage(MIMEPart):
				1169
				1170	def set_content(self, args, *kw):
				1171	super().set_content(args, *kw)
				1172	if 'MIME-Version' not in self:
				1173	self['MIME-Version'] = '1.0'