# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message', 'EmailMessage']
8
9import re
10import uu
11import quopri
12from io import BytesIO, StringIO
13
14# Intrapackage imports
15from email import utils
16from email import errors
17from email._policybase import Policy, compat32
18from email import charset as _charset
19from email._encoded_words import decode_b
20Charset = _charset.Charset
21
22SEMISPACE = '; '
23
24# Regular expression that matches `special' characters in parameters, the
25# existence of which force quoting of the parameter value.
26tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
29def _splitparam(param):
30 # Split header parameters. BAW: this may be too simple. It isn't
31 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
32 # found in the wild. We may eventually need a full fledged parser.
33 # RDM: we might have a Header here; for now just stringify it.
34 a, sep, b = str(param).partition(';')
35 if not sep:
36 return a.strip(), None
37 return a.strip(), b.strip()
38
39def _formatparam(param, value=None, quote=True):
40 """Convenience function to format and return a key=value pair.
41
42 This will quote the value if needed or if quote is true. If value is a
43 three tuple (charset, language, value), it will be encoded according
44 to RFC2231 rules. If it contains non-ascii characters it will likewise
45 be encoded according to RFC2231 rules, using the utf-8 charset and
46 a null language.
47 """
48 if value is not None and len(value) > 0:
49 # A tuple is used for RFC 2231 encoded parameter values where items
50 # are (charset, language, value). charset is a string, not a Charset
51 # instance. RFC 2231 encoded values are never quoted, per RFC.
52 if isinstance(value, tuple):
53 # Encode as per RFC 2231
54 param += '*'
55 value = utils.encode_rfc2231(value[2], value[0], value[1])
56 return '%s=%s' % (param, value)
57 else:
58 try:
59 value.encode('ascii')
60 except UnicodeEncodeError:
61 param += '*'
62 value = utils.encode_rfc2231(value, 'utf-8', '')
63 return '%s=%s' % (param, value)
64 # BAW: Please check this. I think that if quote is set it should
65 # force quoting even if not necessary.
66 if quote or tspecials.search(value):
67 return '%s="%s"' % (param, utils.quote(value))
68 else:
69 return '%s=%s' % (param, value)
70 else:
71 return param
72
73def _parseparam(s):
74 # RDM This might be a Header, so for now stringify it.
75 s = ';' + str(s)
76 plist = []
77 while s[:1] == ';':
78 s = s[1:]
79 end = s.find(';')
80 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
81 end = s.find(';', end + 1)
82 if end < 0:
83 end = len(s)
84 f = s[:end]
85 if '=' in f:
86 i = f.index('=')
87 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
88 plist.append(f.strip())
89 s = s[end:]
90 return plist
91
92
93def _unquotevalue(value):
94 # This is different than utils.collapse_rfc2231_value() because it doesn't
95 # try to convert the value to a unicode. Message.get_param() and
96 # Message.get_params() are both currently defined to return the tuple in
97 # the face of RFC 2231 parameters.
98 if isinstance(value, tuple):
99 return value[0], value[1], utils.unquote(value[2])
100 else:
101 return utils.unquote(value)
102
103
104
105class Message:
106 """Basic message object.
107
108 A message object is defined as something that has a bunch of RFC 2822
109 headers and a payload. It may optionally have an envelope header
110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
111 multipart or a message/rfc822), then the payload is a list of Message
112 objects, otherwise it is a string.
113
114 Message objects implement part of the `mapping' interface, which assumes
115 there is exactly one occurrence of the header per message. Some headers
116 do in fact appear multiple times (e.g. Received) and for those headers,
117 you must use the explicit API to set or get all the headers. Not all of
118 the mapping methods are implemented.
119 """
120 def __init__(self, policy=compat32):
121 self.policy = policy
122 self._headers = []
123 self._unixfrom = None
124 self._payload = None
125 self._charset = None
126 # Defaults for multipart messages
127 self.preamble = self.epilogue = None
128 self.defects = []
129 # Default content type
130 self._default_type = 'text/plain'
131
132 def __str__(self):
133 """Return the entire formatted message as a string.
134 """
135 return self.as_string()
136
137 def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
138 """Return the entire formatted message as a string.
139
140 Optional 'unixfrom', when true, means include the Unix From_ envelope
141 header. For backward compatibility reasons, if maxheaderlen is
142 not specified it defaults to 0, so you must override it explicitly
143 if you want a different maxheaderlen. 'policy' is passed to the
Haibo Huang5eba2b42021-01-22 11:22:02 -0800144 Generator instance used to serialize the message; if it is not
Haibo Huangd8830302020-03-03 10:09:46 -0800145 specified the policy associated with the message instance is used.
146
147 If the message object contains binary data that is not encoded
148 according to RFC standards, the non-compliant data will be replaced by
149 unicode "unknown character" code points.
150 """
151 from email.generator import Generator
152 policy = self.policy if policy is None else policy
153 fp = StringIO()
154 g = Generator(fp,
155 mangle_from_=False,
156 maxheaderlen=maxheaderlen,
157 policy=policy)
158 g.flatten(self, unixfrom=unixfrom)
159 return fp.getvalue()
160
161 def __bytes__(self):
162 """Return the entire formatted message as a bytes object.
163 """
164 return self.as_bytes()
165
166 def as_bytes(self, unixfrom=False, policy=None):
167 """Return the entire formatted message as a bytes object.
168
169 Optional 'unixfrom', when true, means include the Unix From_ envelope
170 header. 'policy' is passed to the BytesGenerator instance used to
171 serialize the message; if not specified the policy associated with
172 the message instance is used.
173 """
174 from email.generator import BytesGenerator
175 policy = self.policy if policy is None else policy
176 fp = BytesIO()
177 g = BytesGenerator(fp, mangle_from_=False, policy=policy)
178 g.flatten(self, unixfrom=unixfrom)
179 return fp.getvalue()
180
181 def is_multipart(self):
182 """Return True if the message consists of multiple parts."""
183 return isinstance(self._payload, list)
184
185 #
186 # Unix From_ line
187 #
188 def set_unixfrom(self, unixfrom):
189 self._unixfrom = unixfrom
190
191 def get_unixfrom(self):
192 return self._unixfrom
193
194 #
195 # Payload manipulation.
196 #
197 def attach(self, payload):
198 """Add the given payload to the current payload.
199
200 The current payload will always be a list of objects after this method
201 is called. If you want to set the payload to a scalar object, use
202 set_payload() instead.
203 """
204 if self._payload is None:
205 self._payload = [payload]
206 else:
207 try:
208 self._payload.append(payload)
209 except AttributeError:
210 raise TypeError("Attach is not valid on a message with a"
211 " non-multipart payload")
212
213 def get_payload(self, i=None, decode=False):
214 """Return a reference to the payload.
215
216 The payload will either be a list object or a string. If you mutate
217 the list object, you modify the message's payload in place. Optional
218 i returns that index into the payload.
219
220 Optional decode is a flag indicating whether the payload should be
221 decoded or not, according to the Content-Transfer-Encoding header
222 (default is False).
223
224 When True and the message is not a multipart, the payload will be
225 decoded if this header's value is `quoted-printable' or `base64'. If
226 some other encoding is used, or the header is missing, or if the
227 payload has bogus data (i.e. bogus base64 or uuencoded data), the
228 payload is returned as-is.
229
230 If the message is a multipart and the decode flag is True, then None
231 is returned.
232 """
233 # Here is the logic table for this code, based on the email5.0.0 code:
234 # i decode is_multipart result
235 # ------ ------ ------------ ------------------------------
236 # None True True None
237 # i True True None
238 # None False True _payload (a list)
239 # i False True _payload element i (a Message)
240 # i False False error (not a list)
241 # i True False error (not a list)
242 # None False False _payload
243 # None True False _payload decoded (bytes)
244 # Note that Barry planned to factor out the 'decode' case, but that
245 # isn't so easy now that we handle the 8 bit data, which needs to be
246 # converted in both the decode and non-decode path.
247 if self.is_multipart():
248 if decode:
249 return None
250 if i is None:
251 return self._payload
252 else:
253 return self._payload[i]
254 # For backward compatibility, Use isinstance and this error message
255 # instead of the more logical is_multipart test.
256 if i is not None and not isinstance(self._payload, list):
257 raise TypeError('Expected list, got %s' % type(self._payload))
258 payload = self._payload
259 # cte might be a Header, so for now stringify it.
260 cte = str(self.get('content-transfer-encoding', '')).lower()
261 # payload may be bytes here.
262 if isinstance(payload, str):
263 if utils._has_surrogates(payload):
264 bpayload = payload.encode('ascii', 'surrogateescape')
265 if not decode:
266 try:
267 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
268 except LookupError:
269 payload = bpayload.decode('ascii', 'replace')
270 elif decode:
271 try:
272 bpayload = payload.encode('ascii')
273 except UnicodeError:
274 # This won't happen for RFC compliant messages (messages
275 # containing only ASCII code points in the unicode input).
276 # If it does happen, turn the string into bytes in a way
277 # guaranteed not to fail.
278 bpayload = payload.encode('raw-unicode-escape')
279 if not decode:
280 return payload
281 if cte == 'quoted-printable':
282 return quopri.decodestring(bpayload)
283 elif cte == 'base64':
284 # XXX: this is a bit of a hack; decode_b should probably be factored
285 # out somewhere, but I haven't figured out where yet.
286 value, defects = decode_b(b''.join(bpayload.splitlines()))
287 for defect in defects:
288 self.policy.handle_defect(self, defect)
289 return value
290 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
291 in_file = BytesIO(bpayload)
292 out_file = BytesIO()
293 try:
294 uu.decode(in_file, out_file, quiet=True)
295 return out_file.getvalue()
296 except uu.Error:
297 # Some decoding problem
298 return bpayload
299 if isinstance(payload, str):
300 return bpayload
301 return payload
302
303 def set_payload(self, payload, charset=None):
304 """Set the payload to the given value.
305
306 Optional charset sets the message's default character set. See
307 set_charset() for details.
308 """
309 if hasattr(payload, 'encode'):
310 if charset is None:
311 self._payload = payload
312 return
313 if not isinstance(charset, Charset):
314 charset = Charset(charset)
315 payload = payload.encode(charset.output_charset)
316 if hasattr(payload, 'decode'):
317 self._payload = payload.decode('ascii', 'surrogateescape')
318 else:
319 self._payload = payload
320 if charset is not None:
321 self.set_charset(charset)
322
323 def set_charset(self, charset):
324 """Set the charset of the payload to a given character set.
325
326 charset can be a Charset instance, a string naming a character set, or
327 None. If it is a string it will be converted to a Charset instance.
328 If charset is None, the charset parameter will be removed from the
329 Content-Type field. Anything else will generate a TypeError.
330
331 The message will be assumed to be of type text/* encoded with
332 charset.input_charset. It will be converted to charset.output_charset
333 and encoded properly, if needed, when generating the plain text
334 representation of the message. MIME headers (MIME-Version,
335 Content-Type, Content-Transfer-Encoding) will be added as needed.
336 """
337 if charset is None:
338 self.del_param('charset')
339 self._charset = None
340 return
341 if not isinstance(charset, Charset):
342 charset = Charset(charset)
343 self._charset = charset
344 if 'MIME-Version' not in self:
345 self.add_header('MIME-Version', '1.0')
346 if 'Content-Type' not in self:
347 self.add_header('Content-Type', 'text/plain',
348 charset=charset.get_output_charset())
349 else:
350 self.set_param('charset', charset.get_output_charset())
351 if charset != charset.get_output_charset():
352 self._payload = charset.body_encode(self._payload)
353 if 'Content-Transfer-Encoding' not in self:
354 cte = charset.get_body_encoding()
355 try:
356 cte(self)
357 except TypeError:
358 # This 'if' is for backward compatibility, it allows unicode
359 # through even though that won't work correctly if the
360 # message is serialized.
361 payload = self._payload
362 if payload:
363 try:
364 payload = payload.encode('ascii', 'surrogateescape')
365 except UnicodeError:
366 payload = payload.encode(charset.output_charset)
367 self._payload = charset.body_encode(payload)
368 self.add_header('Content-Transfer-Encoding', cte)
369
370 def get_charset(self):
371 """Return the Charset instance associated with the message's payload.
372 """
373 return self._charset
374
375 #
376 # MAPPING INTERFACE (partial)
377 #
378 def __len__(self):
379 """Return the total number of headers, including duplicates."""
380 return len(self._headers)
381
382 def __getitem__(self, name):
383 """Get a header value.
384
385 Return None if the header is missing instead of raising an exception.
386
387 Note that if the header appeared multiple times, exactly which
388 occurrence gets returned is undefined. Use get_all() to get all
389 the values matching a header field name.
390 """
391 return self.get(name)
392
393 def __setitem__(self, name, val):
394 """Set the value of a header.
395
396 Note: this does not overwrite an existing header with the same field
397 name. Use __delitem__() first to delete any existing headers.
398 """
399 max_count = self.policy.header_max_count(name)
400 if max_count:
401 lname = name.lower()
402 found = 0
403 for k, v in self._headers:
404 if k.lower() == lname:
405 found += 1
406 if found >= max_count:
407 raise ValueError("There may be at most {} {} headers "
408 "in a message".format(max_count, name))
409 self._headers.append(self.policy.header_store_parse(name, val))
410
411 def __delitem__(self, name):
412 """Delete all occurrences of a header, if present.
413
414 Does not raise an exception if the header is missing.
415 """
416 name = name.lower()
417 newheaders = []
418 for k, v in self._headers:
419 if k.lower() != name:
420 newheaders.append((k, v))
421 self._headers = newheaders
422
423 def __contains__(self, name):
424 return name.lower() in [k.lower() for k, v in self._headers]
425
426 def __iter__(self):
427 for field, value in self._headers:
428 yield field
429
430 def keys(self):
431 """Return a list of all the message's header field names.
432
433 These will be sorted in the order they appeared in the original
434 message, or were added to the message, and may contain duplicates.
435 Any fields deleted and re-inserted are always appended to the header
436 list.
437 """
438 return [k for k, v in self._headers]
439
440 def values(self):
441 """Return a list of all the message's header values.
442
443 These will be sorted in the order they appeared in the original
444 message, or were added to the message, and may contain duplicates.
445 Any fields deleted and re-inserted are always appended to the header
446 list.
447 """
448 return [self.policy.header_fetch_parse(k, v)
449 for k, v in self._headers]
450
451 def items(self):
452 """Get all the message's header fields and values.
453
454 These will be sorted in the order they appeared in the original
455 message, or were added to the message, and may contain duplicates.
456 Any fields deleted and re-inserted are always appended to the header
457 list.
458 """
459 return [(k, self.policy.header_fetch_parse(k, v))
460 for k, v in self._headers]
461
462 def get(self, name, failobj=None):
463 """Get a header value.
464
465 Like __getitem__() but return failobj instead of None when the field
466 is missing.
467 """
468 name = name.lower()
469 for k, v in self._headers:
470 if k.lower() == name:
471 return self.policy.header_fetch_parse(k, v)
472 return failobj
473
474 #
475 # "Internal" methods (public API, but only intended for use by a parser
476 # or generator, not normal application code.
477 #
478
479 def set_raw(self, name, value):
480 """Store name and value in the model without modification.
481
482 This is an "internal" API, intended only for use by a parser.
483 """
484 self._headers.append((name, value))
485
486 def raw_items(self):
487 """Return the (name, value) header pairs without modification.
488
489 This is an "internal" API, intended only for use by a generator.
490 """
491 return iter(self._headers.copy())
492
493 #
494 # Additional useful stuff
495 #
496
497 def get_all(self, name, failobj=None):
498 """Return a list of all the values for the named field.
499
500 These will be sorted in the order they appeared in the original
501 message, and may contain duplicates. Any fields deleted and
502 re-inserted are always appended to the header list.
503
504 If no such fields exist, failobj is returned (defaults to None).
505 """
506 values = []
507 name = name.lower()
508 for k, v in self._headers:
509 if k.lower() == name:
510 values.append(self.policy.header_fetch_parse(k, v))
511 if not values:
512 return failobj
513 return values
514
515 def add_header(self, _name, _value, **_params):
516 """Extended header setting.
517
518 name is the header field to add. keyword arguments can be used to set
519 additional parameters for the header field, with underscores converted
520 to dashes. Normally the parameter will be added as key="value" unless
521 value is None, in which case only the key will be added. If a
522 parameter value contains non-ASCII characters it can be specified as a
523 three-tuple of (charset, language, value), in which case it will be
524 encoded according to RFC2231 rules. Otherwise it will be encoded using
525 the utf-8 charset and a language of ''.
526
527 Examples:
528
529 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
530 msg.add_header('content-disposition', 'attachment',
531 filename=('utf-8', '', Fußballer.ppt'))
532 msg.add_header('content-disposition', 'attachment',
533 filename='Fußballer.ppt'))
534 """
535 parts = []
536 for k, v in _params.items():
537 if v is None:
538 parts.append(k.replace('_', '-'))
539 else:
540 parts.append(_formatparam(k.replace('_', '-'), v))
541 if _value is not None:
542 parts.insert(0, _value)
543 self[_name] = SEMISPACE.join(parts)
544
545 def replace_header(self, _name, _value):
546 """Replace a header.
547
548 Replace the first matching header found in the message, retaining
549 header order and case. If no matching header was found, a KeyError is
550 raised.
551 """
552 _name = _name.lower()
553 for i, (k, v) in zip(range(len(self._headers)), self._headers):
554 if k.lower() == _name:
555 self._headers[i] = self.policy.header_store_parse(k, _value)
556 break
557 else:
558 raise KeyError(_name)
559
560 #
561 # Use these three methods instead of the three above.
562 #
563
564 def get_content_type(self):
565 """Return the message's content type.
566
567 The returned string is coerced to lower case of the form
568 `maintype/subtype'. If there was no Content-Type header in the
569 message, the default type as given by get_default_type() will be
570 returned. Since according to RFC 2045, messages always have a default
571 type this will always return a value.
572
573 RFC 2045 defines a message's default type to be text/plain unless it
574 appears inside a multipart/digest container, in which case it would be
575 message/rfc822.
576 """
577 missing = object()
578 value = self.get('content-type', missing)
579 if value is missing:
580 # This should have no parameters
581 return self.get_default_type()
582 ctype = _splitparam(value)[0].lower()
583 # RFC 2045, section 5.2 says if its invalid, use text/plain
584 if ctype.count('/') != 1:
585 return 'text/plain'
586 return ctype
587
588 def get_content_maintype(self):
589 """Return the message's main content type.
590
591 This is the `maintype' part of the string returned by
592 get_content_type().
593 """
594 ctype = self.get_content_type()
595 return ctype.split('/')[0]
596
597 def get_content_subtype(self):
598 """Returns the message's sub-content type.
599
600 This is the `subtype' part of the string returned by
601 get_content_type().
602 """
603 ctype = self.get_content_type()
604 return ctype.split('/')[1]
605
606 def get_default_type(self):
607 """Return the `default' content type.
608
609 Most messages have a default content type of text/plain, except for
610 messages that are subparts of multipart/digest containers. Such
611 subparts have a default content type of message/rfc822.
612 """
613 return self._default_type
614
615 def set_default_type(self, ctype):
616 """Set the `default' content type.
617
618 ctype should be either "text/plain" or "message/rfc822", although this
619 is not enforced. The default content type is not stored in the
620 Content-Type header.
621 """
622 self._default_type = ctype
623
624 def _get_params_preserve(self, failobj, header):
625 # Like get_params() but preserves the quoting of values. BAW:
626 # should this be part of the public interface?
627 missing = object()
628 value = self.get(header, missing)
629 if value is missing:
630 return failobj
631 params = []
632 for p in _parseparam(value):
633 try:
634 name, val = p.split('=', 1)
635 name = name.strip()
636 val = val.strip()
637 except ValueError:
638 # Must have been a bare attribute
639 name = p.strip()
640 val = ''
641 params.append((name, val))
642 params = utils.decode_params(params)
643 return params
644
645 def get_params(self, failobj=None, header='content-type', unquote=True):
646 """Return the message's Content-Type parameters, as a list.
647
648 The elements of the returned list are 2-tuples of key/value pairs, as
649 split on the `=' sign. The left hand side of the `=' is the key,
650 while the right hand side is the value. If there is no `=' sign in
651 the parameter the value is the empty string. The value is as
652 described in the get_param() method.
653
654 Optional failobj is the object to return if there is no Content-Type
655 header. Optional header is the header to search instead of
656 Content-Type. If unquote is True, the value is unquoted.
657 """
658 missing = object()
659 params = self._get_params_preserve(missing, header)
660 if params is missing:
661 return failobj
662 if unquote:
663 return [(k, _unquotevalue(v)) for k, v in params]
664 else:
665 return params
666
667 def get_param(self, param, failobj=None, header='content-type',
668 unquote=True):
669 """Return the parameter value if found in the Content-Type header.
670
671 Optional failobj is the object to return if there is no Content-Type
672 header, or the Content-Type header has no such parameter. Optional
673 header is the header to search instead of Content-Type.
674
675 Parameter keys are always compared case insensitively. The return
676 value can either be a string, or a 3-tuple if the parameter was RFC
677 2231 encoded. When it's a 3-tuple, the elements of the value are of
678 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
679 LANGUAGE can be None, in which case you should consider VALUE to be
680 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
681 The parameter value (either the returned string, or the VALUE item in
682 the 3-tuple) is always unquoted, unless unquote is set to False.
683
684 If your application doesn't care whether the parameter was RFC 2231
685 encoded, it can turn the return value into a string as follows:
686
687 rawparam = msg.get_param('foo')
688 param = email.utils.collapse_rfc2231_value(rawparam)
689
690 """
691 if header not in self:
692 return failobj
693 for k, v in self._get_params_preserve(failobj, header):
694 if k.lower() == param.lower():
695 if unquote:
696 return _unquotevalue(v)
697 else:
698 return v
699 return failobj
700
701 def set_param(self, param, value, header='Content-Type', requote=True,
702 charset=None, language='', replace=False):
703 """Set a parameter in the Content-Type header.
704
705 If the parameter already exists in the header, its value will be
706 replaced with the new value.
707
708 If header is Content-Type and has not yet been defined for this
709 message, it will be set to "text/plain" and the new parameter and
710 value will be appended as per RFC 2045.
711
712 An alternate header can be specified in the header argument, and all
713 parameters will be quoted as necessary unless requote is False.
714
715 If charset is specified, the parameter will be encoded according to RFC
716 2231. Optional language specifies the RFC 2231 language, defaulting
717 to the empty string. Both charset and language should be strings.
718 """
719 if not isinstance(value, tuple) and charset:
720 value = (charset, language, value)
721
722 if header not in self and header.lower() == 'content-type':
723 ctype = 'text/plain'
724 else:
725 ctype = self.get(header)
726 if not self.get_param(param, header=header):
727 if not ctype:
728 ctype = _formatparam(param, value, requote)
729 else:
730 ctype = SEMISPACE.join(
731 [ctype, _formatparam(param, value, requote)])
732 else:
733 ctype = ''
734 for old_param, old_value in self.get_params(header=header,
735 unquote=requote):
736 append_param = ''
737 if old_param.lower() == param.lower():
738 append_param = _formatparam(param, value, requote)
739 else:
740 append_param = _formatparam(old_param, old_value, requote)
741 if not ctype:
742 ctype = append_param
743 else:
744 ctype = SEMISPACE.join([ctype, append_param])
745 if ctype != self.get(header):
746 if replace:
747 self.replace_header(header, ctype)
748 else:
749 del self[header]
750 self[header] = ctype
751
752 def del_param(self, param, header='content-type', requote=True):
753 """Remove the given parameter completely from the Content-Type header.
754
755 The header will be re-written in place without the parameter or its
756 value. All values will be quoted as necessary unless requote is
757 False. Optional header specifies an alternative to the Content-Type
758 header.
759 """
760 if header not in self:
761 return
762 new_ctype = ''
763 for p, v in self.get_params(header=header, unquote=requote):
764 if p.lower() != param.lower():
765 if not new_ctype:
766 new_ctype = _formatparam(p, v, requote)
767 else:
768 new_ctype = SEMISPACE.join([new_ctype,
769 _formatparam(p, v, requote)])
770 if new_ctype != self.get(header):
771 del self[header]
772 self[header] = new_ctype
773
774 def set_type(self, type, header='Content-Type', requote=True):
775 """Set the main type and subtype for the Content-Type header.
776
777 type must be a string in the form "maintype/subtype", otherwise a
778 ValueError is raised.
779
780 This method replaces the Content-Type header, keeping all the
781 parameters in place. If requote is False, this leaves the existing
782 header's quoting as is. Otherwise, the parameters will be quoted (the
783 default).
784
785 An alternative header can be specified in the header argument. When
786 the Content-Type header is set, we'll always also add a MIME-Version
787 header.
788 """
789 # BAW: should we be strict?
790 if not type.count('/') == 1:
791 raise ValueError
792 # Set the Content-Type, you get a MIME-Version
793 if header.lower() == 'content-type':
794 del self['mime-version']
795 self['MIME-Version'] = '1.0'
796 if header not in self:
797 self[header] = type
798 return
799 params = self.get_params(header=header, unquote=requote)
800 del self[header]
801 self[header] = type
802 # Skip the first param; it's the old type.
803 for p, v in params[1:]:
804 self.set_param(p, v, header, requote)
805
806 def get_filename(self, failobj=None):
807 """Return the filename associated with the payload if present.
808
809 The filename is extracted from the Content-Disposition header's
810 `filename' parameter, and it is unquoted. If that header is missing
811 the `filename' parameter, this method falls back to looking for the
812 `name' parameter.
813 """
814 missing = object()
815 filename = self.get_param('filename', missing, 'content-disposition')
816 if filename is missing:
817 filename = self.get_param('name', missing, 'content-type')
818 if filename is missing:
819 return failobj
820 return utils.collapse_rfc2231_value(filename).strip()
821
822 def get_boundary(self, failobj=None):
823 """Return the boundary associated with the payload if present.
824
825 The boundary is extracted from the Content-Type header's `boundary'
826 parameter, and it is unquoted.
827 """
828 missing = object()
829 boundary = self.get_param('boundary', missing)
830 if boundary is missing:
831 return failobj
832 # RFC 2046 says that boundaries may begin but not end in w/s
833 return utils.collapse_rfc2231_value(boundary).rstrip()
834
835 def set_boundary(self, boundary):
836 """Set the boundary parameter in Content-Type to 'boundary'.
837
838 This is subtly different than deleting the Content-Type header and
839 adding a new one with a new boundary parameter via add_header(). The
840 main difference is that using the set_boundary() method preserves the
841 order of the Content-Type header in the original message.
842
843 HeaderParseError is raised if the message has no Content-Type header.
844 """
845 missing = object()
846 params = self._get_params_preserve(missing, 'content-type')
847 if params is missing:
848 # There was no Content-Type header, and we don't know what type
849 # to set it to, so raise an exception.
850 raise errors.HeaderParseError('No Content-Type header found')
851 newparams = []
852 foundp = False
853 for pk, pv in params:
854 if pk.lower() == 'boundary':
855 newparams.append(('boundary', '"%s"' % boundary))
856 foundp = True
857 else:
858 newparams.append((pk, pv))
859 if not foundp:
860 # The original Content-Type header had no boundary attribute.
861 # Tack one on the end. BAW: should we raise an exception
862 # instead???
863 newparams.append(('boundary', '"%s"' % boundary))
864 # Replace the existing Content-Type header with the new value
865 newheaders = []
866 for h, v in self._headers:
867 if h.lower() == 'content-type':
868 parts = []
869 for k, v in newparams:
870 if v == '':
871 parts.append(k)
872 else:
873 parts.append('%s=%s' % (k, v))
874 val = SEMISPACE.join(parts)
875 newheaders.append(self.policy.header_store_parse(h, val))
876
877 else:
878 newheaders.append((h, v))
879 self._headers = newheaders
880
def get_content_charset(self, failobj=None):
    """Return the lower-cased charset parameter of the Content-Type header.

    failobj is returned when the charset parameter cannot be found (no
    Content-Type header, or no charset parameter on it), and also when the
    charset name contains characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded value: (charset, language, text).  Decode the
        # text with the declared charset, defaulting to us-ascii.
        codec = value[0] or 'us-ascii'
        try:
            # LookupError: the codec is unknown to Python.
            # UnicodeError: the text cannot be represented in that codec.
            value = str(value[2].encode('raw-unicode-escape'), codec)
        except (LookupError, UnicodeError):
            # Fall back to the text exactly as it was supplied.
            value = value[2]
    # A charset name has to consist of us-ascii characters only.
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    else:
        # RFC 2046, $4.1.2: charset names are not case sensitive.
        return value.lower()
910
def get_charsets(self, failobj=None):
    """Return the charset of every part of this message, as a list.

    The message is traversed with walk(), so the list has one entry for
    the container message itself followed by one entry per subpart; a
    non-multipart message therefore gives a list of length 1.

    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset parameter of its Content-Type header, or 'failobj'
    (None by default) when no charset is available for the part.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
928
def get_content_disposition(self):
    """Return the lower-cased Content-Disposition value, or None.

    None is returned when the message has no Content-Disposition header.
    Otherwise the text before the first ';' is returned, stripped and
    lower-cased (RFC 2183 defines 'inline' and 'attachment'); parameters
    such as filename are not part of the result.
    """
    header = self.get('content-disposition')
    if header is None:
        return None
    disposition, _params = _splitparam(header)
    return disposition.lower()
940
941 # I.e. def walk(self): ...
942 from email.iterators import walk
943
944
class MIMEPart(Message):
    """Message subclass offering the content-manager based part API.

    A MIMEPart created without an explicit policy uses
    email.policy.default.  On top of Message it adds helpers to locate the
    body part, iterate attachments and subparts, get/set content through
    the policy's content manager, and convert to or extend multipart
    containers.
    """

    def __init__(self, policy=None):
        """Create an empty part; 'policy' defaults to email.policy.default."""
        if policy is None:
            from email.policy import default
            policy = default
        super().__init__(policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        return super().as_string(unixfrom, maxheaderlen, policy)

    def __str__(self):
        """Return as_string() using a clone of the policy with utf8=True."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if the Content-Disposition value is 'attachment'.

        Return False when there is no Content-Disposition header.  The
        header value is read through its 'content_disposition' attribute.
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) body candidates found at or below 'part'.

        'priority' is the index within preferencelist of the matching text
        subtype, or of 'related' for a multipart/related container.  Parts
        for which is_attachment() is true are never candidates.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # Test the part under examination rather than self: a nested part
        # may declare multipart/* while its payload is a plain string, and
        # descending into it would treat single characters as subparts.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # The root of a multipart/related is the subpart whose Content-ID
        # equals the 'start' parameter, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred type.
                    break
        return body

    # Content types that iter_attachments treats as body candidates.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice instead of pop(0) so that a multipart/related without
            # any subparts yields nothing rather than raising IndexError.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return content_manager.get_content(self, *args, **kw).

        content_manager defaults to the policy's content_manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content(self, *args, **kw).

        content_manager defaults to the policy's content_manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Existing Content-* headers and the current payload are moved into a
        new first subpart; all other headers stay on this part.  When there
        are no Content-* headers the payload becomes an empty list.  If
        'boundary' is not None it is set as the boundary parameter.

        Raise ValueError if this part is already a multipart whose subtype
        is 'subtype' itself or one of disallowed_subtypes.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related.

        Raise ValueError if already multipart/related, /alternative or
        /mixed.
        """
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative.

        Raise ValueError if already multipart/alternative or /mixed.
        """
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed.

        Raise ValueError if already multipart/mixed.
        """
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> first.

        The conversion (via the matching make_* method) happens only when
        this part is not already multipart/<_subtype>.  The new subpart is
        of the same class and policy as self, receives its content through
        set_content(*args, **kw), and gets a Content-Disposition of '_disp'
        if '_disp' is true and set_content did not add that header.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a subpart to a multipart/related, defaulting to 'inline'."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a subpart to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a subpart to a multipart/mixed, defaulting to 'attachment'."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts with
        'content-' (case-insensitively), keeping all other headers.
        """
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1166
1167
class EmailMessage(MIMEPart):
    """MIMEPart variant that adds a MIME-Version header when content is set."""

    def set_content(self, *args, **kw):
        """Set the content via MIMEPart.set_content, then add MIME-Version.

        A 'MIME-Version: 1.0' header is added only if the message does not
        already have a MIME-Version header.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'