Index: tests/testMailHost.py =================================================================== --- tests/testMailHost.py (revision 102634) +++ tests/testMailHost.py (working copy) @@ -16,6 +16,7 @@ """ import unittest +from email import message_from_string from Products.MailHost.MailHost import MailHost from Products.MailHost.MailHost import MailHostError, _mungeHeaders @@ -59,14 +60,14 @@ # Add duplicated info resmsg, resto, resfrom = _mungeHeaders(msg, 'recipient@domain.com', 'sender@domain.com', 'This is the subject' ) - self.failUnless(resto == ['recipient@domain.com']) - self.failUnless(resfrom == 'sender@domain.com' ) + self.failUnlessEqual(resto, ['recipient@domain.com']) + self.failUnlessEqual(resfrom, 'sender@domain.com' ) # Add extra info resmsg, resto, resfrom = _mungeHeaders(msg, 'recipient2@domain.com', 'sender2@domain.com', 'This is the real subject' ) - self.failUnless(resto == ['recipient2@domain.com']) - self.failUnless(resfrom == 'sender2@domain.com' ) + self.failUnlessEqual(resto, ['recipient2@domain.com']) + self.failUnlessEqual(resfrom, 'sender2@domain.com' ) def testMissingHeaders( self ): msg = """X-Header: Dummy header @@ -90,15 +91,15 @@ # Specify all resmsg, resto, resfrom = _mungeHeaders(msg, 'recipient2@domain.com', 'sender2@domain.com', 'This is the real subject') - self.failUnless(resto == ['recipient2@domain.com']) - self.failUnless(resfrom == 'sender2@domain.com' ) + self.failUnlessEqual(resto, ['recipient2@domain.com']) + self.failUnlessEqual(resfrom,'sender2@domain.com' ) def testBCCHeader( self ): msg = "From: me@example.com\nBcc: many@example.com\n\nMessage text" # Specify only the "Bcc" header. Useful for bulk emails. 
resmsg, resto, resfrom = _mungeHeaders(msg) - self.failUnless(resto == ['many@example.com']) - self.failUnless(resfrom == 'me@example.com' ) + self.failUnlessEqual(resto, ['many@example.com']) + self.failUnlessEqual(resfrom, 'me@example.com' ) def testAddressParser( self ): @@ -112,18 +113,18 @@ # Test Address-Parser for To & CC given in messageText resmsg, resto, resfrom = _mungeHeaders( msg ) - self.failUnless(resto == ['"Name, Nick" ', - '"Foo Bar" ', + self.failUnlessEqual(resto, ['"Name, Nick" ', + 'Foo Bar ', '"Web, Jack" ']) - self.failUnless(resfrom == 'sender@domain.com' ) + self.failUnlessEqual(resfrom, 'sender@domain.com') # Test Address-Parser for a given mto-string - resmsg, resto, resfrom = _mungeHeaders(msg, mto= '"Public, Joe" , "Foo Bar" ') + resmsg, resto, resfrom = _mungeHeaders(msg, mto= '"Public, Joe" , Foo Bar ') - self.failUnless(resto == ['"Public, Joe" ', - '"Foo Bar" ']) - self.failUnless(resfrom == 'sender@domain.com' ) + self.failUnlessEqual(resto, ['"Public, Joe" ', + 'Foo Bar ']) + self.failUnlessEqual(resfrom, 'sender@domain.com') def testSendMessageOnly(self): msg = """\ @@ -147,7 +148,7 @@ outmsg = """\ Date: Sun, 27 Aug 2006 17:00:00 +0200 Subject: This is the subject -To: "Name, Nick" ,"Foo Bar" +To: "Name, Nick" , Foo Bar From: sender@domain.com This is the message body.""" @@ -167,7 +168,7 @@ outmsg = """\ Date: Sun, 27 Aug 2006 17:00:00 +0200 Subject: This is the subject -To: "Name, Nick" ,"Foo Bar" +To: "Name, Nick" , "Foo Bar" From: sender@domain.com This is the message body.""" @@ -208,7 +209,180 @@ self.assertEqual(mailhost.sent, outmsg) self.assertEqual(mailhost.immediate, True) + def testSendBodyWithUrl(self): + # The implementation of rfc822.Message reacts poorly to + # message bodies containing ':' characters as in a url + msg = "Here's a nice link: http://www.zope.org/" + mailhost = self._makeOne('MailHost') + mailhost.send(messageText=msg, + mto='"Name, Nick" , "Foo Bar" ', + mfrom='sender@domain.com', subject='This 
is the subject') + out = message_from_string(mailhost.sent) + self.failUnlessEqual(out.get_payload(), msg) + self.failUnlessEqual(out['To'], + '"Name, Nick" , Foo Bar ') + self.failUnlessEqual(out['From'], 'sender@domain.com') + + def testSendEncodedBody(self): + # If a charset is specified the correct headers for content + # encoding will be set if not already set. Additionally, if + # there is a default transfer encoding for the charset, then + # the content will be encoded and the transfer encoding header + # will be set. + msg = "Here's some encoded t\xc3\xa9xt." + mailhost = self._makeOne('MailHost') + mailhost.send(messageText=msg, + mto='"Name, Nick" , "Foo Bar" ', + mfrom='sender@domain.com', subject='This is the subject', charset='utf-8') + out = message_from_string(mailhost.sent) + self.failUnlessEqual(out['To'], + '"Name, Nick" , Foo Bar ') + self.failUnlessEqual(out['From'], 'sender@domain.com') + # utf-8 will default to Quoted Printable encoding + self.failUnlessEqual(out['Content-Transfer-Encoding'], + 'quoted-printable') + self.failUnlessEqual(out['Content-Type'], 'text/plain; charset="utf-8"') + self.failUnlessEqual(out.get_payload(), + "Here's some encoded t=C3=A9xt.") + + def testEncodedHeaders(self): + # Headers are encoded automatically, email headers are encoded + # piece-wise to ensure the adresses remain ASCII + mfrom = "Jos\xc3\xa9 Andr\xc3\xa9s " + mto = "Ferran Adri\xc3\xa0 " + subject = "\xc2\xbfEsferificaci\xc3\xb3n?" 
+ mailhost = self._makeOne('MailHost') + mailhost.send(messageText='A message.', mto=mto, mfrom=mfrom, + subject=subject, charset='utf-8') + out = message_from_string(mailhost.sent) + self.failUnlessEqual(out['To'], + 'Ferran =?utf-8?q?Adri=C3=A0?= ') + self.failUnlessEqual(out['From'], + '=?utf-8?q?Jos=C3=A9?= =?utf-8?q?Andr=C3=A9s?= ') + self.failUnlessEqual(out['Subject'], + '=?utf-8?q?=C2=BFEsferificaci=C3=B3n=3F?=') + # utf-8 will default to Quoted Printable encoding + self.failUnlessEqual(out['Content-Transfer-Encoding'], + 'quoted-printable') + self.failUnlessEqual(out['Content-Type'], 'text/plain; charset="utf-8"') + self.failUnlessEqual(out.get_payload(), "A message.") + + def testAlreadyEncodedMessage(self): + # If the message already specifies encodings, it is + # essentially not altered this is true even if charset or + # msg_type is specified + msg = """\ +From: =?utf-8?q?Jos=C3=A9?= =?utf-8?q?Andr=C3=A9s?= +To: Ferran =?utf-8?q?Adri=C3=A0?= +Subject: =?utf-8?q?=C2=BFEsferificaci=C3=B3n=3F?= +Date: Sun, 27 Aug 2006 17:00:00 +0200 +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: base64 +MIME-Version: 1.1 + +wqFVbiB0cnVjbyA8c3Ryb25nPmZhbnTDoXN0aWNvPC9zdHJvbmc+IQ=3D=3D +""" + mailhost = self._makeOne('MailHost') + mailhost.send(messageText=msg) + self.failUnlessEqual(mailhost.sent, msg) + mailhost.send(messageText=msg, charset='utf-8', msg_type='text/plain') + # We ignore charset and msg_type if they are set in the + # existing message body, is this ideal? Should we log a warning? 
+ self.failUnlessEqual(mailhost.sent, msg) + + def testUnicodeMessage(self): + # unicode messages and headers are decoded using the given charset + msg = unicode("Here's some unencoded t\xc3\xa9xt.", + 'utf-8') + mfrom = unicode('Ferran Adri\xc3\xa0 ', 'utf-8') + subject = unicode('\xc2\xa1Andr\xc3\xa9s!', 'utf-8') + mailhost = self._makeOne('MailHost') + mailhost.send(messageText=msg, + mto='"Name, Nick" ', + mfrom=mfrom, subject=subject, charset='utf-8', + msg_type='text/html') + out = message_from_string(mailhost.sent) + self.failUnlessEqual(out['To'], + '"Name, Nick" ') + self.failUnlessEqual(out['From'], + 'Ferran =?utf-8?q?Adri=C3=A0?= ') + self.failUnlessEqual(out['Subject'], '=?utf-8?q?=C2=A1Andr=C3=A9s!?=') + self.failUnlessEqual(out['Content-Transfer-Encoding'], 'quoted-printable') + self.failUnlessEqual(out['Content-Type'], 'text/html; charset="utf-8"') + self.failUnlessEqual(out.get_payload(), + "Here's some unencoded t=C3=A9xt.") + + def testUnicodeNoEncodingErrors(self): + # Unicode messages and headers raise errors if no charset is passed to + # send + msg = unicode("Here's some unencoded t\xc3\xa9xt.", + 'utf-8') + subject = unicode('\xc2\xa1Andr\xc3\xa9s!', 'utf-8') + mailhost = self._makeOne('MailHost') + self.assertRaises(UnicodeEncodeError, + mailhost.send, msg, + mto='"Name, Nick" ', + mfrom='Foo Bar ', + subject=subject) + + def testUnicodeDefaultEncoding(self): + # However if we pass unicode that can be encoded to the + # default encoding (generally 'us-ascii'), no error is raised. + # We include a date in the messageText to make inspecting the + # results more convenient. + msg = u"""\ +Date: Sun, 27 Aug 2006 17:00:00 +0200 + +Here's some unencoded text.""" + subject = u'Andres!' 
+ mailhost = self._makeOne('MailHost') + mailhost.send(msg, mto=u'"Name, Nick" ', + mfrom=u'Foo Bar ', subject=subject) + out = mailhost.sent + # Ensure the results are not unicode + self.failUnlessEqual(out,"""\ +Date: Sun, 27 Aug 2006 17:00:00 +0200 +Subject: Andres! +To: "Name, Nick" +From: Foo Bar + +Here's some unencoded text.""") + self.failUnlessEqual(type(out), str) + + def testSendMessageObject(self): + # send will accept an email.Message.Message object directly + msg = message_from_string("""\ +From: =?utf-8?q?Jos=C3=A9?= =?utf-8?q?Andr=C3=A9s?= +To: Ferran =?utf-8?q?Adri=C3=A0?= +Subject: =?utf-8?q?=C2=BFEsferificaci=C3=B3n=3F?= +Date: Sun, 27 Aug 2006 17:00:00 +0200 +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: base64 +MIME-Version: 1.1 + +wqFVbiB0cnVjbyA8c3Ryb25nPmZhbnTDoXN0aWNvPC9zdHJvbmc+IQ=3D=3D +""") + mailhost = self._makeOne('MailHost') + mailhost.send(msg) + out = message_from_string(mailhost.sent) + self.failUnlessEqual(out.as_string(), msg.as_string()) + + # we can even alter a from and subject headers without affecting the + # original object + mailhost.send(msg, mfrom='Foo Bar ', subject='Changed!') + out = message_from_string(mailhost.sent) + + # We need to make sure we didn't mutate the message we were passed + self.failIfEqual(out.as_string(), msg.as_string()) + self.failUnlessEqual(out['From'], 'Foo Bar ') + self.failUnlessEqual(msg['From'], + '=?utf-8?q?Jos=C3=A9?= =?utf-8?q?Andr=C3=A9s?= ') + self.failUnlessEqual(out['Subject'], 'Changed!') + self.failUnlessEqual(msg['Subject'], + '=?utf-8?q?=C2=BFEsferificaci=C3=B3n=3F?=') + + def test_suite(): suite = unittest.TestSuite() suite.addTest( unittest.makeSuite( TestMailHost ) ) Index: MailHost.py =================================================================== --- MailHost.py (revision 102634) +++ MailHost.py (working copy) @@ -14,10 +14,20 @@ $Id$ """ -from cStringIO import StringIO import logging -import mimetools -import rfc822 +from copy import deepcopy 
+from email.Header import Header +from email import message_from_string +from email.Message import Message +try: + import email.utils as emailutils +except ImportError: + import email.Utils as emailutils +import email.Charset +# We import from a private module here because the email module +# doesn't provide a good public address list parser +from email._parseaddr import AddressList as _AddressList + from threading import Lock import time @@ -49,6 +59,11 @@ LOG = logging.getLogger('MailHost') +# Encode utf-8 emails as Quoted Printable by default +email.Charset.add_charset("utf-8", email.Charset.QP, email.Charset.QP, "utf-8") + +formataddr = emailutils.formataddr + class MailHostError(Exception): pass @@ -185,16 +200,18 @@ encode=None, REQUEST=None, immediate=False, + charset=None, + msg_type=None, ): """Render a mail template, then send it... """ mtemplate = getattr(self, messageTemplate) messageText = mtemplate(self, trueself.REQUEST) - messageText, mto, mfrom = _mungeHeaders( messageText, mto, mfrom) - messageText = _encode(messageText, encode) + messageText, mto, mfrom = _mungeHeaders(messageText, mto, mfrom, + charset=charset, msg_type=msg_type) trueself._send(mfrom, mto, messageText, immediate) - if not statusTemplate: + if not statusTemplate: return "SEND OK" try: @@ -211,11 +228,11 @@ subject=None, encode=None, immediate=False, + charset=None, + msg_type=None, ): - - messageText, mto, mfrom = _mungeHeaders(messageText, - mto, mfrom, subject) - messageText = _encode(messageText, encode) + messageText, mto, mfrom = _mungeHeaders(messageText, mto, mfrom, + subject, charset, msg_type) self._send(mfrom, mto, messageText, immediate) # This is here for backwards compatibility only. 
Possibly it could @@ -327,68 +344,99 @@ class MailHost(Persistent, MailBase): """persistent version""" - -def _encode(body, encode=None): - if encode is None: - return body - mfile = StringIO(body) - mo = mimetools.Message(mfile) - if mo.getencoding() != '7bit': - raise MailHostError, 'Message already encoded' - newmfile = StringIO() - newmfile.write(''.join(mo.headers)) - newmfile.write('Content-Transfer-Encoding: %s\n' % encode) - if not mo.has_key('Mime-Version'): - newmfile.write('Mime-Version: 1.0\n') - newmfile.write('\n') - mimetools.encode(mfile, newmfile, encode) - return newmfile.getvalue() - -def _mungeHeaders( messageText, mto=None, mfrom=None, subject=None): +def _mungeHeaders(messageText, mto=None, mfrom=None, subject=None, + charset=None, msg_type=None): """Sets missing message headers, and deletes Bcc. returns fixed message, fixed mto and fixed mfrom""" - mfile = StringIO(messageText.lstrip()) - mo = rfc822.Message(mfile) + # If we have been given unicode fields, attempt to encode them + if isinstance(messageText, unicode): + messageText = _try_encode(messageText, charset) + if isinstance(mto, unicode): + mto = _try_encode(mto, charset) + if isinstance(mfrom, unicode): + mfrom = _try_encode(mfrom, charset) + if isinstance(subject, unicode): + subject = _try_encode(subject, charset) + if isinstance(messageText, Message): + # We already have a message, make a copy to operate on + mo = deepcopy(messageText) + else: + # Otherwise parse the input message + mo = message_from_string(messageText) + + if msg_type and not mo.get('Content-Type'): + # we don't use get_content_type because that has a default + # value of 'text/plain' + mo.set_type(msg_type) + if charset and not mo.get_charset(): + # This will encode the payload automatically based on either the default + # encoding for the charset or the encoding specified in the message + mo.set_charset(charset) + # Parameters given will *always* override headers in the messageText. 
# This is so that you can't override or add to subscribers by adding # them to # the message text. if subject: - mo['Subject'] = subject - elif not mo.getheader('Subject'): + # remove any existing header otherwise we get two + del mo['Subject'] + mo['Subject'] = Header(subject, charset) + elif not mo.get('Subject'): mo['Subject'] = '[No Subject]' if mto: if isinstance(mto, basestring): - mto = [rfc822.dump_address_pair(addr) - for addr in rfc822.AddressList(mto) ] - if not mo.getheader('To'): - mo['To'] = ','.join(mto) + mto = [formataddr(addr) for addr in _AddressList(mto).addresslist] + if not mo.get('To'): + email_to = ', '.join(mto) + mo['To'] = _encode_address_string(email_to, charset) else: + # If we don't have recipients, extract them from the message mto = [] for header in ('To', 'Cc', 'Bcc'): - v = mo.getheader(header) + v = ','.join(mo.get_all(header) or []) if v: - mto += [rfc822.dump_address_pair(addr) - for addr in rfc822.AddressList(v)] + mto += [formataddr(addr) for addr in + _AddressList(v).addresslist] if not mto: raise MailHostError, "No message recipients designated" if mfrom: - mo['From'] = mfrom + # XXX: do we really want to override an explicitly set From + # header in the messageText + del mo['From'] + mo['From'] = _encode_address_string(mfrom, charset) else: - if mo.getheader('From') is None: + if mo.get('From') is None: raise MailHostError,"Message missing SMTP Header 'From'" mfrom = mo['From'] - if mo.getheader('Bcc'): - mo.__delitem__('Bcc') + if mo.get('Bcc'): + del mo['Bcc'] - if not mo.getheader('Date'): + if not mo.get('Date'): mo['Date'] = DateTime().rfc822() - mo.rewindbody() - finalmessage = mo - finalmessage = mo.__str__() + '\n' + mfile.read() - mfile.close() - return finalmessage, mto, mfrom + return mo.as_string(), mto, mfrom + +def _try_encode(text, charset): + """Attempt to encode using the default charset if none is + provided. 
This may be unwise.""" + if charset: + return text.encode(charset) + else: + return text.encode() + +def _encode_address_string(text, charset): + """Split the email into pieces and use header encoding on all + non-ascii parts. We do this because the actual addresses need to + be ASCII with no encoding for most SMTP servers, but the + non-address parts should be encoded appropriately""" + header = Header() + for part in text.split(' '): + try: + part.decode('us-ascii') + header.append(part) + except UnicodeDecodeError: + header.append(part, charset) + return header