[FIX] tools.email_split: improve email extraction using stdlib + extra tests

The original regex-based code had trouble telling display names that contain email addresses apart from the actual email address. This patch is inspired by Etienne Hirt's patch on bug 1165531 for OpenERP 6.1. This will also help solve bug 1199386, if we can centralize all email parsing through this helper. lp bug: https://launchpad.net/bugs/1199386 fixed lp bug: https://launchpad.net/bugs/1165531 fixed bzr revid: odo@openerp.com-20140114154426-usu6intv5139egd3
2014-01-14 16:44:26 +01:00 · 2014-01-14 16:44:26 +01:00 · 7a819ca8d0
parent 3f4c637674 9a456ac078
commit 7a819ca8d0
2 changed files with 21 additions and 2 deletions
--- a/openerp/tests/test_mail.py
+++ b/openerp/tests/test_mail.py
@ -23,7 +23,7 @@
 ##############################################################################

 import unittest2
-from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
+from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split

 HTML_SOURCE = """
 <font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
@ -311,6 +311,19 @@ class TestHtmlTools(unittest2.TestCase):
        for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
            self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')

+class TestEmailTools(unittest2.TestCase):
+    """ Test some of our generic utility functions for emails """
+
+    def test_email_split(self):
+        cases = [
+            ("John <12345@gmail.com>", ['12345@gmail.com']), # regular form 
+            ("d@x; 1@2", ['d@x', '1@2']), # semi-colon + extra space
+            ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com','foo@bar']), # comma + single-quoting
+            ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting
+            ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting with brackets 
+        ]
+        for text, expected in cases:
+            self.assertEqual(email_split(text), expected, 'email_split is broken')

 if __name__ == '__main__':
    unittest2.main()
--- a/openerp/tools/mail.py
+++ b/openerp/tools/mail.py
@ -30,6 +30,7 @@ import re
 import socket
 import threading
 import time
+from email.utils import getaddresses

 from openerp.loglevels import ustr

@ -358,4 +359,9 @@ def email_split(text):
    """ Return a list of the email addresses found in ``text`` """
    if not text:
        return []
-    return re.findall(r'([^ ,<@]+@[^> ,]+)', text)
+    return [addr[1] for addr in getaddresses([text])
+                # getaddresses() returns '' when email parsing fails, and
+                # sometimes returns addresses that contain no '@' at all.
+                # The '@' is strictly required in RFC2822's `addr-spec`.
+                if addr[1]
+                if '@' in addr[1]]