[FIX] tools.email_split: improve email extraction using stdlib + extra tests
The original code had trouble distinguishing display names that contain an email address from the actual email address. This patch is inspired by Etienne Hirt's patch on bug 1165531 for OpenERP 6.1. It will also help solve bug 1199386, provided we centralize all email parsing through this helper. lp bug: https://launchpad.net/bugs/1199386 fixed lp bug: https://launchpad.net/bugs/1165531 fixed bzr revid: odo@openerp.com-20140114154426-usu6intv5139egd3
This commit is contained in:
commit
7a819ca8d0
|
@ -23,7 +23,7 @@
|
|||
##############################################################################
|
||||
|
||||
import unittest2
|
||||
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
|
||||
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split
|
||||
|
||||
HTML_SOURCE = """
|
||||
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
|
||||
|
@ -311,6 +311,19 @@ class TestHtmlTools(unittest2.TestCase):
|
|||
for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
|
||||
self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
|
||||
|
||||
class TestEmailTools(unittest2.TestCase):
    """ Test some of our generic utility functions for emails """

    def test_email_split(self):
        """ email_split() must return only the address part of each entry,
        never the display name, for every supported input form. """
        # Each case is (raw input, expected list of extracted addresses).
        cases = [
            ("John <12345@gmail.com>", ['12345@gmail.com']),  # regular form
            ("d@x; 1@2", ['d@x', '1@2']),  # semi-colon + extra space
            ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com','foo@bar']),  # comma + single-quoting
            ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']),  # double-quoting
            ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']),  # double-quoting with brackets
        ]
        for text, expected in cases:
            self.assertEqual(email_split(text), expected, 'email_split is broken')
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest2.main()
|
||||
|
|
|
@ -30,6 +30,7 @@ import re
|
|||
import socket
|
||||
import threading
|
||||
import time
|
||||
from email.utils import getaddresses
|
||||
|
||||
from openerp.loglevels import ustr
|
||||
|
||||
|
def email_split(text):
    """ Return a list of the email addresses found in ``text``

    Parsing is delegated to the standard library's
    ``email.utils.getaddresses()`` instead of a hand-written regex, so a
    display name that itself looks like an address, as in
    ``"john@gmail.com" <johnny@gmail.com>``, is not reported as one.

    :param text: string holding zero or more addresses; any falsy value
                 (``None``, ``''``) yields an empty list
    :return: list of the address parts, in the order in which
             ``getaddresses()`` produced them
    """
    if not text:
        return []
    # getaddresses() yields (display name, address) pairs; only the
    # address part is kept.
    return [addr[1] for addr in getaddresses([text])
                # getaddresses() returns '' when email parsing fails, and
                # sometimes returns emails without at least '@'. The '@'
                # is strictly required in RFC2822's `addr-spec`.
                if addr[1]
                if '@' in addr[1]]
|
Loading…
Reference in New Issue