[FIX] tools.email_split: improve email extraction using stdlib + extra tests

The original regex-based code had trouble telling display names
that contain an email address apart from the actual address.
This patch is inspired by Etienne Hirt's patch
on bug 1165531 for OpenERP 6.1.
This will also help solve bug 1199386, if we can
centralize all email parsing through this helper.

lp bug: https://launchpad.net/bugs/1199386 fixed
lp bug: https://launchpad.net/bugs/1165531 fixed

bzr revid: odo@openerp.com-20140114154426-usu6intv5139egd3
This commit is contained in:
Olivier Dony 2014-01-14 16:44:26 +01:00
commit 7a819ca8d0
2 changed files with 21 additions and 2 deletions

View File

@ -23,7 +23,7 @@
##############################################################################
import unittest2
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split
HTML_SOURCE = """
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
@ -311,6 +311,19 @@ class TestHtmlTools(unittest2.TestCase):
for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
class TestEmailTools(unittest2.TestCase):
    """ Checks for the generic email helpers exposed by ``openerp.tools`` """

    def test_email_split(self):
        """ ``email_split`` must return only the addresses, never the names """
        # Each sample pairs a raw header-like string with the list of
        # addresses that email_split() is expected to extract from it.
        samples = [
            # regular form
            ("John <12345@gmail.com>", ['12345@gmail.com']),
            # semi-colon + extra space
            ("d@x; 1@2", ['d@x', '1@2']),
            # comma + single-quoting
            ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com', 'foo@bar']),
            # double-quoting
            ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']),
            # double-quoting with brackets
            ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']),
        ]
        for raw, wanted in samples:
            found = email_split(raw)
            self.assertEqual(found, wanted, 'email_split is broken')
if __name__ == '__main__':
unittest2.main()

View File

@ -30,6 +30,7 @@ import re
import socket
import threading
import time
from email.utils import getaddresses
from openerp.loglevels import ustr
def email_split(text):
    """ Return a list of the email addresses found in ``text``.

    Parsing is delegated to the standard library's
    ``email.utils.getaddresses()`` instead of a hand-written regex, so a
    quoted display name is not mistaken for an address even when it
    contains an ``@`` or angle brackets.

    :param text: string holding zero or more addresses, e.g.
                 ``'"John" <john@example.com>, jane@example.com'``
    :return: list of the address parts found (display names are dropped);
             an empty list when ``text`` is empty or ``None``
    """
    if not text:
        return []
    # getaddresses() returns '' when email parsing fails, and
    # sometimes returns emails without at least '@'. The '@'
    # is strictly required in RFC2822's `addr-spec`. Checking for
    # '@' also discards the empty string, so one test is enough.
    return [address for _name, address in getaddresses([text])
            if '@' in address]