From 7508a5c7243f7cdc17dd94866721555972a0d830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibault=20Delavall=C3=A9e?= Date: Wed, 14 Nov 2012 11:38:17 +0100 Subject: [PATCH] [CLEAN] html_email_clean: cleaned a bit the code. bzr revid: tde@openerp.com-20121114103817-t19i2sv817jsur41 --- openerp/tools/mail.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/openerp/tools/mail.py b/openerp/tools/mail.py index 020ab2313b4..5f8c65c1d9b 100644 --- a/openerp/tools/mail.py +++ b/openerp/tools/mail.py @@ -121,17 +121,20 @@ def html_email_clean(html): be present in the html string. This method therefore takes as input html code coming from a sanitized source, like fields.html. """ + def _replace_matching_regex(regex, source, replace=''): + dest = '' + idx = 0 + for item in re.finditer(regex, source): + dest += source[idx:item.start()] + replace + idx = item.end() + dest += source[idx:] + return dest + html = ustr(html) - modified_html = '' # 1. -> \n, because otherwise the tree is obfuscated br_tags = re.compile(r'([<]\s*br\s*\/?[>])') - idx = 0 - for item in re.finditer(br_tags, html): - modified_html += html[idx:item.start()] + '__BR_TAG__' - idx = item.end() - modified_html += html[idx:] - html = modified_html + html = _replace_matching_regex(br_tags, html, '__BR_TAG__') # TDE note: seems to have lots of

in emails... needs to be checks, could be cleaned # 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre @@ -145,14 +148,7 @@ def html_email_clean(html): for node in root.getiterator(): if not node.text: continue - idx = 0 - text = '' - for item in re.finditer(quote_tags, node.text): - print item - text += node.text[idx:item.start()] - idx = item.end() - text += node.text[idx:] - node.text = text + node.text = _replace_matching_regex(quote_tags, node.text) # 3. remove blockquotes quotes = [el for el in root.getiterator(tag='blockquote')] @@ -182,14 +178,8 @@ def html_email_clean(html): # 6. Misc cleaning : # - ClEditor seems to love using

-> replace with
- modified_html = '' br_div_tags = re.compile(r'(
\s*\s*<\/div>)') - idx = 0 - for item in re.finditer(br_div_tags, html): - modified_html += html[idx:item.start()] + '
' - idx = item.end() - modified_html += html[idx:] - html = modified_html + html = _replace_matching_regex(br_div_tags, html, '
') return html