[FIX] tools: html_sanitize: keep mako tags (<% ... %>); this is required, for example, by the portal sale quotation email.

This commit is contained in:
Thibault Delavallée 2014-08-07 11:21:41 +02:00
parent eb3d81fcf9
commit f5688cd8fd
2 changed files with 30 additions and 9 deletions

View File

@ -24,10 +24,7 @@
import unittest2
from lxml import etree
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split
import test_mail_examples
@ -45,6 +42,24 @@ class TestSanitizer(unittest2.TestCase):
html = html_sanitize(content)
self.assertEqual(html, expected, 'html_sanitize is broken')
def test_mako(self):
    """Check that html_sanitize hands back mako markup unchanged.

    The fixture mixes plain HTML with a mako ``<% ... %>`` tag and a
    ``% if`` control line; the sanitized output is expected to be
    identical to the input.
    """
    # Input and expected output are the same text, so one literal serves both roles.
    mako_snippet = '''<p>Some text</p>
<% set signup_url = object.get_signup_url() %>
% if signup_url:
<p>
You can access this document and pay online via our Customer Portal:
</p>'''
    samples = [(mako_snippet, mako_snippet)]
    for source, wanted in samples:
        # silent=False is passed explicitly; presumably this lets sanitizer
        # errors surface instead of being swallowed -- confirm in html_sanitize.
        sanitized = html_sanitize(source, silent=False)
        self.assertEqual(sanitized, wanted, 'html_sanitize: broken mako management')
def test_evil_malicious_code(self):
# taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Tests
cases = [
@ -363,16 +378,17 @@ class TestHtmlTools(unittest2.TestCase):
for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
class TestEmailTools(unittest2.TestCase):
""" Test some of our generic utility functions for emails """
def test_email_split(self):
# Each case pairs a raw address string with the list of bare addresses
# that email_split is expected to return for it.
cases = [
# NOTE(review): the five rows below appear twice in this listing. This looks
# like the removed and added sides of one diff hunk shown without +/- markers;
# the only visible difference is the space after the comma in the expected
# list of the third row. Confirm against the real file before deduplicating.
("John <12345@gmail.com>", ['12345@gmail.com']), # regular form
("d@x; 1@2", ['d@x', '1@2']), # semi-colon + extra space
("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com','foo@bar']), # comma + single-quoting
('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting
('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting with brackets
("John <12345@gmail.com>", ['12345@gmail.com']), # regular form
("d@x; 1@2", ['d@x', '1@2']), # semi-colon + extra space
("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com', 'foo@bar']), # comma + single-quoting
('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting
('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting with brackets
]
# Every input must split to exactly the expected list, in the same order.
for text, expected in cases:
self.assertEqual(email_split(text), expected, 'email_split is broken')

View File

@ -63,6 +63,9 @@ def html_sanitize(src, silent=True, strict=False):
# html encode email tags
part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
# html encode mako tags <% ... %> to decode them later and keep them alive, otherwise they are stripped by the cleaner
src = src.replace('<%', cgi.escape('<%'))
src = src.replace('%>', cgi.escape('%>'))
kwargs = {
'page_structure': True,
@ -71,7 +74,7 @@ def html_sanitize(src, silent=True, strict=False):
'remove_unknown_tags': False,
'allow_tags': allowed_tags,
'comments': False,
'processing_instructions' : False
'processing_instructions': False
}
if etree.LXML_VERSION >= (2, 3, 1):
# kill_tags attribute has been added in version 2.3.1
@ -104,6 +107,8 @@ def html_sanitize(src, silent=True, strict=False):
cleaned = cleaned.replace('%20', ' ')
cleaned = cleaned.replace('%5B', '[')
cleaned = cleaned.replace('%5D', ']')
cleaned = cleaned.replace('&lt;%', '<%')
cleaned = cleaned.replace('%&gt;', '%>')
except etree.ParserError, e:
if 'empty' in str(e):
return ""