[IMP] [READD] [IMP] tools: mail: have a strict mode for html_sanitizer (not default) with
the options used before website: without frames, with safe attributes only. We will have to find a way to use the strict mode when parsing incoming emails, but not when sending or storing OpenERP content. Currently the non-strict mode is the default, to avoid side effects with the website. bzr revid: tde@openerp.com-20140115141319-g15zl1kqrp8sgoa3
This commit is contained in:
parent
58ff276887
commit
2c4600f242
|
@ -2,7 +2,7 @@
|
|||
##############################################################################
|
||||
#
|
||||
# OpenERP, Open Source Business Applications
|
||||
# Copyright (C) 2012-2013 OpenERP S.A. (<http://openerp.com>).
|
||||
# Copyright (C) 2012-TODAY OpenERP S.A. (<http://openerp.com>).
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
|
@ -45,10 +45,14 @@ tags_to_remove = ['html', 'body', 'font']
|
|||
|
||||
# allow new semantic HTML5 tags
|
||||
allowed_tags = clean.defs.tags | frozenset('article section header footer hgroup nav aside figure main'.split())
|
||||
safe_attrs = clean.defs.safe_attrs | frozenset(['style'])
|
||||
safe_attrs = clean.defs.safe_attrs | frozenset(
|
||||
['style',
|
||||
'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translate', 'data-oe-nodeid',
|
||||
'data-snippet-id', 'data-publish', 'data-id', 'data-res_id', 'data-member_id', 'data-view-id'
|
||||
])
|
||||
|
||||
|
||||
def html_sanitize(src, silent=True):
|
||||
def html_sanitize(src, silent=True, strict=False):
|
||||
if not src:
|
||||
return src
|
||||
src = ustr(src, errors='replace')
|
||||
|
@ -62,7 +66,6 @@ def html_sanitize(src, silent=True):
|
|||
kwargs = {
|
||||
'page_structure': True,
|
||||
'style': False, # do not remove style attributes
|
||||
'frames': False, # do not remove frames (embedded videos in CMS blogs)
|
||||
'forms': True, # remove form tags
|
||||
'remove_unknown_tags': False,
|
||||
'allow_tags': allowed_tags,
|
||||
|
@ -76,7 +79,16 @@ def html_sanitize(src, silent=True):
|
|||
else:
|
||||
kwargs['remove_tags'] = tags_to_kill + tags_to_remove
|
||||
|
||||
kwargs['safe_attrs_only'] = False
|
||||
if strict:
|
||||
if etree.LXML_VERSION >= (3, 1, 0):
|
||||
# lxml < 3.1.0 does not allow specifying safe_attrs. We keep all attributes in order to keep "style"
|
||||
kwargs.update({
|
||||
'safe_attrs_only': True,
|
||||
'safe_attrs': safe_attrs,
|
||||
})
|
||||
else:
|
||||
kwargs['safe_attrs_only'] = False # keep oe-data attributes + style
|
||||
kwargs['frames'] = False, # do not remove frames (embedded videos in CMS blogs)
|
||||
|
||||
try:
|
||||
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
|
||||
|
|
Loading…
Reference in New Issue