[MERGE] Forward port of 7.0 until rev. 4767.
bzr revid: vmt@openerp.com-20130104093343-xtbw4tphmlupahuv
This commit is contained in:
commit
013efb71cb
|
@ -136,12 +136,13 @@ class ir_cron(osv.osv):
|
|||
except Exception, e:
|
||||
self._handle_callback_exception(cr, uid, model_name, method_name, args, job_id, e)
|
||||
|
||||
def _process_job(self, cr, job):
|
||||
def _process_job(self, job_cr, job, cron_cr):
|
||||
""" Run a given job taking care of the repetition.
|
||||
|
||||
The cursor has a lock on the job (acquired by _acquire_job()).
|
||||
|
||||
:param job_cr: cursor to use to execute the job, safe to commit/rollback
|
||||
:param job: job to be run (as a dictionary).
|
||||
:param cron_cr: cursor holding lock on the cron job row, to use to update the next exec date,
|
||||
must not be committed/rolled back!
|
||||
"""
|
||||
try:
|
||||
now = datetime.now()
|
||||
|
@ -153,19 +154,19 @@ class ir_cron(osv.osv):
|
|||
if numbercall > 0:
|
||||
numbercall -= 1
|
||||
if not ok or job['doall']:
|
||||
self._callback(cr, job['user_id'], job['model'], job['function'], job['args'], job['id'])
|
||||
self._callback(job_cr, job['user_id'], job['model'], job['function'], job['args'], job['id'])
|
||||
if numbercall:
|
||||
nextcall += _intervalTypes[job['interval_type']](job['interval_number'])
|
||||
ok = True
|
||||
addsql = ''
|
||||
if not numbercall:
|
||||
addsql = ', active=False'
|
||||
cr.execute("UPDATE ir_cron SET nextcall=%s, numbercall=%s"+addsql+" WHERE id=%s",
|
||||
cron_cr.execute("UPDATE ir_cron SET nextcall=%s, numbercall=%s"+addsql+" WHERE id=%s",
|
||||
(nextcall.strftime(DEFAULT_SERVER_DATETIME_FORMAT), numbercall, job['id']))
|
||||
|
||||
finally:
|
||||
cr.commit()
|
||||
cr.close()
|
||||
job_cr.commit()
|
||||
cron_cr.commit()
|
||||
|
||||
@classmethod
|
||||
def _acquire_job(cls, db_name):
|
||||
|
@ -181,44 +182,14 @@ class ir_cron(osv.osv):
|
|||
"""
|
||||
db = openerp.sql_db.db_connect(db_name)
|
||||
cr = db.cursor()
|
||||
jobs = []
|
||||
try:
|
||||
# Careful to compare timestamps with 'UTC' - everything is UTC as of v6.1.
|
||||
cr.execute("""SELECT * FROM ir_cron
|
||||
WHERE numbercall != 0
|
||||
AND active AND nextcall <= (now() at time zone 'UTC')
|
||||
ORDER BY priority""")
|
||||
for job in cr.dictfetchall():
|
||||
task_cr = db.cursor()
|
||||
try:
|
||||
# Try to grab an exclusive lock on the job row from within the task transaction
|
||||
acquired_lock = False
|
||||
task_cr.execute("""SELECT *
|
||||
FROM ir_cron
|
||||
WHERE id=%s
|
||||
FOR UPDATE NOWAIT""",
|
||||
(job['id'],), log_exceptions=False)
|
||||
acquired_lock = True
|
||||
except psycopg2.OperationalError, e:
|
||||
if e.pgcode == '55P03':
|
||||
# Class 55: Object not in prerequisite state; 55P03: lock_not_available
|
||||
_logger.debug('Another process/thread is already busy executing job `%s`, skipping it.', job['name'])
|
||||
continue
|
||||
else:
|
||||
# Unexpected OperationalError
|
||||
raise
|
||||
finally:
|
||||
if not acquired_lock:
|
||||
# we're exiting due to an exception while acquiring the lot
|
||||
task_cr.close()
|
||||
|
||||
# Got the lock on the job row, run its code
|
||||
_logger.debug('Starting job `%s`.', job['name'])
|
||||
openerp.modules.registry.RegistryManager.check_registry_signaling(db_name)
|
||||
registry = openerp.pooler.get_pool(db_name)
|
||||
registry[cls._name]._process_job(task_cr, job)
|
||||
openerp.modules.registry.RegistryManager.signal_caches_change(db_name)
|
||||
return True
|
||||
|
||||
jobs = cr.dictfetchall()
|
||||
except psycopg2.ProgrammingError, e:
|
||||
if e.pgcode == '42P01':
|
||||
# Class 42 — Syntax Error or Access Rule Violation; 42P01: undefined_table
|
||||
|
@ -228,12 +199,43 @@ class ir_cron(osv.osv):
|
|||
raise
|
||||
except Exception:
|
||||
_logger.warning('Exception in cron:', exc_info=True)
|
||||
|
||||
finally:
|
||||
cr.commit()
|
||||
cr.close()
|
||||
|
||||
return False
|
||||
for job in jobs:
|
||||
lock_cr = db.cursor()
|
||||
try:
|
||||
# Try to grab an exclusive lock on the job row from within the task transaction
|
||||
lock_cr.execute("""SELECT *
|
||||
FROM ir_cron
|
||||
WHERE id=%s
|
||||
FOR UPDATE NOWAIT""",
|
||||
(job['id'],), log_exceptions=False)
|
||||
|
||||
# Got the lock on the job row, run its code
|
||||
_logger.debug('Starting job `%s`.', job['name'])
|
||||
job_cr = db.cursor()
|
||||
try:
|
||||
openerp.modules.registry.RegistryManager.check_registry_signaling(db_name)
|
||||
registry = openerp.pooler.get_pool(db_name)
|
||||
registry[cls._name]._process_job(job_cr, job, lock_cr)
|
||||
openerp.modules.registry.RegistryManager.signal_caches_change(db_name)
|
||||
except Exception:
|
||||
_logger.exception('Unexpected exception while processing cron job %r', job)
|
||||
finally:
|
||||
job_cr.close()
|
||||
|
||||
except psycopg2.OperationalError, e:
|
||||
if e.pgcode == '55P03':
|
||||
# Class 55: Object not in prerequisite state; 55P03: lock_not_available
|
||||
_logger.debug('Another process/thread is already busy executing job `%s`, skipping it.', job['name'])
|
||||
continue
|
||||
else:
|
||||
# Unexpected OperationalError
|
||||
raise
|
||||
finally:
|
||||
# always close the lock cursor: this runs whether the job was executed, skipped (lock not available) or an error was raised
|
||||
lock_cr.close()
|
||||
|
||||
def _try_lock(self, cr, uid, ids, context=None):
|
||||
"""Try to grab a dummy exclusive write-lock to the rows with the given ids,
|
||||
|
|
|
@ -199,7 +199,7 @@ class ir_model(osv.osv):
|
|||
|
||||
def instanciate(self, cr, user, model, context=None):
|
||||
class x_custom_model(osv.osv):
|
||||
pass
|
||||
_custom = True
|
||||
x_custom_model._name = model
|
||||
x_custom_model._module = False
|
||||
a = x_custom_model.create_instance(self.pool, cr)
|
||||
|
|
|
@ -177,7 +177,6 @@
|
|||
<field name="fax"/>
|
||||
<field name="email" widget="email"/>
|
||||
<field name="title" domain="[('domain', '=', 'contact')]"
|
||||
groups="base.group_no_one"
|
||||
options='{"no_open": True}' attrs="{'invisible': [('is_company','=', True)]}" />
|
||||
</group>
|
||||
</group>
|
||||
|
|
|
@ -36,6 +36,11 @@ import time
|
|||
import types
|
||||
from pprint import pformat
|
||||
|
||||
try:
|
||||
import psutil
|
||||
except ImportError:
|
||||
psutil = None
|
||||
|
||||
# TODO modules that import netsvc only for things from loglevels must be changed to use loglevels.
|
||||
from loglevels import *
|
||||
import tools
|
||||
|
@ -273,6 +278,9 @@ def dispatch_rpc(service_name, method, params):
|
|||
rpc_response_flag = rpc_response.isEnabledFor(logging.DEBUG)
|
||||
if rpc_request_flag or rpc_response_flag:
|
||||
start_time = time.time()
|
||||
start_rss, start_vms = 0, 0
|
||||
if psutil:
|
||||
start_rss, start_vms = psutil.Process(os.getpid()).get_memory_info()
|
||||
if rpc_request and rpc_response_flag:
|
||||
log(rpc_request,logging.DEBUG,'%s.%s'%(service_name,method), replace_request_password(params))
|
||||
|
||||
|
@ -282,10 +290,14 @@ def dispatch_rpc(service_name, method, params):
|
|||
|
||||
if rpc_request_flag or rpc_response_flag:
|
||||
end_time = time.time()
|
||||
end_rss, end_vms = 0, 0
|
||||
if psutil:
|
||||
end_rss, end_vms = psutil.Process(os.getpid()).get_memory_info()
|
||||
logline = '%s.%s time:%.3fs mem: %sk -> %sk (diff: %sk)' % (service_name, method, end_time - start_time, start_vms / 1024, end_vms / 1024, (end_vms - start_vms)/1024)
|
||||
if rpc_response_flag:
|
||||
log(rpc_response,logging.DEBUG,'%s.%s time:%.3fs '%(service_name,method,end_time - start_time), result)
|
||||
log(rpc_response,logging.DEBUG, logline, result)
|
||||
else:
|
||||
log(rpc_request,logging.DEBUG,'%s.%s time:%.3fs '%(service_name,method,end_time - start_time), replace_request_password(params), depth=1)
|
||||
log(rpc_request,logging.DEBUG, logline, replace_request_password(params), depth=1)
|
||||
|
||||
return result
|
||||
except openerp.exceptions.AccessError:
|
||||
|
|
|
@ -629,7 +629,8 @@ class MetaModel(type):
|
|||
self._module = module_name
|
||||
|
||||
# Remember which models to instantiate for this module.
|
||||
self.module_to_models.setdefault(self._module, []).append(self)
|
||||
if not self._custom:
|
||||
self.module_to_models.setdefault(self._module, []).append(self)
|
||||
|
||||
|
||||
# Definition of log access columns, automatically added to models if
|
||||
|
@ -666,6 +667,7 @@ class BaseModel(object):
|
|||
_name = None
|
||||
_columns = {}
|
||||
_constraints = []
|
||||
_custom = False
|
||||
_defaults = {}
|
||||
_rec_name = None
|
||||
_parent_name = 'parent_id'
|
||||
|
@ -942,7 +944,8 @@ class BaseModel(object):
|
|||
# managed by the metaclass.
|
||||
module_model_list = MetaModel.module_to_models.setdefault(cls._module, [])
|
||||
if cls not in module_model_list:
|
||||
module_model_list.append(cls)
|
||||
if not cls._custom:
|
||||
module_model_list.append(cls)
|
||||
|
||||
# Since we don't return an instance here, the __init__
|
||||
# method won't be called.
|
||||
|
|
|
@ -30,7 +30,7 @@ RELEASE_LEVELS_DISPLAY = {ALPHA: ALPHA,
|
|||
# properly comparable using normal operators, for example:
|
||||
# (6,1,0,'beta',0) < (6,1,0,'candidate',1) < (6,1,0,'candidate',2)
|
||||
# (6,1,0,'candidate',2) < (6,1,0,'final',0) < (6,1,2,'final',0)
|
||||
version_info = (7, 0, 0, ALPHA, 0)
|
||||
version_info = (7, 0, 0, FINAL, 0)
|
||||
version = '.'.join(map(str, version_info[:2])) + RELEASE_LEVELS_DISPLAY[version_info[3]] + str(version_info[4] or '')
|
||||
serie = major_version = '.'.join(map(str, version_info[:2]))
|
||||
|
||||
|
|
|
@ -617,9 +617,7 @@ class report_sxw(report_rml, preprocess.report):
|
|||
create_doc = self.generators[mime_type]
|
||||
odt = etree.tostring(create_doc(rml_dom, rml_parser.localcontext),
|
||||
encoding='utf-8', xml_declaration=True)
|
||||
sxw_z = zipfile.ZipFile(sxw_io, mode='a')
|
||||
sxw_z.writestr('content.xml', odt)
|
||||
sxw_z.writestr('meta.xml', meta)
|
||||
sxw_contents = {'content.xml':odt, 'meta.xml':meta}
|
||||
|
||||
if report_xml.header:
|
||||
#Add corporate header/footer
|
||||
|
@ -638,12 +636,25 @@ class report_sxw(report_rml, preprocess.report):
|
|||
rml_parser._add_header(odt)
|
||||
odt = etree.tostring(odt, encoding='utf-8',
|
||||
xml_declaration=True)
|
||||
sxw_z.writestr('styles.xml', odt)
|
||||
sxw_contents['styles.xml'] = odt
|
||||
finally:
|
||||
rml_file.close()
|
||||
sxw_z.close()
|
||||
final_op = sxw_io.getvalue()
|
||||
|
||||
# create a new, empty zip: copy the untouched template entries, then write the generated sxw contents, to avoid duplicate entries
|
||||
sxw_out = StringIO.StringIO()
|
||||
sxw_out_zip = zipfile.ZipFile(sxw_out, mode='w')
|
||||
sxw_template_zip = zipfile.ZipFile (sxw_io, 'r')
|
||||
for item in sxw_template_zip.infolist():
|
||||
if item.filename not in sxw_contents:
|
||||
buffer = sxw_template_zip.read(item.filename)
|
||||
sxw_out_zip.writestr(item.filename, buffer)
|
||||
for item_filename, buffer in sxw_contents.iteritems():
|
||||
sxw_out_zip.writestr(item_filename, buffer)
|
||||
sxw_template_zip.close()
|
||||
sxw_out_zip.close()
|
||||
final_op = sxw_out.getvalue()
|
||||
sxw_io.close()
|
||||
sxw_out.close()
|
||||
return final_op, mime_type
|
||||
|
||||
def create_single_html2html(self, cr, uid, ids, data, report_xml, context=None):
|
||||
|
|
|
@ -355,12 +355,17 @@ class WorkerCron(Worker):
|
|||
time.sleep(interval)
|
||||
|
||||
def process_work(self):
|
||||
rpc_request = logging.getLogger('openerp.netsvc.rpc.request')
|
||||
rpc_request_flag = rpc_request.isEnabledFor(logging.DEBUG)
|
||||
_logger.debug("WorkerCron (%s) polling for jobs", self.pid)
|
||||
if config['db_name']:
|
||||
db_names = config['db_name'].split(',')
|
||||
else:
|
||||
db_names = openerp.netsvc.ExportService._services['db'].exp_list(True)
|
||||
for db_name in db_names:
|
||||
if rpc_request_flag:
|
||||
start_time = time.time()
|
||||
start_rss, start_vms = psutil.Process(os.getpid()).get_memory_info()
|
||||
while True:
|
||||
# acquired = openerp.addons.base.ir.ir_cron.ir_cron._acquire_job(db_name)
|
||||
# TODO why isn't openerp.addons.base defined?
|
||||
|
@ -371,7 +376,12 @@ class WorkerCron(Worker):
|
|||
# don't keep cursors in multi-database mode
|
||||
if len(db_names) > 1:
|
||||
openerp.sql_db.close_db(db_name)
|
||||
# TODO Each job should be considered as one request instead of each db
|
||||
if rpc_request_flag:
|
||||
end_time = time.time()
|
||||
end_rss, end_vms = psutil.Process(os.getpid()).get_memory_info()
|
||||
logline = '%s time:%.3fs mem: %sk -> %sk (diff: %sk)' % (db_name, end_time - start_time, start_vms / 1024, end_vms / 1024, (end_vms - start_vms)/1024)
|
||||
_logger.debug("WorkerCron (%s) %s", self.pid, logline)
|
||||
# TODO Each job should be considered as one request instead of each run
|
||||
self.request_count += 1
|
||||
|
||||
def start(self):
|
||||
|
|
|
@ -43,6 +43,47 @@ test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></fo
|
|||
<a href="javascript:alert('malicious code')">test link</a>
|
||||
"""
|
||||
|
||||
EDI_LIKE_HTML_SOURCE = """<div style="font-family: 'Lucica Grande', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF; ">
|
||||
<p>Hello ${object.partner_id.name},</p>
|
||||
<p>A new invoice is available for you: </p>
|
||||
<p style="border-left: 1px solid #8e0000; margin-left: 30px;">
|
||||
<strong>REFERENCES</strong><br />
|
||||
Invoice number: <strong>${object.number}</strong><br />
|
||||
Invoice total: <strong>${object.amount_total} ${object.currency_id.name}</strong><br />
|
||||
Invoice date: ${object.date_invoice}<br />
|
||||
Order reference: ${object.origin}<br />
|
||||
Your contact: <a href="mailto:${object.user_id.email or ''}?subject=Invoice%20${object.number}">${object.user_id.name}</a>
|
||||
</p>
|
||||
<br/>
|
||||
<p>It is also possible to directly pay with Paypal:</p>
|
||||
<a style="margin-left: 120px;" href="${object.paypal_url}">
|
||||
<img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"/>
|
||||
</a>
|
||||
<br/>
|
||||
<p>If you have any question, do not hesitate to contact us.</p>
|
||||
<p>Thank you for choosing ${object.company_id.name or 'us'}!</p>
|
||||
<br/>
|
||||
<br/>
|
||||
<div style="width: 375px; margin: 0px; padding: 0px; background-color: #8E0000; border-top-left-radius: 5px 5px; border-top-right-radius: 5px 5px; background-repeat: repeat no-repeat;">
|
||||
<h3 style="margin: 0px; padding: 2px 14px; font-size: 12px; color: #DDD;">
|
||||
<strong style="text-transform:uppercase;">${object.company_id.name}</strong></h3>
|
||||
</div>
|
||||
<div style="width: 347px; margin: 0px; padding: 5px 14px; line-height: 16px; background-color: #F2F2F2;">
|
||||
<span style="color: #222; margin-bottom: 5px; display: block; ">
|
||||
${object.company_id.street}<br/>
|
||||
${object.company_id.street2}<br/>
|
||||
${object.company_id.zip} ${object.company_id.city}<br/>
|
||||
${object.company_id.state_id and ('%s, ' % object.company_id.state_id.name) or ''} ${object.company_id.country_id.name or ''}<br/>
|
||||
</span>
|
||||
<div style="margin-top: 0px; margin-right: 0px; margin-bottom: 0px; margin-left: 0px; padding-top: 0px; padding-right: 0px; padding-bottom: 0px; padding-left: 0px; ">
|
||||
Phone: ${object.company_id.phone}
|
||||
</div>
|
||||
<div>
|
||||
Web : <a href="${object.company_id.website}">${object.company_id.website}</a>
|
||||
</div>
|
||||
</div>
|
||||
</div></body></html>"""
|
||||
|
||||
TEXT_MAIL1 = """I contact you about our meeting for tomorrow. Here is the schedule I propose:
|
||||
9 AM: brainstorming about our new amazing business app</span></li>
|
||||
9.45 AM: summary
|
||||
|
@ -126,23 +167,85 @@ bert.tartopoils@miam.miam
|
|||
class TestSanitizer(unittest2.TestCase):
|
||||
""" Test the html sanitizer that filters html to remove unwanted attributes """
|
||||
|
||||
def test_simple(self):
|
||||
x = "yop"
|
||||
self.assertEqual(x, html_sanitize(x))
|
||||
def test_basic_sanitizer(self):
|
||||
cases = [
|
||||
("yop", "<p>yop</p>"), # simple
|
||||
("lala<p>yop</p>xxx", "<div><p>lala</p><p>yop</p>xxx</div>"), # trailing text
|
||||
("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci",
|
||||
u"<p>Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci</p>"), # unicode
|
||||
]
|
||||
for content, expected in cases:
|
||||
html = html_sanitize(content)
|
||||
self.assertEqual(html, expected, 'html_sanitize is broken')
|
||||
|
||||
def test_trailing_text(self):
|
||||
x = 'lala<p>yop</p>xxx'
|
||||
self.assertEqual(x, html_sanitize(x))
|
||||
def test_evil_malicious_code(self):
|
||||
# taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Tests
|
||||
cases = [
|
||||
("<IMG SRC=javascript:alert('XSS')>"), # no quotes and semicolons
|
||||
("<IMG SRC=javascript:alert('XSS')>"), # UTF-8 Unicode encoding
|
||||
("<IMG SRC=javascript:alert('XSS')>"), # hex encoding
|
||||
("<IMG SRC=\"jav
ascript:alert('XSS');\">"), # embedded carriage return
|
||||
("<IMG SRC=\"jav
ascript:alert('XSS');\">"), # embedded newline
|
||||
("<IMG SRC=\"jav ascript:alert('XSS');\">"), # embedded tab
|
||||
("<IMG SRC=\"jav	ascript:alert('XSS');\">"), # embedded encoded tab
|
||||
("<IMG SRC=\"  javascript:alert('XSS');\">"), # spaces and meta-characters
|
||||
("<IMG SRC=\"javascript:alert('XSS')\""), # half-open html
|
||||
("<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">"), # malformed tag
|
||||
("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"), # non-alpha-non-digits
|
||||
("<SCRIPT/SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"), # non-alpha-non-digits
|
||||
("<<SCRIPT>alert(\"XSS\");//<</SCRIPT>"), # extraneous open brackets
|
||||
("<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >"), # non-closing script tags
|
||||
("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">"), # input image
|
||||
("<BODY BACKGROUND=\"javascript:alert('XSS')\">"), # body image
|
||||
("<IMG DYNSRC=\"javascript:alert('XSS')\">"), # img dynsrc
|
||||
("<IMG LOWSRC=\"javascript:alert('XSS')\">"), # img lowsrc
|
||||
("<TABLE BACKGROUND=\"javascript:alert('XSS')\">"), # table
|
||||
("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">"), # td
|
||||
("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">"), # div background
|
||||
("<DIV STYLE=\"background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029\">"), # div background with unicoded exploit
|
||||
("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">"), # div background + extra characters
|
||||
("<IMG SRC='vbscript:msgbox(\"XSS\")'>"), # VBscrip in an image
|
||||
("<BODY ONLOAD=alert('XSS')>"), # event handler
|
||||
("<BR SIZE=\"&{alert('XSS')}\>"), # & javascript includes
|
||||
("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">"), # style sheet
|
||||
("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">"), # remote style sheet
|
||||
("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>"), # remote style sheet 2
|
||||
("<META HTTP-EQUIV=\"Link\" Content=\"<http://ha.ckers.org/xss.css>; REL=stylesheet\">"), # remote style sheet 3
|
||||
("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>"), # remote style sheet 4
|
||||
("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">"), # style attribute using a comment to break up expression
|
||||
("""<!--[if gte IE 4]>
|
||||
<SCRIPT>alert('XSS');</SCRIPT>
|
||||
<![endif]-->"""), # down-level hidden block
|
||||
]
|
||||
for content in cases:
|
||||
html = html_sanitize(content)
|
||||
self.assertNotIn('javascript', html, 'html_sanitize did not remove a malicious javascript')
|
||||
self.assertTrue('ha.ckers.org' not in html or 'http://ha.ckers.org/xss.css' in html, 'html_sanitize did not remove a malicious code in %s (%s)' % (content, html))
|
||||
|
||||
def test_html(self):
|
||||
sanitized_html = html_sanitize(HTML_SOURCE)
|
||||
for tag in ['<font>', '<div>', '<b>', '<i>', '<u>', '<strike>', '<li>', '<blockquote>', '<a href']:
|
||||
for tag in ['<div', '<b', '<i', '<u', '<strike', '<li', '<blockquote', '<a href']:
|
||||
self.assertIn(tag, sanitized_html, 'html_sanitize stripped too much of original html')
|
||||
for attr in ['style', 'javascript']:
|
||||
for attr in ['javascript']:
|
||||
self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
|
||||
|
||||
def test_unicode(self):
|
||||
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
|
||||
emails =[("Charles <charles.bidule@truc.fr>", "Charles <charles.bidule@truc.fr>"),
|
||||
("Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>", "Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>"),
|
||||
("Technical <service/technical+2@open.com>", "Technical <service/technical+2@open.com>"),
|
||||
("Div nico <div-nico@open.com>", "Div nico <div-nico@open.com>")]
|
||||
for email in emails:
|
||||
self.assertIn(email[1], html_sanitize(email[0]), 'html_sanitize stripped emails of original html')
|
||||
|
||||
|
||||
def test_edi_source(self):
|
||||
html = html_sanitize(EDI_LIKE_HTML_SOURCE)
|
||||
self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
|
||||
'html_sanitize removed valid style attribute')
|
||||
self.assertIn('<span style="color: #222; margin-bottom: 5px; display: block; ">', html,
|
||||
'html_sanitize removed valid style attribute')
|
||||
self.assertIn('img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"', html,
|
||||
'html_sanitize removed valid img')
|
||||
self.assertNotIn('</body></html>', html, 'html_sanitize did not remove extra closing tags')
|
||||
|
||||
|
||||
class TestCleaner(unittest2.TestCase):
|
||||
|
@ -181,6 +284,7 @@ class TestCleaner(unittest2.TestCase):
|
|||
new_html = html_email_clean(u'<?xml version="1.0" encoding="iso-8859-1"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n <head>\n <title>404 - Not Found</title>\n </head>\n <body>\n <h1>404 - Not Found</h1>\n </body>\n</html>\n')
|
||||
self.assertNotIn('encoding', new_html, 'html_email_cleaner did not remove correctly encoding attributes')
|
||||
|
||||
|
||||
class TestHtmlTools(unittest2.TestCase):
|
||||
""" Test some of our generic utility functions about html """
|
||||
|
||||
|
|
|
@ -283,10 +283,10 @@ class configmanager(object):
|
|||
help="Specify the number of workers, 0 disable prefork mode.",
|
||||
type="int")
|
||||
group.add_option("--limit-memory-soft", dest="limit_memory_soft", my_default=640 * 1024 * 1024,
|
||||
help="Maximum allowed virtual memory per worker, when reached the worker be reset after the current request (default 640M).",
|
||||
help="Maximum allowed virtual memory per worker, when reached the worker be reset after the current request (default 671088640 aka 640MB).",
|
||||
type="int")
|
||||
group.add_option("--limit-memory-hard", dest="limit_memory_hard", my_default=768 * 1024 * 1024,
|
||||
help="Maximum allowed virtual memory per worker, when reached, any memory allocation will fail (default 768M).",
|
||||
help="Maximum allowed virtual memory per worker, when reached, any memory allocation will fail (default 805306368 aka 768MB).",
|
||||
type="int")
|
||||
group.add_option("--limit-time-cpu", dest="limit_time_cpu", my_default=60,
|
||||
help="Maximum allowed CPU time per request (default 60).",
|
||||
|
|
|
@ -23,8 +23,8 @@ from lxml import etree
|
|||
import cgi
|
||||
import logging
|
||||
import lxml.html
|
||||
import lxml.html.clean as clean
|
||||
import openerp.pooler as pooler
|
||||
import operator
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
|
@ -40,71 +40,32 @@ _logger = logging.getLogger(__name__)
|
|||
# HTML Sanitizer
|
||||
#----------------------------------------------------------
|
||||
|
||||
tags_to_kill = ["script", "head", "meta", "title", "link", "style", "frame", "iframe", "base", "object", "embed"]
|
||||
tags_to_remove = ['html', 'body', 'font']
|
||||
|
||||
|
||||
def html_sanitize(src):
|
||||
if not src:
|
||||
return src
|
||||
src = ustr(src, errors='replace')
|
||||
root = lxml.html.fromstring(u"<div>%s</div>" % src)
|
||||
result = handle_element(root)
|
||||
res = []
|
||||
for element in children(result[0]):
|
||||
if isinstance(element, basestring):
|
||||
res.append(element)
|
||||
else:
|
||||
element.tail = ""
|
||||
res.append(lxml.html.tostring(element))
|
||||
return ''.join(res)
|
||||
|
||||
# FIXME: shouldn't this be a whitelist rather than a blacklist?!
|
||||
to_remove = set(["script", "head", "meta", "title", "link", "img"])
|
||||
to_unwrap = set(["html", "body"])
|
||||
|
||||
javascript_regex = re.compile(r"^\s*javascript\s*:.*$", re.IGNORECASE)
|
||||
|
||||
def handle_a(el, new):
|
||||
href = el.get("href", "#")
|
||||
if javascript_regex.search(href):
|
||||
href = "#"
|
||||
new.set("href", href)
|
||||
|
||||
special = {
|
||||
"a": handle_a,
|
||||
}
|
||||
|
||||
def handle_element(element):
|
||||
if isinstance(element, basestring):
|
||||
return [element]
|
||||
if element.tag in to_remove:
|
||||
return []
|
||||
if element.tag in to_unwrap:
|
||||
return reduce(operator.add, [handle_element(x) for x in children(element)])
|
||||
result = lxml.html.fromstring("<%s />" % element.tag)
|
||||
for c in children(element):
|
||||
append_to(handle_element(c), result)
|
||||
if element.tag in special:
|
||||
special[element.tag](element, result)
|
||||
return [result]
|
||||
|
||||
def children(node):
|
||||
res = []
|
||||
if node.text is not None:
|
||||
res.append(node.text)
|
||||
for child_node in node.getchildren():
|
||||
res.append(child_node)
|
||||
if child_node.tail is not None:
|
||||
res.append(child_node.tail)
|
||||
return res
|
||||
|
||||
def append_to(elements, dest_node):
|
||||
for element in elements:
|
||||
if isinstance(element, basestring):
|
||||
children = dest_node.getchildren()
|
||||
if len(children) == 0:
|
||||
dest_node.text = element
|
||||
else:
|
||||
children[-1].tail = element
|
||||
else:
|
||||
dest_node.append(element)
|
||||
# html encode email tags
|
||||
part = re.compile(r"(<[^<>]+@[^<>]+>)", re.IGNORECASE | re.DOTALL)
|
||||
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
|
||||
|
||||
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
|
||||
try:
|
||||
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)
|
||||
cleaned = cleaner.clean_html(src)
|
||||
except TypeError, e:
|
||||
# lxml.clean version < 2.3.1 does not have a kill_tags attribute
|
||||
# to remove in 2014
|
||||
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, remove_tags=tags_to_kill+tags_to_remove)
|
||||
cleaned = cleaner.clean_html(src)
|
||||
except:
|
||||
_logger.warning('html_sanitize failed to parse %s' % (src))
|
||||
cleaned = '<p>Impossible to parse</p>'
|
||||
return cleaned
|
||||
|
||||
|
||||
#----------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue