From bb82904ba3b4f63f6c099c4a0c7b6b1814c61a33 Mon Sep 17 00:00:00 2001 From: Olivier Dony Date: Fri, 10 Dec 2010 00:07:42 +0100 Subject: [PATCH] [FIX] fields.function: make sure all binary values can always be serialized as valid XML Normally, binary fields should be 7-bit ASCII base64-encoded data, but sometimes it's not the case, so we do additional sanity checks to make sure the binary values can pass safely via xmlrpc as strings. As a last resort we coerce the binary values to unicode to make sure they can be safely serialized as utf-8-encoded values, always valid XML characters. When this happens, decoding on the other endpoint is not likely to produce the expected output, but this is just a safety mechanism (in these cases base64 data or xmlrpc.Binary values should be returned instead by the function field). In a future version we should probably switch to using XMLRPC Binary types always for passing fields.binary values, but this requires more refactoring. lp bug: https://launchpad.net/bugs/670778 fixed bzr revid: odo@openerp.com-20101209230742-gwf8e4zvmk43k6ln --- bin/osv/fields.py | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/bin/osv/fields.py b/bin/osv/fields.py index 135296779b1..423bd9cf1d1 100644 --- a/bin/osv/fields.py +++ b/bin/osv/fields.py @@ -33,12 +33,12 @@ # import datetime as DT import string -import netsvc import sys import warnings - +import xmlrpclib from psycopg2 import Binary +import netsvc import tools from tools.translate import _ @@ -673,6 +673,37 @@ def get_nice_size(a): size = 0 return (x, tools.human_size(size)) +def sanitize_binary_value(dict_item): + # binary fields should be 7-bit ASCII base64-encoded data, + # but we do additional sanity checks to make sure the values + # are not something else that won't pass via xmlrpc + index, value = dict_item + if isinstance(value, (xmlrpclib.Binary, tuple, list, dict)): + # these builtin types are meant to pass untouched 
+ return index, value + + # For all other cases, handle the value as a binary string: + # it could be a 7-bit ASCII string (e.g. base64 data), but also + # any 8-bit content from files, with byte values that cannot + # be passed inside XML! + # See for more info: + # - http://bugs.python.org/issue10066 + # - http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char + # + # One solution is to convert the byte-string to unicode, + # so it gets serialized as utf-8 encoded data (always valid XML). + # If invalid XML byte values were present, tools.ustr() uses + # the Latin-1 codec as fallback, which converts any 8-bit + # byte value, resulting in valid utf-8-encoded bytes + # in the end: + # >>> unicode('\xe1','latin1').encode('utf8') == '\xc3\xa1' + # Note: when this happens, decoding on the other endpoint + # is not likely to produce the expected output, but this is + # just a safety mechanism (in these cases base64 data or + # xmlrpc.Binary values should be used instead) + return index, tools.ustr(value) + + # --------------------------------------------------------- # Function fields # --------------------------------------------------------- @@ -763,9 +794,13 @@ class function(_column): if res[r] and res[r] in dict_names: res[r] = (res[r], dict_names[res[r]]) - if self._type == 'binary' and context.get('bin_size', False): - # convert the data returned by the function with the size of that data... - res = dict(map( get_nice_size, res.items())) + if self._type == 'binary': + if context.get('bin_size', False): + # client requests only the size of binary fields + res = dict(map(get_nice_size, res.items())) + else: + res = dict(map(sanitize_binary_value, res.items())) + if self._type == "integer": for r in res.keys(): # Converting value into string so that it does not affect XML-RPC Limits