odoo/addons/base_report_designer/openerp_sxw2rml/openerp_sxw2rml.py

# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c):
#
#     2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
#     2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
#!/usr/bin/python
"""
OpenERP SXW2RML - The OpenERP's report engine

OpenERP SXW2RML is part of the OpenERP Report Project.
OpenERP Report is a module that allows you to render high-quality PDF documents
from an OpenOffice template (.sxw) and any relational database.
"""
__version__ = '0.9'


import re
import string
import os
import zipfile
import xml.dom.minidom
from reportlab.lib.units import toLength
import base64
import copy

class DomApiGeneral:
    """General DOM API utilities.

    Small helpers for splitting "value + unit" strings, filtering the
    direct children of a DOM node and converting strings to UTF-8.
    """

    def __init__(self, content_string="", file=""):
        """Store the content string and compile the length regex.

        :param content_string: kept on the instance as ``content_string``.
        :param file: accepted for backward compatibility; not used here.
        """
        self.content_string = content_string
        # Group 1: everything up to and including a digit; group 2: the unit.
        self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")

    def _unitTuple(self, string):
        """Split values and units to a tuple.

        Returns ``(value, unit)`` for the first match of the length regex,
        or ``(string, "")`` when the string contains no such match.
        """
        matches = self.re_digits.findall(string)
        if not matches:
            return (string, "")
        return matches[0]

    def stringPercentToFloat(self, string):
        """Convert a percentage string such as ``"115%"`` to ``1.15``."""
        return float(string.replace("%", "")) / 100

    def findChildrenByName(self, parent, name, attr_dict=None):
        """Return the direct element children of ``parent`` named ``name``.

        Does not work recursively.
        Optional: also test for certain attribute/value pairs given in
        ``attr_dict``; only children matching all of them are returned.
        """
        children = [
            c for c in parent.childNodes
            if c.nodeType == c.ELEMENT_NODE and c.nodeName == name
        ]
        if not attr_dict:
            return children
        return self._selectForAttributes(nodelist=children, attr_dict=attr_dict)

    def _selectForAttributes(self, nodelist, attr_dict):
        """Helper function: keep the nodes whose attributes all match ``attr_dict``."""
        return [
            n for n in nodelist
            if all(n.getAttribute(a) == v for a, v in attr_dict.items())
        ]

    def _stringToTuple(self, s):
        """Helper function: parse ``"x,y"`` into ``(int(x), int(y))``.

        Returns ``None`` when ``s`` cannot be parsed that way.
        """
        try:
            # str.split instead of the deprecated string.split() function.
            parts = s.split(",")
            return int(parts[0]), int(parts[1])
        except (AttributeError, IndexError, TypeError, ValueError):
            return None

    def _tupleToString(self, t):
        """Format the first two items of ``t`` as a UTF-8 ``"x,y"`` string.

        Returns ``None`` when ``t`` cannot be formatted or encoded.
        """
        try:
            return self.openOfficeStringUtf8("%s,%s" % (t[0], t[1]))
        except Exception:
            return None

    def _lengthToFloat(self, value):
        """Convert a length string to a rounded number via ``toLength``.

        Values that do not match the length regex, or that ``toLength``
        rejects, are returned unchanged.
        """
        v = value
        if not self.re_digits.search(v):
            return v
        if v[-4:] == "inch":
            # OO files use "inch" instead of "in" in Reportlab units
            v = v[:-2]
        try:
            return round(toLength(v))
        except Exception:
            # Best effort: leave values that cannot be converted untouched.
            return v

    def openOfficeStringUtf8(self, string):
        """Return ``string`` encoded as UTF-8.

        Unicode objects are encoded directly; anything else is first
        decoded as cp1252.
        """
        if isinstance(string, unicode):
            return string.encode("utf-8")
        return unicode(string, "cp1252").encode("utf-8")

class DomApi(DomApiGeneral):
    """This class provides a DOM-API for XML-Files from an SXW-Archive."""

    def __init__(self, xml_content, xml_styles):
        """Parse both XML documents and build the style dictionaries.

        :param xml_content: XML text passed to ``minidom.parseString``
            (the archive's content.xml).
        :param xml_styles: XML text passed to ``minidom.parseString``
            (the archive's styles.xml).
        """
        DomApiGeneral.__init__(self)
        self.content_dom = xml.dom.minidom.parseString(xml_content)
        self.styles_dom = xml.dom.minidom.parseString(xml_styles)
        body = self.content_dom.getElementsByTagName("office:body")
        self.body = body and body[0]

        # TODO:
        self.style_dict = {}
        self.style_properties_dict = {}

        # ******** always use the following order:
        self.buildStyleDict()
        self.buildStylePropertiesDict()
        # Always define page_master so later code can test it explicitly.
        # A "style:page-layout" element takes precedence over a
        # "style:page-master" element when both are present.
        self.page_master = None
        for tag in ("style:page-master", "style:page-layout"):
            found = self.styles_dom.getElementsByTagName(tag)
            if found:
                self.page_master = found[0]
        self.document = self.content_dom.getElementsByTagName("office:document-content")[0]

    def buildStylePropertiesDict(self):
        """Fill ``style_properties_dict`` with the resolved properties of every style."""
        for style_name in self.style_dict:
            self.style_properties_dict[style_name] = self.getStylePropertiesDict(style_name)

    def updateWithPercents(self, dict, updatedict):
        """Merge ``updatedict`` into ``dict``, resolving percentage values.

        Sometimes you find values like "115%" in the style hierarchy. When
        ``dict`` already holds a length for the same key, the percentage is
        applied to that length; otherwise the new value is copied as is.
        ``dict`` is modified in place; nothing is returned.
        """
        if not updatedict:
            # no style hierarchies for this style? =>
            return
        new_updatedict = copy.copy(updatedict)
        for key in list(new_updatedict.keys()):
            new_value = new_updatedict[key]
            try:
                if new_value.find("%") != -1 and key in dict:
                    match = self.re_digits.search(dict[key])
                    number = float(match.group(1))
                    unit = match.group(2)
                    new_number = self.stringPercentToFloat(new_value) * number
                    if unit == "pt":
                        # no floats allowed for "pt"
                        # OOo just takes the int, does not round (try it out!)
                        new_number = int(new_number)
                    new_updatedict[key] = "%s%s" % (new_number, unit)
                else:
                    dict[key] = new_value
            except Exception:
                # Value could not be interpreted: fall back to a plain copy.
                dict[key] = new_value
        dict.update(new_updatedict)

    def normalizeStyleProperties(self):
        """Transfer all style:style-properties attributes from the
        self.style_properties_hierarchical dict to the automatic-styles
        from content.xml. Use this function to preprocess content.xml for
        XSLT transformations etc. Do not try to implement this function
        with XSLT - believe me, it's a terrible task..."""
        styles_styles = self.styles_dom.getElementsByTagName("style:style")
        automatic_styles = self.content_dom.getElementsByTagName("office:automatic-styles")[0]
        for style in styles_styles:
            automatic_styles.appendChild(style.cloneNode(deep=1))
        content_styles = self.content_dom.getElementsByTagName("style:style")
        # these are the content_styles with styles_styles added!!!
        for style in content_styles:
            props_nodes = self.findChildrenByName(style, "style:properties")
            if not props_nodes:
                # some derived automatic styles do not have "style:properties":
                style.appendChild(self.content_dom.createElement("style:properties"))
                props_nodes = self.findChildrenByName(style, "style:properties")
            props_node = props_nodes[0]
            style_name = style.getAttribute("style:name").encode("utf-8")
            properties = self.style_properties_dict[style_name] or {}
            for attribute, value in properties.items():
                props_node.setAttribute(self.openOfficeStringUtf8(attribute),
                                        self.openOfficeStringUtf8(value))

    def transferStylesXml(self):
        """Transfer certain sub-trees from styles.xml to the normalized content.xml
        (see above). It is not necessary to do this - for example - with paragraph styles.
        the "normalized" style properties contain all information needed for
        further processing."""
        # TODO: What about table styles etc.?
        outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
        holder = self.content_dom.createElement("transferredfromstylesxml")
        # The holder element is placed directly in front of the body.
        self.document.insertBefore(holder, self.body)
        page_master = getattr(self, "page_master", None)
        if page_master is None:
            # Without a page master nothing is transferred at all.
            return
        holder.appendChild(page_master.cloneNode(deep=1))
        if outline_styles:
            holder.appendChild(outline_styles[0].cloneNode(deep=1))

    def normalizeLength(self):
        """Normalize all lengths to floats (i.e: 1 inch = 72).
        Always use this after "normalizeContent" and "transferStyles"!"""
        # TODO: The complex attributes of table cell styles are not transferred yet.
        for element in self.content_dom.getElementsByTagName("*"):
            # Use the public attribute map rather than the private _attrs dict.
            for name in list(element.attributes.keys()):
                value = element.getAttribute(name)
                # convert float to string first!
                element.setAttribute(name, "%s" % self._lengthToFloat(value))

    def normalizeTableColumns(self):
        """Handle this strange table:number-columns-repeated attribute.

        Each repeated column is expanded into that many sibling copies and
        the attribute is removed.
        """
        columns = self.content_dom.getElementsByTagName("table:table-column")
        for column in columns:
            if column.hasAttribute("table:number-columns-repeated"):
                number = int(column.getAttribute("table:number-columns-repeated"))
                column.removeAttribute("table:number-columns-repeated")
                for _ in range(number - 1):
                    column.parentNode.insertBefore(column.cloneNode(deep=1), column)

    def buildStyleDict(self):
        """Store all style:style-nodes from content.xml and styles.xml in self.style_dict.
        Caution: in this dict the nodes from two dom apis are merged!"""
        for dom in (self.styles_dom, self.content_dom):
            for style in dom.getElementsByTagName("style:style"):
                name = style.getAttribute("style:name").encode("utf-8")
                self.style_dict[name] = style
        return True

    def toxml(self):
        """Return the content DOM serialized as UTF-8 encoded XML."""
        return self.content_dom.toxml(encoding="utf-8")

    def getStylePropertiesDict(self, style_name):
        """Return the properties of ``style_name`` merged with its ancestors'.

        Parent properties are collected first (recursively, through
        ``style:parent-style-name``) and then overridden by the style's own
        ``*properties`` child elements. Raises ``KeyError`` when
        ``style_name`` (or a referenced parent) is not in ``style_dict``.
        """
        style = self.style_dict[style_name]
        res = {}

        if style.hasAttribute("style:parent-style-name"):
            parent = style.getAttribute("style:parent-style-name").encode("utf-8")
            res = self.getStylePropertiesDict(parent)

        for child in style.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.nodeName.find("properties") > 0:
                for attr in list(child.attributes.keys()):
                    res[attr] = child.getAttribute(attr).encode("utf-8")
        return res

class PyOpenOffice(object):
    """This is the main class which provides all functionality."""

    def __init__(self, path='.', save_pict=False):
        """
        :param path: directory where pictures are written when ``save_pict`` is set.
        :param save_pict: when true, ``oo_read`` also writes each picture to ``path``.
        """
        self.path = path
        self.save_pict = save_pict
        # Picture name (without the "Pictures/" prefix) -> raw content.
        self.images = {}

    def oo_read(self, fname):
        """Read content.xml, styles.xml and the pictures from the archive.

        Pictures are collected in ``self.images`` and, when ``save_pict`` is
        set, also written into ``self.path``.

        :param fname: archive path (or file object) accepted by ``zipfile.ZipFile``.
        :return: tuple ``(content, style)`` with the raw XML of both files.
        """
        archive = zipfile.ZipFile(fname, "r")
        try:
            content = archive.read('content.xml')
            style = archive.read('styles.xml')
            for member in archive.namelist():
                if member[:9] == 'Pictures/' and len(member) > 10:
                    pic_content = archive.read(member)
                    self.images[member[9:]] = pic_content
                    if self.save_pict:
                        # basename() keeps the written file inside self.path.
                        target = os.path.join(self.path, os.path.basename(member))
                        with open(target, "wb") as out:
                            out.write(pic_content)
        finally:
            # Close the archive even when a member is missing or a write fails.
            archive.close()
        return content, style

    def oo_replace(self, content):
        """Drop empty ``<para/>`` tags and split paragraphs at line breaks.

        :param content: XML text to rewrite.
        :return: the rewritten text.
        """
        regex = [
            (r"<para[^>]*/>", ""),
            # Python's re.sub needs \g<n> backreferences; "$1" would be
            # inserted literally. The attribute group stops at the tag end.
            (r"<para([^>]*)>(.*?)<text:line-break[^>]*/>",
             r"<para\g<1>>\g<2></para><para\g<1>>"),
        ]
        for pattern, replacement in regex:
            content = re.sub(pattern, replacement, content)
        return content

    def unpackNormalize(self, sourcefile):
        """Read ``sourcefile`` and return its normalized content XML.

        Runs the DomApi normalization steps in a fixed order: style
        properties, styles.xml transfer, lengths, table columns.
        """
        content, styles = self.oo_read(sourcefile)
        content = self.oo_replace(content)
        dom = DomApi(content, styles)
        dom.normalizeStyleProperties()
        dom.transferStylesXml()
        dom.normalizeLength()
        dom.normalizeTableColumns()
        return dom.toxml()

def sxw2rml(sxw_file, xsl, output='.', save_pict=False):
    """Transform an SXW/ODT file with the given XSL stylesheet.

    The archive is unpacked and normalized with ``PyOpenOffice``, the
    stylesheet is applied, and every picture of the archive is appended,
    base64 encoded, to an ``<images>`` element under
    ``/document/stylesheet`` when that element exists in the result.

    :param sxw_file: archive handed to ``PyOpenOffice.unpackNormalize``.
    :param xsl: XSL stylesheet source as a string.
    :param output: directory handed to ``PyOpenOffice`` for picture output.
    :param save_pict: whether pictures are also written to ``output``.
    :return: ``str(result)``, or the raw XSLT result object when it cannot
        be converted to a string.
    """
    # Imported lazily so the module can be loaded without these packages.
    from lxml import etree
    from StringIO import StringIO

    tool = PyOpenOffice(output, save_pict=save_pict)
    normalized = tool.unpackNormalize(sxw_file)

    transform = etree.XSLT(etree.parse(StringIO(xsl)))
    result = transform(etree.parse(StringIO(normalized)))
    stylesheets = etree.XPathEvaluator(result)("/document/stylesheet")

    if stylesheets:
        images = etree.Element("images")
        for name, data in tool.images.items():
            node = etree.Element('image', name=name)
            node.text = base64.encodestring(data)
            images.append(node)
        stylesheets[0].append(images)

    try:
        return str(result)
    except Exception:
        # Fall back to the result object instead of failing, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        return result

# Command line entry point: convert one .sxw/.odt file and print the result.
if __name__ == "__main__":
    import optparse
    import sys

    parser = optparse.OptionParser(
        version="OpenERP Report v%s" % __version__,
        usage='openerp_sxw2rml.py [options] file.sxw')
    # NOTE(review): "-v" has no action, so it expects a value, and
    # opt.verbose is never read below - confirm whether a boolean flag
    # was intended.
    parser.add_option("-v", "--verbose", default=False, dest="verbose", help="enable basic debugging")
    parser.add_option("-o", "--output", dest="output", default='.', help="directory of image output")
    (opt, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    # Use the parsed positional argument: sys.argv[1] is an option, not the
    # file, whenever an option is given before the file name.
    fname = args[0]
    f = fname
    xsl_file = 'normalized_oo2rml.xsl'
    z = zipfile.ZipFile(fname, "r")
    try:
        mimetype = z.read('mimetype')
    finally:
        z.close()
    if mimetype.split('/')[-1] == 'vnd.oasis.opendocument.text':
        xsl_file = 'normalized_odt2rml.xsl'
    # The stylesheet is looked up in the directory of this script.
    xsl_path = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), xsl_file)
    with open(xsl_path) as xsl_handle:
        xsl = xsl_handle.read()
    # NOTE(review): save_pict is hardcoded to False, so nothing is written to
    # the --output directory - confirm this is intended.
    result = sxw2rml(f, xsl, output=opt.output, save_pict=False)

    print(result)
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: