odoo/addons/l10n_ch/report/tiny_sxw2rml.py

340 lines
12 KiB
Python
Executable File

#coding: latin-1
##############################################################################
#
# Copyright (c):
#
# 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
# 2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contact a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
#!/usr/bin/python
"""
Tiny SXW2RML - The Tiny ERP's report engine
Tiny SXW2RML is part of the Tiny report project.
Tiny Report is a module that allows you to render high quality PDF documents
from an OpenOffice template (.sxw) and any relational database.
The whole source code is distributed under the terms of the
GNU Public Licence.
(c) 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
(c) 2005-TODAY, Fabien Pinckaers - Tiny sprl
"""
__version__ = '0.9'
import copy
import optparse
import os
import re
import string
import time
import xml.dom.minidom
import zipfile

from reportlab.lib.units import toLength
# Command-line interface. The parser is always built so that ``parser``,
# ``opt`` and ``args`` exist at module level whether the file is run or
# imported.
parser = optparse.OptionParser(
    version="Tiny Report v%s" % __version__,
    usage='tiny_sxw2rml.py [options] file.sxw')
# "-v" is a flag: without action="store_true" optparse treats it as an option
# that takes a value and swallows the next command-line token.
parser.add_option("-v", "--verbose", action="store_true", default=False,
                  dest="verbose", help="enable basic debugging")
parser.add_option("-o", "--output", dest="output", default='.',
                  help="directory of image output")
if __name__ == "__main__":
    (opt, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
else:
    # Imported as a library: do not parse (or exit on) the host process'
    # sys.argv; expose the option defaults and no positional arguments.
    (opt, args) = parser.parse_args([])
class DomApiGeneral:
    """General DOM API utilities.

    Helpers shared by the DOM wrappers of this module: splitting and
    converting length strings, filtering child nodes by name/attributes and
    re-encoding strings as UTF-8.
    """

    def __init__(self, content_string="", file=""):
        """Store *content_string* and compile the value/unit regex.

        ``file`` is accepted for backward compatibility; it is not used.
        """
        self.content_string = content_string
        # group(1): shortest prefix ending in a digit, group(2): the unit.
        self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")

    def _unitTuple(self, string):
        """Split values and units to a tuple.

        Returns the ``(value, unit)`` groups of the first match, or
        ``(string, "")`` when *string* holds no value followed by a unit.
        """
        matches = self.re_digits.findall(string)
        if not matches:
            return (string, "")
        return matches[0]

    def stringPercentToFloat(self, string):
        """Convert a percentage string such as ``"115%"`` to a ratio (1.15)."""
        return float(string.replace("%", "")) / 100

    def findChildrenByName(self, parent, name, attr_dict=None):
        """Return the direct element children of *parent* called *name*.

        Does not work recursively.
        Optional: when *attr_dict* is a non-empty mapping, only children whose
        attributes equal every name/value pair in it are returned.
        """
        children = [
            c for c in parent.childNodes
            if c.nodeType == c.ELEMENT_NODE and c.nodeName == name
        ]
        if not attr_dict:
            return children
        return self._selectForAttributes(nodelist=children, attr_dict=attr_dict)

    def _selectForAttributes(self, nodelist, attr_dict):
        """Return the nodes of *nodelist* matching every pair in *attr_dict*."""
        return [
            n for n in nodelist
            if all(n.getAttribute(a) == v for a, v in attr_dict.items())
        ]

    def _stringToTuple(self, s):
        """Parse ``"x,y"`` into a tuple of two ints; return None on failure."""
        try:
            parts = s.split(",")
            return int(parts[0]), int(parts[1])
        except (AttributeError, IndexError, TypeError, ValueError):
            # Not a string, fewer than two fields, or non-integer fields.
            return None

    def _tupleToString(self, t):
        """Format the first two items of *t* as UTF-8 ``"x,y"``.

        Returns None when *t* cannot be indexed or the text cannot be encoded.
        """
        try:
            return self.openOfficeStringUtf8("%s,%s" % (t[0], t[1]))
        except (AttributeError, IndexError, KeyError, TypeError, UnicodeError):
            return None

    def _lengthToFloat(self, value):
        """Convert a length string to a rounded number via ``toLength``.

        Values in which the value/unit regex finds nothing, and values that
        ``toLength`` rejects, are returned unchanged.
        """
        if not self.re_digits.search(value):
            return value
        length = value
        if length.endswith("inch"):
            # OO files use "inch" instead of "in" in Reportlab units
            length = length[:-2]
        try:
            return round(toLength(length))
        except (TypeError, ValueError):
            # Not a plain length after all: hand back the caller's value
            # untouched (not the "inch" -> "in" rewritten copy).
            return value

    def openOfficeStringUtf8(self, string):
        """Return *string* encoded as UTF-8.

        Text is encoded directly; byte strings are decoded as cp1252 first.
        """
        if isinstance(string, bytes):
            return string.decode("cp1252").encode("utf-8")
        return string.encode("utf-8")
class DomApi(DomApiGeneral):
    """This class provides a DOM-API for XML-Files from an SXW-Archive."""

    def __init__(self, xml_content, xml_styles):
        """Parse content.xml / styles.xml and index their styles.

        :param xml_content: XML text of content.xml
        :param xml_styles: XML text of styles.xml
        :raises IndexError: if styles.xml has no ``style:page-master`` or
            content.xml has no ``office:document-content`` element
        """
        DomApiGeneral.__init__(self)
        self.content_dom = xml.dom.minidom.parseString(xml_content)
        self.styles_dom = xml.dom.minidom.parseString(xml_styles)
        body = self.content_dom.getElementsByTagName("office:body")
        # None (rather than an empty node list) when there is no body.
        self.body = body[0] if body else None
        self.style_dict = {}
        self.style_properties_dict = {}
        # ******** always use the following order:
        # the properties dict is derived from the style dict.
        self.buildStyleDict()
        self.buildStylePropertiesDict()
        self.page_master = self.styles_dom.getElementsByTagName("style:page-master")[0]
        self.document = self.content_dom.getElementsByTagName("office:document-content")[0]

    def buildStylePropertiesDict(self):
        """Fill self.style_properties_dict for every style in self.style_dict."""
        for name in self.style_dict:
            self.style_properties_dict[name] = self.getStylePropertiesDict(name)

    def updateWithPercents(self, dict, updatedict):
        """Merge *updatedict* into *dict*, resolving percentage values.

        Sometimes you find values like "115%" in the style hierarchy. When a
        value of *updatedict* contains "%" and *dict* already holds a length
        for the same key, the percentage is applied to that length; every
        other value is copied as is. *dict* is modified in place and
        *updatedict* is left untouched.
        """
        if not updatedict:
            # no style hierarchies for this style? =>
            return
        new_updatedict = copy.copy(updatedict)
        for u in list(new_updatedict):
            try:
                if new_updatedict[u].find("%") != -1 and u in dict:
                    match = self.re_digits.search(dict[u])
                    number = float(match.group(1))
                    unit = match.group(2)
                    new_number = self.stringPercentToFloat(new_updatedict[u]) * number
                    if unit == "pt":
                        # no floats allowed for "pt"
                        # OOo just takes the int, does not round (try it out!)
                        new_number = int(new_number)
                    new_updatedict[u] = "%s%s" % (new_number, unit)
                else:
                    dict[u] = new_updatedict[u]
            except (AttributeError, TypeError, ValueError):
                # Base value is not a "<number><unit>" length (no regex match,
                # non-string value, unparsable number): keep the raw value.
                dict[u] = new_updatedict[u]
        dict.update(new_updatedict)

    def normalizeStyleProperties(self):
        """Transfer all style:style-properties attributes from the
        self.style_properties_dict dict to the automatic-styles
        from content.xml. Use this function to preprocess content.xml for
        XSLT transformations etc. Do not try to implement this function
        with XSLT - believe me, it's a terrible task..."""
        styles_styles = self.styles_dom.getElementsByTagName("style:style")
        automatic_styles = self.content_dom.getElementsByTagName("office:automatic-styles")[0]
        for s in styles_styles:
            automatic_styles.appendChild(s.cloneNode(deep=1))
        content_styles = self.content_dom.getElementsByTagName("style:style")
        # these are the content_styles with styles_styles added!!!
        for s in content_styles:
            found = self.findChildrenByName(s, "style:properties")
            if found:
                props = found[0]
            else:
                # some derived automatic styles do not have "style:properties":
                props = self.content_dom.createElement("style:properties")
                s.appendChild(props)
            name = s.getAttribute("style:name").encode("latin-1")
            properties = self.style_properties_dict[name] or {}
            for attribute in properties:
                props.setAttribute(
                    self.openOfficeStringUtf8(attribute),
                    self.openOfficeStringUtf8(properties[attribute]))

    def transferStylesXml(self):
        """Transfer certain sub-trees from styles.xml to the normalized content.xml
        (see normalizeStyleProperties). It is not necessary to do this - for
        example - with paragraph styles: the "normalized" style properties
        contain all information needed for further processing.

        A ``transferredfromstylesxml`` element holding a copy of the page
        master, and of the first outline style when there is one, is inserted
        before the document body."""
        # TODO: What about table styles etc.?
        outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
        t = self.content_dom.createElement("transferredfromstylesxml")
        self.document.insertBefore(t, self.body)
        t.appendChild(self.page_master.cloneNode(deep=1))
        if outline_styles:
            t.appendChild(outline_styles[0].cloneNode(deep=1))

    def normalizeLength(self):
        """Normalize all lengths to floats (i.e: 1 inch = 72).
        Always use this after "normalizeStyleProperties" and "transferStylesXml"!

        Every attribute of every element of content.xml is passed through
        self._lengthToFloat and written back as a string."""
        # TODO: The complex attributes of table cell styles are not transferred yet.
        for s in self.content_dom.getElementsByTagName("*"):
            # Snapshot the names: attributes are rewritten while looping.
            for x in list(s.attributes.keys()):
                v = s.getAttribute(x)
                # convert float to string first!
                s.setAttribute(x, "%s" % self._lengthToFloat(v))

    def normalizeTableColumns(self):
        """Handle this strange table:number-columns-repeated attribute.

        A column repeated N times is replaced by N identical columns without
        the attribute."""
        columns = self.content_dom.getElementsByTagName("table:table-column")
        for c in columns:
            if c.hasAttribute("table:number-columns-repeated"):
                number = int(c.getAttribute("table:number-columns-repeated"))
                c.removeAttribute("table:number-columns-repeated")
                for _ in range(number - 1):
                    c.parentNode.insertBefore(c.cloneNode(deep=1), c)

    def buildStyleDict(self):
        """Store all style:style-nodes from content.xml and styles.xml in self.style_dict.
        Caution: in this dict the nodes from two dom apis are merged!"""
        for st in (self.styles_dom, self.content_dom):
            for s in st.getElementsByTagName("style:style"):
                name = s.getAttribute("style:name").encode("latin-1")
                self.style_dict[name] = s
        return True

    def toxml(self):
        """Serialize the (normalized) content DOM as UTF-8 XML."""
        return self.content_dom.toxml(encoding="utf-8")

    def getStylePropertiesDict(self, style_name):
        """Return the style:properties attributes of *style_name*.

        Properties inherited through ``style:parent-style-name`` are collected
        first and then overridden by the style's own ones.

        :raises KeyError: if *style_name* (or an ancestor) is not in
            self.style_dict
        """
        style = self.style_dict[style_name]
        res = {}
        if style.hasAttribute("style:parent-style-name"):
            parent = style.getAttribute("style:parent-style-name").encode("latin-1")
            res = self.getStylePropertiesDict(parent)
        for c in style.childNodes:
            if c.nodeType == c.ELEMENT_NODE and c.nodeName == "style:properties":
                for attr in c.attributes.keys():
                    res[attr] = c.getAttribute(attr).encode("latin-1")
        return res
class PyOpenOffice(object):
    """This is the main class which provides all functionality."""

    def __init__(self, path='.'):
        """:param path: directory the embedded pictures are extracted to."""
        self.path = path

    def oo_read(self, fname):
        """Read content.xml and styles.xml from the archive *fname*.

        Entries under ``Pictures/`` are written, under their base name, into
        ``self.path`` as a side effect.

        :return: ``(content, style)`` as read from the archive
        """
        z = zipfile.ZipFile(fname, "r")
        try:
            content = z.read('content.xml')
            style = z.read('styles.xml')
            for name in z.namelist():
                if name[:9] == 'Pictures/' and len(name) > 10:
                    target = os.path.join(self.path, os.path.basename(name))
                    with open(target, "wb") as f:
                        f.write(z.read(name))
        finally:
            # Release the archive even when an entry is missing or a picture
            # cannot be written.
            z.close()
        return content, style

    def oo_replace(self, content):
        """Apply the textual clean-ups to *content* and return the result.

        * self-closing ``<para .../>`` tags are removed;
        * a ``<para>`` containing ``<text:line-break/>`` is split into one
          ``<para>`` per line, each carrying the original attributes.
        """
        content = re.sub(r"<para[^>]*/>", "", content)
        # \1 = attributes of the opening tag, \2 = text before the break.
        # The text group may not run past "</para>", so a match never spans
        # two paragraphs. (Python uses \1, not Perl's $1, in replacements.)
        line_break = re.compile(
            r"<para([^>]*)>((?:(?!</para>).)*?)<text:line-break[^>]*/>")
        replaced = 1
        while replaced:
            # One pass splits the first remaining break of each paragraph;
            # repeat until none is left. Every substitution removes one
            # line-break, so the loop terminates.
            content, replaced = line_break.subn(
                r"<para\1>\2</para><para\1>", content)
        return content

    def unpackNormalize(self, sourcefile):
        """Extract *sourcefile* and return its normalized content.xml.

        Runs oo_read and oo_replace, then the DomApi normalization steps in
        the order they require, and serializes the resulting DOM.
        """
        c, s = self.oo_read(sourcefile)
        c = self.oo_replace(c)
        dom = DomApi(c, s)
        dom.normalizeStyleProperties()
        dom.transferStylesXml()
        dom.normalizeLength()
        dom.normalizeTableColumns()
        return dom.toxml()
if __name__ == "__main__":
    # Script entry point: normalize the .sxw given on the command line, run
    # it through the normalized_oo2rml.xsl stylesheet and print the result.
    import sys
    import libxslt
    import libxml2
    # Take the positional argument left over by optparse: sys.argv[1] is an
    # option flag whenever options precede the file name.
    f = args[0]
    tool = PyOpenOffice(opt.output)
    res = tool.unpackNormalize(f)
    # NOTE(review): the stylesheet path is relative, so it is looked up in
    # the current working directory - confirm this is intended.
    styledoc = libxml2.parseFile('normalized_oo2rml.xsl')
    style = libxslt.parseStylesheetDoc(styledoc)
    doc = libxml2.parseMemory(res, len(res))
    result = style.applyStylesheet(doc, None)
    # Named "rml" so the module-level "xml" package import is not rebound.
    rml = style.saveResultToString(result)
    sys.stdout.write(rml)
    sys.stdout.write("\n")
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: