432 lines
16 KiB
Python
Executable File
432 lines
16 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Tabellio -- software suite for deliberative assemblies
|
|
# -- suite logicielle pour assemblées délibératives
|
|
# -- http://www.tabellio.org/
|
|
# Copyright (C) 2006 Parlement de la Communauté française de Belgique
|
|
|
|
# This file is part of Tabellio.
|
|
|
|
# Tabellio is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# Tabellio is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
import sys
|
|
import os
|
|
import re
|
|
import threading
|
|
import time
|
|
import string
|
|
import math
|
|
import tempfile
|
|
import getopt
|
|
import cStringIO
|
|
import shutil
|
|
import libxml2
|
|
import libxslt
|
|
import logging
|
|
import subprocess
|
|
|
|
try:
|
|
import elementtree.ElementTree as ET
|
|
except ImportError:
|
|
import xml.etree.ElementTree as ET
|
|
|
|
NAME_CONTENT_LEGI = 'contents.xml'
|
|
|
|
# from pythonlib
|
|
import xmlutils
|
|
from xmlutils import parseStream, applyStylesheet2
|
|
from epsutils import eps2pdf
|
|
from ziputils import unzipToDirectory
|
|
from utf8utils import utf8encode
|
|
import magic
|
|
|
|
try:
|
|
import PIL
|
|
import PIL.Image
|
|
except ImportError:
|
|
PIL = None
|
|
|
|
# Module-level lock, created once so that every thread shares the same instance.
tempfilelock = threading.Lock()
# Time-stamped prefix stored on the tempfile module.
tempfile.template = "F%ld-" % time.time()

# Root logger.
log = logging.getLogger("")

# Directory containing this module; the stylesheets are looked up in the
# "xsl" directory located two levels above it.
abspath = os.path.abspath(os.path.dirname(__file__))
xslpath = os.path.join(abspath, "..", "..", "xsl")

# Paths of the individual XSL stylesheets, all located in xslpath.
(xslPreprocessingFilterName,
 xslPostprocessingFilterName,
 xslMainprocessingFilterName,
 xslAnnexPreprocessingFilterName,
 xslCopyProcessingFilterName) = tuple(
    os.path.join(xslpath, _xslFile)
    for _xslFile in ("pre_proc.xsl", "post_proc.xsl", "main.xsl",
                     "annex_pre_proc.xsl", "copy_proc.xsl"))
|
|
|
|
class PdfGeneratorException(Exception):
    """Error raised when a .legi document cannot be turned into a PDF.

    The human-readable description is stored in ``message`` and is what
    ``str()`` returns.  The class derives from ``Exception`` so that it can be
    raised on every Python version and is caught by ``except Exception``.
    """

    def __init__(self, message=""):
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message
|
|
|
|
|
|
def _writeToFile(filename, data):
    """Write data to filename in binary mode, replacing any previous content.

    The file is closed even when the write fails; errors are propagated.
    """
    stream = open(filename, "wb")
    try:
        stream.write(data)
    finally:
        stream.close()
|
|
|
|
def makeTempFileName(suffix="", subdir=None):
    """Return a fresh temporary path located under ``<system tmp>/legi2pdf``.

    Arguments:
    suffix -- text appended to the generated name
    subdir -- optional sub-directory of ``legi2pdf`` in which the name is
              generated (the sub-directory itself is not created)

    Only a name is returned; nothing is created at that path.  The
    ``legi2pdf`` directory is created on first use.

    The target directory is handed to ``tempfile.mktemp`` through its ``dir``
    argument, so the process-wide ``tempfile.tempdir`` setting is never
    rewritten and concurrent callers cannot corrupt it.
    """
    baseDir = os.path.abspath(os.path.join(tempfile.gettempdir(), "legi2pdf"))
    if not os.path.exists(baseDir):
        try:
            os.mkdir(baseDir)
        except OSError:
            # Another thread or process may have created it in the meantime;
            # only give up when the directory is really missing.
            if not os.path.isdir(baseDir):
                raise

    if subdir:
        targetDir = os.path.join(baseDir, subdir)
    else:
        targetDir = baseDir
    return tempfile.mktemp(dir=targetDir) + suffix
|
|
|
|
def purgeDirectory(d):
    """Delete directory d together with everything it contains.

    Plain files and symbolic links are unlinked, nested directories are
    removed recursively, and finally d itself is removed.  Any OSError is
    propagated to the caller.
    """
    for entry in os.listdir(os.path.abspath(d)):
        path = os.path.join(d, entry)
        if os.path.isdir(path) and not os.path.islink(path):
            # os.remove() cannot delete a directory.
            shutil.rmtree(path)
        else:
            os.remove(path)
    os.rmdir(d)
|
|
|
|
# First unsigned decimal number (optionally with a fractional part) in a string.
bboxRE = re.compile(r'\d+(?:\.\d+)?')


def _convertBboxVal(val):
    """Return the first number found in val, rounded half-up, as a string.

    Anything surrounding the number (for instance a unit) is ignored.

    Raises ValueError when val contains no number at all.
    """
    match = bboxRE.search(val)
    if match is None:
        raise ValueError("no numeric value found in %r" % (val,))
    # float() replaces the deprecated string.atof().
    return "%d" % math.floor(float(match.group(0)) + 0.5)
|
|
|
|
def unzipAnnexToDir(annexes, tempDir):
    """Bring every annex file into tempDir.

    The first item of each annex entry is the annex file name.  Files ending
    in ".legi" are unpacked with unzipToDirectory, using the annex position
    followed by "_" as third argument; any other file is copied into tempDir
    under its base name.
    """
    for position, annex in enumerate(annexes):
        source = annex[0]
        if source.endswith(".legi"):
            unzipToDirectory(source, tempDir, "%d_" % position)
            continue
        shutil.copy(source, os.path.join(tempDir, os.path.basename(source)))
|
|
|
|
def getBboxes(d):
    """Collect the declared size of the images referenced by the document.

    The contents.xml file found in directory d is parsed and, for every
    imagedata element carrying fileref, width and depth attributes, the
    fileref value is mapped to a (width, depth) pair whose members have been
    converted by _convertBboxVal.

    Returns the resulting dictionary.
    """
    sizes = {}
    document = None
    xpathContext = None
    try:
        document = libxml2.parseFile(os.path.join(d, NAME_CONTENT_LEGI))
        xpathContext = document.xpathNewContext()

        for node in xpathContext.xpathEval("//imagedata"):
            if node.name != "imagedata":
                continue
            fileref = node.hasProp("fileref")
            width = node.hasProp("width")
            depth = node.hasProp("depth")
            if not (fileref and width and depth):
                # Incomplete declaration: no bounding box for this image.
                continue
            sizes[fileref.getContent()] = (
                _convertBboxVal(width.getContent()),
                _convertBboxVal(depth.getContent()))
    finally:
        # Release the libxml2 objects explicitly.
        if document is not None:
            document.freeDoc()
        if xpathContext is not None:
            xpathContext.xpathFreeContext()
    return sizes
|
|
|
|
def convertWmf2Pdf(d):
    """Convert every .wmf file of directory d to EPS, then to PDF.

    For each ``name.wmf`` found in d, /usr/bin/wmf2eps is run with its output
    stored in ``name.eps`` and eps2pdf is then called on that EPS file.  When
    getBboxes(d) knows a size for the image, it is passed to wmf2eps through
    the --bbox option.

    The converter is started with an argument list and without a shell, so
    file names containing spaces or shell metacharacters are passed through
    unchanged instead of being interpreted by the shell.
    """
    workDir = os.path.abspath(d)
    bboxDict = getBboxes(d)
    for wmfFileName in os.listdir(d):
        if not wmfFileName.endswith(".wmf"):
            continue
        epsFileName = wmfFileName[:-4] + ".eps"

        argv = ["/usr/bin/wmf2eps"]
        bbox = bboxDict.get(wmfFileName)
        if bbox is not None:
            argv.append("--bbox=%sx%s" % (bbox[0], bbox[1]))
        argv.append(os.path.join(workDir, wmfFileName))

        epsFile = open(os.path.join(workDir, epsFileName), "wb")
        try:
            try:
                subprocess.call(argv, stdout=epsFile, cwd=workDir)
            except OSError:
                # A converter that cannot be started used to be reported only
                # by the shell; keep going instead of aborting the whole run.
                log.warning("unable to run %s", argv[0])
        finally:
            epsFile.close()
        eps2pdf(os.path.join(d, epsFileName))
|
|
|
|
def insertAnnex(xmlMainDoc, annexes, d):
    """Append the annexes to the first child node of xmlMainDoc.

    Arguments:
    xmlMainDoc -- libxml2 document receiving the annexes
    annexes    -- sequence of annex entries: item 0 is the file name, item 1
                  the title and, for PDF annexes, item 2 the scale in percent
    d          -- directory holding the unpacked "<index>_contents.xml" files

    A ".legi" annex is parsed from d, run through the annex preprocessing
    stylesheet and its top node is added to the main document.  A ".pdf"
    annex becomes an appendix element of type "pdf" holding a title and a
    pdf-annex element.  Any other file only produces a warning in the log.
    """
    bookNode = xmlMainDoc.children
    annexStream = None
    try:
        for index, annex in enumerate(annexes):
            annexPath = annex[0]
            if annexPath.endswith(".legi"):
                contentName = "%d_%s" % (index, NAME_CONTENT_LEGI)
                annexStream = open(os.path.join(d, contentName), "r")
                annexDoc = parseStream(annexStream, validate=0)
                xslParams = {
                    'annex-id': '%d_' % (index,),
                    'annex-title': '%s' % utf8encode(annex[1]),
                }
                annexDoc = applyStylesheet2(
                    annexDoc, xslAnnexPreprocessingFilterName, xslParams)
                bookNode.addChild(annexDoc.children)
                annexStream.close()
                # Closed already: nothing left for the finally clause to do.
                annexStream = None
            elif annexPath.endswith(".pdf"):
                appendixNode = libxml2.newNode("appendix")
                appendixNode.newProp("type", "pdf")

                titleNode = libxml2.newNode("title")
                title = annex[1]
                if title is not None:
                    title = title.encode("UTF-8")
                titleNode.addContent(title)
                appendixNode.addChild(titleNode)

                pdfNode = libxml2.newNode("pdf-annex")
                pdfNode.newProp("pdf-file", os.path.basename(annexPath))
                # The scale is given in percent and stored as a ratio.
                scale = annex[2]
                pdfNode.newProp("scale", "%.2f" % (scale / 100.0))
                appendixNode.addChild(pdfNode)

                bookNode.addChild(appendixNode)
            else:
                log.warn("format d'annexe non supporté %s" % annexPath)
    finally:
        if annexStream is not None:
            annexStream.close()
|
|
|
|
def _selectXslDirectory(legacyMode):
    """Point the module-level stylesheet paths at the legacy or current XSL set."""
    global xslpath, xslPreprocessingFilterName, xslPostprocessingFilterName
    global xslMainprocessingFilterName, xslAnnexPreprocessingFilterName
    global xslCopyProcessingFilterName

    if legacyMode:
        dirName = "xsl-legacy"
    else:
        dirName = "xsl"
    xslpath = os.path.join(abspath, "..", "..", dirName)
    xslPreprocessingFilterName = os.path.join(xslpath, "pre_proc.xsl")
    xslPostprocessingFilterName = os.path.join(xslpath, "post_proc.xsl")
    xslMainprocessingFilterName = os.path.join(xslpath, "main.xsl")
    xslAnnexPreprocessingFilterName = os.path.join(xslpath, "annex_pre_proc.xsl")
    xslCopyProcessingFilterName = os.path.join(xslpath, "copy_proc.xsl")


def _documentClassParams(input):
    """Derive the LaTeX font and document class from the document keywords."""
    params = {}
    tree = ET.fromstring(input)
    for prop in tree.findall('metadata/property'):
        if prop.attrib.get('name') != 'keyword':
            continue
        # An empty keyword element has no text at all.
        if 'PFB' in (prop.text or ''):
            params['latex.document.font'] = 'helvet'
            params['latex.documentclass'] = 'PFBstd'
        else:
            params['latex.document.font'] = 'sabon'
            params['latex.documentclass'] = 'PCFstd'
    return params


def _postprocessLatex(doc, style):
    """Apply the textual fix-ups to the generated LaTeX source."""
    # Typographic characters are listed both literally and by code point.
    # The combining-accent entries turn "letter + combining mark" sequences
    # into the matching precomposed letter.
    for before, after in [(u'’', u"'"), (u'\u2019', u"'"),
                          (u'“', u'\\guillemotleft'), (u'\u201C', u'«'),
                          (u'”', u'\\guillemotright'), (u'\u201D', u'»'),
                          (u'‑', u'-'), (u'\u2011', u'-'),
                          (u'a\u0300', u'à'),
                          (u'e\u0300', u'è'),
                          (u'u\u0300', u'ù'),
                          (u'e\u0301', u'é'),
                          (u'a\u0302', u'â'),
                          (u'e\u0302', u'ê'),
                          (u'i\u0302', u'î'),
                          (u'o\u0302', u'ô'),
                          (u'u\u0302', u'û'),
                          (u'e\u0308', u'ë'),
                          (u'i\u0308', u'ï'),
                          (u'u\u0308', u'ü'),
                          (u'c\u0327', u'ç'),
                          (u'TABELLIO--', u'\\hyp{}')]:
        doc = doc.replace(before, after)

    if style != 'bqr':
        # Look for the end of parts and change the surrounding multicol
        # environment so that its columns are not balanced.
        parts = doc.split('\n% end part')
        for i in range(1, len(parts)):
            # First \end{multicols} following the marker ...
            parts[i] = parts[i].replace(
                r'\end{multicols}', r'\end{multicols*}', 1)
            # ... and last \begin{multicols} preceding it.
            head, found, tail = parts[i - 1].rpartition(r'\begin{multicols}')
            if found:
                parts[i - 1] = head + r'\begin{multicols*}' + tail
        doc = '\n% adjusted end part'.join(parts)
    return doc


def convertLegi2Tex(input, outputFileName, annexes, d, draft=0, toc=True,
                    style="normal", useFont=None, legacyMode=False):
    """Convert the XML content of a .legi document into a LaTeX file.

    Arguments:
    input          -- XML content of the document, as a string
    outputFileName -- name of the LaTeX file to create inside d
    annexes        -- annex definitions, handed over to insertAnnex
    d              -- working directory
    draft          -- when true, sets the 'with-draft-tag' XSL parameter
    toc            -- when true, sets the 'with-toc' XSL parameter
    style          -- "parchment" sets the 'parchemin' XSL parameter, "bqr"
                      sets 'bqr' and disables the multicol adjustment; an
                      "ooo-" prefix is stripped and forces legacyMode off
    useFont        -- overrides the font derived from the document keywords
    legacyMode     -- use the stylesheets of the "xsl-legacy" directory

    The module-level stylesheet paths are updated according to legacyMode
    before anything else happens.

    NOTE(review): the libxml2 documents created here are not freed.  Annex
    nodes coming from other documents are grafted into the preprocessed
    document, so releasing it looks unsafe -- confirm before adding freeDoc().
    """
    if style.startswith('ooo-'):
        legacyMode = False
        style = style[4:]

    _selectXslDirectory(legacyMode)

    xslParam = _documentClassParams(input)
    if useFont:
        # override selected font
        xslParam['latex.document.font'] = useFont

    outputStream = None
    try:
        # preprocessing
        # The document is written as UTF-8 encoded bytes, hence binary mode.
        outputStream = open(os.path.join(d, outputFileName), "wb")
        xml_doc = libxml2.parseDoc(input)
        xsl_style = libxslt.parseStylesheetFile(xslPreprocessingFilterName)
        preprocessed_xml_doc = xsl_style.applyStylesheet(xml_doc, {})
        xsl_style.freeStylesheet()
        insertAnnex(preprocessed_xml_doc, annexes, d)

        # Some custom preprocessing of text content: marks -- as TABELLIO--
        # so that substitution with the proper command (\hyp{}) can happen in
        # the postprocessing phase.
        ctxt = preprocessed_xml_doc.xpathNewContext()
        try:
            for node in ctxt.xpathEval('//text()'):
                if '--' in node.content:
                    node.setContent(node.content.replace('--', 'TABELLIO--'))
        finally:
            ctxt.xpathFreeContext()

        # processing
        if draft:
            xslParam['with-draft-tag'] = '1'
        if toc:
            xslParam['with-toc'] = '1'
        if style == "parchment":
            xslParam['parchemin'] = '1'
        if style == "bqr":
            xslParam['bqr'] = '1'

        for k, v in list(xslParam.items()):
            xslParam[k] = xmlutils.makeparam(v)

        xsl_style = libxslt.parseStylesheetFile(xslMainprocessingFilterName)
        processed_xml_doc = xsl_style.applyStylesheet(preprocessed_xml_doc, xslParam)
        xml_latex_doc = xsl_style.saveResultToString(processed_xml_doc)
        xsl_style.freeStylesheet()

        # postprocessing
        doc = ET.fromstring(xml_latex_doc).text or u''
        doc = _postprocessLatex(doc, style)

        outputStream.write(doc.encode('utf-8'))
    finally:
        if outputStream is not None:
            outputStream.close()
|
|
|
|
|
|
def copy_extra_files(dest):
    """Copy every regular file of the "extra" directory into dest.

    The "extra" directory is looked up two levels above this module.
    Sub-directories are skipped.  Only the file content is copied, byte for
    byte, and every file handle is closed once its copy is done.
    """
    extra_dir = os.path.join(abspath, '..', '..', 'extra')
    for filename in os.listdir(extra_dir):
        src = os.path.join(extra_dir, filename)
        if not os.path.isfile(src):
            continue
        shutil.copyfile(src, os.path.join(dest, filename))
|
|
|
|
|
|
def convertTex2Pdf(d, latexFileName):
    """Run pdftex (pdflatex format) three times on latexFileName inside d.

    The extra files are copied into d first.  The compiler runs in
    non-stop mode and its exit status is ignored.  It is run several times,
    presumably so that cross-references and the table of contents settle.

    The command is started with an argument list and without a shell, so the
    file name is passed unchanged whatever characters it contains.
    """
    argv = ["pdftex", "--fmt=pdflatex", "--interaction=nonstopmode",
            latexFileName]
    copy_extra_files(d)
    for _ in range(3):
        try:
            subprocess.call(argv, cwd=d)
        except OSError:
            # A compiler that cannot be started used to be reported only by
            # the shell; the caller notices the missing PDF by itself.
            log.error("unable to run %s", argv[0])
            return
|
|
|
|
def convertLegi2Pdf(inputFileName, pdfFileName, latexFileName, keepLatex=0,
                    debug=0, annexes=None, draft=0, toc=True, style="normal",
                    useFont=None, legacyMode=True, grayscale=True):
    """
    Generate a PDF document from an input .legi document.

    Arguments:
    inputFileName -- name of the input .legi file (zip archive or plain xml)
    pdfFileName -- name of the output file (pdf)
    latexFileName -- name of the output file (LaTeX)
    keepLatex -- keep the LaTeX file
    debug -- keep the temporary files
    annexes -- annex definitions (defaults to no annex)
    draft -- add a draft stamp
    toc -- enable the table of contents
    style -- special style (normal, parchment, bqr)
    useFont -- document font, overrides the document class definition
    legacyMode -- use the legacy xsl conversion files
    grayscale -- convert the .jpg and .png images of the archive to grayscale
                 (only when PIL is available)

    Raises PdfGeneratorException when the input cannot be read, is in an
    unsupported format, has no contents.xml entry, or when no PDF file has
    been produced.
    """
    if annexes is None:
        annexes = []
    tempDir = None
    f = None
    try:
        tempDir = makeTempFileName(".legi2pdf")
        os.mkdir(tempDir)

        f = open(inputFileName)
        fmt = magic.fileFormat(f)

        copy_extra_files(tempDir)

        if fmt == "zip":
            allEntries = unzipToDirectory(inputFileName, tempDir)
            if allEntries is None:
                raise PdfGeneratorException(
                    'Unable to open or to read the legi file: %s' % (inputFileName))

            contentEntryInfo, contentEntryData = allEntries.get(
                NAME_CONTENT_LEGI, (None, None))
            if contentEntryInfo is None or contentEntryData is None:
                raise PdfGeneratorException(
                    'Corrupted legi file: No %s entry' % (NAME_CONTENT_LEGI))
            if grayscale and PIL:
                for filename in os.listdir(tempDir):
                    if os.path.splitext(filename)[-1] not in ('.jpg', '.png'):
                        continue
                    # image, convert it to grayscale
                    imagePath = os.path.join(tempDir, filename)
                    image = PIL.Image.open(imagePath)
                    grayscaled = image.convert('L')
                    grayscaled.save(imagePath)
        elif fmt == "xml":
            contentEntryData = f.read()
            _writeToFile(os.path.join(tempDir, NAME_CONTENT_LEGI), contentEntryData)
        else:
            raise PdfGeneratorException(
                'Unsupported input format: %s (should be: xml or zip)' % (fmt))

        unzipAnnexToDir(annexes, tempDir)

        convertLegi2Tex(contentEntryData, "temp.tex", annexes, tempDir, draft,
                        toc, style, useFont, legacyMode)
        convertWmf2Pdf(tempDir)
        convertTex2Pdf(tempDir, "temp.tex")
        if os.path.exists(pdfFileName):
            os.remove(pdfFileName)
        if not os.path.exists(os.path.join(tempDir, 'temp.pdf')):
            raise PdfGeneratorException('legi2pdf failed to create a pdf file')
        shutil.move(os.path.join(tempDir, "temp.pdf"), pdfFileName)
    finally:
        # Close the input first so that a failing clean-up cannot leak it.
        if f is not None:
            f.close()
        # tempDir is still None when the temporary name could not be built;
        # there is nothing to salvage or purge in that case.
        if tempDir is not None:
            if keepLatex:
                if os.path.exists(latexFileName):
                    os.remove(latexFileName)
                if os.path.exists(os.path.join(tempDir, "temp.tex")):
                    shutil.move(os.path.join(tempDir, "temp.tex"), latexFileName)
            if not debug and os.path.isdir(tempDir):
                purgeDirectory(tempDir)
|