odf2legi: parse static list styles (#6899)

The styles.xml file is supposed to be static but it can actually change in some
situations; we cannot therefore rely on known style names for lists and have to
parse the various levels to discover their formats.
This commit is contained in:
Frédéric Péters 2015-04-03 12:36:40 +02:00
parent a7a432a668
commit a3deae3580
1 changed files with 54 additions and 18 deletions

View File

@ -457,28 +457,32 @@ def handle_paragraph(parent, elem):
t_emph._children, para._children = para.getchildren(), [t_emph]
def handle_list(parent, elem):
style = elem.attrib.get('{%s}style-name' % TEXT_NS)
level = 1
style_props = None
ancestor = elem
ancestorstyle = None
while True:
try:
gdparent = ancestor.parent.parent
except AttributeError:
break
if gdparent.tag != '{%s}list' % TEXT_NS:
break
level += 1
ancestor = gdparent
if ancestorstyle is None:
ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS)
if style is None:
# means our parent was also a list
style_props = None
ancestor = elem
while True:
try:
gdparent = ancestor.parent.parent
except AttributeError:
break
if gdparent.tag != '{%s}list' % TEXT_NS:
break
level += 1
ancestor = gdparent
ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS)
style_props = STYLES.get('LIST:' + ancestorstyle)
if ancestorstyle:
style_props = STYLES.get('LIST:' + ancestorstyle)
else:
if 'parent' in STYLES.get(style, {}):
style = STYLES.get(style).get('parent')
style_props = STYLES.get('LIST:' + style)
num_format = None
if style_props:
num_format = style_props.get('levels', {}).get(level, {}).get('format')
@ -788,11 +792,13 @@ def convert(input_filename, output_filename):
content = z.read(zfile)
elif zfile == 'meta.xml':
metadata = z.read(zfile)
elif zfile == 'styles.xml':
styles = z.read(zfile)
if content and metadata:
break
legi = convert_to_legi_xml(content, metadata)
legi = convert_to_legi_xml(content, metadata, styles)
if debug:
print legi
@ -814,6 +820,31 @@ def convert(input_filename, output_filename):
legiz.writestr(zi, z.read(zfile))
legiz.close()
def parse_static_styles(content_tree):
    '''
    Register the list styles declared in styles.xml.

    Every text:list-style found under office:styles is stored in the global
    STYLES mapping under the key 'LIST:<style name>', replacing any previous
    entry with that key.  The entry's 'levels' dictionary is keyed by the
    integer value of text:level; each level holds 'format' (style:num-format
    of a numbered level) and/or 'bullet' (text:bullet-char of a bullet level).
    '''
    level_attr = '{%s}level' % TEXT_NS
    list_style_path = '{%s}styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)

    for list_style in content_tree.findall(list_style_path):
        key = 'LIST:%s' % list_style.attrib.get('{%s}name' % STYLE_NS)
        levels = {}
        # publish the entry first, then fill the shared 'levels' dict in place
        STYLES[key] = {'levels': levels}

        # numbered levels: a fresh entry carrying only the number format
        for numbered in list_style.findall('{%s}list-level-style-number' % TEXT_NS):
            depth = numbered.attrib.get(level_attr)
            number_format = numbered.attrib.get('{%s}num-format' % STYLE_NS)
            levels[int(depth)] = {'format': number_format}

        # bullet levels: add the bullet character, keeping any number format
        for bulleted in list_style.findall('{%s}list-level-style-bullet' % TEXT_NS):
            depth = int(bulleted.attrib.get(level_attr))
            levels.setdefault(depth, {})['bullet'] = bulleted.attrib.get(
                '{%s}bullet-char' % TEXT_NS)
def parse_automatic_styles(content_tree):
'''
Parse styles created automatically and populate the global styles
@ -870,7 +901,8 @@ def parse_automatic_styles(content_tree):
# parse automatic list styles
for elem in content_tree.findall('{%s}automatic-styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)):
style_name = 'LIST:%s' % elem.attrib.get('{%s}name' % STYLE_NS)
STYLES[style_name] = {'levels': {}}
if not style_name in STYLES:
STYLES[style_name] = {'levels': {}}
for level in elem.findall('{%s}list-level-style-number' % TEXT_NS):
num_level = level.attrib.get('{%s}level' % TEXT_NS)
num_format = level.attrib.get('{%s}num-format' % STYLE_NS)
@ -887,7 +919,7 @@ def parse_automatic_styles(content_tree):
})
def convert_to_legi_xml(content, metadata = None):
def convert_to_legi_xml(content, metadata=None, styles=None):
'''
Convert a content.xml/metadata.xml pair from an odt file
to the legi XML format.
@ -907,6 +939,10 @@ def convert_to_legi_xml(content, metadata = None):
metadata_element = ET.SubElement(legi, 'metadata')
create_metadata(metadata_element, metadata_tree, content_tree)
if styles:
styles_tree = ET.ElementTree(ET.fromstring(styles))
parse_static_styles(styles_tree)
current_top = [legi]
current_legi = []
speech = None