odf2legi: parse static list styles (#6899)
The styles.xml file is supposed to be static, but it can actually change in some situations; we therefore cannot rely on known style names for lists and have to parse the various levels to discover their formats.
This commit is contained in:
parent
a7a432a668
commit
a3deae3580
|
@ -457,28 +457,32 @@ def handle_paragraph(parent, elem):
|
|||
t_emph._children, para._children = para.getchildren(), [t_emph]
|
||||
|
||||
def handle_list(parent, elem):
|
||||
|
||||
style = elem.attrib.get('{%s}style-name' % TEXT_NS)
|
||||
level = 1
|
||||
style_props = None
|
||||
|
||||
ancestor = elem
|
||||
ancestorstyle = None
|
||||
while True:
|
||||
try:
|
||||
gdparent = ancestor.parent.parent
|
||||
except AttributeError:
|
||||
break
|
||||
if gdparent.tag != '{%s}list' % TEXT_NS:
|
||||
break
|
||||
level += 1
|
||||
ancestor = gdparent
|
||||
if ancestorstyle is None:
|
||||
ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS)
|
||||
|
||||
if style is None:
|
||||
# means our parent was also a list
|
||||
style_props = None
|
||||
ancestor = elem
|
||||
while True:
|
||||
try:
|
||||
gdparent = ancestor.parent.parent
|
||||
except AttributeError:
|
||||
break
|
||||
if gdparent.tag != '{%s}list' % TEXT_NS:
|
||||
break
|
||||
level += 1
|
||||
ancestor = gdparent
|
||||
ancestorstyle = ancestor.attrib.get('{%s}style-name' % TEXT_NS)
|
||||
style_props = STYLES.get('LIST:' + ancestorstyle)
|
||||
if ancestorstyle:
|
||||
style_props = STYLES.get('LIST:' + ancestorstyle)
|
||||
else:
|
||||
if 'parent' in STYLES.get(style, {}):
|
||||
style = STYLES.get(style).get('parent')
|
||||
style_props = STYLES.get('LIST:' + style)
|
||||
|
||||
num_format = None
|
||||
if style_props:
|
||||
num_format = style_props.get('levels', {}).get(level, {}).get('format')
|
||||
|
@ -788,11 +792,13 @@ def convert(input_filename, output_filename):
|
|||
content = z.read(zfile)
|
||||
elif zfile == 'meta.xml':
|
||||
metadata = z.read(zfile)
|
||||
elif zfile == 'styles.xml':
|
||||
styles = z.read(zfile)
|
||||
|
||||
if content and metadata:
|
||||
break
|
||||
|
||||
legi = convert_to_legi_xml(content, metadata)
|
||||
legi = convert_to_legi_xml(content, metadata, styles)
|
||||
if debug:
|
||||
print legi
|
||||
|
||||
|
@ -814,6 +820,31 @@ def convert(input_filename, output_filename):
|
|||
legiz.writestr(zi, z.read(zfile))
|
||||
legiz.close()
|
||||
|
||||
def parse_static_styles(content_tree):
    '''
    Parse list styles from styles.xml and record them in the global STYLES.

    Each ``list-style`` element (TEXT_NS) found under the ``styles``
    element (OFFICE_NS) is stored in STYLES under the key
    ``'LIST:<style name>'`` with the shape
    ``{'levels': {<int level>: {...}}}``. Numbered levels provide a
    ``'format'`` entry and bullet levels a ``'bullet'`` entry. An existing
    STYLES entry with the same key is replaced.
    '''
    list_style_path = '{%s}styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)
    level_attr = '{%s}level' % TEXT_NS

    for list_style in content_tree.findall(list_style_path):
        key = 'LIST:%s' % list_style.attrib.get('{%s}name' % STYLE_NS)
        # STYLES is only mutated, never rebound, so no ``global`` is needed.
        # ``levels`` is the very dict stored in STYLES: filling it below
        # fills the registered style.
        levels = {}
        STYLES[key] = {'levels': levels}

        # Numbered levels: keep the numbering format of each level.
        for numbered in list_style.findall('{%s}list-level-style-number' % TEXT_NS):
            depth = int(numbered.attrib.get(level_attr))
            levels[depth] = {
                'format': numbered.attrib.get('{%s}num-format' % STYLE_NS),
            }

        # Bullet levels: add the bullet character, keeping any entry
        # already recorded for the same level.
        for bulleted in list_style.findall('{%s}list-level-style-bullet' % TEXT_NS):
            depth = int(bulleted.attrib.get(level_attr))
            glyph = bulleted.attrib.get('{%s}bullet-char' % TEXT_NS)
            levels.setdefault(depth, {})['bullet'] = glyph
|
||||
|
||||
|
||||
def parse_automatic_styles(content_tree):
|
||||
'''
|
||||
Parse styles created automatically and populate the global styles
|
||||
|
@ -870,7 +901,8 @@ def parse_automatic_styles(content_tree):
|
|||
# parse automatic list styles
|
||||
for elem in content_tree.findall('{%s}automatic-styles/{%s}list-style' % (OFFICE_NS, TEXT_NS)):
|
||||
style_name = 'LIST:%s' % elem.attrib.get('{%s}name' % STYLE_NS)
|
||||
STYLES[style_name] = {'levels': {}}
|
||||
if not style_name in STYLES:
|
||||
STYLES[style_name] = {'levels': {}}
|
||||
for level in elem.findall('{%s}list-level-style-number' % TEXT_NS):
|
||||
num_level = level.attrib.get('{%s}level' % TEXT_NS)
|
||||
num_format = level.attrib.get('{%s}num-format' % STYLE_NS)
|
||||
|
@ -887,7 +919,7 @@ def parse_automatic_styles(content_tree):
|
|||
})
|
||||
|
||||
|
||||
def convert_to_legi_xml(content, metadata = None):
|
||||
def convert_to_legi_xml(content, metadata=None, styles=None):
|
||||
'''
|
||||
Convert a content.xml/metadata.xml pair from an odt file
|
||||
to the legi XML format.
|
||||
|
@ -907,6 +939,10 @@ def convert_to_legi_xml(content, metadata = None):
|
|||
metadata_element = ET.SubElement(legi, 'metadata')
|
||||
create_metadata(metadata_element, metadata_tree, content_tree)
|
||||
|
||||
if styles:
|
||||
styles_tree = ET.ElementTree(ET.fromstring(styles))
|
||||
parse_static_styles(styles_tree)
|
||||
|
||||
current_top = [legi]
|
||||
current_legi = []
|
||||
speech = None
|
||||
|
|
Reference in New Issue