First commit

Code derived from the xpath.py module of the xmlschema package
(v.0.9.21-dev). The code has been split into several modules: one for
the base TDOP parser (todp_parser.py), one for the XPath 1.0 parser and
one for the XPath 2.0 parser. The tests are implemented by the script
test_elementpath.py.

Changes to be committed:
	new file:   .gitignore
	new file:   LICENSE
	new file:   MANIFEST.in
	new file:   README.rst
	new file:   elementpath/__init__.py
	new file:   elementpath/exceptions.py
	new file:   elementpath/todp_parser.py
	new file:   elementpath/xpath1.py
	new file:   elementpath/xpath2.py
	new file:   requirements-dev.txt
	new file:   setup.cfg
	new file:   setup.py
	new file:   test_elementpath.py
This commit is contained in:
Davide Brunato 2018-02-11 10:51:03 +01:00
commit dde2acac06
13 changed files with 1344 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
*.pyc
*.pyo
*~
*.so
*.egg-info
.idea/
.project
.ipynb_checkpoints/
doc/_*
__pycache__/
dist/
build/
tmp/

22
LICENSE Normal file
View File

@ -0,0 +1,22 @@
MIT License
Copyright (c), 2018, SISSA (Scuola Internazionale Superiore di Studi Avanzati -
International School for Advanced Studies).
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

2
MANIFEST.in Normal file
View File

@ -0,0 +1,2 @@
# Include the license file
include LICENSE

31
README.rst Normal file
View File

@ -0,0 +1,31 @@
***********
elementpath
***********
The library provides XPath selectors for Python's ElementTree XML libraries. It includes
a parser for XPath 1.0 and one for XPath 2.0, and a mixin class for adding XPath selection
to other trees of elements.
Originally included in the `xmlschema <https://github.com/brunato/xmlschema>`_ library,
it has been forked into a separate package in order to allow independent usage.
Installation and usage
======================
You can install the library with *pip* in a Python 2.7 or Python 3.3+ environment::
pip install elementpath
Then import the selector from the library and apply XPath selection to ElementTree structures:
.. code-block:: pycon
>>> from elementpath import XPathSelector
>>> ....
License
-------
This software is distributed under the terms of the MIT License.
See the file 'LICENSE' in the root directory of the present
distribution, or http://opensource.org/licenses/MIT.

157
elementpath/__init__.py Normal file
View File

@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
# Package metadata.
# NOTE(review): setup.py declares version='0.9.20' while this module declares '1.0';
# confirm which one is intended and keep the two in sync.
__version__ = '1.0'
__author__ = "Davide Brunato"
__contact__ = "brunato@sissa.it"
__copyright__ = "Copyright 2018, SISSA"
__license__ = "MIT"
__status__ = "Production/Stable"
from .exceptions import ElementPathError, ElementPathSyntaxError, ElementPathValueError
from .todp_parser import Token, Parser
from .xpath1 import XPathToken, XPath1Parser
from .xpath2 import XPath2Parser
###
# XPath selectors
#
def relative_path(path, levels, namespaces=None, parser=XPath2Parser):
    """
    Return a relative XPath expression.

    The path is parsed and flattened into the in-order sequence of its token
    values. A leading ``.`` and a leading ``/`` are dropped, then further tokens
    are removed until *levels* ``/`` separators have been consumed. The remaining
    token values are joined without separators.

    :param path: An XPath expression.
    :param levels: Number of path levels to remove.
    :param namespaces: Is an optional mapping from namespace prefix \
    to full qualified name.
    :param parser: Is an optional XPath parser class. If not given the XPath2Parser is used.
    :return: A string with a relative XPath expression.
    """
    token_tree = parser(namespaces).parse(path)
    path_parts = [t.value for t in token_tree.iter()]

    start = 1 if path_parts[0] == '.' else 0
    # Guard the index: for a bare '.' there is nothing after the self step.
    if start < len(path_parts) and path_parts[start] == '/':
        start += 1

    for value in path_parts[start:]:
        if levels <= 0:
            break
        if value == '/':
            levels -= 1
        start += 1
    return ''.join(path_parts[start:])
class XPathSelector(object):
    """
    XPath selector bound to a single path expression.

    The path is parsed once at construction time with an instance of the given
    parser class; the resulting token tree is reused by each selection.

    :param path: the XPath expression.
    :param namespaces: optional mapping from namespace prefix to namespace name, \
    passed to the parser class.
    :param parser: the parser class to instantiate. Defaults to XPath2Parser.
    """
    def __init__(self, path, namespaces=None, parser=XPath2Parser):
        self.path = path
        self.parser = parser(namespaces)
        self._selector = self.parser.parse(path)

    def __repr__(self):
        class_name = self.__class__.__name__
        parser_name = self.parser.__class__.__name__
        return u'%s(path=%r, namespaces=%r, parser=%s)' % (
            class_name, self.path, self.namespaces, parser_name
        )

    @property
    def namespaces(self):
        """The namespace map held by the parser instance."""
        return self.parser.namespaces

    def iter_select(self, context):
        """Apply the parsed selector to *context* and return its result iterator."""
        root_token = self._selector
        return root_token.iter_select(context)
# Cache of parsed selectors, keyed by (path, namespaces); cleared when it grows too much.
_selector_cache = {}


def element_path_iterfind(context, path, namespaces=None):
    """
    Select items from *context* with an XPath 1.0 expression, caching parsed selectors.

    :param context: the element on which the selection is applied.
    :param path: the XPath expression. An absolute path is made relative by \
    prepending a '.'.
    :param namespaces: optional mapping from namespace prefix to namespace name.
    :return: the result of the parsed selector's ``iter_select()`` on *context*.
    """
    if path[:1] == "/":
        path = "." + path

    # The parsed selector depends only on the path and on the namespace map, not on
    # the context element, so these are what the cache key is built from. The same
    # key is used for lookup and for storage.
    path_key = (path, frozenset(namespaces.items()) if namespaces else None)
    selector = _selector_cache.get(path_key)
    if selector is None:
        selector = XPath1Parser(namespaces).parse(path)
        if len(_selector_cache) > 100:
            _selector_cache.clear()
        _selector_cache[path_key] = selector
    return selector.iter_select(context)
class ElementPathMixin(object):
    """
    Mixin class that defines the ElementPath API.

    The host class is expected to provide the attributes ``name`` and ``attributes``
    (exposed here as ``tag`` and ``attrib``) and to implement ``iter()`` and
    ``iterchildren()``. Optional ``namespaces`` and ``xpath_default_namespace``
    attributes are used to build the default namespace map for XPath expressions.
    """
    @property
    def tag(self):
        """Alias of the host object's ``name`` attribute."""
        return getattr(self, 'name')

    @property
    def attrib(self):
        """Alias of the host object's ``attributes`` attribute."""
        return getattr(self, 'attributes')

    def iterfind(self, path, namespaces=None):
        """
        Generates all matching XSD/XML element declarations by path.

        :param path: is an XPath expression that considers the schema as the root element \
        with global elements as its children.
        :param namespaces: is an optional mapping from namespace prefix to full name.
        :return: an iterable yielding all matching declarations in the XSD/XML order.
        """
        return element_path_iterfind(self, path, namespaces or self.xpath_namespaces)

    def find(self, path, namespaces=None):
        """
        Finds the first XSD/XML element or attribute matching the path.

        :param path: is an XPath expression that considers the schema as the root element \
        with global elements as its children.
        :param namespaces: an optional mapping from namespace prefix to full name.
        :return: The first matching XSD/XML element or attribute or ``None`` if there is no match.
        """
        return next(element_path_iterfind(self, path, namespaces or self.xpath_namespaces), None)

    def findall(self, path, namespaces=None):
        """
        Finds all matching XSD/XML elements or attributes.

        :param path: is an XPath expression that considers the schema as the root element \
        with global elements as its children.
        :param namespaces: an optional mapping from namespace prefix to full name.
        :return: a list containing all matching XSD/XML elements or attributes. An empty list \
        is returned if there is no match.
        """
        return list(element_path_iterfind(self, path, namespaces or self.xpath_namespaces))

    @property
    def xpath_namespaces(self):
        """
        The namespace map used by default for XPath expressions.

        Built from the non-empty prefixes of the ``namespaces`` attribute, if present,
        plus the ``xpath_default_namespace`` attribute, if present, mapped to the
        empty prefix. An empty dictionary is returned when neither attribute exists.
        """
        # Always start from a dictionary, so that objects without a 'namespaces'
        # attribute do not fail with an unbound local name.
        namespaces = {}
        if hasattr(self, 'namespaces'):
            namespaces.update((k, v) for k, v in self.namespaces.items() if k)
        if hasattr(self, 'xpath_default_namespace'):
            namespaces[''] = self.xpath_default_namespace
        return namespaces

    def iter(self, name=None):
        """To be implemented by the host class."""
        raise NotImplementedError

    def iterchildren(self, name=None):
        """To be implemented by the host class."""
        raise NotImplementedError

21
elementpath/exceptions.py Normal file
View File

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
class ElementPathError(Exception):
    """Base class of the exceptions defined by the package."""


class ElementPathSyntaxError(ElementPathError, SyntaxError):
    """Package error that is also a SyntaxError."""


class ElementPathValueError(ElementPathError, ValueError):
    """Package error that is also a ValueError."""

336
elementpath/todp_parser.py Normal file
View File

@ -0,0 +1,336 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
"""
This module contains classes and helper functions for defining Pratt parsers.
"""
import re
from decimal import Decimal
from abc import ABCMeta
try:
    from collections.abc import MutableSequence
except ImportError:  # Python 2.7: the ABCs are only available from 'collections'
    from collections import MutableSequence
from .exceptions import ElementPathSyntaxError
def create_tokenizer(symbols):
    """
    Create a simple tokenizer for a sequence of symbols. Extra spaces are skipped.

    The compiled pattern has three groups: literals (quoted strings or numbers),
    symbols, and references/other names. Multi-character symbols are tried before
    single-character ones, longest first. Symbols ending with '(' or '::' also
    match when whitespace separates the name from the trailing '(' or '::'.

    :param symbols: A sequence of strings representing the symbols. Blank and empty \
    symbols are discarded.
    :return: A regex compiled pattern.
    """
    tokenizer_pattern_template = r"""
        ('[^']*' | "[^"]*" | \d+(?:\.\d*)? | \.\d+) |   # Literals (strings or numbers)
        (%s) |                                          # Symbols
        ((?:{[^}]+\})?[^/\[\]()@=|\s]+) |               # References and other names
        \s+                                             # Skip extra spaces
    """

    def symbol_escape(s):
        # Test the plain symbol, not its escaped form: what re.escape() escapes
        # depends on the Python version (':' is not escaped since Python 3.7).
        if s.endswith('('):
            return re.escape(s[:-1]) + r'\s*\('
        if s.endswith('::'):
            return re.escape(s[:-2]) + r'\s*::'
        return re.escape(s)

    symbols = sorted((s.strip() for s in symbols if s.strip()), key=len, reverse=True)
    alternatives = [symbol_escape(s) for s in symbols if len(s) > 1]
    single_chars = [re.escape(s) for s in symbols if len(s) == 1]
    if single_chars:
        alternatives.append('[%s]' % ''.join(single_chars))

    # With no symbols at all use a pattern that never matches, instead of an empty
    # alternative that would match everywhere.
    symbols_pattern = '|'.join(alternatives) if alternatives else '(?!)'
    return re.compile(tokenizer_pattern_template % symbols_pattern, re.VERBOSE)
#
# Simple top down parser based on Vaughan Pratt's algorithm (Top Down Operator Precedence).
#
# References:
#
# https://tdop.github.io/ (Vaughan R. Pratt's "Top Down Operator Precedence" - 1973)
# http://crockford.com/javascript/tdop/tdop.html (Douglas Crockford - 2007)
# http://effbot.org/zone/simple-top-down-parsing.htm (Fredrik Lundh - 2008)
#
class Token(MutableSequence):
    """
    Token base class for defining a parser based on Pratt's method.

    A token is also a mutable sequence of its operands (other tokens), so a parsed
    expression is a tree of tokens. Two tokens compare equal when they have the same
    symbol and the same value; operands are not considered.

    :cvar symbol: The symbol of the token class.
    :param parser: The parser instance that creates the token.
    :param value: The token value. If not provided defaults to token symbol.
    """
    symbol = None   # the token identifier, key in the token table.
    lbp = 0         # left binding power
    rbp = 0         # right binding power

    def __init__(self, parser, value=None):
        self.parser = parser
        self.value = value if value is not None else self.symbol
        self._operands = []

    def __getitem__(self, i):
        return self._operands[i]

    def __setitem__(self, i, item):
        self._operands[i] = item

    def __delitem__(self, i):
        del self._operands[i]

    def __len__(self):
        return len(self._operands)

    def insert(self, i, item):
        self._operands.insert(i, item)

    def __str__(self):
        if self:
            return u'(%s %s)' % (self.value, ' '.join(str(item) for item in self))
        else:
            return u'(%s)' % self.value

    def __repr__(self):
        return u'%s(value=%r)' % (self.__class__.__name__, self.value)

    def __eq__(self, other):
        # Rich comparison replaces the former __cmp__, which returned a boolean
        # (read as "equal" when False by Python 2 and ignored by Python 3).
        try:
            return self.symbol == other.symbol and self.value == other.value
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        # Needed explicitly by Python 2, which does not derive it from __eq__.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        # Hash only the symbol: it is consistent with __eq__ and does not change
        # when the value is reassigned during parsing.
        return hash(self.symbol)

    @property
    def arity(self):
        """The number of operands of the token."""
        return len(self)

    def nud(self):
        """Null denotation method"""
        raise ElementPathSyntaxError("Undefined operator for %r." % self.symbol)

    def led(self, left):
        """Left denotation method"""
        raise ElementPathSyntaxError("Undefined operator for %r." % self.symbol)

    def eval(self):
        """Evaluation method"""
        return self.value

    def iter(self):
        """Iterate the token tree in-order: first operand, the token itself, other operands."""
        for t in self[:1]:
            for token in t.iter():
                yield token
        yield self
        for t in self[1:]:
            for token in t.iter():
                yield token

    def expected(self, symbol):
        """Raise a syntax error if the token symbol is not *symbol*."""
        if self.symbol != symbol:
            raise ElementPathSyntaxError("Expected %r token, found %r." % (symbol, str(self.value)))

    def unexpected(self, symbol=None):
        """Raise a syntax error if *symbol* is not given or if it is the token symbol."""
        if not symbol or self.symbol == symbol:
            raise ElementPathSyntaxError("Unexpected %r token." % str(self.value))
class Parser(object):
    """
    Base class for Top Down Operator Precedence parsers.

    Token classes are registered into the class-level symbol table with the helper
    class methods (``register``, ``literal``, ``prefix``, ``infix``, ``infixr``,
    ``method``), between a call to ``begin()`` and a call to ``end()``. The latter
    builds the tokenizer from the registered symbols. Instances can be created
    only after the registration has been completed.
    """
    symbol_table = {}           # maps each symbol to its token class
    token_base_class = Token    # base class of the generated token classes
    tokenizer = None            # compiled pattern, set by end()
    SYMBOLS = ()                # the symbols that the parser is expected to register

    def __init__(self):
        if '(end)' not in self.symbol_table or self.tokenizer is None:
            raise ValueError("Incomplete parser class %s registration." % self.__class__.__name__)
        self.token = None
        self.next_token = None
        self.match = None
        self.tokens = iter(())

    def parse(self, source):
        """
        Parse a source string.

        :param source: the string to parse.
        :return: the root token of the parsed expression.
        :raises ElementPathSyntaxError: if the source is not a valid expression.
        """
        try:
            # Drop the leftovers of a previous run, that would produce misleading
            # positions and symbols in the error messages.
            self.token = None
            self.match = None
            self.tokens = iter(self.tokenizer.finditer(source))
            self.advance()
            root_token = self.expression()
            if self.next_token.symbol != '(end)':
                self.next_token.unexpected()
            return root_token
        finally:
            self.tokens = iter(())
            self.next_token = None

    def advance(self, symbol=None):
        """
        Make the next token the current one and read a new next token from the source.

        :param symbol: optional symbol (or value) that the token being consumed must match.
        :return: the new next token.
        :raises ElementPathSyntaxError: on a mismatch with *symbol*, at the end of \
        the source or for an unknown operator.
        """
        if getattr(self.next_token, 'symbol', None) == '(end)':
            # match and token are still None if the source contains no token at all
            position = self.match.span()[1] if self.match is not None else 0
            raise ElementPathSyntaxError(
                "Unexpected end of source at position %d, after %r." % (
                    position, getattr(self.token, 'symbol', None)
                )
            )

        self.token = self.next_token
        if symbol and symbol not in (self.next_token.symbol, self.next_token.value):
            self.next_token.expected(symbol)

        while True:
            try:
                self.match = next(self.tokens)
            except StopIteration:
                self.next_token = self.symbol_table['(end)'](self)
                break
            else:
                literal, operator, ref = self.match.groups()
                if operator is not None:
                    try:
                        self.next_token = self.symbol_table[operator.replace(' ', '')](self)
                    except KeyError:
                        raise ElementPathSyntaxError("unknown operator %r." % operator)
                    break
                elif literal is not None:
                    if literal[0] in '\'"':
                        # Remove only the enclosing quotes: quote characters of the
                        # other kind at the edges belong to the string.
                        self.next_token = self.symbol_table['(string)'](self, literal[1:-1])
                    elif '.' in literal:
                        self.next_token = self.symbol_table['(decimal)'](self, Decimal(literal))
                    else:
                        self.next_token = self.symbol_table['(integer)'](self, int(literal))
                    break
                elif ref is not None:
                    self.next_token = self.symbol_table['(ref)'](self, ref)
                    break
                elif str(self.match.group()).strip():
                    raise ElementPathSyntaxError("unexpected token: %r" % self.match)
                # otherwise the match is only whitespace: skip it

        return self.next_token

    def expression(self, rbp=0):
        """
        Recursive expression parser for expressions. Calls token.nud() and then
        advances until the right binding power is less than the left binding power
        of the next token, invoking the led() method on the following token.

        :param rbp: right binding power for the expression.
        :return: left token.
        """
        token = self.next_token
        self.advance()
        left = token.nud()
        while rbp < self.next_token.lbp:
            token = self.next_token
            self.advance()
            left = token.led(left)
        return left

    @classmethod
    def begin(cls):
        """
        Begin the symbol registration. Helper functions are bound to global names.
        """
        cls.tokenizer = None
        globals().update({
            'register': cls.register,
            'literal': cls.literal,
            'prefix': cls.prefix,
            'infix': cls.infix,
            'infixr': cls.infixr,
            'method': cls.method,
        })

    @classmethod
    def end(cls):
        """
        End the symbol registration. Registers the special (end) symbol and sets the tokenizer.
        """
        cls.register('(end)')
        cls.tokenizer = create_tokenizer(
            s for s in cls.symbol_table
            if s.strip() not in {'(end)', '(ref)', '(string)', '(decimal)', '(integer)'}
        )

    @classmethod
    def register(cls, symbol, **kwargs):
        """
        Register/update a token class in the symbol table.

        For an already registered symbol only the callable attributes and a greater
        ``lbp`` are updated.
        NOTE(review): ``rbp`` is never updated on a re-registration - confirm that
        this is intended.

        :param symbol: The identifier symbol for a new token class or an existent token class.
        :param kwargs: Optional attributes/methods for the token class.
        :return: A token class.
        """
        try:
            try:
                symbol = symbol.strip()
            except AttributeError:
                # Not a string: a token class is expected
                assert issubclass(symbol, cls.token_base_class), \
                    "A %r subclass requested, not %r." % (cls.token_base_class, symbol)
                symbol, token_class = symbol.symbol, symbol
                if symbol not in cls.symbol_table:
                    cls.symbol_table[symbol] = token_class
                else:
                    assert cls.symbol_table[symbol] is token_class, \
                        "The registered instance for %r is not %r." % (symbol, token_class)
            else:
                token_class = cls.symbol_table[symbol]
        except KeyError:
            # Unknown symbol: create a new token class
            name = '_%s_%s' % (symbol, cls.token_base_class.__name__)
            kwargs['symbol'] = symbol
            token_class = ABCMeta(name, (cls.token_base_class,), kwargs)
            cls.symbol_table[symbol] = token_class
            cls.tokenizer = None
            ABCMeta.register(MutableSequence, token_class)
        else:
            for key, value in kwargs.items():
                if key == 'lbp' and value > token_class.lbp:
                    token_class.lbp = value
                elif callable(value):
                    setattr(token_class, key, value)
        return token_class

    @classmethod
    def unregistered(cls):
        """Return the list of the symbols of SYMBOLS not yet in the symbol table."""
        return [s for s in cls.SYMBOLS if s not in cls.symbol_table]

    @classmethod
    def symbol(cls, s):
        """Register a plain symbol."""
        return cls.register(s)

    @classmethod
    def literal(cls, symbol, bp=0):
        """Register a token whose null denotation is the token itself."""
        def nud(self):
            return self
        return cls.register(symbol, lbp=bp, nud=nud)

    @classmethod
    def prefix(cls, symbol, bp=0):
        """Register a prefix operator: its operand is the expression that follows."""
        def nud(self):
            self[0:] = self.parser.expression(rbp=bp),
            return self
        return cls.register(symbol, lbp=bp, rbp=bp, nud=nud)

    @classmethod
    def infix(cls, symbol, bp=0):
        """Register a left associative infix operator."""
        def led(self, left):
            self[0:1] = left, self.parser.expression(rbp=bp)
            return self
        return cls.register(symbol, lbp=bp, rbp=bp, led=led)

    @classmethod
    def infixr(cls, symbol, bp=0):
        """Register a right associative infix operator."""
        def led(self, left):
            self[0:1] = left, self.parser.expression(rbp=bp-1)
            return self
        return cls.register(symbol, lbp=bp, rbp=bp-1, led=led)

    @classmethod
    def method(cls, symbol, bp=0):
        """
        Register a token class for a symbol and return a decorator that binds the
        decorated function to the token class, using the function name. The name
        must match a callable attribute of the token class.
        """
        token_class = cls.register(symbol, lbp=bp, rbp=bp)

        def bind(func):
            assert callable(getattr(token_class, func.__name__, None)), \
                "The name %r does not match with a callable of %r." % (func.__name__, token_class)
            setattr(token_class, func.__name__, func)
            return func
        return bind

588
elementpath/xpath1.py Normal file
View File

@ -0,0 +1,588 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
from .exceptions import ElementPathSyntaxError, ElementPathValueError
from .todp_parser import Token, Parser
class XPathToken(Token):
    """Token class for XPath parsers: adds the select denotation to the base token."""

    def iter_select(self, context):
        """Run the selection starting from a result list that contains only *context*."""
        initial_results = [context]
        return self.sed(context, initial_results)

    def sed(self, context, results):
        """Select denotation"""
        raise ElementPathSyntaxError("Undefined operator for %r." % self.symbol)

    @staticmethod
    def iselement(elem):
        """Return True if *elem* has the attributes 'tag', 'attrib' and 'text'."""
        return all(hasattr(elem, name) for name in ('tag', 'attrib', 'text'))
class XPath1Parser(Parser):
    """
    XPath 1.0 expression parser class.

    :param namespaces: optional prefix to namespace map.
    """
    token_base_class = XPathToken
    symbol_table = dict(Parser.symbol_table)    # a copy: registrations are kept per class
    SYMBOLS = (
        'processing-instruction(', 'descendant-or-self::', 'following-sibling::',
        'preceding-sibling::', 'ancestor-or-self::', 'descendant::', 'attribute::',
        'following::', 'namespace::', 'preceding::', 'ancestor::', 'comment(', 'parent::',
        'child::', 'self::', 'text(', 'node(', 'and', 'mod', 'div', 'or',
        '..', '//', '!=', '<=', '>=', '(', ')', '[', ']', '.', '@', ',', '/', '|', '*',
        '-', '=', '+', '<', '>', '(:', ':)',

        # XPath Core function library
        'last(', 'position(', 'count(', 'id(', 'local-name(',   # Node set functions
        'namespace-uri(', 'name(',
        'string(', 'concat(', 'starts-with(', 'contains(',      # String functions
        'substring-before(', 'substring-after(', 'substring(',
        'string-length(', 'normalize-space(', 'translate(',
        'boolean(', 'not(', 'true(', 'false('                   # Boolean functions
    )
    # Symbols of the tokens accepted as right operand of '/' and '//'.
    RELATIVE_PATH_SYMBOLS = {s for s in SYMBOLS if s.endswith("::")} | {
        '(integer)', '(string)', '(decimal)', '(ref)', '*', '@', '..', '.', '(', '/'
    }

    def __init__(self, namespaces=None):
        super(XPath1Parser, self).__init__()
        if namespaces is None:
            namespaces = {}
        self.namespaces = namespaces

    @property
    def version(self):
        """The XPath version implemented by the parser."""
        return '1.0'

    @classmethod
    def begin(cls):
        """Begin the symbol registration, binding also the 'selector' helper to a global name."""
        super(XPath1Parser, cls).begin()
        globals().update({'selector': cls.selector})

    @classmethod
    def selector(cls, symbol, bp=0):
        """
        Register a token class with a select denotation that yields the token
        value once for each item of the results that is not None.
        """
        def sed_(self, _context, results):
            for elem in results:
                if elem is not None:
                    yield self.value
        return cls.register(symbol, lbp=bp, rbp=bp, sed=sed_)

    def parse(self, path):
        """
        Parse an XPath expression. An absolute path is parsed as relative to '.'.

        :param path: the XPath expression.
        :return: the root token of the parsed expression.
        :raises ElementPathSyntaxError: for an empty path or a path that ends with '/'.
        """
        if not path:
            raise ElementPathSyntaxError("empty XPath expression.")
        if path[-1] == '/':
            raise ElementPathSyntaxError("invalid path: %r" % path)

        if path[:1] == "/":
            path = "." + path
        return super(XPath1Parser, self).parse(path)

    def map_reference(self, ref):
        """
        Map a reference into a fully qualified name using the instance namespace map.

        :param ref: a local name, a prefixed name or a fully qualified name.
        :return: String with a FQN or a local name.
        :raises ElementPathValueError: for a malformed prefixed name or an unknown prefix.
        """
        if ref and ref[0] == '{':
            return ref  # already a fully qualified name

        parts = ref.split(':')
        if len(parts) == 2:
            # Prefixed name
            ns_prefix, name = parts
            if not ns_prefix or not name:
                raise ElementPathValueError("wrong format for reference name %r" % ref)
            try:
                uri = self.namespaces[ns_prefix]
            except KeyError:
                raise ElementPathValueError("prefix %r not found in namespace map" % ns_prefix)
            return u'{%s}%s' % (uri, name) if uri else name

        if ':' in ref:
            # More than one colon
            raise ElementPathValueError("wrong format for reference name %r" % ref)

        # Local name: use the default namespace, if any
        try:
            uri = self.namespaces['']
        except KeyError:
            return ref
        return u'{%s}%s' % (uri, ref) if uri else ref
##
# XPath1 definitions
# Start the registration of the XPath 1.0 tokens (resets the class tokenizer).
XPath1Parser.begin()
# Explicit module-level aliases of the registration helpers that are used below,
# mostly as decorators, for defining the token classes.
register = XPath1Parser.register
literal = XPath1Parser.literal
prefix = XPath1Parser.prefix
infix = XPath1Parser.infix
method = XPath1Parser.method
selector = XPath1Parser.selector
# Comments
@method('(:')
def nud(self):
    """
    Null denotation of the comment opening: consume the tokens up to the matching
    ':)', nested comments included, and set the token value to the values of the
    consumed tokens joined by a space.
    """
    depth = 1
    words = []
    while depth:
        self.parser.advance()
        token = self.parser.token
        if token.symbol == '(:':
            depth += 1
        elif token.symbol == ':)':
            depth -= 1
            if not depth:
                break  # the closing delimiter of the outer comment is not included
        # Numeric literals have non-string values: convert before joining
        words.append(str(token.value))
    self.value = ' '.join(words)
    return self


# The comment closing delimiter is a plain symbol, consumed by the '(:' token.
register(':)')
###
# Axes
@method('child::', bp=80)
def nud(self):
    """
    Null denotation of the child axis: the operand is the node test that follows.

    :raises ElementPathSyntaxError: if the axis is not followed by a name, a \
    wildcard, ``text()`` or ``node()``.
    """
    # The membership test has to be done on the symbol of the next token: the
    # token object itself never matches one of these strings.
    if self.parser.next_token.symbol not in ('(ref)', '*', 'text(', 'node('):
        raise ElementPathSyntaxError("invalid child axis %r." % self.parser.next_token)
    self[0:] = self.parser.expression(80),
    return self
@method('child::')
def sed(self, context, results):
    """
    Child axis selector: yield the children of each element of the results.
    When the node test is a name only the children with that tag are yielded.
    """
    node_test = self[0]
    # For a '*' wildcard the value is None, as for the other node tests
    name = node_test.value if node_test.symbol == '(ref)' else None
    for elem in results:
        if self.iselement(elem):
            for child in elem:
                if name is None or child.tag == name:
                    yield child
# Literal tokens: their null denotation returns the token itself and their select
# denotation yields the token value once for each item of the results that is not None.
selector(literal('(string)'))
selector(literal('(decimal)'))
selector(literal('(integer)'))
@method(literal('(ref)'))
def nud(self):
    """
    Null denotation of a name: a prefixed name is mapped to a fully qualified
    name using the namespace map of the parser.
    """
    name = self.value
    if name[0] != '{' and ':' in name:
        self.value = self.parser.map_reference(name)
    return self
@method('(ref)')
@method('*')
def sed(self, _context, results):
    """Children selector: yield the children that match the token value (all if None)."""
    for parent in results:
        if parent is None:
            continue
        for child in parent:
            if self.value is None or child.tag == self.value:
                yield child
@method('*')
def nud(self):
    """
    Null denotation of '*' (wildcard): accepted only if followed by '/', '[', ')'
    or by the end of the source. The value is set to None, that matches any tag.
    """
    following = self.parser.next_token
    if following.symbol not in ('/', '[', '(end)', ')'):
        following.unexpected()
    self.value = None
    return self
@method(infix('*', bp=45))
def led(self, left):
    """
    Left denotation of '*' (multiplication): the value is the product of the
    values of the two operands.
    """
    self[0:1] = left, self.parser.expression(45)
    # Multiply the operands (a sum was computed here by mistake)
    self.value = left.value * self[1].value
    return self
@method('@')
@method('attribute::')
def nud(self):
    """
    Null denotation of the attribute axis. The operand is a name or a wildcard.
    An optional comparison with a string (``@name='value'``) is stored as the
    operand of the name test.

    :raises ElementPathSyntaxError: if the axis is not followed by a name or a wildcard.
    """
    # Use a binding power higher than that of '=', 'and' and 'or', otherwise the
    # following operators are consumed as part of the name test.
    self[0:] = self.parser.expression(80),
    if self[0].symbol not in ('*', '(ref)'):
        raise ElementPathSyntaxError("invalid attribute specification for XPath.")
    if self.parser.next_token.symbol == '=':
        self.parser.advance('=')
        # advance() returns the new next token: the consumed string literal is
        # the current token of the parser.
        self.parser.advance('(string)')
        self[0][0:] = self.parser.token,
    return self
@method('@')
@method('attribute::')
def sed(self, _context, results):
    """
    Attribute selector.

    For a plain ``@name`` (or ``@*``) yields the matching attribute values of each
    item of the results. For ``@name='value'`` (or ``@*='value'``) yields the
    boolean results of the comparisons with the string value.
    """
    # method() is used for binding: selector() returns a token class, so as a
    # decorator it does not attach the function to the token.
    name_test = self[0]
    key = name_test.value   # the attribute name, None for the '*' wildcard
    if not len(name_test):
        # @attribute
        for elem in results:
            if elem is None:
                continue
            if key is None:
                for attr in elem.attrib.values():
                    yield attr
            elif key in elem.attrib:
                yield elem.attrib[key]
    else:
        # @attribute='value': the string literal is the operand of the name test
        value = name_test[0].value
        for elem in results:
            if elem is None:
                continue
            if key is not None:
                yield elem.get(key) == value
            else:
                for attr in elem.attrib.values():
                    yield attr == value
# [tag='value']
# NOTE(review): selector() returns a token class, so using it as a decorator does not
# attach this function as select denotation of the 'unknown' token - confirm the intent.
@selector('unknown')
def sed(self, _context, results):
    """Yield the items that have a child, found by the token symbol, whose text is the token value."""
    for elem in results:
        if elem is None:
            continue
        if any("".join(e.itertext()) == self.value for e in elem.findall(self.symbol)):
            yield elem
@method(infix('or', bp=20))
@method(infix('|', bp=50))
@method(infix('union', bp=50))
def sed(self, context, results):
    """Yield the items selected by the left operand followed by those selected by the right one."""
    # The results can be a generator: materialize them, otherwise the left
    # operand would exhaust the items before the right one is applied.
    results = list(results)
    left_results = list(self[0].sed(context, results))
    right_results = list(self[1].sed(context, results))
    for elem in left_results:
        yield elem
    for elem in right_results:
        yield elem
@method(infix('and', bp=25))
def sed(self, context, results):
    """Yield the items selected by the left operand that are also selected by the right one."""
    # The results can be a generator: materialize them, otherwise the right
    # operand would exhaust the items before the left one is applied.
    results = list(results)
    right_results = set(self[1].sed(context, results))
    for elem in self[0].sed(context, results):
        if elem in right_results:
            yield elem
# prefix('=', bp=30)
# prefix('<', bp=30)
# prefix('>', bp=30)
# prefix('!=', bp=30)
# prefix('<=', bp=30)
# prefix('>=', bp=30)
# Comparison operators: registered as infix operators with the same binding power.
# No select denotation is attached to them here.
infix('=', bp=30)
infix('<', bp=30)
infix('>', bp=30)
infix('!=', bp=30)
infix('<=', bp=30)
infix('>=', bp=30)
@method('+')
def nud(self):
    """
    Null denotation of '+' (unary plus): the operand must have an integer value,
    that becomes the value of the token.
    """
    self[0:] = self.parser.expression(75),
    operand = self[0]
    if not isinstance(operand.value, int):
        raise ElementPathSyntaxError("an integer value is required: %r." % operand)
    self.value = operand.value
    return self
@method(infix('+', bp=40))
def led(self, left):
    """Left denotation of '+' (addition): the value is the sum of the values of the operands."""
    right = self.parser.expression(40)
    self[0:1] = left, right
    self.value = self[0].value + self[1].value
    return self
@method('-')
def nud(self):
    """
    Null denotation of '-' (unary minus): the operand must have an integer value,
    whose negation becomes the value of the token.
    """
    self[0:] = self.parser.expression(75),
    operand = self[0]
    if not isinstance(operand.value, int):
        raise ElementPathSyntaxError("an integer value is required: %r." % operand)
    self.value = - operand.value
    return self
@method(infix('-', bp=40))
def led(self, left):
    """Left denotation of '-' (subtraction): the value is the difference of the values of the operands."""
    right = self.parser.expression(40)
    self[0:1] = left, right
    self.value = self[0].value - self[1].value
    return self
# Division and modulo: registered as infix operators with the same binding power
# as the multiplication. No value computation is attached to them here.
infix('div', bp=45)
infix('mod', bp=45)
@method('self::', bp=60)
def sed(self, _context, results):
    """Self selector: yield each item of the results, unchanged."""
    for item in results:
        yield item
@method(literal('.', bp=60))
def sed(self, _context, results):
    """Self node selector: yield only the items of the results that are elements."""
    for item in results:
        if not self.iselement(item):
            continue
        yield item
# @register_nud('parent::node()', bp=60)
# The parent step has no operand: it is registered as a literal. With a prefix
# registration the null denotation requires an expression after '..', so that
# neither '..' nor '../name' can be parsed.
@method(literal('..', bp=60))
def sed(_self, context, results):
    """
    Parent selector: yield the parent of each item of the results, without
    duplicates. The parents are looked up in the ``parent_map`` attribute of the
    context; the items not found in the map are skipped.
    """
    parent_map = context.parent_map
    results_parents = []
    for elem in results:
        try:
            parent = parent_map[elem]
        except KeyError:
            pass
        else:
            if parent not in results_parents:
                results_parents.append(parent)
                yield parent
# @register_nud('ancestor::', bp=60)
# def parent_token_nud(self):
# self.sed = self.parent_selector()
# return self
@method('/')
def nud(self):
    """Null denotation of '/': a path step separator cannot begin an expression."""
    current = self.parser.token
    current.unexpected()
@method('/', bp=80)
def led(self, left):
    """
    Left denotation of '/': the operands are the left expression and the step
    that follows, that must be a token admitted in relative paths.
    """
    step = self.parser.expression(100)
    self[0:1] = left, step
    if self[1].symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
        raise ElementPathSyntaxError("invalid child %r." % self[1])
    return self
@method('/')
def sed(self, context, results):
    """Path step selector: apply the right operand to the items selected by the left one."""
    left_results = self[0].sed(context, results)
    return self[1].sed(context, left_results)
@method('//', bp=80)
def led(self, left):
    """
    Left denotation of '//': the operands are the left expression and the step
    that follows, that must be a token admitted in relative paths. The value is
    that of the left operand if it is a name or a wildcard, None otherwise.
    """
    self[0:1] = left, self.parser.expression(100)
    if self[1].symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
        raise ElementPathSyntaxError("invalid descendant %r." % self[1])
    # The select denotation of the left operand is a class attribute: removing it
    # from the instance with delattr() always fails, so it is left in place.
    if self[0].symbol in ('*', '(ref)'):
        self.value = self[0].value
    else:
        self.value = None
    return self
@method('//')
def sed(self, context, results):
    """
    Descendants selector: for each item selected by the left operand, yield what
    its ``iter()`` method returns for the value of the right operand, except the
    item itself.
    """
    for ancestor in self[0].sed(context, results):
        if ancestor is None:
            continue
        for descendant in ancestor.iter(self[1].value):
            if descendant is not ancestor:
                yield descendant
@method('(', bp=90)
def nud(self):
    """
    Null denotation of '(': parse the enclosed expression, that cannot be empty,
    consume the closing parenthesis and return the token of the inner expression.
    """
    self.parser.next_token.unexpected(')')
    inner = self.parser.expression()
    self[0:] = inner,
    self.parser.advance(')')
    return self[0]
@method(')')
def nud(self):
    """Null denotation of ')': a closing parenthesis cannot begin an expression."""
    current = self.parser.token
    current.unexpected()
@method(')')
def led(self, left):
    """Left denotation of ')': a closing parenthesis cannot be used as an operator."""
    # The 'left' parameter is required by the led() protocol: without it a call
    # fails with a TypeError instead of the intended syntax error.
    self.parser.token.unexpected()
@method('[', bp=90)
def nud(self):
    """Null denotation of '[': a predicate cannot begin an expression."""
    current = self.parser.token
    current.unexpected()
@method('[', bp=90)
def led(self, left):
    """
    Left denotation of '[': the operands are the left expression and the predicate
    expression, that cannot be empty. The closing bracket is consumed.
    """
    self.parser.next_token.unexpected(']')
    predicate = self.parser.expression()
    self[0:1] = left, predicate
    self.parser.advance(']')
    return self
@method('[')
def sed(self, context, results):
    """
    Predicate selector.

    If the predicate has an integer value it is used as a subscript on the items
    selected by the left operand: a positive value is a 1-based position, a negative
    value counts from the end and is accepted only from last() or position(). In the
    other cases nothing is selected. If the value is not an integer, the items for
    which the predicate selects at least a true value are yielded.
    """
    results = self[0].sed(context, results)
    predicate = self[1]
    if not isinstance(predicate.value, int):
        for elem in results:
            if elem is None:
                continue
            if any(list(predicate.sed(context, [elem]))):
                yield elem
        return

    # subscript predicate
    value = predicate.value
    if value > 0:
        index = value - 1
    elif value == 0 or predicate.symbol not in ('last(', 'position('):
        index = None
    else:
        index = value

    if index is not None:
        try:
            yield list(results)[index]
        except IndexError:
            return
# The closing bracket is a plain symbol, consumed by the left denotation of '['.
register(']')
# @register_nud(']')
# @register_led(']')
# def predicate_close_token(self, *_args, **_kwargs):
# self.parser.token.unexpected(']')
@method('last(')
def nud(self):
    """
    Null denotation of last(): the value is -1, an index from the end. For the
    form ``last() - N`` the integer literal becomes the operand and the value is
    ``-1 - N``.
    """
    self.parser.advance(')')
    if self.parser.next_token.symbol == '-':
        self.parser.advance('-')
        # advance() returns the new next token: the consumed integer literal is
        # the current token of the parser.
        self.parser.advance('(integer)')
        self[0:] = self.parser.token,
        self.value = -1 - self[0].value
    else:
        self.value = -1
    return self
@method('position(')
def nud(self):
    """
    Null denotation of position(): only the form ``position() = expression`` is
    accepted, where the expression must have an integer value, that becomes the
    value of the token.
    """
    self.parser.advance(')')
    self.parser.advance('=')
    self[0:] = self.parser.expression(90),
    position = self[0].value
    if not isinstance(position, int):
        raise ElementPathSyntaxError("an integer expression is required: %r." % position)
    self.value = position
    return self
@method('boolean(')
def nud(self):
    """
    Syntax: boolean(expression) --> boolean

    The argument cannot be empty. The value of the token is the truth value of
    the value of the argument token.
    """
    self.parser.next_token.unexpected(')')
    self[0:] = self.parser.expression(),
    self.parser.advance(')')
    # Removed a debug print and the call of a 'function_selector' method, that is
    # not defined for the tokens and made the parsing fail.
    self.value = bool(self[0].value)
    return self
@method('text(')
def nud(self):
    """Null denotation of text(): no arguments, only the closing parenthesis is consumed."""
    parser = self.parser
    parser.advance(')')
    return self
@method('text(')
def sed(self, context, results):
    """Text selector: for each element of the results yield its text and its tail, when not None."""
    for item in results:
        if not self.iselement(item):
            continue
        if item.text is not None:
            yield item.text
        if item.tail is not None:
            yield item.tail
@method('node(')
def nud(self):
    """Null denotation of node(): no arguments, only the closing parenthesis is consumed."""
    parser = self.parser
    parser.advance(')')
    return self
@method('node(')
def sed(self, context, results):
    """Node selector: yield the items of the results that are elements."""
    for item in results:
        if not self.iselement(item):
            continue
        yield item
@method('not(')
def nud(self):
    """
    Syntax: not(expression) --> boolean

    The argument cannot be empty. The value of the token is the negation of the
    truth value of the value of the argument token.
    """
    self.parser.next_token.unexpected(')')
    argument = self.parser.expression()
    self[0:] = argument,
    self.parser.advance(')')
    self.value = not bool(self[0].value)
    return self
@method('true(')
def nud(self):
    """
    Syntax: true() --> boolean (true)

    Consumes the closing parenthesis and sets the value of the token to True.
    """
    parser = self.parser
    parser.advance(')')
    self.value = True
    return self
@method('false(')
def nud(self):
    """
    Syntax: false() --> boolean (false)

    Consumes the closing parenthesis and sets the value of the token to False.
    """
    parser = self.parser
    parser.advance(')')
    self.value = False
    return self
# End the registration: registers the '(end)' symbol and builds the tokenizer
# from the symbols of the table.
XPath1Parser.end()

24
elementpath/xpath2.py Normal file
View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
from .xpath1 import XPath1Parser
class XPath2Parser(XPath1Parser):
    """
    XPath 2.0 expression parser class.

    Starts from a copy of the symbol table of the XPath 1.0 parser and declares
    the additional symbols 'union' and 'intersect'.
    """
    symbol_table = dict(XPath1Parser.symbol_table)
    SYMBOLS = XPath1Parser.SYMBOLS + ('union', 'intersect')
    RELATIVE_PATH_SYMBOLS = XPath1Parser.RELATIVE_PATH_SYMBOLS | {
        s for s in SYMBOLS if s.endswith("::")
    }

    @property
    def version(self):
        """The XPath version implemented by the parser."""
        return '2.0'

6
requirements-dev.txt Normal file
View File

@ -0,0 +1,6 @@
# Requirements for setting up a development environment
setuptools
lxml
Sphinx
sphinx_rtd_theme
-e .

5
setup.cfg Normal file
View File

@ -0,0 +1,5 @@
[wheel]
universal = 1
[metadata]
license_file = LICENSE

42
setup.py Normal file
View File

@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
from setuptools import setup
# The long description of the package is the content of the README file.
with open("README.rst") as readme:
    long_description = readme.read()

CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',
    'Intended Audience :: Developers',
    'Intended Audience :: Information Technology',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: MIT License',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3.3',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: Implementation :: CPython',
    'Topic :: Software Development :: Libraries'
]

# NOTE(review): the version differs from the __version__ declared by the package
# ('1.0') - confirm which one is intended.
setup(
    name='elementpath',
    version='0.9.20',
    packages=['elementpath'],
    author='Davide Brunato',
    author_email='brunato@sissa.it',
    url='https://github.com/brunato/elementpath',
    license='MIT',
    description='XPath parsers and selectors for ElementTree.',
    long_description=long_description,
    classifiers=CLASSIFIERS,
)

97
test_elementpath.py Normal file
View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
import unittest
import os
from xml.etree import ElementTree
import lxml.etree
from elementpath import *
class TokenizerTest(unittest.TestCase):
    """Tests for the tokenizer of the XPath 1.0 parser."""

    def test_xpath_tokenizer(self):
        # Each case is a path with the expected sequence of tokens. An empty
        # string is expected where the tokenizer skips a run of spaces.
        cases = [
            # tests from the XPath specification
            ("*", ['*']),
            ("text()", ['text(', ')']),
            ("@name", ['@', 'name']),
            ("@*", ['@', '*']),
            ("para[1]", ['para', '[', '1', ']']),
            ("para[last()]", ['para', '[', 'last(', ')', ']']),
            ("*/para", ['*', '/', 'para']),
            ("/doc/chapter[5]/section[2]",
             ['/', 'doc', '/', 'chapter', '[', '5', ']',
              '/', 'section', '[', '2', ']']),
            ("chapter//para", ['chapter', '//', 'para']),
            ("//para", ['//', 'para']),
            ("//olist/item", ['//', 'olist', '/', 'item']),
            (".", ['.']),
            (".//para", ['.', '//', 'para']),
            ("..", ['..']),
            ("../@lang", ['..', '/', '@', 'lang']),
            ("chapter[title]", ['chapter', '[', 'title', ']']),
            ("employee[@secretary and @assistant]", ['employee',
             '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']),

            # additional tests from Python XML etree test cases
            ("{http://spam}egg", ['{http://spam}egg']),
            ("./spam.egg", ['.', '/', 'spam.egg']),
            (".//{http://spam}egg", ['.', '//', '{http://spam}egg']),

            # additional tests
            ("(: this is a comment :)",
             ['(:', '', 'this', '', 'is', '', 'a', '', 'comment', '', ':)']),
        ]
        for path, expected in cases:
            tokens = [
                lit or op or ref for lit, op, ref in XPath1Parser.tokenizer.findall(path)
            ]
            self.assertEqual(tokens, expected)
class XPath1ParserTest(unittest.TestCase):
    """Tests for the XPath 1.0 parser."""

    @classmethod
    def setUpClass(cls):
        cls.parser = XPath1Parser()

    def test_xpath_comment(self):
        # The value of a comment token is made of the values of the enclosed
        # tokens, joined by a space. Assert the results instead of printing them.
        token = self.parser.parse("(: this is a comment :)")
        self.assertEqual(token.symbol, '(:')
        self.assertEqual(token.value, 'this is a comment')

        token = self.parser.parse("(: this is a (: nested :) comment :)")
        self.assertEqual(token.symbol, '(:')
        self.assertEqual(token.value, 'this is a (: nested :) comment')
class ElementTreeTest(unittest.TestCase):
    """Tests of the selectors on ElementTree structures."""

    @classmethod
    def setUpClass(cls):
        cls.XML = ElementTree.XML

    # The leading underscore keeps the method out of the test collection.
    def _test_rel_xpath_boolean(self):
        root = self.XML('<A><B><C/></B></A>')
        el = root[0]
        # Check the first selected value: the iterator returned by iter_select()
        # cannot be used directly as the subject of a truth assertion.
        self.assertTrue(next(XPathSelector('boolean(C)').iter_select(el)))
        self.assertFalse(next(XPathSelector('boolean(D)').iter_select(el)))
class LxmlEtreeTest(unittest.TestCase):
    """Placeholder for the tests of the selectors on lxml structures: only the fixtures are set."""

    @classmethod
    def setUpClass(cls):
        cls.XML = lxml.etree.XML
        cls.test_dir = os.path.dirname(__file__)
if __name__ == '__main__':
unittest.main()