First commit

Code derived from the XPath module xpath.py of the xmlschema package (v.0.9.21-dev). The code has been split into several modules: one for the base TDOP parser, one for the XPath 1.0 parser and one for the XPath 2.0 parser. The tests are implemented by the script test_elementpath.py. Changes to be committed: new file: .gitignore new file: LICENSE new file: MANIFEST.in new file: README.rst new file: elementpath/__init__.py new file: elementpath/exceptions.py new file: elementpath/todp_parser.py new file: elementpath/xpath1.py new file: elementpath/xpath2.py new file: requirements-dev.txt new file: setup.cfg new file: setup.py new file: test_elementpath.py
2018-02-11 10:51:03 +01:00 · 2018-02-11 10:51:03 +01:00 · dde2acac06
commit dde2acac06
13 changed files with 1344 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,13 @@
+*.pyc
+*.pyo
+*~
+*.so
+*.egg-info
+.idea/
+.project
+.ipynb_checkpoints/
+doc/_*
+__pycache__/
+dist/
+build/
+tmp/
--- a/22
+++ b/22
@ -0,0 +1,22 @@
+MIT License
+
+Copyright (c), 2018, SISSA (Scuola Internazionale Superiore di Studi Avanzati -
+International School for Advanced Studies).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,2 @@
+# Include the license file
+include LICENSE
--- a/README.rst
+++ b/README.rst
@ -0,0 +1,31 @@
+***********
+elementpath
+***********
+
+This library provides XPath selectors for Python's ElementTree XML libraries. It includes
+parsers for XPath 1.0 and XPath 2.0 and a mixin class for adding XPath selection
+to other trees of elements.
+
+Originally included in the `xmlschema <https://github.com/brunato/xmlschema>`_ library,
+this code has been forked into a separate package in order to allow independent usage.
+
+Installation and usage
+======================
+
+You can install the library with *pip* in a Python 2.7 or Python 3.3+ environment::
+
+    pip install elementpath
+
+Then import the selector from the library and apply XPath selection to ElementTree structures:
+
+.. code-block:: pycon
+
+    >>> from elementpath import XPathSelector
+    >>> ....
+
+
+License
+-------
+This software is distributed under the terms of the MIT License.
+See the file 'LICENSE' in the root directory of the present
+distribution, or http://opensource.org/licenses/MIT.
--- a/elementpath/init.py
+++ b/elementpath/init.py
@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+
+__version__ = '1.0'
+__author__ = "Davide Brunato"
+__contact__ = "brunato@sissa.it"
+__copyright__ = "Copyright 2018, SISSA"
+__license__ = "MIT"
+__status__ = "Production/Stable"
+
+
+from .exceptions import ElementPathError, ElementPathSyntaxError, ElementPathValueError
+from .todp_parser import Token, Parser
+from .xpath1 import XPathToken, XPath1Parser
+from .xpath2 import XPath2Parser
+
+
+###
+# XPath selectors
+#
+def relative_path(path, levels, namespaces=None, parser=XPath2Parser):
+    """
+    Return a relative XPath expression.
+
+    :param path: An XPath expression.
+    :param levels: Number of path levels to remove.
+    :param namespaces: Is an optional mapping from namespace prefix \
+    to full qualified name.
+    :param parser: Is an optional XPath parser class. If not given the XPath2Parser is used.
+    :return: A string with a relative XPath expression.
+    """
+    token_tree = parser(namespaces).parse(path)
+    path_parts = [t.value for t in token_tree.iter()]
+    i = 0
+    if path_parts[0] == '.':
+        i += 1
+    if path_parts[i] == '/':
+        i += 1
+    for value in path_parts[i:]:
+        if levels <= 0:
+            break
+        if value == '/':
+            levels -= 1
+        i += 1
+    return ''.join(path_parts[i:])
+
+
+class XPathSelector(object):
+    """
+
+    """
+    def __init__(self, path, namespaces=None, parser=XPath2Parser):
+        self.path = path
+        self.parser = parser(namespaces)
+        self._selector = self.parser.parse(path)
+
+    def __repr__(self):
+        return u'%s(path=%r, namespaces=%r, parser=%s)' % (
+            self.__class__.__name__, self.path, self.namespaces, self.parser.__class__.__name__
+        )
+
+    @property
+    def namespaces(self):
+        return self.parser.namespaces
+
+    def iter_select(self, context):
+        return self._selector.iter_select(context)
+
+
+_selector_cache = {}
+
+
+def element_path_iterfind(context, path, namespaces=None):
+    if path[:1] == "/":
+        path = "." + path
+
+    path_key = (id(context), path)
+    try:
+        return _selector_cache[path_key].iter_select(context)
+    except KeyError:
+        pass
+
+    parser = XPath1Parser(namespaces)
+    selector = parser.parse(path)
+    if len(_selector_cache) > 100:
+        _selector_cache.clear()
+    _selector_cache[path] = selector
+    return selector.iter_select(context)
+
+
+class ElementPathMixin(object):
+    """
+    Mixin class that defines the ElementPath API.
+    """
+    @property
+    def tag(self):
+        return getattr(self, 'name')
+
+    @property
+    def attrib(self):
+        return getattr(self, 'attributes')
+
+    def iterfind(self, path, namespaces=None):
+        """
+        Generates all matching XSD/XML element declarations by path.
+
+        :param path: is an XPath expression that considers the schema as the root element \
+        with global elements as its children.
+        :param namespaces: is an optional mapping from namespace prefix to full name.
+        :return: an iterable yielding all matching declarations in the XSD/XML order.
+        """
+        return element_path_iterfind(self, path, namespaces or self.xpath_namespaces)
+
+    def find(self, path, namespaces=None):
+        """
+        Finds the first XSD/XML element or attribute matching the path.
+
+        :param path: is an XPath expression that considers the schema as the root element \
+        with global elements as its children.
+        :param namespaces: an optional mapping from namespace prefix to full name.
+        :return: The first matching XSD/XML element or attribute or ``None`` if there is not match.
+        """
+        return next(element_path_iterfind(self, path, namespaces or self.xpath_namespaces), None)
+
+    def findall(self, path, namespaces=None):
+        """
+        Finds all matching XSD/XML elements or attributes.
+
+        :param path: is an XPath expression that considers the schema as the root element \
+        with global elements as its children.
+        :param namespaces: an optional mapping from namespace prefix to full name.
+        :return: a list containing all matching XSD/XML elements or attributes. An empty list \
+        is returned if there is no match.
+        """
+        return list(element_path_iterfind(self, path, namespaces or self.xpath_namespaces))
+
+    @property
+    def xpath_namespaces(self):
+        if hasattr(self, 'namespaces'):
+            namespaces = {k: v for k, v in self.namespaces.items() if k}
+            if hasattr(self, 'xpath_default_namespace'):
+                namespaces[''] = self.xpath_default_namespace
+            return namespaces
+
+    def iter(self, name=None):
+        raise NotImplementedError
+
+    def iterchildren(self, name=None):
+        raise NotImplementedError
--- a/elementpath/exceptions.py
+++ b/elementpath/exceptions.py
@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+
+class ElementPathError(Exception):
+    pass
+
+
+class ElementPathSyntaxError(ElementPathError, SyntaxError):
+    pass
+
+
+class ElementPathValueError(ElementPathError, ValueError):
+    pass
--- a/elementpath/todp_parser.py
+++ b/elementpath/todp_parser.py
@ -0,0 +1,336 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+"""
+This module contains classes and helper functions for defining Pratt parsers.
+"""
+import re
+from decimal import Decimal
+from abc import ABCMeta
+try:
+    from collections.abc import MutableSequence  # Python 3.3+
+except ImportError:
+    from collections import MutableSequence  # Python 2.7
+from .exceptions import ElementPathSyntaxError
+
+
+def create_tokenizer(symbols):
+    """
+    Create a simple tokenizer for a sequence of symbols. Extra spaces are skipped.
+
+    :param symbols: A sequence of strings representing the symbols. Blank and empty \
+    symbols are discarded.
+    :return: A regex compiled pattern.
+    """
+    tokenizer_pattern_template = r"""
+        ('[^']*' | "[^"]*" | \d+(?:\.\d?)? | \.\d+) |   # Literals (strings or numbers)
+        (%s|[%s]) |                                     # Symbols
+        ((?:{[^}]+\})?[^/\[\]()@=|\s]+) |               # References and other names   
+        \s+                                             # Skip extra spaces
+    """
+
+    def symbol_escape(s):
+        s = re.escape(s)
+        if s[-2:] == r'\(':
+            s = '%s\s*%s' % (s[:-2], s[-2:])
+        elif s[-4:] == r'\:\:':
+            s = '%s\s*%s' % (s[:-4], s[-4:])
+        return s
+
+    symbols = sorted([s2 for s2 in (s1.strip() for s1 in symbols) if s2], key=lambda x: -len(x))
+    fence = len([i for i in symbols if len(i) > 1])
+    return re.compile(
+        tokenizer_pattern_template % (
+            '|'.join(map(symbol_escape, symbols[:fence])),
+            ''.join(map(re.escape, symbols[fence:]))
+        ),
+        re.VERBOSE
+    )
+
+
+#
+# Simple top down parser based on Vaughan Pratt's algorithm (Top Down Operator Precedence).
+#
+# References:
+#
+#   https://tdop.github.io/  (Vaughan R. Pratt's "Top Down Operator Precedence" - 1973)
+#   http://crockford.com/javascript/tdop/tdop.html  (Douglas Crockford - 2007)
+#   http://effbot.org/zone/simple-top-down-parsing.htm (Fredrik Lundh - 2008)
+#
+class Token(MutableSequence):
+    """
+    Token base class for defining a parser based on Pratt's method.
+
+    :cvar symbol: The symbol of the token class.
+    :param value: The token value. If not provided defaults to token symbol.
+    """
+    symbol = None  # the token identifier, key in the token table.
+    lbp = 0        # left binding power
+    rbp = 0        # right binding power
+
+    def __init__(self, parser, value=None):
+        self.parser = parser
+        self.value = value if value is not None else self.symbol
+        self._operands = []
+
+    def __getitem__(self, i):
+        return self._operands[i]
+
+    def __setitem__(self, i, item):
+        self._operands[i] = item
+
+    def __delitem__(self, i):
+        del self._operands[i]
+
+    def __len__(self):
+        return len(self._operands)
+
+    def insert(self, i, item):
+        self._operands.insert(i, item)
+
+    def __str__(self):
+        if self:
+            return u'(%s %s)' % (self.value, ' '.join(str(item) for item in self))
+        else:
+            return u'(%s)' % self.value
+
+    def __repr__(self):
+        return u'%s(value=%r)' % (self.__class__.__name__, self.value)
+
+    def __cmp__(self, other):
+        return self.symbol == other.symbol and self.value == other.value
+
+    @property
+    def arity(self):
+        return len(self)
+
+    def nud(self):
+        """Null denotation method"""
+        raise ElementPathSyntaxError("Undefined operator for %r." % self.symbol)
+
+    def led(self, left):
+        """Left denotation method"""
+        raise ElementPathSyntaxError("Undefined operator for %r." % self.symbol)
+
+    def eval(self):
+        """Evaluation method"""
+        return self.value
+
+    def iter(self):
+        for t in self[:1]:
+            for token in t.iter():
+                yield token
+        yield self
+        for t in self[1:]:
+            for token in t.iter():
+                yield token
+
+    def expected(self, symbol):
+        if self.symbol != symbol:
+            raise ElementPathSyntaxError("Expected %r token, found %r." % (symbol, str(self.value)))
+
+    def unexpected(self, symbol=None):
+        if not symbol or self.symbol == symbol:
+            raise ElementPathSyntaxError("Unexpected %r token." % str(self.value))
+
+
+class Parser(object):
+    symbol_table = {}
+    token_base_class = Token
+    tokenizer = None
+    SYMBOLS = ()
+
+    def __init__(self):
+        if '(end)' not in self.symbol_table or self.tokenizer is None:
+            raise ValueError("Incomplete parser class %s registration." % self.__class__.__name__)
+        self.token = None
+        self.next_token = None
+        self.match = None
+        self.tokens = iter(())
+
+    def parse(self, source):
+        try:
+            self.tokens = iter(self.tokenizer.finditer(source))
+            self.advance()
+            root_token = self.expression()
+            if self.next_token.symbol != '(end)':
+                self.next_token.unexpected()
+            return root_token
+        finally:
+            self.tokens = iter(())
+            self.next_token = None
+
+    def advance(self, symbol=None):
+        if getattr(self.next_token, 'symbol', None) == '(end)':
+            raise ElementPathSyntaxError(
+                "Unexpected end of source at position %d, after %r." % (self.match.span()[1], self.token.symbol)
+            )
+
+        self.token = self.next_token
+        if symbol and symbol not in (self.next_token.symbol, self.next_token.value):
+            self.next_token.expected(symbol)
+
+        while True:
+            try:
+                self.match = next(self.tokens)
+            except StopIteration:
+                self.next_token = self.symbol_table['(end)'](self)
+                break
+            else:
+                literal, operator, ref = self.match.groups()
+                if operator is not None:
+                    try:
+                        self.next_token = self.symbol_table[operator.replace(' ', '')](self)
+                    except KeyError:
+                        raise ElementPathSyntaxError("unknown operator %r." % operator)
+                    break
+                elif literal is not None:
+                    if literal[0] in '\'"':
+                        self.next_token = self.symbol_table['(string)'](self, literal.strip("'\""))
+                    elif '.' in literal:
+                        self.next_token = self.symbol_table['(decimal)'](self, Decimal(literal))
+                    else:
+                        self.next_token = self.symbol_table['(integer)'](self, int(literal))
+                    break
+                elif ref is not None:
+                    self.next_token = self.symbol_table['(ref)'](self, ref)
+                    break
+                elif str(self.match.group()).strip():
+                    raise ElementPathSyntaxError("unexpected token: %r" % self.match)
+
+        return self.next_token
+
+    def expression(self, rbp=0):
+        """
+        Recursive expression parser for expressions. Calls token.nud() and then
+        advance until the right binding power is less the left binding power of
+        the next token, invoking the led() method on the following token.
+
+        :param rbp: right binding power for the expression.
+        :return: left token.
+        """
+        token = self.next_token
+        self.advance()
+        left = token.nud()
+        while rbp < self.next_token.lbp:
+            token = self.next_token
+            self.advance()
+            left = token.led(left)
+        return left
+
+    @classmethod
+    def begin(cls):
+        """
+        Begin the symbol registration. Helper functions are bound to global names.
+        """
+        cls.tokenizer = None
+        globals().update({
+            'register': cls.register,
+            'literal': cls.literal,
+            'prefix': cls.prefix,
+            'infix': cls.infix,
+            'infixr': cls.infixr,
+            'method': cls.method,
+        })
+
+    @classmethod
+    def end(cls):
+        """
+        End the symbol registration. Registers the special (end) symbol and sets the tokenizer.
+        """
+        cls.register('(end)')
+        cls.tokenizer = create_tokenizer(
+            s for s in cls.symbol_table
+            if s.strip() not in {'(end)', '(ref)', '(string)', '(decimal)', '(integer)'}
+        )
+
+    @classmethod
+    def register(cls, symbol, **kwargs):
+        """
+        Register/update a token class in the symbol table.
+
+        :param symbol: The identifier symbol for the or an existent token class.
+        :param kwargs: Optional attributes/methods for the token class.
+        :return: A token class.
+        """
+        try:
+            try:
+                symbol = symbol.strip()
+            except AttributeError:
+                assert issubclass(symbol, cls.token_base_class), \
+                    "A %r subclass requested, not %r." % (cls.token_base_class, symbol)
+                symbol, token_class = symbol.symbol, symbol
+                if symbol not in cls.symbol_table:
+                    cls.symbol_table[symbol] = token_class
+                else:
+                    assert cls.symbol_table[symbol] is token_class, \
+                        "The registered instance for %r is not %r." % (symbol, token_class)
+            else:
+                token_class = cls.symbol_table[symbol]
+
+        except KeyError:
+            name = '_%s_%s' % (symbol, cls.token_base_class.__name__)
+            kwargs['symbol'] = symbol
+            token_class = ABCMeta(name, (cls.token_base_class,), kwargs)
+            cls.symbol_table[symbol] = token_class
+            cls.tokenizer = None
+            ABCMeta.register(MutableSequence, token_class)
+        else:
+            for key, value in kwargs.items():
+                if key == 'lbp' and value > token_class.lbp:
+                    token_class.lbp = value
+                elif callable(value):
+                    setattr(token_class, key, value)
+
+        return token_class
+
+    @classmethod
+    def unregistered(cls):
+        if cls.SYMBOLS:
+            return [s for s in cls.SYMBOLS if s not in cls.symbol_table]
+
+    @classmethod
+    def symbol(cls, s):
+        return cls.register(s)
+
+    @classmethod
+    def literal(cls, symbol, bp=0):
+        def nud(self):
+            return self
+        return cls.register(symbol, lbp=bp, nud=nud)
+
+    @classmethod
+    def prefix(cls, symbol, bp=0):
+        def nud(self):
+            self[0:] = self.parser.expression(rbp=bp),
+            return self
+        return cls.register(symbol, lbp=bp, rbp=bp, nud=nud)
+
+    @classmethod
+    def infix(cls, symbol, bp=0):
+        def led(self, left):
+            self[0:1] = left, self.parser.expression(rbp=bp)
+            return self
+        return cls.register(symbol, lbp=bp, rbp=bp, led=led)
+
+    @classmethod
+    def infixr(cls, symbol, bp=0):
+        def led(self, left):
+            self[0:1] = left, self.parser.expression(rbp=bp-1)
+            return self
+        return cls.register(symbol, lbp=bp, rbp=bp-1, led=led)
+
+    @classmethod
+    def method(cls, symbol, bp=0):
+        token_class = cls.register(symbol, lbp=bp, rbp=bp)
+
+        def bind(func):
+            assert callable(getattr(token_class, func.__name__, None)), \
+                "The name %r does not match with a callable of %r." % (func.__name__, token_class)
+            setattr(token_class, func.__name__, func)
+            return func
+        return bind
--- a/elementpath/xpath1.py
+++ b/elementpath/xpath1.py
@ -0,0 +1,588 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+from .exceptions import ElementPathSyntaxError, ElementPathValueError
+from .todp_parser import Token, Parser
+
+
+class XPathToken(Token):
+
+    def iter_select(self, context):
+        return self.sed(context, [context])
+
+    def sed(self, context, results):
+        """Select denotation"""
+        raise ElementPathSyntaxError("Undefined operator for %r." % self.symbol)
+
+    @staticmethod
+    def iselement(elem):
+        return hasattr(elem, 'tag') and hasattr(elem, 'attrib') and hasattr(elem, 'text')
+
+
+class XPath1Parser(Parser):
+    """
+    XPath 1.0 expression parser class.
+
+    :param namespaces: optional prefix to namespace map.
+    """
+    token_base_class = XPathToken
+    symbol_table = {k: v for k, v in Parser.symbol_table.items()}
+    SYMBOLS = (
+        'processing-instruction(', 'descendant-or-self::', 'following-sibling::',
+        'preceding-sibling::', 'ancestor-or-self::', 'descendant::', 'attribute::',
+        'following::', 'namespace::', 'preceding::', 'ancestor::', 'comment(', 'parent::',
+        'child::', 'self::', 'text(', 'node(', 'and', 'mod', 'div', 'or',
+        '..', '//', '!=', '<=', '>=', '(', ')', '[', ']', '.', '@', ',', '/', '|', '*',
+        '-', '=', '+', '<', '>', '(:', ':)',
+
+        # XPath Core function library
+        'last(', 'position(', 'count(', 'id(', 'local-name(',   # Node set functions
+        'namespace-uri(', 'name(',
+        'string(', 'concat(', 'starts-with(', 'contains(',      # String functions
+        'substring-before(', 'substring-after(', 'substring(',
+        'string-length(', 'normalize-space(', 'translate(',
+        'boolean(', 'not(', 'true(', 'false('                   # Boolean functions
+    )
+    RELATIVE_PATH_SYMBOLS = {s for s in SYMBOLS if s.endswith("::")} | {
+        '(integer)', '(string)', '(decimal)', '(ref)', '*', '@', '..', '.', '(', '/'
+    }
+
+    def __init__(self, namespaces=None):
+        super(XPath1Parser, self).__init__()
+        self.namespaces = namespaces if namespaces is not None else {}
+
+    @property
+    def version(self):
+        return '1.0'
+
+    @classmethod
+    def begin(cls):
+        super(XPath1Parser, cls).begin()
+        globals().update({'selector': cls.selector})
+
+    @classmethod
+    def selector(cls, symbol, bp=0):
+        def sed_(self, _context, results):
+            for elem in results:
+                if elem is not None:
+                    yield self.value
+        return cls.register(symbol, lbp=bp, rbp=bp, sed=sed_)
+
+    def parse(self, path):
+        if not path:
+            raise ElementPathSyntaxError("empty XPath expression.")
+        elif path[-1] == '/':
+            raise ElementPathSyntaxError("invalid path: %r" % path)
+        if path[:1] == "/":
+            path = "." + path
+        return super(XPath1Parser, self).parse(path)
+
+    def map_reference(self, ref):
+        """
+        Map a reference into a fully qualified name using the instance namespace map.
+
+        :param ref: a local name, a prefixed name or a fully qualified name.
+        :return: String with a FQN or a local name.
+        """
+        if ref and ref[0] == '{':
+            return ref
+
+        try:
+            ns_prefix, name = ref.split(':')
+        except ValueError:
+            if ':' in ref:
+                raise ElementPathValueError("wrong format for reference name %r" % ref)
+            try:
+                uri = self.namespaces['']
+            except KeyError:
+                return ref
+            else:
+                return u'{%s}%s' % (uri, ref) if uri else ref
+        else:
+            if not ns_prefix or not name:
+                raise ElementPathValueError("wrong format for reference name %r" % ref)
+            try:
+                uri = self.namespaces[ns_prefix]
+            except KeyError:
+                raise ElementPathValueError("prefix %r not found in namespace map" % ns_prefix)
+            else:
+                return u'{%s}%s' % (uri, name) if uri else name
+
+
+##
+# XPath1 definitions
+XPath1Parser.begin()
+
+register = XPath1Parser.register
+literal = XPath1Parser.literal
+prefix = XPath1Parser.prefix
+infix = XPath1Parser.infix
+method = XPath1Parser.method
+selector = XPath1Parser.selector
+
+
+# Comments
+@method('(:')
+def nud(self):
+    comment_level = 1
+    value = []
+    while comment_level:
+        self.parser.advance()
+        token = self.parser.token
+        if token.symbol == ':)':
+            comment_level -= 1
+            if comment_level:
+                value.append(token.value)
+        elif token.symbol == '(:':
+            comment_level += 1
+            value.append(token.value)
+        else:
+            value.append(token.value)
+    self.value = ' '.join(value)
+    return self
+register(':)')
+
+
+###
+# Axes
+@method('child::', bp=80)
+def nud(self):
+    if self.parser.next_token not in ('(ref)', '*', 'text(', 'node('):
+        raise ElementPathSyntaxError("invalid child axis %r." % self.parser.next_token)
+    self[0:] = self.parser.expression(80),
+    return self
+
+@method('child::')
+def sed(self, context, results):
+    for elem in results:
+        if self.iselement(elem):
+            for e in elem:
+                yield e
+
+
+
+selector(literal('(string)'))
+selector(literal('(decimal)'))
+selector(literal('(integer)'))
+
+
+@method(literal('(ref)'))
+def nud(self):
+    if self.value[0] != '{' and ':' in self.value:
+        self.value = self.parser.map_reference(self.value)
+    return self
+
+
+@method('(ref)')
+@method('*')
+def sed(self, _context, results):
+    """Children selector."""
+    for elem in results:
+        if elem is not None:
+            for e in elem:
+                if self.value is None or e.tag == self.value:
+                    yield e
+
+
+@method('*')
+def nud(self):
+    if self.parser.next_token.symbol not in ('/', '[', '(end)', ')'):
+        self.parser.next_token.unexpected()
+    self.value = None
+    return self
+
+
+@method(infix('*', bp=45))
+def led(self, left):
+    self[0:1] = left, self.parser.expression(45)
+    self.value = left.value + self[1].value
+    return self
+
+
+@method('@')
+@method('attribute::')
+def nud(self):
+    self[0:] = self.parser.expression(),
+    if self[0].symbol not in ('*', '(ref)'):
+        raise ElementPathSyntaxError("invalid attribute specification for XPath.")
+    if self.parser.next_token.symbol == '=':
+        self.parser.advance('=')
+        self[0][0:] = self.parser.advance('(string)'),
+    return self
+
+
+@selector('@')
+@selector('attribute::')
+def sed(self, _context, results):
+    """
+    Attribute selector.
+    """
+    if self[0].symbol != '=':
+        # @attribute
+        key = self.value
+        if key is None:
+            for elem in results:
+                if elem is not None:
+                    for attr in elem.attrib.values():
+                        yield attr
+        elif '{' == key[0]:
+            for elem in results:
+                if elem is not None and key in elem.attrib:
+                    yield elem.attrib[key]
+        else:
+            for elem in results:
+                if elem is None:
+                    continue
+                elif key in elem.attrib:
+                    yield elem.attrib[key]
+    else:
+        # @attribute='value'
+        key = self.value
+        value = self[0].value
+        if key is not None:
+            for elem in results:
+                if elem is not None:
+                    yield elem.get(key) == value
+        else:
+            for elem in results:
+                if elem is not None:
+                    for attr in elem.attrib.values():
+                        yield attr == value
+
+
+# [tag='value']
+@selector('unknown')
+def sed(self, _context, results):
+    for elem in results:
+        if elem is not None:
+            for e in elem.findall(self.symbol):
+                if "".join(e.itertext()) == self.value:
+                    yield elem
+                    break
+
+
+@method(infix('or', bp=20))
+@method(infix('|', bp=50))
+@method(infix('union', bp=50))
+def sed(self, context, results):
+    left_results = list(self[0].sed(context, results))
+    right_results = list(self[1].sed(context, results))
+    for elem in left_results:
+        yield elem
+    for elem in right_results:
+        yield elem
+
+
+@method(infix('and', bp=25))
+def sed(self, context, results):
+    right_results = set(self[1].sed(context, results))
+    for elem in self[0].sed(context, results):
+        if elem in right_results:
+            yield elem
+
+
+# prefix('=', bp=30)
+# prefix('<', bp=30)
+# prefix('>', bp=30)
+# prefix('!=', bp=30)
+# prefix('<=', bp=30)
+# prefix('>=', bp=30)
+
+infix('=', bp=30)
+infix('<', bp=30)
+infix('>', bp=30)
+infix('!=', bp=30)
+infix('<=', bp=30)
+infix('>=', bp=30)
+
+
+@method('+')
+def nud(self):
+    self[0:] = self.parser.expression(75),
+    if not isinstance(self[0].value, int):
+        raise ElementPathSyntaxError("an integer value is required: %r." % self[0])
+    self.value = self[0].value
+    return self
+
+
+@method(infix('+', bp=40))
+def led(self, left):
+    self[0:1] = left, self.parser.expression(40)
+    self.value = self[0].value + self[1].value
+    return self
+
+
+@method('-')
+def nud(self):
+    self[0:] = self.parser.expression(75),
+    if not isinstance(self[0].value, int):
+        raise ElementPathSyntaxError("an integer value is required: %r." % self[0])
+    self.value = - self[0].value
+    return self
+
+
+@method(infix('-', bp=40))
+def led(self, left):
+    self[0:1] = left, self.parser.expression(40)
+    self.value = self[0].value - self[1].value
+    return self
+
+
+infix('div', bp=45)
+infix('mod', bp=45)
+
+
+
+@method('self::', bp=60)
+def sed(self, _context, results):
+    """Self selector."""
+    for elem in results:
+        yield elem
+
+
+@method(literal('.', bp=60))
+def sed(self, _context, results):
+    """Self node selector."""
+    for elem in results:
+        if self.iselement(elem):
+            yield elem
+
+
+# @register_nud('parent::node()', bp=60)
+@method(prefix('..', bp=60))
+def sed(_self, context, results):
+    """Parent selector."""
+    parent_map = context.parent_map
+    results_parents = []
+    for elem in results:
+        try:
+            parent = parent_map[elem]
+        except KeyError:
+            pass
+        else:
+            if parent not in results_parents:
+                results_parents.append(parent)
+                yield parent
+
+
+# @register_nud('ancestor::', bp=60)
+# def parent_token_nud(self):
+#    self.sed = self.parent_selector()
+#    return self
+
+
+@method('/')
+def nud(self):
+    self.parser.token.unexpected()
+
+
+@method('/', bp=80)
+def led(self, left):
+    self[0:1] = left, self.parser.expression(100)
+    if self[1].symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
+        raise ElementPathSyntaxError("invalid child %r." % self[1])
+    return self
+
+
+@method('/')
+def sed(self, context, results):
+    results = self[0].sed(context, results)
+    return self[1].sed(context, results)
+
+
+
+@method('//', bp=80)
+def led(self, left):
+    self[0:1] = left, self.parser.expression(100)
+    if self[1].symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
+        raise ElementPathSyntaxError("invalid descendant %r." % self[1])
+    if self[0].symbol in ('*', '(ref)'):
+        delattr(self[0], 'sed')
+        self.value = self[0].value
+    else:
+        self.value = None
+    return self
+
+
+@method('//')
+def sed(self, context, results):
+    """Descendants selector."""
+    results = self[0].sed(context, results)
+    for elem in results:
+        if elem is not None:
+            for e in elem.iter(self[1].value):
+                if e is not elem:
+                    yield e
+
+
+@method('(', bp=90)
+def nud(self):
+    self.parser.next_token.unexpected(')')
+    self[0:] = self.parser.expression(),
+    self.parser.advance(')')
+    return self[0]
+
+
+@method(')')
+def nud(self):
+    self.parser.token.unexpected()
+
+
+@method(')')
+def led(self):
+    self.parser.token.unexpected()
+
+
+@method('[', bp=90)
+def nud(self):
+    self.parser.token.unexpected()
+
+
+@method('[', bp=90)
+def led(self, left):
+    self.parser.next_token.unexpected(']')
+    self[0:1] = left, self.parser.expression()
+    self.parser.advance(']')
+    return self
+
+
+@method('[')
+def sed(self, context, results):
+    """Predicate selector."""
+    results = self[0].sed(context, results)
+    if isinstance(self[1].value, int):
+        # subscript predicate
+        value = self[1].value
+        if value > 0:
+            index = value - 1
+        elif value == 0 or self[1].symbol not in ('last(', 'position('):
+            index = None
+        else:
+            index = value
+
+        if index is not None:
+            try:
+                yield [elem for elem in results][index]
+            except IndexError:
+                return
+    else:
+        for elem in results:
+            if elem is not None:
+                predicate_results = list(self[1].sed(context, [elem]))
+                if predicate_results and any(predicate_results):
+                    yield elem
+
+
+# Only the symbol is registered for ']': the nud/led handlers that follow
+# are commented out.
+register(']')
+# @register_nud(']')
+# @register_led(']')
+# def predicate_close_token(self, *_args, **_kwargs):
+#    self.parser.token.unexpected(']')
+
+
+@method('last(')
+def nud(self):
+    self.parser.advance(')')
+    if self.parser.next_token.symbol == '-':
+        self.parser.advance('-')
+        self[0:] = self.parser.advance('(integer)'),
+        self.value = -1 - self[0].value
+    else:
+        self.value = -1
+    return self
+
+
+@method('position(')
+def nud(self):
+    self.parser.advance(')')
+    self.parser.advance('=')
+    self[0:] = self.parser.expression(90),
+    if not isinstance(self[0].value, int):
+        raise ElementPathSyntaxError("an integer expression is required: %r." % self[0].value)
+    self.value = self[0].value
+    return self
+
+
+@method('boolean(')
+def nud(self):
+    """
+    Syntax: boolean(expression) --> boolean
+    """
+    self.parser.next_token.unexpected(')')
+    self[0:] = self.parser.expression(),
+    self.parser.advance(')')
+    print("Value:", self[0].value, self[0].sed)
+    self.sed = self.function_selector()
+    self.value = bool(self[0].value)
+    return self
+
+
+@method('text(')
+def nud(self):
+    self.parser.advance(')')
+    return self
+
+
+@method('text(')
+def sed(self, context, results):
+    for elem in results:
+        if self.iselement(elem):
+            if elem.text is not None:
+                yield elem.text
+            if elem.tail is not None:
+                yield elem.tail
+
+
+@method('node(')
+def nud(self):
+    self.parser.advance(')')
+    return self
+
+
+@method('node(')
+def sed(self, context, results):
+    for elem in results:
+        if self.iselement(elem):
+            yield elem
+
+
+@method('not(')
+def nud(self):
+    """
+    Syntax: not(expression) --> boolean
+    """
+    self.parser.next_token.unexpected(')')
+    self[0:] = self.parser.expression(),
+    self.parser.advance(')')
+    self.value = not bool(self[0].value)
+    return self
+
+
+@method('true(')
+def nud(self):
+    """
+    Syntax: true() --> boolean (true)
+    """
+    self.parser.advance(')')
+    self.value = True
+    return self
+
+
+@method('false(')
+def nud(self):
+    """
+    Syntax: false() --> boolean (false)
+    """
+    self.parser.advance(')')
+    self.value = False
+    return self
+
+
+XPath1Parser.end()
--- a/elementpath/xpath2.py
+++ b/elementpath/xpath2.py
@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+from .xpath1 import XPath1Parser
+
+
+class XPath2Parser(XPath1Parser):
+    """
+    XPath 2.0 expression parser class.
+    """
+    symbol_table = {k: v for k, v in XPath1Parser.symbol_table.items()}
+    SYMBOLS = XPath1Parser.SYMBOLS + ('union', 'intersect')
+    RELATIVE_PATH_SYMBOLS = XPath1Parser.RELATIVE_PATH_SYMBOLS | {s for s in SYMBOLS if s.endswith("::")}
+
+    @property
+    def version(self):
+        return '2.0'
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@ -0,0 +1,6 @@
+# Requirements for setting up a development environment
+setuptools
+lxml
+Sphinx
+sphinx_rtd_theme
+-e .
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,5 @@
+[wheel]
+universal = 1
+
+[metadata]
+license_file = LICENSE
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+from setuptools import setup
+
+with open("README.rst") as readme:
+    long_description = readme.read()
+
+setup(
+    name='elementpath',
+    version='0.9.20',
+    packages=['elementpath'],
+    author='Davide Brunato',
+    author_email='brunato@sissa.it',
+    url='https://github.com/brunato/elementpath',
+    license='MIT',
+    description='XPath parsers and selectors for ElementTree.',
+    long_description=long_description,
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'Intended Audience :: Information Technology',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: Implementation :: CPython',
+        'Topic :: Software Development :: Libraries'
+    ]
+)
--- a/test_elementpath.py
+++ b/test_elementpath.py
@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+import unittest
+import os
+from xml.etree import ElementTree
+import lxml.etree
+
+from elementpath import *
+
+
+class TokenizerTest(unittest.TestCase):
+
+    def test_xpath_tokenizer(self):
+        def check(path, expected):
+            self.assertEqual([
+                lit or op or ref for lit, op, ref in XPath1Parser.tokenizer.findall(path)
+            ], expected)
+
+        # tests from the XPath specification
+        check("*", ['*'])
+        check("text()", ['text(', ')'])
+        check("@name", ['@', 'name'])
+        check("@*", ['@', '*'])
+        check("para[1]", ['para', '[', '1', ']'])
+        check("para[last()]", ['para', '[', 'last(', ')', ']'])
+        check("*/para", ['*', '/', 'para'])
+        check("/doc/chapter[5]/section[2]",
+              ['/', 'doc', '/', 'chapter', '[', '5', ']',
+               '/', 'section', '[', '2', ']'])
+        check("chapter//para", ['chapter', '//', 'para'])
+        check("//para", ['//', 'para'])
+        check("//olist/item", ['//', 'olist', '/', 'item'])
+        check(".", ['.'])
+        check(".//para", ['.', '//', 'para'])
+        check("..", ['..'])
+        check("../@lang", ['..', '/', '@', 'lang'])
+        check("chapter[title]", ['chapter', '[', 'title', ']'])
+        check("employee[@secretary and @assistant]", ['employee',
+              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
+
+        # additional tests from Python XML etree test cases
+        check("{http://spam}egg", ['{http://spam}egg'])
+        check("./spam.egg", ['.', '/', 'spam.egg'])
+        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
+
+        # additional tests
+        check("(: this is a comment :)", ['(:', '', 'this', '', 'is', '', 'a', '', 'comment', '', ':)'])
+
+
+class XPath1ParserTest(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.parser = XPath1Parser()
+
+    def test_xpath_comment(self):
+        token = self.parser.parse("(: this is a comment :)")
+        print(token)
+        token = self.parser.parse("(: this is a (: nested :) comment :)")
+        print(token)
+
+
+
+
+class ElementTreeTest(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.XML = ElementTree.XML
+
+    def _test_rel_xpath_boolean(self):
+        root = self.XML('<A><B><C/></B></A>')
+        el = root[0]
+        print(list(XPathSelector('boolean(D)').iter_select(el)))
+        self.assertTrue(XPathSelector('boolean(C)').iter_select(el))
+        self.assertFalse(next(XPathSelector('boolean(D)').iter_select(el)))
+
+
+class LxmlEtreeTest(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.test_dir = os.path.dirname(__file__)
+        cls.XML = lxml.etree.XML
+
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()