Improve XPath bindings

- Extend ElementPathMixin with an instance-level _xpath_parser
- Add an xpath_tokens dictionary to schema objects
- Fix serialization by removing the xpath_tokens classes from the pickled state
2019-10-09 14:59:42 +02:00 · 2019-10-09 14:59:42 +02:00 · 75664150e6
parent 690a172502
commit 75664150e6
5 changed files with 109 additions and 60 deletions
--- a/xmlschema/validators/assertions.py
+++ b/xmlschema/validators/assertions.py
@ -57,8 +57,6 @@ class XsdAssert(XsdComponent, ElementPathMixin):
        else:
            self.xpath_default_namespace = self.schema.xpath_default_namespace

-        self.xpath_proxy = XMLSchemaProxy(self.schema, self)
-
    @property
    def built(self):
        return self.token is not None and (self.base_type.parent is None or self.base_type.built)
@ -77,7 +75,11 @@ class XsdAssert(XsdComponent, ElementPathMixin):
            variables = None

        self.parser = XPath2Parser(
-            self.namespaces, variables, False, self.xpath_default_namespace, schema=self.xpath_proxy
+            namespaces=self.namespaces,
+            variables=variables,
+            strict=False,
+            default_namespace=self.xpath_default_namespace,
+            schema=XMLSchemaProxy(self.schema, self)
        )

        try:
@ -89,6 +91,8 @@ class XsdAssert(XsdComponent, ElementPathMixin):
    def __call__(self, elem, value=None, source=None, namespaces=None, **kwargs):
        if value is not None:
            self.parser.variables['value'] = self.base_type.text_decode(value)
+        if not self.parser.is_schema_bound():
+            self.parser.schema.bind_parser(self.parser)

        if source is None:
            context = XPathContext(root=elem)
@ -96,7 +100,6 @@ class XsdAssert(XsdComponent, ElementPathMixin):
            context = XPathContext(root=source.root, item=elem)

        default_namespace = self.parser.namespaces['']
-
        if namespaces and '' in namespaces:
            self.parser.namespaces[''] = namespaces['']

--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@ -96,8 +96,11 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)

    def __setattr__(self, name, value):
        if name == "type":
-            assert value is None or isinstance(value, XsdType), "Wrong value %r for attribute 'type'." % value
-            self.attributes = self.get_attributes(value)
+            assert value is None or isinstance(value, XsdType)
+            try:
+                self.attributes = value.attributes
+            except AttributeError:
+                self.attributes = self.schema.create_empty_attribute_group(self)
        super(XsdElement, self).__setattr__(name, value)

    def __iter__(self):
@ -105,6 +108,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
            for e in self.type.content_type.iter_elements():
                yield e

+    @property
+    def xpath_proxy(self):
+        return XMLSchemaProxy(self.schema, self)
+
    def _parse(self):
        XsdComponent._parse(self)
        self._parse_attributes()
@ -112,7 +119,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
        self._parse_identity_constraints(index)
        if self.parent is None and 'substitutionGroup' in self.elem.attrib:
            self._parse_substitution_group(self.elem.attrib['substitutionGroup'])
-        self.xpath_proxy = XMLSchemaProxy(self.schema, self)

    def _parse_attributes(self):
        self._parse_particle(self.elem)
@ -390,7 +396,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
        try:
            return xsd_type.attributes
        except AttributeError:
-            return self.schema.empty_attribute_group
+            return self.attributes

    def get_path(self, ancestor=None, reverse=False):
        """
@ -848,8 +854,6 @@ class Xsd11Element(XsdElement):
        if any(v.inheritable for v in self.attributes.values()):
            self.inheritable = {k: v for k, v in self.attributes.items() if v.inheritable}

-        self.xpath_proxy = XMLSchemaProxy(self.schema, self)
-
    def _parse_alternatives(self, index=0):
        if self.ref is not None:
            self.alternatives = self.ref.alternatives
--- a/xmlschema/validators/schema.py
+++ b/xmlschema/validators/schema.py
@ -216,8 +216,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
    :vartype maps: XsdGlobals
    :ivar converter: the default converter used for XML data decoding/encoding.
    :vartype converter: XMLSchemaConverter
-    :ivar xpath_proxy: a proxy for XPath operations on schema components.
-    :vartype xpath_proxy: XMLSchemaProxy
    :ivar locations: schema location hints.
    :vartype locations: NamespaceResourcesMap
    :ivar namespaces: a dictionary that maps from the prefixes used by the schema into namespace URI.
@ -340,10 +338,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):

        self.locations = NamespaceResourcesMap(self.source.get_locations(locations))
        self.converter = self.get_converter(converter)
-        self.xpath_proxy = XMLSchemaProxy(self)
-        self.empty_attribute_group = self.BUILDERS.attribute_group_class(
-            etree_element(XSD_ATTRIBUTE_GROUP), self, self
-        )
+        self.xpath_tokens = {}

        # Create or set the XSD global maps instance
        if self.meta_schema is None:
@ -416,6 +411,16 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
            if loglevel is not None:
                logger.setLevel(logging.WARNING)  # Restore default logging

+    def __getstate__(self):
+        state = self.__dict__.copy()
+        del state['xpath_tokens']
+        state.pop('_xpath_parser', None)
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.xpath_tokens = {}
+
    def __repr__(self):
        if self.url:
            basename = os.path.basename(self.url)
@ -457,6 +462,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
    def __len__(self):
        return len(self.elements)

+    @property
+    def xpath_proxy(self):
+        return XMLSchemaProxy(self)
+
    @property
    def xsd_version(self):
        """Property that returns the class attribute XSD_VERSION."""
--- a/xmlschema/validators/wildcards.py
+++ b/xmlschema/validators/wildcards.py
@ -395,10 +395,13 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin):
                self.__class__.__name__, self.not_namespace, self.process_contents, self.occurs
            )

+    @property
+    def xpath_proxy(self):
+        return XMLSchemaProxy(self.schema, self)
+
    def _parse(self):
        super(XsdAnyElement, self)._parse()
        self._parse_particle(self.elem)
-        self.xpath_proxy = XMLSchemaProxy(self.schema, self)

    def match(self, name, default_namespace=None, resolve=False, **kwargs):
        """
--- a/xmlschema/xpath.py
+++ b/xmlschema/xpath.py
@ -93,6 +93,20 @@ class XMLSchemaProxy(AbstractSchemaProxy):
            except AttributeError:
                raise XMLSchemaTypeError("%r is not an XsdElement" % base_element)

+    def bind_parser(self, parser):
+        if parser.schema is not self:
+            parser.schema = self
+
+        try:
+            parser.symbol_table = self._schema.xpath_tokens[parser.__class__]
+        except KeyError:
+            parser.symbol_table = parser.__class__.symbol_table.copy()
+            self._schema.xpath_tokens[parser.__class__] = parser.symbol_table
+            for xsd_type in self.iter_atomic_types():
+                parser.schema_constructor(xsd_type.name)
+
+        parser.tokenizer = parser.create_tokenizer(parser.symbol_table)
+
    def get_context(self):
        return XMLSchemaContext(root=self._schema, item=self._base_element)

@ -166,7 +180,13 @@ class ElementPathMixin(Sequence):
    attributes = {}
    namespaces = {}
    xpath_default_namespace = None
-    xpath_proxy = None
+
+    _xpath_parser = None  # Internal XPath 2.0 parser, instantiated at first use.
+
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        state.pop('_xpath_parser', None)
+        return state

    @abstractmethod
    def __iter__(self):
@ -198,48 +218,56 @@ class ElementPathMixin(Sequence):
        """Gets an Element attribute. For compatibility with the ElementTree API."""
        return self.attributes.get(key, default)

-    def iterfind(self, path, namespaces=None):
+    @property
+    def xpath_proxy(self):
+        """Returns an XPath proxy instance bound with the schema."""
+        raise NotImplementedError
+
+    def _rebind_xpath_parser(self):
+        """Rebind XPath 2 parser with schema component."""
+        if self._xpath_parser is not None:
+            self._xpath_parser.schema.bind_parser(self._xpath_parser)
+
+    def _get_xpath_namespaces(self, namespaces=None):
        """
-        Creates and iterator for all XSD subelements matching the path.
+        Returns a dictionary with namespaces for XPath selection.

-        :param path: an XPath expression that considers the XSD component as the root element.
-        :param namespaces: is an optional mapping from namespace prefix to full name.
-        :return: an iterable yielding all matching XSD subelements in document order.
+        :param namespaces: an optional map from namespace prefix to namespace URI. \
+        If this argument is not provided the schema's namespaces are used.
        """
-        path = path.strip()
-        if path.startswith('/') and not path.startswith('//'):
-            path = ''.join(['/', XSD_SCHEMA, path])
-        if namespaces is None:
-            namespaces = {k: v for k, v in self.namespaces.items() if k}
-
-        parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy,
-                              default_namespace=self.xpath_default_namespace)
-        root_token = parser.parse(path)
-        context = XMLSchemaContext(self)
-        return root_token.select_results(context)
-
-    def find(self, path, namespaces=None):
-        """
-        Finds the first XSD subelement matching the path.
-
-        :param path: an XPath expression that considers the XSD component as the root element.
-        :param namespaces: an optional mapping from namespace prefix to full name.
-        :return: The first matching XSD subelement or ``None`` if there is not match.
-        """
-        path = path.strip()
-        if path.startswith('/') and not path.startswith('//'):
-            path = ''.join(['/', XSD_SCHEMA, path])
-
        if namespaces is None:
            namespaces = {k: v for k, v in self.namespaces.items() if k}
            namespaces[''] = self.xpath_default_namespace
        elif '' not in namespaces:
            namespaces[''] = self.xpath_default_namespace

-        parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
-        root_token = parser.parse(path)
+        xpath_namespaces = XPath2Parser.DEFAULT_NAMESPACES.copy()
+        xpath_namespaces.update(namespaces)
+        return xpath_namespaces
+
+    def _xpath_parse(self, path, namespaces=None):
+        path = path.strip()
+        if path.startswith('/') and not path.startswith('//'):
+            path = ''.join(['/', XSD_SCHEMA, path])
+
+        namespaces = self._get_xpath_namespaces(namespaces)
+        if self._xpath_parser is None:
+            self._xpath_parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
+        else:
+            self._xpath_parser.namespaces = namespaces
+
+        return self._xpath_parser.parse(path)
+
+    def find(self, path, namespaces=None):
+        """
+        Finds the first XSD subelement matching the path.
+
+        :param path: an XPath expression that considers the XSD component as the root element.
+        :param namespaces: an optional mapping from namespace prefix to namespace URI.
+        :return: The first matching XSD subelement or ``None`` if there is not match.
+        """
        context = XMLSchemaContext(self)
-        return next(root_token.select_results(context), None)
+        return next(self._xpath_parse(path, namespaces).select_results(context), None)

    def findall(self, path, namespaces=None):
        """
@ -250,17 +278,19 @@ class ElementPathMixin(Sequence):
        :return: a list containing all matching XSD subelements in document order, an empty \
        list is returned if there is no match.
        """
-        path = path.strip()
-        if path.startswith('/') and not path.startswith('//'):
-            path = ''.join(['/', XSD_SCHEMA, path])
-        if namespaces is None:
-            namespaces = {k: v for k, v in self.namespaces.items() if k}
-
-        parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy,
-                              default_namespace=self.xpath_default_namespace)
-        root_token = parser.parse(path)
        context = XMLSchemaContext(self)
-        return root_token.get_results(context)
+        return self._xpath_parse(path, namespaces).get_results(context)
+
+    def iterfind(self, path, namespaces=None):
+        """
+        Creates and iterator for all XSD subelements matching the path.
+
+        :param path: an XPath expression that considers the XSD component as the root element.
+        :param namespaces: is an optional mapping from namespace prefix to full name.
+        :return: an iterable yielding all matching XSD subelements in document order.
+        """
+        context = XMLSchemaContext(self)
+        return self._xpath_parse(path, namespaces).select_results(context)

    def iter(self, tag=None):
        """