Improve XPath bindings

- Extend ElementPathMixin with instance _xpath_parser
  - Add xpath_tokens dictionary to schema objects
  - Fix serialization by removing the xpath_tokens classes from the serialized state
This commit is contained in:
Davide Brunato 2019-10-09 14:59:42 +02:00
parent 690a172502
commit 75664150e6
5 changed files with 109 additions and 60 deletions

View File

@ -57,8 +57,6 @@ class XsdAssert(XsdComponent, ElementPathMixin):
else:
self.xpath_default_namespace = self.schema.xpath_default_namespace
self.xpath_proxy = XMLSchemaProxy(self.schema, self)
@property
def built(self):
return self.token is not None and (self.base_type.parent is None or self.base_type.built)
@ -77,7 +75,11 @@ class XsdAssert(XsdComponent, ElementPathMixin):
variables = None
self.parser = XPath2Parser(
self.namespaces, variables, False, self.xpath_default_namespace, schema=self.xpath_proxy
namespaces=self.namespaces,
variables=variables,
strict=False,
default_namespace=self.xpath_default_namespace,
schema=XMLSchemaProxy(self.schema, self)
)
try:
@ -89,6 +91,8 @@ class XsdAssert(XsdComponent, ElementPathMixin):
def __call__(self, elem, value=None, source=None, namespaces=None, **kwargs):
if value is not None:
self.parser.variables['value'] = self.base_type.text_decode(value)
if not self.parser.is_schema_bound():
self.parser.schema.bind_parser(self.parser)
if source is None:
context = XPathContext(root=elem)
@ -96,7 +100,6 @@ class XsdAssert(XsdComponent, ElementPathMixin):
context = XPathContext(root=source.root, item=elem)
default_namespace = self.parser.namespaces['']
if namespaces and '' in namespaces:
self.parser.namespaces[''] = namespaces['']

View File

@ -96,8 +96,11 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
def __setattr__(self, name, value):
if name == "type":
assert value is None or isinstance(value, XsdType), "Wrong value %r for attribute 'type'." % value
self.attributes = self.get_attributes(value)
assert value is None or isinstance(value, XsdType)
try:
self.attributes = value.attributes
except AttributeError:
self.attributes = self.schema.create_empty_attribute_group(self)
super(XsdElement, self).__setattr__(name, value)
def __iter__(self):
@ -105,6 +108,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
for e in self.type.content_type.iter_elements():
yield e
@property
def xpath_proxy(self):
    """
    Returns an XMLSchemaProxy built from this component's schema, using the
    component itself as second argument. A new proxy instance is created at
    each access.
    """
    return XMLSchemaProxy(self.schema, self)
def _parse(self):
XsdComponent._parse(self)
self._parse_attributes()
@ -112,7 +119,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
self._parse_identity_constraints(index)
if self.parent is None and 'substitutionGroup' in self.elem.attrib:
self._parse_substitution_group(self.elem.attrib['substitutionGroup'])
self.xpath_proxy = XMLSchemaProxy(self.schema, self)
def _parse_attributes(self):
self._parse_particle(self.elem)
@ -390,7 +396,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
try:
return xsd_type.attributes
except AttributeError:
return self.schema.empty_attribute_group
return self.attributes
def get_path(self, ancestor=None, reverse=False):
"""
@ -848,8 +854,6 @@ class Xsd11Element(XsdElement):
if any(v.inheritable for v in self.attributes.values()):
self.inheritable = {k: v for k, v in self.attributes.items() if v.inheritable}
self.xpath_proxy = XMLSchemaProxy(self.schema, self)
def _parse_alternatives(self, index=0):
if self.ref is not None:
self.alternatives = self.ref.alternatives

View File

@ -216,8 +216,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
:vartype maps: XsdGlobals
:ivar converter: the default converter used for XML data decoding/encoding.
:vartype converter: XMLSchemaConverter
:ivar xpath_proxy: a proxy for XPath operations on schema components.
:vartype xpath_proxy: XMLSchemaProxy
:ivar locations: schema location hints.
:vartype locations: NamespaceResourcesMap
:ivar namespaces: a dictionary that maps from the prefixes used by the schema into namespace URI.
@ -340,10 +338,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
self.locations = NamespaceResourcesMap(self.source.get_locations(locations))
self.converter = self.get_converter(converter)
self.xpath_proxy = XMLSchemaProxy(self)
self.empty_attribute_group = self.BUILDERS.attribute_group_class(
etree_element(XSD_ATTRIBUTE_GROUP), self, self
)
self.xpath_tokens = {}
# Create or set the XSD global maps instance
if self.meta_schema is None:
@ -416,6 +411,16 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
if loglevel is not None:
logger.setLevel(logging.WARNING) # Restore default logging
def __getstate__(self):
    """
    Returns the state used for serializing the instance: a shallow copy of
    the instance dictionary without the 'xpath_tokens' entry and without
    the '_xpath_parser' entry, if present.

    :raises KeyError: if the instance dictionary has no 'xpath_tokens' entry.
    """
    state = self.__dict__.copy()
    # 'xpath_tokens' is rebuilt as an empty dictionary by __setstate__.
    del state['xpath_tokens']
    # The parser entry may be missing, so it is dropped without raising.
    state.pop('_xpath_parser', None)
    return state
def __setstate__(self, state):
    """
    Restores the instance dictionary from a saved state and resets
    'xpath_tokens' to an empty dictionary.

    :param state: a dictionary of instance attributes.
    """
    self.__dict__.update(state)
    self.xpath_tokens = {}
def __repr__(self):
if self.url:
basename = os.path.basename(self.url)
@ -457,6 +462,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
def __len__(self):
    """Returns the number of items in the `elements` attribute."""
    return len(self.elements)
@property
def xpath_proxy(self):
    """
    Returns an XMLSchemaProxy built on this schema instance. A new proxy
    instance is created at each access.
    """
    return XMLSchemaProxy(self)
@property
def xsd_version(self):
"""Property that returns the class attribute XSD_VERSION."""

View File

@ -395,10 +395,13 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin):
self.__class__.__name__, self.not_namespace, self.process_contents, self.occurs
)
@property
def xpath_proxy(self):
    """
    Returns an XMLSchemaProxy built from this component's schema, using the
    component itself as second argument. A new proxy instance is created at
    each access.
    """
    return XMLSchemaProxy(self.schema, self)
def _parse(self):
super(XsdAnyElement, self)._parse()
self._parse_particle(self.elem)
self.xpath_proxy = XMLSchemaProxy(self.schema, self)
def match(self, name, default_namespace=None, resolve=False, **kwargs):
"""

View File

@ -93,6 +93,20 @@ class XMLSchemaProxy(AbstractSchemaProxy):
except AttributeError:
raise XMLSchemaTypeError("%r is not an XsdElement" % base_element)
def bind_parser(self, parser):
    """
    Binds an XPath parser to this schema proxy.

    The parser's `schema` attribute is set to this proxy, its symbol table
    is taken from the schema's `xpath_tokens` dictionary (one entry for each
    parser class) and its tokenizer is rebuilt from that symbol table.

    :param parser: the parser instance to bind. Its `schema`, `symbol_table` \
    and `tokenizer` attributes are overwritten.
    """
    if parser.schema is not self:
        parser.schema = self

    try:
        # Reuse the symbol table already stored for this parser class.
        parser.symbol_table = self._schema.xpath_tokens[parser.__class__]
    except KeyError:
        # First binding of this parser class with the schema: start from a copy
        # of the class-level symbol table and store it in the schema, so next
        # bindings share the same table.
        parser.symbol_table = parser.__class__.symbol_table.copy()
        self._schema.xpath_tokens[parser.__class__] = parser.symbol_table
        # NOTE(review): presumably schema_constructor() registers a constructor
        # token for the named type into parser.symbol_table -- confirm against
        # the parser implementation.
        for xsd_type in self.iter_atomic_types():
            parser.schema_constructor(xsd_type.name)

    # The tokenizer is always rebuilt from the symbol table selected above.
    parser.tokenizer = parser.create_tokenizer(parser.symbol_table)
def get_context(self):
    """
    Returns a new XMLSchemaContext that has the proxy's schema as root and
    the proxy's base element as item.
    """
    return XMLSchemaContext(root=self._schema, item=self._base_element)
@ -166,7 +180,13 @@ class ElementPathMixin(Sequence):
attributes = {}
namespaces = {}
xpath_default_namespace = None
xpath_proxy = None
_xpath_parser = None # Internal XPath 2.0 parser, instantiated at first use.
def __getstate__(self):
    """
    Returns the state used for serializing the instance: a shallow copy of
    the instance dictionary without the '_xpath_parser' entry.
    """
    state = self.__dict__.copy()
    # The entry exists only after a parser has been stored on the instance,
    # so it is dropped without raising if missing.
    state.pop('_xpath_parser', None)
    return state
@abstractmethod
def __iter__(self):
@ -198,48 +218,56 @@ class ElementPathMixin(Sequence):
"""Gets an Element attribute. For compatibility with the ElementTree API."""
return self.attributes.get(key, default)
def iterfind(self, path, namespaces=None):
@property
def xpath_proxy(self):
    """
    Returns an XPath proxy instance bound with the schema.

    This base implementation only raises: classes that use the mixin have
    to override this property.

    :raises NotImplementedError: always, if the property is not overridden.
    """
    raise NotImplementedError
def _rebind_xpath_parser(self):
    """
    Rebind XPath 2 parser with schema component.

    Does nothing if no parser has been stored yet on the instance, otherwise
    calls `bind_parser()` of the parser's current schema proxy, passing the
    parser itself.
    """
    if self._xpath_parser is not None:
        self._xpath_parser.schema.bind_parser(self._xpath_parser)
def _get_xpath_namespaces(self, namespaces=None):
"""
Creates and iterator for all XSD subelements matching the path.
Returns a dictionary with namespaces for XPath selection.
:param path: an XPath expression that considers the XSD component as the root element.
:param namespaces: is an optional mapping from namespace prefix to full name.
:return: an iterable yielding all matching XSD subelements in document order.
:param namespaces: an optional map from namespace prefix to namespace URI. \
If this argument is not provided the schema's namespaces are used.
"""
path = path.strip()
if path.startswith('/') and not path.startswith('//'):
path = ''.join(['/', XSD_SCHEMA, path])
if namespaces is None:
namespaces = {k: v for k, v in self.namespaces.items() if k}
parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy,
default_namespace=self.xpath_default_namespace)
root_token = parser.parse(path)
context = XMLSchemaContext(self)
return root_token.select_results(context)
def find(self, path, namespaces=None):
"""
Finds the first XSD subelement matching the path.
:param path: an XPath expression that considers the XSD component as the root element.
:param namespaces: an optional mapping from namespace prefix to full name.
:return: The first matching XSD subelement or ``None`` if there is not match.
"""
path = path.strip()
if path.startswith('/') and not path.startswith('//'):
path = ''.join(['/', XSD_SCHEMA, path])
if namespaces is None:
namespaces = {k: v for k, v in self.namespaces.items() if k}
namespaces[''] = self.xpath_default_namespace
elif '' not in namespaces:
namespaces[''] = self.xpath_default_namespace
parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
root_token = parser.parse(path)
xpath_namespaces = XPath2Parser.DEFAULT_NAMESPACES.copy()
xpath_namespaces.update(namespaces)
return xpath_namespaces
def _xpath_parse(self, path, namespaces=None):
    """
    Parses an XPath expression using the XPath 2.0 parser stored on the
    instance, that is created at first use.

    :param path: an XPath expression. Leading and trailing whitespaces are removed.
    :param namespaces: an optional map from namespace prefix to namespace URI, \
    that is passed to `_get_xpath_namespaces()` before being used by the parser.
    :return: the result of the parser's `parse()` call on the path.
    """
    path = path.strip()
    # A path that starts with a single slash is prefixed with a step
    # for the XSD_SCHEMA name. Paths starting with '//' are left unchanged.
    if path.startswith('/') and not path.startswith('//'):
        path = ''.join(['/', XSD_SCHEMA, path])

    namespaces = self._get_xpath_namespaces(namespaces)
    if self._xpath_parser is None:
        # First use: create the parser and keep it for the next calls.
        self._xpath_parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
    else:
        # The stored parser is reused, only its namespaces are replaced.
        self._xpath_parser.namespaces = namespaces

    return self._xpath_parser.parse(path)
def find(self, path, namespaces=None):
"""
Finds the first XSD subelement matching the path.
:param path: an XPath expression that considers the XSD component as the root element.
:param namespaces: an optional mapping from namespace prefix to namespace URI.
:return: The first matching XSD subelement or ``None`` if there is not match.
"""
context = XMLSchemaContext(self)
return next(root_token.select_results(context), None)
return next(self._xpath_parse(path, namespaces).select_results(context), None)
def findall(self, path, namespaces=None):
"""
@ -250,17 +278,19 @@ class ElementPathMixin(Sequence):
:return: a list containing all matching XSD subelements in document order, an empty \
list is returned if there is no match.
"""
path = path.strip()
if path.startswith('/') and not path.startswith('//'):
path = ''.join(['/', XSD_SCHEMA, path])
if namespaces is None:
namespaces = {k: v for k, v in self.namespaces.items() if k}
parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy,
default_namespace=self.xpath_default_namespace)
root_token = parser.parse(path)
context = XMLSchemaContext(self)
return root_token.get_results(context)
return self._xpath_parse(path, namespaces).get_results(context)
def iterfind(self, path, namespaces=None):
    """
    Creates an iterator for all XSD subelements matching the path.

    :param path: an XPath expression that considers the XSD component as the root element.
    :param namespaces: is an optional mapping from namespace prefix to full name.
    :return: an iterable yielding all matching XSD subelements in document order.
    """
    # The selection context is created with the component itself as argument.
    context = XMLSchemaContext(self)
    return self._xpath_parse(path, namespaces).select_results(context)
def iter(self, tag=None):
"""