Source code for structa.xml

# structa: an application for analyzing repetitive data structures
#
# Copyright (c) 2020-2021 Dave Jones <dave@waveform.org.uk>
#
# SPDX-License-Identifier: GPL-2.0-or-later

from copy import copy
from pkg_resources import resource_stream

import lxml.etree as et

from . import ui
from .format import pairwise


def xml(obj):
    """
    Return the XML representation of *obj*.

    Much as :class:`str` defers to ``__str__``, this function defers to the
    ``__xml__`` method of *obj* and hands back whatever that method returns
    (expected to be an :class:`~xml.etree.ElementTree.Element` instance
    representing the object). An object without an ``__xml__`` method results
    in the usual :exc:`AttributeError`.
    """
    to_element = obj.__xml__
    return to_element()
def get_transform(name):
    """
    Return the XSLT transform defined by *name* in the :mod:`structa.ui`
    module.

    The resource called *name* is opened from the :mod:`structa.ui` package,
    parsed as an XML document, and compiled into a callable XSLT object. The
    resource stream is closed before returning, whether or not parsing and
    compilation succeed.
    """
    stream = resource_stream(ui.__name__, name)
    try:
        # The stylesheet is fully parsed and compiled inside the try block,
        # so the stream is no longer required once we leave it
        return et.XSLT(et.parse(stream))
    finally:
        stream.close()
[docs]def merge_siblings(elem): """ Consolidate the content of adjacent sibling child elements with the same tag. For example: >>> x = XML('<doc><a>a</a><a>b</a><a>c</a><b>d</b><a>e</a></doc>') >>> tostring(merge_siblings(x)) b'<doc><a>abc</a><b>d</b><a>e</a></doc>' Note that the function only deals with *direct* child elements of *elem*; it does nothing to descendents of those children, even if they have the same tag as their parent: >>> x = XML('<doc><a>a<a>b</a></a><a>c</a><b>d</b><a>e</a></doc>') >>> tostring(merge_siblings(x)) b'<doc><a>a<a>b</a>c</a><b>d</b><a>e</a></doc>' """ result = copy(elem) for this, prior in pairwise(reversed(result)): if ( this.tag == prior.tag and (prior.tail is None or not prior.tail.strip()) ): if len(prior): prior[-1].tail = (prior[-1].tail or '') + (this.text or '') else: prior.text = (prior.text or '') + (this.text or '') for child in this: prior.append(child) prior.tail = (prior.tail or '') + (this.tail or '') result.remove(this) return result
class ElementFactory:
    """
    A class inspired by Genshi for easy creation of ElementTree Elements.

    The ElementFactory class was inspired by the Genshi builder unit in that it
    permits simple creation of Elements by calling methods on the tag object
    named after the element you wish to create. Positional arguments become
    content within the element, and keyword arguments become attributes.

    If you need an attribute or element tag that conflicts with a Python
    keyword, simply append an underscore to the name (which will be
    automatically stripped off).

    Content can be just about anything, including booleans, integers, dates,
    times, etc. Strings are used verbatim, elements are appended as children,
    iterables are expanded item by item, and anything else is converted with
    :meth:`_format` (which applies :class:`str` by default).

    Attribute values of :data:`None` or :data:`False` cause the attribute to
    be omitted; a value of :data:`True` produces an attribute whose value is
    its own name (e.g. ``checked="checked"``); strings are used verbatim and
    any other value is converted with :class:`str`.

    For example::

        >>> tostring(tag.a('A link'))
        b'<a>A link</a>'
        >>> tostring(tag.a('A link', class_='menuitem'))
        b'<a class="menuitem">A link</a>'
        >>> tostring(tag.p('A ', tag.a('link', class_='menuitem')))
        b'<p>A <a class="menuitem">link</a></p>'
    """

    def __init__(self, namespace=None):
        """
        Initializes an instance of the factory.

        The optional *namespace* parameter can be used to specify the
        namespace used to qualify all elements (and their attributes)
        generated by an instance of the class. Rather than specifying this
        explicitly when constructing the class it is recommended that
        developers sub-class this class, and specify the namespace as part of
        an overridden ``__init__`` method. In other words, make dialect
        specific sub-classes of this generic class (an HTMLElementFactory
        class for instance).
        """
        self._namespace = namespace

    def _format(self, content):
        """
        Re-formats *content* to a human-readable string. This method should be
        overridden to customize the representation of types (such as
        :class:`int`, :class:`~datetime.datetime` and so on).
        """
        return str(content)

    def _append(self, node, contents):
        """
        Adds *contents* (which can be a :class:`str`, element, element-list,
        or any type accepted by :meth:`_format`) to a *node*.
        """
        if isinstance(contents, str):
            if not contents:
                return
            if len(node):
                # Text after the final child belongs in that child's tail
                last = node[-1]
                last.tail = (last.tail or '') + contents
            else:
                node.text = (node.text or '') + contents
        elif et.iselement(contents):
            node.append(contents)
        else:
            try:
                it = iter(contents)
            except TypeError:
                # Not iterable: treat as a scalar and add its string form
                self._append(node, self._format(contents))
            else:
                for content in it:
                    self._append(node, content)

    def _qualify(self, name):
        """
        Returns *name* in ``{namespace}name`` form when the factory has a
        namespace, or unchanged otherwise.
        """
        if self._namespace:
            return '{{{}}}{}'.format(self._namespace, name)
        return name

    def _element(self, _name, *contents, **attrs):
        """
        Generates an XML element with the tag *_name*, containing *contents*
        and with attributes *attrs*.

        Trailing underscores are stripped from attribute names, and both the
        tag and the attribute names are qualified with the factory's
        namespace (if any). See the class documentation for the handling of
        attribute values.
        """
        attrib = {}
        for key, value in attrs.items():
            if value is None or value is False:
                continue
            local = key.rstrip('_')
            if value is True:
                # Boolean attributes take their own (unqualified, stripped)
                # name as their value, e.g. checked="checked"
                value = local
            elif not isinstance(value, str):
                value = str(value)
            attrib[self._qualify(local)] = value
        e = et.Element(self._qualify(_name), attrib)
        for content in contents:
            self._append(e, content)
        return e

    def __getattr__(self, name):
        """
        Returns (and caches on the instance) a callable which generates
        elements with the tag *name*, minus any trailing underscores.

        Double-underscore "special" names are never treated as tags; this
        ensures protocol probes made by modules such as :mod:`copy` and
        :mod:`pickle` (``__deepcopy__``, ``__setstate__``, etc.) correctly
        find nothing instead of receiving an element generator.
        """
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(
                '{!r} object has no attribute {!r}'.format(
                    type(self).__name__, name))
        elem_name = name.rstrip('_')

        def generator(*content, **attrs):
            return self._element(elem_name, *content, **attrs)

        # Cache the generator so later lookups bypass __getattr__ entirely
        setattr(self, name, generator)
        return generator