| # |
| # ElementTree |
| # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ |
| # |
| # limited xinclude support for element trees |
| # |
| # history: |
| # 2003-08-15 fl created |
| # 2003-11-14 fl fixed default loader |
| # |
| # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. |
| # |
| # [email protected] |
| # http://www.pythonware.com |
| # |
| # -------------------------------------------------------------------- |
| # The ElementTree toolkit is |
| # |
| # Copyright (c) 1999-2008 by Fredrik Lundh |
| # |
| # By obtaining, using, and/or copying this software and/or its |
| # associated documentation, you agree that you have read, understood, |
| # and will comply with the following terms and conditions: |
| # |
| # Permission to use, copy, modify, and distribute this software and |
| # its associated documentation for any purpose and without fee is |
| # hereby granted, provided that the above copyright notice appears in |
| # all copies, and that both that copyright notice and this permission |
| # notice appear in supporting documentation, and that the name of |
| # Secret Labs AB or the author not be used in advertising or publicity |
| # pertaining to distribution of the software without specific, written |
| # prior permission. |
| # |
| # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD |
| # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- |
| # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR |
| # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY |
| # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
| # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
| # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
| # OF THIS SOFTWARE. |
| # -------------------------------------------------------------------- |
| |
| # Licensed to PSF under a Contributor Agreement. |
| # See https://www.python.org/psf/license for licensing details. |
| |
| ## |
| # Limited XInclude support for the ElementTree package. |
| ## |
| |
| import copy |
| from . import ElementTree |
| from urllib.parse import urljoin |
| |
| XINCLUDE = "{http://www.w3.org/2001/XInclude}" |
| |
| XINCLUDE_INCLUDE = XINCLUDE + "include" |
| XINCLUDE_FALLBACK = XINCLUDE + "fallback" |
| |
| # For security reasons, the inclusion depth is limited to this read-only value by default. |
| DEFAULT_MAX_INCLUSION_DEPTH = 6 |
| |
| |
| ## |
| # Fatal include error. |
| |
| class FatalIncludeError(SyntaxError): |
| pass |
| |
| |
| class LimitedRecursiveIncludeError(FatalIncludeError): |
| pass |
| |
| |
| ## |
| # Default loader. This loader reads an included resource from disk. |
| # |
| # @param href Resource reference. |
| # @param parse Parse mode. Either "xml" or "text". |
| # @param encoding Optional text encoding (UTF-8 by default for "text"). |
| # @return The expanded resource. If the parse mode is "xml", this |
| # is an ElementTree instance. If the parse mode is "text", this |
| # is a Unicode string. If the loader fails, it can return None |
| # or raise an OSError exception. |
| # @throws OSError If the loader fails to load the resource. |
| |
| def default_loader(href, parse, encoding=None): |
| if parse == "xml": |
| with open(href, 'rb') as file: |
| data = ElementTree.parse(file).getroot() |
| else: |
| if not encoding: |
| encoding = 'UTF-8' |
| with open(href, 'r', encoding=encoding) as file: |
| data = file.read() |
| return data |
| |
| ## |
| # Expand XInclude directives. |
| # |
| # @param elem Root element. |
| # @param loader Optional resource loader. If omitted, it defaults |
| # to {@link default_loader}. If given, it should be a callable |
| # that implements the same interface as <b>default_loader</b>. |
| # @param base_url The base URL of the original file, to resolve |
| # relative include file references. |
| # @param max_depth The maximum number of recursive inclusions. |
| # Limited to reduce the risk of malicious content explosion. |
| # Pass a negative value to disable the limitation. |
| # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded. |
| # @throws FatalIncludeError If the function fails to include a given |
| # resource, or if the tree contains malformed XInclude elements. |
| # @throws IOError If the function fails to load a given resource. |
| # @returns the node or its replacement if it was an XInclude node |
| |
| def include(elem, loader=None, base_url=None, |
| max_depth=DEFAULT_MAX_INCLUSION_DEPTH): |
| if max_depth is None: |
| max_depth = -1 |
| elif max_depth < 0: |
| raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth) |
| |
| if hasattr(elem, 'getroot'): |
| elem = elem.getroot() |
| if loader is None: |
| loader = default_loader |
| |
| _include(elem, loader, base_url, max_depth, set()) |
| |
| |
| def _include(elem, loader, base_url, max_depth, _parent_hrefs): |
| # look for xinclude elements |
| i = 0 |
| while i < len(elem): |
| e = elem[i] |
| if e.tag == XINCLUDE_INCLUDE: |
| # process xinclude directive |
| href = e.get("href") |
| if base_url: |
| href = urljoin(base_url, href) |
| parse = e.get("parse", "xml") |
| if parse == "xml": |
| if href in _parent_hrefs: |
| raise FatalIncludeError("recursive include of %s" % href) |
| if max_depth == 0: |
| raise LimitedRecursiveIncludeError( |
| "maximum xinclude depth reached when including file %s" % href) |
| _parent_hrefs.add(href) |
| node = loader(href, parse) |
| if node is None: |
| raise FatalIncludeError( |
| "cannot load %r as %r" % (href, parse) |
| ) |
| node = copy.copy(node) # FIXME: this makes little sense with recursive includes |
| _include(node, loader, href, max_depth - 1, _parent_hrefs) |
| _parent_hrefs.remove(href) |
| if e.tail: |
| node.tail = (node.tail or "") + e.tail |
| elem[i] = node |
| elif parse == "text": |
| text = loader(href, parse, e.get("encoding")) |
| if text is None: |
| raise FatalIncludeError( |
| "cannot load %r as %r" % (href, parse) |
| ) |
| if e.tail: |
| text += e.tail |
| if i: |
| node = elem[i-1] |
| node.tail = (node.tail or "") + text |
| else: |
| elem.text = (elem.text or "") + text |
| del elem[i] |
| continue |
| else: |
| raise FatalIncludeError( |
| "unknown parse type in xi:include tag (%r)" % parse |
| ) |
| elif e.tag == XINCLUDE_FALLBACK: |
| raise FatalIncludeError( |
| "xi:fallback tag must be child of xi:include (%r)" % e.tag |
| ) |
| else: |
| _include(e, loader, base_url, max_depth, _parent_hrefs) |
| i += 1 |