mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
python-3.6.zip added from Github
README.cosmo contains the necessary links.
This commit is contained in:
parent
75fc601ff5
commit
0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions
20
third_party/python/Lib/xml/__init__.py
vendored
Normal file
20
third_party/python/Lib/xml/__init__.py
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
"""Core XML support for Python.
|
||||
|
||||
This package contains four sub-packages:
|
||||
|
||||
dom -- The W3C Document Object Model. This supports DOM Level 1 +
|
||||
Namespaces.
|
||||
|
||||
parsers -- Python wrappers for XML parsers (currently only supports Expat).
|
||||
|
||||
sax -- The Simple API for XML, developed by XML-Dev, led by David
|
||||
Megginson and ported to Python by Lars Marius Garshol. This
|
||||
supports the SAX 2 API.
|
||||
|
||||
etree -- The ElementTree XML library. This is a subset of the full
|
||||
ElementTree XML release.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
__all__ = ["dom", "parsers", "sax", "etree"]
|
27
third_party/python/Lib/xml/dom/NodeFilter.py
vendored
Normal file
27
third_party/python/Lib/xml/dom/NodeFilter.py
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
# This is the Python mapping for interface NodeFilter from
|
||||
# DOM2-Traversal-Range. It contains only constants.
|
||||
|
||||
class NodeFilter:
    """
    Python mapping of the DOM2 NodeFilter interface.

    The class only carries constants; acceptNode() is a stub that always
    raises NotImplementedError.
    """

    # Possible return values of acceptNode().
    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3

    # whatToShow bit flags: one bit per node type, SHOW_ALL sets them all.
    SHOW_ALL = 0xFFFFFFFF
    SHOW_ELEMENT = 1 << 0
    SHOW_ATTRIBUTE = 1 << 1
    SHOW_TEXT = 1 << 2
    SHOW_CDATA_SECTION = 1 << 3
    SHOW_ENTITY_REFERENCE = 1 << 4
    SHOW_ENTITY = 1 << 5
    SHOW_PROCESSING_INSTRUCTION = 1 << 6
    SHOW_COMMENT = 1 << 7
    SHOW_DOCUMENT = 1 << 8
    SHOW_DOCUMENT_TYPE = 1 << 9
    SHOW_DOCUMENT_FRAGMENT = 1 << 10
    SHOW_NOTATION = 1 << 11

    def acceptNode(self, node):
        raise NotImplementedError
|
140
third_party/python/Lib/xml/dom/__init__.py
vendored
Normal file
140
third_party/python/Lib/xml/dom/__init__.py
vendored
Normal file
|
@ -0,0 +1,140 @@
|
|||
"""W3C Document Object Model implementation for Python.
|
||||
|
||||
The Python mapping of the Document Object Model is documented in the
|
||||
Python Library Reference in the section on the xml.dom package.
|
||||
|
||||
This package contains the following modules:
|
||||
|
||||
minidom -- A simple implementation of the Level 1 DOM with namespace
|
||||
support added (based on the Level 2 specification) and other
|
||||
minor Level 2 functionality.
|
||||
|
||||
pulldom -- DOM builder supporting on-demand tree-building for selected
|
||||
subtrees of the document.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Node:
    """Class giving the NodeType constants."""

    # No per-instance attributes are declared here.
    __slots__ = ()

    # These are the canonical nodeType values and match the ones given in
    # the W3C recommendation.  A DOM implementation may inherit from this
    # class for its own Node type, but even when it does not, client code
    # can rely on these constants in every test of Node.nodeType.

    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12
|
||||
|
||||
|
||||
# ExceptionCode values; each DOMException subclass below exposes one of
# them as its ``code`` attribute.
INDEX_SIZE_ERR = 1
DOMSTRING_SIZE_ERR = 2
HIERARCHY_REQUEST_ERR = 3
WRONG_DOCUMENT_ERR = 4
INVALID_CHARACTER_ERR = 5
NO_DATA_ALLOWED_ERR = 6
NO_MODIFICATION_ALLOWED_ERR = 7
NOT_FOUND_ERR = 8
NOT_SUPPORTED_ERR = 9
INUSE_ATTRIBUTE_ERR = 10
INVALID_STATE_ERR = 11
SYNTAX_ERR = 12
INVALID_MODIFICATION_ERR = 13
NAMESPACE_ERR = 14
INVALID_ACCESS_ERR = 15
VALIDATION_ERR = 16
|
||||
|
||||
|
||||
class DOMException(Exception):
    """Abstract base class for DOM exceptions.

    Concrete exceptions subclass this and supply a ``code`` class
    attribute; creating a DOMException itself raises RuntimeError."""

    def __init__(self, *args, **kw):
        # Only subclasses may be instantiated.
        if type(self) is DOMException:
            raise RuntimeError(
                "DOMException should not be instantiated directly")
        Exception.__init__(self, *args, **kw)

    def _get_code(self):
        # ``code`` is provided by the concrete subclass.
        return self.code
|
||||
|
||||
|
||||
class IndexSizeErr(DOMException):
    """DOMException specialization carrying the INDEX_SIZE_ERR code."""

    code = INDEX_SIZE_ERR
|
||||
|
||||
class DomstringSizeErr(DOMException):
    """DOMException specialization carrying the DOMSTRING_SIZE_ERR code."""

    code = DOMSTRING_SIZE_ERR
|
||||
|
||||
class HierarchyRequestErr(DOMException):
    """DOMException specialization carrying the HIERARCHY_REQUEST_ERR code."""

    code = HIERARCHY_REQUEST_ERR
|
||||
|
||||
class WrongDocumentErr(DOMException):
    """DOMException specialization carrying the WRONG_DOCUMENT_ERR code."""

    code = WRONG_DOCUMENT_ERR
|
||||
|
||||
class InvalidCharacterErr(DOMException):
    """DOMException specialization carrying the INVALID_CHARACTER_ERR code."""

    code = INVALID_CHARACTER_ERR
|
||||
|
||||
class NoDataAllowedErr(DOMException):
    """DOMException specialization carrying the NO_DATA_ALLOWED_ERR code."""

    code = NO_DATA_ALLOWED_ERR
|
||||
|
||||
class NoModificationAllowedErr(DOMException):
    """DOMException specialization carrying the NO_MODIFICATION_ALLOWED_ERR code."""

    code = NO_MODIFICATION_ALLOWED_ERR
|
||||
|
||||
class NotFoundErr(DOMException):
    """DOMException specialization carrying the NOT_FOUND_ERR code."""

    code = NOT_FOUND_ERR
|
||||
|
||||
class NotSupportedErr(DOMException):
    """DOMException specialization carrying the NOT_SUPPORTED_ERR code."""

    code = NOT_SUPPORTED_ERR
|
||||
|
||||
class InuseAttributeErr(DOMException):
    """DOMException specialization carrying the INUSE_ATTRIBUTE_ERR code."""

    code = INUSE_ATTRIBUTE_ERR
|
||||
|
||||
class InvalidStateErr(DOMException):
    """DOMException specialization carrying the INVALID_STATE_ERR code."""

    code = INVALID_STATE_ERR
|
||||
|
||||
class SyntaxErr(DOMException):
    """DOMException specialization carrying the SYNTAX_ERR code."""

    code = SYNTAX_ERR
|
||||
|
||||
class InvalidModificationErr(DOMException):
    """DOMException specialization carrying the INVALID_MODIFICATION_ERR code."""

    code = INVALID_MODIFICATION_ERR
|
||||
|
||||
class NamespaceErr(DOMException):
    """DOMException specialization carrying the NAMESPACE_ERR code."""

    code = NAMESPACE_ERR
|
||||
|
||||
class InvalidAccessErr(DOMException):
    """DOMException specialization carrying the INVALID_ACCESS_ERR code."""

    code = INVALID_ACCESS_ERR
|
||||
|
||||
class ValidationErr(DOMException):
    """DOMException specialization carrying the VALIDATION_ERR code."""

    code = VALIDATION_ERR
|
||||
|
||||
class UserDataHandler:
    """Class giving the operation constants for UserDataHandler.handle()."""

    # Values follow DOM Level 3 (WD 9 April 2002).
    NODE_CLONED = 1
    NODE_IMPORTED = 2
    NODE_DELETED = 3
    NODE_RENAMED = 4
|
||||
|
||||
# Namespace URIs used by the DOM code.
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"

# "No namespace" and "no prefix" are both spelled None.
EMPTY_NAMESPACE = None
EMPTY_PREFIX = None
|
||||
|
||||
from .domreg import getDOMImplementation, registerDOMImplementation
|
99
third_party/python/Lib/xml/dom/domreg.py
vendored
Normal file
99
third_party/python/Lib/xml/dom/domreg.py
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
"""Registration facilities for DOM. This module should not be used
|
||||
directly. Instead, the functions getDOMImplementation and
|
||||
registerDOMImplementation should be imported from xml.dom."""
|
||||
|
||||
# This is a list of well-known implementations. Well-known names
|
||||
# should be published by posting to xml-sig@python.org, and are
|
||||
# subsequently recorded in this file.
|
||||
|
||||
import sys
|
||||
|
||||
# Maps a well-known implementation name to the module that provides it.
well_known_implementations = {
    'minidom': 'xml.dom.minidom',
    '4DOM': 'xml.dom.DOMImplementation',
}

# DOM implementations not officially registered should register
# themselves with their name and factory via registerDOMImplementation();
# the factories are kept in this table.

registered = {}
|
||||
|
||||
def registerDOMImplementation(name, factory):
    """registerDOMImplementation(name, factory)

    Store *factory* in the ``registered`` table under *name*, replacing
    any factory previously stored under that name.

    The factory is a callable that returns an object implementing the
    DOMImplementation interface.  It may hand back the same object on
    every call or build a fresh one each time (for instance when the
    implementation supports customization)."""

    registered[name] = factory
|
||||
|
||||
def _good_enough(dom, features):
|
||||
"_good_enough(dom, features) -> Return 1 if the dom offers the features"
|
||||
for f,v in features:
|
||||
if not dom.hasFeature(f,v):
|
||||
return 0
|
||||
return 1
|
||||
|
||||
def getDOMImplementation(name=None, features=()):
    """getDOMImplementation(name = None, features = ()) -> DOM implementation.

    Return a suitable DOM implementation.

    name     -- a well-known name, a name previously passed to
                registerDOMImplementation, or None.
    features -- a sequence of (feature, version) pairs handed to
                hasFeature, or a string that is parsed into such pairs.

    When a name is given, the matching implementation is imported or
    created and returned without looking at the features.  Without a
    name, the PYTHON_DOM environment variable (unless the interpreter
    ignores the environment) selects the implementation; otherwise the
    registered and then the well-known implementations are tried until
    one offers all required features.  ImportError is raised when none
    does."""

    import os

    module_name = well_known_implementations.get(name)
    if module_name:
        module = __import__(module_name, {}, {}, ['getDOMImplementation'])
        return module.getDOMImplementation()
    if name:
        return registered[name]()
    if not sys.flags.ignore_environment and "PYTHON_DOM" in os.environ:
        return getDOMImplementation(name = os.environ["PYTHON_DOM"])

    # No name was supplied: probe the candidates in arbitrary order and
    # hand back the first one that has the required features.
    if isinstance(features, str):
        features = _parse_feature_string(features)

    for factory in registered.values():
        candidate = factory()
        if _good_enough(candidate, features):
            return candidate

    for known_name in well_known_implementations.keys():
        try:
            candidate = getDOMImplementation(name = known_name)
        except Exception: # typically ImportError, or AttributeError
            continue
        if _good_enough(candidate, features):
            return candidate

    raise ImportError("no suitable DOM implementation found")
|
||||
|
||||
def _parse_feature_string(s):
|
||||
features = []
|
||||
parts = s.split()
|
||||
i = 0
|
||||
length = len(parts)
|
||||
while i < length:
|
||||
feature = parts[i]
|
||||
if feature[0] in "0123456789":
|
||||
raise ValueError("bad feature name: %r" % (feature,))
|
||||
i = i + 1
|
||||
version = None
|
||||
if i < length:
|
||||
v = parts[i]
|
||||
if v[0] in "0123456789":
|
||||
i = i + 1
|
||||
version = v
|
||||
features.append((feature, version))
|
||||
return tuple(features)
|
965
third_party/python/Lib/xml/dom/expatbuilder.py
vendored
Normal file
965
third_party/python/Lib/xml/dom/expatbuilder.py
vendored
Normal file
|
@ -0,0 +1,965 @@
|
|||
"""Facility to use the Expat parser to load a minidom instance
|
||||
from a string or file.
|
||||
|
||||
This avoids all the overhead of SAX and pulldom to gain performance.
|
||||
"""
|
||||
|
||||
# Warning!
|
||||
#
|
||||
# This module is tightly bound to the implementation details of the
|
||||
# minidom DOM and can't be used with other DOM implementations. This
|
||||
# is due, in part, to a lack of appropriate methods in the DOM (there is
|
||||
# no way to create Entity and Notation nodes via the DOM Level 2
|
||||
# interface), and for performance. The latter is the cause of some fairly
|
||||
# cryptic code.
|
||||
#
|
||||
# Performance hacks:
|
||||
#
|
||||
# - .character_data_handler() has an extra case in which continuing
|
||||
# data is appended to an existing Text node; this can be a
|
||||
# speedup since pyexpat can break up character data into multiple
|
||||
# callbacks even though we set the buffer_text attribute on the
|
||||
# parser. This also gives us the advantage that we don't need a
|
||||
# separate normalization pass.
|
||||
#
|
||||
# - Determining that a node exists is done using an identity comparison
|
||||
# with None rather than a truth test; this avoids searching for and
|
||||
# calling any methods on the node object if it exists. (A rather
|
||||
# nice speedup is achieved this way as well!)
|
||||
|
||||
from xml.dom import xmlbuilder, minidom, Node
|
||||
from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
|
||||
from xml.parsers import expat
|
||||
from xml.dom.minidom import _append_child, _set_attribute_node
|
||||
from xml.dom.NodeFilter import NodeFilter
|
||||
|
||||
# Module-level aliases for the node type constants used below.
TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE

# Module-level aliases for the DOMBuilderFilter return values.
FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT
FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT
FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP
FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT

# The minidom implementation object used to create new documents.
theDOMImplementation = minidom.getDOMImplementation()
|
||||
|
||||
# Expat typename -> TypeInfo
|
||||
_typeinfo_map = {
|
||||
"CDATA": minidom.TypeInfo(None, "cdata"),
|
||||
"ENUM": minidom.TypeInfo(None, "enumeration"),
|
||||
"ENTITY": minidom.TypeInfo(None, "entity"),
|
||||
"ENTITIES": minidom.TypeInfo(None, "entities"),
|
||||
"ID": minidom.TypeInfo(None, "id"),
|
||||
"IDREF": minidom.TypeInfo(None, "idref"),
|
||||
"IDREFS": minidom.TypeInfo(None, "idrefs"),
|
||||
"NMTOKEN": minidom.TypeInfo(None, "nmtoken"),
|
||||
"NMTOKENS": minidom.TypeInfo(None, "nmtokens"),
|
||||
}
|
||||
|
||||
class ElementInfo(object):
    """Declaration data gathered for one element type: its tag name, its
    content model and the list of its attribute declarations."""

    __slots__ = '_attr_info', '_model', 'tagName'

    def __init__(self, tagName, model=None):
        self.tagName = tagName
        self._attr_info = []
        self._model = model

    def __getstate__(self):
        # State is the three slot values, in a fixed order.
        return self._attr_info, self._model, self.tagName

    def __setstate__(self, state):
        self._attr_info, self._model, self.tagName = state

    def getAttributeType(self, aname):
        """Return the TypeInfo for attribute *aname*, or minidom._no_type
        when no declaration for it was recorded."""
        for decl in self._attr_info:
            if decl[1] != aname:
                continue
            declared = decl[-2]
            # A declared type starting with "(" is an enumeration.
            key = "ENUM" if declared[0] == "(" else declared
            return _typeinfo_map[key]
        return minidom._no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        return minidom._no_type

    def isElementContent(self):
        """Return True when a content model is known and it is neither
        ANY nor MIXED."""
        if not self._model:
            return False
        kind = self._model[0]
        return kind not in (expat.model.XML_CTYPE_ANY,
                            expat.model.XML_CTYPE_MIXED)

    def isEmpty(self):
        """Return True when a content model is known and it is EMPTY."""
        if not self._model:
            return False
        return self._model[0] == expat.model.XML_CTYPE_EMPTY

    def isId(self, aname):
        """Return True when attribute *aname* was declared with type ID."""
        for decl in self._attr_info:
            if decl[1] == aname:
                return decl[-2] == "ID"
        return False

    def isIdNS(self, euri, ename, auri, aname):
        # not sure this is meaningful
        return self.isId((auri, aname))
|
||||
|
||||
def _intern(builder, s):
|
||||
return builder._intern_setdefault(s, s)
|
||||
|
||||
def _parse_ns_name(builder, name):
|
||||
assert ' ' in name
|
||||
parts = name.split(' ')
|
||||
intern = builder._intern_setdefault
|
||||
if len(parts) == 3:
|
||||
uri, localname, prefix = parts
|
||||
prefix = intern(prefix, prefix)
|
||||
qname = "%s:%s" % (prefix, localname)
|
||||
qname = intern(qname, qname)
|
||||
localname = intern(localname, localname)
|
||||
elif len(parts) == 2:
|
||||
uri, localname = parts
|
||||
prefix = EMPTY_PREFIX
|
||||
qname = localname = intern(localname, localname)
|
||||
else:
|
||||
raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name)
|
||||
return intern(uri, uri), localname, prefix, qname
|
||||
|
||||
|
||||
class ExpatBuilder:
    """Document builder that uses Expat to build a ParsedXML.DOM document
    instance."""

    def __init__(self, options=None):
        """Create a builder.

        options -- an xmlbuilder.Options instance controlling what gets
                   built; a default instance is created when omitted.
        """
        if options is None:
            options = xmlbuilder.Options()
        self._options = options
        if self._options.filter is not None:
            self._filter = FilterVisibilityController(self._options.filter)
        else:
            self._filter = None
            # This *really* doesn't do anything in this case, so
            # override it with something fast & minimal.
            self._finish_start_element = id
        self._parser = None
        self.reset()

    def createParser(self):
        """Create a new parser object."""
        return expat.ParserCreate()

    def getParser(self):
        """Return the parser object, creating a new one if needed."""
        if not self._parser:
            self._parser = self.createParser()
            self._intern_setdefault = self._parser.intern.setdefault
            self._parser.buffer_text = True
            self._parser.ordered_attributes = True
            self._parser.specified_attributes = True
            self.install(self._parser)
        return self._parser

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False

    def install(self, parser):
        """Install the callbacks needed to build the DOM into the parser."""
        # This creates circular references!
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.first_element_handler
        parser.EndElementHandler = self.end_element_handler
        parser.ProcessingInstructionHandler = self.pi_handler
        if self._options.entities:
            parser.EntityDeclHandler = self.entity_decl_handler
        parser.NotationDeclHandler = self.notation_decl_handler
        if self._options.comments:
            parser.CommentHandler = self.comment_handler
        if self._options.cdata_sections:
            parser.StartCdataSectionHandler = self.start_cdata_section_handler
            parser.EndCdataSectionHandler = self.end_cdata_section_handler
            parser.CharacterDataHandler = self.character_data_handler_cdata
        else:
            parser.CharacterDataHandler = self.character_data_handler
        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
        parser.XmlDeclHandler = self.xml_decl_handler
        parser.ElementDeclHandler = self.element_decl_handler
        parser.AttlistDeclHandler = self.attlist_decl_handler

    def parseFile(self, file):
        """Parse a document from a file object, returning the document
        node."""
        parser = self.getParser()
        first_buffer = True
        try:
            while True:
                buffer = file.read(16*1024)
                if not buffer:
                    break
                parser.Parse(buffer, 0)
                # Only the first chunk is scanned for an internal subset.
                if first_buffer and self.document.documentElement:
                    self._setup_subset(buffer)
                first_buffer = False
            parser.Parse("", True)
        except ParseEscape:
            # Raised when a filter interrupts the parse; keep what was
            # built so far.
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def parseString(self, string):
        """Parse a document from a string, returning the document node."""
        parser = self.getParser()
        try:
            parser.Parse(string, True)
            self._setup_subset(string)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def _setup_subset(self, buffer):
        """Load the internal subset if there might be one."""
        if self.document.doctype:
            extractor = InternalSubsetExtractor()
            extractor.parseString(buffer)
            subset = extractor.getSubset()
            self.document.doctype.internalSubset = subset

    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
                                   has_internal_subset):
        """Create the DocumentType node and attach it to the document,
        unless the filter rejects it."""
        doctype = self.document.implementation.createDocumentType(
            doctypeName, publicId, systemId)
        doctype.ownerDocument = self.document
        _append_child(self.document, doctype)
        self.document.doctype = doctype
        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
            self.document.doctype = None
            del self.document.childNodes[-1]
            doctype = None
            # Without a doctype node there is nowhere to store entity
            # and notation declarations.
            self._parser.EntityDeclHandler = None
            self._parser.NotationDeclHandler = None
        if has_internal_subset:
            if doctype is not None:
                doctype.entities._seq = []
                doctype.notations._seq = []
            # Comments and PIs are not reported while inside the internal
            # subset; end_doctype_decl_handler() re-enables them.
            self._parser.CommentHandler = None
            self._parser.ProcessingInstructionHandler = None
            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Restore the handlers disabled for the internal subset."""
        if self._options.comments:
            self._parser.CommentHandler = self.comment_handler
        self._parser.ProcessingInstructionHandler = self.pi_handler
        if not (self._elem_info or self._filter):
            # Nothing to do at element end; use a cheap no-op instead.
            self._finish_end_element = id

    def pi_handler(self, target, data):
        """Append a ProcessingInstruction node unless the filter rejects it."""
        node = self.document.createProcessingInstruction(target, data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def character_data_handler_cdata(self, data):
        """Character data callback used when CDATA sections are kept as
        separate nodes."""
        childNodes = self.curNode.childNodes
        if self._cdata:
            if (self._cdata_continue
                    and childNodes[-1].nodeType == CDATA_SECTION_NODE):
                # Further data for the CDATA section already started.
                childNodes[-1].appendData(data)
                return
            node = self.document.createCDATASection(data)
            self._cdata_continue = True
        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
            # Extend the preceding Text node instead of adding a sibling.
            node = childNodes[-1]
            value = node.data + data
            node.data = value
            return
        else:
            node = minidom.Text()
            node.data = data
            node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def character_data_handler(self, data):
        """Character data callback used when CDATA sections are not
        tracked separately."""
        childNodes = self.curNode.childNodes
        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
            # Extend the preceding Text node instead of adding a sibling.
            node = childNodes[-1]
            node.data = node.data + data
            return
        node = minidom.Text()
        node.data = data
        node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def entity_decl_handler(self, entityName, is_parameter_entity, value,
                            base, systemId, publicId, notationName):
        """Record a general entity declaration on the doctype unless the
        filter rejects it."""
        if is_parameter_entity:
            # we don't care about parameter entities for the DOM
            return
        if not self._options.entities:
            return
        node = self.document._create_entity(entityName, publicId,
                                            systemId, notationName)
        if value is not None:
            # internal entity
            # node *should* be readonly, but we'll cheat
            child = self.document.createTextNode(value)
            node.childNodes.append(child)
        self.document.doctype.entities._seq.append(node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.entities._seq[-1]

    def notation_decl_handler(self, notationName, base, systemId, publicId):
        """Record a notation declaration on the doctype unless the filter
        rejects it."""
        node = self.document._create_notation(notationName, publicId, systemId)
        self.document.doctype.notations._seq.append(node)
        # Drop the node only on FILTER_REJECT, like every other handler;
        # comparing against FILTER_ACCEPT removed exactly the notations
        # the filter wanted to keep.
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.notations._seq[-1]

    def comment_handler(self, data):
        """Append a Comment node unless the filter rejects it."""
        node = self.document.createComment(data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def start_cdata_section_handler(self):
        self._cdata = True
        self._cdata_continue = False

    def end_cdata_section_handler(self):
        self._cdata = False
        self._cdata_continue = False

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        # The external entity is not loaded; 1 is returned to the parser.
        return 1

    def first_element_handler(self, name, attributes):
        """Start-element callback for the first element only; it swaps in
        start_element_handler() for the rest of the document."""
        if self._filter is None and not self._elem_info:
            self._finish_end_element = id
        self.getParser().StartElementHandler = self.start_element_handler
        self.start_element_handler(name, attributes)

    def start_element_handler(self, name, attributes):
        """Create an Element for *name*, attach it under the current node
        and make it the current node.

        *attributes* is a flat [name, value, name, value, ...] list.
        """
        node = self.document.createElement(name)
        _append_child(self.curNode, node)
        self.curNode = node

        if attributes:
            for i in range(0, len(attributes), 2):
                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
                                 None, EMPTY_PREFIX)
                value = attributes[i+1]
                a.value = value
                a.ownerDocument = self.document
                _set_attribute_node(node, a)

        if node is not self.document.documentElement:
            self._finish_start_element(node)

    def _finish_start_element(self, node):
        """Ask the filter whether the element just started should be
        kept, skipped or rejected."""
        if self._filter:
            # To be general, we'd have to call isSameNode(), but this
            # is sufficient for minidom:
            if node is self.document.documentElement:
                return
            filt = self._filter.startContainer(node)
            if filt == FILTER_REJECT:
                # ignore this node & all descendents
                Rejecter(self)
            elif filt == FILTER_SKIP:
                # ignore this node, but make its children become
                # children of the parent node
                Skipper(self)
            else:
                return
            self.curNode = node.parentNode
            node.parentNode.removeChild(node)
            node.unlink()

    # If this ever changes, Namespaces.end_element_handler() needs to
    # be changed to match.
    #
    def end_element_handler(self, name):
        curNode = self.curNode
        self.curNode = curNode.parentNode
        self._finish_end_element(curNode)

    def _finish_end_element(self, curNode):
        """Post-process a completed element: strip ignorable whitespace
        and apply the filter."""
        info = self._elem_info.get(curNode.tagName)
        if info:
            self._handle_white_text_nodes(curNode, info)
        if self._filter:
            if curNode is self.document.documentElement:
                return
            if self._filter.acceptNode(curNode) == FILTER_REJECT:
                self.curNode.removeChild(curNode)
                curNode.unlink()

    def _handle_white_text_nodes(self, node, info):
        """Remove whitespace-only Text children of *node* when *info*
        says it has element content and the options do not ask to keep
        such whitespace."""
        if (self._options.whitespace_in_element_content
                or not info.isElementContent()):
            return

        # We have element type information and should remove ignorable
        # whitespace; identify for text nodes which contain only
        # whitespace.  Collect first so the child list is not modified
        # while it is being iterated.
        blanks = [child for child in node.childNodes
                  if child.nodeType == TEXT_NODE and not child.data.strip()]

        # Remove ignorable whitespace from the tree.
        for child in blanks:
            node.removeChild(child)

    def element_decl_handler(self, name, model):
        """Store the content model declared for element type *name*."""
        info = self._elem_info.get(name)
        if info is None:
            self._elem_info[name] = ElementInfo(name, model)
        else:
            # The record was created earlier by an attribute-list
            # declaration, which leaves the model unset.
            assert info._model is None
            info._model = model

    def attlist_decl_handler(self, elem, name, type, default, required):
        """Store one attribute declaration for element type *elem*."""
        info = self._elem_info.get(elem)
        if info is None:
            info = ElementInfo(elem)
            self._elem_info[elem] = info
        info._attr_info.append(
            [None, name, None, None, default, 0, type, required])

    def xml_decl_handler(self, version, encoding, standalone):
        """Copy the XML declaration's values onto the document."""
        self.document.version = version
        self.document.encoding = encoding
        # This is still a little ugly, thanks to the pyexpat API. ;-(
        # A negative value leaves document.standalone untouched.
        if standalone >= 0:
            self.document.standalone = bool(standalone)
|
||||
|
||||
|
||||
# Don't include FILTER_INTERRUPT, since that's checked separately
|
||||
# where allowed.
|
||||
_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)
|
||||
|
||||
class FilterVisibilityController(object):
    """Wrapper around a DOMBuilderFilter which implements the checks
    to make the whatToShow filter attribute work."""

    __slots__ = 'filter',

    # nodeType -> whatToShow bit that must be set for the wrapped filter
    # to be consulted about a node of that type.
    _nodetype_mask = {
        Node.ELEMENT_NODE:                NodeFilter.SHOW_ELEMENT,
        Node.ATTRIBUTE_NODE:              NodeFilter.SHOW_ATTRIBUTE,
        Node.TEXT_NODE:                   NodeFilter.SHOW_TEXT,
        Node.CDATA_SECTION_NODE:          NodeFilter.SHOW_CDATA_SECTION,
        Node.ENTITY_REFERENCE_NODE:       NodeFilter.SHOW_ENTITY_REFERENCE,
        Node.ENTITY_NODE:                 NodeFilter.SHOW_ENTITY,
        Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
        Node.COMMENT_NODE:                NodeFilter.SHOW_COMMENT,
        Node.DOCUMENT_NODE:               NodeFilter.SHOW_DOCUMENT,
        Node.DOCUMENT_TYPE_NODE:          NodeFilter.SHOW_DOCUMENT_TYPE,
        Node.DOCUMENT_FRAGMENT_NODE:      NodeFilter.SHOW_DOCUMENT_FRAGMENT,
        Node.NOTATION_NODE:               NodeFilter.SHOW_NOTATION,
    }

    def __init__(self, filter):
        self.filter = filter

    def startContainer(self, node):
        """Return the wrapped filter's startContainer() verdict for
        *node*, or FILTER_ACCEPT when whatToShow hides its node type.

        Raises ParseEscape on FILTER_INTERRUPT and ValueError on any
        other value outside the allowed set."""
        bit = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & bit):
            return FILTER_ACCEPT
        verdict = self.filter.startContainer(node)
        if verdict == FILTER_INTERRUPT:
            raise ParseEscape
        if verdict not in _ALLOWED_FILTER_RETURNS:
            raise ValueError(
                "startContainer() returned illegal value: " + repr(verdict))
        return verdict

    def acceptNode(self, node):
        """Return the wrapped filter's acceptNode() verdict for *node*,
        or FILTER_ACCEPT when whatToShow hides its node type.

        FILTER_SKIP is turned into FILTER_REJECT after the node's
        children have been handed to its parent.  Raises ParseEscape on
        FILTER_INTERRUPT and ValueError on any other value outside the
        allowed set."""
        bit = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & bit):
            return FILTER_ACCEPT
        verdict = self.filter.acceptNode(node)
        if verdict == FILTER_INTERRUPT:
            raise ParseEscape
        if verdict == FILTER_SKIP:
            # move all child nodes to the parent, and remove this node
            new_home = node.parentNode
            # Iterate over a copy of the child list.
            for child in list(node.childNodes):
                new_home.appendChild(child)
            # node is handled by the caller
            return FILTER_REJECT
        if verdict not in _ALLOWED_FILTER_RETURNS:
            raise ValueError(
                "acceptNode() returned illegal value: " + repr(verdict))
        return verdict
|
||||
|
||||
|
||||
class FilterCrutch(object):
    """Base for helpers that temporarily take over the parser's element
    handlers.

    The constructor remembers the parser's current start/end element
    handlers and installs this object's start_element_handler and
    end_element_handler, which subclasses must provide."""

    __slots__ = '_builder', '_level', '_old_start', '_old_end'

    def __init__(self, builder):
        self._builder = builder
        self._level = 0
        parser = builder._parser
        # Save the handlers being displaced before overwriting them.
        self._old_start, self._old_end = (parser.StartElementHandler,
                                          parser.EndElementHandler)
        parser.StartElementHandler = self.start_element_handler
        parser.EndElementHandler = self.end_element_handler
|
||||
|
||||
class Rejecter(FilterCrutch):
    """Handler set that discards an element and everything inside it.

    While active, nested elements are only counted and the parser's
    content handlers are switched off; when the element itself ends,
    the builder's handlers are installed again."""

    __slots__ = ()

    def __init__(self, builder):
        FilterCrutch.__init__(self, builder)
        parser = builder._parser
        # Silence every content callback while rejecting.
        silenced = (
            "ProcessingInstructionHandler",
            "CommentHandler",
            "CharacterDataHandler",
            "StartCdataSectionHandler",
            "EndCdataSectionHandler",
            "ExternalEntityRefHandler",
        )
        for handler_name in silenced:
            setattr(parser, handler_name, None)

    def start_element_handler(self, *args):
        self._level += 1

    def end_element_handler(self, *args):
        if self._level != 0:
            # Closing a nested element of the rejected subtree.
            self._level -= 1
            return
        # restore the old handlers
        parser = self._builder._parser
        self._builder.install(parser)
        parser.StartElementHandler = self._old_start
        parser.EndElementHandler = self._old_end
|
||||
|
||||
class Skipper(FilterCrutch):
    """Handler set used while an element is skipped but its content is
    still built through the previously installed handlers."""

    __slots__ = ()

    def start_element_handler(self, *args):
        before = self._builder.curNode
        self._old_start(*args)
        # Count the element only if the builder actually descended into it.
        if self._builder.curNode is not before:
            self._level += 1

    def end_element_handler(self, *args):
        if self._level != 0:
            self._level -= 1
            self._old_end(*args)
            return
        # We're popping back out of the node we're skipping, so we
        # shouldn't need to do anything but reset the handlers.
        parser = self._builder._parser
        parser.StartElementHandler = self._old_start
        parser.EndElementHandler = self._old_end
        self._builder = None
|
||||
|
||||
|
||||
# framework document used by the fragment builder.
|
||||
# Takes a string for the doctype, subset string, and namespace attrs string.
|
||||
|
||||
_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
|
||||
"http://xml.python.org/entities/fragment-builder/internal"
|
||||
|
||||
_FRAGMENT_BUILDER_TEMPLATE = (
|
||||
'''\
|
||||
<!DOCTYPE wrapper
|
||||
%%s [
|
||||
<!ENTITY fragment-builder-internal
|
||||
SYSTEM "%s">
|
||||
%%s
|
||||
]>
|
||||
<wrapper %%s
|
||||
>&fragment-builder-internal;</wrapper>'''
|
||||
% _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
|
||||
|
||||
|
||||
class FragmentBuilder(ExpatBuilder):
    """Builder which constructs document fragments given XML source
    text and a context node.

    The context node is expected to provide information about the
    namespace declarations which are in scope at the start of the
    fragment.
    """

    def __init__(self, context, options=None):
        """Remember *context* and the document that owns it.

        When *context* is itself a document node it is used as the owning
        document; otherwise ``context.ownerDocument`` is used.
        """
        if context.nodeType == DOCUMENT_NODE:
            self.originalDocument = context
        else:
            self.originalDocument = context.ownerDocument
        self.context = context
        ExpatBuilder.__init__(self, options)

    def reset(self):
        """Reset the builder and forget any fragment built so far."""
        ExpatBuilder.reset(self)
        self.fragment = None

    def parseFile(self, file):
        """Parse a document fragment from a file object, returning the
        fragment node."""
        return self.parseString(file.read())

    def parseString(self, string):
        """Parse a document fragment from a string, returning the
        fragment node."""
        self._source = string
        parser = self.getParser()
        doctype = self.originalDocument.doctype
        ident = ""
        if doctype:
            subset = doctype.internalSubset or self._getDeclarations()
            if doctype.publicId:
                ident = ('PUBLIC "%s" "%s"'
                         % (doctype.publicId, doctype.systemId))
            elif doctype.systemId:
                ident = 'SYSTEM "%s"' % doctype.systemId
        else:
            subset = ""
        nsattrs = self._getNSattrs()  # get ns decls from node's ancestors
        document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
        try:
            parser.Parse(document, 1)
            # Grab the result before reset() clears self.fragment.
            fragment = self.fragment
        finally:
            # The builder is reset whether or not parsing succeeded.
            self.reset()
        return fragment

    def _getDeclarations(self):
        """Re-create the internal subset from the DocumentType node.

        This is only needed if we don't already have the
        internalSubset as a string.
        """
        # Use originalDocument rather than context.ownerDocument: when the
        # context is the document itself, its ownerDocument is not the
        # document and has no doctype to read.
        doctype = self.originalDocument.doctype
        if not doctype:
            return ""
        decls = []
        for i in range(doctype.notations.length):
            notation = doctype.notations.item(i)
            decl = "<!NOTATION %s" % notation.nodeName
            if notation.publicId:
                decl = '%s PUBLIC "%s"\n "%s">' \
                       % (decl, notation.publicId, notation.systemId)
            else:
                decl = '%s SYSTEM "%s">' % (decl, notation.systemId)
            decls.append(decl)
        for i in range(doctype.entities.length):
            entity = doctype.entities.item(i)
            decl = "<!ENTITY %s" % entity.nodeName
            if entity.publicId:
                decl = '%s PUBLIC "%s"\n "%s"' \
                       % (decl, entity.publicId, entity.systemId)
            elif entity.systemId:
                decl = '%s SYSTEM "%s"' % (decl, entity.systemId)
            else:
                decl = '%s "%s"' % (decl, entity.firstChild.data)
            if entity.notationName:
                # Unparsed entities name their notation with NDATA.
                decl = "%s NDATA %s" % (decl, entity.notationName)
            decls.append(decl + ">")
        return "\n ".join(decls)

    def _getNSattrs(self):
        """Return namespace attribute text for the wrapper element.

        This base implementation contributes none.
        """
        return ""

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        """Parse the fragment source when the internal wrapper entity is hit.

        Any other external entity is delegated to ExpatBuilder.
        """
        if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
            # this entref is the one that we made to put the subtree
            # in; all of our given input is parsed in here.
            old_document = self.document
            old_cur_node = self.curNode
            parser = self._parser.ExternalEntityParserCreate(context)
            # put the real document back, parse into the fragment to return
            self.document = self.originalDocument
            self.fragment = self.document.createDocumentFragment()
            self.curNode = self.fragment
            try:
                parser.Parse(self._source, 1)
            finally:
                self.curNode = old_cur_node
                self.document = old_document
                self._source = None
            return -1
        else:
            return ExpatBuilder.external_entity_ref_handler(
                self, context, base, systemId, publicId)
|
||||
|
||||
|
||||
class Namespaces:
    """Mix-in class for builders; adds support for namespaces."""

    def _initNamespaces(self):
        """Start with no pending namespace declarations."""
        # list of (prefix, uri) ns declarations.  Namespace attrs are
        # constructed from this and added to the element's attrs.
        self._ns_ordered_prefixes = []

    def createParser(self):
        """Create a new namespace-handling parser."""
        ns_parser = expat.ParserCreate(namespace_separator=" ")
        ns_parser.namespace_prefixes = True
        return ns_parser

    def install(self, parser):
        """Insert the namespace-handlers onto the parser."""
        ExpatBuilder.install(self, parser)
        if not self._options.namespace_declarations:
            return
        parser.StartNamespaceDeclHandler = self.start_namespace_decl_handler

    def start_namespace_decl_handler(self, prefix, uri):
        """Push this namespace declaration on our storage."""
        self._ns_ordered_prefixes.append((prefix, uri))

    def start_element_handler(self, name, attributes):
        """Create an Element for *name*, attach it and populate its attrs."""
        if ' ' in name:
            uri, localname, prefix, qname = _parse_ns_name(self, name)
        else:
            uri, localname, prefix, qname = (
                EMPTY_NAMESPACE, None, EMPTY_PREFIX, name)
        element = minidom.Element(qname, uri, prefix, localname)
        element.ownerDocument = self.document
        _append_child(self.curNode, element)
        self.curNode = element

        # Turn the declarations collected since the previous element into
        # xmlns attributes on this element, then clear the pending list.
        pending = self._ns_ordered_prefixes
        if pending:
            for ns_prefix, ns_uri in pending:
                if ns_prefix:
                    decl = minidom.Attr(_intern(self, 'xmlns:' + ns_prefix),
                                        XMLNS_NAMESPACE, ns_prefix, "xmlns")
                else:
                    decl = minidom.Attr("xmlns", XMLNS_NAMESPACE,
                                        "xmlns", EMPTY_PREFIX)
                decl.value = ns_uri
                decl.ownerDocument = self.document
                _set_attribute_node(element, decl)
            del pending[:]

        if not attributes:
            return
        # *attributes* is a flat sequence: name, value, name, value, ...
        element._ensure_attributes()
        by_qname = element._attrs
        by_ns = element._attrsNS
        for idx in range(0, len(attributes), 2):
            aname = attributes[idx]
            value = attributes[idx + 1]
            if ' ' in aname:
                uri, localname, prefix, qname = _parse_ns_name(self, aname)
                attr = minidom.Attr(qname, uri, localname, prefix)
                by_qname[qname] = attr
                by_ns[(uri, localname)] = attr
            else:
                attr = minidom.Attr(aname, EMPTY_NAMESPACE,
                                    aname, EMPTY_PREFIX)
                by_qname[aname] = attr
                by_ns[(EMPTY_NAMESPACE, aname)] = attr
            attr.ownerDocument = self.document
            attr.value = value
            attr.ownerElement = element

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        #
        def end_element_handler(self, name):
            """Close the current element, checking it matches *name*."""
            closing = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = _parse_ns_name(self, name)
                assert (closing.namespaceURI == uri
                        and closing.localName == localname
                        and closing.prefix == prefix), \
                    "element stack messed up! (namespace)"
            else:
                assert closing.nodeName == name, \
                    "element stack messed up - bad nodeName"
                assert closing.namespaceURI == EMPTY_NAMESPACE, \
                    "element stack messed up - bad namespaceURI"
            self.curNode = closing.parentNode
            self._finish_end_element(closing)
|
||||
|
||||
|
||||
class ExpatBuilderNS(Namespaces, ExpatBuilder):
    """Document builder that supports namespaces."""

    def reset(self):
        """Reset the underlying builder, then clear namespace bookkeeping."""
        ExpatBuilder.reset(self)
        # Namespace state is re-created on every reset.
        self._initNamespaces()
|
||||
|
||||
|
||||
class FragmentBuilderNS(Namespaces, FragmentBuilder):
    """Fragment builder that supports namespaces."""

    def reset(self):
        """Reset the fragment builder, then clear namespace bookkeeping."""
        FragmentBuilder.reset(self)
        self._initNamespaces()

    def _getNSattrs(self):
        """Return string of namespace attributes from this element and
        ancestors."""
        # XXX This needs to be re-written to walk the ancestors of the
        # context to build up the namespace information from
        # declarations, elements, and attributes found in context.
        # Otherwise we have to store a bunch more data on the DOM
        # (though that *might* be more reliable -- not clear).
        attrs = ""
        seen = set()
        node = self.context
        while node:
            if hasattr(node, '_ns_prefix_uri'):
                for prefix, uri in node._ns_prefix_uri.items():
                    # The nearest declaration of a prefix wins; ones found
                    # further up the ancestor chain are ignored.
                    if prefix in seen:
                        continue
                    seen.add(prefix)
                    declname = "xmlns:" + prefix if prefix else "xmlns"
                    if attrs:
                        attrs = "%s\n %s='%s'" % (attrs, declname, uri)
                    else:
                        attrs = " %s='%s'" % (declname, uri)
            node = node.parentNode
        return attrs
|
||||
|
||||
|
||||
class ParseEscape(Exception):
    """Exception raised to short-circuit parsing in InternalSubsetExtractor."""
|
||||
|
||||
class InternalSubsetExtractor(ExpatBuilder):
    """XML processor which can rip out the internal document type subset."""

    # None until a doctype with an internal subset is seen; a list of text
    # chunks while it is being collected; a string once it is complete.
    subset = None

    def getSubset(self):
        """Return the internal subset as a string."""
        return self.subset

    def parseFile(self, file):
        """Parse *file*; an early ParseEscape is treated as normal completion."""
        try:
            ExpatBuilder.parseFile(self, file)
        except ParseEscape:
            pass

    def parseString(self, string):
        """Parse *string*; an early ParseEscape is treated as normal completion."""
        try:
            ExpatBuilder.parseString(self, string)
        except ParseEscape:
            pass

    def install(self, parser):
        """Install only the handlers needed to find the doctype."""
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.start_element_handler

    def start_doctype_decl_handler(self, name, publicId, systemId,
                                   has_internal_subset):
        """Begin collecting the subset, or stop if there is none."""
        if not has_internal_subset:
            raise ParseEscape()
        expat_parser = self.getParser()
        chunks = []
        self.subset = chunks
        # Each piece of text passed to the default handler is accumulated
        # until the doctype declaration ends.
        expat_parser.DefaultHandler = chunks.append
        expat_parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Join the collected chunks, normalise newlines and stop parsing."""
        text = ''.join(self.subset)
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        self.subset = text
        raise ParseEscape()

    def start_element_handler(self, name, attrs):
        """Stop parsing as soon as an element starts."""
        raise ParseEscape()
|
||||
|
||||
|
||||
def parse(file, namespaces=True):
    """Parse a document, returning the resulting Document node.

    'file' may be either a file name or an open file object.
    """
    builder_cls = ExpatBuilderNS if namespaces else ExpatBuilder
    builder = builder_cls()

    if not isinstance(file, str):
        # Already an open file object: hand it straight to the builder.
        return builder.parseFile(file)
    with open(file, 'rb') as fp:
        return builder.parseFile(fp)
|
||||
|
||||
|
||||
def parseString(string, namespaces=True):
    """Parse a document from a string, returning the resulting
    Document node.
    """
    builder_cls = ExpatBuilderNS if namespaces else ExpatBuilder
    return builder_cls().parseString(string)
|
||||
|
||||
|
||||
def parseFragment(file, context, namespaces=True):
    """Parse a fragment of a document, given the context from which it
    was originally extracted.  context should be the parent of the
    node(s) which are in the fragment.

    'file' may be either a file name or an open file object.
    """
    builder_cls = FragmentBuilderNS if namespaces else FragmentBuilder
    builder = builder_cls(context)

    if not isinstance(file, str):
        # Already an open file object: hand it straight to the builder.
        return builder.parseFile(file)
    with open(file, 'rb') as fp:
        return builder.parseFile(fp)
|
||||
|
||||
|
||||
def parseFragmentString(string, context, namespaces=True):
    """Parse a fragment of a document from a string, given the context
    from which it was originally extracted.  context should be the
    parent of the node(s) which are in the fragment.
    """
    builder_cls = FragmentBuilderNS if namespaces else FragmentBuilder
    return builder_cls(context).parseString(string)
|
||||
|
||||
|
||||
def makeBuilder(options):
    """Create a builder based on an Options object."""
    # The namespace-aware builder is chosen when options.namespaces is truthy.
    builder_cls = ExpatBuilderNS if options.namespaces else ExpatBuilder
    return builder_cls(options)
|
109
third_party/python/Lib/xml/dom/minicompat.py
vendored
Normal file
109
third_party/python/Lib/xml/dom/minicompat.py
vendored
Normal file
|
@ -0,0 +1,109 @@
|
|||
"""Python version compatibility support for minidom.
|
||||
|
||||
This module contains internal implementation details and
|
||||
should not be imported; use xml.dom.minidom instead.
|
||||
"""
|
||||
|
||||
# This module should only be imported using "import *".
|
||||
#
|
||||
# The following names are defined:
|
||||
#
|
||||
# NodeList -- lightest possible NodeList implementation
|
||||
#
|
||||
# EmptyNodeList -- lightest possible NodeList that is guaranteed to
|
||||
# remain empty (immutable)
|
||||
#
|
||||
# StringTypes -- tuple of defined string types
|
||||
#
|
||||
# defproperty -- function used in conjunction with GetattrMagic;
|
||||
# using these together is needed to make them work
|
||||
# as efficiently as possible in both Python 2.2+
|
||||
# and older versions. For example:
|
||||
#
|
||||
# class MyClass(GetattrMagic):
|
||||
# def _get_myattr(self):
|
||||
# return something
|
||||
#
|
||||
# defproperty(MyClass, "myattr",
|
||||
# "return some value")
|
||||
#
|
||||
# For Python 2.2 and newer, this will construct a
|
||||
# property object on the class, which avoids
|
||||
# needing to override __getattr__(). It will only
|
||||
# work for read-only attributes.
|
||||
#
|
||||
# For older versions of Python, inheriting from
|
||||
# GetattrMagic will use the traditional
|
||||
# __getattr__() hackery to achieve the same effect,
|
||||
# but less efficiently.
|
||||
#
|
||||
# defproperty() should be used for each version of
|
||||
# the relevant _get_<property>() function.
|
||||
|
||||
__all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"]
|
||||
|
||||
import xml.dom
|
||||
|
||||
StringTypes = (str,)
|
||||
|
||||
|
||||
class NodeList(list):
    """List subclass offering the DOM ``item()`` / ``length`` accessors."""

    __slots__ = ()

    def item(self, index):
        """Return the node at *index*, or None when it is out of range.

        Negative indexes count as out of range.
        """
        in_range = 0 <= index < len(self)
        return self[index] if in_range else None

    def _get_length(self):
        return len(self)

    def _set_length(self, value):
        raise xml.dom.NoModificationAllowedErr(
            "attempt to modify read-only attribute 'length'")

    length = property(_get_length, _set_length,
                      doc="The number of nodes in the NodeList.")

    # For backward compatibility
    def __setstate__(self, state):
        """Replace the contents with *state*; None means empty."""
        self[:] = [] if state is None else state
|
||||
|
||||
|
||||
class EmptyNodeList(tuple):
    """Tuple-based node list whose ``item()`` and ``length`` report emptiness.

    Adding it to another sequence, on either side, produces a fresh
    NodeList holding the other operand's items.
    """

    __slots__ = ()

    def __add__(self, other):
        combined = NodeList()
        combined.extend(other)
        return combined

    def __radd__(self, other):
        combined = NodeList()
        combined.extend(other)
        return combined

    def item(self, index):
        """Always return None."""
        return None

    def _get_length(self):
        return 0

    def _set_length(self, value):
        raise xml.dom.NoModificationAllowedErr(
            "attempt to modify read-only attribute 'length'")

    length = property(_get_length, _set_length,
                      doc="The number of nodes in the NodeList.")
|
||||
|
||||
|
||||
def defproperty(klass, name, doc):
    """Install a read-only property *name* on *klass*.

    The getter is the class's existing ``_get_<name>`` attribute; any
    attempt to assign raises xml.dom.NoModificationAllowedErr.  *doc*
    becomes the property's docstring.
    """
    getter = getattr(klass, "_get_" + name)

    def reject_assignment(self, value, name=name):
        # *name* is bound as a default so the message names this property.
        raise xml.dom.NoModificationAllowedErr(
            "attempt to modify read-only attribute " + repr(name))

    setter_name = "_set_" + name
    assert not hasattr(klass, setter_name), \
        "expected not to find _set_" + name
    setattr(klass, name, property(getter, reject_assignment, doc=doc))
|
1981
third_party/python/Lib/xml/dom/minidom.py
vendored
Normal file
1981
third_party/python/Lib/xml/dom/minidom.py
vendored
Normal file
File diff suppressed because it is too large
Load diff
342
third_party/python/Lib/xml/dom/pulldom.py
vendored
Normal file
342
third_party/python/Lib/xml/dom/pulldom.py
vendored
Normal file
|
@ -0,0 +1,342 @@
|
|||
import xml.sax
|
||||
import xml.sax.handler
|
||||
|
||||
START_ELEMENT = "START_ELEMENT"
|
||||
END_ELEMENT = "END_ELEMENT"
|
||||
COMMENT = "COMMENT"
|
||||
START_DOCUMENT = "START_DOCUMENT"
|
||||
END_DOCUMENT = "END_DOCUMENT"
|
||||
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
|
||||
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
|
||||
CHARACTERS = "CHARACTERS"
|
||||
|
||||
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that turns callbacks into a queue of DOM events.

    Events form a singly linked list of ``[payload, next]`` cells.
    ``firstEvent`` is a sentinel head cell and ``lastEvent`` the cell new
    events are linked after.  A payload is an ``(event_type, node)`` tuple.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        self.push = self.elementStack.append
        try:
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        self._ns_contexts = [{XML_NAMESPACE: 'xml'}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # Comments / PIs seen before the document object exists.
        self.pending_events = []

    def _link_event(self, cell):
        """Link *cell* after the current tail and make it the new tail."""
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def pop(self):
        """Remove and return the top of the element stack."""
        top = self.elementStack[-1]
        del self.elementStack[-1]
        return top

    def setDocumentLocator(self, locator):
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element."""
        if not hasattr(self, '_xmlns_attrs'):
            self._xmlns_attrs = []
        self._xmlns_attrs.append((prefix or 'xmlns', uri))
        # Save a snapshot to return to when the mapping ends, then update
        # the live context in place.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName , attrs):
        """Create a namespace-aware element and queue a START_ELEMENT event."""
        # Retrieve xml namespace declaration attributes.
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        declared = getattr(self, '_xmlns_attrs', None)
        if declared is not None:
            for decl_name, decl_value in declared:
                attrs._attrs[(xmlns_uri, decl_name)] = decl_value
            self._xmlns_attrs = []
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                tagName = prefix + ":" + localname if prefix else localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for (a_uri, a_localname), value in attrs.items():
            if not a_uri:
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
            else:
                if a_uri == xmlns_uri:
                    if a_localname == 'xmlns':
                        qname = a_localname
                    else:
                        qname = 'xmlns:' + a_localname
                else:
                    prefix = self._current_context[a_uri]
                    if prefix:
                        qname = prefix + ":" + a_localname
                    else:
                        qname = a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            attr.value = value

        self._link_event([(START_ELEMENT, node), None])
        self.push(node)

    def endElementNS(self, name, tagName):
        self._link_event([(END_ELEMENT, self.pop()), None])

    def startElement(self, name, attrs):
        """Create an element and queue a START_ELEMENT event."""
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for attr_name, attr_value in attrs.items():
            attr = self.document.createAttribute(attr_name)
            attr.value = attr_value
            node.setAttributeNode(attr)

        self._link_event([(START_ELEMENT, node), None])
        self.push(node)

    def endElement(self, name):
        self._link_event([(END_ELEMENT, self.pop()), None])

    def comment(self, s):
        """Queue a COMMENT event, deferring it if no document exists yet."""
        if not self.document:
            self.pending_events.append([(COMMENT, s), None])
            return
        node = self.document.createComment(s)
        self._link_event([(COMMENT, node), None])

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, deferring it if needed."""
        if not self.document:
            self.pending_events.append(
                [(PROCESSING_INSTRUCTION, target, data), None])
            return
        node = self.document.createProcessingInstruction(target, data)
        self._link_event([(PROCESSING_INSTRUCTION, node), None])

    def ignorableWhitespace(self, chars):
        node = self.document.createTextNode(chars)
        self._link_event([(IGNORABLE_WHITESPACE, node), None])

    def characters(self, chars):
        node = self.document.createTextNode(chars)
        self._link_event([(CHARACTERS, node), None])

    def startDocument(self):
        """Default the document factory to minidom's DOM implementation."""
        if self.documentFactory is None:
            from xml.dom import minidom
            self.documentFactory = minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, queue START_DOCUMENT and flush deferred events.

        Returns the new document's first child.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self._link_event([(START_DOCUMENT, node), None])
        self.push(node)
        # Put everything we have seen so far into the document
        for cell in self.pending_events:
            kind = cell[0][0]
            if kind == PROCESSING_INSTRUCTION:
                _, target, data = cell[0]
                created = self.document.createProcessingInstruction(target, data)
                cell[0] = (PROCESSING_INSTRUCTION, created)
            elif kind == COMMENT:
                created = self.document.createComment(cell[0][1])
                cell[0] = (COMMENT, created)
            else:
                raise AssertionError("Unknown pending event ", cell[0][0])
            self._link_event(cell)
        self.pending_events = None
        return node.firstChild

    def endDocument(self):
        # The tail pointer is not advanced for this event.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
|
||||
|
||||
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print *exception* and carry on."""
        print(exception)

    def error(self, exception):
        """Re-raise *exception*."""
        raise exception

    def fatalError(self, exception):
        """Re-raise *exception*."""
        raise exception
|
||||
|
||||
class DOMEventStream:
    """Iterator over the DOM events produced while parsing a stream.

    *stream* is read in chunks of *bufsize* and fed to *parser*.  A parser
    without a ``feed`` method is run in one go on first use and its events
    are then handed out from memory.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            # Not an incremental parser: fall back to parse-everything mode.
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Attach a fresh PullDOM handler to the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event regardless of *pos*; IndexError at the end."""
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def __next__(self):
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events up to the end of *node*, attaching its descendants."""
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next event, or None once the stream is exhausted."""
        # use IncrementalParser interface, so we get the desired
        # pull effect
        if not self.pulldom.firstEvent[1]:
            # Queue is empty: point the tail back at the sentinel head.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.

            Returns None once every stored event has been handed out, so
            that iteration ends the same way as with getEvent().
        """
        head = self.pulldom.firstEvent[1]
        if head is None:
            # Nothing left; without this guard the subscript below fails.
            return None
        rc = head[0]
        self.pulldom.firstEvent[1] = head[1]
        return rc

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        self.stream = None
|
||||
|
||||
class SAX2DOM(PullDOM):
    """PullDOM variant that also attaches each new node to its parent."""

    def _adopt_top_element(self):
        """Append the element on top of the stack to the one beneath it."""
        child = self.elementStack[-1]
        parent = self.elementStack[-2]
        parent.appendChild(child)

    def _adopt_last_event_node(self):
        """Append the most recently queued event's node to the open element."""
        node = self.lastEvent[0][1]
        parent = self.elementStack[-1]
        parent.appendChild(node)

    def startElementNS(self, name, tagName , attrs):
        PullDOM.startElementNS(self, name, tagName, attrs)
        self._adopt_top_element()

    def startElement(self, name, attrs):
        PullDOM.startElement(self, name, attrs)
        self._adopt_top_element()

    def processingInstruction(self, target, data):
        PullDOM.processingInstruction(self, target, data)
        self._adopt_last_event_node()

    def ignorableWhitespace(self, chars):
        PullDOM.ignorableWhitespace(self, chars)
        self._adopt_last_event_node()

    def characters(self, chars):
        PullDOM.characters(self, chars)
        self._adopt_last_event_node()
|
||||
|
||||
|
||||
default_bufsize = (2 ** 14) - 20
|
||||
|
||||
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream for a file name or an open stream.

    A str argument is treated as a path and opened in binary mode; this
    function does not close that file.  *bufsize* falls back to
    default_bufsize and *parser* to a new SAX parser.
    """
    chunk_size = default_bufsize if bufsize is None else bufsize
    if isinstance(stream_or_string, str):
        source = open(stream_or_string, 'rb')
    else:
        source = stream_or_string
    sax_parser = parser or xml.sax.make_parser()
    return DOMEventStream(source, sax_parser, chunk_size)
|
||||
|
||||
def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text in *string*.

    The buffer size equals the length of the text, so it is read in a
    single chunk.  *parser* falls back to a new SAX parser.
    """
    from io import StringIO

    text_length = len(string)
    source = StringIO(string)
    sax_parser = parser or xml.sax.make_parser()
    return DOMEventStream(source, sax_parser, text_length)
|
410
third_party/python/Lib/xml/dom/xmlbuilder.py
vendored
Normal file
410
third_party/python/Lib/xml/dom/xmlbuilder.py
vendored
Normal file
|
@ -0,0 +1,410 @@
|
|||
"""Implementation of the DOM Level 3 'LS-Load' feature."""
|
||||
|
||||
import copy
|
||||
import warnings
|
||||
import xml.dom
|
||||
|
||||
from xml.dom.NodeFilter import NodeFilter
|
||||
|
||||
|
||||
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
|
||||
|
||||
|
||||
class Options:
    """Features object that has variables set for each DOMBuilder feature.

    The DOMBuilder class uses an instance of this class to pass settings to
    the ExpatBuilder class.
    """

    # Note that the DOMBuilder class in LoadSave constrains which of these
    # values can be set using the DOM Level 3 LoadSave feature.

    # Namespace handling.
    namespaces = 1
    namespace_declarations = True

    # Validation.
    validation = False
    validate_if_schema = False
    validate = False
    datatype_normalization = False

    # External entities and the external DTD subset.
    external_parameter_entities = True
    external_general_entities = True
    external_dtd_subset = True

    # Which constructs are kept in the resulting tree.
    create_entity_ref_nodes = True
    entities = True
    whitespace_in_element_content = True
    cdata_sections = True
    comments = True

    # Remaining feature flags.
    charset_overrides_xml_encoding = True
    infoset = False
    supported_mediatypes_only = False

    # Optional collaborators; unset by default.
    errorHandler = None
    filter = None
|
||||
|
||||
|
||||
class DOMBuilder:
    """Synchronous DOM Load/Save style builder.

    Feature flags are stored on an ``Options`` instance (defined elsewhere
    in this module) and parsing is delegated to ``xml.dom.expatbuilder``.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        return self.entityResolver

    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler

    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter

    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Set feature *name* to the truth value of *state*.

        Raises xml.dom.NotFoundErr when the feature is unknown and
        xml.dom.NotSupportedErr when the (feature, state) pair is not
        listed in ``_settings``.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        state = 1 if state else 0
        try:
            settings = self._settings[(_name_xform(name), state)]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,)) from None
        # A single feature may imply several option changes.
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return True if the options object has an attribute for *name*."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return True if the (feature, state) pair is listed in ``_settings``."""
        key = (_name_xform(name), 1 if state else 0)
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current value of feature *name*.

        Raises xml.dom.NotFoundErr when the feature is not known.
        """
        xname = _name_xform(name)
        try:
            return getattr(self._options, xname)
        except AttributeError:
            # Compare the normalized name so "Infoset" is treated like
            # "infoset", matching how every other lookup is normalized.
            if xname == "infoset":
                options = self._options
                return (options.datatype_normalization
                        and options.whitespace_in_element_content
                        and options.comments
                        and options.charset_overrides_xml_encoding
                        and not (options.namespace_declarations
                                 or options.validate_if_schema
                                 or options.create_entity_ref_nodes
                                 or options.entities
                                 or options.cdata_sections))
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve *uri* to an input source and parse it."""
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parse(input)

    def parse(self, input):
        """Parse the document described by the input source *input*.

        Uses ``input.byteStream`` when set; otherwise opens
        ``input.systemId`` with urllib when it is non-empty.
        """
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier lives on the input source; the options
        # object has no such attribute, so testing it there raised
        # AttributeError whenever no byte stream was supplied.
        if fp is None and input.systemId:
            import urllib.request
            fp = urllib.request.urlopen(input.systemId)
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Validate *action*; context parsing itself is not implemented."""
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Parse *stream* with an expatbuilder configured by *options*."""
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)
|
||||
|
||||
|
||||
def _name_xform(name):
    """Normalize a feature name: lowercase it and turn '-' into '_'."""
    lowered = name.lower()
    return lowered.replace('-', '_')
|
||||
|
||||
|
||||
class DOMEntityResolver(object):
    """Default resolver that turns a system identifier into an input source."""

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return a populated DOMInputSource."""
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath, urllib.parse
        parts = urllib.parse.urlparse(systemId)
        scheme, netloc, path, params, query, fragment = parts
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            path = posixpath.dirname(path) + "/"
            parts = scheme, netloc, path, params, query, fragment
            source.baseURI = urllib.parse.urlunparse(parts)

        return source

    def _get_opener(self):
        """Return the opener, creating and caching it on first use."""
        try:
            return self._opener
        except AttributeError:
            self._opener = self._create_opener()
            return self._opener

    def _create_opener(self):
        """Build the urllib opener used to fetch entities."""
        import urllib.request
        return urllib.request.build_opener()

    def _guess_media_encoding(self, source):
        """Return the lowercased charset from Content-Type, or None."""
        info = source.byteStream.info()
        if "Content-Type" in info:
            # getplist() was a Python 2 mimetools API; Python 3 header
            # objects are email.message.Message instances, which expose
            # the charset parameter through get_content_charset().
            return info.get_content_charset()
        return None
|
||||
|
||||
|
||||
class DOMInputSource(object):
    """Plain record describing where the XML input comes from."""

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        # Every slot starts out unset, in declaration order.
        for field in DOMInputSource.__slots__:
            setattr(self, field, None)

    # --- accessor pairs, one per slot ---

    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri
|
||||
|
||||
|
||||
class DOMBuilderFilter:
    """Element filter which can be used to tailor construction of
    a DOM instance.

    Not strictly required: a concrete filter can simply provide its own
    acceptNode() and startContainer(). Deriving from this class makes it
    easy to override only one of them, since both default to accepting.
    """

    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3
    FILTER_INTERRUPT = 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        return self.whatToShow

    def acceptNode(self, element):
        """Accept every node by default."""
        verdict = self.FILTER_ACCEPT
        return verdict

    def startContainer(self, element):
        """Accept every container by default."""
        verdict = self.FILTER_ACCEPT
        return verdict
|
||||
|
||||
del NodeFilter
|
||||
|
||||
|
||||
class _AsyncDeprecatedProperty:
    """Descriptor forwarding the old ``async`` name to ``async_``.

    Every read or write emits a DeprecationWarning first.
    """

    def warn(self, cls):
        owner = cls.__name__
        message = "{cls}.async is deprecated; use {cls}.async_".format(cls=owner)
        warnings.warn(message, DeprecationWarning)

    def __get__(self, instance, cls):
        self.warn(cls)
        # Class-level access has no instance to read from.
        if instance is None:
            return False
        return instance.async_

    def __set__(self, instance, value):
        self.warn(type(instance))
        instance.async_ = value
|
||||
|
||||
|
||||
class DocumentLS:
    """Mixin to create documents that conform to the load/save spec."""

    async_ = False
    locals()['async'] = _AsyncDeprecatedProperty()  # Avoid DeprecationWarning

    def _get_async(self):
        return False

    def _set_async(self, flag):
        # Only a true flag is rejected; a false one is accepted silently.
        if flag:
            raise xml.dom.NotSupportedErr(
                "asynchronous document loading is not supported")

    def abort(self):
        # What does it mean to "clear" a document?  Does the
        # documentElement disappear?
        raise NotImplementedError(
            "haven't figured out what this means yet")

    def load(self, uri):
        raise NotImplementedError("haven't written this yet")

    def loadXML(self, source):
        raise NotImplementedError("haven't written this yet")

    def saveXML(self, snode):
        """Serialize *snode*, or this document when *snode* is None."""
        if snode is None:
            return self.toxml()
        if snode.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr()
        return snode.toxml()
|
||||
|
||||
|
||||
del _AsyncDeprecatedProperty
|
||||
|
||||
|
||||
class DOMImplementationLS:
    """Factory for the load/save helper objects of this module."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a synchronous DOMBuilder; other requests are rejected."""
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_SYNCHRONOUS:
            builder = DOMBuilder()
            return builder
        elif mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        else:
            raise ValueError("unknown value for mode")

    def createDOMWriter(self):
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        source = DOMInputSource()
        return source
|
143
third_party/python/Lib/xml/etree/ElementInclude.py
vendored
Normal file
143
third_party/python/Lib/xml/etree/ElementInclude.py
vendored
Normal file
|
@ -0,0 +1,143 @@
|
|||
#
|
||||
# ElementTree
|
||||
# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
|
||||
#
|
||||
# limited xinclude support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-08-15 fl created
|
||||
# 2003-11-14 fl fixed default loader
|
||||
#
|
||||
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2008 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
# See http://www.python.org/psf/license for licensing details.
|
||||
|
||||
##
|
||||
# Limited XInclude support for the ElementTree package.
|
||||
##
|
||||
|
||||
import copy
|
||||
from . import ElementTree
|
||||
|
||||
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
|
||||
|
||||
XINCLUDE_INCLUDE = XINCLUDE + "include"
|
||||
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
|
||||
|
||||
##
|
||||
# Fatal include error.
|
||||
|
||||
class FatalIncludeError(SyntaxError):
    """Raised when an XInclude directive cannot be processed."""
|
||||
|
||||
##
|
||||
# Default loader. This loader reads an included resource from disk.
|
||||
#
|
||||
# @param href Resource reference.
|
||||
# @param parse Parse mode. Either "xml" or "text".
|
||||
# @param encoding Optional text encoding (UTF-8 by default for "text").
|
||||
# @return The expanded resource. If the parse mode is "xml", this
|
||||
# is an ElementTree instance. If the parse mode is "text", this
|
||||
# is a Unicode string. If the loader fails, it can return None
|
||||
# or raise an OSError exception.
|
||||
# @throws OSError If the loader fails to load the resource.
|
||||
|
||||
def default_loader(href, parse, encoding=None):
    """Load the resource *href* from disk.

    With parse == "xml" the file is parsed and its root element returned;
    any other mode reads the file as text, decoded with *encoding*
    (UTF-8 when not given).
    """
    if parse != "xml":
        with open(href, 'r', encoding=encoding or 'UTF-8') as stream:
            return stream.read()
    with open(href, 'rb') as stream:
        return ElementTree.parse(stream).getroot()
|
||||
|
||||
##
|
||||
# Expand XInclude directives.
|
||||
#
|
||||
# @param elem Root element.
|
||||
# @param loader Optional resource loader. If omitted, it defaults
|
||||
# to {@link default_loader}. If given, it should be a callable
|
||||
# that implements the same interface as <b>default_loader</b>.
|
||||
# @throws FatalIncludeError If the function fails to include a given
|
||||
# resource, or if the tree contains malformed XInclude elements.
|
||||
# @throws OSError If the function fails to load a given resource.
|
||||
|
||||
def include(elem, loader=None):
    """Expand xi:include directives beneath *elem*, in place.

    *loader* follows the default_loader interface and defaults to it.
    """
    if loader is None:
        loader = default_loader
    # Walk the children by index because the list is edited while scanning.
    index = 0
    while index < len(elem):
        child = elem[index]
        if child.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = child.get("href")
            mode = child.get("parse", "xml")
            if mode == "text":
                text = loader(href, mode, child.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, mode)
                        )
                # Text is folded into the preceding sibling's tail, or
                # into the parent's text when there is no such sibling.
                if index:
                    previous = elem[index - 1]
                    previous.tail = (previous.tail or "") + text + (child.tail or "")
                else:
                    elem.text = (elem.text or "") + text + (child.tail or "")
                del elem[index]
                # The next child has moved into this position.
                continue
            if mode != "xml":
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % mode
                )
            node = loader(href, mode)
            if node is None:
                raise FatalIncludeError(
                    "cannot load %r as %r" % (href, mode)
                    )
            node = copy.copy(node)
            if child.tail:
                node.tail = (node.tail or "") + child.tail
            elem[index] = node
        elif child.tag == XINCLUDE_FALLBACK:
            raise FatalIncludeError(
                "xi:fallback tag must be child of xi:include (%r)" % child.tag
                )
        else:
            include(child, loader)
        index += 1
|
314
third_party/python/Lib/xml/etree/ElementPath.py
vendored
Normal file
314
third_party/python/Lib/xml/etree/ElementPath.py
vendored
Normal file
|
@ -0,0 +1,314 @@
|
|||
#
|
||||
# ElementTree
|
||||
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
||||
#
|
||||
# limited xpath support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-05-23 fl created
|
||||
# 2003-05-28 fl added support for // etc
|
||||
# 2003-08-27 fl fixed parsing of periods in element names
|
||||
# 2007-09-10 fl new selection engine
|
||||
# 2007-09-12 fl fixed parent selector
|
||||
# 2007-09-13 fl added iterfind; changed findall to return a list
|
||||
# 2007-11-30 fl added namespaces support
|
||||
# 2009-10-30 fl added child element value filter
|
||||
#
|
||||
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2009 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
# See http://www.python.org/psf/license for licensing details.
|
||||
|
||||
##
|
||||
# Implementation module for XPath support. There's usually no reason
|
||||
# to import this module directly; the <b>ElementTree</b> does this for
|
||||
# you, if needed.
|
||||
##
|
||||
|
||||
import re
|
||||
|
||||
xpath_tokenizer_re = re.compile(
|
||||
r"("
|
||||
r"'[^']*'|\"[^\"]*\"|"
|
||||
r"::|"
|
||||
r"//?|"
|
||||
r"\.\.|"
|
||||
r"\(\)|"
|
||||
r"[/.*:\[\]\(\)@=])|"
|
||||
r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
|
||||
r"\s+"
|
||||
)
|
||||
|
||||
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (operator, tag) token pairs for *pattern*.

    Tags written as ``prefix:name`` are expanded to ``{uri}name`` using
    *namespaces*; an unmapped prefix raises SyntaxError.
    """
    for op, tag in xpath_tokenizer_re.findall(pattern):
        if not tag or tag[0] == "{" or ":" not in tag:
            # Nothing to expand: operator, plain name or already qualified.
            yield op, tag
            continue
        prefix, local = tag.split(":", 1)
        try:
            if not namespaces:
                raise KeyError
            yield op, "{%s}%s" % (namespaces[prefix], local)
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
|
||||
|
||||
def get_parent_map(context):
    """Return the child -> parent mapping for context.root.

    The mapping is built on first use and stored on context.parent_map.
    """
    cached = context.parent_map
    if cached is not None:
        return cached
    mapping = {}
    # Publish the dict before filling it, as the original did.
    context.parent_map = mapping
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
|
||||
|
||||
def prepare_child(next, token):
    """Build a selector yielding direct children whose tag equals token[1]."""
    wanted = token[1]

    def select(context, result):
        for parent in result:
            yield from (child for child in parent if child.tag == wanted)

    return select
|
||||
|
||||
def prepare_star(next, token):
    """Build a selector yielding every direct child of each element."""
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child
    return select
|
||||
|
||||
def prepare_self(next, token):
    """Build a selector that passes its input elements through unchanged."""
    def select(context, result):
        for elem in result:
            yield elem
    return select
|
||||
|
||||
def prepare_descendant(next, token):
    """Build a selector for ``//tag`` or ``//*``.

    Consumes the following token to learn the tag; returns None when the
    token stream is already exhausted.
    """
    try:
        token = next()
    except StopIteration:
        return
    kind = token[0]
    if kind == "*":
        tag = "*"
    elif kind:
        raise SyntaxError("invalid descendant")
    else:
        tag = token[1]

    def select(context, result):
        for start in result:
            for node in start.iter(tag):
                # iter() also visits the starting element; skip it.
                if node is not start:
                    yield node

    return select
|
||||
|
||||
def prepare_parent(next, token):
    """Build a selector yielding each distinct parent once, in first-seen order."""
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        emitted = set()
        for elem in result:
            if elem not in parents:
                continue
            parent = parents[elem]
            if parent in emitted:
                continue
            emitted.add(parent)
            yield parent
    return select
|
||||
|
||||
def prepare_predicate(next, token):
    """Build a selector for a bracketed predicate.

    Tokens are consumed up to the closing "]" and condensed into a short
    signature string that decides which predicate form applies. Returns
    None when the token stream ends before "]"; raises SyntaxError for
    an unsupported predicate.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    shape = []
    predicate = []
    while True:
        try:
            token = next()
        except StopIteration:
            return
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # Quoted literal: record it as "'" and strip the quotes.
            token = "'", token[0][1:-1]
        shape.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(shape)

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]

        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select

    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]

        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select

    if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]

        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select

    if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]

        def select(context, result):
            for elem in result:
                for candidate in elem.findall(tag):
                    if "".join(candidate.itertext()) == value:
                        yield elem
                        break
        return select

    if signature in ("-", "-()", "-()-"):
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            index = int(predicate[0]) - 1
            if index < 0:
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
                if index > -2:
                    raise SyntaxError("XPath offset from last() must be negative")
            else:
                index = -1

        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    siblings = list(parent.findall(elem.tag))
                    if siblings[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    pass
        return select

    raise SyntaxError("invalid predicate")
|
||||
|
||||
ops = {
|
||||
"": prepare_child,
|
||||
"*": prepare_star,
|
||||
".": prepare_self,
|
||||
"..": prepare_parent,
|
||||
"//": prepare_descendant,
|
||||
"[": prepare_predicate,
|
||||
}
|
||||
|
||||
_cache = {}
|
||||
|
||||
class _SelectorContext:
    """Per-query state: the root element plus a parent map built on demand."""

    # Class-level default; get_parent_map() replaces it per instance.
    parent_map = None

    def __init__(self, root):
        self.root = root
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
##
|
||||
# Generate all matching objects.
|
||||
|
||||
def iterfind(elem, path, namespaces=None):
    """Return an iterable of the elements below *elem* matching *path*.

    Compiled selector chains are kept in the module-level ``_cache``,
    keyed by the path as given plus the sorted namespace items. Raises
    SyntaxError for an absolute or malformed path.
    """
    # compile selector pattern
    cache_key = (path, None if namespaces is None
                            else tuple(sorted(namespaces.items())))
    if path[-1:] == "/":
        path = path + "*"  # implicit all (FIXME: keep this?)
    try:
        selector = _cache[cache_key]
    except KeyError:
        # Crude bound on the cache: drop everything once it grows large.
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        next_token = iter(xpath_tokenizer(path, namespaces)).__next__
        try:
            token = next_token()
        except StopIteration:
            # An empty path matches nothing. Hand back an empty iterator
            # rather than None so callers that pass the result to list()
            # or next() keep working.
            return iter(())
        selector = []
        while True:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                raise SyntaxError("invalid path") from None
            try:
                token = next_token()
                if token[0] == "/":
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
|
||||
|
||||
##
|
||||
# Find first matching object.
|
||||
|
||||
def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if there is none."""
    matches = iterfind(elem, path, namespaces)
    return next(matches, None)
|
||||
|
||||
##
|
||||
# Find all matching objects.
|
||||
|
||||
def findall(elem, path, namespaces=None):
    """Return a list of every element matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
|
||||
|
||||
##
|
||||
# Find text for first matching object.
|
||||
|
||||
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first match, "" if it has none, else *default*."""
    try:
        first = next(iterfind(elem, path, namespaces))
        text = first.text
        return text if text else ""
    except StopIteration:
        return default
|
1656
third_party/python/Lib/xml/etree/ElementTree.py
vendored
Normal file
1656
third_party/python/Lib/xml/etree/ElementTree.py
vendored
Normal file
File diff suppressed because it is too large
Load diff
33
third_party/python/Lib/xml/etree/__init__.py
vendored
Normal file
33
third_party/python/Lib/xml/etree/__init__.py
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $
|
||||
# elementtree package
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2008 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
# See http://www.python.org/psf/license for licensing details.
|
3
third_party/python/Lib/xml/etree/cElementTree.py
vendored
Normal file
3
third_party/python/Lib/xml/etree/cElementTree.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Deprecated alias for xml.etree.ElementTree
|
||||
|
||||
from xml.etree.ElementTree import *
|
8
third_party/python/Lib/xml/parsers/__init__.py
vendored
Normal file
8
third_party/python/Lib/xml/parsers/__init__.py
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
"""Python interfaces to XML parsers.
|
||||
|
||||
This package contains one module:
|
||||
|
||||
expat -- Python wrapper for James Clark's Expat parser, with namespace
|
||||
support.
|
||||
|
||||
"""
|
8
third_party/python/Lib/xml/parsers/expat.py
vendored
Normal file
8
third_party/python/Lib/xml/parsers/expat.py
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
"""Interface to the Expat non-validating XML parser."""
|
||||
import sys
|
||||
|
||||
from pyexpat import *
|
||||
|
||||
# provide pyexpat submodules as xml.parsers.expat submodules
|
||||
sys.modules['xml.parsers.expat.model'] = model
|
||||
sys.modules['xml.parsers.expat.errors'] = errors
|
107
third_party/python/Lib/xml/sax/__init__.py
vendored
Normal file
107
third_party/python/Lib/xml/sax/__init__.py
vendored
Normal file
|
@ -0,0 +1,107 @@
|
|||
"""Simple API for XML (SAX) implementation for Python.
|
||||
|
||||
This module provides an implementation of the SAX 2 interface;
|
||||
information about the Java version of the interface can be found at
|
||||
http://www.megginson.com/SAX/. The Python version of the interface is
|
||||
documented at <...>.
|
||||
|
||||
This package contains the following modules:
|
||||
|
||||
handler -- Base classes and constants which define the SAX 2 API for
|
||||
the 'client-side' of SAX for Python.
|
||||
|
||||
saxutils -- Implementation of the convenience classes commonly used to
|
||||
work with SAX.
|
||||
|
||||
xmlreader -- Base classes and constants which define the SAX 2 API for
|
||||
the parsers used with SAX for Python.
|
||||
|
||||
expatreader -- Driver that allows use of the Expat parser with SAX.
|
||||
"""
|
||||
|
||||
from .xmlreader import InputSource
|
||||
from .handler import ContentHandler, ErrorHandler
|
||||
from ._exceptions import SAXException, SAXNotRecognizedException, \
|
||||
SAXParseException, SAXNotSupportedException, \
|
||||
SAXReaderNotAvailable
|
||||
|
||||
|
||||
def parse(source, handler, errorHandler=ErrorHandler()):
    """Parse *source* with a freshly created SAX parser.

    Events go to *handler*, errors to *errorHandler*. As in
    parseString(), passing None for *errorHandler* selects a default
    ErrorHandler instead of installing None on the parser.
    """
    if errorHandler is None:
        errorHandler = ErrorHandler()
    parser = make_parser()
    parser.setContentHandler(handler)
    parser.setErrorHandler(errorHandler)
    parser.parse(source)
|
||||
|
||||
def parseString(string, handler, errorHandler=ErrorHandler()):
    """Parse XML held in memory as a str or a bytes-like object.

    A str is fed to the parser as a character stream, anything else as a
    byte stream. None for *errorHandler* selects a default ErrorHandler.
    """
    import io
    if errorHandler is None:
        errorHandler = ErrorHandler()
    parser = make_parser()
    parser.setContentHandler(handler)
    parser.setErrorHandler(errorHandler)

    source = InputSource()
    if not isinstance(string, str):
        source.setByteStream(io.BytesIO(string))
    else:
        source.setCharacterStream(io.StringIO(string))
    parser.parse(source)
|
||||
|
||||
# this is the parser list used by the make_parser function if no
|
||||
# alternatives are given as parameters to the function
|
||||
|
||||
default_parser_list = ["xml.sax.expatreader"]
|
||||
|
||||
# tell modulefinder that importing sax potentially imports expatreader
|
||||
_false = 0
|
||||
if _false:
|
||||
import xml.sax.expatreader
|
||||
|
||||
import os, sys
|
||||
if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ:
|
||||
default_parser_list = os.environ["PY_SAX_PARSER"].split(",")
|
||||
del os
|
||||
|
||||
_key = "python.xml.sax.parser"
|
||||
if sys.platform[:4] == "java" and sys.registry.containsKey(_key):
|
||||
default_parser_list = sys.registry.getProperty(_key).split(",")
|
||||
|
||||
|
||||
def make_parser(parser_list=()):
    """Creates and returns a SAX parser.

    Creates the first parser it is able to instantiate of the ones
    given in the sequence created by chaining parser_list and
    default_parser_list.  Both must contain the names of Python
    modules containing both a SAX parser and a create_parser function.

    parser_list may be any iterable of module names (list, tuple,
    generator, ...).  Raises SAXReaderNotAvailable when none of the
    named modules yields a parser.
    """
    # Local import: the module-level name ``sys`` is deleted at the end of
    # this file, so it is not available by the time this function runs.
    import sys

    # Copying into a list lets callers pass any iterable and allows the
    # default to be an immutable tuple instead of a shared mutable list.
    for parser_name in list(parser_list) + default_parser_list:
        try:
            return _create_parser(parser_name)
        except ImportError:
            if parser_name in sys.modules:
                # The parser module was found, but importing it
                # failed unexpectedly, pass this exception through
                raise
        except SAXReaderNotAvailable:
            # The parser module detected that it won't work properly,
            # so try the next one
            pass

    raise SAXReaderNotAvailable("No parsers found", None)
|
||||
|
||||
# --- Internal utility methods used by make_parser
|
||||
|
||||
if sys.platform.startswith("java"):
    def _create_parser(parser_name):
        """Import the named driver via org.python.core.imp and return the
        result of its create_parser() function."""
        from org.python.core import imp
        driver = imp.importName(parser_name, 0, globals())
        return driver.create_parser()

else:
    def _create_parser(parser_name):
        """Import the named driver module and return the result of its
        create_parser() function."""
        # A non-empty fromlist makes __import__ return the named submodule
        # itself rather than the top-level package.
        driver = __import__(parser_name, {}, {}, ['create_parser'])
        return driver.create_parser()
|
||||
|
||||
del sys
|
131
third_party/python/Lib/xml/sax/_exceptions.py
vendored
Normal file
131
third_party/python/Lib/xml/sax/_exceptions.py
vendored
Normal file
|
@ -0,0 +1,131 @@
|
|||
"""Different kinds of SAX Exceptions"""
|
||||
import sys
|
||||
if sys.platform[:4] == "java":
|
||||
from java.lang import Exception
|
||||
del sys
|
||||
|
||||
# ===== SAXEXCEPTION =====
|
||||
|
||||
class SAXException(Exception):
    """Base class for SAX errors and warnings.

    Carries a message and, optionally, another exception that it wraps.
    Instances are what the ErrorHandler callbacks receive; a handler is
    free to inspect the object instead of raising it.  Subclass it to
    attach extra information or to localize messages.
    """

    def __init__(self, msg, exception=None):
        """Store the required message and the optional wrapped exception."""
        self._msg, self._exception = msg, exception
        Exception.__init__(self, msg)

    def getMessage(self):
        "Return the message given at construction."
        return self._msg

    def getException(self):
        "Return the wrapped exception, or None when nothing was wrapped."
        return self._exception

    def __str__(self):
        "Return the message as the string form of the exception."
        return self._msg

    def __getitem__(self, ix):
        """Reject indexing: exception[ix] always raises AttributeError."""
        raise AttributeError("__getitem__")
|
||||
|
||||
|
||||
# ===== SAXPARSEEXCEPTION =====
|
||||
|
||||
class SAXParseException(SAXException):
    """A SAXException that also records where in the document it arose.

    The position comes from the locator passed to the constructor.
    Handlers receiving this object through the ErrorHandler interface
    need not raise it; they may read the information and act otherwise.
    Wrapping another exception works as in SAXException.
    """

    def __init__(self, msg, exception, locator):
        "Creates the exception. The exception parameter is allowed to be None."
        SAXException.__init__(self, msg, exception)
        self._locator = locator

        # Snapshot the position now: once the exception propagates, the
        # objects the locator reads from may already be gone.
        self._systemId = locator.getSystemId()
        self._colnum = locator.getColumnNumber()
        self._linenum = locator.getLineNumber()

    def getColumnNumber(self):
        "Return the column number captured when the exception was created."
        return self._colnum

    def getLineNumber(self):
        "Return the line number captured when the exception was created."
        return self._linenum

    def getPublicId(self):
        "Return the public identifier, asked from the locator on each call."
        return self._locator.getPublicId()

    def getSystemId(self):
        "Return the system identifier captured when the exception was created."
        return self._systemId

    def __str__(self):
        "Format as 'sysid:line:column: message', with placeholders for None."
        readings = (
            (self.getSystemId(), "<unknown>"),
            (self.getLineNumber(), "?"),
            (self.getColumnNumber(), "?"),
        )
        sysid, linenum, colnum = [
            placeholder if value is None else value
            for value, placeholder in readings
        ]
        return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg)
|
||||
|
||||
|
||||
# ===== SAXNOTRECOGNIZEDEXCEPTION =====
|
||||
|
||||
class SAXNotRecognizedException(SAXException):
    """Raised for an identifier that is not recognized.

    XMLReader implementations raise it when asked about a feature or
    property they do not know.  Applications and extensions may reuse it
    for comparable situations.
    """
|
||||
|
||||
|
||||
# ===== SAXNOTSUPPORTEDEXCEPTION =====
|
||||
|
||||
class SAXNotSupportedException(SAXException):
    """Raised for an operation that is not supported.

    XMLReader implementations raise it when asked for a service they
    cannot perform, in particular setting a state or value.  Applications
    and extensions may reuse it for comparable situations.
    """
|
||||
|
||||
# ===== SAXREADERNOTAVAILABLE =====
|
||||
|
||||
class SAXReaderNotAvailable(SAXNotSupportedException):
    """Raised when a driver is missing or cannot be used.

    A driver module should raise it at import time, for instance when a
    module it depends on cannot be imported.  It may also be raised while
    parsing, for instance when running an external program is not
    permitted.
    """
|
446
third_party/python/Lib/xml/sax/expatreader.py
vendored
Normal file
446
third_party/python/Lib/xml/sax/expatreader.py
vendored
Normal file
|
@ -0,0 +1,446 @@
|
|||
"""
|
||||
SAX driver for the pyexpat C module. This driver works with
|
||||
pyexpat.__version__ == '2.22'.
|
||||
"""
|
||||
|
||||
version = "0.20"
|
||||
|
||||
from xml.sax._exceptions import *
|
||||
from xml.sax.handler import feature_validation, feature_namespaces
|
||||
from xml.sax.handler import feature_namespace_prefixes
|
||||
from xml.sax.handler import feature_external_ges, feature_external_pes
|
||||
from xml.sax.handler import feature_string_interning
|
||||
from xml.sax.handler import property_xml_string, property_interning_dict
|
||||
|
||||
# xml.parsers.expat does not raise ImportError in Jython
|
||||
import sys
|
||||
if sys.platform[:4] == "java":
|
||||
raise SAXReaderNotAvailable("expat not available in Java", None)
|
||||
del sys
|
||||
|
||||
try:
|
||||
from xml.parsers import expat
|
||||
except ImportError:
|
||||
raise SAXReaderNotAvailable("expat not supported", None)
|
||||
else:
|
||||
if not hasattr(expat, "ParserCreate"):
|
||||
raise SAXReaderNotAvailable("expat not supported", None)
|
||||
from xml.sax import xmlreader, saxutils, handler
|
||||
|
||||
AttributesImpl = xmlreader.AttributesImpl
|
||||
AttributesNSImpl = xmlreader.AttributesNSImpl
|
||||
|
||||
# If we're using a sufficiently recent version of Python, we can use
|
||||
# weak references to avoid cycles between the parser and content
|
||||
# handler, otherwise we'll just have to pretend.
|
||||
try:
    import _weakref
except ImportError:
    # No weak reference support: the "proxy" is simply the object itself.
    def _mkproxy(o):
        return o
else:
    # Weak references are available: proxies are built by weakref.proxy.
    from weakref import proxy as _mkproxy
    del _weakref
|
||||
|
||||
class _ClosedParser:
    """Plain attribute holder kept in place of the expat parser after
    close(), so the last error line and column remain readable."""
|
||||
|
||||
# --- ExpatLocator
|
||||
|
||||
class ExpatLocator(xmlreader.Locator):
    """Locator that reads its answers from an ExpatParser.

    The parser is held through _mkproxy, so that the content handler
    storing this locator does not form a reference cycle with the parser.
    """

    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        "Return expat's error column, or None when no expat parser exists."
        owner = self._ref
        return None if owner._parser is None else owner._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's error line, or 1 when no expat parser exists."
        owner = self._ref
        return 1 if owner._parser is None else owner._parser.ErrorLineNumber

    def getPublicId(self):
        "Return the public identifier of the parser's current source."
        owner = self._ref
        if owner is not None:
            return owner._source.getPublicId()
        return None

    def getSystemId(self):
        "Return the system identifier of the parser's current source."
        owner = self._ref
        if owner is not None:
            return owner._source.getSystemId()
        return None
|
||||
|
||||
|
||||
# --- ExpatParser
|
||||
|
||||
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver built on the pyexpat C module.

    The instance is both the incremental reader and the locator passed to
    SAXParseException when expat reports an error.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._namespaces = namespaceHandling
        self._source = xmlreader.InputSource()
        self._parser = None         # expat parser, created by reset()
        self._parsing = 0
        self._entity_stack = []     # (parser, source) pairs of suspended entities
        self._external_ges = 0
        self._interning = None
        self._lex_handler_prop = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        prepared = saxutils.prepare_input_source(source)
        self._source = prepared
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, prepared)
        except:
            # bpo-30264: callers of xml.sax.parse() never see this parser
            # object, so the source has to be closed here on failure or
            # it would leak.
            self._close_source()
            raise

    def prepareParser(self, source):
        "Give expat the source's system identifier as base, when it has one."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # setContentHandler is redefined so handlers can be swapped mid-parse.

    def setContentHandler(self, handler):
        "Register the content handler, rebinding expat callbacks if parsing."
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        "Return the state of a feature; unknown names raise SAXNotRecognizedException."
        if name == feature_namespaces:
            return self._namespaces
        if name == feature_string_interning:
            return self._interning is not None
        if name in (feature_validation, feature_external_pes,
                    feature_namespace_prefixes):
            return 0
        if name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        "Change a feature; not allowed while a parse is in progress."
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
            return
        if name == feature_external_ges:
            self._external_ges = state
            return
        if name == feature_string_interning:
            if not state:
                self._interning = None
            elif self._interning is None:
                self._interning = {}
            return

        # Features that are recognized but can only ever be switched off.
        always_off = (
            (feature_validation,
             "expat does not support validation"),
            (feature_external_pes,
             "expat does not read external parameter entities"),
            (feature_namespace_prefixes,
             "expat does not report namespace prefixes"),
        )
        for feature, complaint in always_off:
            if name == feature:
                if state:
                    raise SAXNotSupportedException(complaint)
                return

        raise SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        "Return the value of a property; unknown names raise SAXNotRecognizedException."
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        if name == property_interning_dict:
            return self._interning
        if name == property_xml_string:
            if not self._parser:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
            if not hasattr(self._parser, "GetInputContext"):
                raise SAXNotRecognizedException(
                    "This version of expat does not support getting"
                    " the XML string")
            return self._parser.GetInputContext()
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        "Set a property; the XML string property is read-only."
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
            return
        if name == property_interning_dict:
            self._interning = value
            return
        if name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        raise SAXNotRecognizedException("Property '%s' not recognized" %
                                        name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        "Pass a chunk of data to expat, starting a new document if needed."
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # isFinal is internal to this reader: chunks fed from outside
            # are not final, only close() passes a true value, which makes
            # expat check the document as a whole.
            self._parser.Parse(data, isFinal)
        except expat.error as problem:
            report = SAXParseException(expat.ErrorString(problem.code),
                                       problem, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(report)

    def _close_source(self):
        "Close the character stream and the byte stream of the current source."
        current = self._source
        try:
            stream = current.getCharacterStream()
            if stream is not None:
                stream.close()
        finally:
            # Runs even if closing the character stream failed.
            stream = current.getByteStream()
            if stream is not None:
                stream.close()

    def close(self):
        "Finish the document: final feed, endDocument, release the parser."
        if (self._entity_stack or self._parser is None or
                isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            self._parsing = 0
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Reached only when the try block failed: keep the error
                # position available after closing.
                remains = _ClosedParser()
                remains.ErrorColumnNumber = self._parser.ErrorColumnNumber
                remains.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = remains
            self._close_source()

    def _reset_cont_handler(self):
        "Point expat's PI and character-data callbacks at the content handler."
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        "Install, or clear, the callbacks that serve the lexical handler."
        lexical = self._lex_handler_prop
        target = self._parser
        if lexical is not None:
            target.CommentHandler = lexical.comment
            target.StartCdataSectionHandler = lexical.startCDATA
            target.EndCdataSectionHandler = lexical.endCDATA
            target.StartDoctypeDeclHandler = self.start_doctype_decl
            target.EndDoctypeDeclHandler = lexical.endDTD
            return
        target.CommentHandler = None
        target.StartCdataSectionHandler = None
        target.EndCdataSectionHandler = None
        target.StartDoctypeDeclHandler = None
        target.EndDoctypeDeclHandler = None

    def reset(self):
        "Create a fresh expat parser and attach every callback to it."
        # In namespace mode expat joins the parts of a name with a space;
        # otherwise no separator is requested (None is expat's default).
        separator = " " if self._namespaces else None
        created = expat.ParserCreate(self._source.getEncoding(), separator,
                                     intern=self._interning)
        self._parser = created
        if self._namespaces:
            created.namespace_prefixes = 1
            created.StartElementHandler = self.start_element_ns
            created.EndElementHandler = self.end_element_ns
        else:
            created.StartElementHandler = self.start_element
            created.EndElementHandler = self.end_element

        self._reset_cont_handler()
        created.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        created.NotationDeclHandler = self.notation_decl
        created.StartNamespaceDeclHandler = self.start_namespace_decl
        created.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
        # DefaultHandler, DefaultHandlerExpand and NotStandaloneHandler are
        # deliberately left unset.
        created.ExternalEntityRefHandler = self.external_entity_ref
        try:
            created.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        created.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's error column, or None before a parser exists."
        if self._parser is not None:
            return self._parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        "Return expat's error line, or 1 before a parser exists."
        if self._parser is not None:
            return self._parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        "Return the public identifier of the current source."
        return self._source.getPublicId()

    def getSystemId(self):
        "Return the system identifier of the current source."
        return self._source.getSystemId()

    # event handlers

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        "Translate expat's space-joined names into SAX namespace tuples."
        pieces = name.split()
        count = len(pieces)
        if count == 3:
            pair = pieces[0], pieces[1]
        elif count == 1:
            # no namespace
            pair = (None, name)
        else:
            # default namespace
            pair = tuple(pieces)

        values = {}
        qnames = {}
        for aname, value in attrs.items():
            parts = aname.split()
            found = len(parts)
            if found == 3:
                # third part is the prefix, second the local name
                key = parts[0], parts[1]
                qname = "%s:%s" % (parts[2], parts[1])
            elif found == 1:
                # no namespace
                key = (None, aname)
                qname = aname
            else:
                # default namespace
                key = tuple(parts)
                qname = parts[1]

            values[key] = value
            qnames[key] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(values, qnames))

    def end_element_ns(self, name):
        "Report the end of an element using the same tuple form as the start."
        pieces = name.split()
        count = len(pieces)
        if count == 3:
            pair = pieces[0], pieces[1]
        elif count == 1:
            pair = (None, name)
        else:
            pair = tuple(pieces)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # expat supplies sysid before pubid; startDTD is called with pubid first.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        "Parse an external general entity; returns 1 on success, 0 on failure."
        if not self._external_ges:
            return 1

        resolved = self._ent_handler.resolveEntity(pubid, sysid)
        resolved = saxutils.prepare_input_source(resolved,
                                                 self._source.getSystemId() or
                                                 "")

        # Suspend the current parser/source pair while the entity is read.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = resolved

        try:
            xmlreader.IncrementalParser.parse(self, resolved)
        except:
            return 0  # FIXME: save error info here?

        self._parser, self._source = self._entity_stack.pop()
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
|
||||
|
||||
# ---
|
||||
|
||||
def create_parser(*args, **kwargs):
    """Return a new ExpatParser, forwarding every argument to its constructor."""
    parser = ExpatParser(*args, **kwargs)
    return parser
|
||||
|
||||
# ---
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: parse a remote document and pass its events to
    # saxutils.XMLGenerator.
    import xml.sax.saxutils
    demo = create_parser()
    demo.setErrorHandler(xml.sax.ErrorHandler())
    demo.setContentHandler(xml.sax.saxutils.XMLGenerator())
    demo.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")
|
342
third_party/python/Lib/xml/sax/handler.py
vendored
Normal file
342
third_party/python/Lib/xml/sax/handler.py
vendored
Normal file
|
@ -0,0 +1,342 @@
|
|||
"""
|
||||
This module contains the core classes of version 2.0 of SAX for Python.
|
||||
This file provides only default classes with absolutely minimum
|
||||
functionality, from which drivers and applications can be subclassed.
|
||||
|
||||
Many of these classes are empty and are included only as documentation
|
||||
of the interfaces.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
version = '2.0beta'
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# HANDLER INTERFACES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
# ===== ERRORHANDLER =====
|
||||
|
||||
class ErrorHandler:
    """Default SAX error handler.

    Register an object with these three methods on an XMLReader and the
    parser reports every problem through it.  Problems come in three
    levels: warnings, (possibly) recoverable errors and unrecoverable
    errors.  Each method receives a SAXParseException as its only
    argument.
    """

    def error(self, exception):
        "Handle a recoverable error by raising the exception."
        raise exception

    def fatalError(self, exception):
        "Handle a non-recoverable error by raising the exception."
        raise exception

    def warning(self, exception):
        "Handle a warning by printing the exception."
        print(exception)
|
||||
|
||||
|
||||
# ===== CONTENTHANDLER =====
|
||||
|
||||
class ContentHandler:
    """Callback interface for the logical content of a document.

    This is the central SAX interface for applications.  Events arrive
    in the same order as the corresponding information appears in the
    document.  Every callback here is a no-op apart from
    setDocumentLocator, which stores the locator.
    """

    def __init__(self):
        self._locator = None

    def setDocumentLocator(self, locator):
        """Store the locator the parser offers for locating events.

        Parsers are strongly encouraged, though not required, to supply
        a locator; one that does must call this method before any other
        method of this interface.

        Through the locator the application can find the end position of
        any document event, even when no error is being reported --
        typically to report problems of its own, such as character
        content that breaks an application rule.  The information is
        probably not sufficient for use with a search engine.

        The locator is only reliable while one of the events of this
        interface is being delivered; it should not be used at any other
        time.
        """
        self._locator = locator

    def startDocument(self):
        """Called once when the document begins.

        Apart from setDocumentLocator, no other method of this interface
        or of DTDHandler is invoked before this one.
        """

    def endDocument(self):
        """Called once when the document ends; the last event of a parse.

        It is not invoked until the parser has reached the end of the
        input or has given up because of an unrecoverable error.
        """

    def startPrefixMapping(self, prefix, uri):
        """Called when a prefix-to-URI Namespace mapping comes into scope.

        Ordinary Namespace processing does not need this event: with the
        http://xml.org/sax/features/namespaces feature true (the
        default) the reader replaces prefixes in element and attribute
        names by itself.

        Prefixes inside character data or attribute values cannot be
        expanded safely by the reader.  The start/endPrefixMapping
        events give the application what it needs to expand them there.

        These events are not guaranteed to nest properly relative to
        each other.  All startPrefixMapping events precede the matching
        startElement event and all endPrefixMapping events follow the
        matching endElement event, but no further ordering is promised.
        """

    def endPrefixMapping(self, prefix):
        """Called when a prefix-to-URI mapping goes out of scope.

        See startPrefixMapping.  The event always follows the matching
        endElement event; the relative order of several endPrefixMapping
        events is not otherwise guaranteed.
        """

    def startElement(self, name, attrs):
        """Called at the start of an element in non-namespace mode.

        name is the raw XML 1.0 name of the element type as a string;
        attrs is an Attributes instance holding the element's
        attributes.
        """

    def endElement(self, name):
        """Called at the end of an element in non-namespace mode.

        name is the element type name, as passed to startElement.
        """

    def startElementNS(self, name, qname, attrs):
        """Called at the start of an element in namespace mode.

        name is a (uri, localname) tuple, qname the raw XML 1.0 name
        used in the source document, and attrs an Attributes instance
        holding the element's attributes.

        For an element without a namespace the uri part of name is None.
        """

    def endElementNS(self, name, qname):
        """Called at the end of an element in namespace mode.

        name is the element type name, as passed to startElementNS.
        """

    def characters(self, content):
        """Called for each chunk of character data.

        A parser may deliver contiguous character data as one chunk or
        split it into several.  All characters of a single event come
        from the same external entity, so that the Locator provides
        useful information.
        """

    def ignorableWhitespace(self, whitespace):
        """Called for ignorable whitespace in element content.

        Validating parsers must report each chunk of ignorable
        whitespace this way (W3C XML 1.0 recommendation, section 2.10).
        Non-validating parsers may do so as well if they are able to
        parse and use content models.

        Contiguous whitespace may arrive as one chunk or as several.
        All characters of a single event come from the same external
        entity, so that the Locator provides useful information.
        """

    def processingInstruction(self, target, data):
        """Called once for every processing instruction found.

        Processing instructions may occur before or after the main
        document element.

        An XML declaration (XML 1.0, section 2.8) or a text declaration
        (XML 1.0, section 4.3.1) should never be reported through this
        method.
        """

    def skippedEntity(self, name):
        """Called once for every entity that was skipped.

        Non-validating processors may skip entities whose declarations
        they have not seen, for example because the entity was declared
        in an external DTD subset.  Any processor may skip external
        entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
|
||||
|
||||
|
||||
# ===== DTDHandler =====
|
||||
|
||||
class DTDHandler:
    """Callback interface for DTD events.

    Only the DTD events required for basic parsing are covered
    (unparsed entities and attributes).  Both callbacks are no-ops.
    """

    def notationDecl(self, name, publicId, systemId):
        "Called for a notation declaration."

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Called for an unparsed entity declaration."
|
||||
|
||||
|
||||
# ===== ENTITYRESOLVER =====
|
||||
|
||||
class EntityResolver:
    """Default interface for resolving external entities.

    Register an object with this method on a parser and the parser calls
    it to resolve every external entity.  Note that DefaultHandler
    implements this interface with the default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Return where to read the entity from.

        The result is either a system identifier string or an
        InputSource.  This implementation returns systemId unchanged.
        """
        return systemId
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# CORE FEATURES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
feature_namespaces = "http://xml.org/sax/features/namespaces"
# true:   Namespace processing is performed (default).
# false:  Namespace processing may be skipped
#         (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true:   The original prefixed names are reported, together with the
#         attributes used for Namespace declarations.
# false:  Attributes used for Namespace declarations are not reported,
#         and original prefixed names may be omitted (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true:   Every element name, prefix, attribute name, Namespace URI and
#         local name is interned with the built-in intern function.
# false:  Names need not be interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true:   All validation errors are reported (implies
#         external-general-entities and external-parameter-entities).
# false:  Validation errors are not reported.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true:   All external general (text) entities are included.
# false:  External general entities are not included.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true:   All external parameter entities are included, the external
#         DTD subset among them.
# false:  No external parameter entity is included, not even the
#         external DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every core feature name defined above, in declaration order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# CORE PROPERTIES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type:   xml.sax.sax2lib.LexicalHandler
# description: Optional extension handler for lexical events such as
#              comments.
# access:      read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type:   xml.sax.sax2lib.DeclHandler
# description: Optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access:      read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type:   org.w3c.dom.Node
# description: While parsing, the DOM node currently being visited if
#              this is a DOM iterator; while not parsing, the root DOM
#              node for iteration.
# access:      (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type:   String
# description: The literal string of characters that was the source
#              for the current event.
# access:      read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type:   String
# description: Name of the encoding to assume for input data.
# access:      write: set the encoding, e.g. established by a
#                     higher-level protocol. May change during parsing
#                     (e.g. after processing a META tag).
#              read:  return the current encoding (possibly established
#                     through auto-detection).
# initial value: UTF-8
#

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type:   Dictionary
# description: The dictionary used to intern common strings in the
#              document.
# access:      write: request that the parser use a specific
#                     dictionary, to allow interning across different
#                     documents.
#              read:  return the current interning dictionary, or None.
#

# Every core property name defined above. Note that dom-node is listed
# before declaration-handler here.
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]
|
368
third_party/python/Lib/xml/sax/saxutils.py
vendored
Normal file
368
third_party/python/Lib/xml/sax/saxutils.py
vendored
Normal file
|
@ -0,0 +1,368 @@
|
|||
"""\
|
||||
A library of useful helper classes to the SAX classes, for the
|
||||
convenience of application and driver writers.
|
||||
"""
|
||||
|
||||
import os, urllib.parse, urllib.request
|
||||
import io
|
||||
import codecs
|
||||
from . import handler
|
||||
from . import xmlreader
|
||||
|
||||
def __dict_replace(s, d):
    """Return s with each key of d replaced by the matching value.

    The replacements are applied one after another, in the
    dictionary's iteration order, each working on the output of the
    previous one."""
    result = s
    for old, new in d.items():
        result = result.replace(old, new)
    return result
|
||||
|
||||
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    The three characters are replaced with the entity references
    &amp;, &lt; and &gt; respectively.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter. The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    The extra replacements run after the three built-in ones.
    """
    # NOTE: the shared default dict is only ever read here, never
    # mutated, so it is safe to keep as the default.

    # The ampersand must go first: the other replacements introduce
    # ampersands of their own, which must not be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
|
||||
|
||||
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    The three entity references are turned back into &, < and >.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter. The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    The extra replacements run before the ampersand is restored.
    """
    # NOTE: the shared default dict is only ever read here, never
    # mutated, so it is safe to keep as the default.
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must go last: restoring it earlier would turn text
    # such as "&amp;lt;" into "&lt;" and then wrongly into "<".
    return data.replace("&amp;", "&")
|
||||
|
||||
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value. Newline, carriage return and tab are written
    as the character references &#10;, &#13; and &#9;. The " character
    will be escaped as &quot; as well, if necessary.

    The result is wrapped in double quotes unless the data contains a
    double quote and no single quote, in which case single quotes are
    used and nothing more needs escaping.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter. The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # Work on a copy so neither the caller's dictionary nor the shared
    # default is modified by the whitespace entries added below.
    entities = entities.copy()
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote kinds occur: delimit with double quotes and
            # escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
|
||||
|
||||
|
||||
def _gettextwriter(out, encoding):
    """Return an object with a text-accepting write method for out.

    None selects sys.stdout. Text streams and codecs stream writers
    are returned unchanged. Anything else is wrapped in an
    io.TextIOWrapper that encodes with the given encoding, writes
    unencodable characters as XML character references, leaves
    newlines untranslated and passes every write straight through."""
    if out is None:
        import sys
        return sys.stdout

    # Objects that already take str are used as they are.
    text_writers = (io.TextIOBase, codecs.StreamWriter,
                    codecs.StreamReaderWriter)
    if isinstance(out, text_writers):
        return out

    if not isinstance(out, io.RawIOBase):
        # Any other object: it only needs to offer a write method.
        # Graft that method onto a bare BufferedIOBase.
        buffer = io.BufferedIOBase()
        buffer.writable = lambda: True
        buffer.write = out.write
        try:
            # TextIOWrapper consults these to decide whether a BOM
            # (for UTF-16 and the like) should be emitted.
            buffer.seekable = out.seekable
            buffer.tell = out.tell
        except AttributeError:
            pass
    else:
        # Raw binary stream: put a proxy in front of it whose close()
        # does nothing, so the original file stays open when the
        # TextIOWrapper is destroyed.
        class _wrapper:
            __class__ = out.__class__
            def __getattr__(self, name):
                return getattr(out, name)
        buffer = _wrapper()
        buffer.close = lambda: None

    return io.TextIOWrapper(buffer, encoding=encoding,
                            errors='xmlcharrefreplace',
                            newline='\n',
                            write_through=True)
|
||||
|
||||
class XMLGenerator(handler.ContentHandler):
|
||||
|
||||
def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
|
||||
handler.ContentHandler.__init__(self)
|
||||
out = _gettextwriter(out, encoding)
|
||||
self._write = out.write
|
||||
self._flush = out.flush
|
||||
self._ns_contexts = [{}] # contains uri -> prefix dicts
|
||||
self._current_context = self._ns_contexts[-1]
|
||||
self._undeclared_ns_maps = []
|
||||
self._encoding = encoding
|
||||
self._short_empty_elements = short_empty_elements
|
||||
self._pending_start_element = False
|
||||
|
||||
def _qname(self, name):
|
||||
"""Builds a qualified name from a (ns_url, localname) pair"""
|
||||
if name[0]:
|
||||
# Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
|
||||
# bound by definition to http://www.w3.org/XML/1998/namespace. It
|
||||
# does not need to be declared and will not usually be found in
|
||||
# self._current_context.
|
||||
if 'http://www.w3.org/XML/1998/namespace' == name[0]:
|
||||
return 'xml:' + name[1]
|
||||
# The name is in a non-empty namespace
|
||||
prefix = self._current_context[name[0]]
|
||||
if prefix:
|
||||
# If it is not the default namespace, prepend the prefix
|
||||
return prefix + ":" + name[1]
|
||||
# Return the unqualified name
|
||||
return name[1]
|
||||
|
||||
def _finish_pending_start_element(self,endElement=False):
|
||||
if self._pending_start_element:
|
||||
self._write('>')
|
||||
self._pending_start_element = False
|
||||
|
||||
# ContentHandler methods
|
||||
|
||||
def startDocument(self):
|
||||
self._write('<?xml version="1.0" encoding="%s"?>\n' %
|
||||
self._encoding)
|
||||
|
||||
def endDocument(self):
|
||||
self._flush()
|
||||
|
||||
def startPrefixMapping(self, prefix, uri):
|
||||
self._ns_contexts.append(self._current_context.copy())
|
||||
self._current_context[uri] = prefix
|
||||
self._undeclared_ns_maps.append((prefix, uri))
|
||||
|
||||
def endPrefixMapping(self, prefix):
|
||||
self._current_context = self._ns_contexts[-1]
|
||||
del self._ns_contexts[-1]
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
self._finish_pending_start_element()
|
||||
self._write('<' + name)
|
||||
for (name, value) in attrs.items():
|
||||
self._write(' %s=%s' % (name, quoteattr(value)))
|
||||
if self._short_empty_elements:
|
||||
self._pending_start_element = True
|
||||
else:
|
||||
self._write(">")
|
||||
|
||||
def endElement(self, name):
|
||||
if self._pending_start_element:
|
||||
self._write('/>')
|
||||
self._pending_start_element = False
|
||||
else:
|
||||
self._write('</%s>' % name)
|
||||
|
||||
def startElementNS(self, name, qname, attrs):
|
||||
self._finish_pending_start_element()
|
||||
self._write('<' + self._qname(name))
|
||||
|
||||
for prefix, uri in self._undeclared_ns_maps:
|
||||
if prefix:
|
||||
self._write(' xmlns:%s="%s"' % (prefix, uri))
|
||||
else:
|
||||
self._write(' xmlns="%s"' % uri)
|
||||
self._undeclared_ns_maps = []
|
||||
|
||||
for (name, value) in attrs.items():
|
||||
self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
|
||||
if self._short_empty_elements:
|
||||
self._pending_start_element = True
|
||||
else:
|
||||
self._write(">")
|
||||
|
||||
def endElementNS(self, name, qname):
|
||||
if self._pending_start_element:
|
||||
self._write('/>')
|
||||
self._pending_start_element = False
|
||||
else:
|
||||
self._write('</%s>' % self._qname(name))
|
||||
|
||||
def characters(self, content):
|
||||
if content:
|
||||
self._finish_pending_start_element()
|
||||
if not isinstance(content, str):
|
||||
content = str(content, self._encoding)
|
||||
self._write(escape(content))
|
||||
|
||||
def ignorableWhitespace(self, content):
|
||||
if content:
|
||||
self._finish_pending_start_element()
|
||||
if not isinstance(content, str):
|
||||
content = str(content, self._encoding)
|
||||
self._write(content)
|
||||
|
||||
def processingInstruction(self, target, data):
|
||||
self._finish_pending_start_element()
|
||||
self._write('<?%s %s?>' % (target, data))
|
||||
|
||||
|
||||
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through stage between an XMLReader and the application.

    The filter registers itself on its parent reader as every kind of
    handler and, by default, relays each event unchanged to the
    handlers registered on the filter, and each configuration request
    unchanged up to the parent. A subclass overrides individual
    methods to alter the event stream or the requests on their way
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Relay an error event to the registered error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Relay a fatal error event to the registered error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Relay a warning event to the registered error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Relay the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Relay the start-of-document event to the content handler."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Relay the end-of-document event to the content handler."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Relay the start of a prefix mapping to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Relay the end of a prefix mapping to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Relay a start-element event to the content handler."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Relay an end-element event to the content handler."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Relay a namespace-aware start-element event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Relay a namespace-aware end-element event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Relay character data to the content handler."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Relay ignorable whitespace to the content handler."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Relay a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Relay a skipped-entity notification to the content handler."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Relay a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Relay an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Ask the registered entity resolver and return its answer."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Hook this filter into the parent reader, then let it parse."""
        upstream = self._parent
        # The filter stands in for all four handler kinds on the parent.
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        """Pass the locale request up to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's state for the named feature."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set the named feature on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for the named property."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set the named property on the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter sits on top of."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter sits on top of."""
        self._parent = parent
|
||||
|
||||
# --- Utility functions
|
||||
|
||||
def prepare_input_source(source, base=""):
    """Return a fully resolved InputSource object ready for reading.

    source may be an InputSource, a system identifier given as a
    string, a path-like object, or a file-like object with a read
    method. base is an optional base URL (or path) against which a
    relative system identifier is resolved.

    A file-like object is registered as the character stream when
    its read method yields str and as the byte stream otherwise; a
    string-valued name attribute becomes the system identifier.

    When the source has no stream at all, its system identifier is
    opened: as a local file in binary mode if it names one, otherwise
    through urllib.request.urlopen. An InputSource passed in is
    updated in place. The stream opened here is not closed by this
    function."""

    # Path-like objects (pathlib.Path and friends) are reduced to their
    # filesystem path so that they take the same route as plain strings.
    if isinstance(source, os.PathLike):
        source = os.fspath(source)
    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        # A zero-length read reveals whether the stream produces text
        # or bytes without consuming any data.
        if isinstance(f.read(0), str):
            source.setCharacterStream(f)
        else:
            source.setByteStream(f)
        if hasattr(f, "name") and isinstance(f.name, str):
            source.setSystemId(f.name)

    if source.getCharacterStream() is None and source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
|
380
third_party/python/Lib/xml/sax/xmlreader.py
vendored
Normal file
380
third_party/python/Lib/xml/sax/xmlreader.py
vendored
Normal file
|
@ -0,0 +1,380 @@
|
|||
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
|
||||
should be based on this code. """
|
||||
|
||||
from . import handler
|
||||
|
||||
from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
|
||||
|
||||
|
||||
# ===== XMLREADER =====
|
||||
|
||||
class XMLReader:
    """Callback-based interface for reading an XML document.

    This is the interface an XML parser's SAX2 driver has to
    implement. Through it an application sets and queries the
    parser's features and properties, registers the handlers that
    receive document events, and starts a parse.

    Every SAX interface is assumed to be synchronous: a parse method
    must not return before parsing is complete, and a reader must
    wait for an event-handler callback to return before it reports
    the next event."""

    def __init__(self):
        # Start out with the do-nothing default handlers.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the ContentHandler currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register the object that receives document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register the object that receives basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register the object that resolves external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register the object that receives error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Let the application choose the locale for errors and warnings.

        A SAX parser need not localize its errors and warnings, but
        it must raise a SAX exception when it cannot support the
        locale asked for. Applications may request a locale change
        in the middle of a parse. This base implementation supports
        no locale and always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
|
||||
|
||||
class IncrementalParser(XMLReader):
    """XMLReader extended with methods for incremental parsing.

    Supporting this interface is optional, because not every
    underlying XML parser can work incrementally.

    A freshly created parser accepts data through feed right away.
    Once parsing has been ended with close, reset has to be called
    before the parser can take new data, whether through feed or
    through parse.

    These methods must _not_ be called during parsing, that is,
    between the call to parse and its return.

    As a convenience to SAX 2.0 driver writers, the class implements
    XMLReader's parse method on top of prepareParser, feed and
    close."""

    def __init__(self, bufsize=2**16):
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Read the source in chunks of the configured size and feed them."""
        from . import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        # A character stream takes precedence over a byte stream.
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()
        while True:
            chunk = stream.read(self._bufsize)
            if not chunk:
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand raw XML data to the parser and have it parsed.

        The corresponding events are emitted as the data is parsed.
        An XML construct may be split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Let the SAX 2.0 driver get ready for parsing.

        The parse implementation calls this before feeding data."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Tell the parser that no more data will follow.

        Called once the whole XML document has been passed in through
        feed. This lets the parser run its final checks on the
        document and empty its internal data buffer.

        The parser is not ready for another document until reset has
        been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready for a new document.

        Called after close. What happens when parse or feed is called
        after close without an intervening reset is undefined."""
        raise NotImplementedError("This method must be implemented!")
|
||||
|
||||
# ===== LOCATOR =====
|
||||
|
||||
class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator gives valid results only while a handler callback is
    running; at any other time its results are unpredictable. This
    base implementation knows no location: it reports -1 for the
    numbers and None for the identifiers."""

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
|
||||
|
||||
# ===== INPUTSOURCE =====
|
||||
|
||||
class InputSource:
    """Bundle of what an XMLReader needs in order to read an entity.

    An instance can carry the entity's public identifier, its system
    identifier, a byte stream (optionally with the name of its
    character encoding) and/or a character stream.

    Applications create these objects to pass to XMLReader.parse and
    to return from EntityResolver.resolveEntity.

    An InputSource belongs to the application. The XMLReader is not
    allowed to modify one handed to it by the application, although
    it may make copies and modify those."""

    def __init__(self, system_id = None):
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding  = None
        self.__bytefile  = None
        self.__charfile  = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The value must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also holds a
        character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The byte stream is a Python file-like object that performs no
        byte-to-character conversion.

        The SAX parser ignores it when a character stream is set as
        well, but prefers it over opening a URI connection itself.

        An application that knows the character encoding of the byte
        stream should announce it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method returns the character encoding for
        this byte stream, or None if it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a file-like object that performs the
        conversion to Unicode strings itself.

        When a character stream is set, the SAX parser ignores any
        byte stream and makes no attempt to open a URI connection to
        the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
|
||||
|
||||
# ===== ATTRIBUTESIMPL =====
|
||||
|
||||
class AttributesImpl:
    """Attribute collection for non-namespace processing.

    Names and qualified names are the same thing here, so the
    by-name and by-qname accessors are interchangeable. Besides the
    SAX accessor methods the object offers a read-only, mapping-like
    interface."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}. The mapping is
        stored as given, not copied."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of the named attribute; KeyError if absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for a qualified name; KeyError if absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the name for a qualified name; KeyError if absent."""
        if name in self._attrs:
            return name
        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for a name; KeyError if absent."""
        if name in self._attrs:
            return name
        raise KeyError(name)

    def getNames(self):
        """Return the attribute names as a list."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return the qualified attribute names as a list."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return the attribute names as a list."""
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the named attribute's value, or alternative if absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class over the same mapping."""
        return self.__class__(self._attrs)

    def items(self):
        """Return the (name, value) pairs as a list."""
        return list(self._attrs.items())

    def values(self):
        """Return the attribute values as a list."""
        return list(self._attrs.values())
|
||||
|
||||
# ===== ATTRIBUTESNSIMPL =====
|
||||
|
||||
class AttributesNSImpl(AttributesImpl):
    """Attribute collection for namespace-aware processing.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    supplies the qualified name that belongs to each pair."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        Both mappings are stored as given, not copied."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute with the given qualified name.

        Raises KeyError when no attribute has that qualified name."""
        for key, qualified in self._qnames.items():
            if qualified == name:
                return self._attrs[key]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair for the given qualified name.

        Raises KeyError when no attribute has that qualified name."""
        for key, qualified in self._qnames.items():
            if qualified == name:
                return key

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for a (ns_uri, lname) pair."""
        return self._qnames[name]

    def getQNames(self):
        """Return the qualified attribute names as a list."""
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class over the same mappings."""
        return self.__class__(self._attrs, self._qnames)
|
||||
|
||||
|
||||
def _test():
    """Smoke test: check that the interface classes can be instantiated."""
    for interface in (XMLReader, IncrementalParser, Locator):
        interface()


if __name__ == "__main__":
    _test()
|
Loading…
Add table
Add a link
Reference in a new issue