1import libxml2mod
2import types
3import sys
4
5# The root of all libxml2 errors.
class libxmlError(Exception):
    """Common base class of the exceptions defined by the libxml2 wrappers."""
7
8# Type of the wrapper class for the C objects wrappers
def checkWrapper(obj):
    """Tell whether obj is something other than a raw C object handle.

    The decision is made on the name of the object's type.

    Returns:
        1 when the type name is neither 'PyCObject' nor 'PyCapsule',
        0 when it is one of those two.
    """
    # The parameter itself must be inspected; looking up any other name
    # here would raise and make every call answer 0.
    if type(obj).__name__ in ('PyCObject', 'PyCapsule'):
        return 0
    return 1
17
18#
19# id() is sometimes negative ...
20#
def pos_id(o):
    """Return id(o), remapped when it is negative.

    A negative identity is turned into sys.maxsize - id(o); any other
    value is handed back unchanged.
    """
    ident = id(o)
    return ident if ident >= 0 else sys.maxsize - ident
26
27#
28# Errors raised by the wrappers when some tree handling failed.
29#
class treeError(libxmlError):
    """Raised by the wrappers when some tree handling failed."""

    def __init__(self, msg):
        # The message is kept as given; __str__ hands it back untouched.
        self.msg = msg

    def __str__(self):
        return self.msg
35
class parserError(libxmlError):
    """libxml2 error carrying a message; raised for instance by
    SAXCallback.error() and SAXCallback.fatalError()."""

    def __init__(self, msg):
        # The message is kept as given; __str__ hands it back untouched.
        self.msg = msg

    def __str__(self):
        return self.msg
41
class uriError(libxmlError):
    """libxml2 error carrying a message (by its name, meant for URI
    handling failures)."""

    def __init__(self, msg):
        # The message is kept as given; __str__ hands it back untouched.
        self.msg = msg

    def __str__(self):
        return self.msg
47
class xpathError(libxmlError):
    """libxml2 error carrying a message (by its name, meant for XPath
    failures)."""

    def __init__(self, msg):
        # The message is kept as given; __str__ hands it back untouched.
        self.msg = msg

    def __str__(self):
        return self.msg
53
class ioWrapper:
    """Adapter exposing a Python file-like object through io_* methods.

    The wrapped object is kept privately; once it is gone (closed, never
    provided, or dropped after a failed read) every io_* method answers
    -1 without touching it.  The _o attribute is initialised to None
    here and is filled in by the subclasses with their libxml2 buffer.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and forget the wrapped object.

        Returns 0 on success, -1 when there is nothing left to close.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object; 0 on success, -1 when it is gone."""
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read from the wrapped object.

        A negative len reads everything that is available, otherwise at
        most len units are requested.  Returns what read() returned, or
        -1 when there is no wrapped object or the read raised.
        """
        if self.__io is None:
            return -1
        try:
            if len < 0:
                ret = self.__io.read()
            else:
                ret = self.__io.read(len)
        except Exception as e:
            print("failed to read from Python:", type(e))
            print("on IO:", self.__io)
            # Drop the failing stream so that later calls answer -1
            # right away instead of hitting the same error again.
            self.__io = None
            return -1

        return ret

    def io_write(self, str, len = -1):
        """Write str to the wrapped object.

        With a negative len the call is write(str); otherwise len is
        forwarded as a second argument, which the wrapped object must
        accept.  Returns whatever write() returned, or -1 when there is
        no wrapped object.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        return self.__io.write(str, len)
96
class ioReadWrapper(ioWrapper):
    """ioWrapper that owns a libxml2 parser input buffer.

    _obj is the Python object to read from; enc is handed unchanged to
    libxml2mod.xmlCreateInputBuffer.  The buffer handle is stored in
    self._o until close() or object destruction releases it.
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __release(self):
        # Shared by close() and __del__: close the Python stream, then
        # free the input buffer; _o is reset so a second call is a no-op.
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def __del__(self):
        # Silent on purpose: a destructor must not write to stdout.
        self.__release()

    def close(self):
        """Close the wrapped stream and free the libxml2 input buffer."""
        self.__release()
114
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to an existing libxml2 output buffer.

    _obj is normally an output buffer handle: the Python file attached
    to it (as reported by libxml2mod.outputBufferGetPythonFile) becomes
    the wrapped stream, or _obj itself when there is none, and the
    handle is kept in self._o.  When _obj is a string nothing is
    wrapped and every operation is a no-op.

    enc is accepted for signature compatibility; it is not used.
    """

    def __init__(self, _obj, enc = ""):
        if isinstance(_obj, str):
            print("write io from a string")
            # Nothing to wrap, but the base state must still exist so
            # that flush(), close() and __del__ do not fail on missing
            # attributes.
            ioWrapper.__init__(self, None)
            return
        file = libxml2mod.outputBufferGetPythonFile(_obj)
        if file is not None:
            ioWrapper.__init__(self, file)
        else:
            ioWrapper.__init__(self, _obj)
        self._o = _obj

    def __close_buffer(self):
        # Close the libxml2 output buffer once and forget the handle.
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def __del__(self):
        self.io_close()
        self.__close_buffer()

    def flush(self):
        """Flush the wrapped stream, then close the output buffer.

        NOTE(review): this closes the libxml2 buffer exactly like
        close() does -- confirm that a plain flush was not intended.
        """
        self.io_flush()
        self.__close_buffer()

    def close(self):
        """Flush the wrapped stream and close the libxml2 output buffer."""
        self.io_flush()
        self.__close_buffer()
158
159#
160# Example of a class to handle SAX events
161#
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which
    raise parserError.  Subclasses override the events they care about.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content=None):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content.

           content defaults to None only so that older four-argument
           positional calls keep working."""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # Must not be named entityDecl: a second definition with that name
    # would replace the five-argument entityDecl() above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError(msg)"""
        raise parserError(msg)
260
261#
# This class is the ancestor of all the Node classes. It provides
# the basic functionality shared by all nodes (and handles
# exceptions gracefully), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
266#
class xmlCore:
    """Ancestor of all the node wrapper classes.

    The raw libxml2 handle lives in self._o.  On top of it the class
    offers what every node shares: name and type, navigation in the
    tree, the owning document, content access, serialization,
    canonicalization, XPath evaluation and iteration.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def __eq__(self, other):
        """True when both wrappers designate the same node."""
        # None and objects that carry no handle can never be the same
        # node; answer directly instead of failing on other._o.
        if other is None or not hasattr(other, "_o"):
            return False
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        if ret is None:
            return False
        return ret == True

    def __ne__(self, other):
        """Negation of the node comparison; see __eq__."""
        if other is None or not hasattr(other, "_o"):
            return True
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        return not ret

    def __hash__(self):
        return libxml2mod.nodeHash(self._o)

    def __str__(self):
        return self.serialize()

    #
    # Accessors: each one returns None when libxml2 reports nothing,
    # otherwise the result wrapped in the matching Python class.
    #
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # alternate spelling of the same accessor

    def get_name(self):
        return libxml2mod.name(self._o)

    def get_type(self):
        return libxml2mod.type(self._o)

    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret is not None:
            return xmlDoc(_obj=ret)
        # No owning document reported: a document node is its own
        # document, anything else has none.
        if self.type in ["document_xml", "document_html"]:
            return xmlDoc(_obj=self._o)
        return None

    #
    # Those are common attributes to nearly all type of nodes,
    # exposed as read-only properties on top of the accessors above.
    #
    parent = property(get_parent, None, None, "Parent node")
    children = property(get_children, None, None, "First child node")
    last = property(get_last, None, None, "Last sibling node")
    next = property(get_next, None, None, "Next sibling node")
    prev = property(get_prev, None, None, "Previous sibling node")
    properties = property(get_properties, None, None, "List of properties")
    content = property(get_content, None, None, "Content of this node")
    name = property(get_name, None, None, "Node name")
    type = property(get_type, None, None, "Node type")
    doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # the C layer works on raw handles, not on wrappers
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)

    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # the C layer works on raw handles, not on wrappers
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate expr with this node as the context node.

        Returns None when the node has no document, otherwise the
        result of the evaluation.  The temporary XPath context is freed
        even when the evaluation raises.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            ctxt.xpathFreeContext()

    #
    # A variant caching one context per xmlDoc used to live here; it was
    # removed because of memory leaks (c.f. #126735).  The name is kept
    # as a plain alias of xpathEval().
    #
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """
        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for python iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)

    __iter__ = walk_depth_first

    def free(self):
        """Free the XPath context cached on the document, if any, then
        call libxml2mod.xmlFreeDoc on this object's handle."""
        try:
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            # Best effort: usually there is no cached context at all.
            pass
        libxml2mod.xmlFreeDoc(self._o)
522
523
524#
525# implements the depth-first iterator for libxml2 DOM tree
526#
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a libxml2 DOM tree.

    Starting at node, each visited node is followed by its children
    (through .children) and then by its next sibling (through .next).
    The starting node's own following siblings are walked as well.
    """

    def __init__(self, node):
        self.node = node
        # nodes already returned whose next sibling is still to be tried
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            if self.node:
                ret = self.node
                self.parents.append(ret)
                self.node = ret.children
                return ret
            if not self.parents:
                raise StopIteration
            # nothing below: resume with the sibling of the last node
            self.node = self.parents.pop().next

    # Python 2 spells the iterator protocol method "next".
    next = __next__
545
546#
547# implements the breadth-first iterator for libxml2 DOM tree
548#
class xmlCoreBreadthFirstItertor:
    """Siblings-first iterator over a libxml2 DOM tree.

    Starting at node, all following siblings (through .next) are
    returned first; the walk then descends into the children of the
    most recently returned node that has not been expanded yet.

    NOTE(review): because pending nodes are taken last-in first-out,
    the order is not a strict level-by-level traversal.
    """

    def __init__(self, node):
        self.node = node
        # nodes already returned whose children are still to be visited
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            if self.node:
                ret = self.node
                self.parents.append(ret)
                self.node = ret.next
                return ret
            if not self.parents:
                raise StopIteration
            # no more siblings: go down from the last pending node
            self.node = self.parents.pop().children

    # Python 2 spells the iterator protocol method "next".
    next = __next__
567
568#
569# converters to present a nicer view of the XPath returns
570#
def nodeWrap(o):
    """Wrap the raw node handle o in the Python class matching the type
    name reported by libxml2mod.type(); unknown types get an xmlNode."""
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        wrapper = xmlNode
    elif kind == "attribute":
        wrapper = xmlAttr
    elif kind[0:8] == "document":
        wrapper = xmlDoc
    elif kind == "namespace":
        wrapper = xmlNs
    elif kind == "elem_decl":
        wrapper = xmlElement
    elif kind == "attribute_decl":
        wrapper = xmlAttribute
    elif kind == "entity_decl":
        wrapper = xmlEntity
    elif kind == "dtd":
        wrapper = xmlDtd
    else:
        wrapper = xmlNode
    return wrapper(_obj=o)
591
def xpathObjectRet(o):
    """Convert an XPath result into wrapped Python objects.

    Lists and tuples are converted element by element (keeping their
    container type), strings, ints and floats are returned unchanged,
    and anything else is passed to nodeWrap().
    """
    kind = type(o)
    if kind is list:
        return [xpathObjectRet(item) for item in o]
    if kind is tuple:
        return tuple([xpathObjectRet(item) for item in o])
    if kind is str or kind is int or kind is float:
        return o
    return nodeWrap(o)
604
605#
606# register an XPath function
607#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable f as an XPath function.

    ctxt, name, ns_uri and f are forwarded unchanged to
    libxml2mod.xmlRegisterXPathFunction.

    Returns whatever the C layer reports, so that callers can check
    the outcome of the registration.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
610
611#
612# For the xmlTextReader parser configuration
613#
# Identifiers of the xmlTextReader parser properties.
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
PARSER_SUBST_ENTITIES = 4
618
619#
620# For the error callback severities
621#
# Severity levels handed to the error callbacks.
PARSER_SEVERITY_VALIDITY_WARNING = 1
PARSER_SEVERITY_VALIDITY_ERROR = 2
PARSER_SEVERITY_WARNING = 3
PARSER_SEVERITY_ERROR = 4
626
627#
628# register the libxml2 error handler
629#
def registerErrorHandler(f, ctx):
    """Register a function written in Python for error reporting.
       The function is called back as f(ctx, error).

       Returns the value reported by the registration routine used."""
    import sys
    if 'libxslt' in sys.modules:
        # libxslt has already been imported: its own error handler
        # registration must be used instead of the libxml2 one
        import libxslt
        return libxslt.registerErrorHandler(f,ctx)
    # normal behaviour when libxslt is not imported
    return libxml2mod.xmlRegisterErrorHandler(f,ctx)
643
class parserCtxtCore:
    """Hand-written part of the parser context wrapper.

    Keeps the raw parser context handle in self._o and frees it with
    libxml2mod.xmlFreeParserCtxt when the object is destroyed.
    """

    def __init__(self, _obj=None):
        # None stands for "no context attached"
        self._o = _obj

    def __del__(self):
        handle = self._o
        self._o = None
        if handle is not None:
            libxml2mod.xmlFreeParserCtxt(handle)

    def setErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o,f,arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        handler = libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
        return handler

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        status = libxml2mod.addLocalCatalog(self._o, uri)
        return status
672
673
class ValidCtxtCore:
    """Hand-written part of the DTD validation context wrapper.

    The class never sets self._o itself; the handle used by
    setValidityErrorHandler() has to be provided elsewhere.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSetValidErrors(handle, err_func, warn_func, arg)
685
686
class SchemaValidCtxtCore:
    """Hand-written part of the Schema validation context wrapper.

    The class never sets self._o itself; the handle used by
    setValidityErrorHandler() has to be provided elsewhere.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSchemaSetValidErrors(handle, err_func, warn_func, arg)
698
699
class relaxNgValidCtxtCore:
    """Hand-written part of the RelaxNG validation context wrapper.

    The class never sets self._o itself; the handle used by
    setValidityErrorHandler() has to be provided elsewhere.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlRelaxNGSetValidErrors(handle, err_func, warn_func, arg)
711
712
def _xmlTextReaderErrorFunc(xxx_todo_changeme,msg,severity,locator):
    """Intermediate callback to wrap the locator.

       xxx_todo_changeme is the (f, arg) pair stored by
       xmlTextReaderCore.SetErrorHandler(); f is called as
       f(arg, msg, severity, xmlTextReaderLocator(locator)) and its
       result is returned."""
    handler, user_arg = xxx_todo_changeme
    wrapped_locator = xmlTextReaderLocator(locator)
    return handler(user_arg, msg, severity, wrapped_locator)
717
class xmlTextReaderCore:
    """Hand-written part of the xmlTextReader wrapper.

    Keeps the raw reader handle in self._o, frees it with
    libxml2mod.xmlFreeTextReader on destruction, and manages the error
    handler registration.
    """

    def __init__(self, _obj=None):
        self.input = None
        # None stands for "no reader attached"
        self._o = _obj

    def __del__(self):
        handle = self._o
        self._o = None
        if handle is not None:
            libxml2mod.xmlFreeTextReader(handle)

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is not None:
            # The C layer calls the intermediate function, which wraps
            # the locator before handing over to f.
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o, _xmlTextReaderErrorFunc, (f,arg))
            return
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, None, None)

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        registered, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if registered is None:
            return None,None
        # registered is the intermediate function; payload is the
        # (f,arg) pair that SetErrorHandler stored alongside it.
        return payload
749
750#
# The cleanup now goes through a wrapper in libxml.c
752#
def cleanupParser():
    """Run the parser cleanup through libxml2mod.xmlPythonCleanupParser()."""
    libxml2mod.xmlPythonCleanupParser()
755
756#
757# The interface to xmlRegisterInputCallbacks.
758# Since this API does not allow to pass a data object along with
759# match/open callbacks, it is necessary to maintain a list of all
760# Python callbacks.
761#
# Python-level input callbacks, oldest first.
__input_callbacks = []
def registerInputCallback(func):
    """Add func to the Python input callbacks.

    A dispatcher is registered with libxml2mod; for a given URI it
    tries the Python callbacks from the most recently registered to the
    oldest and returns the first result that is not None.
    """
    def findOpenCallback(URI):
        for callback in reversed(__input_callbacks):
            opened = callback(URI)
            if opened is None:
                continue
            return opened
        return None
    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
    __input_callbacks.append(func)
771
def popInputCallbacks():
    """Remove the most recently registered input callback.

    Python-level callbacks are popped first; once none is left,
    libxml2mod.xmlUnregisterInputCallback() is called so that the
    built-in ones start being popped.
    """
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
779
780# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
781#
782# Everything before this line comes from libxml.py
783# Everything after this line is automatically generated
784#
785# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
786
787