1import libxml2mod
2import types
3import sys
4
5# The root of all libxml2 errors.
class libxmlError(Exception):
    """Base class from which the libxml2 error classes in this module derive."""
7
8#
9# id() is sometimes negative ...
10#
def pos_id(o):
    """Return id(o), remapped to a positive value when id() is negative.

    A negative identifier ``i`` is returned as ``sys.maxint - i``.
    """
    ident = id(o)
    if ident >= 0:
        return ident
    return sys.maxint - ident
16
17#
18# Errors raised by the wrappers when some tree handling failed.
19#
class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed.

    The message is stored in ``msg`` and is what ``str()`` returns.
    """
    def __init__(self, msg):
        # Hand the message to the base class so that ``args`` is populated;
        # otherwise repr() shows no message and copying/unpickling fails
        # because __init__ requires an argument.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
25
class parserError(libxmlError):
    """libxml2 error carrying a message; raised by SAXCallback.error()
    and SAXCallback.fatalError().

    The message is stored in ``msg`` and is what ``str()`` returns.
    """
    def __init__(self, msg):
        # Hand the message to the base class so that ``args`` is populated;
        # otherwise repr() shows no message and copying/unpickling fails
        # because __init__ requires an argument.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
31
class uriError(libxmlError):
    """libxml2 error carrying a message.

    The message is stored in ``msg`` and is what ``str()`` returns.
    NOTE(review): presumably raised by the URI handling wrappers -- confirm
    against the generated part of the module.
    """
    def __init__(self, msg):
        # Hand the message to the base class so that ``args`` is populated;
        # otherwise repr() shows no message and copying/unpickling fails
        # because __init__ requires an argument.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
37
class xpathError(libxmlError):
    """libxml2 error carrying a message.

    The message is stored in ``msg`` and is what ``str()`` returns.
    NOTE(review): presumably raised by the XPath wrappers -- confirm
    against the generated part of the module.
    """
    def __init__(self, msg):
        # Hand the message to the base class so that ``args`` is populated;
        # otherwise repr() shows no message and copying/unpickling fails
        # because __init__ requires an argument.
        libxmlError.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg
43
class ioWrapper:
    """Expose a Python file-like object through io_* methods.

    Every io_* method returns -1 once the wrapped object has been released
    by io_close().
    """
    def __init__(self, _obj):
        # The wrapped Python object; reset to None by io_close().
        self.__io = _obj
        # Handle slot, initialised to None here; ioReadWrapper and
        # ioWriteWrapper store a libxml2mod buffer in it.
        self._o = None

    def io_close(self):
        """Close and release the wrapped object.

        Returns 0 on success, -1 if there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 if there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read from the wrapped object.

        Reads everything when len is negative, at most len otherwise.
        Returns what the wrapped read() returns, or -1 if there is no
        wrapped object anymore.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len = -1):
        """Write str to the wrapped object.

        When len is non-negative only the first len items of str are
        written. Returns what the wrapped write() returns, or -1 if there
        is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # write() on file-like objects takes a single argument, so the
        # length limit is applied by slicing rather than passed along.
        return self.__io.write(str[:len])
75
class ioReadWrapper(ioWrapper):
    """Input-side ioWrapper owning a libxml2 parser input buffer.

    The buffer is created with libxml2mod.xmlCreateInputBuffer(self, enc)
    and kept in ``_o``; ``enc`` is passed through unchanged.
    """
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as close(). The debugging print that used to fire
        # on every destruction has been dropped.
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def close(self):
        """Close the wrapped object and free the input buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
93
class ioWriteWrapper(ioWrapper):
    """Output-side ioWrapper bound to a libxml2 output buffer kept in ``_o``.

    Depending on the type of ``_obj`` the constructor either creates a new
    output buffer around a Python object or adopts an existing buffer.
    """
    def __init__(self, _obj, enc = ""):
        if type(_obj) == type(''):
            # No buffer is created for a string argument. Initialise the
            # base state anyway so that flush(), close() and __del__() are
            # harmless instead of failing on missing attributes.
            ioWrapper.__init__(self, None)
            # Historical (misspelled) attribute, kept for compatibility.
            self.o = None
        elif type(_obj) == types.InstanceType:
            # Classic-class instance: wrap it and create a fresh output
            # buffer that calls back into this wrapper.
            ioWrapper.__init__(self, _obj)
            self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
        else:
            # Anything else is adopted as the output buffer itself; wrap
            # the Python file associated with it when there is one.
            pyfile = libxml2mod.outputBufferGetPythonFile(_obj)
            if pyfile is not None:
                ioWrapper.__init__(self, pyfile)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the wrapped object and close the output buffer.

        NOTE(review): this closes the buffer exactly like close() does;
        possibly a buffer flush was intended -- confirm before changing.
        """
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the wrapped object and close the output buffer, if any."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
130
131#
132# Example of a class to handle SAX events
133#
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which
    raise parserError. Subclasses override the events they care about.
    """
    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callbacks
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when a CDATA section has been read, data is the string
           containing the data, multiple consecutive cdataBlock() callbacks
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second definition of entityDecl(), which
    # silently replaced the five-argument entityDecl() above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError(msg)"""
        raise parserError(msg)
232
233#
# This class is the ancestor of all the Node classes. It provides
# the basic functionality shared by all nodes (and gracefully handles
# the exceptions), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
238#
class xmlCore:
    """Common ancestor of the node wrapper classes.

    An instance wraps a libxml2mod object kept in ``_o`` and offers tree
    navigation, content access, serialization, canonicalization and
    XPath evaluation on top of it.
    """
    def __init__(self, _obj=None):
        # _o is the wrapped libxml2mod object, or None when unbound.
        self._o = _obj

    def __eq__(self, other):
        """Compare the wrapped nodes with libxml2mod.compareNodesEqual()."""
        if other is None:
            return False
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        if ret is None:
            return False
        return ret == True
    def __ne__(self, other):
        if other is None:
            return True
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        return not ret
    def __hash__(self):
        return libxml2mod.nodeHash(self._o)

    def __str__(self):
        return self.serialize()
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)
    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?
    def get_name(self):
        return libxml2mod.name(self._o)
    def get_type(self):
        return libxml2mod.type(self._o)
    def get_doc(self):
        """Return the owning document wrapper, or None.

        When the node reports no document but is itself of type
        "document_xml" or "document_html", it is wrapped as the document.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)
    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    if sys.version_info < (2, 2):
        # No property() before Python 2.2: emulate the attributes by
        # dispatching to the matching get_<name>() accessor.
        def __getattr__(self, attr):
            if attr in ("parent", "properties", "children", "last", "next",
                        "prev", "content", "name", "type", "doc"):
                return getattr(self, "get_" + attr)()
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properties")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)
    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # Hand the wrapped objects, not the wrappers, to the C layer.
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)
    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # Hand the wrapped objects, not the wrappers, to the C layer.
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate the XPath expression expr with this node as context.

        Returns None when the node has no document, otherwise whatever
        the context's xpathEval() returns. Exceptions raised during the
        evaluation propagate to the caller.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Free the context even when the evaluation raises, otherwise
            # it would never be released.
            ctxt.xpathFreeContext()

    #
    # A faster variant caching one context per xmlDoc used to live here.
    # Removed: DV memleaks c.f. #126735. The name is kept as an alias.
    #
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """

        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for python2 iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)
    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        """Free the wrapped object with libxml2mod.xmlFreeDoc().

        NOTE(review): xmlFreeDoc() is applied to this wrapper's own
        object, so this is presumably only meaningful on document
        wrappers -- confirm.
        """
        try:
            # Best effort: release a context cached on the document, if
            # there is one; its absence is not an error.
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            pass
        libxml2mod.xmlFreeDoc(self._o)
494
495
496#
497# implements the depth-first iterator for libxml2 DOM tree
498#
class xmlCoreDepthFirstItertor:
    """Iterator yielding a node, then its children, then its next siblings.

    Nodes are followed through their ``children`` and ``next`` attributes.
    """
    def __init__(self, node):
        # Node to be returned by the following step, or a false value.
        self.node = node
        # Stack of returned nodes whose ``next`` sibling is still to visit.
        self.parents = []
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                # Nothing left on the stack: the walk is complete.
                raise StopIteration
            self.node = parent.next
    # Alias so the object also satisfies the __next__-based iterator
    # protocol; next() stays available for existing callers.
    __next__ = next
517
518#
519# implements the breadth-first iterator for libxml2 DOM tree
520#
class xmlCoreBreadthFirstItertor:
    """Iterator yielding a node and its next siblings before descending.

    Returned nodes are kept on a stack, so once a run of siblings is
    exhausted the children of the most recently returned node are
    expanded first.
    """
    def __init__(self, node):
        # Node to be returned by the following step, or a false value.
        self.node = node
        # Stack of returned nodes whose ``children`` are still to visit.
        self.parents = []
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                # Nothing left on the stack: the walk is complete.
                raise StopIteration
            self.node = parent.children
    # Alias so the object also satisfies the __next__-based iterator
    # protocol; next() stays available for existing callers.
    __next__ = next
539
540#
541# converters to present a nicer view of the XPath returns
542#
def nodeWrap(o):
    """Wrap the raw node o in the wrapper class matching its type name.

    The type name comes from libxml2mod.type(o); unrecognised types are
    wrapped as xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind == "element" or kind == "text":
        wrapped = xmlNode(_obj=o)
    elif kind == "attribute":
        wrapped = xmlAttr(_obj=o)
    elif kind[0:8] == "document":
        wrapped = xmlDoc(_obj=o)
    elif kind == "namespace":
        wrapped = xmlNs(_obj=o)
    elif kind == "elem_decl":
        wrapped = xmlElement(_obj=o)
    elif kind == "attribute_decl":
        wrapped = xmlAttribute(_obj=o)
    elif kind == "entity_decl":
        wrapped = xmlEntity(_obj=o)
    elif kind == "dtd":
        wrapped = xmlDtd(_obj=o)
    else:
        wrapped = xmlNode(_obj=o)
    return wrapped
563
def xpathObjectRet(o):
    """Convert an XPath result into plain Python values and node wrappers.

    Strings, ints and floats are returned unchanged, lists and tuples are
    converted element by element, anything else goes through nodeWrap().
    """
    kind = type(o)
    if kind == type('') or kind == type(0) or kind == type(0.0):
        return o
    if kind == type([]):
        return [xpathObjectRet(item) for item in o]
    if kind == type(()):
        return tuple([xpathObjectRet(item) for item in o])
    return nodeWrap(o)
576
577#
578# register an XPath function
579#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register f as an XPath function through libxml2mod.

    All four arguments are forwarded to
    libxml2mod.xmlRegisterXPathFunction(); its result is discarded.
    """
    libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
582
#
# For the xmlTextReader parser configuration
# NOTE(review): presumably the property identifiers accepted by the
# reader's parser-property accessors, mirroring the C enum -- confirm.
#
PARSER_LOADDTD=1
PARSER_DEFAULTATTRS=2
PARSER_VALIDATE=3
PARSER_SUBST_ENTITIES=4
590
#
# For the error callback severities
# NOTE(review): presumably the values passed as the "severity" argument
# of the error handlers registered on parser contexts and text readers
# -- confirm.
#
PARSER_SEVERITY_VALIDITY_WARNING=1
PARSER_SEVERITY_VALIDITY_ERROR=2
PARSER_SEVERITY_WARNING=3
PARSER_SEVERITY_ERROR=4
598
599#
600# register the libxml2 error handler
601#
def registerErrorHandler(f, ctx):
    """Register a function written in Python for error reporting.
       The function is called back as f(ctx, error).

       Returns the result of the underlying registration call."""
    import sys
    # "in" replaces dict.has_key(), which newer interpreters dropped.
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
    else:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        ret = libxslt.registerErrorHandler(f,ctx)
    return ret
615
class parserCtxtCore:
    """Hand-written base for parser context wrappers.

    ``_o`` holds the wrapped libxml2mod parser context, or None.
    """

    def __init__(self, _obj=None):
        self._o = None
        if _obj != None:
            self._o = _obj

    def __del__(self):
        # Release the wrapped context, if any, when the wrapper goes away.
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        registered = libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
        return registered

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        status = libxml2mod.addLocalCatalog(self._o, uri)
        return status
644
645
class ValidCtxtCore:
    """Hand-written base adding validity error handler registration.

    The constructor accepts and ignores any arguments; ``_o`` is read by
    setValidityErrorHandler() but is not set in this class.
    """

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSetValidErrors(handle, err_func, warn_func, arg)
657
658
class SchemaValidCtxtCore:
    """Hand-written base adding Schema validity handler registration.

    The constructor accepts and ignores any arguments; ``_o`` is read by
    setValidityErrorHandler() but is not set in this class.
    """

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSchemaSetValidErrors(handle, err_func, warn_func, arg)
670
671
class relaxNgValidCtxtCore:
    """Hand-written base adding RelaxNG validity handler registration.

    The constructor accepts and ignores any arguments; ``_o`` is read by
    setValidityErrorHandler() but is not set in this class.
    """

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlRelaxNGSetValidErrors(handle, err_func, warn_func, arg)
683
684
def _xmlTextReaderErrorFunc(handler, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    handler is the (f, arg) pair registered by
    xmlTextReaderCore.SetErrorHandler(); f is called as
    f(arg, msg, severity, xmlTextReaderLocator(locator)) and its result
    is returned.
    """
    # Unpacked here rather than in the signature: tuple parameters are
    # not accepted by newer interpreters. Positional callers see the
    # same interface as before.
    f, arg = handler
    return f(arg,msg,severity,xmlTextReaderLocator(locator))
688
class xmlTextReaderCore:
    """Hand-written base for text reader wrappers.

    ``_o`` holds the wrapped libxml2mod reader, or None; ``input`` starts
    out as None.
    """

    def __init__(self, _obj=None):
        self.input = None
        self._o = None
        if _obj != None:
            self._o = _obj

    def __del__(self):
        # Release the wrapped reader, if any, when the wrapper goes away.
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeTextReader(handle)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            callback, payload = None, None
        else:
            # Route through the intermediate callback, which wraps the
            # locator before calling f.
            callback, payload = _xmlTextReaderErrorFunc, (f, arg)
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, callback, payload)

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        callback, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if callback is not None:
            # assert callback is _xmlTextReaderErrorFunc
            # payload is the (f,arg) pair stored by SetErrorHandler
            return payload
        return None, None
720
721#
# The cleanup now goes through a wrapper in libxml.c
723#
def cleanupParser():
    """Run the parser cleanup via libxml2mod.xmlPythonCleanupParser()."""
    libxml2mod.xmlPythonCleanupParser()
726
727# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
728#
729# Everything before this line comes from libxml.py
730# Everything after this line is automatically generated
731#
732# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
733
734