import libxml2mod
import types
import sys

# The root of all libxml2 errors.
class libxmlError(Exception): pass

#
# id() is sometimes negative ...
#
def pos_id(o):
    """Return a non-negative identifier derived from id(o).

    When id(o) is negative, sys.maxint - id(o) is returned instead.
    """
    i = id(o)
    if i < 0:
        return (sys.maxint - i)
    return i

#
# Errors raised by the wrappers when some tree handling failed.
#
class treeError(libxmlError):
    """Error carrying a message, raised for tree handling failures."""
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class parserError(libxmlError):
    """Error carrying a message; raised by SAXCallback.error/fatalError."""
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class uriError(libxmlError):
    """Error carrying a message, for URI related failures."""
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class xpathError(libxmlError):
    """Error carrying a message, for XPath related failures."""
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class ioWrapper:
    """Expose a Python file-like object through io_* methods.

    Every io_* method returns -1 when no Python object is attached
    (never provided, or already released by io_close()).
    """
    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and release the wrapped object; return 0, or -1 if none."""
        if self.__io is None:
            return(-1)
        self.__io.close()
        self.__io = None
        return(0)

    def io_flush(self):
        """Flush the wrapped object; return 0, or -1 if none."""
        if self.__io is None:
            return(-1)
        self.__io.flush()
        return(0)

    def io_read(self, len = -1):
        """Read len units from the wrapped object, or everything if len < 0.

        Returns what the wrapped object's read() returns, or -1 if no
        object is attached.
        """
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.read())
        return(self.__io.read(len))

    def io_write(self, str, len = -1):
        """Write str to the wrapped object; return -1 if none is attached."""
        if self.__io is None:
            return(-1)
        if len < 0:
            return(self.__io.write(str))
        # NOTE(review): len is forwarded as a second argument to write();
        # standard file objects accept only one -- confirm which objects
        # are expected to reach this branch.
        return(self.__io.write(str, len))

class ioReadWrapper(ioWrapper):
    """ioWrapper owning a parser input buffer created over itself."""
    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as close(); the debugging print that used to
        # run here has been removed.
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def close(self):
        """Close the Python object and free the parser input buffer."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

class ioWriteWrapper(ioWrapper):
    """ioWrapper associated with a libxml2 output buffer."""
    def __init__(self, _obj, enc = ""):
        if type(_obj) == type(''):
            # Nothing to wrap: still initialise the base attributes so
            # that io_close() and __del__ find them (the original code
            # assigned a misspelled "self.o" and skipped this).
            ioWrapper.__init__(self, None)
        elif type(_obj) == types.InstanceType:
            # Python instance: build an output buffer that calls back
            # into this wrapper.
            ioWrapper.__init__(self, _obj)
            self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
        else:
            # Otherwise _obj is kept as the output buffer itself; wrap
            # the Python file it reports, if any.
            pyfile = libxml2mod.outputBufferGetPythonFile(_obj)
            if pyfile is not None:
                ioWrapper.__init__(self, pyfile)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the Python object, then close the output buffer."""
        self.io_flush()
        # NOTE(review): this closes the output buffer exactly like
        # close() does -- confirm that flush() is meant to be final.
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the Python object, then close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

#
# Example of a class to handle SAX events
#
class SAXCallback:
    """Base class for SAX handlers"""
    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be declared as a second "entityDecl", which
    # silently replaced the five-argument entityDecl() defined above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicate the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError"""
        raise parserError(msg)

#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and handle
# gracefully the exception), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
#
class xmlCore:
    def __init__(self, _obj=None):
        # _o holds the low-level object handed to libxml2mod calls.
        self._o = _obj

    def __eq__(self, other):
        # Anything that is not a wrapper (no _o attribute) is unequal
        # instead of raising AttributeError.
        if other is None or not hasattr(other, "_o"):
            return False
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        if ret is None:
            return False
        return ret == True
    def __ne__(self, other):
        if other is None or not hasattr(other, "_o"):
            return True
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        return not ret
    def __hash__(self):
        return libxml2mod.nodeHash(self._o)

    def __str__(self):
        return self.serialize()
    def get_parent(self):
        """Return the parent as an xmlNode, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_children(self):
        """Return the first child as an xmlNode, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_last(self):
        """Return what libxml2mod.last() reports as an xmlNode, or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_next(self):
        """Return the next sibling as an xmlNode, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_properties(self):
        """Return the first property as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)
    def get_prev(self):
        """Return the previous sibling as an xmlNode, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)
    def get_content(self):
        """Return the node content as given by xmlNodeGetContent."""
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?
    def get_name(self):
        """Return the node name."""
        return libxml2mod.name(self._o)
    def get_type(self):
        """Return the node type string (e.g. "element", "document_xml")."""
        return libxml2mod.type(self._o)
    def get_doc(self):
        """Return the owning document as an xmlDoc, or None.

        A node without a document that is itself of a document type is
        returned wrapped as its own xmlDoc.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)
    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    if float(sys.version[0:3]) < 2.2:
        def __getattr__(self, attr):
            # Without property() support, emulate the attributes by
            # dispatching to the matching get_* function above.
            if attr in ("parent", "properties", "children", "last", "next",
                        "prev", "content", "name", "type", "doc"):
                return getattr(xmlCore, "get_" + attr)(self)
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properies")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)
    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # hand the low-level objects, not the wrappers, to the C side
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)
    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate expr with this node as context node.

        Returns None when the node has no document.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            res = ctxt.xpathEval(expr)
        finally:
            # free the context even when the evaluation raises
            ctxt.xpathFreeContext()
        return res

#    #
#    # Selecting nodes using XPath, faster because the context
#    # is allocated just once per xmlDoc.
#    #
#    # Removed: DV memleaks c.f. #126735
#    #
#    def xpathEval2(self, expr):
#        doc = self.doc
#        if doc == None:
#            return None
#        try:
#            doc._ctxt.setContextNode(self)
#        except:
#            doc._ctxt = doc.xpathNewContext()
#            doc._ctxt.setContextNode(self)
#        res = doc._ctxt.xpathEval(expr)
#        return res
    def xpathEval2(self, expr):
        """Alias of xpathEval(), kept for compatibility."""
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """

        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for python2 iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)
    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        # Best effort: release an XPath context cached on the document,
        # if there is one; any failure here is deliberately ignored.
        try:
            self.doc._ctxt.xpathFreeContext()
        except:
            pass
        libxml2mod.xmlFreeDoc(self._o)


#
# implements the depth-first iterator for libxml2 DOM tree
#
class xmlCoreDepthFirstItertor:
    def __init__(self, node):
        self.node = node
        self.parents = []
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                # yield the node and descend into its children first
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            # no node left at this level: resume with the next sibling
            # of the most recently visited node
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next

#
# implements the breadth-first iterator for libxml2 DOM tree
#
class xmlCoreBreadthFirstItertor:
    def __init__(self, node):
        self.node = node
        self.parents = []
    def __iter__(self):
        return self
    def next(self):
        while 1:
            if self.node:
                # yield the node and move on to its next sibling first
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            # siblings exhausted: continue with the children of the
            # most recently visited node
            # NOTE(review): popping the most recent node makes this a
            # stack, not a queue -- confirm the intended visiting order.
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.children

#
# converters to
# present a nicer view of the XPath returns
#
def nodeWrap(o):
    """Wrap a low-level node object in a Python class chosen from
       the type string reported by libxml2mod.type()"""
    # TODO try to cast to the most appropriate node class
    name = libxml2mod.type(o)
    if name == "element" or name == "text":
        return xmlNode(_obj=o)
    if name == "attribute":
        return xmlAttr(_obj=o)
    if name[0:8] == "document":
        return xmlDoc(_obj=o)
    if name == "namespace":
        return xmlNs(_obj=o)
    if name == "elem_decl":
        return xmlElement(_obj=o)
    if name == "attribute_decl":
        return xmlAttribute(_obj=o)
    if name == "entity_decl":
        return xmlEntity(_obj=o)
    if name == "dtd":
        return xmlDtd(_obj=o)
    # any other type gets the generic node wrapper
    return xmlNode(_obj=o)

def xpathObjectRet(o):
    """Convert an XPath result to Python objects.

    Lists and tuples are converted element by element (keeping their
    container type), strings and numbers are returned unchanged, and
    anything else is wrapped with nodeWrap().
    """
    if isinstance(o, list):
        return [xpathObjectRet(item) for item in o]
    if isinstance(o, tuple):
        return tuple([xpathObjectRet(item) for item in o])
    if isinstance(o, (str, int, float)):
        return o
    return nodeWrap(o)

#
# register an XPath function
#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register f as the XPath function name (in namespace ns_uri) on
       ctxt, returning the result of the low-level registration call."""
    ret = libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
    return ret

#
# For the xmlTextReader parser configuration
#
PARSER_LOADDTD=1
PARSER_DEFAULTATTRS=2
PARSER_VALIDATE=3
PARSER_SUBST_ENTITIES=4

#
# For the error callback severities
#
PARSER_SEVERITY_VALIDITY_WARNING=1
PARSER_SEVERITY_VALIDITY_ERROR=2
PARSER_SEVERITY_WARNING=3
PARSER_SEVERITY_ERROR=4

#
# register the libxml2 error handler
#
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error). """
    import sys
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
    else:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        ret = libxslt.registerErrorHandler(f,ctx)
    return ret

class parserCtxtCore:
    """Base wrapper around a parser context, freed on deletion."""

    def __init__(self, _obj=None):
        # _o holds the low-level parser context, or None.
        self._o = _obj

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeParserCtxt(self._o)
        self._o = None

    def setErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o,f,arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)


class ValidCtxtCore:
    # __init__ sets nothing here; setValidityErrorHandler() reads
    # self._o, which has to be provided elsewhere.

    def __init__(self, *args, **kw):
        pass

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        libxml2mod.xmlSetValidErrors(self._o, err_func, warn_func, arg)


class SchemaValidCtxtCore:
    # __init__ sets nothing here; setValidityErrorHandler() reads
    # self._o, which has to be provided elsewhere.

    def __init__(self, *args, **kw):
        pass

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        libxml2mod.xmlSchemaSetValidErrors(self._o, err_func, warn_func, arg)


class relaxNgValidCtxtCore:
    # __init__ sets nothing here; setValidityErrorHandler() reads
    # self._o, which has to be provided elsewhere.

    def __init__(self, *args, **kw):
        pass

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        libxml2mod.xmlRelaxNGSetValidErrors(self._o, err_func, warn_func, arg)


def _xmlTextReaderErrorFunc(f_arg, msg, severity, locator):
    """Intermediate callback to wrap the locator"""
    # f_arg is the (f, arg) pair stored by xmlTextReaderCore.SetErrorHandler
    f, arg = f_arg
    return f(arg,msg,severity,xmlTextReaderLocator(locator))

class xmlTextReaderCore:
    """Base wrapper around a text reader, freed on deletion."""

    def __init__(self, _obj=None):
        self.input = None
        # _o holds the low-level reader, or None.
        self._o = _obj

    def __del__(self):
        if self._o is not None:
            libxml2mod.xmlFreeTextReader(self._o)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o,None,None)
        else:
            # register the intermediate callback and keep the user's
            # (f,arg) pair as its argument
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o,_xmlTextReaderErrorFunc,(f,arg))

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f,arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None,None
        else:
            # assert f is _xmlTextReaderErrorFunc
            # arg is the (f,arg) pair stored by SetErrorHandler
            return arg

#
# The cleanup now goes through a wrapper in libxml.c
#
def cleanupParser():
    libxml2mod.xmlPythonCleanupParser()

# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
#
# Everything before this line comes from libxml.py
# Everything after this line is automatically generated
#
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
