1# $Id: Node.pm,v 1.13 2002/12/26 17:24:50 matt Exp $ 2 3package XML::XPath::Node; 4 5use strict; 6use vars qw(@ISA @EXPORT $AUTOLOAD %EXPORT_TAGS @EXPORT_OK); 7use Exporter; 8use Carp; 9@ISA = ('Exporter'); 10 11sub UNKNOWN_NODE () {0;} 12sub ELEMENT_NODE () {1;} 13sub ATTRIBUTE_NODE () {2;} 14sub TEXT_NODE () {3;} 15sub CDATA_SECTION_NODE () {4;} 16sub ENTITY_REFERENCE_NODE () {5;} 17sub ENTITY_NODE () {6;} 18sub PROCESSING_INSTRUCTION_NODE () {7;} 19sub COMMENT_NODE () {8;} 20sub DOCUMENT_NODE () {9;} 21sub DOCUMENT_TYPE_NODE () {10;} 22sub DOCUMENT_FRAGMENT_NODE () {11;} 23sub NOTATION_NODE () {12;} 24 25# Non core DOM stuff 26sub ELEMENT_DECL_NODE () {13;} 27sub ATT_DEF_NODE () {14;} 28sub XML_DECL_NODE () {15;} 29sub ATTLIST_DECL_NODE () {16;} 30sub NAMESPACE_NODE () {17;} 31 32# per-node constants 33 34# All 35sub node_parent () { 0; } 36sub node_pos () { 1; } 37sub node_global_pos () { 2; } 38 39# Element 40sub node_prefix () { 3; } 41sub node_children () { 4; } 42sub node_name () { 5; } 43sub node_attribs () { 6; } 44sub node_namespaces () { 7; } 45sub node_ids () { 8; } 46 47# Char 48sub node_text () { 3; } 49 50# PI 51sub node_target () { 3; } 52sub node_data () { 4; } 53 54# Comment 55sub node_comment () { 3; } 56 57# Attribute 58# sub node_prefix () { 3; } 59sub node_key () { 4; } 60sub node_value () { 5; } 61 62# Namespaces 63# sub node_prefix () { 3; } 64sub node_expanded () { 4; } 65 66@EXPORT = qw( 67 UNKNOWN_NODE 68 ELEMENT_NODE 69 ATTRIBUTE_NODE 70 TEXT_NODE 71 CDATA_SECTION_NODE 72 ENTITY_REFERENCE_NODE 73 ENTITY_NODE 74 PROCESSING_INSTRUCTION_NODE 75 COMMENT_NODE 76 DOCUMENT_NODE 77 DOCUMENT_TYPE_NODE 78 DOCUMENT_FRAGMENT_NODE 79 NOTATION_NODE 80 ELEMENT_DECL_NODE 81 ATT_DEF_NODE 82 XML_DECL_NODE 83 ATTLIST_DECL_NODE 84 NAMESPACE_NODE 85 ); 86 87@EXPORT_OK = qw( 88 node_parent 89 node_pos 90 node_global_pos 91 node_prefix 92 node_children 93 node_name 94 node_attribs 95 node_namespaces 96 node_text 97 node_target 98 node_data 99 node_comment 100 node_key 101 node_value 102 node_expanded 103 node_ids 104 ); 105 106%EXPORT_TAGS = ( 107 'node_keys' => [ 108 qw( 109 node_parent 110 node_pos 111 node_global_pos 112 node_prefix 113 node_children 114 node_name 115 node_attribs 116 node_namespaces 117 node_text 118 node_target 119 node_data 120 node_comment 121 node_key 122 node_value 123 node_expanded 124 node_ids 125 ), @EXPORT, 126 ], 127); 128 129 130my $global_pos = 0; 131 132sub nextPos { 133 my $class = shift; 134 return $global_pos += 5; 135} 136 137sub resetPos { 138 $global_pos = 0; 139} 140 141my %DecodeDefaultEntity = 142( 143 '"' => """, 144 ">" => ">", 145 "<" => "<", 146 "'" => "'", 147 "&" => "&" 148); 149 150sub XMLescape { 151 my ($str, $default) = @_; 152 return undef unless defined $str; 153 $default ||= ''; 154 155 if ($XML::XPath::EncodeUtf8AsEntity) { 156 $str =~ s/([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)|([$default])|(]]>)/ 157 defined($1) ? XmlUtf8Decode ($1) : 158 defined ($2) ? $DecodeDefaultEntity{$2} : "]]>" /egsx; 159 } 160 else { 161 $str =~ s/([$default])|(]]>)/ 162 defined ($1) ? $DecodeDefaultEntity{$1} : ']]>' /gsex; 163 } 164 165#?? could there be references that should not be expanded? 166# e.g. should not replace &#nn; ¯ and &abc; 167# $str =~ s/&(?!($ReName|#[0-9]+|#x[0-9a-fA-F]+);)/&/go; 168 169 $str; 170} 171 172# 173# Opposite of XmlUtf8Decode plus it adds prefix "&#" or "&#x" and suffix ";" 174# The 2nd parameter ($hex) indicates whether the result is hex encoded or not. 175# 176sub XmlUtf8Decode 177{ 178 my ($str, $hex) = @_; 179 my $len = length ($str); 180 my $n; 181 182 if ($len == 2) { 183 my @n = unpack "C2", $str; 184 $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); 185 } 186 elsif ($len == 3) { 187 my @n = unpack "C3", $str; 188 $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + 189 ($n[2] & 0x3f); 190 } 191 elsif ($len == 4) { 192 my @n = unpack "C4", $str; 193 $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + 194 (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); 195 } 196 elsif ($len == 1) { # just to be complete... 197 $n = ord ($str); 198 } 199 else { 200 die "bad value [$str] for XmlUtf8Decode"; 201 } 202 $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; 203} 204 205sub new { 206 my $class = shift; 207 no strict 'refs'; 208 my $impl = $class . "Impl"; 209 my $this = $impl->new(@_); 210 if ($XML::XPath::SafeMode) { 211 return $this; 212 } 213 my $self = \$this; 214 return bless $self, $class; 215} 216 217sub AUTOLOAD { 218 my $method = $AUTOLOAD; 219 $method =~ s/.*:://; 220# warn "AUTOLOAD $method!\n"; 221 no strict 'refs'; 222 *{$AUTOLOAD} = sub { 223 my $self = shift; 224 my $olderror = $@; # store previous exceptions 225 my $obj = eval { $$self }; 226 if ($@) { 227 if ($@ =~ /Not a SCALAR reference/) { 228 croak("No such method $method in " . ref($self)); 229 } 230 croak $@; 231 } 232 if ($obj) { 233 # make sure $@ propogates if this method call was the result 234 # of losing scope because of a die(). 235 if ($method =~ /^(DESTROY|del_parent_link)$/) { 236 $obj->$method(@_); 237 $@ = $olderror if $olderror; 238 return; 239 } 240 return $obj->$method(@_); 241 } 242 }; 243 goto &$AUTOLOAD; 244} 245 246package XML::XPath::NodeImpl; 247 248use vars qw/@ISA $AUTOLOAD/; 249@ISA = ('XML::XPath::Node'); 250 251sub new { 252 die "Virtual base method"; 253} 254 255sub getNodeType { 256 my $self = shift; 257 return XML::XPath::Node::UNKNOWN_NODE; 258} 259 260sub isElementNode {} 261sub isAttributeNode {} 262sub isNamespaceNode {} 263sub isTextNode {} 264sub isProcessingInstructionNode {} 265sub isPINode {} 266sub isCommentNode {} 267 268sub getNodeValue { 269 return; 270} 271 272sub getValue { 273 shift->getNodeValue(@_); 274} 275 276sub setNodeValue { 277 return; 278} 279 280sub setValue { 281 shift->setNodeValue(@_); 282} 283 284sub getParentNode { 285 my $self = shift; 286 return $self->[XML::XPath::Node::node_parent]; 287} 288 289sub getRootNode { 290 my $self = shift; 291 while (my $parent = $self->getParentNode) { 292 $self = $parent; 293 } 294 return $self; 295} 296 297sub getElementById { 298 my $self = shift; 299 my ($id) = @_; 300# warn "getElementById: $id\n"; 301 my $root = $self->getRootNode; 302 my $node = $root->[XML::XPath::Node::node_ids]{$id}; 303# warn "returning node: ", $node->getName, "\n"; 304 return $node; 305} 306 307sub getName { } 308sub getData { } 309 310sub getChildNodes { 311 return wantarray ? () : []; 312} 313 314sub getChildNode { 315 return; 316} 317 318sub getAttribute { 319 return; 320} 321 322sub getAttributes { 323 return wantarray ? () : []; 324} 325 326sub getAttributeNodes { 327 shift->getAttributes(@_); 328} 329 330sub getNamespaceNodes { 331 return wantarray ? () : []; 332} 333 334sub getNamespace { 335 return; 336} 337 338sub getLocalName { 339 return; 340} 341 342sub string_value { return; } 343 344sub get_pos { 345 my $self = shift; 346 return $self->[XML::XPath::Node::node_pos]; 347} 348 349sub set_pos { 350 my $self = shift; 351 $self->[XML::XPath::Node::node_pos] = shift; 352} 353 354sub get_global_pos { 355 my $self = shift; 356 return $self->[XML::XPath::Node::node_global_pos]; 357} 358 359sub set_global_pos { 360 my $self = shift; 361 $self->[XML::XPath::Node::node_global_pos] = shift; 362} 363 364sub renumber { 365 my $self = shift; 366 my $search = shift; 367 my $diff = shift; 368 369 foreach my $node ($self->findnodes($search)) { 370 $node->set_global_pos( 371 $node->get_global_pos + $diff 372 ); 373 } 374} 375 376sub insertAfter { 377 my $self = shift; 378 my $newnode = shift; 379 my $posnode = shift; 380 381 my $pos_number = eval { $posnode->[XML::XPath::Node::node_children][-1]->get_global_pos() + 1; }; 382 if (!defined $pos_number) { 383 $pos_number = $posnode->get_global_pos() + 1; 384 } 385 386 eval { 387 if ($pos_number == 388 $posnode->findnodes( 389 'following::node()' 390 )->get_node(1)->get_global_pos()) { 391 $posnode->renumber('following::node()', +5); 392 } 393 }; 394 395 my $pos = $posnode->get_pos; 396 397 $newnode->setParentNode($self); 398 splice @{$self->[XML::XPath::Node::node_children]}, $pos + 1, 0, $newnode; 399 400 for (my $i = $pos + 1; $i < @{$self->[XML::XPath::Node::node_children]}; $i++) { 401 $self->[XML::XPath::Node::node_children][$i]->set_pos($i); 402 } 403 404 $newnode->set_global_pos($pos_number); 405} 406 407sub insertBefore { 408 my $self = shift; 409 my $newnode = shift; 410 my $posnode = shift; 411 412 my $pos_number = ($posnode->getPreviousSibling() || $posnode->getParentNode)->get_global_pos(); 413 if ($pos_number == $posnode->get_global_pos()) { 414 $posnode->renumber('self::node() | descendant::node() | following::node()', +5); 415 } 416 417 my $pos = $posnode->get_pos; 418 419 $newnode->setParentNode($self); 420 splice @{$self->[XML::XPath::Node::node_children]}, $pos, 0, $newnode; 421 422 for (my $i = $pos; $i < @{$self->[XML::XPath::Node::node_children]}; $i++) { 423 $self->[XML::XPath::Node::node_children][$i]->set_pos($i); 424 } 425 426 $newnode->set_global_pos($pos_number); 427} 428 429sub getPreviousSibling { 430 my $self = shift; 431 my $pos = $self->[XML::XPath::Node::node_pos]; 432 return unless $self->[XML::XPath::Node::node_parent]; 433 return $self->[XML::XPath::Node::node_parent]->getChildNode($pos); 434} 435 436sub getNextSibling { 437 my $self = shift; 438 my $pos = $self->[XML::XPath::Node::node_pos]; 439 return unless $self->[XML::XPath::Node::node_parent]; 440 return $self->[XML::XPath::Node::node_parent]->getChildNode($pos + 2); 441} 442 443sub setParentNode { 444 my $self = shift; 445 my $parent = shift; 446# warn "SetParent of ", ref($self), " to ", $parent->[XML::XPath::Node::node_name], "\n"; 447 $self->[XML::XPath::Node::node_parent] = $parent; 448} 449 450sub del_parent_link { 451 my $self = shift; 452 $self->[XML::XPath::Node::node_parent] = undef; 453} 454 455sub dispose { 456 my $self = shift; 457 foreach my $kid ($self->getChildNodes) { 458 $kid->dispose; 459 } 460 foreach my $kid ($self->getAttributeNodes) { 461 $kid->dispose; 462 } 463 foreach my $kid ($self->getNamespaceNodes) { 464 $kid->dispose; 465 } 466 $self->[XML::XPath::Node::node_parent] = undef; 467} 468 469sub to_number { 470 my $num = shift->string_value; 471 return XML::XPath::Number->new($num); 472} 473 474sub find { 475 my $node = shift; 476 my ($path) = @_; 477 my $xp = XML::XPath->new(); # new is v. lightweight 478 return $xp->find($path, $node); 479} 480 481sub findvalue { 482 my $node = shift; 483 my ($path) = @_; 484 my $xp = XML::XPath->new(); 485 return $xp->findvalue($path, $node); 486} 487 488sub findnodes { 489 my $node = shift; 490 my ($path) = @_; 491 my $xp = XML::XPath->new(); 492 return $xp->findnodes($path, $node); 493} 494 495sub matches { 496 my $node = shift; 497 my ($path, $context) = @_; 498 my $xp = XML::XPath->new(); 499 return $xp->matches($node, $path, $context); 500} 501 502sub to_sax { 503 my $self = shift; 504 unshift @_, 'Handler' if @_ == 1; 505 my %handlers = @_; 506 507 my $doch = $handlers{DocumentHandler} || $handlers{Handler}; 508 my $dtdh = $handlers{DTDHandler} || $handlers{Handler}; 509 my $enth = $handlers{EntityResolver} || $handlers{Handler}; 510 511 $self->_to_sax ($doch, $dtdh, $enth); 512} 513 514sub DESTROY {} 515 516use Carp; 517 518sub _to_sax { 519 carp "_to_sax not implemented in ", ref($_[0]); 520} 521 5221; 523__END__ 524 525=head1 NAME 526 527XML::XPath::Node - internal representation of a node 528 529=head1 API 530 531The Node API aims to emulate DOM to some extent, however the API 532isn't quite compatible with DOM. This is to ease transition from 533XML::DOM programming to XML::XPath. Compatibility with DOM may 534arise once XML::DOM gets namespace support. 535 536=head2 new 537 538Creates a new node. See the sub-classes for parameters to pass to new(). 539 540=head2 getNodeType 541 542Returns one of ELEMENT_NODE, TEXT_NODE, COMMENT_NODE, ATTRIBUTE_NODE, 543PROCESSING_INSTRUCTION_NODE or NAMESPACE_NODE. UNKNOWN_NODE is returned 544if the sub-class doesn't implement getNodeType - but that means 545something is broken! The constants are exported by default from 546XML::XPath::Node. The constants have the same numeric value as the 547XML::DOM versions. 548 549=head2 getParentNode 550 551Returns the parent of this node, or undef if this is the root node. Note 552that the root node is the root node in terms of XPath - not the root 553element node. 554 555=head2 to_sax ( $handler | %handlers ) 556 557Generates sax calls to the handler or handlers. See the PerlSAX docs for 558details (not yet implemented correctly). 559 560=head1 MORE INFO 561 562See the sub-classes for the meaning of the rest of the API: 563 564=over 4 565 566=item * 567 568L<XML::XPath::Node::Element> 569 570=item * 571 572L<XML::XPath::Node::Attribute> 573 574=item * 575 576L<XML::XPath::Node::Namespace> 577 578=item * 579 580L<XML::XPath::Node::Text> 581 582=item * 583 584L<XML::XPath::Node::Comment> 585 586=item * 587 588L<XML::XPath::Node::PI> 589 590=back 591 592=cut 593