package XML::Parser::Expat;

# Perl-side half of the expat binding.  The functions called below that are
# not defined in this file (ParserCreate, ParseString, SetBase, ...) are
# provided by the XS code loaded through "bootstrap".
#
# The {_State_} field of a parser object takes three values in this file:
#   0 - freshly constructed (set in new)
#   1 - a parse is in progress (set in parse / parse_more)
#   2 - the parse has finished (set at the end of parse / parse_done)

require 5.004;

use strict;
use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path
            $have_File_Spec);
use Carp;

require DynaLoader;

@ISA = qw(DynaLoader);
$VERSION = "2.34";

# True when File::Spec is already loaded or can be loaded now; otherwise
# paths are assembled with a plain '/' separator.
$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';

# Directories searched by load_encoding: every existing XML/Parser/Encodings
# directory below an @INC entry, followed by the current directory.
%Encoding_Table = ();
if ($have_File_Spec) {
    @Encoding_Path = (
        grep(-d $_,
            map(File::Spec->catdir($_, qw(XML Parser Encodings)), @INC)),
        File::Spec->curdir
    );
}
else {
    @Encoding_Path
        = (grep(-d $_, map($_ . '/XML/Parser/Encodings', @INC)), '.');
}

bootstrap XML::Parser::Expat $VERSION;

# Maps the handler type names accepted by setHandlers to the XS function
# that installs a handler of that type.
%Handler_Setters = (
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler
);

# Constructor.  The option hash itself becomes the object; bookkeeping
# fields are added to it and the underlying parser is created last so that
# ParserCreate sees the fully initialised object.
sub new {
    my ($class, %args) = @_;
    my $self = bless \%args, $class;
    $args{_State_}      = 0;
    $args{Context}      = [];
    $args{Namespaces}   ||= 0;
    $args{ErrorMessage} ||= '';
    if ($args{Namespaces}) {
        $args{Namespace_Table} = {};
        $args{Namespace_List}  = [undef];
        $args{Prefix_Table}    = {};
        $args{New_Prefixes}    = [];
    }
    $args{_Setters} = \%Handler_Setters;
    $args{Parser}
        = ParserCreate($self, $args{ProtocolEncoding}, $args{Namespaces});
    $self;
}

# Plain function (not a method): load an encoding map file and return the
# encoding name reported by LoadEncoding.  Croaks when the file cannot be
# opened or read, or is not recognised as an encmap.
sub load_encoding {
    my ($file) = @_;

    # Lower-case the basename only and make sure it ends in '.enc'.
    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;

    # Relative names are looked up along @Encoding_Path; first hit wins.
    unless ($file =~ m!^/!) {
        foreach (@Encoding_Path) {
            my $tmp = ($have_File_Spec
                ? File::Spec->catfile($_, $file)
                : "$_/$file");
            if (-e $tmp) {
                $file = $tmp;
                last;
            }
        }
    }

    # NOTE(review): two-argument open kept for compatibility with the
    # "require 5.004" above; $file is interpreted by open's mode parsing.
    local (*ENC);
    open(ENC, $file) or croak("Couldn't open encmap $file:\n$!\n");
    binmode(ENC);
    my $data;
    my $br = sysread(ENC, $data, -s $file);
    croak("Trouble reading $file:\n$!\n")
        unless defined($br);
    close(ENC);

    my $name = LoadEncoding($data, $br);
    croak("$file isn't an encmap file")
        unless defined($name);

    $name;
}    # End load_encoding

# Install (or, with a false handler, remove) handlers given as
# TYPE => CODEREF pairs.  Returns a flat list of TYPE => previous-handler
# pairs in the same order.  Croaks on an odd argument count, a non-CODE
# handler, or an unknown type.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
        if (int(@handler_pairs) & 1);

    my @ret;

    while (@handler_pairs) {
        my $type    = shift @handler_pairs;
        my $handler = shift @handler_pairs;
        croak "Handler for $type not a Code ref"
            unless (!defined($handler)
                 or !$handler
                 or ref($handler) eq 'CODE');

        my $hndl = $self->{_Setters}->{$type};

        unless (defined($hndl)) {
            my @types = sort keys %{ $self->{_Setters} };
            croak("Unknown Expat handler type: $type\n Valid types: @types");
        }

        my $old = $hndl->($self->{Parser}, $handler);
        push(@ret, $type, $old);
    }

    return @ret;
}

# Shared by xpcroak and xpcarp: append the current line number and, when
# the ErrorContext option is defined, the surrounding document text.
sub _message_with_position {
    my ($self, $message) = @_;

    my $eclines = $self->{ErrorContext};
    my $line    = GetCurrentLineNumber($self->{Parser});
    $message .= " at line $line";
    $message .= ":\n" . $self->position_in_context($eclines)
        if defined($eclines);
    return $message;
}

# croak with MESSAGE extended by the current parse position.
sub xpcroak {
    my ($self, $message) = @_;
    croak $self->_message_with_position($message);
}

# carp with MESSAGE extended by the current parse position.
sub xpcarp {
    my ($self, $message) = @_;
    carp $self->_message_with_position($message);
}

# The accessors below only call into the parser while a parse is in
# progress (_State_ == 1); otherwise they fall off the end of the sub.

sub default_current {
    my $self = shift;
    if ($self->{_State_} == 1) {
        return DefaultCurrent($self->{Parser});
    }
}

sub recognized_string {
    my $self = shift;
    if ($self->{_State_} == 1) {
        return RecognizedString($self->{Parser});
    }
}

sub original_string {
    my $self = shift;
    if ($self->{_State_} == 1) {
        return OriginalString($self->{Parser});
    }
}

sub current_line {
    my $self = shift;
    if ($self->{_State_} == 1) {
        return GetCurrentLineNumber($self->{Parser});
    }
}

sub current_column {
    my $self = shift;
    if ($self->{_State_} == 1) {
        return GetCurrentColumnNumber($self->{Parser});
    }
}

sub current_byte {
    my $self = shift;
    if ($self->{_State_} == 1) {
        return GetCurrentByteIndex($self->{Parser});
    }
}

# Return the current base; when an argument is supplied (even undef) also
# set the base to it.  The value returned is the one in effect before the
# change.
sub base {
    my ($self, $newbase) = @_;
    my $p       = $self->{Parser};
    my $oldbase = GetBase($p);
    SetBase($p, $newbase) if @_ > 1;
    return $oldbase;
}

# List of the names held in the Context stack, outermost first.
sub context {
    my $ctx = $_[0]->{Context};
    @$ctx;
}

# Last entry of the Context stack, or undef when the stack is empty.
sub current_element {
    my ($self) = @_;
    @{ $self->{Context} } ? $self->{Context}->[-1] : undef;
}

# True when ELEMENT equals (per eq_name) the last Context entry; undef
# when the stack is empty.
sub in_element {
    my ($self, $element) = @_;
    @{ $self->{Context} }
        ? $self->eq_name($self->{Context}->[-1], $element)
        : undef;
}

# Number of Context entries that equal ELEMENT (per eq_name).
sub within_element {
    my ($self, $element) = @_;
    my $cnt = 0;
    foreach (@{ $self->{Context} }) {
        $cnt++ if $self->eq_name($_, $element);
    }
    return $cnt;
}

# Number of entries on the Context stack.
sub depth {
    my ($self) = @_;
    int(@{ $self->{Context} });
}

sub element_index {
    my ($self) = @_;

    if ($self->{_State_} == 1) {
        return ElementIndex($self->{Parser});
    }
}

################
# Namespace methods

# Look NAME up in Namespace_List using its numeric value as the index.
# NOTE(review): presumably names produced by the XS layer carry the
# namespace index as their numeric value - confirm against Expat.xs.
# Warnings are silenced because a plain string name is not numeric.
sub namespace {
    my ($self, $name) = @_;
    local ($^W) = 0;
    $self->{Namespace_List}->[ int($name) ];
}

# Two names are equal when both their numeric and their string values
# match.
sub eq_name {
    my ($self, $nm1, $nm2) = @_;
    local ($^W) = 0;

    int($nm1) == int($nm2) and $nm1 eq $nm2;
}

# With a true NAMESPACE, delegate to GenerateNSName; otherwise return NAME
# unchanged.
sub generate_ns_name {
    my ($self, $name, $namespace) = @_;

    $namespace
        ? GenerateNSName($name, $namespace, $self->{Namespace_Table},
                         $self->{Namespace_List})
        : $name;
}

# Contents of New_Prefixes, or an empty list without namespace processing.
sub new_ns_prefixes {
    my ($self) = @_;
    if ($self->{Namespaces}) {
        return @{ $self->{New_Prefixes} };
    }
    return ();
}

# Innermost URI bound to PREFIX, or undef when it is unbound or namespace
# processing is off.
sub expand_ns_prefix {
    my ($self, $prefix) = @_;

    if ($self->{Namespaces}) {
        my $stack = $self->{Prefix_Table}->{$prefix};
        return (defined($stack) and @$stack) ? $stack->[-1] : undef;
    }

    return undef;
}

# Keys of Prefix_Table; '#default' is omitted when its innermost binding
# is undefined.
sub current_ns_prefixes {
    my ($self) = @_;

    if ($self->{Namespaces}) {
        my %set = %{ $self->{Prefix_Table} };

        if (exists $set{'#default'} and not defined($set{'#default'}->[-1])) {
            delete $set{'#default'};
        }

        return keys %set;
    }

    return ();
}

################################################################
# Namespace declaration handlers
#

# Push URI onto the binding stack for PREFIX (undef prefix is stored as
# '#default') and record the prefix in New_Prefixes.
sub NamespaceStart {
    my ($self, $prefix, $uri) = @_;

    $prefix = '#default' unless defined $prefix;
    my $stack = $self->{Prefix_Table}->{$prefix};

    if (defined $stack) {
        push(@$stack, $uri);
    }
    else {
        $self->{Prefix_Table}->{$prefix} = [$uri];
    }

    # The New_Prefixes list gets emptied at end of startElement function
    # in Expat.xs

    push(@{ $self->{New_Prefixes} }, $prefix);
}

# Pop the innermost binding for PREFIX; drop the table entry when it was
# the last one.  A prefix with no table entry is tolerated instead of
# dying on an undefined array dereference under "strict refs".
sub NamespaceEnd {
    my ($self, $prefix) = @_;

    $prefix = '#default' unless defined $prefix;

    my $stack = $self->{Prefix_Table}->{$prefix};
    if (defined($stack) and @$stack > 1) {
        pop(@$stack);
    }
    else {
        delete $self->{Prefix_Table}->{$prefix};
    }
}

################

sub specified_attr {
    my $self = shift;

    if ($self->{_State_} == 1) {
        return GetSpecifiedAttributeCount($self->{Parser});
    }
}

# While a parse is in progress, unset every handler on the parser.
sub finish {
    my ($self) = @_;
    if ($self->{_State_} == 1) {
        my $parser = $self->{Parser};
        UnsetAllHandlers($parser);
    }
}

# Return the document text around the current position with a "====^"
# pointer line inserted below the current line.  Returns '' when
# PositionContext yields no string, and nothing outside of a parse.
sub position_in_context {
    my ($self, $lines) = @_;
    if ($self->{_State_} == 1) {
        my $parser = $self->{Parser};
        my ($string, $linepos) = PositionContext($parser, $lines);

        return '' unless defined($string);

        my $col = GetCurrentColumnNumber($parser);
        my $ptr = ('=' x ($col - 1)) . '^' . "\n";
        my $ret;

        # Decided before the newline is appended: is there text after the
        # insertion point?
        my $dosplit = $linepos < length($string);

        $string .= "\n" unless $string =~ /\n$/;

        if ($dosplit) {
            $ret = substr($string, 0, $linepos) . $ptr
                 . substr($string, $linepos);
        }
        else {
            $ret = $string . $ptr;
        }

        return $ret;
    }
}

# Return TEXT with '&' and '<' replaced by their entities.  Each extra
# single-character argument is escaped too: '>', '"' and "'" become the
# predefined entities, anything else a hexadecimal character reference.
# Croaks when an extra argument is longer than one character; empty or
# undefined extras are ignored (an empty pattern would otherwise reuse the
# last successful regex).
sub xml_escape {
    my $self = shift;
    my $text = shift;

    # '&' must go first so the entities inserted below are not re-escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;

    foreach my $char (@_) {
        next unless defined($char) and length($char);
        croak "xml_escape: '$char' isn't a single character"
            if length($char) > 1;

        if ($char eq '>') {
            $text =~ s/>/&gt;/g;
        }
        elsif ($char eq '"') {
            $text =~ s/"/&quot;/g;
        }
        elsif ($char eq "'") {
            $text =~ s/'/&apos;/g;
        }
        else {
            my $rep  = '&#' . sprintf('x%X', ord($char)) . ';';
            my $ptrn = quotemeta($char);
            $text =~ s/$ptrn/$rep/g;
        }
    }
    $text;
}

# Forward INDEX to SkipUntil unless the parse has already finished.
sub skip_until {
    my $self = shift;
    if ($self->{_State_} <= 1) {
        SkipUntil($self->{Parser}, $_[0]);
    }
}

sub release {
    my $self = shift;
    ParserRelease($self->{Parser});
}

# Free the underlying parser.  Skipped when the object never received one
# (e.g. ParserCreate died inside new).
sub DESTROY {
    my $self = shift;
    ParserFree($self->{Parser}) if defined $self->{Parser};
}

# Parse a whole document from a string or a filehandle.  Croaks when this
# object has already been used for a parse, and with {ErrorMessage} when
# the parse fails; otherwise returns the true parse result.
sub parse {
    my $self = shift;
    my $arg  = shift;
    croak "Parse already in progress (Expat)" if $self->{_State_};
    $self->{_State_} = 1;
    my $parser = $self->{Parser};
    my $ioref;
    my $result = 0;

    # Work out whether $arg is some kind of filehandle.
    if (defined $arg) {
        if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
            $ioref = $arg;
        }
        elsif (tied($arg)) {
            my $class = ref($arg);
            no strict 'refs';
            $ioref = $arg if defined &{"${class}::TIEHANDLE"};
        }
        else {
            require IO::Handle;
            eval {
                no strict 'refs';
                $ioref = *{$arg}{IO} if defined *{$arg};
            };
            undef $@;
        }
    }

    if (defined($ioref)) {
        my $delim = $self->{Stream_Delimiter};

        # Override the input record separator for the duration of the
        # stream parse only; "local" restores it even when ParseStream
        # dies and does not depend on the handle's class providing
        # input_record_separator.
        local $/ = "\n$delim\n" if defined($delim);

        $result = ParseStream($parser, $ioref, $delim);
    }
    else {
        $result = ParseString($parser, $arg);
    }

    $self->{_State_} = 2;
    $result or croak $self->{ErrorMessage};
}

# Alias for parse, kept for older callers.
sub parsestring {
    my $self = shift;
    $self->parse(@_);
}

# Open the named file in binary mode and parse it.
sub parsefile {
    my $self = shift;
    croak "Parser has already been used" if $self->{_State_};

    # NOTE(review): two-argument open kept for compatibility with the
    # "require 5.004" above; the file name is subject to open's mode
    # parsing, so do not pass untrusted names.
    local (*FILE);
    open(FILE, $_[0]) or croak "Couldn't open $_[0]:\n$!";
    binmode(FILE);
    my $ret = $self->parse(*FILE);
    close(FILE);
    $ret;
}

################################################################
package XML::Parser::ContentModel;
use overload '""' => \&asString, 'eq' => \&thiseq;

# Values compared against the {Type} field.
sub EMPTY ()  {1}
sub ANY ()    {2}
sub MIXED ()  {3}
sub NAME ()   {4}
sub CHOICE () {5}
sub SEQ ()    {6}

sub isempty {
    return $_[0]->{Type} == EMPTY;
}

sub isany {
    return $_[0]->{Type} == ANY;
}

sub ismixed {
    return $_[0]->{Type} == MIXED;
}

sub isname {
    return $_[0]->{Type} == NAME;
}

sub name {
    return $_[0]->{Tag};
}

sub ischoice {
    return $_[0]->{Type} == CHOICE;
}

sub isseq {
    return $_[0]->{Type} == SEQ;
}

sub quant {
    return $_[0]->{Quant};
}

# The list in {Children}, or undef when there is none.
sub children {
    my $children = $_[0]->{Children};
    if (defined $children) {
        return @$children;
    }
    return undef;
}

# Render the model in DTD syntax.  EMPTY and ANY return early and so never
# get a quantifier appended; every other type has {Quant} appended when it
# is true.
sub asString {
    my ($self) = @_;
    my $ret;

    if ($self->{Type} == NAME) {
        $ret = $self->{Tag};
    }
    elsif ($self->{Type} == EMPTY) {
        return "EMPTY";
    }
    elsif ($self->{Type} == ANY) {
        return "ANY";
    }
    elsif ($self->{Type} == MIXED) {
        $ret = '(#PCDATA';
        foreach (@{ $self->{Children} }) {
            $ret .= '|' . $_;
        }
        $ret .= ')';
    }
    else {
        my $sep = $self->{Type} == CHOICE ? '|' : ',';
        $ret = '('
             . join($sep, map { $_->asString } @{ $self->{Children} })
             . ')';
    }

    $ret .= $self->{Quant} if $self->{Quant};
    return $ret;
}

# Overloaded 'eq': compare the string form with the other operand.
sub thiseq {
    my $self = shift;

    return $self->asString eq $_[0];
}

################################################################
package XML::Parser::ExpatNB;

use vars qw(@ISA);
use Carp;

@ISA = qw(XML::Parser::Expat);

# The three whole-document entry points are disabled in this subclass.

sub parse {
    my $self  = shift;
    my $class = ref($self);
    croak "parse method not supported in $class";
}

sub parsestring {
    my $self  = shift;
    my $class = ref($self);
    croak "parsestring method not supported in $class";
}

sub parsefile {
    my $self  = shift;
    my $class = ref($self);
    croak "parsefile method not supported in $class";
}

# Feed one more chunk of DATA to the parser; croaks with {ErrorMessage}
# when ParsePartial reports failure.
sub parse_more {
    my ($self, $data) = @_;

    $self->{_State_} = 1;
    my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);

    croak $self->{ErrorMessage} unless $ret;
}

# Signal end of input.  On failure the parser is released and the error
# message croaked.  On success the FinalHandler (if any) is called in the
# caller's context and its result returned; without one, scalar context
# gets ParseDone's result and list context an empty list.  The parser is
# released in every case.
sub parse_done {
    my $self = shift;

    my $ret = XML::Parser::Expat::ParseDone($self->{Parser});
    unless ($ret) {
        my $msg = $self->{ErrorMessage};
        $self->release;
        croak $msg;
    }

    $self->{_State_} = 2;

    my $result = $ret;
    my @result = ();
    my $final  = $self->{FinalHandler};
    if (defined $final) {
        if (wantarray) {
            @result = $final->($self);
        }
        else {
            $result = $final->($self);
        }
    }

    $self->release;

    return unless defined wantarray;
    return wantarray ? @result : $result;
}

################################################################

package XML::Parser::Encinfo;

sub DESTROY {
    my $self = shift;
    XML::Parser::Expat::FreeEncoding($self);
}

1;

__END__

=head1 NAME

XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser

=head1 SYNOPSIS

 use XML::Parser::Expat;

 $parser = new XML::Parser::Expat;
 $parser->setHandlers('Start' => \&sh,
                      'End'   => \&eh,
                      'Char'  => \&ch);
 open(FOO, 'info.xml') or die "Couldn't open";
 $parser->parse(*FOO);
 close(FOO);
 # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');

 sub sh
 {
   my ($p, $el, %atts) = @_;
   $p->setHandlers('Char' => \&spec)
     if ($el eq 'special');
   ...
 }

 sub eh
 {
   my ($p, $el) = @_;
   $p->setHandlers('Char' => \&ch)  # Special elements won't contain
     if ($el eq 'special');         # other special elements
   ...
 }

=head1 DESCRIPTION

This module provides an interface to James Clark's XML parser, expat. As in
expat, a single instance of the parser can only parse one document. Calls
to parsestring after the first for a given instance will die.

Expat (and XML::Parser::Expat) are event based. As the parser recognizes
parts of the document (say the start or end of an XML element), then any
handlers registered for that type of an event are called with suitable
parameters.

=head1 METHODS

=over 4

=item new

This is a class method, the constructor for XML::Parser::Expat. Options are
passed as keyword value pairs. The recognized options are:

=over 4

=item * ProtocolEncoding

The protocol encoding name. The default is none. The expat built-in
encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
Other encodings may be used if they have encoding maps in one of the
directories in the @Encoding_Path list.
Setting the protocol encoding
overrides any encoding in the XML declaration.

=item * Namespaces

When this option is given with a true value, then the parser does namespace
processing. By default, namespace processing is turned off. When it is
turned on, the parser consumes I<xmlns> attributes and strips off prefixes
from element and attribute names where those prefixes have a defined
namespace. A name's namespace can be found using the L<"namespace"> method
and two names can be checked for absolute equality with the L<"eq_name">
method.

=item * NoExpand

Normally, the parser will try to expand references to entities defined in
the internal subset. If this option is set to a true value, and a default
handler is also set, then the default handler will be called when an
entity reference is seen in text. This has no effect if a default handler
has not been registered, and it has no effect on the expansion of entity
references inside attribute values.

=item * Stream_Delimiter

This option takes a string value. When this string is found alone on a line
while parsing from a stream, then the parse is ended as if it saw an end of
file. The intended use is with a stream of xml documents in a MIME multipart
format. The string should not contain a trailing newline.

=item * ErrorContext

When this option is defined, errors are reported in context. The value
of ErrorContext should be the number of lines to show on either side of
the line in which the error occurred.

=item * ParseParamEnt

Unless standalone is set to "yes" in the XML declaration, setting this to
a true value allows the external DTD to be read, and parameter entities
to be parsed and expanded.

=item * Base

The base to use for relative pathnames or URLs. This can also be done by
using the base method.

=back

=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])

This method registers handlers for the various events. If no handlers are
registered, then a call to parsestring or parsefile will only determine if
the corresponding XML document is well formed (by returning without error.)
This may be called from within a handler, after the parse has started.

Setting a handler to something that evaluates to false unsets that
handler.

This method returns a list of type, handler pairs corresponding to the
input. The handlers returned are the ones that were in effect before the
call to setHandlers.

The recognized events and the parameters passed to the corresponding
handlers are:

=over 4

=item * Start (Parser, Element [, Attr, Val [,...]])

This event is generated when an XML start tag is recognized. Parser is
an XML::Parser::Expat instance. Element is the name of the XML element that
is opened with the start tag. The Attr & Val pairs are generated for each
attribute in the start tag.

=item * End (Parser, Element)

This event is generated when an XML end tag is recognized. Note that
an XML empty tag (<foo/>) generates both a start and an end event.

There are always lower level start and end handlers installed that wrap
the corresponding callbacks. This is to handle the context mechanism.
A consequence of this is that the default handler (see below) will not
see a start tag or end tag unless the default_current method is called.

=item * Char (Parser, String)

This event is generated when non-markup is recognized. The non-markup
sequence of characters is in String. A single non-markup sequence of
characters may generate multiple calls to this handler. Whatever the
encoding of the string in the original document, this is given to the
handler in UTF-8.

=item * Proc (Parser, Target, Data)

This event is generated when a processing instruction is recognized.

=item * Comment (Parser, String)

This event is generated when a comment is recognized.

=item * CdataStart (Parser)

This is called at the start of a CDATA section.

=item * CdataEnd (Parser)

This is called at the end of a CDATA section.

=item * Default (Parser, String)

This is called for any characters that don't have a registered handler.
This includes both characters that are part of markup for which no
events are generated (markup declarations) and characters that
could generate events, but for which no handler has been registered.

Whatever the encoding in the original document, the string is returned to
the handler in UTF-8.

=item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation)

This is called for a declaration of an unparsed entity. Entity is the name
of the entity. Base is the base to be used for resolving a relative URI.
Sysid is the system id. Pubid is the public id. Notation is the notation
name. Base and Pubid may be undefined.

=item * Notation (Parser, Notation, Base, Sysid, Pubid)

This is called for a declaration of notation. Notation is the notation name.
Base is the base to be used for resolving a relative URI. Sysid is the system
id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.

=item * ExternEnt (Parser, Base, Sysid, Pubid)

This is called when an external entity is referenced. Base is the base to be
used for resolving a relative URI. Sysid is the system id. Pubid is the public
id. Base and Pubid may be undefined.

This handler should either return a string, which represents the contents of
the external entity, or return an open filehandle that can be read to obtain
the contents of the external entity, or return undef, which indicates the
external entity couldn't be found and will generate a parse error.

If an open filehandle is returned, it must be returned as either a glob
(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).

=item * ExternEntFin (Parser)

This is called after an external entity has been parsed. It allows
applications to perform cleanup on actions performed in the above
ExternEnt handler.

=item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam)

This is called when an entity is declared. For internal entities, the Val
parameter will contain the value and the remaining three parameters will
be undefined. For external entities, the Val parameter
will be undefined, the Sysid parameter will have the system id, the Pubid
parameter will have the public id if it was provided (it will be undefined
otherwise), the Ndata parameter will contain the notation for unparsed
entities. If this is a parameter entity declaration, then the IsParam
parameter is true.

Note that this handler and the Unparsed handler above overlap. If both are
set, then this handler will not be called for unparsed entities.

=item * Element (Parser, Name, Model)

The element handler is called when an element declaration is found. Name is
the element name, and Model is the content model as an
XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods">
for methods available for this class.

=item * Attlist (Parser, Elname, Attname, Type, Default, Fixed)

This handler is called for each attribute in an ATTLIST declaration.
So an ATTLIST declaration that has multiple attributes
will generate multiple calls to this handler.
The Elname parameter is the
name of the element with which the attribute is being associated. The Attname
parameter is the name of the attribute. Type is the attribute type, given as
a string. Default is the default value, which will either be "#REQUIRED",
"#IMPLIED" or a quoted string (i.e. the returned string will begin and end
with a quote character). If Fixed is true, then this is a fixed attribute.

=item * Doctype (Parser, Name, Sysid, Pubid, Internal)

This handler is called for DOCTYPE declarations. Name is the document type
name. Sysid is the system id of the document type, if it was provided,
otherwise it's undefined. Pubid is the public id of the document type,
which will be undefined if no public id was given. Internal will be
true or false, indicating whether or not the doctype declaration contains
an internal subset.

=item * DoctypeFin (Parser)

This handler is called after parsing of the DOCTYPE declaration has finished,
including any internal or external DTD declarations.

=item * XMLDecl (Parser, Version, Encoding, Standalone)

This handler is called for XML declarations. Version is a string containing
the version. Encoding is either undefined or contains an encoding string.
Standalone is either undefined, or true or false. Undefined indicates
that no standalone parameter was given in the XML declaration. True or
false indicates "yes" or "no" respectively.

=back

=item namespace(name)

Return the URI of the namespace that the name belongs to. If the name doesn't
belong to any namespace, an undef is returned. This is only valid on names
received through the Start or End handlers from a single document, or through
a call to the generate_ns_name method. In other words, don't use names
generated from one instance of XML::Parser::Expat with other instances.

=item eq_name(name1, name2)

Return true if name1 and name2 are identical (i.e. same name and from
the same namespace.) This is only meaningful if both names were obtained
through the Start or End handlers from a single document, or through
a call to the generate_ns_name method.

=item generate_ns_name(name, namespace)

Return a name, associated with a given namespace, good for using with the
above 2 methods. The namespace argument should be the namespace URI, not
a prefix.

=item new_ns_prefixes

When called from a start tag handler, returns namespace prefixes declared
with this start tag. If called elsewhere (or if there were no namespace
prefixes declared), it returns an empty list. Setting of the default
namespace is indicated with '#default' as a prefix.

=item expand_ns_prefix(prefix)

Return the uri to which the given prefix is currently bound. Returns
undef if the prefix isn't currently bound. Use '#default' to find the
current binding of the default namespace (if any).

=item current_ns_prefixes

Return a list of currently bound namespace prefixes. The order of
the prefixes in the list has no meaning. If the default namespace is
currently bound, '#default' appears in the list.

=item recognized_string

Returns the string from the document that was recognized in order to call
the current handler. For instance, when called from a start handler, it
will give us the start-tag string. The string is encoded in UTF-8.
This method doesn't return a meaningful string inside declaration handlers.

=item original_string

Returns the verbatim string from the document that was recognized in
order to call the current handler. The string is in the original document
encoding. This method doesn't return a meaningful string inside declaration
handlers.

=item default_current

When called from a handler, causes the sequence of characters that generated
the corresponding event to be sent to the default handler (if one is
registered). Use of this method is deprecated in favor of the
recognized_string method, which you can use without installing a default
handler. This method doesn't deliver a meaningful string to the default
handler when called from inside declaration handlers.

=item xpcroak(message)

Concatenate onto the given message the current line number within the
XML document plus the message implied by ErrorContext. Then croak with
the formed message.

=item xpcarp(message)

Concatenate onto the given message the current line number within the
XML document plus the message implied by ErrorContext. Then carp with
the formed message.

=item current_line

Returns the line number of the current position of the parse.

=item current_column

Returns the column number of the current position of the parse.

=item current_byte

Returns the current position of the parse.

=item base([NEWBASE]);

Returns the current value of the base for resolving relative URIs. If
NEWBASE is supplied, changes the base to that value.

=item context

Returns a list of element names that represent open elements, with the
last one being the innermost. Inside start and end tag handlers, this
will be the tag of the parent element.

=item current_element

Returns the name of the innermost currently opened element. Inside
start or end handlers, returns the parent of the element associated
with those tags.

=item in_element(NAME)

Returns true if NAME is equal to the name of the innermost currently opened
element.
If namespace processing is being used and you want to check
against a name that may be in a namespace, then use the generate_ns_name
method to create the NAME argument.

=item within_element(NAME)

Returns the number of times the given name appears in the context list.
If namespace processing is being used and you want to check
against a name that may be in a namespace, then use the generate_ns_name
method to create the NAME argument.

=item depth

Returns the size of the context list.

=item element_index

Returns an integer that is the depth-first visit order of the current
element. This will be zero outside of the root element. For example,
this will return 1 when called from the start handler for the root element
start tag.

=item skip_until(INDEX)

INDEX is an integer that represents an element index. When this method
is called, all handlers are suspended until the start tag for an element
that has an index number equal to INDEX is seen. If a start handler has
been set, then this is the first tag that the start handler will see
after skip_until has been called.


=item position_in_context(LINES)

Returns a string that shows the current parse position. LINES should be
an integer >= 0 that represents the number of lines on either side of the
current parse line to place into the returned string.

=item xml_escape(TEXT [, CHAR [, CHAR ...]])

Returns TEXT with markup characters turned into character entities. Any
additional characters provided as arguments are also turned into character
references where found in TEXT.

=item parse (SOURCE)

The SOURCE parameter should either be a string containing the whole XML
document, or it should be an open IO::Handle.
Only a single document
may be parsed for a given instance of XML::Parser::Expat, so this will croak
if it's been called previously for this instance.

=item parsestring(XML_DOC_STRING)

Parses the given string as an XML document. Only a single document may be
parsed for a given instance of XML::Parser::Expat, so this will die if either
parsestring or parsefile has been called for this instance previously.

This method is deprecated in favor of the parse method.

=item parsefile(FILENAME)

Parses the XML document in the given file. Will die if parsestring or
parsefile has been called previously for this instance.

=item is_defaulted(ATTNAME)

NO LONGER WORKS. To find out if an attribute is defaulted please use
the specified_attr method.

=item specified_attr

When the start handler receives lists of attributes and values, the
non-defaulted (i.e. explicitly specified) attributes occur in the list
first. This method returns the number of specified items in the list.
So if this number is equal to the length of the list, there were no
defaulted values. Otherwise the number points to the index of the
first defaulted attribute name.

=item finish

Unsets all handlers (including internal ones that set context), but expat
continues parsing to the end of the document or until it finds an error.
It should finish up a lot faster than with the handlers set.

=item release

There are data structures used by XML::Parser::Expat that have circular
references. This means that these structures will never be garbage
collected unless these references are explicitly broken. Calling this
method breaks those references (and makes the instance unusable.)

Normally, higher level calls handle this for you, but if you are using
XML::Parser::Expat directly, then it's your responsibility to call it.
 1128 1129=back 1130 1131=head2 XML::Parser::ContentModel Methods 1132 1133The element declaration handlers are passed objects of this class as the 1134content model of the element declaration. They also represent content 1135particles, components of a content model. 1136 1137When referred to as a string, these objects are automagically converted to a 1138string representation of the model (or content particle). 1139 1140=over 4 1141 1142=item isempty 1143 1144This method returns true if the object is "EMPTY", false otherwise. 1145 1146=item isany 1147 1148This method returns true if the object is "ANY", false otherwise. 1149 1150=item ismixed 1151 1152This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", 1153false otherwise. 1154 1155=item isname 1156 1157This method returns true if the object is an element name. 1158 1159=item ischoice 1160 1161This method returns true if the object is a choice of content particles. 1162 1163 1164=item isseq 1165 1166This method returns true if the object is a sequence of content particles. 1167 1168=item quant 1169 1170This method returns undef or a string representing the quantifier 1171('?', '*', '+') associated with the model or particle. 1172 1173=item children 1174 1175This method returns undef or (for mixed, choice, and sequence types) 1176an array of component content particles. There will always be at least 1177one component for choices and sequences, but for a mixed content model 1178of pure PCDATA, "(#PCDATA)", undef is returned. 1179 1180=back 1181 1182=head2 XML::Parser::ExpatNB Methods 1183 1184The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used 1185for non-blocking access to the expat library. It does not support the parse, 1186parsestring, or parsefile methods, but it does have these additional methods: 1187 1188=over 4 1189 1190=item parse_more(DATA) 1191 1192Feed expat more text to munch on. 
 1193 1194=item parse_done 1195 1196Tell expat that it's gotten the whole document. 1197 1198=back 1199 1200=head1 FUNCTIONS 1201 1202=over 4 1203 1204=item XML::Parser::Expat::load_encoding(ENCODING) 1205 1206Load an external encoding. ENCODING is either the name of an encoding or 1207the name of a file. The basename is converted to lowercase and a '.enc' 1208extension is appended unless there's one already there. Then, unless 1209it's an absolute pathname (i.e. begins with '/'), the first file by that 1210name discovered in the @Encoding_Path path list is used. 1211 1212The encoding in the file is loaded and kept in the %Encoding_Table 1213table. Earlier encodings of the same name are replaced. 1214 1215This function is automatically called by expat when it encounters an encoding 1216it doesn't know about. Expat shouldn't call this twice for the same 1217encoding name. The only reason users should use this function is to 1218explicitly load an encoding not contained in the @Encoding_Path list. 1219 1220=back 1221 1222=head1 AUTHORS 1223 1224Larry Wall <F<larry@wall.org>> wrote version 1.0. 1225 1226Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API 1227for this version (2.x), provided documentation, and added some standard 1228package features. 1229 1230=cut 1231