1######################################################################## 2# Writer.pm - write an XML document. 3# Copyright (c) 1999 by Megginson Technologies. 4# Copyright (c) 2004, 2005 by Joseph Walton <joe@kafsemo.org>. 5# No warranty. Commercial and non-commercial use freely permitted. 6# 7# $Id: Writer.pm,v 1.48 2005/06/30 22:17:04 josephw Exp $ 8######################################################################## 9 10package XML::Writer; 11 12require 5.004; 13 14use strict; 15use vars qw($VERSION); 16use Carp; 17use IO::Handle; 18$VERSION = "0.600"; 19 20 21 22######################################################################## 23# Constructor. 24######################################################################## 25 26# 27# Public constructor. 28# 29# This actually does most of the work of the module: it defines closures 30# for all of the real processing, and selects the appropriate closures 31# to use based on the value of the UNSAFE parameter. The actual methods 32# are just stubs. 33# 34sub new { 35 my ($class, %params) = (@_); 36 37 # If the user wants namespaces, 38 # intercept the request here; it will 39 # come back to this constructor 40 # from within XML::Writer::Namespaces::new() 41 if ($params{NAMESPACES}) { 42 delete $params{NAMESPACES}; 43 return new XML::Writer::Namespaces(%params); 44 } 45 46 # Set up $self and basic parameters 47 my $self; 48 my $output; 49 my $unsafe = $params{UNSAFE}; 50 my $newlines = $params{NEWLINES}; 51 my $dataMode = $params{DATA_MODE}; 52 my $dataIndent = $params{DATA_INDENT} || 0; 53 54 # If the NEWLINES parameter is specified, 55 # set the $nl variable appropriately 56 my $nl = ''; 57 if ($newlines) { 58 $nl = "\n"; 59 } 60 61 my $outputEncoding = $params{ENCODING}; 62 my ($checkUnencodedRepertoire, $escapeEncoding); 63 if (lc($outputEncoding) eq 'us-ascii') { 64 $checkUnencodedRepertoire = \&_croakUnlessASCII; 65 $escapeEncoding = \&_escapeASCII; 66 } else { 67 my $doNothing = sub {}; 68 $checkUnencodedRepertoire = $doNothing; 69 $escapeEncoding = $doNothing; 70 } 71 72 # Parse variables 73 my @elementStack = (); 74 my $elementLevel = 0; 75 my %seen = (); 76 77 my $hasData = 0; 78 my @hasDataStack = (); 79 my $hasElement = 0; 80 my @hasElementStack = (); 81 my $hasHeading = 0; # Does this document have anything before the first element? 82 83 # 84 # Private method to show attributes. 85 # 86 my $showAttributes = sub { 87 my $atts = $_[0]; 88 my $i = 1; 89 while ($atts->[$i]) { 90 my $aname = $atts->[$i++]; 91 my $value = _escapeLiteral($atts->[$i++]); 92 $value =~ s/\x0a/\
\;/g; 93 &{$escapeEncoding}($value); 94 $output->print(" $aname=\"$value\""); 95 } 96 }; 97 98 # Method implementations: the SAFE_ 99 # versions perform error checking 100 # and then call the regular ones. 101 my $end = sub { 102 $output->print("\n"); 103 }; 104 105 my $SAFE_end = sub { 106 if (!$seen{ELEMENT}) { 107 croak("Document cannot end without a document element"); 108 } elsif ($elementLevel > 0) { 109 croak("Document ended with unmatched start tag(s): @elementStack"); 110 } else { 111 @elementStack = (); 112 $elementLevel = 0; 113 %seen = (); 114 &{$end}; 115 } 116 }; 117 118 my $xmlDecl = sub { 119 my ($encoding, $standalone) = (@_); 120 if ($standalone && $standalone ne 'no') { 121 $standalone = 'yes'; 122 } 123 124 # Only include an encoding if one has been explicitly supplied, 125 # either here or on construction. Allow the empty string 126 # to suppress it. 127 if (!defined($encoding)) { 128 $encoding = $outputEncoding; 129 } 130 $output->print("<?xml version=\"1.0\""); 131 if ($encoding) { 132 $output->print(" encoding=\"$encoding\""); 133 } 134 if ($standalone) { 135 $output->print(" standalone=\"$standalone\""); 136 } 137 $output->print("?>\n"); 138 $hasHeading = 1; 139 }; 140 141 my $SAFE_xmlDecl = sub { 142 if ($seen{ANYTHING}) { 143 croak("The XML declaration is not the first thing in the document"); 144 } else { 145 $seen{ANYTHING} = 1; 146 $seen{XMLDECL} = 1; 147 &{$xmlDecl}; 148 } 149 }; 150 151 my $pi = sub { 152 my ($target, $data) = (@_); 153 if ($data) { 154 $output->print("<?$target $data?>"); 155 } else { 156 $output->print("<?$target?>"); 157 } 158 if ($elementLevel == 0) { 159 $output->print("\n"); 160 $hasHeading = 1; 161 } 162 }; 163 164 my $SAFE_pi = sub { 165 my ($name, $data) = (@_); 166 $seen{ANYTHING} = 1; 167 if (($name =~ /^xml/i) && ($name !~ /^xml-stylesheet$/i)) { 168 carp("Processing instruction target begins with 'xml'"); 169 } 170 171 if ($name =~ /\?\>/ || (defined($data) && $data =~ /\?\>/)) { 172 croak("Processing instruction may not contain '?>'"); 173 } elsif ($name =~ /\s/) { 174 croak("Processing instruction name may not contain whitespace"); 175 } else { 176 &{$pi}; 177 } 178 }; 179 180 my $comment = sub { 181 my $data = $_[0]; 182 if ($dataMode && $elementLevel) { 183 $output->print("\n"); 184 $output->print(" " x ($elementLevel * $dataIndent)); 185 } 186 $output->print("<!-- $data -->"); 187 if ($dataMode && $elementLevel) { 188 $hasElement = 1; 189 } elsif ($elementLevel == 0) { 190 $output->print("\n"); 191 $hasHeading = 1; 192 } 193 }; 194 195 my $SAFE_comment = sub { 196 my $data = $_[0]; 197 if ($data =~ /--/) { 198 carp("Interoperability problem: \"--\" in comment text"); 199 } 200 201 if ($data =~ /-->/) { 202 croak("Comment may not contain '-->'"); 203 } else { 204 &{$checkUnencodedRepertoire}($data); 205 $seen{ANYTHING} = 1; 206 &{$comment}; 207 } 208 }; 209 210 my $doctype = sub { 211 my ($name, $publicId, $systemId) = (@_); 212 $output->print("<!DOCTYPE $name"); 213 if ($publicId) { 214 unless ($systemId) { 215 croak("A DOCTYPE declaration with a public ID must also have a system ID"); 216 } 217 $output->print(" PUBLIC \"$publicId\" \"$systemId\""); 218 } elsif ($systemId) { 219 $output->print(" SYSTEM \"$systemId\""); 220 } 221 $output->print(">\n"); 222 $hasHeading = 1; 223 }; 224 225 my $SAFE_doctype = sub { 226 my $name = $_[0]; 227 if ($seen{DOCTYPE}) { 228 croak("Attempt to insert second DOCTYPE declaration"); 229 } elsif ($seen{ELEMENT}) { 230 croak("The DOCTYPE declaration must come before the first start tag"); 231 } else { 232 $seen{ANYTHING} = 1; 233 $seen{DOCTYPE} = $name; 234 &{$doctype}; 235 } 236 }; 237 238 my $startTag = sub { 239 my $name = $_[0]; 240 if ($dataMode && ($hasHeading || $elementLevel)) { 241 $output->print("\n"); 242 $output->print(" " x ($elementLevel * $dataIndent)); 243 } 244 $elementLevel++; 245 push @elementStack, $name; 246 $output->print("<$name"); 247 &{$showAttributes}(\@_); 248 $output->print("$nl>"); 249 if ($dataMode) { 250 $hasElement = 1; 251 push @hasDataStack, $hasData; 252 $hasData = 0; 253 push @hasElementStack, $hasElement; 254 $hasElement = 0; 255 } 256 }; 257 258 my $SAFE_startTag = sub { 259 my $name = $_[0]; 260 261 &{$checkUnencodedRepertoire}($name); 262 _checkAttributes(\@_); 263 264 if ($seen{ELEMENT} && $elementLevel == 0) { 265 croak("Attempt to insert start tag after close of document element"); 266 } elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) { 267 croak("Document element is \"$name\", but DOCTYPE is \"" 268 . $seen{DOCTYPE} 269 . "\""); 270 } elsif ($dataMode && $hasData) { 271 croak("Mixed content not allowed in data mode: element $name"); 272 } else { 273 $seen{ANYTHING} = 1; 274 $seen{ELEMENT} = 1; 275 &{$startTag}; 276 } 277 }; 278 279 my $emptyTag = sub { 280 my $name = $_[0]; 281 if ($dataMode && ($hasHeading || $elementLevel)) { 282 $output->print("\n"); 283 $output->print(" " x ($elementLevel * $dataIndent)); 284 } 285 $output->print("<$name"); 286 &{$showAttributes}(\@_); 287 $output->print("$nl />"); 288 if ($dataMode) { 289 $hasElement = 1; 290 } 291 }; 292 293 my $SAFE_emptyTag = sub { 294 my $name = $_[0]; 295 296 &{$checkUnencodedRepertoire}($name); 297 _checkAttributes(\@_); 298 299 if ($seen{ELEMENT} && $elementLevel == 0) { 300 croak("Attempt to insert empty tag after close of document element"); 301 } elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) { 302 croak("Document element is \"$name\", but DOCTYPE is \"" 303 . $seen{DOCTYPE} 304 . "\""); 305 } elsif ($dataMode && $hasData) { 306 croak("Mixed content not allowed in data mode: element $name"); 307 } else { 308 $seen{ANYTHING} = 1; 309 $seen{ELEMENT} = 1; 310 &{$emptyTag}; 311 } 312 }; 313 314 my $endTag = sub { 315 my $name = $_[0]; 316 my $currentName = pop @elementStack; 317 $name = $currentName unless $name; 318 $elementLevel--; 319 if ($dataMode && $hasElement) { 320 $output->print("\n"); 321 $output->print(" " x ($elementLevel * $dataIndent)); 322 } 323 $output->print("</$name$nl>"); 324 if ($dataMode) { 325 $hasData = pop @hasDataStack; 326 $hasElement = pop @hasElementStack; 327 } 328 }; 329 330 my $SAFE_endTag = sub { 331 my $name = $_[0]; 332 my $oldName = $elementStack[$#elementStack]; 333 if ($elementLevel <= 0) { 334 croak("End tag \"$name\" does not close any open element"); 335 } elsif ($name && ($name ne $oldName)) { 336 croak("Attempt to end element \"$oldName\" with \"$name\" tag"); 337 } else { 338 &{$endTag}; 339 } 340 }; 341 342 my $characters = sub { 343 my $data = $_[0]; 344 if ($data =~ /[\&\<\>]/) { 345 $data =~ s/\&/\&\;/g; 346 $data =~ s/\</\<\;/g; 347 $data =~ s/\>/\>\;/g; 348 } 349 &{$escapeEncoding}($data); 350 $output->print($data); 351 $hasData = 1; 352 }; 353 354 my $SAFE_characters = sub { 355 if ($elementLevel < 1) { 356 croak("Attempt to insert characters outside of document element"); 357 } elsif ($dataMode && $hasElement) { 358 croak("Mixed content not allowed in data mode: characters"); 359 } else { 360 _croakUnlessDefinedCharacters($_[0]); 361 &{$characters}; 362 } 363 }; 364 365 my $raw = sub { 366 $output->print($_[0]); 367 # Don't set $hasData or any other information: we know nothing 368 # about what was just written. 369 # 370 }; 371 372 my $SAFE_raw = sub { 373 croak('raw() is only available when UNSAFE is set'); 374 }; 375 376 my $cdata = sub { 377 my $data = $_[0]; 378 $data =~ s/\]\]>/\]\]\]\]><!\[CDATA\[>/g; 379 $output->print("<![CDATA[$data]]>"); 380 $hasData = 1; 381 }; 382 383 my $SAFE_cdata = sub { 384 if ($elementLevel < 1) { 385 croak("Attempt to insert characters outside of document element"); 386 } elsif ($dataMode && $hasElement) { 387 croak("Mixed content not allowed in data mode: characters"); 388 } else { 389 _croakUnlessDefinedCharacters($_[0]); 390 &{$checkUnencodedRepertoire}($_[0]); 391 &{$cdata}; 392 } 393 }; 394 395 # Assign the correct closures based on 396 # the UNSAFE parameter 397 if ($unsafe) { 398 $self = {'END' => $end, 399 'XMLDECL' => $xmlDecl, 400 'PI' => $pi, 401 'COMMENT' => $comment, 402 'DOCTYPE' => $doctype, 403 'STARTTAG' => $startTag, 404 'EMPTYTAG' => $emptyTag, 405 'ENDTAG' => $endTag, 406 'CHARACTERS' => $characters, 407 'RAW' => $raw, 408 'CDATA' => $cdata 409 }; 410 } else { 411 $self = {'END' => $SAFE_end, 412 'XMLDECL' => $SAFE_xmlDecl, 413 'PI' => $SAFE_pi, 414 'COMMENT' => $SAFE_comment, 415 'DOCTYPE' => $SAFE_doctype, 416 'STARTTAG' => $SAFE_startTag, 417 'EMPTYTAG' => $SAFE_emptyTag, 418 'ENDTAG' => $SAFE_endTag, 419 'CHARACTERS' => $SAFE_characters, 420 'RAW' => $SAFE_raw, # This will intentionally fail 421 'CDATA' => $SAFE_cdata 422 }; 423 } 424 425 # Query methods 426 $self->{'IN_ELEMENT'} = sub { 427 my ($ancestor) = (@_); 428 return $elementStack[$#elementStack] eq $ancestor; 429 }; 430 431 $self->{'WITHIN_ELEMENT'} = sub { 432 my ($ancestor) = (@_); 433 my $el; 434 foreach $el (@elementStack) { 435 return 1 if $el eq $ancestor; 436 } 437 return 0; 438 }; 439 440 $self->{'CURRENT_ELEMENT'} = sub { 441 return $elementStack[$#elementStack]; 442 }; 443 444 $self->{'ANCESTOR'} = sub { 445 my ($n) = (@_); 446 if ($n < scalar(@elementStack)) { 447 return $elementStack[$#elementStack-$n]; 448 } else { 449 return undef; 450 } 451 }; 452 453 # Set and get the output destination. 454 $self->{'GETOUTPUT'} = sub { 455 return $output; 456 }; 457 458 $self->{'SETOUTPUT'} = sub { 459 my $newOutput = $_[0]; 460 461 if (ref($newOutput) eq 'SCALAR') { 462 $output = new XML::Writer::_String($newOutput); 463 } else { 464 # If there is no OUTPUT parameter, 465 # use standard output 466 $output = $newOutput || \*STDOUT; 467 if ($outputEncoding) { 468 if (lc($outputEncoding) eq 'utf-8') { 469 binmode($output, ':encoding(utf-8)'); 470 } elsif (lc($outputEncoding) eq 'us-ascii') { 471 binmode($output, ':encoding(us-ascii)'); 472 } else { 473 die 'The only supported encodings are utf-8 and us-ascii'; 474 } 475 } 476 } 477 }; 478 479 $self->{'SETDATAMODE'} = sub { 480 $dataMode = $_[0]; 481 }; 482 483 $self->{'GETDATAMODE'} = sub { 484 return $dataMode; 485 }; 486 487 $self->{'SETDATAINDENT'} = sub { 488 $dataIndent = $_[0]; 489 }; 490 491 $self->{'GETDATAINDENT'} = sub { 492 return $dataIndent; 493 }; 494 495 # Set the output. 496 &{$self->{'SETOUTPUT'}}($params{'OUTPUT'}); 497 498 # Return the blessed object. 499 return bless $self, $class; 500} 501 502 503 504######################################################################## 505# Public methods 506######################################################################## 507 508# 509# Finish writing the document. 510# 511sub end { 512 my $self = shift; 513 &{$self->{END}}; 514} 515 516# 517# Write an XML declaration. 518# 519sub xmlDecl { 520 my $self = shift; 521 &{$self->{XMLDECL}}; 522} 523 524# 525# Write a processing instruction. 526# 527sub pi { 528 my $self = shift; 529 &{$self->{PI}}; 530} 531 532# 533# Write a comment. 534# 535sub comment { 536 my $self = shift; 537 &{$self->{COMMENT}}; 538} 539 540# 541# Write a DOCTYPE declaration. 542# 543sub doctype { 544 my $self = shift; 545 &{$self->{DOCTYPE}}; 546} 547 548# 549# Write a start tag. 550# 551sub startTag { 552 my $self = shift; 553 &{$self->{STARTTAG}}; 554} 555 556# 557# Write an empty tag. 558# 559sub emptyTag { 560 my $self = shift; 561 &{$self->{EMPTYTAG}}; 562} 563 564# 565# Write an end tag. 566# 567sub endTag { 568 my $self = shift; 569 &{$self->{ENDTAG}}; 570} 571 572# 573# Write a simple data element. 574# 575sub dataElement { 576 my ($self, $name, $data, %atts) = (@_); 577 $self->startTag($name, %atts); 578 $self->characters($data); 579 $self->endTag($name); 580} 581 582# 583# Write a simple CDATA element. 584# 585sub cdataElement { 586 my ($self, $name, $data, %atts) = (@_); 587 $self->startTag($name, %atts); 588 $self->cdata($data); 589 $self->endTag($name); 590} 591 592# 593# Write character data. 594# 595sub characters { 596 my $self = shift; 597 &{$self->{CHARACTERS}}; 598} 599 600# 601# Write raw, unquoted, completely unchecked character data. 602# 603sub raw { 604 my $self = shift; 605 &{$self->{RAW}}; 606} 607 608# 609# Write CDATA. 610# 611sub cdata { 612 my $self = shift; 613 &{$self->{CDATA}}; 614} 615 616# 617# Query the current element. 618# 619sub in_element { 620 my $self = shift; 621 return &{$self->{IN_ELEMENT}}; 622} 623 624# 625# Query the ancestors. 626# 627sub within_element { 628 my $self = shift; 629 return &{$self->{WITHIN_ELEMENT}}; 630} 631 632# 633# Get the name of the current element. 634# 635sub current_element { 636 my $self = shift; 637 return &{$self->{CURRENT_ELEMENT}}; 638} 639 640# 641# Get the name of the numbered ancestor (zero-based). 642# 643sub ancestor { 644 my $self = shift; 645 return &{$self->{ANCESTOR}}; 646} 647 648# 649# Get the current output destination. 650# 651sub getOutput { 652 my $self = shift; 653 return &{$self->{GETOUTPUT}}; 654} 655 656 657# 658# Set the current output destination. 659# 660sub setOutput { 661 my $self = shift; 662 return &{$self->{SETOUTPUT}}; 663} 664 665# 666# Set the current data mode (true or false). 667# 668sub setDataMode { 669 my $self = shift; 670 return &{$self->{SETDATAMODE}}; 671} 672 673 674# 675# Get the current data mode (true or false). 676# 677sub getDataMode { 678 my $self = shift; 679 return &{$self->{GETDATAMODE}}; 680} 681 682 683# 684# Set the current data indent step. 685# 686sub setDataIndent { 687 my $self = shift; 688 return &{$self->{SETDATAINDENT}}; 689} 690 691 692# 693# Get the current data indent step. 694# 695sub getDataIndent { 696 my $self = shift; 697 return &{$self->{GETDATAINDENT}}; 698} 699 700 701# 702# Empty stub. 703# 704sub addPrefix { 705} 706 707 708# 709# Empty stub. 710# 711sub removePrefix { 712} 713 714 715 716######################################################################## 717# Private functions. 718######################################################################## 719 720# 721# Private: check for duplicate attributes and bad characters. 722# Note - this starts at $_[1], because $_[0] is assumed to be an 723# element name. 724# 725sub _checkAttributes { 726 my %anames; 727 my $i = 1; 728 while ($_[0]->[$i]) { 729 my $name = $_[0]->[$i]; 730 $i += 1; 731 if ($anames{$name}) { 732 croak("Two attributes named \"$name\""); 733 } else { 734 $anames{$name} = 1; 735 } 736 _croakUnlessDefinedCharacters($_[0]->[$i]); 737 $i += 1; 738 } 739} 740 741# 742# Private: escape an attribute value literal. 743# 744sub _escapeLiteral { 745 my $data = $_[0]; 746 if ($data =~ /[\&\<\>\"]/) { 747 $data =~ s/\&/\&\;/g; 748 $data =~ s/\</\<\;/g; 749 $data =~ s/\>/\>\;/g; 750 $data =~ s/\"/\"\;/g; 751 } 752 return $data; 753} 754 755sub _escapeASCII($) { 756 $_[0] =~ s/([^\x00-\x7F])/sprintf('&#x%X;', ord($1))/ge; 757} 758 759sub _croakUnlessASCII($) { 760 if ($_[0] =~ /[^\x00-\x7F]/) { 761 croak('Non-ASCII characters are not permitted in this part of a US-ASCII document'); 762 } 763} 764 765# Enforce XML 1.0, section 2.2's definition of "Char" (only reject low ASCII, 766# so as not to require Unicode support from perl) 767sub _croakUnlessDefinedCharacters($) { 768 if ($_[0] =~ /([\x00-\x08\x0B-\x0C\x0E-\x1F])/) { 769 croak(sprintf('Code point \u%04X is not a valid character in XML', ord($1))); 770 } 771} 772 773 774######################################################################## 775# XML::Writer::Namespaces - subclass for Namespace processing. 776######################################################################## 777 778package XML::Writer::Namespaces; 779use strict; 780use vars qw(@ISA); 781use Carp; 782 783@ISA = qw(XML::Writer); 784 785# 786# Constructor 787# 788sub new { 789 my ($class, %params) = (@_); 790 791 my $unsafe = $params{UNSAFE}; 792 793 # Snarf the prefix map, if any, and 794 # note the default prefix. 795 my %prefixMap = (); 796 if ($params{PREFIX_MAP}) { 797 %prefixMap = (%{$params{PREFIX_MAP}}); 798 delete $params{PREFIX_MAP}; 799 } 800 $prefixMap{'http://www.w3.org/XML/1998/namespace'} = 'xml'; 801 802 # Generate the reverse map for URIs 803 my $uriMap = {}; 804 my $key; 805 foreach $key (keys(%prefixMap)) { 806 $uriMap->{$prefixMap{$key}} = $key; 807 } 808 809 my $defaultPrefix = $uriMap->{''}; 810 delete $prefixMap{$defaultPrefix} if ($defaultPrefix); 811 812 # Create an instance of the parent. 813 my $self = new XML::Writer(%params); 814 815 # Snarf the parent's methods that we're 816 # going to override. 817 my $OLD_startTag = $self->{STARTTAG}; 818 my $OLD_emptyTag = $self->{EMPTYTAG}; 819 my $OLD_endTag = $self->{ENDTAG}; 820 821 # State variables 822 my @stack; 823 my $prefixCounter = 1; 824 my $nsDecls = {'http://www.w3.org/XML/1998/namespace' => 'xml'}; 825 my $nsDefaultDecl = undef; 826 my $nsCopyFlag = 0; 827 my @forcedNSDecls = (); 828 829 if ($params{FORCED_NS_DECLS}) { 830 @forcedNSDecls = @{$params{FORCED_NS_DECLS}}; 831 delete $params{FORCED_NS_DECLS}; 832 } 833 834 # 835 # Push the current declaration state. 836 # 837 my $pushState = sub { 838 push @stack, [$nsDecls, $nsDefaultDecl, $nsCopyFlag, $uriMap]; 839 $nsCopyFlag = 0; 840 }; 841 842 843 # 844 # Pop the current declaration state. 845 # 846 my $popState = sub { 847 ($nsDecls, $nsDefaultDecl, $nsCopyFlag, $uriMap) = @{pop @stack}; 848 }; 849 850 # 851 # Generate a new prefix. 852 # 853 my $genPrefix = sub { 854 my $uri = $_[0]; 855 my $prefixCounter = 1; 856 my $prefix = $prefixMap{$uri}; 857 my %clashMap = %{$uriMap}; 858 while( my ($u, $p) = each(%prefixMap)) { 859 $clashMap{$p} = $u; 860 } 861 862 while (!defined($prefix) || ($clashMap{$prefix} && $clashMap{$prefix} ne $uri)) { 863 $prefix = "__NS$prefixCounter"; 864 $prefixCounter++; 865 } 866 867 return $prefix; 868 }; 869 870 # 871 # Perform namespace processing on a single name. 872 # 873 my $processName = sub { 874 my ($nameref, $atts, $attFlag) = (@_); 875 my ($uri, $local) = @{$$nameref}; 876 my $prefix = $nsDecls->{$uri}; 877 878 # Is this an element name that matches 879 # the default NS? 880 if (!$attFlag && $defaultPrefix && ($uri eq $defaultPrefix)) { 881 unless ($nsDefaultDecl && ($nsDefaultDecl eq $uri)) { 882 push @{$atts}, 'xmlns'; 883 push @{$atts}, $uri; 884 $nsDefaultDecl = $uri; 885 } 886 $$nameref = $local; 887 888 if (defined($uriMap->{''})) { 889 delete ($nsDecls->{$uriMap->{''}}); 890 } 891 892 $nsDecls->{$uri} = ''; 893 unless ($nsCopyFlag) { 894 $uriMap = {%{$uriMap}}; 895 $nsDecls = {%{$nsDecls}}; 896 $nsCopyFlag = 1; 897 } 898 $uriMap->{''} = $uri; 899 900 # Is there a straight-forward prefix? 901 } elsif ($prefix) { 902 $$nameref = "$prefix:$local"; 903 } else { 904 $prefix = &{$genPrefix}($uri); 905 unless ($nsCopyFlag) { 906 $uriMap = {%{$uriMap}}; 907 $nsDecls = {%{$nsDecls}}; 908 $nsCopyFlag = 1; 909 } 910 $uriMap->{$prefix} = $uri; 911 $nsDecls->{$uri} = $prefix; 912 push @{$atts}, "xmlns:$prefix"; 913 push @{$atts}, $uri; 914 $$nameref = "$prefix:$local"; 915 } 916 }; 917 918 919 # 920 # Perform namespace processing on element and attribute names. 921 # 922 my $nsProcess = sub { 923 if (ref($_[0]->[0]) eq 'ARRAY') { 924 &{$processName}(\$_[0]->[0], $_[0], 0); 925 } 926 my $i = 1; 927 while ($_[0]->[$i]) { 928 if (ref($_[0]->[$i]) eq 'ARRAY') { 929 &{$processName}(\$_[0]->[$i], $_[0], 1); 930 } 931 $i += 2; 932 } 933 934 # We do this if any declarations are forced, due either to 935 # constructor arguments or to a call during processing. 936 if (@forcedNSDecls) { 937 foreach (@forcedNSDecls) { 938 my @dummy = ($_, 'dummy'); 939 my $d2 = \@dummy; 940 if ($defaultPrefix && ($_ eq $defaultPrefix)) { 941 &{$processName}(\$d2, $_[0], 0); 942 } else { 943 &{$processName}(\$d2, $_[0], 1); 944 } 945 } 946 @forcedNSDecls = (); 947 } 948 }; 949 950 951 # Indicate that a namespace should be declared by the next open element 952 $self->{FORCENSDECL} = sub { 953 push @forcedNSDecls, $_[0]; 954 }; 955 956 957 # 958 # Start tag, with NS processing 959 # 960 $self->{STARTTAG} = sub { 961 my $name = $_[0]; 962 unless ($unsafe) { 963 _checkNSNames(\@_); 964 } 965 &{$pushState}(); 966 &{$nsProcess}(\@_); 967 &{$OLD_startTag}; 968 }; 969 970 971 # 972 # Empty tag, with NS processing 973 # 974 $self->{EMPTYTAG} = sub { 975 unless ($unsafe) { 976 _checkNSNames(\@_); 977 } 978 &{$pushState}(); 979 &{$nsProcess}(\@_); 980 &{$OLD_emptyTag}; 981 &{$popState}(); 982 }; 983 984 985 # 986 # End tag, with NS processing 987 # 988 $self->{ENDTAG} = sub { 989 my $name = $_[0]; 990 if (ref($_[0]) eq 'ARRAY') { 991 my $pfx = $nsDecls->{$_[0]->[0]}; 992 if ($pfx) { 993 $_[0] = $pfx . ':' . $_[0]->[1]; 994 } else { 995 $_[0] = $_[0]->[1]; 996 } 997 } else { 998 $_[0] = $_[0]; 999 } 1000# &{$nsProcess}(\@_); 1001 &{$OLD_endTag}; 1002 &{$popState}(); 1003 }; 1004 1005 1006 # 1007 # Processing instruction, but only if not UNSAFE. 1008 # 1009 unless ($unsafe) { 1010 my $OLD_pi = $self->{PI}; 1011 $self->{PI} = sub { 1012 my $target = $_[0]; 1013 if (index($target, ':') >= 0) { 1014 croak "PI target '$target' contains a colon."; 1015 } 1016 &{$OLD_pi}; 1017 } 1018 }; 1019 1020 1021 # 1022 # Add a prefix to the prefix map. 1023 # 1024 $self->{ADDPREFIX} = sub { 1025 my ($uri, $prefix) = (@_); 1026 if ($prefix) { 1027 $prefixMap{$uri} = $prefix; 1028 } else { 1029 if (defined($defaultPrefix)) { 1030 delete($prefixMap{$defaultPrefix}); 1031 } 1032 $defaultPrefix = $uri; 1033 } 1034 }; 1035 1036 1037 # 1038 # Remove a prefix from the prefix map. 1039 # 1040 $self->{REMOVEPREFIX} = sub { 1041 my ($uri) = (@_); 1042 if ($defaultPrefix && ($defaultPrefix eq $uri)) { 1043 $defaultPrefix = undef; 1044 } 1045 delete $prefixMap{$uri}; 1046 }; 1047 1048 1049 # 1050 # Bless and return the object. 1051 # 1052 return bless $self, $class; 1053} 1054 1055 1056# 1057# Add a preferred prefix for a namespace URI. 1058# 1059sub addPrefix { 1060 my $self = shift; 1061 return &{$self->{ADDPREFIX}}; 1062} 1063 1064 1065# 1066# Remove a preferred prefix for a namespace URI. 1067# 1068sub removePrefix { 1069 my $self = shift; 1070 return &{$self->{REMOVEPREFIX}}; 1071} 1072 1073 1074# 1075# Check names. 1076# 1077sub _checkNSNames { 1078 my $names = $_[0]; 1079 my $i = 1; 1080 my $name = $names->[0]; 1081 1082 # Check the element name. 1083 if (ref($name) eq 'ARRAY') { 1084 if (index($name->[1], ':') >= 0) { 1085 croak("Local part of element name '" . 1086 $name->[1] . 1087 "' contains a colon."); 1088 } 1089 } elsif (index($name, ':') >= 0) { 1090 croak("Element name '$name' contains a colon."); 1091 } 1092 1093 # Check the attribute names. 1094 while ($names->[$i]) { 1095 my $name = $names->[$i]; 1096 if (ref($name) eq 'ARRAY') { 1097 my $local = $name->[1]; 1098 if (index($local, ':') >= 0) { 1099 croak "Local part of attribute name '$local' contains a colon."; 1100 } 1101 } else { 1102 if ($name =~ /^xmlns/) { 1103 croak "Attribute name '$name' begins with 'xmlns'"; 1104 } elsif (index($name, ':') >= 0) { 1105 croak "Attribute name '$name' contains ':'"; 1106 } 1107 } 1108 $i += 2; 1109 } 1110} 1111 1112sub forceNSDecl 1113{ 1114 my $self = shift; 1115 return &{$self->{FORCENSDECL}}; 1116} 1117 1118 1119package XML::Writer::_String; 1120 1121# Internal class, behaving sufficiently like an IO::Handle, 1122# that stores written output in a string 1123# 1124# Heavily inspired by Simon Oliver's XML::Writer::String 1125 1126sub new 1127{ 1128 my $class = shift; 1129 my $scalar_ref = shift; 1130 return bless($scalar_ref, $class); 1131} 1132 1133sub print 1134{ 1135 ${(shift)} .= join('', @_); 1136 return 1; 1137} 1138 11391; 1140__END__ 1141 1142######################################################################## 1143# POD Documentation 1144######################################################################## 1145 1146=head1 NAME 1147 1148XML::Writer - Perl extension for writing XML documents. 1149 1150=head1 SYNOPSIS 1151 1152 use XML::Writer; 1153 use IO::File; 1154 1155 my $output = new IO::File(">output.xml"); 1156 1157 my $writer = new XML::Writer(OUTPUT => $output); 1158 $writer->startTag("greeting", 1159 "class" => "simple"); 1160 $writer->characters("Hello, world!"); 1161 $writer->endTag("greeting"); 1162 $writer->end(); 1163 $output->close(); 1164 1165 1166=head1 DESCRIPTION 1167 1168XML::Writer is a helper module for Perl programs that write an XML 1169document. The module handles all escaping for attribute values and 1170character data and constructs different types of markup, such as tags, 1171comments, and processing instructions. 1172 1173By default, the module performs several well-formedness checks to 1174catch errors during output. This behaviour can be extremely useful 1175during development and debugging, but it can be turned off for 1176production-grade code. 1177 1178The module can operate either in regular mode in or Namespace 1179processing mode. In Namespace mode, the module will generate 1180Namespace Declarations itself, and will perform additional checks on 1181the output. 1182 1183Additional support is available for a simplified data mode with no 1184mixed content: newlines are automatically inserted around elements and 1185elements can optionally be indented based as their nesting level. 1186 1187 1188=head1 METHODS 1189 1190=head2 Writing XML 1191 1192=over 4 1193 1194=item new([$params]) 1195 1196Create a new XML::Writer object: 1197 1198 my $writer = new XML::Writer(OUTPUT => $output, NEWLINES => 1); 1199 1200Arguments are an anonymous hash array of parameters: 1201 1202=over 4 1203 1204=item OUTPUT 1205 1206An object blessed into IO::Handle or one of its subclasses (such as 1207IO::File), or a reference to a string; if this parameter is not present, 1208the module will write to standard output. If a string reference is passed, 1209it will capture the generated XML (as a string; to get bytes use the 1210C<Encode> module). 1211 1212=item NAMESPACES 1213 1214A true (1) or false (0, undef) value; if this parameter is present and 1215its value is true, then the module will accept two-member array 1216reference in the place of element and attribute names, as in the 1217following example: 1218 1219 my $rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; 1220 my $writer = new XML::Writer(NAMESPACES => 1); 1221 $writer->startTag([$rdfns, "Description"]); 1222 1223The first member of the array is a namespace URI, and the second part 1224is the local part of a qualified name. The module will automatically 1225generate appropriate namespace declarations and will replace the URI 1226part with a prefix. 1227 1228=item PREFIX_MAP 1229 1230A hash reference; if this parameter is present and the module is 1231performing namespace processing (see the NAMESPACES parameter), then 1232the module will use this hash to look up preferred prefixes for 1233namespace URIs: 1234 1235 1236 my $rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; 1237 my $writer = new XML::Writer(NAMESPACES => 1, 1238 PREFIX_MAP => {$rdfns => 'rdf'}); 1239 1240The keys in the hash table are namespace URIs, and the values are the 1241associated prefixes. If there is not a preferred prefix for the 1242namespace URI in this hash, then the module will automatically 1243generate prefixes of the form "__NS1", "__NS2", etc. 1244 1245To set the default namespace, use '' for the prefix. 1246 1247=item FORCED_NS_DECLS 1248 1249An array reference; if this parameter is present, the document element 1250will contain declarations for all the given namespace URIs. 1251Declaring namespaces in advance is particularly useful when a large 1252number of elements from a namespace are siblings, but don't share a direct 1253ancestor from the same namespace. 1254 1255=item NEWLINES 1256 1257A true or false value; if this parameter is present and its value is 1258true, then the module will insert an extra newline before the closing 1259delimiter of start, end, and empty tags to guarantee that the document 1260does not end up as a single, long line. If the paramter is not 1261present, the module will not insert the newlines. 1262 1263=item UNSAFE 1264 1265A true or false value; if this parameter is present and its value is 1266true, then the module will skip most well-formedness error checking. 1267If the parameter is not present, the module will perform the 1268well-formedness error checking by default. Turn off error checking at 1269your own risk! 1270 1271=item DATA_MODE 1272 1273A true or false value; if this parameter is present and its value is 1274true, then the module will enter a special data mode, inserting 1275newlines automatically around elements and (unless UNSAFE is also 1276specified) reporting an error if any element has both characters and 1277elements as content. 1278 1279=item DATA_INDENT 1280 1281A numeric value; if this parameter is present, it represents the 1282indent step for elements in data mode (it will be ignored when not in 1283data mode). 1284 1285=item ENCODING 1286 1287A character encoding; currently this must be one of 'utf-8' or 'us-ascii'. 1288If present, it will be used for the underlying character encoding and as the 1289default in the XML declaration. 1290 1291=back 1292 1293=item end() 1294 1295Finish creating an XML document. This method will check that the 1296document has exactly one document element, and that all start tags are 1297closed: 1298 1299 $writer->end(); 1300 1301=item xmlDecl([$encoding, $standalone]) 1302 1303Add an XML declaration to the beginning of an XML document. The 1304version will always be "1.0". If you provide a non-null encoding or 1305standalone argument, its value will appear in the declaration (any 1306non-null value for standalone except 'no' will automatically be 1307converted to 'yes'). If not given here, the encoding will be taken from the 1308ENCODING argument. Pass the empty string to suppress this behaviour. 1309 1310 $writer->xmlDecl("UTF-8"); 1311 1312=item doctype($name, [$publicId, $systemId]) 1313 1314Add a DOCTYPE declaration to an XML document. The declaration must 1315appear before the beginning of the root element. If you provide a 1316publicId, you must provide a systemId as well, but you may provide 1317just a system ID by passing 'undef' for the publicId. 1318 1319 $writer->doctype("html"); 1320 1321=item comment($text) 1322 1323Add a comment to an XML document. If the comment appears outside the 1324document element (either before the first start tag or after the last 1325end tag), the module will add a carriage return after it to improve 1326readability. In data mode, comments will be treated as empty tags: 1327 1328 $writer->comment("This is a comment"); 1329 1330=item pi($target [, $data]) 1331 1332Add a processing instruction to an XML document: 1333 1334 $writer->pi('xml-stylesheet', 'href="style.css" type="text/css"'); 1335 1336If the processing instruction appears outside the document element 1337(either before the first start tag or after the last end tag), the 1338module will add a carriage return after it to improve readability. 1339 1340The $target argument must be a single XML name. If you provide the 1341$data argument, the module will insert its contents following the 1342$target argument, separated by a single space. 1343 1344=item startTag($name [, $aname1 => $value1, ...]) 1345 1346Add a start tag to an XML document. Any arguments after the element 1347name are assumed to be name/value pairs for attributes: the module 1348will escape all '&', '<', '>', and '"' characters in the attribute 1349values using the predefined XML entities: 1350 1351 $writer->startTag('doc', 'version' => '1.0', 1352 'status' => 'draft', 1353 'topic' => 'AT&T'); 1354 1355All start tags must eventually have matching end tags. 1356 1357=item emptyTag($name [, $aname1 => $value1, ...]) 1358 1359Add an empty tag to an XML document. Any arguments after the element 1360name are assumed to be name/value pairs for attributes (see startTag() 1361for details): 1362 1363 $writer->emptyTag('img', 'src' => 'portrait.jpg', 1364 'alt' => 'Portrait of Emma.'); 1365 1366=item endTag([$name]) 1367 1368Add an end tag to an XML document. The end tag must match the closest 1369open start tag, and there must be a matching and properly-nested end 1370tag for every start tag: 1371 1372 $writer->endTag('doc'); 1373 1374If the $name argument is omitted, then the module will automatically 1375supply the name of the currently open element: 1376 1377 $writer->startTag('p'); 1378 $writer->endTag(); 1379 1380=item dataElement($name, $data [, $aname1 => $value1, ...]) 1381 1382Print an entire element containing only character data. This is 1383equivalent to 1384 1385 $writer->startTag($name [, $aname1 => $value1, ...]); 1386 $writer->characters($data); 1387 $writer->endTag($name); 1388 1389=item characters($data) 1390 1391Add character data to an XML document. All '<', '>', and '&' 1392characters in the $data argument will automatically be escaped using 1393the predefined XML entities: 1394 1395 $writer->characters("Here is the formula: "); 1396 $writer->characters("a < 100 && a > 5"); 1397 1398You may invoke this method only within the document element 1399(i.e. after the first start tag and before the last end tag). 1400 1401In data mode, you must not use this method to add whitespace between 1402elements. 1403 1404=item raw($data) 1405 1406Print data completely unquoted and unchecked to the XML document. For 1407example C<raw('<')> will print a literal < character. This 1408necessarily bypasses all well-formedness checking, and is therefore 1409only available in unsafe mode. 1410 1411This can sometimes be useful for printing entities which are defined 1412for your XML format but the module doesn't know about, for example 1413 for XHTML. 1414 1415=item cdata($data) 1416 1417As C<characters()> but writes the data quoted in a CDATA section, that 1418is, between <![CDATA[ and ]]>. If the data to be written itself 1419contains ]]>, it will be written as several consecutive CDATA 1420sections. 1421 1422=item cdataElement($name, $data [, $aname1 => $value1, ...]) 1423 1424As C<dataElement()> but the element content is written as one or more 1425CDATA sections (see C<cdata()>). 1426 1427=item setOutput($output) 1428 1429Set the current output destination, as in the OUTPUT parameter for the 1430constructor. 1431 1432=item getOutput() 1433 1434Return the current output destination, as in the OUTPUT parameter for 1435the constructor. 1436 1437=item setDataMode($mode) 1438 1439Enable or disable data mode, as in the DATA_MODE parameter for the 1440constructor. 1441 1442=item getDataMode() 1443 1444Return the current data mode, as in the DATA_MODE parameter for the 1445constructor. 1446 1447=item setDataIndent($step) 1448 1449Set the indent step for data mode, as in the DATA_INDENT parameter for 1450the constructor. 1451 1452=item getDataIndent() 1453 1454Return the indent step for data mode, as in the DATA_INDENT parameter 1455for the constructor. 1456 1457 1458=back 1459 1460=head2 Querying XML 1461 1462=over 4 1463 1464=item in_element($name) 1465 1466Return a true value if the most recent open element matches $name: 1467 1468 if ($writer->in_element('dl')) { 1469 $writer->startTag('dt'); 1470 } else { 1471 $writer->startTag('li'); 1472 } 1473 1474=item within_element($name) 1475 1476Return a true value if any open element matches $name: 1477 1478 if ($writer->within_element('body')) { 1479 $writer->startTag('h1'); 1480 } else { 1481 $writer->startTag('title'); 1482 } 1483 1484=item current_element() 1485 1486Return the name of the currently open element: 1487 1488 my $name = $writer->current_element(); 1489 1490This is the equivalent of 1491 1492 my $name = $writer->ancestor(0); 1493 1494=item ancestor($n) 1495 1496Return the name of the nth ancestor, where $n=0 for the current open 1497element. 1498 1499=back 1500 1501 1502=head2 Additional Namespace Support 1503 1504As of 0.510, these methods may be used while writing a document. 1505 1506=over 4 1507 1508=item addPrefix($uri, $prefix) 1509 1510Add a preferred mapping between a Namespace URI and a prefix. See 1511also the PREFIX_MAP constructor parameter. 1512 1513To set the default namespace, omit the $prefix parameter or set it to 1514''. 1515 1516=item removePrefix($uri) 1517 1518Remove a preferred mapping between a Namespace URI and a prefix. 1519 1520=item forceNSDecl($uri) 1521 1522Indicate that a namespace declaration for this URI should be included 1523with the next element to be started. 1524 1525=back 1526 1527 1528=head1 ERROR REPORTING 1529 1530With the default settings, the XML::Writer module can detect several 1531basic XML well-formedness errors: 1532 1533=over 4 1534 1535=item * 1536 1537Lack of a (top-level) document element, or multiple document elements. 1538 1539=item * 1540 1541Unclosed start tags. 1542 1543=item * 1544 1545Misplaced delimiters in the contents of processing instructions or 1546comments. 1547 1548=item * 1549 1550Misplaced or duplicate XML declaration(s). 1551 1552=item * 1553 1554Misplaced or duplicate DOCTYPE declaration(s). 1555 1556=item * 1557 1558Mismatch between the document type name in the DOCTYPE declaration and 1559the name of the document element. 1560 1561=item * 1562 1563Mismatched start and end tags. 1564 1565=item * 1566 1567Attempts to insert character data outside the document element. 1568 1569=item * 1570 1571Duplicate attributes with the same name. 1572 1573=back 1574 1575During Namespace processing, the module can detect the following 1576additional errors: 1577 1578=over 4 1579 1580=item * 1581 1582Attempts to use PI targets or element or attribute names containing a 1583colon. 1584 1585=item * 1586 1587Attempts to use attributes with names beginning "xmlns". 1588 1589=back 1590 1591To ensure full error detection, a program must also invoke the end 1592method when it has finished writing a document: 1593 1594 $writer->startTag('greeting'); 1595 $writer->characters("Hello, world!"); 1596 $writer->endTag('greeting'); 1597 $writer->end(); 1598 1599This error reporting can catch many hidden bugs in Perl programs that 1600create XML documents; however, if necessary, it can be turned off by 1601providing an UNSAFE parameter: 1602 1603 my $writer = new XML::Writer(OUTPUT => $output, UNSAFE => 1); 1604 1605 1606=head1 AUTHOR 1607 1608David Megginson E<lt>david@megginson.comE<gt> 1609 1610 1611=head1 COPYRIGHT 1612 1613Copyright 1999, 2000 David Megginson E<lt>david@megginson.comE<gt> 1614 1615Copyright 2004, 2005 Joseph Walton E<lt>joe@kafsemo.orgE<gt> 1616 1617 1618=head1 SEE ALSO 1619 1620XML::Parser 1621 1622=cut 1623