1########################################################################
2# Writer.pm - write an XML document.
3# Copyright (c) 1999 by Megginson Technologies.
4# Copyright (c) 2004, 2005 by Joseph Walton <joe@kafsemo.org>.
5# No warranty.  Commercial and non-commercial use freely permitted.
6#
7# $Id: Writer.pm,v 1.48 2005/06/30 22:17:04 josephw Exp $
8########################################################################
9
10package XML::Writer;
11
12require 5.004;
13
14use strict;
15use vars qw($VERSION);
16use Carp;
17use IO::Handle;
18$VERSION = "0.600";
19
20
21
22########################################################################
23# Constructor.
24########################################################################
25
26#
27# Public constructor.
28#
29# This actually does most of the work of the module: it defines closures
30# for all of the real processing, and selects the appropriate closures
31# to use based on the value of the UNSAFE parameter.  The actual methods
32# are just stubs.
33#
34sub new {
35  my ($class, %params) = (@_);
36
37                                # If the user wants namespaces,
38                                # intercept the request here; it will
39                                # come back to this constructor
40                                # from within XML::Writer::Namespaces::new()
41  if ($params{NAMESPACES}) {
42    delete $params{NAMESPACES};
43    return new XML::Writer::Namespaces(%params);
44  }
45
46                                # Set up $self and basic parameters
47  my $self;
48  my $output;
49  my $unsafe = $params{UNSAFE};
50  my $newlines = $params{NEWLINES};
51  my $dataMode = $params{DATA_MODE};
52  my $dataIndent = $params{DATA_INDENT} || 0;
53
54                                # If the NEWLINES parameter is specified,
55                                # set the $nl variable appropriately
56  my $nl = '';
57  if ($newlines) {
58    $nl = "\n";
59  }
60
61  my $outputEncoding = $params{ENCODING};
62  my ($checkUnencodedRepertoire, $escapeEncoding);
63  if (lc($outputEncoding) eq 'us-ascii') {
64    $checkUnencodedRepertoire = \&_croakUnlessASCII;
65    $escapeEncoding = \&_escapeASCII;
66  } else {
67    my $doNothing = sub {};
68    $checkUnencodedRepertoire = $doNothing;
69    $escapeEncoding = $doNothing;
70  }
71
72                                # Parse variables
73  my @elementStack = ();
74  my $elementLevel = 0;
75  my %seen = ();
76
77  my $hasData = 0;
78  my @hasDataStack = ();
79  my $hasElement = 0;
80  my @hasElementStack = ();
81  my $hasHeading = 0; # Does this document have anything before the first element?
82
83  #
84  # Private method to show attributes.
85  #
86  my $showAttributes = sub {
87    my $atts = $_[0];
88    my $i = 1;
89    while ($atts->[$i]) {
90      my $aname = $atts->[$i++];
91      my $value = _escapeLiteral($atts->[$i++]);
92      $value =~ s/\x0a/\&#10\;/g;
93      &{$escapeEncoding}($value);
94      $output->print(" $aname=\"$value\"");
95    }
96  };
97
98                                # Method implementations: the SAFE_
99                                # versions perform error checking
100                                # and then call the regular ones.
101  my $end = sub {
102    $output->print("\n");
103  };
104
105  my $SAFE_end = sub {
106    if (!$seen{ELEMENT}) {
107      croak("Document cannot end without a document element");
108    } elsif ($elementLevel > 0) {
109      croak("Document ended with unmatched start tag(s): @elementStack");
110    } else {
111      @elementStack = ();
112      $elementLevel = 0;
113      %seen = ();
114      &{$end};
115    }
116  };
117
118  my $xmlDecl = sub {
119    my ($encoding, $standalone) = (@_);
120    if ($standalone && $standalone ne 'no') {
121      $standalone = 'yes';
122    }
123
124    # Only include an encoding if one has been explicitly supplied,
125    #  either here or on construction. Allow the empty string
126    #  to suppress it.
127    if (!defined($encoding)) {
128      $encoding = $outputEncoding;
129    }
130    $output->print("<?xml version=\"1.0\"");
131    if ($encoding) {
132      $output->print(" encoding=\"$encoding\"");
133    }
134    if ($standalone) {
135      $output->print(" standalone=\"$standalone\"");
136    }
137    $output->print("?>\n");
138    $hasHeading = 1;
139  };
140
141  my $SAFE_xmlDecl = sub {
142    if ($seen{ANYTHING}) {
143      croak("The XML declaration is not the first thing in the document");
144    } else {
145      $seen{ANYTHING} = 1;
146      $seen{XMLDECL} = 1;
147      &{$xmlDecl};
148    }
149  };
150
151  my $pi = sub {
152    my ($target, $data) = (@_);
153    if ($data) {
154      $output->print("<?$target $data?>");
155    } else {
156      $output->print("<?$target?>");
157    }
158    if ($elementLevel == 0) {
159      $output->print("\n");
160      $hasHeading = 1;
161    }
162  };
163
164  my $SAFE_pi = sub {
165    my ($name, $data) = (@_);
166    $seen{ANYTHING} = 1;
167    if (($name =~ /^xml/i) && ($name !~ /^xml-stylesheet$/i)) {
168      carp("Processing instruction target begins with 'xml'");
169    }
170
171    if ($name =~ /\?\>/ || (defined($data) && $data =~ /\?\>/)) {
172      croak("Processing instruction may not contain '?>'");
173    } elsif ($name =~ /\s/) {
174      croak("Processing instruction name may not contain whitespace");
175    } else {
176      &{$pi};
177    }
178  };
179
180  my $comment = sub {
181    my $data = $_[0];
182    if ($dataMode && $elementLevel) {
183      $output->print("\n");
184      $output->print(" " x ($elementLevel * $dataIndent));
185    }
186    $output->print("<!-- $data -->");
187    if ($dataMode && $elementLevel) {
188      $hasElement = 1;
189    } elsif ($elementLevel == 0) {
190      $output->print("\n");
191      $hasHeading = 1;
192    }
193  };
194
195  my $SAFE_comment = sub {
196    my $data = $_[0];
197    if ($data =~ /--/) {
198      carp("Interoperability problem: \"--\" in comment text");
199    }
200
201    if ($data =~ /-->/) {
202      croak("Comment may not contain '-->'");
203    } else {
204      &{$checkUnencodedRepertoire}($data);
205      $seen{ANYTHING} = 1;
206      &{$comment};
207    }
208  };
209
210  my $doctype = sub {
211    my ($name, $publicId, $systemId) = (@_);
212    $output->print("<!DOCTYPE $name");
213    if ($publicId) {
214      unless ($systemId) {
215        croak("A DOCTYPE declaration with a public ID must also have a system ID");
216      }
217      $output->print(" PUBLIC \"$publicId\" \"$systemId\"");
218    } elsif ($systemId) {
219      $output->print(" SYSTEM \"$systemId\"");
220    }
221    $output->print(">\n");
222    $hasHeading = 1;
223  };
224
225  my $SAFE_doctype = sub {
226    my $name = $_[0];
227    if ($seen{DOCTYPE}) {
228      croak("Attempt to insert second DOCTYPE declaration");
229    } elsif ($seen{ELEMENT}) {
230      croak("The DOCTYPE declaration must come before the first start tag");
231    } else {
232      $seen{ANYTHING} = 1;
233      $seen{DOCTYPE} = $name;
234      &{$doctype};
235    }
236  };
237
238  my $startTag = sub {
239    my $name = $_[0];
240    if ($dataMode && ($hasHeading || $elementLevel)) {
241      $output->print("\n");
242      $output->print(" " x ($elementLevel * $dataIndent));
243    }
244    $elementLevel++;
245    push @elementStack, $name;
246    $output->print("<$name");
247    &{$showAttributes}(\@_);
248    $output->print("$nl>");
249    if ($dataMode) {
250      $hasElement = 1;
251      push @hasDataStack, $hasData;
252      $hasData = 0;
253      push @hasElementStack, $hasElement;
254      $hasElement = 0;
255    }
256  };
257
258  my $SAFE_startTag = sub {
259    my $name = $_[0];
260
261    &{$checkUnencodedRepertoire}($name);
262    _checkAttributes(\@_);
263
264    if ($seen{ELEMENT} && $elementLevel == 0) {
265      croak("Attempt to insert start tag after close of document element");
266    } elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) {
267      croak("Document element is \"$name\", but DOCTYPE is \""
268            . $seen{DOCTYPE}
269            . "\"");
270    } elsif ($dataMode && $hasData) {
271      croak("Mixed content not allowed in data mode: element $name");
272    } else {
273      $seen{ANYTHING} = 1;
274      $seen{ELEMENT} = 1;
275      &{$startTag};
276    }
277  };
278
279  my $emptyTag = sub {
280    my $name = $_[0];
281    if ($dataMode && ($hasHeading || $elementLevel)) {
282      $output->print("\n");
283      $output->print(" " x ($elementLevel * $dataIndent));
284    }
285    $output->print("<$name");
286    &{$showAttributes}(\@_);
287    $output->print("$nl />");
288    if ($dataMode) {
289      $hasElement = 1;
290    }
291  };
292
293  my $SAFE_emptyTag = sub {
294    my $name = $_[0];
295
296    &{$checkUnencodedRepertoire}($name);
297    _checkAttributes(\@_);
298
299    if ($seen{ELEMENT} && $elementLevel == 0) {
300      croak("Attempt to insert empty tag after close of document element");
301    } elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) {
302      croak("Document element is \"$name\", but DOCTYPE is \""
303            . $seen{DOCTYPE}
304            . "\"");
305    } elsif ($dataMode && $hasData) {
306      croak("Mixed content not allowed in data mode: element $name");
307    } else {
308      $seen{ANYTHING} = 1;
309      $seen{ELEMENT} = 1;
310      &{$emptyTag};
311    }
312  };
313
314  my $endTag = sub {
315    my $name = $_[0];
316    my $currentName = pop @elementStack;
317    $name = $currentName unless $name;
318    $elementLevel--;
319    if ($dataMode && $hasElement) {
320      $output->print("\n");
321      $output->print(" " x ($elementLevel * $dataIndent));
322    }
323    $output->print("</$name$nl>");
324    if ($dataMode) {
325      $hasData = pop @hasDataStack;
326      $hasElement = pop @hasElementStack;
327    }
328  };
329
330  my $SAFE_endTag = sub {
331    my $name = $_[0];
332    my $oldName = $elementStack[$#elementStack];
333    if ($elementLevel <= 0) {
334      croak("End tag \"$name\" does not close any open element");
335    } elsif ($name && ($name ne $oldName)) {
336      croak("Attempt to end element \"$oldName\" with \"$name\" tag");
337    } else {
338      &{$endTag};
339    }
340  };
341
342  my $characters = sub {
343    my $data = $_[0];
344    if ($data =~ /[\&\<\>]/) {
345      $data =~ s/\&/\&amp\;/g;
346      $data =~ s/\</\&lt\;/g;
347      $data =~ s/\>/\&gt\;/g;
348    }
349    &{$escapeEncoding}($data);
350    $output->print($data);
351    $hasData = 1;
352  };
353
354  my $SAFE_characters = sub {
355    if ($elementLevel < 1) {
356      croak("Attempt to insert characters outside of document element");
357    } elsif ($dataMode && $hasElement) {
358      croak("Mixed content not allowed in data mode: characters");
359    } else {
360      _croakUnlessDefinedCharacters($_[0]);
361      &{$characters};
362    }
363  };
364
365  my $raw = sub {
366    $output->print($_[0]);
367    # Don't set $hasData or any other information: we know nothing
368    # about what was just written.
369    #
370  };
371
372  my $SAFE_raw = sub {
373    croak('raw() is only available when UNSAFE is set');
374  };
375
376  my $cdata = sub {
377      my $data = $_[0];
378      $data    =~ s/\]\]>/\]\]\]\]><!\[CDATA\[>/g;
379      $output->print("<![CDATA[$data]]>");
380      $hasData = 1;
381  };
382
383  my $SAFE_cdata = sub {
384    if ($elementLevel < 1) {
385      croak("Attempt to insert characters outside of document element");
386    } elsif ($dataMode && $hasElement) {
387      croak("Mixed content not allowed in data mode: characters");
388    } else {
389      _croakUnlessDefinedCharacters($_[0]);
390      &{$checkUnencodedRepertoire}($_[0]);
391      &{$cdata};
392    }
393  };
394
395                                # Assign the correct closures based on
396                                # the UNSAFE parameter
397  if ($unsafe) {
398    $self = {'END' => $end,
399             'XMLDECL' => $xmlDecl,
400             'PI' => $pi,
401             'COMMENT' => $comment,
402             'DOCTYPE' => $doctype,
403             'STARTTAG' => $startTag,
404             'EMPTYTAG' => $emptyTag,
405             'ENDTAG' => $endTag,
406             'CHARACTERS' => $characters,
407             'RAW' => $raw,
408             'CDATA' => $cdata
409            };
410  } else {
411    $self = {'END' => $SAFE_end,
412             'XMLDECL' => $SAFE_xmlDecl,
413             'PI' => $SAFE_pi,
414             'COMMENT' => $SAFE_comment,
415             'DOCTYPE' => $SAFE_doctype,
416             'STARTTAG' => $SAFE_startTag,
417             'EMPTYTAG' => $SAFE_emptyTag,
418             'ENDTAG' => $SAFE_endTag,
419             'CHARACTERS' => $SAFE_characters,
420             'RAW' => $SAFE_raw,               # This will intentionally fail
421             'CDATA' => $SAFE_cdata
422            };
423  }
424
425                                # Query methods
426  $self->{'IN_ELEMENT'} = sub {
427    my ($ancestor) = (@_);
428    return $elementStack[$#elementStack] eq $ancestor;
429  };
430
431  $self->{'WITHIN_ELEMENT'} = sub {
432    my ($ancestor) = (@_);
433    my $el;
434    foreach $el (@elementStack) {
435      return 1 if $el eq $ancestor;
436    }
437    return 0;
438  };
439
440  $self->{'CURRENT_ELEMENT'} = sub {
441    return $elementStack[$#elementStack];
442  };
443
444  $self->{'ANCESTOR'} = sub {
445    my ($n) = (@_);
446    if ($n < scalar(@elementStack)) {
447      return $elementStack[$#elementStack-$n];
448    } else {
449      return undef;
450    }
451  };
452
453                                # Set and get the output destination.
454  $self->{'GETOUTPUT'} = sub {
455    return $output;
456  };
457
458  $self->{'SETOUTPUT'} = sub {
459    my $newOutput = $_[0];
460
461    if (ref($newOutput) eq 'SCALAR') {
462      $output = new XML::Writer::_String($newOutput);
463    } else {
464                                # If there is no OUTPUT parameter,
465                                # use standard output
466      $output = $newOutput || \*STDOUT;
467      if ($outputEncoding) {
468        if (lc($outputEncoding) eq 'utf-8') {
469          binmode($output, ':encoding(utf-8)');
470        } elsif (lc($outputEncoding) eq 'us-ascii') {
471          binmode($output, ':encoding(us-ascii)');
472        } else {
473          die 'The only supported encodings are utf-8 and us-ascii';
474        }
475      }
476    }
477  };
478
479  $self->{'SETDATAMODE'} = sub {
480    $dataMode = $_[0];
481  };
482
483  $self->{'GETDATAMODE'} = sub {
484    return $dataMode;
485  };
486
487  $self->{'SETDATAINDENT'} = sub {
488    $dataIndent = $_[0];
489  };
490
491  $self->{'GETDATAINDENT'} = sub {
492    return $dataIndent;
493  };
494
495                                # Set the output.
496  &{$self->{'SETOUTPUT'}}($params{'OUTPUT'});
497
498                                # Return the blessed object.
499  return bless $self, $class;
500}
501
502
503
504########################################################################
505# Public methods
506########################################################################
507
508#
509# Finish writing the document.
510#
511sub end {
512  my $self = shift;
513  &{$self->{END}};
514}
515
516#
517# Write an XML declaration.
518#
519sub xmlDecl {
520  my $self = shift;
521  &{$self->{XMLDECL}};
522}
523
524#
525# Write a processing instruction.
526#
527sub pi {
528  my $self = shift;
529  &{$self->{PI}};
530}
531
532#
533# Write a comment.
534#
535sub comment {
536  my $self = shift;
537  &{$self->{COMMENT}};
538}
539
540#
541# Write a DOCTYPE declaration.
542#
543sub doctype {
544  my $self = shift;
545  &{$self->{DOCTYPE}};
546}
547
548#
549# Write a start tag.
550#
551sub startTag {
552  my $self = shift;
553  &{$self->{STARTTAG}};
554}
555
556#
557# Write an empty tag.
558#
559sub emptyTag {
560  my $self = shift;
561  &{$self->{EMPTYTAG}};
562}
563
564#
565# Write an end tag.
566#
567sub endTag {
568  my $self = shift;
569  &{$self->{ENDTAG}};
570}
571
572#
573# Write a simple data element.
574#
575sub dataElement {
576  my ($self, $name, $data, %atts) = (@_);
577  $self->startTag($name, %atts);
578  $self->characters($data);
579  $self->endTag($name);
580}
581
582#
583# Write a simple CDATA element.
584#
585sub cdataElement {
586    my ($self, $name, $data, %atts) = (@_);
587    $self->startTag($name, %atts);
588    $self->cdata($data);
589    $self->endTag($name);
590}
591
592#
593# Write character data.
594#
595sub characters {
596  my $self = shift;
597  &{$self->{CHARACTERS}};
598}
599
600#
601# Write raw, unquoted, completely unchecked character data.
602#
603sub raw {
604  my $self = shift;
605  &{$self->{RAW}};
606}
607
608#
609# Write CDATA.
610#
611sub cdata {
612    my $self = shift;
613    &{$self->{CDATA}};
614}
615
616#
617# Query the current element.
618#
619sub in_element {
620  my $self = shift;
621  return &{$self->{IN_ELEMENT}};
622}
623
624#
625# Query the ancestors.
626#
627sub within_element {
628  my $self = shift;
629  return &{$self->{WITHIN_ELEMENT}};
630}
631
632#
633# Get the name of the current element.
634#
635sub current_element {
636  my $self = shift;
637  return &{$self->{CURRENT_ELEMENT}};
638}
639
640#
641# Get the name of the numbered ancestor (zero-based).
642#
643sub ancestor {
644  my $self = shift;
645  return &{$self->{ANCESTOR}};
646}
647
648#
649# Get the current output destination.
650#
651sub getOutput {
652  my $self = shift;
653  return &{$self->{GETOUTPUT}};
654}
655
656
657#
658# Set the current output destination.
659#
660sub setOutput {
661  my $self = shift;
662  return &{$self->{SETOUTPUT}};
663}
664
665#
666# Set the current data mode (true or false).
667#
668sub setDataMode {
669  my $self = shift;
670  return &{$self->{SETDATAMODE}};
671}
672
673
674#
675# Get the current data mode (true or false).
676#
677sub getDataMode {
678  my $self = shift;
679  return &{$self->{GETDATAMODE}};
680}
681
682
683#
684# Set the current data indent step.
685#
686sub setDataIndent {
687  my $self = shift;
688  return &{$self->{SETDATAINDENT}};
689}
690
691
692#
693# Get the current data indent step.
694#
695sub getDataIndent {
696  my $self = shift;
697  return &{$self->{GETDATAINDENT}};
698}
699
700
701#
702# Empty stub.
703#
704sub addPrefix {
705}
706
707
708#
709# Empty stub.
710#
711sub removePrefix {
712}
713
714
715
716########################################################################
717# Private functions.
718########################################################################
719
720#
721# Private: check for duplicate attributes and bad characters.
722# Note - this starts at $_[1], because $_[0] is assumed to be an
723# element name.
724#
725sub _checkAttributes {
726  my %anames;
727  my $i = 1;
728  while ($_[0]->[$i]) {
729    my $name = $_[0]->[$i];
730    $i += 1;
731    if ($anames{$name}) {
732      croak("Two attributes named \"$name\"");
733    } else {
734      $anames{$name} = 1;
735    }
736    _croakUnlessDefinedCharacters($_[0]->[$i]);
737    $i += 1;
738  }
739}
740
741#
742# Private: escape an attribute value literal.
743#
744sub _escapeLiteral {
745  my $data = $_[0];
746  if ($data =~ /[\&\<\>\"]/) {
747    $data =~ s/\&/\&amp\;/g;
748    $data =~ s/\</\&lt\;/g;
749    $data =~ s/\>/\&gt\;/g;
750    $data =~ s/\"/\&quot\;/g;
751  }
752  return $data;
753}
754
755sub _escapeASCII($) {
756  $_[0] =~ s/([^\x00-\x7F])/sprintf('&#x%X;', ord($1))/ge;
757}
758
759sub _croakUnlessASCII($) {
760  if ($_[0] =~ /[^\x00-\x7F]/) {
761    croak('Non-ASCII characters are not permitted in this part of a US-ASCII document');
762  }
763}
764
765# Enforce XML 1.0, section 2.2's definition of "Char" (only reject low ASCII,
766#  so as not to require Unicode support from perl)
767sub _croakUnlessDefinedCharacters($) {
768  if ($_[0] =~ /([\x00-\x08\x0B-\x0C\x0E-\x1F])/) {
769    croak(sprintf('Code point \u%04X is not a valid character in XML', ord($1)));
770  }
771}
772
773
774########################################################################
775# XML::Writer::Namespaces - subclass for Namespace processing.
776########################################################################
777
778package XML::Writer::Namespaces;
779use strict;
780use vars qw(@ISA);
781use Carp;
782
783@ISA = qw(XML::Writer);
784
785#
786# Constructor
787#
788sub new {
789  my ($class, %params) = (@_);
790
791  my $unsafe = $params{UNSAFE};
792
793                                # Snarf the prefix map, if any, and
794                                # note the default prefix.
795  my %prefixMap = ();
796  if ($params{PREFIX_MAP}) {
797    %prefixMap = (%{$params{PREFIX_MAP}});
798    delete $params{PREFIX_MAP};
799  }
800  $prefixMap{'http://www.w3.org/XML/1998/namespace'} = 'xml';
801
802                                # Generate the reverse map for URIs
803  my $uriMap = {};
804  my $key;
805  foreach $key (keys(%prefixMap)) {
806    $uriMap->{$prefixMap{$key}} = $key;
807  }
808
809  my $defaultPrefix = $uriMap->{''};
810  delete $prefixMap{$defaultPrefix} if ($defaultPrefix);
811
812                                # Create an instance of the parent.
813  my $self = new XML::Writer(%params);
814
815                                # Snarf the parent's methods that we're
816                                # going to override.
817  my $OLD_startTag = $self->{STARTTAG};
818  my $OLD_emptyTag = $self->{EMPTYTAG};
819  my $OLD_endTag = $self->{ENDTAG};
820
821                                # State variables
822  my @stack;
823  my $prefixCounter = 1;
824  my $nsDecls = {'http://www.w3.org/XML/1998/namespace' => 'xml'};
825  my $nsDefaultDecl = undef;
826  my $nsCopyFlag = 0;
827  my @forcedNSDecls = ();
828
829  if ($params{FORCED_NS_DECLS}) {
830    @forcedNSDecls = @{$params{FORCED_NS_DECLS}};
831    delete $params{FORCED_NS_DECLS};
832  }
833
834  #
835  # Push the current declaration state.
836  #
837  my $pushState = sub {
838    push @stack, [$nsDecls, $nsDefaultDecl, $nsCopyFlag, $uriMap];
839    $nsCopyFlag = 0;
840  };
841
842
843  #
844  # Pop the current declaration state.
845  #
846  my $popState = sub {
847    ($nsDecls, $nsDefaultDecl, $nsCopyFlag, $uriMap) = @{pop @stack};
848  };
849
850  #
851  # Generate a new prefix.
852  #
853  my $genPrefix = sub {
854    my $uri = $_[0];
855    my $prefixCounter = 1;
856    my $prefix = $prefixMap{$uri};
857    my %clashMap = %{$uriMap};
858    while( my ($u, $p) = each(%prefixMap)) {
859      $clashMap{$p} = $u;
860    }
861
862    while (!defined($prefix) || ($clashMap{$prefix} && $clashMap{$prefix} ne $uri)) {
863      $prefix = "__NS$prefixCounter";
864      $prefixCounter++;
865    }
866
867    return $prefix;
868  };
869
870  #
871  # Perform namespace processing on a single name.
872  #
873  my $processName = sub {
874    my ($nameref, $atts, $attFlag) = (@_);
875    my ($uri, $local) = @{$$nameref};
876    my $prefix = $nsDecls->{$uri};
877
878                                # Is this an element name that matches
879                                # the default NS?
880    if (!$attFlag && $defaultPrefix && ($uri eq $defaultPrefix)) {
881      unless ($nsDefaultDecl && ($nsDefaultDecl eq $uri)) {
882        push @{$atts}, 'xmlns';
883        push @{$atts}, $uri;
884        $nsDefaultDecl = $uri;
885      }
886      $$nameref = $local;
887
888      if (defined($uriMap->{''})) {
889        delete ($nsDecls->{$uriMap->{''}});
890      }
891
892      $nsDecls->{$uri} = '';
893      unless ($nsCopyFlag) {
894        $uriMap = {%{$uriMap}};
895        $nsDecls = {%{$nsDecls}};
896        $nsCopyFlag = 1;
897      }
898      $uriMap->{''} = $uri;
899
900                                # Is there a straight-forward prefix?
901    } elsif ($prefix) {
902      $$nameref = "$prefix:$local";
903    } else {
904      $prefix = &{$genPrefix}($uri);
905      unless ($nsCopyFlag) {
906        $uriMap = {%{$uriMap}};
907        $nsDecls = {%{$nsDecls}};
908        $nsCopyFlag = 1;
909      }
910      $uriMap->{$prefix} = $uri;
911      $nsDecls->{$uri} = $prefix;
912      push @{$atts}, "xmlns:$prefix";
913      push @{$atts}, $uri;
914      $$nameref = "$prefix:$local";
915    }
916  };
917
918
919  #
920  # Perform namespace processing on element and attribute names.
921  #
922  my $nsProcess = sub {
923    if (ref($_[0]->[0]) eq 'ARRAY') {
924      &{$processName}(\$_[0]->[0], $_[0], 0);
925    }
926    my $i = 1;
927    while ($_[0]->[$i]) {
928      if (ref($_[0]->[$i]) eq 'ARRAY') {
929        &{$processName}(\$_[0]->[$i], $_[0], 1);
930      }
931      $i += 2;
932    }
933
934    # We do this if any declarations are forced, due either to
935    #  constructor arguments or to a call during processing.
936    if (@forcedNSDecls) {
937      foreach (@forcedNSDecls) {
938        my @dummy = ($_, 'dummy');
939        my $d2 = \@dummy;
940        if ($defaultPrefix && ($_ eq $defaultPrefix)) {
941          &{$processName}(\$d2, $_[0], 0);
942        } else {
943          &{$processName}(\$d2, $_[0], 1);
944        }
945      }
946      @forcedNSDecls = ();
947    }
948  };
949
950
951  # Indicate that a namespace should be declared by the next open element
952  $self->{FORCENSDECL} = sub {
953    push @forcedNSDecls, $_[0];
954  };
955
956
957  #
958  # Start tag, with NS processing
959  #
960  $self->{STARTTAG} = sub {
961    my $name = $_[0];
962    unless ($unsafe) {
963      _checkNSNames(\@_);
964    }
965    &{$pushState}();
966    &{$nsProcess}(\@_);
967    &{$OLD_startTag};
968  };
969
970
971  #
972  # Empty tag, with NS processing
973  #
974  $self->{EMPTYTAG} = sub {
975    unless ($unsafe) {
976      _checkNSNames(\@_);
977    }
978    &{$pushState}();
979    &{$nsProcess}(\@_);
980    &{$OLD_emptyTag};
981    &{$popState}();
982  };
983
984
985  #
986  # End tag, with NS processing
987  #
988  $self->{ENDTAG} = sub {
989    my $name = $_[0];
990    if (ref($_[0]) eq 'ARRAY') {
991      my $pfx = $nsDecls->{$_[0]->[0]};
992      if ($pfx) {
993        $_[0] = $pfx . ':' . $_[0]->[1];
994      } else {
995        $_[0] = $_[0]->[1];
996      }
997    } else {
998      $_[0] = $_[0];
999    }
1000#    &{$nsProcess}(\@_);
1001    &{$OLD_endTag};
1002    &{$popState}();
1003  };
1004
1005
1006  #
1007  # Processing instruction, but only if not UNSAFE.
1008  #
1009  unless ($unsafe) {
1010    my $OLD_pi = $self->{PI};
1011    $self->{PI} = sub {
1012      my $target = $_[0];
1013      if (index($target, ':') >= 0) {
1014        croak "PI target '$target' contains a colon.";
1015      }
1016      &{$OLD_pi};
1017    }
1018  };
1019
1020
1021  #
1022  # Add a prefix to the prefix map.
1023  #
1024  $self->{ADDPREFIX} = sub {
1025    my ($uri, $prefix) = (@_);
1026    if ($prefix) {
1027      $prefixMap{$uri} = $prefix;
1028    } else {
1029      if (defined($defaultPrefix)) {
1030        delete($prefixMap{$defaultPrefix});
1031      }
1032      $defaultPrefix = $uri;
1033    }
1034  };
1035
1036
1037  #
1038  # Remove a prefix from the prefix map.
1039  #
1040  $self->{REMOVEPREFIX} = sub {
1041    my ($uri) = (@_);
1042    if ($defaultPrefix && ($defaultPrefix eq $uri)) {
1043      $defaultPrefix = undef;
1044    }
1045    delete $prefixMap{$uri};
1046  };
1047
1048
1049  #
1050  # Bless and return the object.
1051  #
1052  return bless $self, $class;
1053}
1054
1055
1056#
1057# Add a preferred prefix for a namespace URI.
1058#
1059sub addPrefix {
1060  my $self = shift;
1061  return &{$self->{ADDPREFIX}};
1062}
1063
1064
1065#
1066# Remove a preferred prefix for a namespace URI.
1067#
1068sub removePrefix {
1069  my $self = shift;
1070  return &{$self->{REMOVEPREFIX}};
1071}
1072
1073
1074#
1075# Check names.
1076#
1077sub _checkNSNames {
1078  my $names = $_[0];
1079  my $i = 1;
1080  my $name = $names->[0];
1081
1082                                # Check the element name.
1083  if (ref($name) eq 'ARRAY') {
1084    if (index($name->[1], ':') >= 0) {
1085      croak("Local part of element name '" .
1086            $name->[1] .
1087            "' contains a colon.");
1088    }
1089  } elsif (index($name, ':') >= 0) {
1090    croak("Element name '$name' contains a colon.");
1091  }
1092
1093                                # Check the attribute names.
1094  while ($names->[$i]) {
1095    my $name = $names->[$i];
1096    if (ref($name) eq 'ARRAY') {
1097      my $local = $name->[1];
1098      if (index($local, ':') >= 0) {
1099        croak "Local part of attribute name '$local' contains a colon.";
1100      }
1101    } else {
1102      if ($name =~ /^xmlns/) {
1103        croak "Attribute name '$name' begins with 'xmlns'";
1104      } elsif (index($name, ':') >= 0) {
1105        croak "Attribute name '$name' contains ':'";
1106      }
1107    }
1108    $i += 2;
1109  }
1110}
1111
1112sub forceNSDecl
1113{
1114  my $self = shift;
1115  return &{$self->{FORCENSDECL}};
1116}
1117
1118
1119package XML::Writer::_String;
1120
1121# Internal class, behaving sufficiently like an IO::Handle,
1122#  that stores written output in a string
1123#
1124# Heavily inspired by Simon Oliver's XML::Writer::String
1125
1126sub new
1127{
1128  my $class = shift;
1129  my $scalar_ref = shift;
1130  return bless($scalar_ref, $class);
1131}
1132
1133sub print
1134{
1135  ${(shift)} .= join('', @_);
1136  return 1;
1137}
1138
11391;
1140__END__
1141
1142########################################################################
1143# POD Documentation
1144########################################################################
1145
1146=head1 NAME
1147
1148XML::Writer - Perl extension for writing XML documents.
1149
1150=head1 SYNOPSIS
1151
1152  use XML::Writer;
1153  use IO::File;
1154
1155  my $output = new IO::File(">output.xml");
1156
1157  my $writer = new XML::Writer(OUTPUT => $output);
1158  $writer->startTag("greeting",
1159                    "class" => "simple");
1160  $writer->characters("Hello, world!");
1161  $writer->endTag("greeting");
1162  $writer->end();
1163  $output->close();
1164
1165
1166=head1 DESCRIPTION
1167
1168XML::Writer is a helper module for Perl programs that write an XML
1169document.  The module handles all escaping for attribute values and
1170character data and constructs different types of markup, such as tags,
1171comments, and processing instructions.
1172
1173By default, the module performs several well-formedness checks to
1174catch errors during output.  This behaviour can be extremely useful
1175during development and debugging, but it can be turned off for
1176production-grade code.
1177
1178The module can operate either in regular mode in or Namespace
1179processing mode.  In Namespace mode, the module will generate
1180Namespace Declarations itself, and will perform additional checks on
1181the output.
1182
1183Additional support is available for a simplified data mode with no
1184mixed content: newlines are automatically inserted around elements and
1185elements can optionally be indented based as their nesting level.
1186
1187
1188=head1 METHODS
1189
1190=head2 Writing XML
1191
1192=over 4
1193
1194=item new([$params])
1195
1196Create a new XML::Writer object:
1197
1198  my $writer = new XML::Writer(OUTPUT => $output, NEWLINES => 1);
1199
1200Arguments are an anonymous hash array of parameters:
1201
1202=over 4
1203
1204=item OUTPUT
1205
1206An object blessed into IO::Handle or one of its subclasses (such as
1207IO::File), or a reference to a string; if this parameter is not present,
1208the module will write to standard output. If a string reference is passed,
1209it will capture the generated XML (as a string; to get bytes use the
1210C<Encode> module).
1211
1212=item NAMESPACES
1213
1214A true (1) or false (0, undef) value; if this parameter is present and
1215its value is true, then the module will accept two-member array
1216reference in the place of element and attribute names, as in the
1217following example:
1218
1219  my $rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1220  my $writer = new XML::Writer(NAMESPACES => 1);
1221  $writer->startTag([$rdfns, "Description"]);
1222
1223The first member of the array is a namespace URI, and the second part
1224is the local part of a qualified name.  The module will automatically
1225generate appropriate namespace declarations and will replace the URI
1226part with a prefix.
1227
1228=item PREFIX_MAP
1229
1230A hash reference; if this parameter is present and the module is
1231performing namespace processing (see the NAMESPACES parameter), then
1232the module will use this hash to look up preferred prefixes for
1233namespace URIs:
1234
1235
1236  my $rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1237  my $writer = new XML::Writer(NAMESPACES => 1,
1238                               PREFIX_MAP => {$rdfns => 'rdf'});
1239
1240The keys in the hash table are namespace URIs, and the values are the
1241associated prefixes.  If there is not a preferred prefix for the
1242namespace URI in this hash, then the module will automatically
1243generate prefixes of the form "__NS1", "__NS2", etc.
1244
1245To set the default namespace, use '' for the prefix.
1246
1247=item FORCED_NS_DECLS
1248
1249An array reference; if this parameter is present, the document element
1250will contain declarations for all the given namespace URIs.
1251Declaring namespaces in advance is particularly useful when a large
1252number of elements from a namespace are siblings, but don't share a direct
1253ancestor from the same namespace.
1254
1255=item NEWLINES
1256
1257A true or false value; if this parameter is present and its value is
1258true, then the module will insert an extra newline before the closing
1259delimiter of start, end, and empty tags to guarantee that the document
1260does not end up as a single, long line.  If the paramter is not
1261present, the module will not insert the newlines.
1262
1263=item UNSAFE
1264
1265A true or false value; if this parameter is present and its value is
1266true, then the module will skip most well-formedness error checking.
1267If the parameter is not present, the module will perform the
1268well-formedness error checking by default.  Turn off error checking at
1269your own risk!
1270
1271=item DATA_MODE
1272
1273A true or false value; if this parameter is present and its value is
1274true, then the module will enter a special data mode, inserting
1275newlines automatically around elements and (unless UNSAFE is also
1276specified) reporting an error if any element has both characters and
1277elements as content.
1278
1279=item DATA_INDENT
1280
1281A numeric value; if this parameter is present, it represents the
1282indent step for elements in data mode (it will be ignored when not in
1283data mode).
1284
1285=item ENCODING
1286
1287A character encoding; currently this must be one of 'utf-8' or 'us-ascii'.
1288If present, it will be used for the underlying character encoding and as the
1289default in the XML declaration.
1290
1291=back
1292
1293=item end()
1294
1295Finish creating an XML document.  This method will check that the
1296document has exactly one document element, and that all start tags are
1297closed:
1298
1299  $writer->end();
1300
1301=item xmlDecl([$encoding, $standalone])
1302
1303Add an XML declaration to the beginning of an XML document.  The
1304version will always be "1.0".  If you provide a non-null encoding or
1305standalone argument, its value will appear in the declaration (any
1306non-null value for standalone except 'no' will automatically be
1307converted to 'yes'). If not given here, the encoding will be taken from the
1308ENCODING argument. Pass the empty string to suppress this behaviour.
1309
1310  $writer->xmlDecl("UTF-8");
1311
1312=item doctype($name, [$publicId, $systemId])
1313
1314Add a DOCTYPE declaration to an XML document.  The declaration must
1315appear before the beginning of the root element.  If you provide a
1316publicId, you must provide a systemId as well, but you may provide
1317just a system ID by passing 'undef' for the publicId.
1318
1319  $writer->doctype("html");
1320
1321=item comment($text)
1322
1323Add a comment to an XML document.  If the comment appears outside the
1324document element (either before the first start tag or after the last
1325end tag), the module will add a carriage return after it to improve
1326readability. In data mode, comments will be treated as empty tags:
1327
1328  $writer->comment("This is a comment");
1329
1330=item pi($target [, $data])
1331
1332Add a processing instruction to an XML document:
1333
1334  $writer->pi('xml-stylesheet', 'href="style.css" type="text/css"');
1335
1336If the processing instruction appears outside the document element
1337(either before the first start tag or after the last end tag), the
1338module will add a carriage return after it to improve readability.
1339
1340The $target argument must be a single XML name.  If you provide the
1341$data argument, the module will insert its contents following the
1342$target argument, separated by a single space.
1343
1344=item startTag($name [, $aname1 => $value1, ...])
1345
1346Add a start tag to an XML document.  Any arguments after the element
1347name are assumed to be name/value pairs for attributes: the module
1348will escape all '&', '<', '>', and '"' characters in the attribute
1349values using the predefined XML entities:
1350
1351  $writer->startTag('doc', 'version' => '1.0',
1352                           'status' => 'draft',
1353                           'topic' => 'AT&T');
1354
1355All start tags must eventually have matching end tags.
1356
1357=item emptyTag($name [, $aname1 => $value1, ...])
1358
1359Add an empty tag to an XML document.  Any arguments after the element
1360name are assumed to be name/value pairs for attributes (see startTag()
1361for details):
1362
1363  $writer->emptyTag('img', 'src' => 'portrait.jpg',
1364                           'alt' => 'Portrait of Emma.');
1365
1366=item endTag([$name])
1367
1368Add an end tag to an XML document.  The end tag must match the closest
1369open start tag, and there must be a matching and properly-nested end
1370tag for every start tag:
1371
1372  $writer->endTag('doc');
1373
1374If the $name argument is omitted, then the module will automatically
1375supply the name of the currently open element:
1376
1377  $writer->startTag('p');
1378  $writer->endTag();
1379
1380=item dataElement($name, $data [, $aname1 => $value1, ...])
1381
1382Print an entire element containing only character data.  This is
1383equivalent to
1384
1385  $writer->startTag($name [, $aname1 => $value1, ...]);
1386  $writer->characters($data);
1387  $writer->endTag($name);
1388
1389=item characters($data)
1390
1391Add character data to an XML document.  All '<', '>', and '&'
1392characters in the $data argument will automatically be escaped using
1393the predefined XML entities:
1394
1395  $writer->characters("Here is the formula: ");
1396  $writer->characters("a < 100 && a > 5");
1397
1398You may invoke this method only within the document element
1399(i.e. after the first start tag and before the last end tag).
1400
1401In data mode, you must not use this method to add whitespace between
1402elements.
1403
1404=item raw($data)
1405
1406Print data completely unquoted and unchecked to the XML document.  For
1407example C<raw('<')> will print a literal < character.  This
1408necessarily bypasses all well-formedness checking, and is therefore
1409only available in unsafe mode.
1410
1411This can sometimes be useful for printing entities which are defined
1412for your XML format but the module doesn't know about, for example
1413&nbsp; for XHTML.
1414
1415=item cdata($data)
1416
1417As C<characters()> but writes the data quoted in a CDATA section, that
1418is, between <![CDATA[ and ]]>.  If the data to be written itself
1419contains ]]>, it will be written as several consecutive CDATA
1420sections.
1421
1422=item cdataElement($name, $data [, $aname1 => $value1, ...])
1423
1424As C<dataElement()> but the element content is written as one or more
1425CDATA sections (see C<cdata()>).
1426
1427=item setOutput($output)
1428
1429Set the current output destination, as in the OUTPUT parameter for the
1430constructor.
1431
1432=item getOutput()
1433
1434Return the current output destination, as in the OUTPUT parameter for
1435the constructor.
1436
1437=item setDataMode($mode)
1438
1439Enable or disable data mode, as in the DATA_MODE parameter for the
1440constructor.
1441
1442=item getDataMode()
1443
1444Return the current data mode, as in the DATA_MODE parameter for the
1445constructor.
1446
1447=item setDataIndent($step)
1448
1449Set the indent step for data mode, as in the DATA_INDENT parameter for
1450the constructor.
1451
1452=item getDataIndent()
1453
1454Return the indent step for data mode, as in the DATA_INDENT parameter
1455for the constructor.
1456
1457
1458=back
1459
1460=head2 Querying XML
1461
1462=over 4
1463
1464=item in_element($name)
1465
1466Return a true value if the most recent open element matches $name:
1467
1468  if ($writer->in_element('dl')) {
1469    $writer->startTag('dt');
1470  } else {
1471    $writer->startTag('li');
1472  }
1473
1474=item within_element($name)
1475
1476Return a true value if any open element matches $name:
1477
1478  if ($writer->within_element('body')) {
1479    $writer->startTag('h1');
1480  } else {
1481    $writer->startTag('title');
1482  }
1483
1484=item current_element()
1485
1486Return the name of the currently open element:
1487
1488  my $name = $writer->current_element();
1489
1490This is the equivalent of
1491
1492  my $name = $writer->ancestor(0);
1493
1494=item ancestor($n)
1495
1496Return the name of the nth ancestor, where $n=0 for the current open
1497element.
1498
1499=back
1500
1501
1502=head2 Additional Namespace Support
1503
1504As of 0.510, these methods may be used while writing a document.
1505
1506=over 4
1507
1508=item addPrefix($uri, $prefix)
1509
1510Add a preferred mapping between a Namespace URI and a prefix.  See
1511also the PREFIX_MAP constructor parameter.
1512
1513To set the default namespace, omit the $prefix parameter or set it to
1514''.
1515
1516=item removePrefix($uri)
1517
1518Remove a preferred mapping between a Namespace URI and a prefix.
1519
1520=item forceNSDecl($uri)
1521
1522Indicate that a namespace declaration for this URI should be included
1523with the next element to be started.
1524
1525=back
1526
1527
1528=head1 ERROR REPORTING
1529
1530With the default settings, the XML::Writer module can detect several
1531basic XML well-formedness errors:
1532
1533=over 4
1534
1535=item *
1536
1537Lack of a (top-level) document element, or multiple document elements.
1538
1539=item *
1540
1541Unclosed start tags.
1542
1543=item *
1544
1545Misplaced delimiters in the contents of processing instructions or
1546comments.
1547
1548=item *
1549
1550Misplaced or duplicate XML declaration(s).
1551
1552=item *
1553
1554Misplaced or duplicate DOCTYPE declaration(s).
1555
1556=item *
1557
1558Mismatch between the document type name in the DOCTYPE declaration and
1559the name of the document element.
1560
1561=item *
1562
1563Mismatched start and end tags.
1564
1565=item *
1566
1567Attempts to insert character data outside the document element.
1568
1569=item *
1570
1571Duplicate attributes with the same name.
1572
1573=back
1574
1575During Namespace processing, the module can detect the following
1576additional errors:
1577
1578=over 4
1579
1580=item *
1581
1582Attempts to use PI targets or element or attribute names containing a
1583colon.
1584
1585=item *
1586
1587Attempts to use attributes with names beginning "xmlns".
1588
1589=back
1590
1591To ensure full error detection, a program must also invoke the end
1592method when it has finished writing a document:
1593
1594  $writer->startTag('greeting');
1595  $writer->characters("Hello, world!");
1596  $writer->endTag('greeting');
1597  $writer->end();
1598
1599This error reporting can catch many hidden bugs in Perl programs that
1600create XML documents; however, if necessary, it can be turned off by
1601providing an UNSAFE parameter:
1602
1603  my $writer = new XML::Writer(OUTPUT => $output, UNSAFE => 1);
1604
1605
1606=head1 AUTHOR
1607
1608David Megginson E<lt>david@megginson.comE<gt>
1609
1610
1611=head1 COPYRIGHT
1612
1613Copyright 1999, 2000 David Megginson E<lt>david@megginson.comE<gt>
1614
1615Copyright 2004, 2005 Joseph Walton E<lt>joe@kafsemo.orgE<gt>
1616
1617
1618=head1 SEE ALSO
1619
1620XML::Parser
1621
1622=cut
1623