1# $Id: Parser.pm,v 1.1.1.2 2007/10/10 23:04:14 ahuda Exp $
2
3package XML::LibXML::SAX::Parser;
4
5use strict;
6use vars qw($VERSION @ISA);
7
8use XML::LibXML;
9use XML::LibXML::Common qw(:libxml);
10use XML::SAX::Base;
11use XML::SAX::DocumentLocator;
12
13$VERSION = "1.65"; # VERSION TEMPLATE: DO NOT CHANGE
14@ISA = ('XML::SAX::Base');
15
16sub _parse_characterstream {
17    my ($self, $fh, $options) = @_;
18    die "parsing a characterstream is not supported at this time";
19}
20
21sub _parse_bytestream {
22    my ($self, $fh, $options) = @_;
23    my $parser = XML::LibXML->new();
24    my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_fh($fh, $options->{Source}{SystemId}) : $parser->parse_fh($fh);
25    $self->generate($doc);
26}
27
28sub _parse_string {
29    my ($self, $str, $options) = @_;
30    my $parser = XML::LibXML->new();
31    my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_string($str, $options->{Source}{SystemId}) : $parser->parse_string($str);
32    $self->generate($doc);
33}
34
35sub _parse_systemid {
36    my ($self, $sysid, $options) = @_;
37    my $parser = XML::LibXML->new();
38    my $doc = $parser->parse_file($sysid);
39    $self->generate($doc);
40}
41
42sub generate {
43    my $self = shift;
44    my ($node) = @_;
45
46    my $doc = $node->ownerDocument();
47    {
48      # precompute some DocumentLocator values
49      my %locator = (
50	PublicId => undef,
51	SystemId => undef,
52	Encoding => undef,
53	XMLVersion => undef,
54       );
55      my $dtd = defined $doc ? $doc->externalSubset() : undef;
56      if (defined $dtd) {
57	$locator{PublicId} = $dtd->publicId();
58	$locator{SystemId} = $dtd->systemId();
59      }
60      if (defined $doc) {
61	$locator{Encoding} = $doc->encoding();
62	$locator{XMLVersion} = $doc->version();
63      }
64      $self->set_document_locator(
65	XML::SAX::DocumentLocator->new(
66	  sub { $locator{PublicId} },
67	  sub { $locator{SystemId} },
68	  sub { defined($self->{current_node}) ? $self->{current_node}->line_number() : undef },
69	  sub { 1 },
70	  sub { $locator{Encoding} },
71	  sub { $locator{XMLVersion} },
72	 ),
73       );
74    }
75
76    if ( $node->nodeType() == XML_DOCUMENT_NODE
77         || $node->nodeType == XML_HTML_DOCUMENT_NODE ) {
78        $self->start_document({});
79        $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding});
80        $self->process_node($node);
81        $self->end_document({});
82    }
83}
84
85sub process_node {
86    my ($self, $node) = @_;
87
88    local $self->{current_node} = $node;
89
90    my $node_type = $node->nodeType();
91    if ($node_type == XML_COMMENT_NODE) {
92        $self->comment( { Data => $node->getData } );
93    }
94    elsif ($node_type == XML_TEXT_NODE
95           || $node_type == XML_CDATA_SECTION_NODE) {
96        # warn($node->getData . "\n");
97        $self->characters( { Data => $node->nodeValue } );
98    }
99    elsif ($node_type == XML_ELEMENT_NODE) {
100        # warn("<" . $node->getName . ">\n");
101        $self->process_element($node);
102        # warn("</" . $node->getName . ">\n");
103    }
104    elsif ($node_type == XML_ENTITY_REF_NODE) {
105        foreach my $kid ($node->childNodes) {
106            # warn("child of entity ref: " . $kid->getType() . " called: " . $kid->getName . "\n");
107            $self->process_node($kid);
108        }
109    }
110    elsif ($node_type == XML_DOCUMENT_NODE
111           || $node_type == XML_HTML_DOCUMENT_NODE
112           || $node_type == XML_DOCUMENT_FRAG_NODE) {
113        # some times it is just usefull to generate SAX events from
114        # a document fragment (very good with filters).
115        foreach my $kid ($node->childNodes) {
116            $self->process_node($kid);
117        }
118    }
119    elsif ($node_type == XML_PI_NODE) {
120        $self->processing_instruction( { Target =>  $node->getName, Data => $node->getData } );
121    }
122    elsif ($node_type == XML_COMMENT_NODE) {
123        $self->comment( { Data => $node->getData } );
124    }
125    elsif ( $node_type == XML_XINCLUDE_START
126            || $node_type == XML_XINCLUDE_END ) {
127        # ignore!
128        # i may want to handle this one day, dunno yet
129    }
130    elsif ($node_type == XML_DTD_NODE ) {
131        # ignore!
132        # i will support DTDs, but had no time yet.
133    }
134    else {
135        # warn("unsupported node type: $node_type");
136    }
137
138}
139
140sub process_element {
141    my ($self, $element) = @_;
142
143    my $attribs = {};
144    my @ns_maps = $element->getNamespaces;
145
146    foreach my $ns (@ns_maps) {
147        $self->start_prefix_mapping(
148            {
149                NamespaceURI => $ns->href,
150                Prefix       => ( defined $ns->localname  ? $ns->localname : ''),
151            }
152        );
153    }
154
155    foreach my $attr ($element->attributes) {
156        my $key;
157        # warn("Attr: $attr -> ", $attr->getName, " = ", $attr->getData, "\n");
158        # this isa dump thing...
159        if ($attr->isa('XML::LibXML::Namespace')) {
160            # TODO This needs fixing modulo agreeing on what
161            # is the right thing to do here.
162            unless ( defined $attr->name ) {
163                ## It's an atter like "xmlns='foo'"
164                $attribs->{"{}xmlns"} =
165                  {
166                   Name         => "xmlns",
167                   LocalName    => "xmlns",
168                   Prefix       => "",
169                   Value        => $attr->href,
170                   NamespaceURI => "",
171                  };
172            }
173            else {
174                my $prefix = "xmlns";
175                my $localname = $attr->localname;
176                my $key = "{http://www.w3.org/2000/xmlns/}";
177                my $name = "xmlns";
178
179                if ( defined $localname ) {
180                    $key .= $localname;
181                    $name.= ":".$localname;
182                }
183
184                $attribs->{$key} =
185                  {
186                   Name         => $name,
187                   Value        => $attr->href,
188                   NamespaceURI => "http://www.w3.org/2000/xmlns/",
189                   Prefix       => $prefix,
190                   LocalName    => $localname,
191                  };
192            }
193        }
194        else {
195            my $ns = $attr->namespaceURI;
196
197            $ns = '' unless defined $ns;
198            $key = "{$ns}".$attr->localname;
199            ## Not sure why, but $attr->name is coming through stripped
200            ## of its prefix, so we need to hand-assemble a real name.
201            my $name = $attr->name;
202            $name = "" unless defined $name;
203
204            my $prefix = $attr->prefix;
205            $prefix = "" unless defined $prefix;
206            $name = "$prefix:$name"
207              if index( $name, ":" ) < 0 && length $prefix;
208
209            $attribs->{$key} =
210                {
211                    Name => $name,
212                    Value => $attr->value,
213                    NamespaceURI => $ns,
214                    Prefix => $prefix,
215                    LocalName => $attr->localname,
216                };
217        }
218        # use Data::Dumper;
219        # warn("Attr made: ", Dumper($attribs->{$key}), "\n");
220    }
221
222    my $node = {
223        Name => $element->nodeName,
224        Attributes => $attribs,
225        NamespaceURI => $element->namespaceURI,
226        Prefix => $element->prefix || "",
227        LocalName => $element->localname,
228    };
229
230    $self->start_element($node);
231
232    foreach my $child ($element->childNodes) {
233        $self->process_node($child);
234    }
235
236    my $end_node = { %$node };
237
238    delete $end_node->{Attributes};
239
240    $self->end_element($end_node);
241
242    foreach my $ns (@ns_maps) {
243        $self->end_prefix_mapping(
244            {
245                NamespaceURI => $ns->href,
246                Prefix       => ( defined $ns->localname  ? $ns->localname : ''),
247            }
248        );
249    }
250}
251
2521;
253
254__END__
255