1# $Id: Parser.pm,v 1.1.1.2 2007/10/10 23:04:14 ahuda Exp $ 2 3package XML::LibXML::SAX::Parser; 4 5use strict; 6use vars qw($VERSION @ISA); 7 8use XML::LibXML; 9use XML::LibXML::Common qw(:libxml); 10use XML::SAX::Base; 11use XML::SAX::DocumentLocator; 12 13$VERSION = "1.65"; # VERSION TEMPLATE: DO NOT CHANGE 14@ISA = ('XML::SAX::Base'); 15 16sub _parse_characterstream { 17 my ($self, $fh, $options) = @_; 18 die "parsing a characterstream is not supported at this time"; 19} 20 21sub _parse_bytestream { 22 my ($self, $fh, $options) = @_; 23 my $parser = XML::LibXML->new(); 24 my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_fh($fh, $options->{Source}{SystemId}) : $parser->parse_fh($fh); 25 $self->generate($doc); 26} 27 28sub _parse_string { 29 my ($self, $str, $options) = @_; 30 my $parser = XML::LibXML->new(); 31 my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_string($str, $options->{Source}{SystemId}) : $parser->parse_string($str); 32 $self->generate($doc); 33} 34 35sub _parse_systemid { 36 my ($self, $sysid, $options) = @_; 37 my $parser = XML::LibXML->new(); 38 my $doc = $parser->parse_file($sysid); 39 $self->generate($doc); 40} 41 42sub generate { 43 my $self = shift; 44 my ($node) = @_; 45 46 my $doc = $node->ownerDocument(); 47 { 48 # precompute some DocumentLocator values 49 my %locator = ( 50 PublicId => undef, 51 SystemId => undef, 52 Encoding => undef, 53 XMLVersion => undef, 54 ); 55 my $dtd = defined $doc ? $doc->externalSubset() : undef; 56 if (defined $dtd) { 57 $locator{PublicId} = $dtd->publicId(); 58 $locator{SystemId} = $dtd->systemId(); 59 } 60 if (defined $doc) { 61 $locator{Encoding} = $doc->encoding(); 62 $locator{XMLVersion} = $doc->version(); 63 } 64 $self->set_document_locator( 65 XML::SAX::DocumentLocator->new( 66 sub { $locator{PublicId} }, 67 sub { $locator{SystemId} }, 68 sub { defined($self->{current_node}) ? $self->{current_node}->line_number() : undef }, 69 sub { 1 }, 70 sub { $locator{Encoding} }, 71 sub { $locator{XMLVersion} }, 72 ), 73 ); 74 } 75 76 if ( $node->nodeType() == XML_DOCUMENT_NODE 77 || $node->nodeType == XML_HTML_DOCUMENT_NODE ) { 78 $self->start_document({}); 79 $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding}); 80 $self->process_node($node); 81 $self->end_document({}); 82 } 83} 84 85sub process_node { 86 my ($self, $node) = @_; 87 88 local $self->{current_node} = $node; 89 90 my $node_type = $node->nodeType(); 91 if ($node_type == XML_COMMENT_NODE) { 92 $self->comment( { Data => $node->getData } ); 93 } 94 elsif ($node_type == XML_TEXT_NODE 95 || $node_type == XML_CDATA_SECTION_NODE) { 96 # warn($node->getData . "\n"); 97 $self->characters( { Data => $node->nodeValue } ); 98 } 99 elsif ($node_type == XML_ELEMENT_NODE) { 100 # warn("<" . $node->getName . ">\n"); 101 $self->process_element($node); 102 # warn("</" . $node->getName . ">\n"); 103 } 104 elsif ($node_type == XML_ENTITY_REF_NODE) { 105 foreach my $kid ($node->childNodes) { 106 # warn("child of entity ref: " . $kid->getType() . " called: " . $kid->getName . "\n"); 107 $self->process_node($kid); 108 } 109 } 110 elsif ($node_type == XML_DOCUMENT_NODE 111 || $node_type == XML_HTML_DOCUMENT_NODE 112 || $node_type == XML_DOCUMENT_FRAG_NODE) { 113 # some times it is just usefull to generate SAX events from 114 # a document fragment (very good with filters). 115 foreach my $kid ($node->childNodes) { 116 $self->process_node($kid); 117 } 118 } 119 elsif ($node_type == XML_PI_NODE) { 120 $self->processing_instruction( { Target => $node->getName, Data => $node->getData } ); 121 } 122 elsif ($node_type == XML_COMMENT_NODE) { 123 $self->comment( { Data => $node->getData } ); 124 } 125 elsif ( $node_type == XML_XINCLUDE_START 126 || $node_type == XML_XINCLUDE_END ) { 127 # ignore! 128 # i may want to handle this one day, dunno yet 129 } 130 elsif ($node_type == XML_DTD_NODE ) { 131 # ignore! 132 # i will support DTDs, but had no time yet. 133 } 134 else { 135 # warn("unsupported node type: $node_type"); 136 } 137 138} 139 140sub process_element { 141 my ($self, $element) = @_; 142 143 my $attribs = {}; 144 my @ns_maps = $element->getNamespaces; 145 146 foreach my $ns (@ns_maps) { 147 $self->start_prefix_mapping( 148 { 149 NamespaceURI => $ns->href, 150 Prefix => ( defined $ns->localname ? $ns->localname : ''), 151 } 152 ); 153 } 154 155 foreach my $attr ($element->attributes) { 156 my $key; 157 # warn("Attr: $attr -> ", $attr->getName, " = ", $attr->getData, "\n"); 158 # this isa dump thing... 159 if ($attr->isa('XML::LibXML::Namespace')) { 160 # TODO This needs fixing modulo agreeing on what 161 # is the right thing to do here. 162 unless ( defined $attr->name ) { 163 ## It's an atter like "xmlns='foo'" 164 $attribs->{"{}xmlns"} = 165 { 166 Name => "xmlns", 167 LocalName => "xmlns", 168 Prefix => "", 169 Value => $attr->href, 170 NamespaceURI => "", 171 }; 172 } 173 else { 174 my $prefix = "xmlns"; 175 my $localname = $attr->localname; 176 my $key = "{http://www.w3.org/2000/xmlns/}"; 177 my $name = "xmlns"; 178 179 if ( defined $localname ) { 180 $key .= $localname; 181 $name.= ":".$localname; 182 } 183 184 $attribs->{$key} = 185 { 186 Name => $name, 187 Value => $attr->href, 188 NamespaceURI => "http://www.w3.org/2000/xmlns/", 189 Prefix => $prefix, 190 LocalName => $localname, 191 }; 192 } 193 } 194 else { 195 my $ns = $attr->namespaceURI; 196 197 $ns = '' unless defined $ns; 198 $key = "{$ns}".$attr->localname; 199 ## Not sure why, but $attr->name is coming through stripped 200 ## of its prefix, so we need to hand-assemble a real name. 201 my $name = $attr->name; 202 $name = "" unless defined $name; 203 204 my $prefix = $attr->prefix; 205 $prefix = "" unless defined $prefix; 206 $name = "$prefix:$name" 207 if index( $name, ":" ) < 0 && length $prefix; 208 209 $attribs->{$key} = 210 { 211 Name => $name, 212 Value => $attr->value, 213 NamespaceURI => $ns, 214 Prefix => $prefix, 215 LocalName => $attr->localname, 216 }; 217 } 218 # use Data::Dumper; 219 # warn("Attr made: ", Dumper($attribs->{$key}), "\n"); 220 } 221 222 my $node = { 223 Name => $element->nodeName, 224 Attributes => $attribs, 225 NamespaceURI => $element->namespaceURI, 226 Prefix => $element->prefix || "", 227 LocalName => $element->localname, 228 }; 229 230 $self->start_element($node); 231 232 foreach my $child ($element->childNodes) { 233 $self->process_node($child); 234 } 235 236 my $end_node = { %$node }; 237 238 delete $end_node->{Attributes}; 239 240 $self->end_element($end_node); 241 242 foreach my $ns (@ns_maps) { 243 $self->end_prefix_mapping( 244 { 245 NamespaceURI => $ns->href, 246 Prefix => ( defined $ns->localname ? $ns->localname : ''), 247 } 248 ); 249 } 250} 251 2521; 253 254__END__ 255