1 2### 3# XML::NamespaceSupport - a simple generic namespace processor 4# Robin Berjon <robin@knowscape.com> 5### 6 7package XML::NamespaceSupport; 8use strict; 9use constant FATALS => 0; # root object 10use constant NSMAP => 1; 11use constant UNKNOWN_PREF => 2; 12use constant AUTO_PREFIX => 3; 13use constant DEFAULT => 0; # maps 14use constant PREFIX_MAP => 1; 15use constant DECLARATIONS => 2; 16 17use vars qw($VERSION $NS_XMLNS $NS_XML); 18$VERSION = '1.08'; 19$NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; 20$NS_XML = 'http://www.w3.org/XML/1998/namespace'; 21 22 23# add the ns stuff that baud wants based on Java's xml-writer 24 25 26#-------------------------------------------------------------------# 27# constructor 28#-------------------------------------------------------------------# 29sub new { 30 my $class = ref($_[0]) ? ref(shift) : shift; 31 my $options = shift; 32 my $self = [ 33 1, # FATALS 34 [[ # NSMAP 35 undef, # DEFAULT 36 { xml => $NS_XML }, # PREFIX_MAP 37 undef, # DECLARATIONS 38 ]], 39 'aaa', # UNKNOWN_PREF 40 0, # AUTO_PREFIX 41 ]; 42 $self->[NSMAP]->[0]->[PREFIX_MAP]->{xmlns} = $NS_XMLNS if $options->{xmlns}; 43 $self->[FATALS] = $options->{fatal_errors} if defined $options->{fatal_errors}; 44 $self->[AUTO_PREFIX] = $options->{auto_prefix} if defined $options->{auto_prefix}; 45 return bless $self, $class; 46} 47#-------------------------------------------------------------------# 48 49#-------------------------------------------------------------------# 50# reset() - return to the original state (for reuse) 51#-------------------------------------------------------------------# 52sub reset { 53 my $self = shift; 54 $#{$self->[NSMAP]} = 0; 55} 56#-------------------------------------------------------------------# 57 58#-------------------------------------------------------------------# 59# push_context() - add a new empty context to the stack 60#-------------------------------------------------------------------# 61sub push_context { 62 my $self = shift; 63 push @{$self->[NSMAP]}, [ 64 $self->[NSMAP]->[-1]->[DEFAULT], 65 { %{$self->[NSMAP]->[-1]->[PREFIX_MAP]} }, 66 [], 67 ]; 68} 69#-------------------------------------------------------------------# 70 71#-------------------------------------------------------------------# 72# pop_context() - remove the topmost context fromt the stack 73#-------------------------------------------------------------------# 74sub pop_context { 75 my $self = shift; 76 die 'Trying to pop context without push context' unless @{$self->[NSMAP]} > 1; 77 pop @{$self->[NSMAP]}; 78} 79#-------------------------------------------------------------------# 80 81#-------------------------------------------------------------------# 82# declare_prefix() - declare a prefix in the current scope 83#-------------------------------------------------------------------# 84sub declare_prefix { 85 my $self = shift; 86 my $prefix = shift; 87 my $value = shift; 88 89 warn <<' EOWARN' unless defined $prefix or $self->[AUTO_PREFIX]; 90 Prefix was undefined. 91 If you wish to set the default namespace, use the empty string ''. 92 If you wish to autogenerate prefixes, set the auto_prefix option 93 to a true value. 94 EOWARN 95 96 if ($prefix eq 'xml' and $value ne $NS_XML) { 97 die "The xml prefix can only be bound to the $NS_XML namespace." 98 } 99 elsif ($value eq $NS_XML and $prefix ne 'xml') { 100 die "the $NS_XML namespace can only be bound to the xml prefix."; 101 } 102 elsif ($value eq $NS_XML and $prefix eq 'xml') { 103 return 1; 104 } 105 return 0 if index(lc($prefix), 'xml') == 0; 106 107 if (defined $prefix and $prefix eq '') { 108 $self->[NSMAP]->[-1]->[DEFAULT] = $value; 109 } 110 else { 111 die "Cannot undeclare prefix $prefix" if $value eq ''; 112 if (not defined $prefix and $self->[AUTO_PREFIX]) { 113 while (1) { 114 $prefix = $self->[UNKNOWN_PREF]++; 115 last if not exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; 116 } 117 } 118 elsif (not defined $prefix and not $self->[AUTO_PREFIX]) { 119 return 0; 120 } 121 $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix} = $value; 122 } 123 push @{$self->[NSMAP]->[-1]->[DECLARATIONS]}, $prefix; 124 return 1; 125} 126#-------------------------------------------------------------------# 127 128#-------------------------------------------------------------------# 129# declare_prefixes() - declare several prefixes in the current scope 130#-------------------------------------------------------------------# 131sub declare_prefixes { 132 my $self = shift; 133 my %prefixes = @_; 134 while (my ($k,$v) = each %prefixes) { 135 $self->declare_prefix($k,$v); 136 } 137} 138#-------------------------------------------------------------------# 139 140#-------------------------------------------------------------------# 141# undeclare_prefix 142#-------------------------------------------------------------------# 143sub undeclare_prefix { 144 my $self = shift; 145 my $prefix = shift; 146 return unless not defined $prefix or $prefix eq ''; 147 return unless exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; 148 149 my ( $tfix ) = grep { $_ eq $prefix } @{$self->[NSMAP]->[-1]->[DECLARATIONS]}; 150 if ( not defined $tfix ) { 151 die "prefix $prefix not declared in this context\n"; 152 } 153 154 @{$self->[NSMAP]->[-1]->[DECLARATIONS]} = grep { $_ ne $prefix } @{$self->[NSMAP]->[-1]->[DECLARATIONS]}; 155 delete $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; 156} 157#-------------------------------------------------------------------# 158 159#-------------------------------------------------------------------# 160# get_prefix() - get a (random) prefix for a given URI 161#-------------------------------------------------------------------# 162sub get_prefix { 163 my $self = shift; 164 my $uri = shift; 165 166 # we have to iterate over the whole hash here because if we don't 167 # the iterator isn't reset and the next pass will fail 168 my $pref; 169 while (my ($k, $v) = each %{$self->[NSMAP]->[-1]->[PREFIX_MAP]}) { 170 $pref = $k if $v eq $uri; 171 } 172 return $pref; 173} 174#-------------------------------------------------------------------# 175 176#-------------------------------------------------------------------# 177# get_prefixes() - get all the prefixes for a given URI 178#-------------------------------------------------------------------# 179sub get_prefixes { 180 my $self = shift; 181 my $uri = shift; 182 183 return keys %{$self->[NSMAP]->[-1]->[PREFIX_MAP]} unless defined $uri; 184 return grep { $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$_} eq $uri } keys %{$self->[NSMAP]->[-1]->[PREFIX_MAP]}; 185} 186#-------------------------------------------------------------------# 187 188#-------------------------------------------------------------------# 189# get_declared_prefixes() - get all prefixes declared in the last context 190#-------------------------------------------------------------------# 191sub get_declared_prefixes { 192 return @{$_[0]->[NSMAP]->[-1]->[DECLARATIONS]}; 193} 194#-------------------------------------------------------------------# 195 196#-------------------------------------------------------------------# 197# get_uri() - get an URI given a prefix 198#-------------------------------------------------------------------# 199sub get_uri { 200 my $self = shift; 201 my $prefix = shift; 202 203 warn "Prefix must not be undef in get_uri(). The emtpy prefix must be ''" unless defined $prefix; 204 205 return $self->[NSMAP]->[-1]->[DEFAULT] if $prefix eq ''; 206 return $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix} if exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix}; 207 return undef; 208} 209#-------------------------------------------------------------------# 210 211#-------------------------------------------------------------------# 212# process_name() - provide details on a name 213#-------------------------------------------------------------------# 214sub process_name { 215 my $self = shift; 216 my $qname = shift; 217 my $aflag = shift; 218 219 if ($self->[FATALS]) { 220 return( ($self->_get_ns_details($qname, $aflag))[0,2], $qname ); 221 } 222 else { 223 eval { return( ($self->_get_ns_details($qname, $aflag))[0,2], $qname ); } 224 } 225} 226#-------------------------------------------------------------------# 227 228#-------------------------------------------------------------------# 229# process_element_name() - provide details on a element's name 230#-------------------------------------------------------------------# 231sub process_element_name { 232 my $self = shift; 233 my $qname = shift; 234 235 if ($self->[FATALS]) { 236 return $self->_get_ns_details($qname, 0); 237 } 238 else { 239 eval { return $self->_get_ns_details($qname, 0); } 240 } 241} 242#-------------------------------------------------------------------# 243 244 245#-------------------------------------------------------------------# 246# process_attribute_name() - provide details on a attribute's name 247#-------------------------------------------------------------------# 248sub process_attribute_name { 249 my $self = shift; 250 my $qname = shift; 251 252 if ($self->[FATALS]) { 253 return $self->_get_ns_details($qname, 1); 254 } 255 else { 256 eval { return $self->_get_ns_details($qname, 1); } 257 } 258} 259#-------------------------------------------------------------------# 260 261 262#-------------------------------------------------------------------# 263# ($ns, $prefix, $lname) = $self->_get_ns_details($qname, $f_attr) 264# returns ns, prefix, and lname for a given attribute name 265# >> the $f_attr flag, if set to one, will work for an attribute 266#-------------------------------------------------------------------# 267sub _get_ns_details { 268 my $self = shift; 269 my $qname = shift; 270 my $aflag = shift; 271 272 my ($ns, $prefix, $lname); 273 (my ($tmp_prefix, $tmp_lname) = split /:/, $qname, 3) 274 < 3 or die "Invalid QName: $qname"; 275 276 # no prefix 277 my $cur_map = $self->[NSMAP]->[-1]; 278 if (not defined($tmp_lname)) { 279 $prefix = undef; 280 $lname = $qname; 281 # attr don't have a default namespace 282 $ns = ($aflag) ? undef : $cur_map->[DEFAULT]; 283 } 284 285 # prefix 286 else { 287 if (exists $cur_map->[PREFIX_MAP]->{$tmp_prefix}) { 288 $prefix = $tmp_prefix; 289 $lname = $tmp_lname; 290 $ns = $cur_map->[PREFIX_MAP]->{$prefix} 291 } 292 else { # no ns -> lname == name, all rest undef 293 die "Undeclared prefix: $tmp_prefix"; 294 } 295 } 296 297 return ($ns, $prefix, $lname); 298} 299#-------------------------------------------------------------------# 300 301#-------------------------------------------------------------------# 302# parse_jclark_notation() - parse the Clarkian notation 303#-------------------------------------------------------------------# 304sub parse_jclark_notation { 305 shift; 306 my $jc = shift; 307 $jc =~ m/^\{(.*)\}([^}]+)$/; 308 return $1, $2; 309} 310#-------------------------------------------------------------------# 311 312 313#-------------------------------------------------------------------# 314# Java names mapping 315#-------------------------------------------------------------------# 316*XML::NamespaceSupport::pushContext = \&push_context; 317*XML::NamespaceSupport::popContext = \&pop_context; 318*XML::NamespaceSupport::declarePrefix = \&declare_prefix; 319*XML::NamespaceSupport::declarePrefixes = \&declare_prefixes; 320*XML::NamespaceSupport::getPrefix = \&get_prefix; 321*XML::NamespaceSupport::getPrefixes = \&get_prefixes; 322*XML::NamespaceSupport::getDeclaredPrefixes = \&get_declared_prefixes; 323*XML::NamespaceSupport::getURI = \&get_uri; 324*XML::NamespaceSupport::processName = \&process_name; 325*XML::NamespaceSupport::processElementName = \&process_element_name; 326*XML::NamespaceSupport::processAttributeName = \&process_attribute_name; 327*XML::NamespaceSupport::parseJClarkNotation = \&parse_jclark_notation; 328*XML::NamespaceSupport::undeclarePrefix = \&undeclare_prefix; 329#-------------------------------------------------------------------# 330 331 3321; 333#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,# 334#`,`, Documentation `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,# 335#```````````````````````````````````````````````````````````````````# 336 337=pod 338 339=head1 NAME 340 341XML::NamespaceSupport - a simple generic namespace support class 342 343=head1 SYNOPSIS 344 345 use XML::NamespaceSupport; 346 my $nsup = XML::NamespaceSupport->new; 347 348 # add a new empty context 349 $nsup->push_context; 350 # declare a few prefixes 351 $nsup->declare_prefix($prefix1, $uri1); 352 $nsup->declare_prefix($prefix2, $uri2); 353 # the same shorter 354 $nsup->declare_prefixes($prefix1 => $uri1, $prefix2 => $uri2); 355 356 # get a single prefix for a URI (randomly) 357 $prefix = $nsup->get_prefix($uri); 358 # get all prefixes for a URI (probably better) 359 @prefixes = $nsup->get_prefixes($uri); 360 # get all prefixes in scope 361 @prefixes = $nsup->get_prefixes(); 362 # get all prefixes that were declared for the current scope 363 @prefixes = $nsup->get_declared_prefixes; 364 # get a URI for a given prefix 365 $uri = $nsup->get_uri($prefix); 366 367 # get info on a qname (java-ish way, it's a bit weird) 368 ($ns_uri, $local_name, $qname) = $nsup->process_name($qname, $is_attr); 369 # the same, more perlish 370 ($ns_uri, $prefix, $local_name) = $nsup->process_element_name($qname); 371 ($ns_uri, $prefix, $local_name) = $nsup->process_attribute_name($qname); 372 373 # remove the current context 374 $nsup->pop_context; 375 376 # reset the object for reuse in another document 377 $nsup->reset; 378 379 # a simple helper to process Clarkian Notation 380 my ($ns, $lname) = $nsup->parse_jclark_notation('{http://foo}bar'); 381 # or (given that it doesn't care about the object 382 my ($ns, $lname) = XML::NamespaceSupport->parse_jclark_notation('{http://foo}bar'); 383 384 385=head1 DESCRIPTION 386 387This module offers a simple to process namespaced XML names (unames) 388from within any application that may need them. It also helps maintain 389a prefix to namespace URI map, and provides a number of basic checks. 390 391The model for this module is SAX2's NamespaceSupport class, readable at 392http://www.megginson.com/SAX/Java/javadoc/org/xml/sax/helpers/NamespaceSupport.html. 393It adds a few perlisations where we thought it appropriate. 394 395=head1 METHODS 396 397=over 4 398 399=item * XML::NamespaceSupport->new(\%options) 400 401A simple constructor. 402 403The options are C<xmlns>, C<fatal_errors>, and C<auto_prefix> 404 405If C<xmlns> is turned on (it is off by default) the mapping from the 406xmlns prefix to the URI defined for it in DOM level 2 is added to the 407list of predefined mappings (which normally only contains the xml 408prefix mapping). 409 410If C<fatal_errors> is turned off (it is on by default) a number of 411validity errors will simply be flagged as failures, instead of 412die()ing. 413 414If C<auto_prefix> is turned on (it is off by default) when one 415provides a prefix of C<undef> to C<declare_prefix> it will generate a 416random prefix mapped to that namespace. Otherwise an undef prefix will 417trigger a warning (you should probably know what you're doing if you 418turn this option on). 419 420=item * $nsup->push_context 421 422Adds a new empty context to the stack. You can then populate it with 423new prefixes defined at this level. 424 425=item * $nsup->pop_context 426 427Removes the topmost context in the stack and reverts to the previous 428one. It will die() if you try to pop more than you have pushed. 429 430=item * $nsup->declare_prefix($prefix, $uri) 431 432Declares a mapping of $prefix to $uri, at the current level. 433 434Note that with C<auto_prefix> turned on, if you declare a prefix 435mapping in which $prefix is undef(), you will get an automatic prefix 436selected for you. If it is off you will get a warning. 437 438This is useful when you deal with code that hasn't kept prefixes around 439and need to reserialize the nodes. It also means that if you want to 440set the default namespace (ie with an empty prefix) you must use the 441empty string instead of undef. This behaviour is consistent with the 442SAX 2.0 specification. 443 444=item * $nsup->declare_prefixes(%prefixes2uris) 445 446Declares a mapping of several prefixes to URIs, at the current level. 447 448=item * $nsup->get_prefix($uri) 449 450Returns a prefix given an URI. Note that as several prefixes may be 451mapped to the same URI, it returns an arbitrary one. It'll return 452undef on failure. 453 454=item * $nsup->get_prefixes($uri) 455 456Returns an array of prefixes given an URI. It'll return all the 457prefixes if the uri is undef. 458 459=item * $nsup->get_declared_prefixes 460 461Returns an array of all the prefixes that have been declared within 462this context, ie those that were declared on the last element, not 463those that were declared above and are simply in scope. 464 465=item * $nsup->get_uri($prefix) 466 467Returns a URI for a given prefix. Returns undef on failure. 468 469=item * $nsup->process_name($qname, $is_attr) 470 471Given a qualified name and a boolean indicating whether this is an 472attribute or another type of name (those are differently affected by 473default namespaces), it returns a namespace URI, local name, qualified 474name tuple. I know that that is a rather abnormal list to return, but 475it is so for compatibility with the Java spec. See below for more 476Perlish alternatives. 477 478If the prefix is not declared, or if the name is not valid, it'll 479either die or return undef depending on the current setting of 480C<fatal_errors>. 481 482=item * $nsup->undeclare_prefix($prefix); 483 484Removes a namespace prefix from the current context. This function may 485be used in SAX's end_prefix_mapping when there is fear that a namespace 486declaration might be available outside their scope (which shouldn't 487normally happen, but you never know ;). This may be needed in order to 488properly support Namespace 1.1. 489 490=item * $nsup->process_element_name($qname) 491 492Given a qualified name, it returns a namespace URI, prefix, and local 493name tuple. This method applies to element names. 494 495If the prefix is not declared, or if the name is not valid, it'll 496either die or return undef depending on the current setting of 497C<fatal_errors>. 498 499=item * $nsup->process_attribute_name($qname) 500 501Given a qualified name, it returns a namespace URI, prefix, and local 502name tuple. This method applies to attribute names. 503 504If the prefix is not declared, or if the name is not valid, it'll 505either die or return undef depending on the current setting of 506C<fatal_errors>. 507 508=item * $nsup->reset 509 510Resets the object so that it can be reused on another document. 511 512=back 513 514All methods of the interface have an alias that is the name used in 515the original Java specification. You can use either name 516interchangeably. Here is the mapping: 517 518 Java name Perl name 519 --------------------------------------------------- 520 pushContext push_context 521 popContext pop_context 522 declarePrefix declare_prefix 523 declarePrefixes declare_prefixes 524 getPrefix get_prefix 525 getPrefixes get_prefixes 526 getDeclaredPrefixes get_declared_prefixes 527 getURI get_uri 528 processName process_name 529 processElementName process_element_name 530 processAttributeName process_attribute_name 531 parseJClarkNotation parse_jclark_notation 532 undeclarePrefix undeclare_prefix 533 534=head1 VARIABLES 535 536Two global variables are made available to you. They used to be constants but 537simple scalars are easier to use in a number of contexts. They are not 538exported but can easily be accessed from any package, or copied into it. 539 540=over 4 541 542=item * C<$NS_XMLNS> 543 544The namespace for xmlns prefixes, http://www.w3.org/2000/xmlns/. 545 546=item * C<$NS_XML> 547 548The namespace for xml prefixes, http://www.w3.org/XML/1998/namespace. 549 550=back 551 552=head1 TODO 553 554 - add more tests 555 - optimise here and there 556 557=head1 AUTHOR 558 559Robin Berjon, robin@knowscape.com, with lots of it having been done 560by Duncan Cameron, and a number of suggestions from the perl-xml 561list. 562 563=head1 COPYRIGHT 564 565Copyright (c) 2001-2002 Robin Berjon. All rights reserved. This program is 566free software; you can redistribute it and/or modify it under the same terms 567as Perl itself. 568 569=head1 SEE ALSO 570 571XML::Parser::PerlSAX 572 573=cut 574 575