1
2###
3# XML::NamespaceSupport - a simple generic namespace processor
4# Robin Berjon <robin@knowscape.com>
5###
6
7package XML::NamespaceSupport;
8use strict;
9use constant FATALS         => 0; # root object
10use constant NSMAP          => 1;
11use constant UNKNOWN_PREF   => 2;
12use constant AUTO_PREFIX    => 3;
13use constant DEFAULT        => 0; # maps
14use constant PREFIX_MAP     => 1;
15use constant DECLARATIONS   => 2;
16
17use vars qw($VERSION $NS_XMLNS $NS_XML);
18$VERSION    = '1.08';
19$NS_XMLNS   = 'http://www.w3.org/2000/xmlns/';
20$NS_XML     = 'http://www.w3.org/XML/1998/namespace';
21
22
23# add the ns stuff that baud wants based on Java's xml-writer
24
25
26#-------------------------------------------------------------------#
27# constructor
28#-------------------------------------------------------------------#
29sub new {
30    my $class   = ref($_[0]) ? ref(shift) : shift;
31    my $options = shift;
32    my $self = [
33                1, # FATALS
34                [[ # NSMAP
35                  undef,              # DEFAULT
36                  { xml => $NS_XML }, # PREFIX_MAP
37                  undef,              # DECLARATIONS
38                ]],
39                'aaa', # UNKNOWN_PREF
40                0,     # AUTO_PREFIX
41               ];
42    $self->[NSMAP]->[0]->[PREFIX_MAP]->{xmlns} = $NS_XMLNS if $options->{xmlns};
43    $self->[FATALS] = $options->{fatal_errors} if defined $options->{fatal_errors};
44    $self->[AUTO_PREFIX] = $options->{auto_prefix} if defined $options->{auto_prefix};
45    return bless $self, $class;
46}
47#-------------------------------------------------------------------#
48
49#-------------------------------------------------------------------#
50# reset() - return to the original state (for reuse)
51#-------------------------------------------------------------------#
52sub reset {
53    my $self = shift;
54    $#{$self->[NSMAP]} = 0;
55}
56#-------------------------------------------------------------------#
57
58#-------------------------------------------------------------------#
59# push_context() - add a new empty context to the stack
60#-------------------------------------------------------------------#
61sub push_context {
62    my $self = shift;
63    push @{$self->[NSMAP]}, [
64                             $self->[NSMAP]->[-1]->[DEFAULT],
65                             { %{$self->[NSMAP]->[-1]->[PREFIX_MAP]} },
66                             [],
67                            ];
68}
69#-------------------------------------------------------------------#
70
71#-------------------------------------------------------------------#
72# pop_context() - remove the topmost context fromt the stack
73#-------------------------------------------------------------------#
74sub pop_context {
75    my $self = shift;
76    die 'Trying to pop context without push context' unless @{$self->[NSMAP]} > 1;
77    pop @{$self->[NSMAP]};
78}
79#-------------------------------------------------------------------#
80
81#-------------------------------------------------------------------#
82# declare_prefix() - declare a prefix in the current scope
83#-------------------------------------------------------------------#
84sub declare_prefix {
85    my $self    = shift;
86    my $prefix  = shift;
87    my $value   = shift;
88
89    warn <<'    EOWARN' unless defined $prefix or $self->[AUTO_PREFIX];
90    Prefix was undefined.
91    If you wish to set the default namespace, use the empty string ''.
92    If you wish to autogenerate prefixes, set the auto_prefix option
93    to a true value.
94    EOWARN
95
96    if ($prefix eq 'xml' and $value ne $NS_XML) {
97        die "The xml prefix can only be bound to the $NS_XML namespace."
98    }
99    elsif ($value eq $NS_XML and $prefix ne 'xml') {
100        die "the $NS_XML namespace can only be bound to the xml prefix.";
101    }
102    elsif ($value eq $NS_XML and $prefix eq 'xml') {
103        return 1;
104    }
105    return 0 if index(lc($prefix), 'xml') == 0;
106
107    if (defined $prefix and $prefix eq '') {
108        $self->[NSMAP]->[-1]->[DEFAULT] = $value;
109    }
110    else {
111        die "Cannot undeclare prefix $prefix" if $value eq '';
112        if (not defined $prefix and $self->[AUTO_PREFIX]) {
113            while (1) {
114                $prefix = $self->[UNKNOWN_PREF]++;
115                last if not exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix};
116            }
117        }
118        elsif (not defined $prefix and not $self->[AUTO_PREFIX]) {
119            return 0;
120        }
121        $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix} = $value;
122    }
123    push @{$self->[NSMAP]->[-1]->[DECLARATIONS]}, $prefix;
124    return 1;
125}
126#-------------------------------------------------------------------#
127
128#-------------------------------------------------------------------#
129# declare_prefixes() - declare several prefixes in the current scope
130#-------------------------------------------------------------------#
131sub declare_prefixes {
132    my $self     = shift;
133    my %prefixes = @_;
134    while (my ($k,$v) = each %prefixes) {
135        $self->declare_prefix($k,$v);
136    }
137}
138#-------------------------------------------------------------------#
139
140#-------------------------------------------------------------------#
141# undeclare_prefix
142#-------------------------------------------------------------------#
143sub undeclare_prefix {
144    my $self   = shift;
145    my $prefix = shift;
146    return unless not defined $prefix or $prefix eq '';
147    return unless exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix};
148
149    my ( $tfix ) = grep { $_ eq $prefix } @{$self->[NSMAP]->[-1]->[DECLARATIONS]};
150    if ( not defined $tfix ) {
151        die "prefix $prefix not declared in this context\n";
152    }
153
154    @{$self->[NSMAP]->[-1]->[DECLARATIONS]} = grep { $_ ne $prefix } @{$self->[NSMAP]->[-1]->[DECLARATIONS]};
155    delete $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix};
156}
157#-------------------------------------------------------------------#
158
159#-------------------------------------------------------------------#
160# get_prefix() - get a (random) prefix for a given URI
161#-------------------------------------------------------------------#
162sub get_prefix {
163    my $self    = shift;
164    my $uri     = shift;
165
166    # we have to iterate over the whole hash here because if we don't
167    # the iterator isn't reset and the next pass will fail
168    my $pref;
169    while (my ($k, $v) = each %{$self->[NSMAP]->[-1]->[PREFIX_MAP]}) {
170        $pref = $k if $v eq $uri;
171    }
172    return $pref;
173}
174#-------------------------------------------------------------------#
175
176#-------------------------------------------------------------------#
177# get_prefixes() - get all the prefixes for a given URI
178#-------------------------------------------------------------------#
179sub get_prefixes {
180    my $self    = shift;
181    my $uri     = shift;
182
183    return keys %{$self->[NSMAP]->[-1]->[PREFIX_MAP]} unless defined $uri;
184    return grep { $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$_} eq $uri } keys %{$self->[NSMAP]->[-1]->[PREFIX_MAP]};
185}
186#-------------------------------------------------------------------#
187
188#-------------------------------------------------------------------#
189# get_declared_prefixes() - get all prefixes declared in the last context
190#-------------------------------------------------------------------#
191sub get_declared_prefixes {
192    return @{$_[0]->[NSMAP]->[-1]->[DECLARATIONS]};
193}
194#-------------------------------------------------------------------#
195
196#-------------------------------------------------------------------#
197# get_uri() - get an URI given a prefix
198#-------------------------------------------------------------------#
199sub get_uri {
200    my $self    = shift;
201    my $prefix  = shift;
202
203    warn "Prefix must not be undef in get_uri(). The emtpy prefix must be ''" unless defined $prefix;
204
205    return $self->[NSMAP]->[-1]->[DEFAULT] if $prefix eq '';
206    return $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix} if exists $self->[NSMAP]->[-1]->[PREFIX_MAP]->{$prefix};
207    return undef;
208}
209#-------------------------------------------------------------------#
210
211#-------------------------------------------------------------------#
212# process_name() - provide details on a name
213#-------------------------------------------------------------------#
214sub process_name {
215    my $self    = shift;
216    my $qname   = shift;
217    my $aflag   = shift;
218
219    if ($self->[FATALS]) {
220        return( ($self->_get_ns_details($qname, $aflag))[0,2], $qname );
221    }
222    else {
223        eval { return( ($self->_get_ns_details($qname, $aflag))[0,2], $qname ); }
224    }
225}
226#-------------------------------------------------------------------#
227
228#-------------------------------------------------------------------#
229# process_element_name() - provide details on a element's name
230#-------------------------------------------------------------------#
231sub process_element_name {
232    my $self    = shift;
233    my $qname   = shift;
234
235    if ($self->[FATALS]) {
236        return $self->_get_ns_details($qname, 0);
237    }
238    else {
239        eval { return $self->_get_ns_details($qname, 0); }
240    }
241}
242#-------------------------------------------------------------------#
243
244
245#-------------------------------------------------------------------#
246# process_attribute_name() - provide details on a attribute's name
247#-------------------------------------------------------------------#
248sub process_attribute_name {
249    my $self    = shift;
250    my $qname   = shift;
251
252    if ($self->[FATALS]) {
253        return $self->_get_ns_details($qname, 1);
254    }
255    else {
256        eval { return $self->_get_ns_details($qname, 1); }
257    }
258}
259#-------------------------------------------------------------------#
260
261
262#-------------------------------------------------------------------#
263# ($ns, $prefix, $lname) = $self->_get_ns_details($qname, $f_attr)
264# returns ns, prefix, and lname for a given attribute name
265# >> the $f_attr flag, if set to one, will work for an attribute
266#-------------------------------------------------------------------#
267sub _get_ns_details {
268    my $self    = shift;
269    my $qname   = shift;
270    my $aflag   = shift;
271
272    my ($ns, $prefix, $lname);
273    (my ($tmp_prefix, $tmp_lname) = split /:/, $qname, 3)
274                                    < 3 or die "Invalid QName: $qname";
275
276    # no prefix
277    my $cur_map = $self->[NSMAP]->[-1];
278    if (not defined($tmp_lname)) {
279        $prefix = undef;
280        $lname = $qname;
281        # attr don't have a default namespace
282        $ns = ($aflag) ? undef : $cur_map->[DEFAULT];
283    }
284
285    # prefix
286    else {
287        if (exists $cur_map->[PREFIX_MAP]->{$tmp_prefix}) {
288            $prefix = $tmp_prefix;
289            $lname  = $tmp_lname;
290            $ns     = $cur_map->[PREFIX_MAP]->{$prefix}
291        }
292        else { # no ns -> lname == name, all rest undef
293            die "Undeclared prefix: $tmp_prefix";
294        }
295    }
296
297    return ($ns, $prefix, $lname);
298}
299#-------------------------------------------------------------------#
300
301#-------------------------------------------------------------------#
302# parse_jclark_notation() - parse the Clarkian notation
303#-------------------------------------------------------------------#
304sub parse_jclark_notation {
305    shift;
306    my $jc = shift;
307    $jc =~ m/^\{(.*)\}([^}]+)$/;
308    return $1, $2;
309}
310#-------------------------------------------------------------------#
311
312
313#-------------------------------------------------------------------#
314# Java names mapping
315#-------------------------------------------------------------------#
316*XML::NamespaceSupport::pushContext          = \&push_context;
317*XML::NamespaceSupport::popContext           = \&pop_context;
318*XML::NamespaceSupport::declarePrefix        = \&declare_prefix;
319*XML::NamespaceSupport::declarePrefixes      = \&declare_prefixes;
320*XML::NamespaceSupport::getPrefix            = \&get_prefix;
321*XML::NamespaceSupport::getPrefixes          = \&get_prefixes;
322*XML::NamespaceSupport::getDeclaredPrefixes  = \&get_declared_prefixes;
323*XML::NamespaceSupport::getURI               = \&get_uri;
324*XML::NamespaceSupport::processName          = \&process_name;
325*XML::NamespaceSupport::processElementName   = \&process_element_name;
326*XML::NamespaceSupport::processAttributeName = \&process_attribute_name;
327*XML::NamespaceSupport::parseJClarkNotation  = \&parse_jclark_notation;
328*XML::NamespaceSupport::undeclarePrefix      = \&undeclare_prefix;
329#-------------------------------------------------------------------#
330
331
3321;
333#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
334#`,`, Documentation `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
335#```````````````````````````````````````````````````````````````````#
336
337=pod
338
339=head1 NAME
340
341XML::NamespaceSupport - a simple generic namespace support class
342
343=head1 SYNOPSIS
344
345  use XML::NamespaceSupport;
346  my $nsup = XML::NamespaceSupport->new;
347
348  # add a new empty context
349  $nsup->push_context;
350  # declare a few prefixes
351  $nsup->declare_prefix($prefix1, $uri1);
352  $nsup->declare_prefix($prefix2, $uri2);
353  # the same shorter
354  $nsup->declare_prefixes($prefix1 => $uri1, $prefix2 => $uri2);
355
356  # get a single prefix for a URI (randomly)
357  $prefix = $nsup->get_prefix($uri);
358  # get all prefixes for a URI (probably better)
359  @prefixes = $nsup->get_prefixes($uri);
360  # get all prefixes in scope
361  @prefixes = $nsup->get_prefixes();
362  # get all prefixes that were declared for the current scope
363  @prefixes = $nsup->get_declared_prefixes;
364  # get a URI for a given prefix
365  $uri = $nsup->get_uri($prefix);
366
367  # get info on a qname (java-ish way, it's a bit weird)
368  ($ns_uri, $local_name, $qname) = $nsup->process_name($qname, $is_attr);
369  # the same, more perlish
370  ($ns_uri, $prefix, $local_name) = $nsup->process_element_name($qname);
371  ($ns_uri, $prefix, $local_name) = $nsup->process_attribute_name($qname);
372
373  # remove the current context
374  $nsup->pop_context;
375
376  # reset the object for reuse in another document
377  $nsup->reset;
378
379  # a simple helper to process Clarkian Notation
380  my ($ns, $lname) = $nsup->parse_jclark_notation('{http://foo}bar');
381  # or (given that it doesn't care about the object
382  my ($ns, $lname) = XML::NamespaceSupport->parse_jclark_notation('{http://foo}bar');
383
384
385=head1 DESCRIPTION
386
387This module offers a simple to process namespaced XML names (unames)
388from within any application that may need them. It also helps maintain
389a prefix to namespace URI map, and provides a number of basic checks.
390
391The model for this module is SAX2's NamespaceSupport class, readable at
392http://www.megginson.com/SAX/Java/javadoc/org/xml/sax/helpers/NamespaceSupport.html.
393It adds a few perlisations where we thought it appropriate.
394
395=head1 METHODS
396
397=over 4
398
399=item * XML::NamespaceSupport->new(\%options)
400
401A simple constructor.
402
403The options are C<xmlns>, C<fatal_errors>, and C<auto_prefix>
404
405If C<xmlns> is turned on (it is off by default) the mapping from the
406xmlns prefix to the URI defined for it in DOM level 2 is added to the
407list of predefined mappings (which normally only contains the xml
408prefix mapping).
409
410If C<fatal_errors> is turned off (it is on by default) a number of
411validity errors will simply be flagged as failures, instead of
412die()ing.
413
414If C<auto_prefix> is turned on (it is off by default) when one
415provides a prefix of C<undef> to C<declare_prefix> it will generate a
416random prefix mapped to that namespace. Otherwise an undef prefix will
417trigger a warning (you should probably know what you're doing if you
418turn this option on).
419
420=item * $nsup->push_context
421
422Adds a new empty context to the stack. You can then populate it with
423new prefixes defined at this level.
424
425=item * $nsup->pop_context
426
427Removes the topmost context in the stack and reverts to the previous
428one. It will die() if you try to pop more than you have pushed.
429
430=item * $nsup->declare_prefix($prefix, $uri)
431
432Declares a mapping of $prefix to $uri, at the current level.
433
434Note that with C<auto_prefix> turned on, if you declare a prefix
435mapping in which $prefix is undef(), you will get an automatic prefix
436selected for you. If it is off you will get a warning.
437
438This is useful when you deal with code that hasn't kept prefixes around
439and need to reserialize the nodes. It also means that if you want to
440set the default namespace (ie with an empty prefix) you must use the
441empty string instead of undef. This behaviour is consistent with the
442SAX 2.0 specification.
443
444=item * $nsup->declare_prefixes(%prefixes2uris)
445
446Declares a mapping of several prefixes to URIs, at the current level.
447
448=item * $nsup->get_prefix($uri)
449
450Returns a prefix given an URI. Note that as several prefixes may be
451mapped to the same URI, it returns an arbitrary one. It'll return
452undef on failure.
453
454=item * $nsup->get_prefixes($uri)
455
456Returns an array of prefixes given an URI. It'll return all the
457prefixes if the uri is undef.
458
459=item * $nsup->get_declared_prefixes
460
461Returns an array of all the prefixes that have been declared within
462this context, ie those that were declared on the last element, not
463those that were declared above and are simply in scope.
464
465=item * $nsup->get_uri($prefix)
466
467Returns a URI for a given prefix. Returns undef on failure.
468
469=item * $nsup->process_name($qname, $is_attr)
470
471Given a qualified name and a boolean indicating whether this is an
472attribute or another type of name (those are differently affected by
473default namespaces), it returns a namespace URI, local name, qualified
474name tuple. I know that that is a rather abnormal list to return, but
475it is so for compatibility with the Java spec. See below for more
476Perlish alternatives.
477
478If the prefix is not declared, or if the name is not valid, it'll
479either die or return undef depending on the current setting of
480C<fatal_errors>.
481
482=item * $nsup->undeclare_prefix($prefix);
483
484Removes a namespace prefix from the current context. This function may
485be used in SAX's end_prefix_mapping when there is fear that a namespace
486declaration might be available outside their scope (which shouldn't
487normally happen, but you never know ;). This may be needed in order to
488properly support Namespace 1.1.
489
490=item * $nsup->process_element_name($qname)
491
492Given a qualified name, it returns a namespace URI, prefix, and local
493name tuple. This method applies to element names.
494
495If the prefix is not declared, or if the name is not valid, it'll
496either die or return undef depending on the current setting of
497C<fatal_errors>.
498
499=item * $nsup->process_attribute_name($qname)
500
501Given a qualified name, it returns a namespace URI, prefix, and local
502name tuple. This method applies to attribute names.
503
504If the prefix is not declared, or if the name is not valid, it'll
505either die or return undef depending on the current setting of
506C<fatal_errors>.
507
508=item * $nsup->reset
509
510Resets the object so that it can be reused on another document.
511
512=back
513
514All methods of the interface have an alias that is the name used in
515the original Java specification. You can use either name
516interchangeably. Here is the mapping:
517
518  Java name                 Perl name
519  ---------------------------------------------------
520  pushContext               push_context
521  popContext                pop_context
522  declarePrefix             declare_prefix
523  declarePrefixes           declare_prefixes
524  getPrefix                 get_prefix
525  getPrefixes               get_prefixes
526  getDeclaredPrefixes       get_declared_prefixes
527  getURI                    get_uri
528  processName               process_name
529  processElementName        process_element_name
530  processAttributeName      process_attribute_name
531  parseJClarkNotation       parse_jclark_notation
532  undeclarePrefix           undeclare_prefix
533
534=head1 VARIABLES
535
536Two global variables are made available to you. They used to be constants but
537simple scalars are easier to use in a number of contexts. They are not
538exported but can easily be accessed from any package, or copied into it.
539
540=over 4
541
542=item * C<$NS_XMLNS>
543
544The namespace for xmlns prefixes, http://www.w3.org/2000/xmlns/.
545
546=item * C<$NS_XML>
547
548The namespace for xml prefixes, http://www.w3.org/XML/1998/namespace.
549
550=back
551
552=head1 TODO
553
554 - add more tests
555 - optimise here and there
556
557=head1 AUTHOR
558
559Robin Berjon, robin@knowscape.com, with lots of it having been done
560by Duncan Cameron, and a number of suggestions from the perl-xml
561list.
562
563=head1 COPYRIGHT
564
565Copyright (c) 2001-2002 Robin Berjon. All rights reserved. This program is
566free software; you can redistribute it and/or modify it under the same terms
567as Perl itself.
568
569=head1 SEE ALSO
570
571XML::Parser::PerlSAX
572
573=cut
574
575