PullParser.pm revision 1.2
1require 5;
2package Pod::Simple::PullParser;
3$VERSION = '3.32';
4use Pod::Simple ();
5BEGIN {@ISA = ('Pod::Simple')}
6
7use strict;
8use Carp ();
9
10use Pod::Simple::PullParserStartToken;
11use Pod::Simple::PullParserEndToken;
12use Pod::Simple::PullParserTextToken;
13
14BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
15
16__PACKAGE__->_accessorize(
17  'source_fh',         # the filehandle we're reading from
18  'source_scalar_ref', # the scalarref we're reading from
19  'source_arrayref',   # the arrayref we're reading from
20);
21
22#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
23#
24#  And here is how we implement a pull-parser on top of a push-parser...
25
26sub filter {
27  my($self, $source) = @_;
28  $self = $self->new unless ref $self;
29
30  $source = *STDIN{IO} unless defined $source;
31  $self->set_source($source);
32  $self->output_fh(*STDOUT{IO});
33
34  $self->run; # define run() in a subclass if you want to use filter()!
35  return $self;
36}
37
38# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
39
40sub parse_string_document {
41  my $this = shift;
42  $this->set_source(\ $_[0]);
43  $this->run;
44}
45
46sub parse_file {
47  my($this, $filename) = @_;
48  $this->set_source($filename);
49  $this->run;
50}
51
52# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
53#  In case anyone tries to use them:
54
55sub run {
56  use Carp ();
57  if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!
58    Carp::croak "You can call run() only on subclasses of "
59     . __PACKAGE__;
60  } else {
61    Carp::croak join '',
62      "You can't call run() because ",
63      ref($_[0]) || $_[0], " didn't define a run() method";
64  }
65}
66
67sub parse_lines {
68  use Carp ();
69  Carp::croak "Use set_source with ", __PACKAGE__,
70    " and subclasses, not parse_lines";
71}
72
73sub parse_line {
74  use Carp ();
75  Carp::croak "Use set_source with ", __PACKAGE__,
76    " and subclasses, not parse_line";
77}
78
79#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
80
81sub new {
82  my $class = shift;
83  my $self = $class->SUPER::new(@_);
84  die "Couldn't construct for $class" unless $self;
85
86  $self->{'token_buffer'} ||= [];
87  $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
88  $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';
89  $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';
90
91  DEBUG > 1 and print STDERR "New pullparser object: $self\n";
92
93  return $self;
94}
95
96# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
97
98sub get_token {
99  my $self = shift;
100  DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n";
101  DEBUG > 2 and print STDERR " Items in token-buffer (",
102   scalar( @{ $self->{'token_buffer'} } ) ,
103   ") :\n", map(
104     "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }
105   ),
106   @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',
107   "\n"
108  ;
109
110  until( @{ $self->{'token_buffer'} } ) {
111    DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n";
112    if($self->{'source_dead'}) {
113      DEBUG and print STDERR "$self 's source is dead.\n";
114      push @{ $self->{'token_buffer'} }, undef;
115    } elsif(exists $self->{'source_fh'}) {
116      my @lines;
117      my $fh = $self->{'source_fh'}
118       || Carp::croak('You have to call set_source before you can call get_token');
119
120      DEBUG and print STDERR "$self 's source is filehandle $fh.\n";
121      # Read those many lines at a time
122      for(my $i = Pod::Simple::MANY_LINES; $i--;) {
123        DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n";
124        local $/ = $Pod::Simple::NL;
125        push @lines, scalar(<$fh>); # readline
126        DEBUG > 3 and print STDERR "  Line is: ",
127          defined($lines[-1]) ? $lines[-1] : "<undef>\n";
128        unless( defined $lines[-1] ) {
129          DEBUG and print STDERR "That's it for that source fh!  Killing.\n";
130          delete $self->{'source_fh'}; # so it can be GC'd
131          last;
132        }
133         # but pass thru the undef, which will set source_dead to true
134
135        # TODO: look to see if $lines[-1] is =encoding, and if so,
136        # do horribly magic things
137
138      }
139
140      if(DEBUG > 8) {
141        print STDERR "* I've gotten ", scalar(@lines), " lines:\n";
142        foreach my $l (@lines) {
143          if(defined $l) {
144            print STDERR "  line {$l}\n";
145          } else {
146            print STDERR "  line undef\n";
147          }
148        }
149        print STDERR "* end of ", scalar(@lines), " lines\n";
150      }
151
152      $self->SUPER::parse_lines(@lines);
153
154    } elsif(exists $self->{'source_arrayref'}) {
155      DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ",
156       scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
157
158      DEBUG > 3 and print STDERR "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
159      $self->SUPER::parse_lines(
160        splice @{ $self->{'source_arrayref'} },
161        0,
162        Pod::Simple::MANY_LINES
163      );
164      unless( @{ $self->{'source_arrayref'} } ) {
165        DEBUG and print STDERR "That's it for that source arrayref!  Killing.\n";
166        $self->SUPER::parse_lines(undef);
167        delete $self->{'source_arrayref'}; # so it can be GC'd
168      }
169       # to make sure that an undef is always sent to signal end-of-stream
170
171    } elsif(exists $self->{'source_scalar_ref'}) {
172
173      DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
174        length(${ $self->{'source_scalar_ref'} }) -
175        (pos(${ $self->{'source_scalar_ref'} }) || 0),
176        " characters left to parse.\n";
177
178      DEBUG > 3 and print STDERR " Fetching a line from source-string...\n";
179      if( ${ $self->{'source_scalar_ref'} } =~
180        m/([^\n\r]*)((?:\r?\n)?)/g
181      ) {
182        #print(">> $1\n"),
183        $self->SUPER::parse_lines($1)
184         if length($1) or length($2)
185          or pos(     ${ $self->{'source_scalar_ref'} })
186           != length( ${ $self->{'source_scalar_ref'} });
187         # I.e., unless it's a zero-length "empty line" at the very
188         #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
189      } else { # that's the end.  Byebye
190        $self->SUPER::parse_lines(undef);
191        delete $self->{'source_scalar_ref'};
192        DEBUG and print STDERR "That's it for that source scalarref!  Killing.\n";
193      }
194
195
196    } else {
197      die "What source??";
198    }
199  }
200  DEBUG and print STDERR "get_token about to return ",
201   Pod::Simple::pretty( @{$self->{'token_buffer'}}
202     ? $self->{'token_buffer'}[-1] : undef
203   ), "\n";
204  return shift @{$self->{'token_buffer'}}; # that's an undef if empty
205}
206
207sub unget_token {
208  my $self = shift;
209  DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ",
210   @_ ? "@_\n" : "().\n";
211  foreach my $t (@_) {
212    Carp::croak "Can't unget that, because it's not a token -- it's undef!"
213     unless defined $t;
214    Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
215     unless ref $t;
216    Carp::croak "Can't unget $t, because it's not a token object!"
217     unless UNIVERSAL::can($t, 'type');
218  }
219
220  unshift @{$self->{'token_buffer'}}, @_;
221  DEBUG > 1 and print STDERR "Token buffer now has ",
222   scalar(@{$self->{'token_buffer'}}), " items in it.\n";
223  return;
224}
225
226#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
227
228# $self->{'source_filename'} = $source;
229
230sub set_source {
231  my $self = shift @_;
232  return $self->{'source_fh'} unless @_;
233  Carp::croak("Cannot assign new source to pull parser; create a new instance, instead")
234      if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'};
235  my $handle;
236  if(!defined $_[0]) {
237    Carp::croak("Can't use empty-string as a source for set_source");
238  } elsif(ref(\( $_[0] )) eq 'GLOB') {
239    $self->{'source_filename'} = '' . ($handle = $_[0]);
240    DEBUG and print STDERR "$self 's source is glob $_[0]\n";
241    # and fall thru
242  } elsif(ref( $_[0] ) eq 'SCALAR') {
243    $self->{'source_scalar_ref'} = $_[0];
244    DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n";
245    return;
246  } elsif(ref( $_[0] ) eq 'ARRAY') {
247    $self->{'source_arrayref'} = $_[0];
248    DEBUG and print STDERR "$self 's source is array ref $_[0]\n";
249    return;
250  } elsif(ref $_[0]) {
251    $self->{'source_filename'} = '' . ($handle = $_[0]);
252    DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n";
253  } elsif(!length $_[0]) {
254    Carp::croak("Can't use empty-string as a source for set_source");
255  } else {  # It's a filename!
256    DEBUG and print STDERR "$self 's source is filename $_[0]\n";
257    {
258      local *PODSOURCE;
259      open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
260      $handle = *PODSOURCE{IO};
261    }
262    $self->{'source_filename'} = $_[0];
263    DEBUG and print STDERR "  Its name is $_[0].\n";
264
265    # TODO: file-discipline things here!
266  }
267
268  $self->{'source_fh'} = $handle;
269  DEBUG and print STDERR "  Its handle is $handle\n";
270  return 1;
271}
272
273# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
274
275sub get_title_short {  shift->get_short_title(@_)  } # alias
276
277sub get_short_title {
278  my $title = shift->get_title(@_);
279  $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
280    # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
281  return $title;
282}
283
284sub get_title       { shift->_get_titled_section(
285  'NAME', max_token => 50, desperate => 1, @_)
286}
287sub get_version     { shift->_get_titled_section(
288   'VERSION',
289    max_token => 400,
290    accept_verbatim => 1,
291    max_content_length => 3_000,
292   @_,
293  );
294}
295sub get_description { shift->_get_titled_section(
296   'DESCRIPTION',
297    max_token => 400,
298    max_content_length => 3_000,
299   @_,
300) }
301
302sub get_authors     { shift->get_author(@_) }  # a harmless alias
303
304sub get_author      {
305  my $this = shift;
306  # Max_token is so high because these are
307  #  typically at the end of the document:
308  $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
309  $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
310}
311
312#--------------------------------------------------------------------------
313
314sub _get_titled_section {
315  # Based on a get_title originally contributed by Graham Barr
316  my($self, $titlename, %options) = (@_);
317
318  my $max_token            = delete $options{'max_token'};
319  my $desperate_for_title  = delete $options{'desperate'};
320  my $accept_verbatim      = delete $options{'accept_verbatim'};
321  my $max_content_length   = delete $options{'max_content_length'};
322  my $nocase               = delete $options{'nocase'};
323  $max_content_length = 120 unless defined $max_content_length;
324
325  Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
326    . join " ", map "[$_]", sort keys %options
327  )
328   if keys %options;
329
330  my %content_containers;
331  $content_containers{'Para'} = 1;
332  if($accept_verbatim) {
333    $content_containers{'Verbatim'} = 1;
334    $content_containers{'VerbatimFormatted'} = 1;
335  }
336
337  my $token_count = 0;
338  my $title;
339  my @to_unget;
340  my $state = 0;
341  my $depth = 0;
342
343  Carp::croak "What kind of titlename is \"$titlename\"?!" unless
344   defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
345  my $titlename_re = quotemeta($titlename);
346
347  my $head1_text_content;
348  my $para_text_content;
349  my $skipX;
350
351  while(
352    ++$token_count <= ($max_token || 1_000_000)
353    and defined(my $token = $self->get_token)
354  ) {
355    push @to_unget, $token;
356
357    if ($state == 0) { # seeking =head1
358      if( $token->is_start and $token->tagname eq 'head1' ) {
359        DEBUG and print STDERR "  Found head1.  Seeking content...\n";
360        ++$state;
361        $head1_text_content = '';
362      }
363    }
364
365    elsif($state == 1) { # accumulating text until end of head1
366      if( $token->is_text ) {
367          unless ($skipX) {
368            DEBUG and print STDERR "   Adding \"", $token->text, "\" to head1-content.\n";
369            $head1_text_content .= $token->text;
370          }
371      } elsif( $token->is_tagname('X') ) {
372          # We're going to want to ignore X<> stuff.
373          $skipX = $token->is_start;
374          DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag';
375      } elsif( $token->is_end and $token->tagname eq 'head1' ) {
376        DEBUG and print STDERR "  Found end of head1.  Considering content...\n";
377        $head1_text_content = uc $head1_text_content if $nocase;
378        if($head1_text_content eq $titlename
379          or $head1_text_content =~ m/\($titlename_re\)/s
380          # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
381        ) {
382          DEBUG and print STDERR "  Yup, it was $titlename.  Seeking next para-content...\n";
383          ++$state;
384        } elsif(
385          $desperate_for_title
386           # if we're so desperate we'll take the first
387           #  =head1's content as a title
388          and $head1_text_content =~ m/\S/
389          and $head1_text_content !~ m/^[ A-Z]+$/s
390          and $head1_text_content !~
391            m/\((?:
392             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
393             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
394             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
395            )\)/sx
396            # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
397          and ($max_content_length
398            ? (length($head1_text_content) <= $max_content_length) # sanity
399            : 1)
400        ) {
401          # Looks good; trim it
402          ($title = $head1_text_content) =~ s/\s+$//;
403          DEBUG and print STDERR "  It looks titular: \"$title\".\n\n  Using that.\n";
404          last;
405        } else {
406          --$state;
407          DEBUG and print STDERR "  Didn't look titular ($head1_text_content).\n",
408            "\n  Dropping back to seeking-head1-content mode...\n";
409        }
410      }
411    }
412
413    elsif($state == 2) {
414      # seeking start of para (which must immediately follow)
415      if($token->is_start and $content_containers{ $token->tagname }) {
416        DEBUG and print STDERR "  Found start of Para.  Accumulating content...\n";
417        $para_text_content = '';
418        ++$state;
419      } else {
420        DEBUG and print
421         "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
422        $state = 0;
423      }
424    }
425
426    elsif($state == 3) {
427      # accumulating text until end of Para
428      if( $token->is_text ) {
429        DEBUG and print STDERR "   Adding \"", $token->text, "\" to para-content.\n";
430        $para_text_content .= $token->text;
431        # and keep looking
432
433      } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
434        DEBUG and print STDERR "  Found end of Para.  Considering content: ",
435          $para_text_content, "\n";
436
437        if( $para_text_content =~ m/\S/
438          and ($max_content_length
439           ? (length($para_text_content) <= $max_content_length)
440           : 1)
441        ) {
442          # Some minimal sanity constraints, I think.
443          DEBUG and print STDERR "  It looks contentworthy, I guess.  Using it.\n";
444          $title = $para_text_content;
445          last;
446        } else {
447          DEBUG and print STDERR "  Doesn't look at all contentworthy!\n  Giving up.\n";
448          undef $title;
449          last;
450        }
451      }
452    }
453
454    else {
455      die "IMPOSSIBLE STATE $state!\n";  # should never happen
456    }
457
458  }
459
460  # Put it all back!
461  $self->unget_token(@to_unget);
462
463  if(DEBUG) {
464    if(defined $title) { print STDERR "  Returning title <$title>\n" }
465    else { print STDERR "Returning title <>\n" }
466  }
467
468  return '' unless defined $title;
469  $title =~ s/^\s+//;
470  return $title;
471}
472
473#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
474#
475#  Methods that actually do work at parse-time:
476
477sub _handle_element_start {
478  my $self = shift;   # leaving ($element_name, $attr_hash_r)
479  DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
480
481  push @{ $self->{'token_buffer'} },
482       $self->{'start_token_class'}->new(@_);
483  return;
484}
485
486sub _handle_text {
487  my $self = shift;   # leaving ($text)
488  DEBUG > 2 and print STDERR "== $_[0]\n";
489  push @{ $self->{'token_buffer'} },
490       $self->{'text_token_class'}->new(@_);
491  return;
492}
493
494sub _handle_element_end {
495  my $self = shift;   # leaving ($element_name);
496  DEBUG > 2 and print STDERR "-- $_[0]\n";
497  push @{ $self->{'token_buffer'} },
498       $self->{'end_token_class'}->new(@_);
499  return;
500}
501
502#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
503
5041;
505
506
507__END__
508
509=head1 NAME
510
511Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
512
513=head1 SYNOPSIS
514
515 my $parser = SomePodProcessor->new;
516 $parser->set_source( "whatever.pod" );
517 $parser->run;
518
519Or:
520
521 my $parser = SomePodProcessor->new;
522 $parser->set_source( $some_filehandle_object );
523 $parser->run;
524
525Or:
526
527 my $parser = SomePodProcessor->new;
528 $parser->set_source( \$document_source );
529 $parser->run;
530
531Or:
532
533 my $parser = SomePodProcessor->new;
534 $parser->set_source( \@document_lines );
535 $parser->run;
536
537And elsewhere:
538
539 require 5;
540 package SomePodProcessor;
541 use strict;
542 use base qw(Pod::Simple::PullParser);
543
544 sub run {
545   my $self = shift;
546  Token:
547   while(my $token = $self->get_token) {
548     ...process each token...
549   }
550 }
551
552=head1 DESCRIPTION
553
554This class is for using Pod::Simple to build a Pod processor -- but
555one that uses an interface based on a stream of token objects,
556instead of based on events.
557
558This is a subclass of L<Pod::Simple> and inherits all its methods.
559
560A subclass of Pod::Simple::PullParser should define a C<run> method
561that calls C<< $token = $parser->get_token >> to pull tokens.
562
563See the source for Pod::Simple::RTF for an example of a formatter
564that uses Pod::Simple::PullParser.
565
566=head1 METHODS
567
568=over
569
570=item my $token = $parser->get_token
571
572This returns the next token object (which will be of a subclass of
573L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
574the end of the document.
575
576=item $parser->unget_token( $token )
577
578=item $parser->unget_token( $token1, $token2, ... )
579
580This restores the token object(s) to the front of the parser stream.
581
582=back
583
584The source has to be set before you can parse anything.  The lowest-level
585way is to call C<set_source>:
586
587=over
588
589=item $parser->set_source( $filename )
590
591=item $parser->set_source( $filehandle_object )
592
593=item $parser->set_source( \$document_source )
594
595=item $parser->set_source( \@document_lines )
596
597=back
598
599Or you can call these methods, which Pod::Simple::PullParser has defined
600to work just like Pod::Simple's same-named methods:
601
602=over
603
604=item $parser->parse_file(...)
605
606=item $parser->parse_string_document(...)
607
608=item $parser->filter(...)
609
610=item $parser->parse_from_file(...)
611
612=back
613
614For those to work, the Pod-processing subclass of
615Pod::Simple::PullParser has to have defined a $parser->run method --
616so it is advised that all Pod::Simple::PullParser subclasses do so.
617See the Synopsis above, or the source for Pod::Simple::RTF.
618
619Authors of formatter subclasses might find these methods useful to
620call on a parser object that you haven't started pulling tokens
621from yet:
622
623=over
624
625=item my $title_string = $parser->get_title
626
627This tries to get the title string out of $parser, by getting some tokens,
628and scanning them for the title, and then ungetting them so that you can
629process the token-stream from the beginning.
630
631For example, suppose you have a document that starts out:
632
633  =head1 NAME
634
635  Hoo::Boy::Wowza -- Stuff B<wow> yeah!
636
637$parser->get_title on that document will return "Hoo::Boy::Wowza --
638Stuff wow yeah!". If the document starts with:
639
640  =head1 Name
641
642  Hoo::Boy::W00t -- Stuff B<w00t> yeah!
643
644Then you'll need to pass the C<nocase> option in order to recognize "Name":
645
646  $parser->get_title(nocase => 1);
647
648In cases where get_title can't find the title, it will return empty-string
649("").
650
651=item my $title_string = $parser->get_short_title
652
653This is just like get_title, except that it returns just the modulename, if
654the title seems to be of the form "SomeModuleName -- description".
655
656For example, suppose you have a document that starts out:
657
658  =head1 NAME
659
660  Hoo::Boy::Wowza -- Stuff B<wow> yeah!
661
662then $parser->get_short_title on that document will return
663"Hoo::Boy::Wowza".
664
665But if the document starts out:
666
667  =head1 NAME
668
669  Hooboy, stuff B<wow> yeah!
670
671then $parser->get_short_title on that document will return "Hooboy,
672stuff wow yeah!". If the document starts with:
673
674  =head1 Name
675
676  Hoo::Boy::W00t -- Stuff B<w00t> yeah!
677
678Then you'll need to pass the C<nocase> option in order to recognize "Name":
679
680  $parser->get_short_title(nocase => 1);
681
682If the title can't be found, then get_short_title returns empty-string
683("").
684
685=item $author_name   = $parser->get_author
686
687This works like get_title except that it returns the contents of the
688"=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
689isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n"
690section, pass the C<nocase> option:
691
692  $parser->get_author(nocase => 1);
693
694(This method tolerates "AUTHORS" instead of "AUTHOR" too.)
695
696=item $description_name = $parser->get_description
697
698This works like get_title except that it returns the contents of the
699"=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section
700isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n"
701section, pass the C<nocase> option:
702
703  $parser->get_description(nocase => 1);
704
705=item $version_block = $parser->get_version
706
707This works like get_title except that it returns the contents of
708the "=head1 VERSION\n\n[BIG BLOCK]\n" block.  Note that this does NOT
709return the module's C<$VERSION>!! To recognize a
710"=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> option:
711
712  $parser->get_version(nocase => 1);
713
714=back
715
716=head1 NOTE
717
718You don't actually I<have> to define a C<run> method.  If you're
719writing a Pod-formatter class, you should define a C<run> just so
720that users can call C<parse_file> etc, but you don't I<have> to.
721
722And if you're not writing a formatter class, but are instead just
723writing a program that does something simple with a Pod::PullParser
724object (and not an object of a subclass), then there's no reason to
725bother subclassing to add a C<run> method.
726
727=head1 SEE ALSO
728
729L<Pod::Simple>
730
731L<Pod::Simple::PullParserToken> -- and its subclasses
732L<Pod::Simple::PullParserStartToken>,
733L<Pod::Simple::PullParserTextToken>, and
734L<Pod::Simple::PullParserEndToken>.
735
736L<HTML::TokeParser>, which inspired this.
737
738=head1 SUPPORT
739
740Questions or discussion about POD and Pod::Simple should be sent to the
741pod-people@perl.org mail list. Send an empty email to
742pod-people-subscribe@perl.org to subscribe.
743
744This module is managed in an open GitHub repository,
745L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
746to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
747
748Patches against Pod::Simple are welcome. Please send bug reports to
749<bug-pod-simple@rt.cpan.org>.
750
751=head1 COPYRIGHT AND DISCLAIMERS
752
753Copyright (c) 2002 Sean M. Burke.
754
755This library is free software; you can redistribute it and/or modify it
756under the same terms as Perl itself.
757
758This program is distributed in the hope that it will be useful, but
759without any warranty; without even the implied warranty of
760merchantability or fitness for a particular purpose.
761
762=head1 AUTHOR
763
764Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
765But don't bother him, he's retired.
766
767Pod::Simple is maintained by:
768
769=over
770
771=item * Allison Randal C<allison@perl.org>
772
773=item * Hans Dieter Pearcey C<hdp@cpan.org>
774
775=item * David E. Wheeler C<dwheeler@cpan.org>
776
777=back
778
779=cut
780
781JUNK:
782
783sub _old_get_title {  # some witchery in here
784  my $self = $_[0];
785  my $title;
786  my @to_unget;
787
788  while(1) {
789    push @to_unget, $self->get_token;
790    unless(defined $to_unget[-1]) { # whoops, short doc!
791      pop @to_unget;
792      last;
793    }
794
795    DEBUG and print STDERR "-Got token ", $to_unget[-1]->dump, "\n";
796
797    (DEBUG and print STDERR "Too much in the buffer.\n"),
798     last if @to_unget > 25; # sanity
799
800    my $pattern = '';
801    if( #$to_unget[-1]->type eq 'end'
802        #and $to_unget[-1]->tagname eq 'Para'
803        #and
804        ($pattern = join('',
805         map {;
806            ($_->type eq 'start') ? ("<" . $_->tagname .">")
807          : ($_->type eq 'end'  ) ? ("</". $_->tagname .">")
808          : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
809          : "BLORP"
810         } @to_unget
811       )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
812    ) {
813      # Whee, it fits the pattern
814      DEBUG and print STDERR "Seems to match =head1 NAME pattern.\n";
815      $title = '';
816      foreach my $t (reverse @to_unget) {
817        last if $t->type eq 'start' and $t->tagname eq 'Para';
818        $title = $t->text . $title if $t->type eq 'text';
819      }
820      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
821      last;
822
823    } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
824      and !( $1 eq '1' and $2 eq 'NAME' )
825    ) {
826      # Well, it fits a fallback pattern
827      DEBUG and print STDERR "Seems to match NAMEless pattern.\n";
828      $title = '';
829      foreach my $t (reverse @to_unget) {
830        last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
831        $title = $t->text . $title if $t->type eq 'text';
832      }
833      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
834      last;
835
836    } else {
837      DEBUG and $pattern and print STDERR "Leading pattern: $pattern\n";
838    }
839  }
840
841  # Put it all back:
842  $self->unget_token(@to_unget);
843
844  if(DEBUG) {
845    if(defined $title) { print STDERR "  Returning title <$title>\n" }
846    else { print STDERR "Returning title <>\n" }
847  }
848
849  return '' unless defined $title;
850  return $title;
851}
852
853