1#############################################################################
2# Pod/Find.pm -- finds files containing POD documentation
3#
4# Author: Marek Rouchal <marekr@cpan.org>
5#
6# Copyright (C) 1999-2000 by Marek Rouchal (and borrowing code
7# from Nick Ing-Simmon's PodToHtml). All rights reserved.
8# This file is part of "PodParser". Pod::Find is free software;
9# you can redistribute it and/or modify it under the same terms
10# as Perl itself.
11#############################################################################
12
13package Pod::Find;
14
15use vars qw($VERSION);
16$VERSION = 0.24_01;   ## Current version of this package
17require  5.005;   ## requires this Perl version or later
18use Carp;
19
20#############################################################################
21
22=head1 NAME
23
24Pod::Find - find POD documents in directory trees
25
26=head1 SYNOPSIS
27
28  use Pod::Find qw(pod_find simplify_name);
29  my %pods = pod_find({ -verbose => 1, -inc => 1 });
30  foreach(keys %pods) {
31     print "found library POD `$pods{$_}' in $_\n";
32  }
33
34  print "podname=",simplify_name('a/b/c/mymodule.pod'),"\n";
35
36  $location = pod_where( { -inc => 1 }, "Pod::Find" );
37
38=head1 DESCRIPTION
39
40B<Pod::Find> provides a set of functions to locate POD files.  Note that
41no function is exported by default to avoid pollution of your namespace,
42so be sure to specify them in the B<use> statement if you need them:
43
44  use Pod::Find qw(pod_find);
45
46=cut
47
48use strict;
49#use diagnostics;
50use Exporter;
51use File::Spec;
52use File::Find;
53use Cwd;
54
55use vars qw(@ISA @EXPORT_OK $VERSION);
56@ISA = qw(Exporter);
57@EXPORT_OK = qw(&pod_find &simplify_name &pod_where &contains_pod);
58
59# package global variables
60my $SIMPLIFY_RX;
61
62=head2 C<pod_find( { %opts } , @directories )>
63
64The function B<pod_find> searches for POD documents in a given set of
65files and/or directories. It returns a hash with the file names as keys
66and the POD name as value. The POD name is derived from the file name
67and its position in the directory tree.
68
69E.g. when searching in F<$HOME/perl5lib>, the file
70F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>,
71whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be
72I<Myclass::Subclass>. The name information can be used for POD
73translators.
74
75Only text files containing at least one valid POD command are found.
76
77A warning is printed if more than one POD file with the same POD name
78is found, e.g. F<CPAN.pm> in different directories. This usually
79indicates duplicate occurrences of modules in the I<@INC> search path.
80
81B<OPTIONS> The first argument for B<pod_find> may be a hash reference
82with options. The rest are either directories that are searched
83recursively or files.  The POD names of files are the plain basenames
84with any Perl-like extension (.pm, .pl, .pod) stripped.
85
86=over 4
87
88=item C<-verbose =E<gt> 1>
89
90Print progress information while scanning.
91
92=item C<-perl =E<gt> 1>
93
94Apply Perl-specific heuristics to find the correct PODs. This includes
95stripping Perl-like extensions, omitting subdirectories that are numeric
96but do I<not> match the current Perl interpreter's version id, suppressing
97F<site_perl> as a module hierarchy name etc.
98
99=item C<-script =E<gt> 1>
100
101Search for PODs in the current Perl interpreter's installation
102B<scriptdir>. This is taken from the local L<Config|Config> module.
103
104=item C<-inc =E<gt> 1>
105
106Search for PODs in the current Perl interpreter's I<@INC> paths. This
107automatically considers paths specified in the C<PERL5LIB> environment
108as this is prepended to I<@INC> by the Perl interpreter itself.
109
110=back
111
112=cut
113
114# return a hash of the POD files found
115# first argument may be a hashref (options),
116# rest is a list of directories to search recursively
117sub pod_find
118{
119    my %opts;
120    if(ref $_[0]) {
121        %opts = %{shift()};
122    }
123
124    $opts{-verbose} ||= 0;
125    $opts{-perl}    ||= 0;
126
127    my (@search) = @_;
128
129    if($opts{-script}) {
130        require Config;
131        push(@search, $Config::Config{scriptdir})
132            if -d $Config::Config{scriptdir};
133        $opts{-perl} = 1;
134    }
135
136    if($opts{-inc}) {
137        if ($^O eq 'MacOS') {
138            # tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS
139            my @new_INC = @INC;
140            for (@new_INC) {
141                if ( $_ eq '.' ) {
142                    $_ = ':';
143                } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) {
144                    $_ = ':'. $_;
145                } else {
146                    $_ =~ s|^\./|:|;
147                }
148            }
149            push(@search, grep($_ ne File::Spec->curdir, @new_INC));
150        } else {
151            push(@search, grep($_ ne File::Spec->curdir, @INC));
152        }
153
154        $opts{-perl} = 1;
155    }
156
157    if($opts{-perl}) {
158        require Config;
159        # this code simplifies the POD name for Perl modules:
160        # * remove "site_perl"
161        # * remove e.g. "i586-linux" (from 'archname')
162        # * remove e.g. 5.00503
163        # * remove pod/ if followed by *.pod (e.g. in pod/perlfunc.pod)
164
165        # Mac OS:
166        # * remove ":?site_perl:"
167        # * remove :?pod: if followed by *.pod (e.g. in :pod:perlfunc.pod)
168
169        if ($^O eq 'MacOS') {
170            $SIMPLIFY_RX =
171              qq!^(?i:\:?site_perl\:|\:?pod\:(?=.*?\\.pod\\z))*!;
172        } else {
173            $SIMPLIFY_RX =
174              qq!^(?i:site(_perl)?/|\Q$Config::Config{archname}\E/|\\d+\\.\\d+([_.]?\\d+)?/|pod/(?=.*?\\.pod\\z))*!;
175        }
176    }
177
178    my %dirs_visited;
179    my %pods;
180    my %names;
181    my $pwd = cwd();
182
183    foreach my $try (@search) {
184        unless(File::Spec->file_name_is_absolute($try)) {
185            # make path absolute
186            $try = File::Spec->catfile($pwd,$try);
187        }
188        # simplify path
189        # on VMS canonpath will vmsify:[the.path], but File::Find::find
190        # wants /unixy/paths
191        $try = File::Spec->canonpath($try) if ($^O ne 'VMS');
192        $try = VMS::Filespec::unixify($try) if ($^O eq 'VMS');
193        my $name;
194        if(-f $try) {
195            if($name = _check_and_extract_name($try, $opts{-verbose})) {
196                _check_for_duplicates($try, $name, \%names, \%pods);
197            }
198            next;
199        }
200        my $root_rx = $^O eq 'MacOS' ? qq!^\Q$try\E! : qq!^\Q$try\E/!;
201        File::Find::find( sub {
202            my $item = $File::Find::name;
203            if(-d) {
204                if($item =~ m{/(?:RCS|CVS|SCCS|\.svn)$}) {
205                    $File::Find::prune = 1;
206                    return;
207                }
208                elsif($dirs_visited{$item}) {
209                    warn "Directory '$item' already seen, skipping.\n"
210                        if($opts{-verbose});
211                    $File::Find::prune = 1;
212                    return;
213                }
214                else {
215                    $dirs_visited{$item} = 1;
216                }
217                if($opts{-perl} && /^(\d+\.[\d_]+)\z/s && eval "$1" != $]) {
218                    $File::Find::prune = 1;
219                    warn "Perl $] version mismatch on $_, skipping.\n"
220                        if($opts{-verbose});
221                }
222                return;
223            }
224            if($name = _check_and_extract_name($item, $opts{-verbose}, $root_rx)) {
225                _check_for_duplicates($item, $name, \%names, \%pods);
226            }
227        }, $try); # end of File::Find::find
228    }
229    chdir $pwd;
230    %pods;
231}
232
233sub _check_for_duplicates {
234    my ($file, $name, $names_ref, $pods_ref) = @_;
235    if($$names_ref{$name}) {
236        warn "Duplicate POD found (shadowing?): $name ($file)\n";
237        warn "    Already seen in ",
238            join(' ', grep($$pods_ref{$_} eq $name, keys %$pods_ref)),"\n";
239    }
240    else {
241        $$names_ref{$name} = 1;
242    }
243    $$pods_ref{$file} = $name;
244}
245
246sub _check_and_extract_name {
247    my ($file, $verbose, $root_rx) = @_;
248
249    # check extension or executable flag
250    # this involves testing the .bat extension on Win32!
251    unless(-f $file && -T _ && ($file =~ /\.(pod|pm|plx?)\z/i || -x _ )) {
252      return undef;
253    }
254
255    return undef unless contains_pod($file,$verbose);
256
257    # strip non-significant path components
258    # TODO what happens on e.g. Win32?
259    my $name = $file;
260    if(defined $root_rx) {
261        $name =~ s!$root_rx!!s;
262        $name =~ s!$SIMPLIFY_RX!!os if(defined $SIMPLIFY_RX);
263    }
264    else {
265        if ($^O eq 'MacOS') {
266            $name =~ s/^.*://s;
267        } else {
268            $name =~ s:^.*/::s;
269        }
270    }
271    _simplify($name);
272    $name =~ s!/+!::!g; #/
273    if ($^O eq 'MacOS') {
274        $name =~ s!:+!::!g; # : -> ::
275    } else {
276        $name =~ s!/+!::!g; # / -> ::
277    }
278    $name;
279}
280
281=head2 C<simplify_name( $str )>
282
283The function B<simplify_name> is equivalent to B<basename>, but also
284strips Perl-like extensions (.pm, .pl, .pod) and extensions like
285F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively.
286
287=cut
288
289# basic simplification of the POD name:
290# basename & strip extension
291sub simplify_name {
292    my ($str) = @_;
293    # remove all path components
294    if ($^O eq 'MacOS') {
295        $str =~ s/^.*://s;
296    } else {
297        $str =~ s:^.*/::s;
298    }
299    _simplify($str);
300    $str;
301}
302
303# internal sub only
304sub _simplify {
305    # strip Perl's own extensions
306    $_[0] =~ s/\.(pod|pm|plx?)\z//i;
307    # strip meaningless extensions on Win32 and OS/2
308    $_[0] =~ s/\.(bat|exe|cmd)\z//i if($^O =~ /mswin|os2/i);
309    # strip meaningless extensions on VMS
310    $_[0] =~ s/\.(com)\z//i if($^O eq 'VMS');
311}
312
313# contribution from Tim Jenness <t.jenness@jach.hawaii.edu>
314
315=head2 C<pod_where( { %opts }, $pod )>
316
317Returns the location of a pod document given a search directory
318and a module (e.g. C<File::Find>) or script (e.g. C<perldoc>) name.
319
320Options:
321
322=over 4
323
324=item C<-inc =E<gt> 1>
325
326Search @INC for the pod and also the C<scriptdir> defined in the
327L<Config|Config> module.
328
329=item C<-dirs =E<gt> [ $dir1, $dir2, ... ]>
330
331Reference to an array of search directories. These are searched in order
332before looking in C<@INC> (if B<-inc>). Current directory is used if
333none are specified.
334
335=item C<-verbose =E<gt> 1>
336
337List directories as they are searched
338
339=back
340
341Returns the full path of the first occurrence to the file.
342Package names (eg 'A::B') are automatically converted to directory
343names in the selected directory. (eg on unix 'A::B' is converted to
344'A/B'). Additionally, '.pm', '.pl' and '.pod' are appended to the
345search automatically if required.
346
347A subdirectory F<pod/> is also checked if it exists in any of the given
348search directories. This ensures that e.g. L<perlfunc|perlfunc> is
349found.
350
351It is assumed that if a module name is supplied, that that name
352matches the file name. Pods are not opened to check for the 'NAME'
353entry.
354
355A check is made to make sure that the file that is found does
356contain some pod documentation.
357
358=cut
359
360sub pod_where {
361
362  # default options
363  my %options = (
364         '-inc' => 0,
365         '-verbose' => 0,
366         '-dirs' => [ File::Spec->curdir ],
367        );
368
369  # Check for an options hash as first argument
370  if (defined $_[0] && ref($_[0]) eq 'HASH') {
371    my $opt = shift;
372
373    # Merge default options with supplied options
374    %options = (%options, %$opt);
375  }
376
377  # Check usage
378  carp 'Usage: pod_where({options}, $pod)' unless (scalar(@_));
379
380  # Read argument
381  my $pod = shift;
382
383  # Split on :: and then join the name together using File::Spec
384  my @parts = split (/::/, $pod);
385
386  # Get full directory list
387  my @search_dirs = @{ $options{'-dirs'} };
388
389  if ($options{'-inc'}) {
390
391    require Config;
392
393    # Add @INC
394    if ($^O eq 'MacOS' && $options{'-inc'}) {
395        # tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS
396        my @new_INC = @INC;
397        for (@new_INC) {
398            if ( $_ eq '.' ) {
399                $_ = ':';
400            } elsif ( $_ =~ s|^((?:\.\./)+)|':' x (length($1)/3)|e ) {
401                $_ = ':'. $_;
402            } else {
403                $_ =~ s|^\./|:|;
404            }
405        }
406        push (@search_dirs, @new_INC);
407    } elsif ($options{'-inc'}) {
408        push (@search_dirs, @INC);
409    }
410
411    # Add location of pod documentation for perl man pages (eg perlfunc)
412    # This is a pod directory in the private install tree
413    #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'},
414    #					'pod');
415    #push (@search_dirs, $perlpoddir)
416    #  if -d $perlpoddir;
417
418    # Add location of binaries such as pod2text
419    push (@search_dirs, $Config::Config{'scriptdir'})
420      if -d $Config::Config{'scriptdir'};
421  }
422
423  warn "Search path is: ".join(' ', @search_dirs)."\n"
424        if $options{'-verbose'};
425
426  # Loop over directories
427  Dir: foreach my $dir ( @search_dirs ) {
428
429    # Don't bother if can't find the directory
430    if (-d $dir) {
431      warn "Looking in directory $dir\n"
432        if $options{'-verbose'};
433
434      # Now concatenate this directory with the pod we are searching for
435      my $fullname = File::Spec->catfile($dir, @parts);
436      warn "Filename is now $fullname\n"
437        if $options{'-verbose'};
438
439      # Loop over possible extensions
440      foreach my $ext ('', '.pod', '.pm', '.pl') {
441        my $fullext = $fullname . $ext;
442        if (-f $fullext &&
443         contains_pod($fullext, $options{'-verbose'}) ) {
444          warn "FOUND: $fullext\n" if $options{'-verbose'};
445          return $fullext;
446        }
447      }
448    } else {
449      warn "Directory $dir does not exist\n"
450        if $options{'-verbose'};
451      next Dir;
452    }
453    # for some strange reason the path on MacOS/darwin/cygwin is
454    # 'pods' not 'pod'
455    # this could be the case also for other systems that
456    # have a case-tolerant file system, but File::Spec
457    # does not recognize 'darwin' yet. And cygwin also has "pods",
458    # but is not case tolerant. Oh well...
459    if((File::Spec->case_tolerant || $^O =~ /macos|darwin|cygwin/i)
460     && -d File::Spec->catdir($dir,'pods')) {
461      $dir = File::Spec->catdir($dir,'pods');
462      redo Dir;
463    }
464    if(-d File::Spec->catdir($dir,'pod')) {
465      $dir = File::Spec->catdir($dir,'pod');
466      redo Dir;
467    }
468  }
469  # No match;
470  return undef;
471}
472
473=head2 C<contains_pod( $file , $verbose )>
474
475Returns true if the supplied filename (not POD module) contains some pod
476information.
477
478=cut
479
480sub contains_pod {
481  my $file = shift;
482  my $verbose = 0;
483  $verbose = shift if @_;
484
485  # check for one line of POD
486  unless(open(POD,"<$file")) {
487    warn "Error: $file is unreadable: $!\n";
488    return undef;
489  }
490
491  local $/ = undef;
492  my $pod = <POD>;
493  close(POD) || die "Error closing $file: $!\n";
494  unless($pod =~ /\n=(head\d|pod|over|item)\b/s) {
495    warn "No POD in $file, skipping.\n"
496      if($verbose);
497    return 0;
498  }
499
500  return 1;
501}
502
503=head1 AUTHOR
504
505Please report bugs using L<http://rt.cpan.org>.
506
507Marek Rouchal E<lt>marekr@cpan.orgE<gt>,
508heavily borrowing code from Nick Ing-Simmons' PodToHtml.
509
510Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt> provided
511C<pod_where> and C<contains_pod>.
512
513=head1 SEE ALSO
514
515L<Pod::Parser>, L<Pod::Checker>, L<perldoc>
516
517=cut
518
5191;
520
521