Find.pm revision 1.5
1package File::Find;
2use 5.006;
3use strict;
4use warnings;
5use warnings::register;
6our $VERSION = '1.40';
7use Exporter 'import';
8require Cwd;
9
10our @EXPORT = qw(find finddepth);
11
12
13use strict;
14my $Is_VMS = $^O eq 'VMS';
15my $Is_Win32 = $^O eq 'MSWin32';
16
17require File::Basename;
18require File::Spec;
19
20# Should ideally be my() not our() but local() currently
21# refuses to operate on lexicals
22
23our %SLnkSeen;
24our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
25    $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
26    $pre_process, $post_process, $dangling_symlinks);
27
28sub contract_name {
29    my ($cdir,$fn) = @_;
30
31    return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
32
33    $cdir = substr($cdir,0,rindex($cdir,'/')+1);
34
35    $fn =~ s|^\./||;
36
37    my $abs_name= $cdir . $fn;
38
39    if (substr($fn,0,3) eq '../') {
40       1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
41    }
42
43    return $abs_name;
44}
45
46sub PathCombine($$) {
47    my ($Base,$Name) = @_;
48    my $AbsName;
49
50    if (substr($Name,0,1) eq '/') {
51	$AbsName= $Name;
52    }
53    else {
54	$AbsName= contract_name($Base,$Name);
55    }
56
57    # (simple) check for recursion
58    my $newlen= length($AbsName);
59    if ($newlen <= length($Base)) {
60	if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
61	    && $AbsName eq substr($Base,0,$newlen))
62	{
63	    return undef;
64	}
65    }
66    return $AbsName;
67}
68
69sub Follow_SymLink($) {
70    my ($AbsName) = @_;
71
72    my ($NewName,$DEV, $INO);
73    ($DEV, $INO)= lstat $AbsName;
74
75    while (-l _) {
76	if ($SLnkSeen{$DEV, $INO}++) {
77	    if ($follow_skip < 2) {
78		die "$AbsName is encountered a second time";
79	    }
80	    else {
81		return undef;
82	    }
83	}
84	$NewName= PathCombine($AbsName, readlink($AbsName));
85	unless(defined $NewName) {
86	    if ($follow_skip < 2) {
87		die "$AbsName is a recursive symbolic link";
88	    }
89	    else {
90		return undef;
91	    }
92	}
93	else {
94	    $AbsName= $NewName;
95	}
96	($DEV, $INO) = lstat($AbsName);
97	return undef unless defined $DEV;  #  dangling symbolic link
98    }
99
100    if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
101	if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
102	    die "$AbsName encountered a second time";
103	}
104	else {
105	    return undef;
106	}
107    }
108
109    return $AbsName;
110}
111
112our($dir, $name, $fullname, $prune);
113sub _find_dir_symlnk($$$);
114sub _find_dir($$$);
115
116# check whether or not a scalar variable is tainted
117# (code straight from the Camel, 3rd ed., page 561)
118sub is_tainted_pp {
119    my $arg = shift;
120    my $nada = substr($arg, 0, 0); # zero-length
121    local $@;
122    eval { eval "# $nada" };
123    return length($@) != 0;
124}
125
126sub _find_opt {
127    my $wanted = shift;
128    return unless @_;
129    die "invalid top directory" unless defined $_[0];
130
131    # This function must local()ize everything because callbacks may
132    # call find() or finddepth()
133
134    local %SLnkSeen;
135    local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
136	$follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
137	$pre_process, $post_process, $dangling_symlinks);
138    local($dir, $name, $fullname, $prune);
139    local *_ = \my $a;
140
141    my $cwd            = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
142    if ($Is_VMS) {
143	# VMS returns this by default in VMS format which just doesn't
144	# work for the rest of this module.
145	$cwd = VMS::Filespec::unixpath($cwd);
146
147	# Apparently this is not expected to have a trailing space.
148	# To attempt to make VMS/UNIX conversions mostly reversible,
149	# a trailing slash is needed.  The run-time functions ignore the
150	# resulting double slash, but it causes the perl tests to fail.
151        $cwd =~ s#/\z##;
152
153	# This comes up in upper case now, but should be lower.
154	# In the future this could be exact case, no need to change.
155    }
156    my $cwd_untainted  = $cwd;
157    my $check_t_cwd    = 1;
158    $wanted_callback   = $wanted->{wanted};
159    $bydepth           = $wanted->{bydepth};
160    $pre_process       = $wanted->{preprocess};
161    $post_process      = $wanted->{postprocess};
162    $no_chdir          = $wanted->{no_chdir};
163    $full_check        = $wanted->{follow};
164    $follow            = $full_check || $wanted->{follow_fast};
165    $follow_skip       = $wanted->{follow_skip};
166    $untaint           = $wanted->{untaint};
167    $untaint_pat       = $wanted->{untaint_pattern};
168    $untaint_skip      = $wanted->{untaint_skip};
169    $dangling_symlinks = $wanted->{dangling_symlinks};
170
171    # for compatibility reasons (find.pl, find2perl)
172    local our ($topdir, $topdev, $topino, $topmode, $topnlink);
173
174    # a symbolic link to a directory doesn't increase the link count
175    $avoid_nlink      = $follow || $File::Find::dont_use_nlink;
176
177    my ($abs_dir, $Is_Dir);
178
179    Proc_Top_Item:
180    foreach my $TOP (@_) {
181	my $top_item = $TOP;
182	$top_item = VMS::Filespec::unixify($top_item) if $Is_VMS;
183
184	($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
185
186	if ($Is_Win32) {
187	    $top_item =~ s|[/\\]\z||
188	      unless $top_item =~ m{^(?:\w:)?[/\\]$};
189	}
190	else {
191	    $top_item =~ s|/\z|| unless $top_item eq '/';
192	}
193
194	$Is_Dir= 0;
195
196	if ($follow) {
197
198	    if (substr($top_item,0,1) eq '/') {
199		$abs_dir = $top_item;
200	    }
201	    elsif ($top_item eq $File::Find::current_dir) {
202		$abs_dir = $cwd;
203	    }
204	    else {  # care about any  ../
205		$top_item =~ s/\.dir\z//i if $Is_VMS;
206		$abs_dir = contract_name("$cwd/",$top_item);
207	    }
208	    $abs_dir= Follow_SymLink($abs_dir);
209	    unless (defined $abs_dir) {
210		if ($dangling_symlinks) {
211		    if (ref $dangling_symlinks eq 'CODE') {
212			$dangling_symlinks->($top_item, $cwd);
213		    } else {
214			warnings::warnif "$top_item is a dangling symbolic link\n";
215		    }
216		}
217		next Proc_Top_Item;
218	    }
219
220	    if (-d _) {
221		$top_item =~ s/\.dir\z//i if $Is_VMS;
222		_find_dir_symlnk($wanted, $abs_dir, $top_item);
223		$Is_Dir= 1;
224	    }
225	}
226	else { # no follow
227	    $topdir = $top_item;
228	    unless (defined $topnlink) {
229		warnings::warnif "Can't stat $top_item: $!\n";
230		next Proc_Top_Item;
231	    }
232	    if (-d _) {
233		$top_item =~ s/\.dir\z//i if $Is_VMS;
234		_find_dir($wanted, $top_item, $topnlink);
235		$Is_Dir= 1;
236	    }
237	    else {
238		$abs_dir= $top_item;
239	    }
240	}
241
242	unless ($Is_Dir) {
243	    unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
244		($dir,$_) = ('./', $top_item);
245	    }
246
247	    $abs_dir = $dir;
248	    if (( $untaint ) && (is_tainted($dir) )) {
249		( $abs_dir ) = $dir =~ m|$untaint_pat|;
250		unless (defined $abs_dir) {
251		    if ($untaint_skip == 0) {
252			die "directory $dir is still tainted";
253		    }
254		    else {
255			next Proc_Top_Item;
256		    }
257		}
258	    }
259
260	    unless ($no_chdir || chdir $abs_dir) {
261		warnings::warnif "Couldn't chdir $abs_dir: $!\n";
262		next Proc_Top_Item;
263	    }
264
265	    $name = $abs_dir . $_; # $File::Find::name
266	    $_ = $name if $no_chdir;
267
268	    { $wanted_callback->() }; # protect against wild "next"
269
270	}
271
272	unless ( $no_chdir ) {
273	    if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
274		( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
275		unless (defined $cwd_untainted) {
276		    die "insecure cwd in find(depth)";
277		}
278		$check_t_cwd = 0;
279	    }
280	    unless (chdir $cwd_untainted) {
281		die "Can't cd to $cwd: $!\n";
282	    }
283	}
284    }
285}
286
287# API:
288#  $wanted
289#  $p_dir :  "parent directory"
290#  $nlink :  what came back from the stat
291# preconditions:
292#  chdir (if not no_chdir) to dir
293
294sub _find_dir($$$) {
295    my ($wanted, $p_dir, $nlink) = @_;
296    my ($CdLvl,$Level) = (0,0);
297    my @Stack;
298    my @filenames;
299    my ($subcount,$sub_nlink);
300    my $SE= [];
301    my $dir_name= $p_dir;
302    my $dir_pref;
303    my $dir_rel = $File::Find::current_dir;
304    my $tainted = 0;
305    my $no_nlink;
306
307    if ($Is_Win32) {
308	$dir_pref
309	  = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
310    } elsif ($Is_VMS) {
311
312	#	VMS is returning trailing .dir on directories
313	#	and trailing . on files and symbolic links
314	#	in UNIX syntax.
315	#
316
317	$p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';
318
319	$dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
320    }
321    else {
322	$dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
323    }
324
325    local ($dir, $name, $prune);
326
327    unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
328	my $udir = $p_dir;
329	if (( $untaint ) && (is_tainted($p_dir) )) {
330	    ( $udir ) = $p_dir =~ m|$untaint_pat|;
331	    unless (defined $udir) {
332		if ($untaint_skip == 0) {
333		    die "directory $p_dir is still tainted";
334		}
335		else {
336		    return;
337		}
338	    }
339	}
340	unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
341	    warnings::warnif "Can't cd to $udir: $!\n";
342	    return;
343	}
344    }
345
346    # push the starting directory
347    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
348
349    while (defined $SE) {
350	unless ($bydepth) {
351	    $dir= $p_dir; # $File::Find::dir
352	    $name= $dir_name; # $File::Find::name
353	    $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
354	    # prune may happen here
355	    $prune= 0;
356	    { $wanted_callback->() };	# protect against wild "next"
357	    next if $prune;
358	}
359
360	# change to that directory
361	unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
362	    my $udir= $dir_rel;
363	    if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
364		( $udir ) = $dir_rel =~ m|$untaint_pat|;
365		unless (defined $udir) {
366		    if ($untaint_skip == 0) {
367			die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
368		    } else { # $untaint_skip == 1
369			next;
370		    }
371		}
372	    }
373	    unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
374		warnings::warnif "Can't cd to (" .
375		    ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
376		next;
377	    }
378	    $CdLvl++;
379	}
380
381	$dir= $dir_name; # $File::Find::dir
382
383	# Get the list of files in the current directory.
384    my $dh;
385	unless (opendir $dh, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
386	    warnings::warnif "Can't opendir($dir_name): $!\n";
387	    next;
388	}
389	@filenames = readdir $dh;
390	closedir($dh);
391	@filenames = $pre_process->(@filenames) if $pre_process;
392	push @Stack,[$CdLvl,$dir_name,"",-2]   if $post_process;
393
394	# default: use whatever was specified
395        # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
396        $no_nlink = $avoid_nlink;
397        # if dir has wrong nlink count, force switch to slower stat method
398        $no_nlink = 1 if ($nlink < 2);
399
400	if ($nlink == 2 && !$no_nlink) {
401	    # This dir has no subdirectories.
402	    for my $FN (@filenames) {
403		if ($Is_VMS) {
404		# Big hammer here - Compensate for VMS trailing . and .dir
405		# No win situation until this is changed, but this
406		# will handle the majority of the cases with breaking the fewest
407
408		    $FN =~ s/\.dir\z//i;
409		    $FN =~ s#\.$## if ($FN ne '.');
410		}
411		next if $FN =~ $File::Find::skip_pattern;
412
413		$name = $dir_pref . $FN; # $File::Find::name
414		$_ = ($no_chdir ? $name : $FN); # $_
415		{ $wanted_callback->() }; # protect against wild "next"
416	    }
417
418	}
419	else {
420	    # This dir has subdirectories.
421	    $subcount = $nlink - 2;
422
423	    # HACK: insert directories at this position, so as to preserve
424	    # the user pre-processed ordering of files (thus ensuring
425	    # directory traversal is in user sorted order, not at random).
426            my $stack_top = @Stack;
427
428	    for my $FN (@filenames) {
429		next if $FN =~ $File::Find::skip_pattern;
430		if ($subcount > 0 || $no_nlink) {
431		    # Seen all the subdirs?
432		    # check for directoriness.
433		    # stat is faster for a file in the current directory
434		    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
435
436		    if (-d _) {
437			--$subcount;
438			$FN =~ s/\.dir\z//i if $Is_VMS;
439			# HACK: replace push to preserve dir traversal order
440			#push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
441			splice @Stack, $stack_top, 0,
442			         [$CdLvl,$dir_name,$FN,$sub_nlink];
443		    }
444		    else {
445			$name = $dir_pref . $FN; # $File::Find::name
446			$_= ($no_chdir ? $name : $FN); # $_
447			{ $wanted_callback->() }; # protect against wild "next"
448		    }
449		}
450		else {
451		    $name = $dir_pref . $FN; # $File::Find::name
452		    $_= ($no_chdir ? $name : $FN); # $_
453		    { $wanted_callback->() }; # protect against wild "next"
454		}
455	    }
456	}
457    }
458    continue {
459	while ( defined ($SE = pop @Stack) ) {
460	    ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
461	    if ($CdLvl > $Level && !$no_chdir) {
462		my $tmp;
463		if ($Is_VMS) {
464		    $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
465		}
466		else {
467		    $tmp = join('/',('..') x ($CdLvl-$Level));
468		}
469		die "Can't cd to $tmp from $dir_name: $!"
470		    unless chdir ($tmp);
471		$CdLvl = $Level;
472	    }
473
474	    if ($Is_Win32) {
475		$dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
476		    ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
477		$dir_pref = "$dir_name/";
478	    }
479	    elsif ($^O eq 'VMS') {
480                if ($p_dir =~ m/[\]>]+$/) {
481                    $dir_name = $p_dir;
482                    $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
483                    $dir_pref = $dir_name;
484                }
485                else {
486                    $dir_name = "$p_dir/$dir_rel";
487                    $dir_pref = "$dir_name/";
488                }
489	    }
490	    else {
491		$dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
492		$dir_pref = "$dir_name/";
493	    }
494
495	    if ( $nlink == -2 ) {
496		$name = $dir = $p_dir; # $File::Find::name / dir
497                $_ = $File::Find::current_dir;
498		$post_process->();		# End-of-directory processing
499	    }
500	    elsif ( $nlink < 0 ) {  # must be finddepth, report dirname now
501		$name = $dir_name;
502		if ( substr($name,-2) eq '/.' ) {
503		    substr($name, length($name) == 2 ? -1 : -2) = '';
504		}
505		$dir = $p_dir;
506		$_ = ($no_chdir ? $dir_name : $dir_rel );
507		if ( substr($_,-2) eq '/.' ) {
508		    substr($_, length($_) == 2 ? -1 : -2) = '';
509		}
510		{ $wanted_callback->() }; # protect against wild "next"
511	     }
512	     else {
513		push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
514		last;
515	    }
516	}
517    }
518}
519
520
521# API:
522#  $wanted
523#  $dir_loc : absolute location of a dir
524#  $p_dir   : "parent directory"
525# preconditions:
526#  chdir (if not no_chdir) to dir
527
528sub _find_dir_symlnk($$$) {
529    my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
530    my @Stack;
531    my @filenames;
532    my $new_loc;
533    my $updir_loc = $dir_loc; # untainted parent directory
534    my $SE = [];
535    my $dir_name = $p_dir;
536    my $dir_pref;
537    my $loc_pref;
538    my $dir_rel = $File::Find::current_dir;
539    my $byd_flag; # flag for pending stack entry if $bydepth
540    my $tainted = 0;
541    my $ok = 1;
542
543    $dir_pref = ( $p_dir   eq '/' ? '/' : "$p_dir/" );
544    $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
545
546    local ($dir, $name, $fullname, $prune);
547
548    unless ($no_chdir) {
549	# untaint the topdir
550	if (( $untaint ) && (is_tainted($dir_loc) )) {
551	    ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
552	     # once untainted, $updir_loc is pushed on the stack (as parent directory);
553	    # hence, we don't need to untaint the parent directory every time we chdir
554	    # to it later
555	    unless (defined $updir_loc) {
556		if ($untaint_skip == 0) {
557		    die "directory $dir_loc is still tainted";
558		}
559		else {
560		    return;
561		}
562	    }
563	}
564	$ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
565	unless ($ok) {
566	    warnings::warnif "Can't cd to $updir_loc: $!\n";
567	    return;
568	}
569    }
570
571    push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1]  if  $bydepth;
572
573    while (defined $SE) {
574
575	unless ($bydepth) {
576	    # change (back) to parent directory (always untainted)
577	    unless ($no_chdir) {
578		unless (chdir $updir_loc) {
579		    warnings::warnif "Can't cd to $updir_loc: $!\n";
580		    next;
581		}
582	    }
583	    $dir= $p_dir; # $File::Find::dir
584	    $name= $dir_name; # $File::Find::name
585	    $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
586	    $fullname= $dir_loc; # $File::Find::fullname
587	    # prune may happen here
588	    $prune= 0;
589	    lstat($_); # make sure  file tests with '_' work
590	    { $wanted_callback->() }; # protect against wild "next"
591	    next if $prune;
592	}
593
594	# change to that directory
595	unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
596	    $updir_loc = $dir_loc;
597	    if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
598		# untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
599		( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
600		unless (defined $updir_loc) {
601		    if ($untaint_skip == 0) {
602			die "directory $dir_loc is still tainted";
603		    }
604		    else {
605			next;
606		    }
607		}
608	    }
609	    unless (chdir $updir_loc) {
610		warnings::warnif "Can't cd to $updir_loc: $!\n";
611		next;
612	    }
613	}
614
615	$dir = $dir_name; # $File::Find::dir
616
617	# Get the list of files in the current directory.
618    my $dh;
619	unless (opendir $dh, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
620	    warnings::warnif "Can't opendir($dir_loc): $!\n";
621	    next;
622	}
623	@filenames = readdir $dh;
624	closedir($dh);
625
626	for my $FN (@filenames) {
627	    if ($Is_VMS) {
628	    # Big hammer here - Compensate for VMS trailing . and .dir
629	    # No win situation until this is changed, but this
630	    # will handle the majority of the cases with breaking the fewest.
631
632		$FN =~ s/\.dir\z//i;
633		$FN =~ s#\.$## if ($FN ne '.');
634	    }
635	    next if $FN =~ $File::Find::skip_pattern;
636
637	    # follow symbolic links / do an lstat
638	    $new_loc = Follow_SymLink($loc_pref.$FN);
639
640	    # ignore if invalid symlink
641	    unless (defined $new_loc) {
642	        if (!defined -l _ && $dangling_symlinks) {
643                $fullname = undef;
644	            if (ref $dangling_symlinks eq 'CODE') {
645	                $dangling_symlinks->($FN, $dir_pref);
646	            } else {
647	                warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
648	            }
649	        }
650            else {
651                $fullname = $loc_pref . $FN;
652            }
653	        $name = $dir_pref . $FN;
654	        $_ = ($no_chdir ? $name : $FN);
655	        { $wanted_callback->() };
656	        next;
657	    }
658
659	    if (-d _) {
660		if ($Is_VMS) {
661		    $FN =~ s/\.dir\z//i;
662		    $FN =~ s#\.$## if ($FN ne '.');
663		    $new_loc =~ s/\.dir\z//i;
664		    $new_loc =~ s#\.$## if ($new_loc ne '.');
665		}
666		push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
667	    }
668	    else {
669		$fullname = $new_loc; # $File::Find::fullname
670		$name = $dir_pref . $FN; # $File::Find::name
671		$_ = ($no_chdir ? $name : $FN); # $_
672		{ $wanted_callback->() }; # protect against wild "next"
673	    }
674	}
675
676    }
677    continue {
678	while (defined($SE = pop @Stack)) {
679	    ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
680	    $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
681	    $dir_pref = "$dir_name/";
682	    $loc_pref = "$dir_loc/";
683	    if ( $byd_flag < 0 ) {  # must be finddepth, report dirname now
684		unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
685		    unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
686			warnings::warnif "Can't cd to $updir_loc: $!\n";
687			next;
688		    }
689		}
690		$fullname = $dir_loc; # $File::Find::fullname
691		$name = $dir_name; # $File::Find::name
692		if ( substr($name,-2) eq '/.' ) {
693		    substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
694		}
695		$dir = $p_dir; # $File::Find::dir
696		$_ = ($no_chdir ? $dir_name : $dir_rel); # $_
697		if ( substr($_,-2) eq '/.' ) {
698		    substr($_, length($_) == 2 ? -1 : -2) = '';
699		}
700
701		lstat($_); # make sure file tests with '_' work
702		{ $wanted_callback->() }; # protect against wild "next"
703	    }
704	    else {
705		push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1]  if  $bydepth;
706		last;
707	    }
708	}
709    }
710}
711
712
713sub wrap_wanted {
714    my $wanted = shift;
715    if ( ref($wanted) eq 'HASH' ) {
716        # RT #122547
717        my %valid_options = map {$_ => 1} qw(
718            wanted
719            bydepth
720            preprocess
721            postprocess
722            follow
723            follow_fast
724            follow_skip
725            dangling_symlinks
726            no_chdir
727            untaint
728            untaint_pattern
729            untaint_skip
730        );
731        my @invalid_options = ();
732        for my $v (keys %{$wanted}) {
733            push @invalid_options, $v unless exists $valid_options{$v};
734        }
735        warn "Invalid option(s): @invalid_options" if @invalid_options;
736
737        unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
738            die 'no &wanted subroutine given';
739        }
740        if ( $wanted->{follow} || $wanted->{follow_fast}) {
741            $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
742        }
743        if ( $wanted->{untaint} ) {
744            $wanted->{untaint_pattern} = $File::Find::untaint_pattern
745            unless defined $wanted->{untaint_pattern};
746            $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
747        }
748        return $wanted;
749    }
750    elsif( ref( $wanted ) eq 'CODE' ) {
751        return { wanted => $wanted };
752    }
753    else {
754       die 'no &wanted subroutine given';
755    }
756}
757
758sub find {
759    my $wanted = shift;
760    _find_opt(wrap_wanted($wanted), @_);
761}
762
763sub finddepth {
764    my $wanted = wrap_wanted(shift);
765    $wanted->{bydepth} = 1;
766    _find_opt($wanted, @_);
767}
768
769# default
770$File::Find::skip_pattern    = qr/^\.{1,2}\z/;
771$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
772
773# this _should_ work properly on all platforms
774# where File::Find can be expected to work
775$File::Find::current_dir = File::Spec->curdir || '.';
776
777$File::Find::dont_use_nlink = 1;
778
779# We need a function that checks if a scalar is tainted. Either use the
780# Scalar::Util module's tainted() function or our (slower) pure Perl
781# fallback is_tainted_pp()
782{
783    local $@;
784    eval { require Scalar::Util };
785    *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
786}
787
7881;
789
790__END__
791
792=head1 NAME
793
794File::Find - Traverse a directory tree.
795
796=head1 SYNOPSIS
797
798    use File::Find;
799    find(\&wanted, @directories_to_search);
800    sub wanted { ... }
801
802    use File::Find;
803    finddepth(\&wanted, @directories_to_search);
804    sub wanted { ... }
805
806    use File::Find;
807    find({ wanted => \&process, follow => 1 }, '.');
808
809=head1 DESCRIPTION
810
811These are functions for searching through directory trees doing work
812on each file found similar to the Unix I<find> command.  File::Find
813exports two functions, C<find> and C<finddepth>.  They work similarly
814but have subtle differences.
815
816=over 4
817
818=item B<find>
819
820  find(\&wanted,  @directories);
821  find(\%options, @directories);
822
823C<find()> does a depth-first search over the given C<@directories> in
824the order they are given.  For each file or directory found, it calls
825the C<&wanted> subroutine.  (See below for details on how to use the
826C<&wanted> function).  Additionally, for each directory found, it will
827C<chdir()> into that directory and continue the search, invoking the
828C<&wanted> function on each file or subdirectory in the directory.
829
830=item B<finddepth>
831
832  finddepth(\&wanted,  @directories);
833  finddepth(\%options, @directories);
834
835C<finddepth()> works just like C<find()> except that it invokes the
836C<&wanted> function for a directory I<after> invoking it for the
837directory's contents.  It does a postorder traversal instead of a
838preorder traversal, working from the bottom of the directory tree up
839where C<find()> works from the top of the tree down.
840
841=back
842
843Despite the name of the C<finddepth()> function, both C<find()> and
844C<finddepth()> perform a depth-first search of the directory hierarchy.
845
846=head2 %options
847
848The first argument to C<find()> is either a code reference to your
849C<&wanted> function, or a hash reference describing the operations
850to be performed for each file.  The
851code reference is described in L</The wanted function> below.
852
853Here are the possible keys for the hash:
854
855=over 4
856
857=item C<wanted>
858
859The value should be a code reference.  This code reference is
860described in L</The wanted function> below. The C<&wanted> subroutine is
861mandatory.
862
863=item C<bydepth>
864
865Reports the name of a directory only AFTER all its entries
866have been reported.  Entry point C<finddepth()> is a shortcut for
867specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
868
869=item C<preprocess>
870
871The value should be a code reference. This code reference is used to
872preprocess the current directory. The name of the currently processed
873directory is in C<$File::Find::dir>. Your preprocessing function is
874called after C<readdir()>, but before the loop that calls the C<wanted()>
875function. It is called with a list of strings (actually file/directory
876names) and is expected to return a list of strings. The code can be
877used to sort the file/directory names alphabetically, numerically,
878or to filter out directory entries based on their name alone. When
879I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
880
881=item C<postprocess>
882
883The value should be a code reference. It is invoked just before leaving
884the currently processed directory. It is called in void context with no
885arguments. The name of the current directory is in C<$File::Find::dir>. This
886hook is handy for summarizing a directory, such as calculating its disk
887usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
888no-op.
889
890=item C<follow>
891
892Causes symbolic links to be followed. Since directory trees with symbolic
893links (followed) may contain files more than once and may even have
894cycles, a hash has to be built up with an entry for each file.
895This might be expensive both in space and time for a large
896directory tree. See L</follow_fast> and L</follow_skip> below.
897If either I<follow> or I<follow_fast> is in effect:
898
899=over 4
900
901=item *
902
903It is guaranteed that an I<lstat> has been called before the user's
904C<wanted()> function is called. This enables fast file checks involving C<_>.
905Note that this guarantee no longer holds if I<follow> or I<follow_fast>
906are not set.
907
908=item *
909
910There is a variable C<$File::Find::fullname> which holds the absolute
911pathname of the file with all symbolic links resolved.  If the link is
912a dangling symbolic link, then fullname will be set to C<undef>.
913
914=back
915
916This is a no-op on Win32.
917
918=item C<follow_fast>
919
920This is similar to I<follow> except that it may report some files more
921than once.  It does detect cycles, however.  Since only symbolic links
922have to be hashed, this is much cheaper both in space and time.  If
923processing a file more than once (by the user's C<wanted()> function)
924is worse than just taking time, the option I<follow> should be used.
925
926This is also a no-op on Win32.
927
928=item C<follow_skip>
929
930C<follow_skip==1>, which is the default, causes all files which are
931neither directories nor symbolic links to be ignored if they are about
932to be processed a second time. If a directory or a symbolic link
933are about to be processed a second time, File::Find dies.
934
935C<follow_skip==0> causes File::Find to die if any file is about to be
936processed a second time.
937
938C<follow_skip==2> causes File::Find to ignore any duplicate files and
939directories but to proceed normally otherwise.
940
941=item C<dangling_symlinks>
942
943Specifies what to do with symbolic links whose target doesn't exist.
944If true and a code reference, will be called with the symbolic link
945name and the directory it lives in as arguments.  Otherwise, if true
946and warnings are on, a warning of the form C<"symbolic_link_name is a dangling
947symbolic link\n"> will be issued.  If false, the dangling symbolic link
948will be silently ignored.
949
950=item C<no_chdir>
951
952Does not C<chdir()> to each directory as it recurses. The C<wanted()>
953function will need to be aware of this, of course. In this case,
954C<$_> will be the same as C<$File::Find::name>.
955
956=item C<untaint>
957
958If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or
959if EUID != UID or if EGID != GID), then internally directory names have to be
960untainted before they can be C<chdir>'d to. Therefore they are checked against
961a regular expression I<untaint_pattern>.  Note that all names passed to the
962user's C<wanted()> function are still tainted. If this option is used while not
963in taint-mode, C<untaint> is a no-op.
964
965=item C<untaint_pattern>
966
967See above. This should be set using the C<qr> quoting operator.
968The default is set to C<qr|^([-+@\w./]+)$|>.
969Note that the parentheses are vital.
970
971=item C<untaint_skip>
972
973If set, a directory which fails the I<untaint_pattern> is skipped,
974including all its sub-directories. The default is to C<die> in such a case.
975
976=back
977
978=head2 The wanted function
979
980The C<wanted()> function does whatever verifications you want on
981each file and directory.  Note that despite its name, the C<wanted()>
982function is a generic callback function, and does B<not> tell
983File::Find if a file is "wanted" or not.  In fact, its return value
984is ignored.
985
986The wanted function takes no arguments but rather does its work
987through a collection of variables.
988
989=over 4
990
991=item C<$File::Find::dir> is the current directory name,
992
993=item C<$_> is the current filename within that directory
994
995=item C<$File::Find::name> is the complete pathname to the file.
996
997=back
998
999The above variables have all been localized and may be changed without
1000affecting data outside of the wanted function.
1001
1002For example, when examining the file F</some/path/foo.ext> you will have:
1003
1004    $File::Find::dir  = /some/path/
1005    $_                = foo.ext
1006    $File::Find::name = /some/path/foo.ext
1007
1008You are chdir()'d to C<$File::Find::dir> when the function is called,
1009unless C<no_chdir> was specified. Note that when changing to
1010directories is in effect, the root directory (F</>) is a somewhat
1011special case inasmuch as the concatenation of C<$File::Find::dir>,
1012C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
1013table below summarizes all variants:
1014
1015              $File::Find::name  $File::Find::dir  $_
1016 default      /                  /                 .
1017 no_chdir=>0  /etc               /                 etc
1018              /etc/x             /etc              x
1019
1020 no_chdir=>1  /                  /                 /
1021              /etc               /                 /etc
1022              /etc/x             /etc              /etc/x
1023
1024
1025When C<follow> or C<follow_fast> are in effect, there is
1026also a C<$File::Find::fullname>.  The function may set
1027C<$File::Find::prune> to prune the tree unless C<bydepth> was
1028specified.  Unless C<follow> or C<follow_fast> is specified, for
1029compatibility reasons (find.pl, find2perl) there are in addition the
1030following globals available: C<$File::Find::topdir>,
1031C<$File::Find::topdev>, C<$File::Find::topino>,
1032C<$File::Find::topmode> and C<$File::Find::topnlink>.
1033
1034This library is useful for the C<find2perl> tool (distributed as part of the
1035App-find2perl CPAN distribution), which when fed,
1036
1037  find2perl / -name .nfs\* -mtime +7 \
1038    -exec rm -f {} \; -o -fstype nfs -prune
1039
1040produces something like:
1041
1042 sub wanted {
1043    /^\.nfs.*\z/s &&
1044    (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
1045    int(-M _) > 7 &&
1046    unlink($_)
1047    ||
1048    ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
1049    $dev < 0 &&
1050    ($File::Find::prune = 1);
1051 }
1052
1053Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
1054filehandle that caches the information from the preceding
1055C<stat()>, C<lstat()>, or filetest.
1056
1057Here's another interesting wanted function.  It will find all symbolic
1058links that don't resolve:
1059
1060    sub wanted {
1061         -l && !-e && print "bogus link: $File::Find::name\n";
1062    }
1063
1064Note that you may mix directories and (non-directory) files in the list of
1065directories to be searched by the C<wanted()> function.
1066
1067    find(\&wanted, "./foo", "./bar", "./baz/epsilon");
1068
1069In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
1070evaluated by C<wanted()>.
1071
1072See also the script C<pfind> on CPAN for a nice application of this
1073module.
1074
1075=head1 WARNINGS
1076
1077If you run your program with the C<-w> switch, or if you use the
1078C<warnings> pragma, File::Find will report warnings for several weird
1079situations. You can disable these warnings by putting the statement
1080
1081    no warnings 'File::Find';
1082
1083in the appropriate scope. See L<warnings> for more info about lexical
1084warnings.
1085
1086=head1 BUGS AND CAVEATS
1087
1088=over 4
1089
1090=item $dont_use_nlink
1091
1092You can set the variable C<$File::Find::dont_use_nlink> to 0 if you
1093are sure the filesystem you are scanning reflects the number of
1094subdirectories in the parent directory's C<nlink> count.
1095
1096If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an
1097improvement in speed at the risk of not recursing into subdirectories
1098if a filesystem doesn't populate C<nlink> as expected.
1099
1100C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms.
1101
1102=item symlinks
1103
1104Be aware that the option to follow symbolic links can be dangerous.
1105Depending on the structure of the directory tree (including symbolic
1106links to directories) you might traverse a given (physical) directory
1107more than once (only if C<follow_fast> is in effect).
1108Furthermore, deleting or changing files in a symbolically linked directory
1109might cause very unpleasant surprises, since you delete or change files
1110in an unknown directory.
1111
1112=back
1113
1114=head1 HISTORY
1115
1116File::Find used to produce incorrect results if called recursively.
1117During the development of perl 5.8 this bug was fixed.
1118The first fixed version of File::Find was 1.01.
1119
1120=head1 SEE ALSO
1121
1122L<find(1)>, find2perl.
1123
1124=cut
1125