Find.pm revision 1.5
1package File::Find; 2use 5.006; 3use strict; 4use warnings; 5use warnings::register; 6our $VERSION = '1.40'; 7use Exporter 'import'; 8require Cwd; 9 10our @EXPORT = qw(find finddepth); 11 12 13use strict; 14my $Is_VMS = $^O eq 'VMS'; 15my $Is_Win32 = $^O eq 'MSWin32'; 16 17require File::Basename; 18require File::Spec; 19 20# Should ideally be my() not our() but local() currently 21# refuses to operate on lexicals 22 23our %SLnkSeen; 24our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 25 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 26 $pre_process, $post_process, $dangling_symlinks); 27 28sub contract_name { 29 my ($cdir,$fn) = @_; 30 31 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 32 33 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 34 35 $fn =~ s|^\./||; 36 37 my $abs_name= $cdir . $fn; 38 39 if (substr($fn,0,3) eq '../') { 40 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; 41 } 42 43 return $abs_name; 44} 45 46sub PathCombine($$) { 47 my ($Base,$Name) = @_; 48 my $AbsName; 49 50 if (substr($Name,0,1) eq '/') { 51 $AbsName= $Name; 52 } 53 else { 54 $AbsName= contract_name($Base,$Name); 55 } 56 57 # (simple) check for recursion 58 my $newlen= length($AbsName); 59 if ($newlen <= length($Base)) { 60 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 61 && $AbsName eq substr($Base,0,$newlen)) 62 { 63 return undef; 64 } 65 } 66 return $AbsName; 67} 68 69sub Follow_SymLink($) { 70 my ($AbsName) = @_; 71 72 my ($NewName,$DEV, $INO); 73 ($DEV, $INO)= lstat $AbsName; 74 75 while (-l _) { 76 if ($SLnkSeen{$DEV, $INO}++) { 77 if ($follow_skip < 2) { 78 die "$AbsName is encountered a second time"; 79 } 80 else { 81 return undef; 82 } 83 } 84 $NewName= PathCombine($AbsName, readlink($AbsName)); 85 unless(defined $NewName) { 86 if ($follow_skip < 2) { 87 die "$AbsName is a recursive symbolic link"; 88 } 89 else { 90 return undef; 91 } 92 } 93 else { 94 $AbsName= $NewName; 95 } 96 ($DEV, $INO) = lstat($AbsName); 97 return undef unless defined $DEV; # dangling symbolic link 98 } 99 100 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 101 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 102 die "$AbsName encountered a second time"; 103 } 104 else { 105 return undef; 106 } 107 } 108 109 return $AbsName; 110} 111 112our($dir, $name, $fullname, $prune); 113sub _find_dir_symlnk($$$); 114sub _find_dir($$$); 115 116# check whether or not a scalar variable is tainted 117# (code straight from the Camel, 3rd ed., page 561) 118sub is_tainted_pp { 119 my $arg = shift; 120 my $nada = substr($arg, 0, 0); # zero-length 121 local $@; 122 eval { eval "# $nada" }; 123 return length($@) != 0; 124} 125 126sub _find_opt { 127 my $wanted = shift; 128 return unless @_; 129 die "invalid top directory" unless defined $_[0]; 130 131 # This function must local()ize everything because callbacks may 132 # call find() or finddepth() 133 134 local %SLnkSeen; 135 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 136 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 137 $pre_process, $post_process, $dangling_symlinks); 138 local($dir, $name, $fullname, $prune); 139 local *_ = \my $a; 140 141 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 142 if ($Is_VMS) { 143 # VMS returns this by default in VMS format which just doesn't 144 # work for the rest of this module. 145 $cwd = VMS::Filespec::unixpath($cwd); 146 147 # Apparently this is not expected to have a trailing space. 148 # To attempt to make VMS/UNIX conversions mostly reversible, 149 # a trailing slash is needed. The run-time functions ignore the 150 # resulting double slash, but it causes the perl tests to fail. 151 $cwd =~ s#/\z##; 152 153 # This comes up in upper case now, but should be lower. 154 # In the future this could be exact case, no need to change. 155 } 156 my $cwd_untainted = $cwd; 157 my $check_t_cwd = 1; 158 $wanted_callback = $wanted->{wanted}; 159 $bydepth = $wanted->{bydepth}; 160 $pre_process = $wanted->{preprocess}; 161 $post_process = $wanted->{postprocess}; 162 $no_chdir = $wanted->{no_chdir}; 163 $full_check = $wanted->{follow}; 164 $follow = $full_check || $wanted->{follow_fast}; 165 $follow_skip = $wanted->{follow_skip}; 166 $untaint = $wanted->{untaint}; 167 $untaint_pat = $wanted->{untaint_pattern}; 168 $untaint_skip = $wanted->{untaint_skip}; 169 $dangling_symlinks = $wanted->{dangling_symlinks}; 170 171 # for compatibility reasons (find.pl, find2perl) 172 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 173 174 # a symbolic link to a directory doesn't increase the link count 175 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 176 177 my ($abs_dir, $Is_Dir); 178 179 Proc_Top_Item: 180 foreach my $TOP (@_) { 181 my $top_item = $TOP; 182 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS; 183 184 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 185 186 if ($Is_Win32) { 187 $top_item =~ s|[/\\]\z|| 188 unless $top_item =~ m{^(?:\w:)?[/\\]$}; 189 } 190 else { 191 $top_item =~ s|/\z|| unless $top_item eq '/'; 192 } 193 194 $Is_Dir= 0; 195 196 if ($follow) { 197 198 if (substr($top_item,0,1) eq '/') { 199 $abs_dir = $top_item; 200 } 201 elsif ($top_item eq $File::Find::current_dir) { 202 $abs_dir = $cwd; 203 } 204 else { # care about any ../ 205 $top_item =~ s/\.dir\z//i if $Is_VMS; 206 $abs_dir = contract_name("$cwd/",$top_item); 207 } 208 $abs_dir= Follow_SymLink($abs_dir); 209 unless (defined $abs_dir) { 210 if ($dangling_symlinks) { 211 if (ref $dangling_symlinks eq 'CODE') { 212 $dangling_symlinks->($top_item, $cwd); 213 } else { 214 warnings::warnif "$top_item is a dangling symbolic link\n"; 215 } 216 } 217 next Proc_Top_Item; 218 } 219 220 if (-d _) { 221 $top_item =~ s/\.dir\z//i if $Is_VMS; 222 _find_dir_symlnk($wanted, $abs_dir, $top_item); 223 $Is_Dir= 1; 224 } 225 } 226 else { # no follow 227 $topdir = $top_item; 228 unless (defined $topnlink) { 229 warnings::warnif "Can't stat $top_item: $!\n"; 230 next Proc_Top_Item; 231 } 232 if (-d _) { 233 $top_item =~ s/\.dir\z//i if $Is_VMS; 234 _find_dir($wanted, $top_item, $topnlink); 235 $Is_Dir= 1; 236 } 237 else { 238 $abs_dir= $top_item; 239 } 240 } 241 242 unless ($Is_Dir) { 243 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 244 ($dir,$_) = ('./', $top_item); 245 } 246 247 $abs_dir = $dir; 248 if (( $untaint ) && (is_tainted($dir) )) { 249 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 250 unless (defined $abs_dir) { 251 if ($untaint_skip == 0) { 252 die "directory $dir is still tainted"; 253 } 254 else { 255 next Proc_Top_Item; 256 } 257 } 258 } 259 260 unless ($no_chdir || chdir $abs_dir) { 261 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 262 next Proc_Top_Item; 263 } 264 265 $name = $abs_dir . $_; # $File::Find::name 266 $_ = $name if $no_chdir; 267 268 { $wanted_callback->() }; # protect against wild "next" 269 270 } 271 272 unless ( $no_chdir ) { 273 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 274 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 275 unless (defined $cwd_untainted) { 276 die "insecure cwd in find(depth)"; 277 } 278 $check_t_cwd = 0; 279 } 280 unless (chdir $cwd_untainted) { 281 die "Can't cd to $cwd: $!\n"; 282 } 283 } 284 } 285} 286 287# API: 288# $wanted 289# $p_dir : "parent directory" 290# $nlink : what came back from the stat 291# preconditions: 292# chdir (if not no_chdir) to dir 293 294sub _find_dir($$$) { 295 my ($wanted, $p_dir, $nlink) = @_; 296 my ($CdLvl,$Level) = (0,0); 297 my @Stack; 298 my @filenames; 299 my ($subcount,$sub_nlink); 300 my $SE= []; 301 my $dir_name= $p_dir; 302 my $dir_pref; 303 my $dir_rel = $File::Find::current_dir; 304 my $tainted = 0; 305 my $no_nlink; 306 307 if ($Is_Win32) { 308 $dir_pref 309 = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" ); 310 } elsif ($Is_VMS) { 311 312 # VMS is returning trailing .dir on directories 313 # and trailing . on files and symbolic links 314 # in UNIX syntax. 315 # 316 317 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.'; 318 319 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" ); 320 } 321 else { 322 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); 323 } 324 325 local ($dir, $name, $prune); 326 327 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 328 my $udir = $p_dir; 329 if (( $untaint ) && (is_tainted($p_dir) )) { 330 ( $udir ) = $p_dir =~ m|$untaint_pat|; 331 unless (defined $udir) { 332 if ($untaint_skip == 0) { 333 die "directory $p_dir is still tainted"; 334 } 335 else { 336 return; 337 } 338 } 339 } 340 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 341 warnings::warnif "Can't cd to $udir: $!\n"; 342 return; 343 } 344 } 345 346 # push the starting directory 347 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 348 349 while (defined $SE) { 350 unless ($bydepth) { 351 $dir= $p_dir; # $File::Find::dir 352 $name= $dir_name; # $File::Find::name 353 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 354 # prune may happen here 355 $prune= 0; 356 { $wanted_callback->() }; # protect against wild "next" 357 next if $prune; 358 } 359 360 # change to that directory 361 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 362 my $udir= $dir_rel; 363 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 364 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 365 unless (defined $udir) { 366 if ($untaint_skip == 0) { 367 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 368 } else { # $untaint_skip == 1 369 next; 370 } 371 } 372 } 373 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 374 warnings::warnif "Can't cd to (" . 375 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 376 next; 377 } 378 $CdLvl++; 379 } 380 381 $dir= $dir_name; # $File::Find::dir 382 383 # Get the list of files in the current directory. 384 my $dh; 385 unless (opendir $dh, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 386 warnings::warnif "Can't opendir($dir_name): $!\n"; 387 next; 388 } 389 @filenames = readdir $dh; 390 closedir($dh); 391 @filenames = $pre_process->(@filenames) if $pre_process; 392 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 393 394 # default: use whatever was specified 395 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 396 $no_nlink = $avoid_nlink; 397 # if dir has wrong nlink count, force switch to slower stat method 398 $no_nlink = 1 if ($nlink < 2); 399 400 if ($nlink == 2 && !$no_nlink) { 401 # This dir has no subdirectories. 402 for my $FN (@filenames) { 403 if ($Is_VMS) { 404 # Big hammer here - Compensate for VMS trailing . and .dir 405 # No win situation until this is changed, but this 406 # will handle the majority of the cases with breaking the fewest 407 408 $FN =~ s/\.dir\z//i; 409 $FN =~ s#\.$## if ($FN ne '.'); 410 } 411 next if $FN =~ $File::Find::skip_pattern; 412 413 $name = $dir_pref . $FN; # $File::Find::name 414 $_ = ($no_chdir ? $name : $FN); # $_ 415 { $wanted_callback->() }; # protect against wild "next" 416 } 417 418 } 419 else { 420 # This dir has subdirectories. 421 $subcount = $nlink - 2; 422 423 # HACK: insert directories at this position, so as to preserve 424 # the user pre-processed ordering of files (thus ensuring 425 # directory traversal is in user sorted order, not at random). 426 my $stack_top = @Stack; 427 428 for my $FN (@filenames) { 429 next if $FN =~ $File::Find::skip_pattern; 430 if ($subcount > 0 || $no_nlink) { 431 # Seen all the subdirs? 432 # check for directoriness. 433 # stat is faster for a file in the current directory 434 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 435 436 if (-d _) { 437 --$subcount; 438 $FN =~ s/\.dir\z//i if $Is_VMS; 439 # HACK: replace push to preserve dir traversal order 440 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 441 splice @Stack, $stack_top, 0, 442 [$CdLvl,$dir_name,$FN,$sub_nlink]; 443 } 444 else { 445 $name = $dir_pref . $FN; # $File::Find::name 446 $_= ($no_chdir ? $name : $FN); # $_ 447 { $wanted_callback->() }; # protect against wild "next" 448 } 449 } 450 else { 451 $name = $dir_pref . $FN; # $File::Find::name 452 $_= ($no_chdir ? $name : $FN); # $_ 453 { $wanted_callback->() }; # protect against wild "next" 454 } 455 } 456 } 457 } 458 continue { 459 while ( defined ($SE = pop @Stack) ) { 460 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 461 if ($CdLvl > $Level && !$no_chdir) { 462 my $tmp; 463 if ($Is_VMS) { 464 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']'; 465 } 466 else { 467 $tmp = join('/',('..') x ($CdLvl-$Level)); 468 } 469 die "Can't cd to $tmp from $dir_name: $!" 470 unless chdir ($tmp); 471 $CdLvl = $Level; 472 } 473 474 if ($Is_Win32) { 475 $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} 476 ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"); 477 $dir_pref = "$dir_name/"; 478 } 479 elsif ($^O eq 'VMS') { 480 if ($p_dir =~ m/[\]>]+$/) { 481 $dir_name = $p_dir; 482 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/; 483 $dir_pref = $dir_name; 484 } 485 else { 486 $dir_name = "$p_dir/$dir_rel"; 487 $dir_pref = "$dir_name/"; 488 } 489 } 490 else { 491 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 492 $dir_pref = "$dir_name/"; 493 } 494 495 if ( $nlink == -2 ) { 496 $name = $dir = $p_dir; # $File::Find::name / dir 497 $_ = $File::Find::current_dir; 498 $post_process->(); # End-of-directory processing 499 } 500 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 501 $name = $dir_name; 502 if ( substr($name,-2) eq '/.' ) { 503 substr($name, length($name) == 2 ? -1 : -2) = ''; 504 } 505 $dir = $p_dir; 506 $_ = ($no_chdir ? $dir_name : $dir_rel ); 507 if ( substr($_,-2) eq '/.' ) { 508 substr($_, length($_) == 2 ? -1 : -2) = ''; 509 } 510 { $wanted_callback->() }; # protect against wild "next" 511 } 512 else { 513 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 514 last; 515 } 516 } 517 } 518} 519 520 521# API: 522# $wanted 523# $dir_loc : absolute location of a dir 524# $p_dir : "parent directory" 525# preconditions: 526# chdir (if not no_chdir) to dir 527 528sub _find_dir_symlnk($$$) { 529 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 530 my @Stack; 531 my @filenames; 532 my $new_loc; 533 my $updir_loc = $dir_loc; # untainted parent directory 534 my $SE = []; 535 my $dir_name = $p_dir; 536 my $dir_pref; 537 my $loc_pref; 538 my $dir_rel = $File::Find::current_dir; 539 my $byd_flag; # flag for pending stack entry if $bydepth 540 my $tainted = 0; 541 my $ok = 1; 542 543 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); 544 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); 545 546 local ($dir, $name, $fullname, $prune); 547 548 unless ($no_chdir) { 549 # untaint the topdir 550 if (( $untaint ) && (is_tainted($dir_loc) )) { 551 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 552 # once untainted, $updir_loc is pushed on the stack (as parent directory); 553 # hence, we don't need to untaint the parent directory every time we chdir 554 # to it later 555 unless (defined $updir_loc) { 556 if ($untaint_skip == 0) { 557 die "directory $dir_loc is still tainted"; 558 } 559 else { 560 return; 561 } 562 } 563 } 564 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 565 unless ($ok) { 566 warnings::warnif "Can't cd to $updir_loc: $!\n"; 567 return; 568 } 569 } 570 571 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 572 573 while (defined $SE) { 574 575 unless ($bydepth) { 576 # change (back) to parent directory (always untainted) 577 unless ($no_chdir) { 578 unless (chdir $updir_loc) { 579 warnings::warnif "Can't cd to $updir_loc: $!\n"; 580 next; 581 } 582 } 583 $dir= $p_dir; # $File::Find::dir 584 $name= $dir_name; # $File::Find::name 585 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 586 $fullname= $dir_loc; # $File::Find::fullname 587 # prune may happen here 588 $prune= 0; 589 lstat($_); # make sure file tests with '_' work 590 { $wanted_callback->() }; # protect against wild "next" 591 next if $prune; 592 } 593 594 # change to that directory 595 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 596 $updir_loc = $dir_loc; 597 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 598 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 599 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 600 unless (defined $updir_loc) { 601 if ($untaint_skip == 0) { 602 die "directory $dir_loc is still tainted"; 603 } 604 else { 605 next; 606 } 607 } 608 } 609 unless (chdir $updir_loc) { 610 warnings::warnif "Can't cd to $updir_loc: $!\n"; 611 next; 612 } 613 } 614 615 $dir = $dir_name; # $File::Find::dir 616 617 # Get the list of files in the current directory. 618 my $dh; 619 unless (opendir $dh, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 620 warnings::warnif "Can't opendir($dir_loc): $!\n"; 621 next; 622 } 623 @filenames = readdir $dh; 624 closedir($dh); 625 626 for my $FN (@filenames) { 627 if ($Is_VMS) { 628 # Big hammer here - Compensate for VMS trailing . and .dir 629 # No win situation until this is changed, but this 630 # will handle the majority of the cases with breaking the fewest. 631 632 $FN =~ s/\.dir\z//i; 633 $FN =~ s#\.$## if ($FN ne '.'); 634 } 635 next if $FN =~ $File::Find::skip_pattern; 636 637 # follow symbolic links / do an lstat 638 $new_loc = Follow_SymLink($loc_pref.$FN); 639 640 # ignore if invalid symlink 641 unless (defined $new_loc) { 642 if (!defined -l _ && $dangling_symlinks) { 643 $fullname = undef; 644 if (ref $dangling_symlinks eq 'CODE') { 645 $dangling_symlinks->($FN, $dir_pref); 646 } else { 647 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n"; 648 } 649 } 650 else { 651 $fullname = $loc_pref . $FN; 652 } 653 $name = $dir_pref . $FN; 654 $_ = ($no_chdir ? $name : $FN); 655 { $wanted_callback->() }; 656 next; 657 } 658 659 if (-d _) { 660 if ($Is_VMS) { 661 $FN =~ s/\.dir\z//i; 662 $FN =~ s#\.$## if ($FN ne '.'); 663 $new_loc =~ s/\.dir\z//i; 664 $new_loc =~ s#\.$## if ($new_loc ne '.'); 665 } 666 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 667 } 668 else { 669 $fullname = $new_loc; # $File::Find::fullname 670 $name = $dir_pref . $FN; # $File::Find::name 671 $_ = ($no_chdir ? $name : $FN); # $_ 672 { $wanted_callback->() }; # protect against wild "next" 673 } 674 } 675 676 } 677 continue { 678 while (defined($SE = pop @Stack)) { 679 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 680 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 681 $dir_pref = "$dir_name/"; 682 $loc_pref = "$dir_loc/"; 683 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 684 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 685 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 686 warnings::warnif "Can't cd to $updir_loc: $!\n"; 687 next; 688 } 689 } 690 $fullname = $dir_loc; # $File::Find::fullname 691 $name = $dir_name; # $File::Find::name 692 if ( substr($name,-2) eq '/.' ) { 693 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 694 } 695 $dir = $p_dir; # $File::Find::dir 696 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 697 if ( substr($_,-2) eq '/.' ) { 698 substr($_, length($_) == 2 ? -1 : -2) = ''; 699 } 700 701 lstat($_); # make sure file tests with '_' work 702 { $wanted_callback->() }; # protect against wild "next" 703 } 704 else { 705 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 706 last; 707 } 708 } 709 } 710} 711 712 713sub wrap_wanted { 714 my $wanted = shift; 715 if ( ref($wanted) eq 'HASH' ) { 716 # RT #122547 717 my %valid_options = map {$_ => 1} qw( 718 wanted 719 bydepth 720 preprocess 721 postprocess 722 follow 723 follow_fast 724 follow_skip 725 dangling_symlinks 726 no_chdir 727 untaint 728 untaint_pattern 729 untaint_skip 730 ); 731 my @invalid_options = (); 732 for my $v (keys %{$wanted}) { 733 push @invalid_options, $v unless exists $valid_options{$v}; 734 } 735 warn "Invalid option(s): @invalid_options" if @invalid_options; 736 737 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) { 738 die 'no &wanted subroutine given'; 739 } 740 if ( $wanted->{follow} || $wanted->{follow_fast}) { 741 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 742 } 743 if ( $wanted->{untaint} ) { 744 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 745 unless defined $wanted->{untaint_pattern}; 746 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 747 } 748 return $wanted; 749 } 750 elsif( ref( $wanted ) eq 'CODE' ) { 751 return { wanted => $wanted }; 752 } 753 else { 754 die 'no &wanted subroutine given'; 755 } 756} 757 758sub find { 759 my $wanted = shift; 760 _find_opt(wrap_wanted($wanted), @_); 761} 762 763sub finddepth { 764 my $wanted = wrap_wanted(shift); 765 $wanted->{bydepth} = 1; 766 _find_opt($wanted, @_); 767} 768 769# default 770$File::Find::skip_pattern = qr/^\.{1,2}\z/; 771$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 772 773# this _should_ work properly on all platforms 774# where File::Find can be expected to work 775$File::Find::current_dir = File::Spec->curdir || '.'; 776 777$File::Find::dont_use_nlink = 1; 778 779# We need a function that checks if a scalar is tainted. Either use the 780# Scalar::Util module's tainted() function or our (slower) pure Perl 781# fallback is_tainted_pp() 782{ 783 local $@; 784 eval { require Scalar::Util }; 785 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 786} 787 7881; 789 790__END__ 791 792=head1 NAME 793 794File::Find - Traverse a directory tree. 795 796=head1 SYNOPSIS 797 798 use File::Find; 799 find(\&wanted, @directories_to_search); 800 sub wanted { ... } 801 802 use File::Find; 803 finddepth(\&wanted, @directories_to_search); 804 sub wanted { ... } 805 806 use File::Find; 807 find({ wanted => \&process, follow => 1 }, '.'); 808 809=head1 DESCRIPTION 810 811These are functions for searching through directory trees doing work 812on each file found similar to the Unix I<find> command. File::Find 813exports two functions, C<find> and C<finddepth>. They work similarly 814but have subtle differences. 815 816=over 4 817 818=item B<find> 819 820 find(\&wanted, @directories); 821 find(\%options, @directories); 822 823C<find()> does a depth-first search over the given C<@directories> in 824the order they are given. For each file or directory found, it calls 825the C<&wanted> subroutine. (See below for details on how to use the 826C<&wanted> function). Additionally, for each directory found, it will 827C<chdir()> into that directory and continue the search, invoking the 828C<&wanted> function on each file or subdirectory in the directory. 829 830=item B<finddepth> 831 832 finddepth(\&wanted, @directories); 833 finddepth(\%options, @directories); 834 835C<finddepth()> works just like C<find()> except that it invokes the 836C<&wanted> function for a directory I<after> invoking it for the 837directory's contents. It does a postorder traversal instead of a 838preorder traversal, working from the bottom of the directory tree up 839where C<find()> works from the top of the tree down. 840 841=back 842 843Despite the name of the C<finddepth()> function, both C<find()> and 844C<finddepth()> perform a depth-first search of the directory hierarchy. 845 846=head2 %options 847 848The first argument to C<find()> is either a code reference to your 849C<&wanted> function, or a hash reference describing the operations 850to be performed for each file. The 851code reference is described in L</The wanted function> below. 852 853Here are the possible keys for the hash: 854 855=over 4 856 857=item C<wanted> 858 859The value should be a code reference. This code reference is 860described in L</The wanted function> below. The C<&wanted> subroutine is 861mandatory. 862 863=item C<bydepth> 864 865Reports the name of a directory only AFTER all its entries 866have been reported. Entry point C<finddepth()> is a shortcut for 867specifying C<< { bydepth => 1 } >> in the first argument of C<find()>. 868 869=item C<preprocess> 870 871The value should be a code reference. This code reference is used to 872preprocess the current directory. The name of the currently processed 873directory is in C<$File::Find::dir>. Your preprocessing function is 874called after C<readdir()>, but before the loop that calls the C<wanted()> 875function. It is called with a list of strings (actually file/directory 876names) and is expected to return a list of strings. The code can be 877used to sort the file/directory names alphabetically, numerically, 878or to filter out directory entries based on their name alone. When 879I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op. 880 881=item C<postprocess> 882 883The value should be a code reference. It is invoked just before leaving 884the currently processed directory. It is called in void context with no 885arguments. The name of the current directory is in C<$File::Find::dir>. This 886hook is handy for summarizing a directory, such as calculating its disk 887usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a 888no-op. 889 890=item C<follow> 891 892Causes symbolic links to be followed. Since directory trees with symbolic 893links (followed) may contain files more than once and may even have 894cycles, a hash has to be built up with an entry for each file. 895This might be expensive both in space and time for a large 896directory tree. See L</follow_fast> and L</follow_skip> below. 897If either I<follow> or I<follow_fast> is in effect: 898 899=over 4 900 901=item * 902 903It is guaranteed that an I<lstat> has been called before the user's 904C<wanted()> function is called. This enables fast file checks involving C<_>. 905Note that this guarantee no longer holds if I<follow> or I<follow_fast> 906are not set. 907 908=item * 909 910There is a variable C<$File::Find::fullname> which holds the absolute 911pathname of the file with all symbolic links resolved. If the link is 912a dangling symbolic link, then fullname will be set to C<undef>. 913 914=back 915 916This is a no-op on Win32. 917 918=item C<follow_fast> 919 920This is similar to I<follow> except that it may report some files more 921than once. It does detect cycles, however. Since only symbolic links 922have to be hashed, this is much cheaper both in space and time. If 923processing a file more than once (by the user's C<wanted()> function) 924is worse than just taking time, the option I<follow> should be used. 925 926This is also a no-op on Win32. 927 928=item C<follow_skip> 929 930C<follow_skip==1>, which is the default, causes all files which are 931neither directories nor symbolic links to be ignored if they are about 932to be processed a second time. If a directory or a symbolic link 933are about to be processed a second time, File::Find dies. 934 935C<follow_skip==0> causes File::Find to die if any file is about to be 936processed a second time. 937 938C<follow_skip==2> causes File::Find to ignore any duplicate files and 939directories but to proceed normally otherwise. 940 941=item C<dangling_symlinks> 942 943Specifies what to do with symbolic links whose target doesn't exist. 944If true and a code reference, will be called with the symbolic link 945name and the directory it lives in as arguments. Otherwise, if true 946and warnings are on, a warning of the form C<"symbolic_link_name is a dangling 947symbolic link\n"> will be issued. If false, the dangling symbolic link 948will be silently ignored. 949 950=item C<no_chdir> 951 952Does not C<chdir()> to each directory as it recurses. The C<wanted()> 953function will need to be aware of this, of course. In this case, 954C<$_> will be the same as C<$File::Find::name>. 955 956=item C<untaint> 957 958If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or 959if EUID != UID or if EGID != GID), then internally directory names have to be 960untainted before they can be C<chdir>'d to. Therefore they are checked against 961a regular expression I<untaint_pattern>. Note that all names passed to the 962user's C<wanted()> function are still tainted. If this option is used while not 963in taint-mode, C<untaint> is a no-op. 964 965=item C<untaint_pattern> 966 967See above. This should be set using the C<qr> quoting operator. 968The default is set to C<qr|^([-+@\w./]+)$|>. 969Note that the parentheses are vital. 970 971=item C<untaint_skip> 972 973If set, a directory which fails the I<untaint_pattern> is skipped, 974including all its sub-directories. The default is to C<die> in such a case. 975 976=back 977 978=head2 The wanted function 979 980The C<wanted()> function does whatever verifications you want on 981each file and directory. Note that despite its name, the C<wanted()> 982function is a generic callback function, and does B<not> tell 983File::Find if a file is "wanted" or not. In fact, its return value 984is ignored. 985 986The wanted function takes no arguments but rather does its work 987through a collection of variables. 988 989=over 4 990 991=item C<$File::Find::dir> is the current directory name, 992 993=item C<$_> is the current filename within that directory 994 995=item C<$File::Find::name> is the complete pathname to the file. 996 997=back 998 999The above variables have all been localized and may be changed without 1000affecting data outside of the wanted function. 1001 1002For example, when examining the file F</some/path/foo.ext> you will have: 1003 1004 $File::Find::dir = /some/path/ 1005 $_ = foo.ext 1006 $File::Find::name = /some/path/foo.ext 1007 1008You are chdir()'d to C<$File::Find::dir> when the function is called, 1009unless C<no_chdir> was specified. Note that when changing to 1010directories is in effect, the root directory (F</>) is a somewhat 1011special case inasmuch as the concatenation of C<$File::Find::dir>, 1012C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 1013table below summarizes all variants: 1014 1015 $File::Find::name $File::Find::dir $_ 1016 default / / . 1017 no_chdir=>0 /etc / etc 1018 /etc/x /etc x 1019 1020 no_chdir=>1 / / / 1021 /etc / /etc 1022 /etc/x /etc /etc/x 1023 1024 1025When C<follow> or C<follow_fast> are in effect, there is 1026also a C<$File::Find::fullname>. The function may set 1027C<$File::Find::prune> to prune the tree unless C<bydepth> was 1028specified. Unless C<follow> or C<follow_fast> is specified, for 1029compatibility reasons (find.pl, find2perl) there are in addition the 1030following globals available: C<$File::Find::topdir>, 1031C<$File::Find::topdev>, C<$File::Find::topino>, 1032C<$File::Find::topmode> and C<$File::Find::topnlink>. 1033 1034This library is useful for the C<find2perl> tool (distributed as part of the 1035App-find2perl CPAN distribution), which when fed, 1036 1037 find2perl / -name .nfs\* -mtime +7 \ 1038 -exec rm -f {} \; -o -fstype nfs -prune 1039 1040produces something like: 1041 1042 sub wanted { 1043 /^\.nfs.*\z/s && 1044 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 1045 int(-M _) > 7 && 1046 unlink($_) 1047 || 1048 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 1049 $dev < 0 && 1050 ($File::Find::prune = 1); 1051 } 1052 1053Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 1054filehandle that caches the information from the preceding 1055C<stat()>, C<lstat()>, or filetest. 1056 1057Here's another interesting wanted function. It will find all symbolic 1058links that don't resolve: 1059 1060 sub wanted { 1061 -l && !-e && print "bogus link: $File::Find::name\n"; 1062 } 1063 1064Note that you may mix directories and (non-directory) files in the list of 1065directories to be searched by the C<wanted()> function. 1066 1067 find(\&wanted, "./foo", "./bar", "./baz/epsilon"); 1068 1069In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be 1070evaluated by C<wanted()>. 1071 1072See also the script C<pfind> on CPAN for a nice application of this 1073module. 1074 1075=head1 WARNINGS 1076 1077If you run your program with the C<-w> switch, or if you use the 1078C<warnings> pragma, File::Find will report warnings for several weird 1079situations. You can disable these warnings by putting the statement 1080 1081 no warnings 'File::Find'; 1082 1083in the appropriate scope. See L<warnings> for more info about lexical 1084warnings. 1085 1086=head1 BUGS AND CAVEATS 1087 1088=over 4 1089 1090=item $dont_use_nlink 1091 1092You can set the variable C<$File::Find::dont_use_nlink> to 0 if you 1093are sure the filesystem you are scanning reflects the number of 1094subdirectories in the parent directory's C<nlink> count. 1095 1096If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an 1097improvement in speed at the risk of not recursing into subdirectories 1098if a filesystem doesn't populate C<nlink> as expected. 1099 1100C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms. 1101 1102=item symlinks 1103 1104Be aware that the option to follow symbolic links can be dangerous. 1105Depending on the structure of the directory tree (including symbolic 1106links to directories) you might traverse a given (physical) directory 1107more than once (only if C<follow_fast> is in effect). 1108Furthermore, deleting or changing files in a symbolically linked directory 1109might cause very unpleasant surprises, since you delete or change files 1110in an unknown directory. 1111 1112=back 1113 1114=head1 HISTORY 1115 1116File::Find used to produce incorrect results if called recursively. 1117During the development of perl 5.8 this bug was fixed. 1118The first fixed version of File::Find was 1.01. 1119 1120=head1 SEE ALSO 1121 1122L<find(1)>, find2perl. 1123 1124=cut 1125