1#!/usr/local/bin/perl -w
2'di';
3'ig00';
4##############################################################################
5##
6## search
7##
8## Jeffrey Friedl (jfriedl@omron.co.jp), Dec 1994.
9## Copyright 19.... ah hell, just take it.
10##
11## BLURB:
12## A combo of find and grep -- more or less do a 'grep' on a whole
13## directory tree. Fast, with lots of options. Much more powerful than
14## the simple "find ... | xargs grep ....". Has a full man page.
15## Powerfully customizable.
16##
17## This file is big, but mostly comments and man page.
18##
19## See man page for usage info.
20## Return value: 2=error, 1=nothing found, 0=something found.
21##
22
23$version = "950918.5";
24##
25## "950918.5";
26##	Changed all 'sysread' to 'read' because Linux perl's don't seem
27##	to like sysread()
28##
29## "941227.4";
30##	Added -n, -u
31##
32## "941222.3"
33##      Added -nice (due to Lionel Cons <Lionel.Cons@cern.ch>)
34##	Removed any leading "./" from name.
35##      Added default flags for ~/.search, including TTY, -nice, -list, etc.
36##	Program name now has path removed when printed in diagnostics.
37##	Added simple tilde-expansion to -dir arg.
38##	Added -dskip, etc. Fixed -iregex bug.
39##	Changed -dir to be additive, adding -ddir.
40##	Now screen out devices, pipes, and sockets.
41##	More tidying and lots of expanding of the man page
42##
43##
44## "941217.2";
45##	initial release.
46
## Flag telling whether this is the full script (0) or a copy written by
## &strip, which rewrites the assignment below so that the copy holds 1.
## The -help text only advertises the built-in man page when this is 0.
$stripped=0;

## Give every option variable its default value.
&init;
## The startup file is "$HOME/.search"; when HOME is not in the
## environment the name is left empty.
if (exists $ENV{'HOME'}) {
    $rc_file = join('/', $ENV{'HOME'}, ".search");
}
else {
    $rc_file = "";
}

## Consume the leading options; whatever is left in @ARGV afterwards is
## taken to be the list of regexes.
&check_args;

## Make sure we've got a regex.
## Don't need one if -find or -showrc was specified.
$!=2, die "expecting regex arguments.\n"
	if $FIND_ONLY == 0 && $showrc == 0 && @ARGV == 0;

## Reads the rc file (unless -norc) and builds the test strings used
## during the search. Exits by itself when -showrc was given.
&prepare_to_search($rc_file);

&import_program if !defined &dodir; ## BIG key to speed.

## do search while there are directories to be done.
&dodir(shift(@todo)) while @todo;

## Wipe any progress message still on the terminal, then report the
## result: $retval starts out as 1 in &init and is set to 0 when
## something is found.
&clear_message if $VERBOSE && $STDERR_IS_TTY;
exit($retval);
73###############################################################################
74
##
## Give every global its default value. Called once, before any option
## (from the command line or the rc file) is looked at. Takes no arguments
## and returns nothing useful.
##
sub init
{
  ## initialize variables that might be reset by command-line args
  $DOREP=0; 		## set true by -dorep (redo multi-hardlink files)
  $DOREP=1 if $^O eq 'MSWin32';
  $DO_SORT=0;           ## set by -sort (sort files in a dir before checking)
  $FIND_ONLY=0;         ## set by -find (don't search files)
  $LIST_ONLY=0;		## set true by -l (list filenames only)
  $NEWER=0;             ## set by -newer, "-mtime -###"
  $NICE=0;              ## set by -nice (print human-readable output)
  $NOLINKS=0; 		## set true by -nolinks (don't follow symlinks)
  $OLDER=0;             ## set by -older, "-mtime  ###"
  $PREPEND_FILENAME=1;  ## set false by -h (don't prefix lines with filename)
  $REPORT_LINENUM=0;    ## set true by -n (show line numbers)
  $VERBOSE=0;		## set to a value by -v, -vv, etc. (verbose messages)
  $WHY=0;		## set true by -why, -vvv+ (report why skipped)
  $XDEV=0;		## set true by -xdev (stay on one filesystem)
  $all=0;		## set true by -all (don't skip many kinds of files)
  $iflag = '';		## set to 'i' by -i (ignore case);
  $norc=0;              ## set by -norc (don't load rc file)
  $showrc=0;            ## set by -showrc (show what happens with rc file)
  $underlineOK=0;       ## set true by -u (watch for underline stuff)
  $words=0;             ## set true by -w (match whole-words only)
  $DELAY=0;		## inter-file delay (seconds)
  $retval=1;            ## will set to 0 if we find anything.

  ## Indices into the list returned by stat() for the fields we use.
  ## The list is zero-based: (dev, ino, mode, nlink, uid, gid, rdev,
  ## size, atime, mtime, ...), so the device is element 0 and the inode
  ## is element 1.
  $STAT_DEV   = 0;
  $STAT_INODE = 1;
  $STAT_MTIME = 9;

  $VV_PRINT_COUNT = 50;  ## with -vv, print every VV_PRINT_COUNT files, or...
  $VV_SIZE = 1024*1024;  ## ...every VV_SIZE bytes searched
  $vv_print = $vv_size = 0; ## running totals.

  ## set default options, in case the rc file wants them
  $opt{'TTY'}= 1 if -t STDOUT;

  ## want to know this for debugging message stuff
  $STDERR_IS_TTY = -t STDERR ? 1 : 0;
  $STDERR_SCREWS_STDOUT = ($STDERR_IS_TTY && -t STDOUT) ? 1 : 0;

  $0 =~ s,.*/,,;  ## clean up $0 for any diagnostics we'll be printing.
}
119
120##
121## Check arguments.
122##
## Consumes leading option words from @ARGV, recording each one in the
## global it controls (and, for options the rc file may test, in %opt).
## Processing stops at the first word that does not start with '-', or
## just after -e; the remaining words are left in @ARGV for the caller.
## -version and -help print their text and exit(0). An unknown option, or
## a missing or unusable option argument, dies with $! set to 2.
sub check_args
{
  while (@ARGV && $ARGV[0] =~ m/^-/)
  {
      $arg = shift(@ARGV);

      ## With -v already seen, -help prints the version line first.
      if ($arg eq '-version' || ($VERBOSE && $arg eq '-help')) {
	  print qq/Jeffrey's file search, version "$version".\n/;
	  exit(0) unless $arg eq '-help';
      }
      if ($arg eq '-help') {
	  print <<INLINE_LITERAL_TEXT;
usage: $0 [options] [-e] [PerlRegex ....]
OPTIONS TELLING *WHERE* TO SEARCH:
  -dir DIR       start search at the named directory (default is current dir).
  -xdev          stay on starting file system.
  -sort          sort the files in each directory before processing.
  -nolinks       don't follow symbolic links.
OPTIONS TELLING WHICH FILES TO EVEN CONSIDER:
  -mtime #       consider files modified > # days ago (-# for < # days old)
  -newer FILE    consider files modified more recently than FILE (also -older)
  -name GLOB     consider files whose name matches pattern (also -regex).
  -skip GLOB     opposite of -name: identifies files to not consider.
  -path GLOB     like -name, but for files whose whole path is described.
  -dpath/-dregex/-dskip versions for selecting or pruning directories.
  -all           don't skip any files marked to be skipped by the startup file.
  -x<SPECIAL>    (see manual, and/or try -showrc).
  -why           report why a file isn't checked (also implied by -vvvv).
OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED:
  -f  | -find    just list files (PerlRegex ignored). Default is to grep them.
  -ff | -ffind   Does a faster -find (implies -find -all -dorep)
OPTIONS CONTROLLING HOW THE SEARCH IS DONE (AND WHAT IS PRINTED):
  -l | -list     only list files with matches, not the lines themselves.
  -nice | -nnice print more "human readable" output.
  -n             prefix each output line with its line number in the file.
  -h             don't prefix output lines with file name.
  -u             also look "inside" manpage-style underlined text
  -i             do case-insensitive searching.
  -w             match words only (as defined by perl's \\b).
OTHER OPTIONS:
  -v, -vv, -vvv  various levels of message verbosity.
  -e             end of options (in case a regex looks like an option).
  -showrc        show what the rc file sets, then exit.
  -norc          don't load the rc file.
  -dorep         check files with multiple hard links multiple times.
INLINE_LITERAL_TEXT
	print "Use -v -help for more verbose help.\n" unless $VERBOSE;
	print "This script file is also a man page.\n" unless $stripped;
	print <<INLINE_LITERAL_TEXT if $VERBOSE;

If -f (or -find) given, PerlRegex is optional and ignored.
Otherwise, will search for files with lines matching any of the given regexes.

Combining things like -name and -mtime implies boolean AND.
However, duplicating things (such as -name '*.c' -name '*.txt') implies OR.

-mtime may be given floating point (i.e. 1.5 is a day and a half).
-iskip/-idskip/-ipath/... etc are case-insensitive versions.

If any letter in -newer/-older is upper case, "or equal" is
inserted into the test.

INLINE_LITERAL_TEXT
	  exit(0);
      }
      $DOREP=1,             next if $arg eq '-dorep';   ## do repeats
      $DO_SORT=1,           next if $arg eq '-sort';    ## sort files
      $NOLINKS=1,           next if $arg eq '-nolinks'; ## no sym. links
      $PREPEND_FILENAME=0,  next if $arg eq '-h';       ## no filename prefix
      $REPORT_LINENUM=1,    next if $arg eq '-n';       ## show line numbers
      $WHY=1,               next if $arg eq '-why';     ## tell why skipped
      $XDEV=1,              next if $arg eq '-xdev';    ## don't leave F.S.
      $all=1,$opt{'-all'}=1,next if $arg eq '-all';     ## don't skip *.Z, etc
      $iflag='i',           next if $arg eq '-i';       ## ignore case
      $norc=1,              next if $arg eq '-norc';    ## don't load rc file
      $showrc=1,            next if $arg eq '-showrc';  ## show rc file
      $underlineOK=1,       next if $arg eq '-u';       ## look through underln.
      $words=1,             next if $arg eq '-w';       ## match "words" only
      &strip                     if $arg eq '-strip';   ## dump this program
      last                       if $arg eq '-e';
      ## Anchored so that only "-delayN" itself is taken, not any option
      ## that merely contains that text (a -x user option, for example).
      $DELAY=$1,            next if $arg =~ m/^-delay(\d+)$/;

      $FIND_ONLY=1,         next if $arg =~/^-f(ind)?$/;## do "find" only

      $FIND_ONLY=1, $DOREP=1, $all=1,
                            next if $arg =~/^-ff(ind)?$/;## fast -find
      $LIST_ONLY=1,$opt{'-list'}=1,
		            next if $arg =~/^-l(ist)?$/;## only list files

      ## -vvv also marks -v and -vv as given, for rc-file tag tests.
      if ($arg =~ m/^-(v+)$/) { ## verbosity
	$VERBOSE =length($1);
	foreach $len (1..$VERBOSE) { $opt{'-'.('v' x $len)}=1 }
	next;
      }
      if ($arg =~ m/^-(n+)ice$/) { ## "nice" output
        $NICE =length($1);
	foreach $len (1..$NICE) { $opt{'-'.('n' x $len).'ice'}=1 }
	next;
      }

      ## -skip / -iskip / -dskip / -idskip: one argument holding a
      ## whitespace-separated list of globs.
      if ($arg =~ m/^-(i?)(d?)skip$/) {
	  my $nocase  = $1 eq 'i';
	  my $for_dir = $2 eq 'd';
	  $! = 2, die qq/$0: expecting glob arg to $arg\n/ unless @ARGV;
	  foreach (split(/\s+/, shift @ARGV)) {
	      if ($for_dir) {
		  $idskip{$_}=1 if $nocase;
		   $dskip{$_}=1;
	      } else {
		  $iskip{$_}=1 if $nocase;
		   $skip{$_}=1;
	      }
	  }
	  next;
      }


      ## -name / -path / -regex and their i- and d- variants. Each pattern
      ## is stored under "OPTION,PATTERN" so the option can be recovered
      ## when the tests are built.
      if ($arg =~ m/^-(i?)(d?)(regex|path|name)$/) {
	  my $nocase = $1 eq 'i';
	  $! = 2, die qq/$0: expecting arg to $arg\n/ unless @ARGV;
	  foreach (split(/\s+/, shift @ARGV)) {
	      $iname{join(',', $arg, $_)}=1 if $nocase;
	       $name{join(',', $arg, $_)}=1;
	  }
	  next;
      }

      ## -dir adds a starting directory; -ddir first discards those
      ## collected so far. A leading "~" is expanded to $HOME.
      if ($arg =~ m/^-d?dir$/) {
	  $opt{'-dir'}=1;
	  $! = 2, die qq/$0: expecting filename arg to $arg\n/ unless @ARGV;
	  $start = shift(@ARGV);
	  $start =~ s#^~(/+|$)#$ENV{'HOME'}$1# if defined $ENV{'HOME'};
	  $! = 2, die qq/$0: can't find ${arg}'s "$start"\n/ unless -e $start;
	  $! = 2, die qq/$0: ${arg}'s "$start" not a directory.\n/ unless -d _;
	  undef(@todo), $opt{'-ddir'}=1 if $arg eq '-ddir';
	  push(@todo, $start);
	  next;
      }

      ## -newer / -older FILE. $NEWER is the smallest acceptable mtime
      ## and $OLDER the largest, so a strict comparison moves the bound
      ## one second past FILE's mtime; any upper-case letter in the
      ## option name means "or equal" and leaves the bound on it.
      if ($arg =~ m/^-(new|old)er$/i) {
	  $! = 2, die "$0: expecting filename arg to $arg\n" unless @ARGV;
	  my ($file, $time) = shift(@ARGV);
	  $! = 2, die qq/$0: can't stat ${arg}'s "$file".\n/
		  unless $time = (stat($file))[$STAT_MTIME];
	  my $upper = ($arg =~ tr/A-Z//);
	  if ($arg =~ m/new/i) {
	     $time++ unless $upper;
	     $NEWER = $time if $NEWER < $time;
	  } else {
	     $time-- unless $upper;
	     $OLDER = $time if $OLDER == 0 || $OLDER > $time;
	  }
	  next;
      }

      ## -mtime DAYS: negative means "modified within the last DAYS",
      ## positive means "modified more than DAYS ago". Times are taken
      ## relative to program start ($^T).
      if ($arg eq '-mtime') {
	  $! = 2, die "$0: expecting numerical arg to $arg\n" unless @ARGV;
	  my $days = shift(@ARGV);
	  $! = 2, die qq/$0: inappropriate arg ($days) to $arg\n/ if $days==0;
	  $days *= 3600 * 24;
	  if ($days < 0) {
	      my $time = $^T + $days;
	      $NEWER = $time if $NEWER < $time;
	  } else {
	      my $time = $^T - $days;
	      $OLDER = $time if $OLDER == 0 || $OLDER > $time;
	  }
	  next;
      }

      ## special user options
      if ($arg =~ m/^-x(.+)/) {
	  foreach (split(/[\s,]+/, $1)) {  $user_opt{$_} = $opt{$_}= 1;  }
	  next;
      }

      $! = 2, die "$0: unknown arg [$arg]\n";
  }
}
302
303##
304## Given a filename glob, return a regex.
305## If the glob has no globbing chars (no * ? or [..]), then
306## prepend an effective '*' to it.
307##
## Argument: one shell-style glob.
## Returns: a regex source string anchored with ^ and $. Callers in this
## file embed it between m/ and /, which is why every non-word literal
## (including any slash) is backslash-quoted.
sub glob_to_regex
{
    my ($glob) = @_;

    ## Split into tokens: a backslash-escaped character, a single
    ## wildcard, a [...] class, or a run of ordinary characters.
    my @parts = $glob =~ m/\\.|[*?]|\[]?[^]]*]|[^[\\*?]+/g;

    my $trueglob = 0;  ## set once any real glob construct is seen
    foreach my $part (@parts) {
	if ($part eq '*') {
	    $part = '.*';    ## any run of characters
	    $trueglob = 1;
	} elsif ($part eq '?') {
	    $part = '.';     ## exactly one character (not an optional one)
	    $trueglob = 1;
	} elsif (substr($part, 0, 1) eq '[') {
	    $trueglob = 1;   ## [..] is passed through as a regex class
	} else {
	    $part =~ s/^\\//;          ## remove any leading backslash;
	    $part =~ s/(\W)/\\$1/g;    ## now quote anything dangerous;
	}
    }

    ## A glob with nothing special in it matches as a name ending.
    unshift(@parts, '.*') unless $trueglob;
    return join('', '^', @parts, '$');
}
327
## Turns everything gathered from the command line (and, unless -norc,
## the rc file named by the argument) into the settings used while
## searching: the source strings for the skip, name, magic and regex
## tests, plus the record of which starting directories are distinct.
## Exits with status 0 after reading the rc file when -showrc was given.
## Dies with $! = 2 on contradictory time limits or an unreadable
## starting directory.
sub prepare_to_search
{
  local($rc_file) = @_;

  $HEADER_BYTES=0;          ## Might be set nonzero in &read_rc;
  $last_message_length = 0; ## For &message and &clear_message.

  &read_rc($rc_file, $showrc) unless $norc;
  exit(0) if $showrc;

  $NEXT_DIR_ENTRY = $DO_SORT ? 'shift @files' : 'readdir(DIR)';
  $WHY = 1 if $VERBOSE > 3; ## Arg -vvvv or above implies  -why.
  @todo = ('.') if @todo == 0; ## Where we'll start looking

  ## see if any user options were specified that weren't accounted for
  foreach $opt (keys %user_opt) {
      next if defined $seen_opt{$opt};
      warn "warning: -x$opt never considered.\n";
  }

  $! = 2, die "$0: multiple time constraints exclude all possible files.\n"
      if ($NEWER && $OLDER) && ($NEWER > $OLDER);

  ##
  ## Process any -skip/-iskip args that had been given
  ##
  local(@skip_test);
  foreach $glob (keys %skip) {
      $i = defined($iskip{$glob}) ? 'i': '';
      push(@skip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
  }
  if (@skip_test) {
      $SKIP_TEST = join('||',@skip_test);
      $DO_SKIP_TEST = 1;
  } else {
      $DO_SKIP_TEST = $SKIP_TEST = 0;
  }

  ##
  ## Process any -dskip/-idskip args that had been given
  ##
  local(@dskip_test);
  foreach $glob (keys %dskip) {
      $i = defined($idskip{$glob}) ? 'i': '';
      push(@dskip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
  }
  if (@dskip_test) {
      $DSKIP_TEST = join('||',@dskip_test);
      $DO_DSKIP_TEST = 1;
  } else {
      $DO_DSKIP_TEST = $DSKIP_TEST = 0;
  }


  ##
  ## Process any -name, -path, -regex, etc. args that had been given.
  ## Each key of %name is "OPTION,PATTERN", where OPTION is the option
  ## word as it was typed (for example "-name" or "-idpath").
  ##
  undef @name_test;
  undef @dname_test;
  foreach $key (keys %name) {
      local($type, $pat) = split(/,/, $key, 2);
      local($i) = defined($iname{$key}) ? 'i' : '';
      if ($type =~ /regex/) {
	  $pat =~ s/!/\\!/g;
	  $test = "\$name =~ m!^$pat\$!$i";
      } else {
	  ## The -name family tests the last path component ($name); the
	  ## -path family tests the whole relative path ($file).
	  local($var) = $type =~ m/name$/ ? '$name' : '$file';
	  $test = "$var =~ m/". &glob_to_regex($pat). "/$i";
      }
      if ($type =~ m/^-i?d/) {
	  push(@dname_test, $test);
      } else {
	  push(@name_test, $test);
      }
  }
  if (@name_test) {
      $GLOB_TESTS = join('||', @name_test);

      $DO_GLOB_TESTS = 1;
  } else {
      $GLOB_TESTS = $DO_GLOB_TESTS = 0;
  }
  if (@dname_test) {
      $DGLOB_TESTS = join('||', @dname_test);
      $DO_DGLOB_TESTS = 1;
  } else {
      $DGLOB_TESTS = $DO_DGLOB_TESTS = 0;
  }


  ##
  ## Process any 'magic' things from the startup file.
  ##
  if (@magic_tests && $HEADER_BYTES) {
      ## the $magic' one is for when &dodir is not inlined
      $tests = join('||',@magic_tests);
      $MAGIC_TESTS = " { package magic; \$val = ($tests) }";
      $DO_MAGIC_TESTS = 1;
  } else {
      $MAGIC_TESTS = 1;
      $DO_MAGIC_TESTS = 0;
  }

  ##
  ## Prepare regular expressions.
  ##
  {
      local(@regex_tests);

      ## Always defined, so that building "m/.../$iflag$mflag" below never
      ## interpolates an undefined value when -l was not given.
      $mflag = '';
      if ($LIST_ONLY) {
	 ## need to have $* set, but perl5 just won't shut up about it.
	 if ($] >= 5) {
	      $mflag = 'm';
	 } else {
	      eval ' $* = 1 ';
	 }
      }

      ##
      ## Until I figure out a better way to deal with it,
      ## We have to worry about a regex like [^xyz] when doing $LIST_ONLY.
      ## Such a regex *will* match \n, and if I'm pulling in multiple
      ## lines, it can allow lines to match that would otherwise not match.
      ##
      ## Therefore, if there is a '[^' in a regex, we can NOT take a chance
      ## and use the fast listonly.
      ##
      $CAN_USE_FAST_LISTONLY = $LIST_ONLY;

      local(@extra);
      ## Optional "underscore, backspace" pair allowed between characters.
      local($underline_glue) = ($] >= 5) ? '(?:_\cH)?' : '(_\cH)?';
      while (@ARGV) {
          $regex = shift(@ARGV);
	  ##
	  ## If watching for underlined things too, add another regex.
	  ##
	  if ($underlineOK) {
	     if ($regex =~ m/[?*+{}()\\.|^\$[]/) {
		warn "$0: warning, can't underline-safe '$regex'.\n";
	     } else {
		$regex = join($underline_glue, split(//, $regex));
	     }
	  }

	  ## If nothing special in the regex, just use index...
	  ## is quite a bit faster.
	  if (($iflag eq '') && ($words == 0) &&
			$regex !~ m/[?*+{}()\\.|^\$[]/)
	  {
	      push(@regex_tests, "(index(\$_, q+$regex+)>=0)");

	  } else {
	      ## The regex is pasted between m/ and / in generated code, so
	      ## protect it: leave backslash pairs alone, escape any "$" or
	      ## "@" that would start a variable name, and escape every
	      ## bare "/" so it cannot end the match operator early.
	      $regex =~ s{(\\.)|([\$\@/])(?=\w)|(/)}
			 {defined $1 ? $1 : '\\' . (defined $2 ? $2 : $3)}ges;
	      if ($words) {
		  if ($regex =~ m/\|/) {
		      ## could be dangerous -- see if we can wrap in parens.
		      if ($regex =~ m/\\\d/) {
			  warn "warning: -w and a | in a regex is dangerous.\n"
		      } else {
			  ## join() with the regex as separator: "(" . regex . ")"
			  $regex = join($regex, '(', ')');
		      }
		  }
		  $regex = join($regex, '\b', '\b');
	      }
	      ## index(), not substr(): we want to know whether "[^" occurs.
	      $CAN_USE_FAST_LISTONLY = 0 if index($regex, "[^") >= 0;
	      push(@regex_tests, "m/$regex/$iflag$mflag");
	  }

	  ## If we're done, but still have @extra to do, get set for that.
	  if (@ARGV == 0 && @extra) {
	      @ARGV = @extra;   ## now deal with the extra stuff.
	      $underlineOK = 0; ## but no more of this.
	      undef @extra;     ## or this.
	  }
      }
      if (@regex_tests) {
	  $REGEX_TEST = join('||', @regex_tests);
	  ## print STDERR $REGEX_TEST, "\n"; exit;
      } else {
	  ## must be doing -find -- just give something syntactically correct.
	  $REGEX_TEST = 1;
      }
  }

  ##
  ## Make sure we can read the first item(s).
  ##
  foreach $start (@todo) {
      $! = 2, die qq/$0: can't stat "$start"\n/
	  unless ($dev,$inode) = (stat($start))[$STAT_DEV,$STAT_INODE];

      if (defined $dir_done{"$dev,$inode"}) {
	  ## ignore the repeat.
	  warn(qq/ignoring "$start" (same as "$dir_done{"$dev,$inode"}").\n/)
		if $VERBOSE;
	  next;
      }

      ## if -xdev was given, remember the device.
      $xdev{$dev} = 1 if $XDEV;

      ## Note that we won't want to do it again
      $dir_done{"$dev,$inode"} = $start;
  }
}
534
535
536##
537## See the comment above the __END__ above the 'sub dodir' below.
538##
## Builds &dodir at run time: reads program text from the DATA handle,
## rewrites it so that the current option values become literal constants,
## and evals the result. Dies with $! = 2 if that eval fails.
sub import_program
{
    ## Reports an upper-case variable that has no value at inlining time
    ## (the name arrives in @_) and exits with status 2.
    sub bad {
	print STDERR "$0: internal error (@_)\n";
	exit 2;
    }

    ## Read from data, up to next __END__. This will be &dodir.
    ## (The record separator is changed only for the duration of this sub.)
    local($/) = "\n__END__";
    $prog = <DATA>;
    close(DATA);

    ## The word is deleted wherever it appears in the text just read,
    ## so what used to be its string argument is left as plain source.
    $prog =~ s/\beval\b//g;       ## remove any 'eval'

    ## Inline uppercase $-variables by their current values.
    ## Every "$NAME" whose NAME starts with a capital letter and continues
    ## with at least two more capitals, digits or underscores is replaced
    ## by the current value of that variable in package main; &bad is
    ## called if the variable is undefined. The two branches differ only in
    ## how the variable is reached through the symbol table.
    if ($] >= 5) {
	$prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/
		    &bad($1) if !defined ${$main::{$1}}; ${$main::{$1}};/eg;
    } else {
	$prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/local(*VAR) = $_main{$1};
		    &bad($1) if !defined $VAR; $VAR;/eg;
    }

    eval $prog;  ## now do it. This will define &dodir;
    $!=2, die "$0 internal error: $@\n" if $@;
}
565
566###########################################################################
567
568##
569## Read the .search file:
570##    Blank lines and lines that are only #-comments ignored.
571##    Newlines may be escaped to create long lines
572##    Other lines are directives.
573##
574##    A directive may begin with an optional tag in the form <...>
575##    Things inside the <...> are evaluated as with:
576##	   <(this || that) && must>
577##    will be true if
578##       -xmust -xthis   or   -xmust -xthat
579##    were specified on the command line (order doesn't matter, though)
580##    A directive is not done if there is a tag and it's false.
581##    Any characters but whitespace and &|()>,! may appear after an -x
582##    (although "-xdev" is special).  -xmust,this is the same as -xmust -xthis.
583##    Something like -x~ would make <~> true, and <!~> false.
584##
585##    Directives are in the form:
586##      option: STRING
587##	magic : NUMBYTES : EXPR
588##
589##    With option:
590##      The STRING is parsed like a Bourne shell command line, and the
591##      options are used as if given on the command line.
592##      No comments are allowed on 'option' lines.
593##	Examples:
594##	    # skip objects and libraries
595##	    option: -skip '.o .a'
596##	    # skip emacs *~ and *# files, unless -x~ given:
597##	    <!~> option: -skip '~ #'
598##
599##    With magic:
600##	EXPR can be pretty much any perl (comments allowed!).
601##      If it evaluates to true for any particular file, it is skipped.
602##      The only info you'll have about a file is the variable $H, which
603##      will have at least the first NUMBYTES of the file (less if the file
604##      is shorter than that, of course, and maybe more). You'll also have
605##      any variables you set in previous 'magic' lines.
606##	Examples:
607##	    magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
608##	    magic: 6 :  $x6                     eq 'GIF89a'
609##
610##          magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a' ## old gif \
611##		                         || $x6  eq 'GIF89a' ## new gif
612##	(the above two sets are the same)
613##	    ## Check the first 32 bytes for "binarish" looking bytes.
614##	    ## Don't blindly dump on any high-bit set, as non-ASCII text
615##	    ## often has them set. \x80 and \xff seem to be special, though.
616##	    ## Require two in a row to not get things like perl's $^T.
617##	    ## This is known to get *.Z, *.gz, pkzip, *.elc and about any
618##	    ## executable you'll find.
619##	    magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
620##
## Arguments: the rc file name, and a flag that is true for -showrc.
## If the file cannot be opened, a built-in default startup text is used
## instead. With the flag set, every directive is echoed to STDOUT
## together with the reason it is (or would be) applied.
## Side effects: "option" lines are run through &check_args; "magic"
## lines append to @magic_tests and may raise $HEADER_BYTES; every name
## used in a <...> tag is noted in %seen_opt.
## Any malformed line dies with $! set to 2.
sub read_rc
{
    local($file, $show) = @_;
    local($line_num, $ln, $tag) = 0;
    local($use_default, @default) = 0;

    { package magic; $^W= 0; } ## turn off warnings for when we run EXPR's

    unless (open(RC, '<', $file)) {
	$use_default=1;
	$file = "<internal default startup file>";
	## no RC file -- use this default.
	@default = split(/\n/,<<'--------INLINE_LITERAL_TEXT');
            magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
	    option: -skip '.a .elc .gz .o .pbm .xbm .dvi'
	    option: -iskip '.com .exe .lib .pdb .tarz .zip .z .lzh .jpg .jpeg .gif .uu'
	    <!~> option: -skip '~ #'
--------INLINE_LITERAL_TEXT
    }

    ##
    ## Make an eval error pretty.
    ##
    sub clean_eval_error {
	local($_) = @_;
	s/ in file \(eval\) at line \d+,//g; ## perl4-style error
	s/ at \(eval \d+\) line \d+,//g;     ## perl5-style error
	$_ = $` if m/\n/;                    ## remove all but first line
	"$_\n";
    }

    print "reading RC file: $file\n" if $show;

    while (defined($_ = ($use_default ? shift(@default) : <RC>))) {
	$ln = ++$line_num;			     ## note starting line num.
        $_ .= <RC>, $line_num++ while s/\\\n?$/\n/;  ## allow continuations
	next if /^\s*(#.*)?$/;          ## skip blank or comment-only lines.
        $do = '';

	## look for an initial <...> tag.
	if (s/^\s*<([^>]*)>//) {
	    ## This simple s// will make the tag ready to eval.
	    ($tag = $msg = $1) =~
		s/[^\s&|(!)]+/
			$seen_opt{$&}=1;         ## note seen option
			"defined(\$opt{q>$&>})"  ## (q>> is safe quoting here)
		/eg;

	    ## see if the tag is true or not, abort this line if not.
	    $dothis = (eval $tag);
	    $!=2, die "$file $ln <$msg>: $_".&clean_eval_error($@) if $@;

	    if ($show) {
		## Describe the tag in words; every option name in it (not
		## only the first) gets its "-x" prefix.
	        $msg =~ s/[^\s&|(!)]+/-x$&/g;
	        $msg =~ s/\s*!\s*/ no /g;
	        $msg =~ s/\s*&&\s*/ and /g;
	        $msg =~ s/\s*\|\|\s*/ or /g;
		$msg =~ s/^\s+//; $msg =~ s/\s+$//;
		$do = $dothis ? "(doing because $msg)" :
				"(do if $msg)";
	    } elsif (!$dothis) {
	        next;
	    }
	}

	if (m/^\s*option\s*:\s*/) {
	    next if $all && !$show; ## -all turns off these checks;
	    local($_) = $';
            s/\n$//;
	    local($orig) = $_;
	    print " $do option: $_\n" if $show;
	    local($0) = "$0 ($file)"; ## for any error message.
	    local(@ARGV);
	    local($this);
	    ##
	    ## Parse $_ as a Bourne shell line -- fill @ARGV
	    ##
	    while (length) {
		## whitespace ends the word being collected, if any
		if (s/^\s+//) {
		    push(@ARGV, $this) if defined $this;
		    undef $this;
		    next;
		}
		$this = '' if !defined $this;
		## A word is built from single-quoted pieces, double-quoted
		## pieces, plain runs, and backslash pairs (the pair is kept
		## as written, backslash included).
		$this .= $1 while s/^'([^']*)'// ||
				  s/^"([^"]*)"// ||
				  s/^([^'"\s\\]+)//||
				  s/^(\\[\D\d])//;
		$! = 2, die "$file $ln: error parsing $orig at $_\n" if m/^\S/;
	    }
	    push(@ARGV, $this) if defined $this;
	    &check_args;
	    $! = 2, die qq/$file $ln: unused arg "@ARGV".\n/ if @ARGV;
	    next;
	}

	if (m/^\s*magic\s*:\s*(\d+)\s*:\s*/) {
	    next if $all && !$show; ## -all turns off these checks;
	    local($bytes, $check) = ($1, $');

	    if ($show) {
		$check =~ s/\n?$/\n/;
		print " $do contents: $check";
	    }
	    ## Check to make sure the thing at least compiles.
	    ## ($bytes is a package variable of main, hence the qualified
	    ## name inside package magic.)
	    eval  "package magic; (\$H = '1'x \$main::bytes) && (\n$check\n)\n";
	    $! = 2, die "$file $ln: ".&clean_eval_error($@) if $@;

	    $HEADER_BYTES = $bytes if $bytes > $HEADER_BYTES;
	    push(@magic_tests, "(\n$check\n)");
	    next;
	}
	$! = 2, die "$file $ln: unknown command\n";
    }
    close(RC);
}
737
## Show a one-line progress message on STDERR.
##   Argument: the text to show.
## When STDERR is not a terminal the text is simply printed on a line of
## its own. On a terminal it is followed by a carriage return instead, so
## the next message overwrites it; if the previous message was longer,
## enough blanks are added to cover what would otherwise be left showing.
sub message
{
    unless ($STDERR_IS_TTY) {
	return print STDERR $_[0], "\n";
    }

    my ($text) = @_;
    $thislength = length($text);

    ## number of columns of the old message left uncovered by the new one
    my $leftover = $last_message_length - $thislength;
    if ($leftover > 0) {
	print STDERR $text, ' ' x $leftover, "\r";
    } else {
	print STDERR $text, "\r";
    }

    return $last_message_length = $thislength;
}
753
## Blank out whatever &message last left on the terminal line (if
## anything) and reset the remembered length and the -vv running totals.
sub clear_message
{
    if ($last_message_length) {
	my $blanks = ' ' x $last_message_length;
	print STDERR $blanks, "\r";
    }
    $last_message_length = 0;
    $vv_size = 0;
    return $vv_print = 0;
}
759
760##
761## Output a copy of this program with comments, extra whitespace, and
762## the trailing man page removed. On an ultra slow machine, such a copy
763## might load faster (but I can't tell any difference on my machine).
764##
sub strip {
    ## Rewind DATA so that the whole script file is read from its start.
    $! = 2, die "$0: can't reset internal pointer.\n" unless seek(DATA, 0, 0);
    while(<DATA>) {
      ## Here-document text is copied through untouched. The range is
      ## opened and closed by lines containing the marker word, and the
      ## comment directly below closes the range that this very test opens.
      print, next if /INLINE_LITERAL_TEXT/.../INLINE_LITERAL_TEXT/;
      ## must mention INLINE_LITERAL_TEXT on this line!
      ## /g matters here: leading whitespace, the comment, and the trailing
      ## newline may all be present on the same line.
      s/\#\#.*|^\s+|\s+$//g; ## remove cruft
      last if $_ eq '.00;';  ## stop here; nothing after this line is copied
      ## Drop empty lines and the two quoted-string lines at the top of the
      ## file (written there with a trailing semicolon).
      next if ($_ eq '') || m/^'(?:di|ig00)';?$/;
      s/\$stripped=0;/\$stripped=1;/;
      ## Only the first run is squished; NOTE(review): left that way since
      ## a global squish would also alter runs of blanks inside strings.
      s/\s\s+/ /;  ## squish multiple whitespaces down to one.
      print $_, "\n";
    }
    exit(0);
}
779
780##
781## Just to shut up -w. Never executed.
782##
sub dummy {

    ## The leading "1 ||" short-circuits, so nothing to its right is ever
    ## evaluated even if this sub were called. The expression is here only
    ## so that each of these names is mentioned in the program text.
    ## NOTE(review): presumably these are names otherwise used only in the
    ## text that is compiled at run time -- confirm before removing any.
    1 || &dummy || &dir_done || &bad || &message || $NEXT_DIR_ENTRY ||
    $DELAY || $VV_SIZE || $VV_PRINT_COUNT || $STDERR_SCREWS_STDOUT ||
    @files || @files || $magic'H || $magic'H || $xdev{''} || &clear_message;

}
790
791##
792## If the following __END__ is in place, what follows will be
793## inlined when the program first starts up. Any $ variable name
794## all in upper case, specifically, any string matching
##	\$([A-Z][A-Z0-9_]{2,}\b)
796## will have the true value for that variable inlined. Also, any 'eval' is
797## removed
798##
799## The idea is that when the whole thing is then eval'ed to define &dodir,
800## the perl optimizer will make all the decisions that are based upon
801## command-line options (such as $VERBOSE), since they'll be inlined as
802## constants
803##
804## Also, and here's the big win, the tests for matching the regex, and a
805## few others, are all inlined. Should be blinding speed here.
806##
807## See the read from <DATA> above for where all this takes place.
808## But all-in-all, you *want* the __END__ here. Comment it out only for
809## debugging....
810##
811
812__END__
813
814##
815## Given a directory, check all "appropriate" files in it.
816## Shove any subdirectories into the global @todo, so they'll be done
817## later.
818##
819## Be careful about adding any upper-case variables, as they are subject
820## to being inlined. See comments above the __END__ above.
821##
822sub dodir
823{
824  local($dir) = @_;
825  $dir =~ s,/+$,,; ## remove any trailing slash.
826  unless (opendir(DIR, "$dir/.")) {
827      &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
828      warn qq($0: can't opendir "$dir/".\n);
829      return;
830  }
831
832  if ($VERBOSE) {
833      &message($dir);
834      $vv_print = $vv_size = 0;
835  }
836
837  @files = sort readdir(DIR) if $DO_SORT;
838
839  while (defined($name = eval $NEXT_DIR_ENTRY))
840  {
841    next if $name eq '.' || $name eq '..'; ## never follow these.
842
843    ## create full relative pathname.
844    $file = $dir eq '.' ? $name : "$dir/$name";
845
846    ## if link and skipping them, do so.
847    if ($NOLINKS && -l $file) {
848	warn qq/skip (symlink): $file\n/ if $WHY;
849	next;
850    }
851
852    ## skip things unless files or directories
853    unless (-f $file || -d _) {
854	if ($WHY) {
855	    $why = (-S _ && "socket")       ||
856		   (-p _ && "pipe")         ||
857		   (-b _ && "block special")||
858		   (-c _ && "char special") || "somekinda special";
859	    warn qq/skip ($why): $file\n/;
860	}
861	next;
862    }
863
864    ## skip things we can't read
865    unless (-r _) {
866	if ($WHY) {
867	    $why = (-l $file) ? "follow" : "read";
868	    warn qq/skip (can't $why): $file\n/;
869	}
870	next;
871    }
872
873    ## skip things that are empty
874    unless (-s _ || -d _) {
875	warn qq/skip (empty): $file\n/ if $WHY;
876	next;
877    }
878
879    ## Note file device & inode. If -xdev, skip if appropriate.
880    ($dev, $inode) = (stat(_))[$STAT_DEV, $STAT_INODE];
881    if ($XDEV && defined $xdev{$dev}) {
882	warn qq/skip (other device): $file\n/ if $WHY;
883	next;
884    }
885    $id = "$dev,$inode";
886
887    ## special work for a directory
888    if (-d _) {
889	## Do checks for directory file endings.
890	if ($DO_DSKIP_TEST && (eval $DSKIP_TEST)) {
891	    warn qq/skip (-dskip): $file\n/ if $WHY;
892	    next;
893	}
894	## do checks for -name/-regex/-path tests
895	if ($DO_DGLOB_TESTS && !(eval $DGLOB_TESTS)) {
896	    warn qq/skip (dirname): $file\n/ if $WHY;
897	    next;
898	}
899
900	## _never_ redo a directory
901	if (defined $dir_done{$id} and $^O ne 'MSWin32') {
902	    warn qq/skip (did as "$dir_done{$id}"): $file\n/ if $WHY;
903	    next;
904	}
905	$dir_done{$id} = $file;     ## mark it done.
906	unshift(@todo, $file);	    ## add to the list to do.
907	next;
908    }
909    if ($WHY == 0  && $VERBOSE > 1) {
910      if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
911	  &message($file);
912	  $vv_print = $vv_size = 0;
913      }
914    }
915
916    ## do time-related tests
917    if ($NEWER || $OLDER) {
918	$_ = (stat(_))[$STAT_MTIME];
919	if ($NEWER && $_ < $NEWER) {
920	    warn qq/skip (too old): $file\n/ if $WHY;
921	    next;
922	}
923	if ($OLDER && $_ > $OLDER) {
924	    warn qq/skip (too new): $file\n/ if $WHY;
925	    next;
926	}
927    }
928
929    ## do checks for file endings
930    if ($DO_SKIP_TEST && (eval $SKIP_TEST)) {
931	warn qq/skip (-skip): $file\n/ if $WHY;
932	next;
933    }
934
935    ## do checks for -name/-regex/-path tests
936    if ($DO_GLOB_TESTS && !(eval $GLOB_TESTS)) {
937	warn qq/skip (filename): $file\n/ if $WHY;
938	next;
939    }
940
941
942    ## If we're not repeating files,
943    ##	skip this one if we've done it, or note we're doing it.
944    unless ($DOREP) {
945	if (defined $file_done{$id}) {
946	    warn qq/skip (did as "$file_done{$id}"): $file\n/ if $WHY;
947	    next;
948	}
949	$file_done{$id} = $file;
950    }
951
952    if ($DO_MAGIC_TESTS) {
953	if (!open(FILE_IN, '<', $file)) {
954	    &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
955	    warn qq/$0: can't open: $file\n/;
956	    next;
957	}
958	unless (read(FILE_IN, $magic'H, $HEADER_BYTES)) {
959	    &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
960	    warn qq/$0: can't read from "$file"\n"/;
961	    close(FILE_IN);
962	    next;
963	}
964
965	eval $MAGIC_TESTS;
966	if ($magic'val) {
967	    close(FILE_IN);
968	    warn qq/skip (magic): $file\n/ if $WHY;
969	    next;
970	}
971	seek(FILE_IN, 0, 0);  ## reset for later <FILE_IN>
972    }
973
974    if ($WHY != 0  && $VERBOSE > 1) {
975      if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
976	  &message($file);
977	  $vv_print = $vv_size = 0;
978      }
979    }
980
981    if ($DELAY) {
982	sleep($DELAY);
983    }
984
985    if ($FIND_ONLY) {
986	&clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
987	print $file, "\n";
988	$retval=0; ## we've found something
989	close(FILE_IN) if $DO_MAGIC_TESTS;
990	next;
991    } else {
992	## if we weren't doing magic tests, file won't be open yet...
993	if (!$DO_MAGIC_TESTS && !open(FILE_IN, '<', $file)) {
994	    &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
995	    warn qq/$0: can't open: $file\n/;
996	    next;
997	}
998	if ($LIST_ONLY && $CAN_USE_FAST_LISTONLY) {
999	    ##
1000	    ## This is rather complex, but buys us a LOT when we're just
1001	    ## listing files and not the individual internal lines.
1002	    ##
1003	    local($size) = 4096;  ## block-size in which to do reads
1004	    local($nl);           ## will point to $_'s ending newline.
1005	    local($read);	  ## will be how many bytes read.
1006	    local($_) = '';       ## Starts out empty
1007	    local($hold);	  ## (see below)
1008
1009	    while (($read = read(FILE_IN,$_,$size,length($_)))||length($_))
1010	    {
1011		undef @parts;
1012		## if read a full block, but no newline, need to read more.
1013		while ($read == $size && ($nl = rindex($_, "\n")) < 0) {
1014		    push(@parts, $_);                    ## save that part
1015		    $read = read(FILE_IN, $_, $size); ## keep trying
1016		}
1017
1018		##
1019		## If we had to save parts, must now combine them together.
1020		## adjusting $nl to reflect the now-larger $_. This should
1021		## be a lot more efficient than using any kind of .= in the
1022		## loop above.
1023		##
1024		if (@parts) {
1025		    local($lastlen) = length($_); #only need if $nl >= 0
1026		    $_ = join('', @parts, $_);
1027		    $nl = length($_) - ($lastlen - $nl) if $nl >= 0;
1028		}
1029
1030		##
1031		## If we're at the end of the file, then we can use $_ as
1032		## is.  Otherwise, we need to remove the final partial-line
1033		## and save it so that it'll be at the beginning of the
1034		## next read (where the rest of the line will be layed in
1035		## right after it).  $hold will be what we should save
1036		## until next time.
1037		##
1038		if ($read != $size || $nl < 0) {
1039		    $hold = '';
1040		} else {
1041		    $hold = substr($_, $nl + 1);
1042		    substr($_, $nl + 1) = '';
1043		}
1044
1045		##
1046		## Now have a bunch of full lines in $_. Use it.
1047		##
1048		if (eval $REGEX_TEST) {
1049		    &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
1050		    print $file, "\n";
1051		    $retval=0; ## we've found something
1052
1053		    last;
1054		}
1055
1056		## Prepare for next read....
1057		$_ = $hold;
1058	    }
1059
1060	} else {  ## else not using faster block scanning.....
1061
1062            $lines_printed = 0 if $NICE;
1063	    while (<FILE_IN>) {
1064		study;
1065		next unless (eval $REGEX_TEST);
1066
1067		##
1068		## We found a matching line.
1069		##
1070		$retval=0;
1071		&clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
1072		if ($LIST_ONLY) {
1073		    print $file, "\n";
1074		    last;
1075		} else {
1076		    ## prepare to print line.
1077		    if ($NICE && $lines_printed++ == 0) {
1078			print '-' x 70, "\n" if $NICE > 1;
1079			print $file, ":\n";
1080		    }
1081
1082		    ##
1083		    ## Print all the prelim stuff. This looks less efficient
1084		    ## than it needs to be, but that's so that when the eval
1085		    ## is compiled (and the tests are optimized away), the
1086		    ## result will be less actual PRINTs than the more natural
1087		    ## way of doing these tests....
1088		    ##
1089		    if ($NICE) {
1090			if ($REPORT_LINENUM) {
1091			    print " line $.:  ";
1092			} else {
1093			    print "  ";
1094			}
1095		    } elsif ($REPORT_LINENUM && $PREPEND_FILENAME) {
1096			print "$file,:$.: ";
1097		    } elsif ($PREPEND_FILENAME) {
1098			print "$file: ";
1099		    } elsif ($REPORT_LINENUM) {
1100			print "$.: ";
1101		    }
1102		    print $_;
1103		    print "\n" unless m/\n$/;
1104		}
1105	    }
1106	    print "\n" if ($NICE > 1) && $lines_printed;
1107	}
1108	close(FILE_IN);
1109    }
1110  }
1111  closedir(DIR);
1112}
1113
1114__END__
1115.00;			## finish .ig
1116
1117'di			\" finish diversion--previous line must be blank
1118.nr nl 0-1		\" fake up transition to first page again
1119.nr % 0			\" start at page 1
1120.\"__________________NORMAL_MAN_PAGE_BELOW_________________
1121.ll+10n
1122.TH search 1 "Dec 17, 1994"
1123.SH SEARCH
1124search \- search files (a'la grep) in a whole directory tree.
1125.SH SYNOPSIS
1126search [ grep-like and find-like options] [regex ....]
1127.SH DESCRIPTION
1128.I Search
1129is more or less a combo of 'find' and 'grep' (although the regular
1130expression flavor is that of the perl being used, which is closer to
1131egrep's than grep's).
1132
1133.I Search
1134does generally the same kind of thing that
1135.nf
1136   find <blah blah> | xargs egrep <blah blah>
1137.fi
1138does, but is
1139.I much
1140more powerful and efficient (and intuitive, I think).
1141
1142This manual describes
1143.I search
1144as of version "941227.4".
1145
1146.SH "QUICK EXAMPLE"
1147Basic use is simple:
1148.nf
1149    % search jeff
1150.fi
1151will search files in the current directory, and all sub directories, for
1152files that have "jeff" in them. The lines will be listed with the
1153containing file's name prepended.
1154.PP
1155If you list more than one regex, such as with
1156.nf
1157    % search jeff Larry Randal+ 'Stoc?k' 'C.*son'
1158.fi
1159then a line containing any of the regexes will be listed.
1160This makes it effectively the same as
1161.nf
1162    % search 'jeff|Larry|Randal+|Stoc?k|C.*son'
1163.fi
1164However, listing them separately is much more efficient (and is easier
1165to type).
1166.PP
1167Note that in the case of these examples, the
1168.B \-w
1169(list whole-words only) option would be useful.
1170.PP
1171Normally, various kinds of files are automatically removed from consideration.
1172If it has a certain ending (such as ".tar", ".Z", ".o", etc.), or if
1173the beginning of the file looks like a binary, it'll be excluded.
1174You can control exactly how this works -- see below. One quick way to
1175override this is to use the
1176.B \-all
1177option, which means to consider all the files that would normally be
1178automatically excluded.
1179Or, if you're curious, you can use
1180.B \-why
1181to have notes about what files are skipped (and why) printed to stderr.
1182
1183.SH "BASIC OVERVIEW"
1184Normally, the search starts in the current directory, considering files in
1185all subdirectories.
1186
1187You can use the
1188.I ~/.search
1189file to control ways to automatically exclude files.
1190If you don't have this file, a default one will kick in, which automatically
1191adds
1192.nf
1193    -skip .o .Z .gif
1194.fi
1195(among others) to exclude those kinds of files (which you probably want to
1196skip when searching for text, as is normal).
1197Files that look to be binary will also be excluded.
1198
1199Files ending with "#" and "~" will also be excluded unless the
1200.B -x~
1201option is given.
1202
1203You can use
1204.B -showrc
1205to show what kinds of files will normally be skipped.
1206See the section on the startup file
1207for more info.
1208
1209You can use the
1210.B -all
1211option to indicate you want to consider all files that would otherwise be
1212skipped by the startup file.
1213
1214Based upon various other flags (see "WHICH FILES TO CONSIDER" below),
1215more files might be removed from consideration. For example
1216.nf
1217    -mtime 3
1218.fi
1219will exclude files that aren't at least three days old (change the 3 to -3
1220to exclude files that are more than three days old), while
1221.nf
1222    -skip .*
1223.fi
1224would exclude any file beginning with a dot (of course, '.' and '..'  are
1225special and always excluded).
1226
1227If you'd like to see what files are being excluded, and why, you can get the
1228list via the
1229.B \-why
1230option.
1231
1232If a file makes it past all the checks, it is then "considered".
1233This usually means it is grepped for the regular expressions you gave
1234on the command line.
1235
1236If any of the regexes match a line, the line is printed.
1237However, if
1238.B -list
1239is given, just the filename is printed. Or, if
1240.B -nice
1241is given, a somewhat more (human-)readable output is generated.
1242
1243If you're searching a huge tree and want to keep informed about how
1244the search is progressing,
1245.B -v
1246will print (to stderr) the current directory being searched.
1247Using
1248.B -vv
1249will also print the current file "every so often", which could be useful
1250if a directory is huge. Using
1251.B -vvv
1252will print the update with every file.
1253
1254Below is the full listing of options.
1255
1256.SH "OPTIONS TELLING *WHERE* TO SEARCH"
1257.TP
1258.BI -dir " DIR"
1259Start searching at the named directory instead of the current directory.
1260If multiple
1261.B -dir
1262arguments are given, multiple trees will be searched.
1263.TP
1264.BI -ddir " DIR"
1265Like
1266.B -dir
1267except it flushes any previous
1268.B -dir
1269directories (i.e. "-dir A -dir B -dir C" will search A, B, and C, while
1270"-dir A -ddir B -dir C" will search only B and C). This might be of use
1271in the startup file (see that section below).
1272.TP
1273.B -xdev
1274Stay on the same filesystem as the starting directory/directories.
1275.TP
1276.B -sort
1277Sort the items in a directory before processing them.
1278Normally they are processed in whatever order they happen to be read from
1279the directory.
1280.TP
1281.B -nolinks
1282Don't follow symbolic links. Normally they're followed.
1283
1284.SH "OPTIONS CONTROLLING WHICH FILES TO CONSIDER AND EXCLUDE"
1285.TP
1286.BI -mtime " NUM"
1287Only consider files that were last changed more than
1288.I NUM
1289days ago
1290(less than
1291.I NUM
1292days if
1293.I NUM
1294has '-' prepended, i.e. "-mtime -2.5" means to consider files that
1295have been changed in the last two and a half days).
1296.TP
1297.BI -older " FILE"
1298Only consider files that have not changed since
1299.I FILE
1300was last changed.
1301If there is any upper case in the "-older", "or equal" is added to the sense
1302of the test.  Therefore, "search -older ./file regex" will never consider
1303"./file", while "search -Older ./file regex" will.
1304
1305If a file is a symbolic link, the time used is that of the file and not the
1306link.
1307.TP
1308.BI -newer " FILE"
1309Opposite of
1310.BR -older .
1311.TP
1312.BI -name " GLOB"
1313Only consider files that match the shell filename pattern
1314.IR GLOB .
1315The check is only done on a file's name (use
1316.B -path
1317to check the whole path, and use
1318.B -dname
1319to check directory names).
1320
1321Multiple specifications can be given by separating them with spaces, a'la
1322.nf
1323    -name '*.c *.h'
1324.fi
1325to consider C source and header files.
1326If
1327.I GLOB
1328doesn't contain any special pattern characters, a '*' is prepended.
1329This last example could have been given as
1330.nf
1331   -name '.c .h'
1332.fi
1333It could also be given as
1334.nf
1335    -name .c -name .h
1336.fi
1337or
1338.nf
1339    -name '*.c' -name '*.h'
1340.fi
1341or
1342.nf
1343    -name '*.[ch]'
1344.fi
1345(among others)
1346but in this last case, you have to be sure to supply the leading '*'.
1347.TP
1348.BI -path " GLOB"
1349Like
1350.B -name
1351except the entire path is checked against the pattern.
1352.TP
1353.BI -regex " REGEX"
1354Considers files whose names (not paths) match the given perl regex
1355exactly.
1356.TP
1357.BI -iname " GLOB"
1358Case-insensitive version of
1359.BR -name .
1360.TP
1361.BI -ipath " GLOB"
1362Case-insensitive version of
1363.BR -path .
1364.TP
1365.BI -iregex " REGEX"
1366Case-insensitive version of
1367.BR -regex .
1368
1369.TP
1370.BI -dpath " GLOB"
1371Only search down directories whose path matches the given pattern (this
1372doesn't apply to the initial directory given by
1373.BR -dir ,
1374of course).
1375Something like
1376.nf
1377    -dir /usr/man -dpath /usr/man/man*
1378.fi
1379would completely skip
1380"/usr/man/cat1", "/usr/man/cat2", etc.
1381.TP
1382.BI -dskip " GLOB"
1383Skips directories whose name (not path) matches the given pattern.
1384Something like
1385.nf
1386    -dir /usr/man -dskip cat*
1387.fi
1388would completely skip any directory in the tree whose name begins with "cat"
1389(including "/usr/man/cat1", "/usr/man/cat2", etc.).
1390.TP
1391.BI -dregex " REGEX"
1392Like
1393.BR -dpath ,
1394but the pattern is a full perl regex. Note that this is quite different
1395from
1396.B -regex
1397which considers only file names (not paths). This option considers
1398full directory paths (not just names). It's much more useful this way.
1399Sorry if it's confusing.
1400.TP
1401.BI -dpath " GLOB"
1402This option exists, but is probably not very useful. It probably wants to
1403be like the '-below' or something I mention in the "TODO" section.
1404.TP
1405.BI -idpath " GLOB"
1406Case-insensitive version of
1407.BR -dpath .
1408.TP
1409.BI -idskip " GLOB"
1410Case-insensitive version of
1411.BR -dskip .
1412.TP
1413.BI -idregex " REGEX"
1414Case-insensitive version of
1415.BR -dregex .
1416.TP
1417.B -all
1418Ignore any 'magic' or 'option' lines in the startup file.
1419The effect is that all files that would otherwise be automatically
1420excluded are considered.
1421.TP
1422.BI -x SPECIAL
1423Arguments starting with
1424.B -x
1425(except
1426.BR -xdev ,
1427explained elsewhere) do special interaction with the
1428.I ~/.search
1429startup file. Something like
1430.nf
1431	-xflag1 -xflag2
1432.fi
1433will turn on "flag1" and "flag2" in the startup file (and is
1434the same as "-xflag1,flag2"). You can use this to write your own
1435rules for what kinds of files are to be considered.
1436
1437For example, the internal-default startup file contains the line
1438.nf
1439	<!~> option: -skip '~ #'
1440.fi
1441This means that if the
1442.B -x~
1443flag is
1444.I not
1445seen, the option
1446.nf
1447    -skip '~ #'
1448.fi
1449should be done.
1450The effect is that emacs temp and backup files are not normally
1451considered, but you can include them with the -x~ flag.
1452
1453You can write your own rules to customize
1454.I search
1455in powerful ways. See the STARTUP FILE section below.
1456.TP
1457.B -why
1458Print a message (to stderr) when and why a file is not considered.
1459
1460.SH "OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED"
1461.TP
1462.B -find
1463(you can use
1464.B -f
1465as well).
1466This option changes the basic action of
1467.IR search .
1468
1469Normally, if a file is considered, it is searched
1470for the regular expressions as described earlier. However, if this option
1471is given, the filename is printed and no searching takes place. This turns
1472.I search
1473into a 'find' of some sorts.
1474
1475In this case, no regular expressions are needed on the command line
1476(any that are there are silently ignored).
1477
1478This is not intended to be a replacement for the 'find' program,
1479but to aid
1480you in understanding just what files are getting past the exclusion checks.
1481If you really want to use it as a sort of replacement for the 'find' program,
1482you might want to use
1483.B -all
1484so that it doesn't waste time checking to see if the file is binary, etc
1485(unless you really want that, of course).
1486
1487If you use
1488.BR -find ,
1489none of the "GREP-LIKE OPTIONS" (below) matter.
1490
1491As a replacement for 'find',
1492.I search
1493is probably a bit slower (or in the case of GNU find, a lot slower --
1494GNU find is
1495.I unbelievably
1496fast).
1497However, "search -ffind"
1498might be more useful than 'find' when options such as
1499.B -skip
1500are used (at least until 'find' gets such functionality).
1501.TP
1502.B -ffind
1503(or
1504.BR -ff )
1505A faster more 'find'-like find. Does
1506.nf
1507    -find  -all -dorep
1508.fi
1509.SH "GREP-LIKE OPTIONS"
1510These options control how a searched file is accessed,
1511and how things are printed.
1512.TP
1513.B -i
1514Ignore letter case when matching.
1515.TP
1516.B -w
1517Consider only whole-word matches ("whole word" as defined by perl's "\\b"
1518regex).
1519.TP
1520.B -u
1521If the regex(es) is/are simple, try to modify them so that they'll work
1522in manpage-like underlined text (i.e. like _^Ht_^Hh_^Hi_^Hs).
1523This is very rudimentary at the moment.
1524.TP
1525.B -list
1526(you can use
1527.B -l
1528too).
1529Don't print matching lines, but the names of files that contain matching
1530lines. This will likely be *much* faster, as special optimizations are
1531made -- particularly with large files.
1532.TP
1533.B -n
1534Prefix each line with its line number.
1535.TP
1536.B -nice
1537Not a grep-like option, but similar to
1538.BR -list ,
1539so included here.
1540.B -nice
1541will have the output be a bit more human-readable, with matching lines printed
1542slightly indented after the filename, a'la
1543.nf
1544
1545   % search foo
1546   somedir/somefile: line with foo in it
1547   somedir/somefile: some food for thought
1548   anotherdir/x: don't be a buffoon!
1549   %
1550
1551.fi
1552will become
1553.nf
1554
1555   % search -nice foo
1556   somedir/somefile:
1557     line with foo in it
1558     some food for thought
1559   anotherdir/x:
1560     don't be a buffoon!
1561   %
1562
1563.fi
1564This option due to Lionel Cons.
1565.TP
1566.B -nnice
1567Be a bit nicer than
1568.BR -nice .
1569Prefix each file's output by a rule line, and follow with an extra blank line.
1570.TP
1571.B -h
1572Don't prepend each output line with the name of the file
1573(meaningless when
1574.B -find
1575or
1576.B -l
1577are given).
1578
1579.SH "OTHER OPTIONS"
1580.TP
1581.B -help
1582Print the usage information.
1583.TP
1584.B -version
1585Print the version information and quit.
1586.TP
1587.B -v
1588Set the level of message verbosity.
1589.B -v
1590will print a note whenever a new directory is entered.
1591.B -vv
1592will also print a note "every so often". This can be useful to see
1593what's happening when searching huge directories.
1594.B -vvv
1595will print a note with every file.
1596.B -vvvv
1597is
1598-vvv
1599plus
1600.BR -why .
1601.TP
1602.B -e
1603This ends the options, and can be useful if the regex begins with '-'.
1604.TP
1605.B -showrc
1606Shows what is being considered in the startup file, then exits.
1607.TP
1608.B -dorep
1609Normally, an identical file won't be checked twice (even with multiple
1610hard or symbolic links). If you're just trying to do a fast
1611.BR -find ,
1612the bookkeeping to remember which files have been seen is not desirable,
1613so you can eliminate the bookkeeping with this flag.
1614
1615.SH "STARTUP FILE"
1616When
1617.I search
1618starts up, it processes the directives in
1619.IR ~/.search .
1620If no such file exists, a default
1621internal version is used.
1622
1623The internal version looks like:
1624.nf
1625
1626   magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
1627   option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi'
1628   option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu'
1629   <!~> option: -skip '~ #'
1630
1631.fi
1632If you wish to create your own "~/.search",
1633you might consider copying the above, and then working from there.
1634
1635There are two kinds of directives in a startup file: "magic" and "option".
1636.RS 0n
1637.TP
1638OPTION
1639Option lines will automatically do the command-line options given.
1640For example, the line
1641.nf
1642	option: -v
1643.fi
1644in your startup file will turn on -v every time, without needing to type it
1645on the command line.
1646
1647The text on the line after the "option:" directive is processed
1648like the Bourne shell, so make sure to pay attention to quoting.
1649.nf
1650	option: -skip .exe .com
1651.fi
1652will give an error (".com" by itself isn't a valid option), while
1653.nf
1654	option: -skip ".exe .com"
1655.fi
1656will properly include it as part of -skip's argument.
1657
1658.TP
1659MAGIC
1660Magic lines are used to determine if a file should be considered a binary
1661or not (the term "magic" refers to checking a file's magic number).  These
1662are described in more detail below.
1663.RE
1664
1665Blank lines and comments (lines beginning with '#') are allowed.
1666
1667If a line begins with  <...>, then it's a check to see if the
1668directive on the line should be done or not. The stuff inside the <...>
1669can contain perl's && (and), || (or), ! (not), and parens for grouping,
1670along with "flags" that might be indicated by the user with
1671.BI -x flag
1672options.
1673
1674For example, using "-xfoo" will cause "foo" to be true inside the <...>
1675blocks. Therefore, a line beginning with "<foo>" would be done only when
1676"-xfoo" had been specified, while a line beginning with "<!foo>" would be
1677done only when "-xfoo" is not specified (of course, a line without any <...>
1678is done in either case).
1679
1680A realistic example might be
1681.nf
1682	<!v> option: -vv
1683.fi
1684This will cause -vv messages to be the default, but allow "-xv" to override.
1685
1686There are a few flags that are set automatically:
1687.RS
1688.TP
1689.B TTY
1690true if the output is to the screen (as opposed to being redirected to a file).
1691You can force this (as with all the other automatic flags) with -xTTY.
1692.TP
1693.B -v
1694True if -v was specified. If -vv was specified, both
1695.B -v
1696and
1697.B -vv
1698flags are true (and so on).
1699.TP
1700.B -nice
1701True if -nice was specified. Same thing about -nnice as for -vv.
1702.PP
1703.TP
1704.B -list
1705true if -list (or -l) was given.
1706.TP
1707.B -dir
1708true if -dir was given.
1709.RE
1710
1711Using this info, you might change the last example to
1712.nf
1713
1714    <!v && !-v> option: -vv
1715
1716.fi
1717The added "&& !-v" means "and if the '-v' option is not given".
1718This will allow you to use "-v" alone on the command line, and not
1719have this directive add the more verbose "-vv" automatically.
1720
1721.RS 0
1722Some other examples:
1723.TP
1724<!-dir && !here> option: -dir ~/
1725Effectively make the default directory your home directory (instead of the
1726current directory). Using -dir or -xhere will undo this.
1727.TP
1728<tex> option: -name .tex -dir ~/pub
1729Create '-xtex' to search only "*.tex" files in your ~/pub directory tree.
1730Actually, this could be made a bit better. If you combine '-xtex' and '-dir'
1731on the command line, this directive will add ~/pub to the list, when you
1732probably want to use the -dir directory only. You could do
1733.nf
1734
1735   <tex> option: -name .tex
1736   <tex && !-dir> option: -dir ~/pub
1737.fi
1738
1739to allow '-xtex' to work as before, but allow a command-line "-dir"
1740to take precedence with respect to ~/pub.
1741.TP
1742<fluff> option: -nnice -sort -i -vvv
1743Combine a few user-friendly options into one '-xfluff' option.
1744.TP
1745<man> option: -ddir /usr/man -v -w
1746When the '-xman' option is given, search "/usr/man" for whole-words
1747(of whatever regex or regexes are given on the command line), with -v.
1748.RE
1749
1750The lines in the startup file are executed from top to bottom, so something
1751like
1752.nf
1753
1754   <both> option: -xflag1 -xflag2
1755   <flag1> option: ...whatever...
1756   <flag2> option: ...whatever...
1757
1758.fi
1759will allow '-xboth' to be the same as '-xflag1 -xflag2' (or '-xflag1,flag2'
1760for that matter). However, if you put the "<both>" line below the others,
1761they will not be true when encountered, so the result would be different
1762(and probably undesired).
1763
1764The "magic" directives are used to determine if a file looks to be binary
1765or not. The form of a magic line is
1766.nf
1767    magic: \fISIZE\fP : \fIPERLCODE\fP
1768.fi
1769where
1770.I SIZE
1771is the number of bytes of the file you need to check, and
1772.I PERLCODE
1773is the code to do the check. Within
1774.IR PERLCODE ,
1775the variable $H will hold at least the first
1776.I SIZE
1777bytes of the file (unless the file is shorter than that, of course).
1778It might hold more bytes. The perl should evaluate to true if the file
1779should be considered a binary.
1780
1781An example might be
1782.nf
1783    magic: 6 : substr($H, 0, 6) eq 'GIF87a'
1784.fi
1785to test for a GIF ("-iskip .gif" is better, but this might be useful
1786if you have images in files without the ".gif" extension).
1787
1788Since the startup file is checked from top to bottom, you can be a bit
1789efficient:
1790.nf
1791    magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
1792    magic: 6 :  $x6                     eq 'GIF89a'
1793.fi
1794You could also write the same thing as
1795.nf
1796  magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a') || ## an old gif, or.. \e
1797	       $x6                     eq 'GIF89a'     ## .. a new one.
1798.fi
1799since newlines may be escaped.
1800
1801The default internal startup file includes
1802.nf
1803   magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
1804.fi
1805which checks for certain non-printable characters, and catches a large
1806number of binary files, including most systems' executables, linkable
1807objects, compressed, tarred, and otherwise folded, spindled, and mutilated
1808files.
1809
1810Another example might be
1811.nf
1812    ## an archive library
1813    magic: 17 : substr($H, 0, 17) eq "!<arch>\en__.SYMDEF"
1814.fi
1815
1816.SH "RETURN VALUE"
1817.I Search
1818returns zero if lines (or files, if appropriate) were found,
1819or if no work was requested (such as with
1820.BR -help ).
1821Returns 1 if no lines (or files) were found.
1822Returns 2 on error.
1823
1824.SH TODO
1825Things I'd like to add some day:
1826.nf
1827  + show surrounding lines (context).
1828  + highlight matched portions of lines.
1829  + add '-and', which can go between regexes to override
1830    the default logical or of the regexes.
1831  + add something like
1832      -below GLOB
1833    which will examine a tree and only consider files that
1834    lie in a directory deeper than one named by the pattern.
1835  + add 'warning' and 'error' directives.
1836  + add 'help' directive.
1837.fi
1838.SH BUGS
1839If -xdev and multiple -dir arguments are given, any file in any of the
1840target filesystems is allowed. It would be better to allow each filesystem
1841for each separate tree.
1842
1843Multiple -dir args might also cause some confusing effects. Doing
1844.nf
1845   -dir some/dir -dir other
1846.fi
1847will search "some/dir" completely, then search "other" completely. This
1848is good. However, something like
1849.nf
1850   -dir some/dir -dir some/dir/more/specific
1851.fi
1852will search "some/dir" completely *except for* "some/dir/more/specific",
1853after which it will return and be searched. Not really a bug, but just sort
1854of odd.
1855
1856File times (for -newer, etc.) of symbolic links are for the file, not the
1857link. This could cause some misunderstandings.
1858
1859Probably more. Please let me know.
1860.SH AUTHOR
1861Jeffrey Friedl, Omron Corp (jfriedl@omron.co.jp)
1862.br
1863http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html
1864
1865.SH "LATEST SOURCE"
1866See http://www.wg.omron.co.jp/~jfriedl/perl/index.html
1867