1/* 2 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 3 * Copyright (c) 1989, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * James A. Woods. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38#ifndef lint 39static const char copyright[] = 40"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ 41@(#) Copyright (c) 1989, 1993\n\ 42 The Regents of the University of California. All rights reserved.\n"; 43#endif /* not lint */ 44 45#ifndef lint 46#if 0 47static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 48#endif 49static const char rcsid[] = 50 "$FreeBSD$"; 51#endif /* not lint */ 52 53/* 54 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 55 * 56 * Locate scans a file list for the full pathname of a file given only part 57 * of the name. The list has been processed with with "front-compression" 58 * and bigram coding. Front compression reduces space by a factor of 4-5, 59 * bigram coding by a further 20-25%. 60 * 61 * The codes are: 62 * 63 * 0-28 likeliest differential counts + offset to make nonnegative 64 * 30 switch code for out-of-range count to follow in next word 65 * 31 an 8 bit char followed 66 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 67 * 32-127 single character (printable) ascii residue (ie, literal) 68 * 69 * A novel two-tiered string search technique is employed: 70 * 71 * First, a metacharacter-free subpattern and partial pathname is matched 72 * BACKWARDS to avoid full expansion of the pathname list. The time savings 73 * is 40-50% over forward matching, which cannot efficiently handle 74 * overlapped search patterns and compressed path residue. 75 * 76 * Then, the actual shell glob-style regular expression (if in this form) is 77 * matched against the candidate pathnames using the slower routines provided 78 * in the standard 'find'. 79 */ 80 81#include <sys/param.h> 82#include <ctype.h> 83#include <err.h> 84#include <fnmatch.h> 85#include <locale.h> 86#include <stdio.h> 87#include <stdlib.h> 88#include <string.h> 89#include <unistd.h> 90 91#ifdef MMAP 92# include <sys/types.h> 93# include <sys/stat.h> 94# include <sys/mman.h> 95# include <fcntl.h> 96#endif 97 98 99#include "locate.h" 100#include "pathnames.h" 101 102#ifdef DEBUG 103# include <sys/time.h> 104# include <sys/types.h> 105# include <sys/resource.h> 106#endif 107 108int f_mmap; /* use mmap */ 109int f_icase; /* ignore case */ 110int f_stdin; /* read database from stdin */ 111int f_statistic; /* print statistic */ 112int f_silent; /* suppress output, show only count of matches */ 113int f_limit; /* limit number of output lines, 0 == infinite */ 114u_int counter; /* counter for matches [-c] */ 115char separator='\n'; /* line separator */ 116 117 118void usage(void); 119void statistic(FILE *, char *); 120void fastfind(FILE *, char *, char *); 121void fastfind_icase(FILE *, char *, char *); 122void fastfind_mmap(char *, caddr_t, int, char *); 123void fastfind_mmap_icase(char *, caddr_t, int, char *); 124void search_mmap(char *, char **); 125void search_fopen(char *, char **); 126unsigned long cputime(void); 127 128extern char **colon(char **, char*, char*); 129extern void print_matches(u_int); 130extern int getwm(caddr_t); 131extern int getwf(FILE *); 132extern u_char *tolower_word(u_char *); 133extern int check_bigram_char(int); 134extern char *patprep(char *); 135 136int 137main(argc, argv) 138 int argc; 139 char **argv; 140{ 141 register int ch; 142 char **dbv = NULL; 143 char *path_fcodes; /* locate database */ 144#ifdef MMAP 145 f_mmap = 1; /* mmap is default */ 146#endif 147 (void) setlocale(LC_ALL, ""); 148 149 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 150 switch(ch) { 151 case '0': /* 'find -print0' style */ 152 separator = '\0'; 153 break; 154 case 'S': /* statistic lines */ 155 f_statistic = 1; 156 break; 157 case 'l': /* limit number of output lines, 0 == infinite */ 158 f_limit = atoi(optarg); 159 break; 160 case 'd': /* database */ 161 dbv = colon(dbv, optarg, _PATH_FCODES); 162 break; 163 case 'i': /* ignore case */ 164 f_icase = 1; 165 break; 166 case 'm': /* mmap */ 167#ifdef MMAP 168 f_mmap = 1; 169#else 170 warnx("mmap(2) not implemented"); 171#endif 172 break; 173 case 's': /* stdio lib */ 174 f_mmap = 0; 175 break; 176 case 'c': /* suppress output, show only count of matches */ 177 f_silent = 1; 178 break; 179 default: 180 usage(); 181 } 182 argv += optind; 183 argc -= optind; 184 185 /* to few arguments */ 186 if (argc < 1 && !(f_statistic)) 187 usage(); 188 189 /* no (valid) database as argument */ 190 if (dbv == NULL || *dbv == NULL) { 191 /* try to read database from environment */ 192 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 193 *path_fcodes == '\0') 194 /* use default database */ 195 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 196 else /* $LOCATE_PATH */ 197 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 198 } 199 200 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 201 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 202 myctype[ch] = tolower(ch); 203 204 /* foreach database ... */ 205 while((path_fcodes = *dbv) != NULL) { 206 dbv++; 207 208 if (!strcmp(path_fcodes, "-")) 209 f_stdin = 1; 210 else 211 f_stdin = 0; 212 213#ifndef MMAP 214 f_mmap = 0; /* be paranoid */ 215#endif 216 if (!f_mmap || f_stdin || f_statistic) 217 search_fopen(path_fcodes, argv); 218 else 219 search_mmap(path_fcodes, argv); 220 } 221 222 if (f_silent) 223 print_matches(counter); 224 exit(0); 225} 226 227 228void 229search_fopen(db, s) 230 char *db; /* database */ 231 char **s; /* search strings */ 232{ 233 FILE *fp; 234#ifdef DEBUG 235 long t0; 236#endif 237 238 /* can only read stdin once */ 239 if (f_stdin) { 240 fp = stdin; 241 if (*(s+1) != NULL) { 242 warnx("read database from stdin, use only `%s' as pattern", *s); 243 *(s+1) = NULL; 244 } 245 } 246 else if ((fp = fopen(db, "r")) == NULL) 247 err(1, "`%s'", db); 248 249 /* count only chars or lines */ 250 if (f_statistic) { 251 statistic(fp, db); 252 (void)fclose(fp); 253 return; 254 } 255 256 /* foreach search string ... */ 257 while(*s != NULL) { 258#ifdef DEBUG 259 t0 = cputime(); 260#endif 261 if (!f_stdin && 262 fseek(fp, (long)0, SEEK_SET) == -1) 263 err(1, "fseek to begin of ``%s''\n", db); 264 265 if (f_icase) 266 fastfind_icase(fp, *s, db); 267 else 268 fastfind(fp, *s, db); 269#ifdef DEBUG 270 warnx("fastfind %ld ms", cputime () - t0); 271#endif 272 s++; 273 } 274 (void)fclose(fp); 275} 276 277#ifdef MMAP 278void 279search_mmap(db, s) 280 char *db; /* database */ 281 char **s; /* search strings */ 282{ 283 struct stat sb; 284 int fd; 285 caddr_t p; 286 off_t len; 287#ifdef DEBUG 288 long t0; 289#endif 290 if ((fd = open(db, O_RDONLY)) == -1 || 291 fstat(fd, &sb) == -1) 292 err(1, "`%s'", db); 293 len = sb.st_size; 294 if (len < (2*NBG)) 295 errx(1, 296 "database too small: %s\nRun /usr/libexec/locate.updatedb", 297 db); 298 299 if ((p = mmap((caddr_t)0, (size_t)len, 300 PROT_READ, MAP_SHARED, 301 fd, (off_t)0)) == MAP_FAILED) 302 err(1, "mmap ``%s''", db); 303 304 /* foreach search string ... */ 305 while (*s != NULL) { 306#ifdef DEBUG 307 t0 = cputime(); 308#endif 309 if (f_icase) 310 fastfind_mmap_icase(*s, p, (int)len, db); 311 else 312 fastfind_mmap(*s, p, (int)len, db); 313#ifdef DEBUG 314 warnx("fastfind %ld ms", cputime () - t0); 315#endif 316 s++; 317 } 318 319 if (munmap(p, (size_t)len) == -1) 320 warn("munmap %s\n", db); 321 322 (void)close(fd); 323} 324#endif /* MMAP */ 325 326#ifdef DEBUG 327unsigned long 328cputime () 329{ 330 struct rusage rus; 331 332 getrusage(RUSAGE_SELF, &rus); 333 return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); 334} 335#endif /* DEBUG */ 336 337void 338usage () 339{ 340 (void)fprintf(stderr, 341 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 342 (void)fprintf(stderr, 343 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 344 exit(1); 345} 346 347 348/* load fastfind functions */ 349 350/* statistic */ 351/* fastfind_mmap, fastfind_mmap_icase */ 352#ifdef MMAP 353#undef FF_MMAP 354#undef FF_ICASE 355 356#define FF_MMAP 357#include "fastfind.c" 358#define FF_ICASE 359#include "fastfind.c" 360#endif /* MMAP */ 361 362/* fopen */ 363/* fastfind, fastfind_icase */ 364#undef FF_MMAP 365#undef FF_ICASE 366#include "fastfind.c" 367#define FF_ICASE 368#include "fastfind.c" 369