fastfind.c revision 18829
1234287Sdim/* 2234287Sdim * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 3234287Sdim * Copyright (c) 1989, 1993 4234287Sdim * The Regents of the University of California. All rights reserved. 5234287Sdim * 6234287Sdim * This code is derived from software contributed to Berkeley by 7234287Sdim * James A. Woods. 8234287Sdim * 9234287Sdim * Redistribution and use in source and binary forms, with or without 10234287Sdim * modification, are permitted provided that the following conditions 11234287Sdim * are met: 12234287Sdim * 1. Redistributions of source code must retain the above copyright 13234287Sdim * notice, this list of conditions and the following disclaimer. 14234287Sdim * 2. Redistributions in binary form must reproduce the above copyright 15249423Sdim * notice, this list of conditions and the following disclaimer in the 16234287Sdim * documentation and/or other materials provided with the distribution. 17234287Sdim * 3. All advertising materials mentioning features or use of this software 18249423Sdim * must display the following acknowledgement: 19234287Sdim * This product includes software developed by the University of 20234287Sdim * California, Berkeley and its contributors. 21234287Sdim * 4. Neither the name of the University nor the names of its contributors 22234287Sdim * may be used to endorse or promote products derived from this software 23234287Sdim * without specific prior written permission. 24234287Sdim * 25234287Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26234287Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27234287Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28234287Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29234287Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30276479Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31234287Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32234287Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33234287Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34234287Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35234287Sdim * SUCH DAMAGE. 36234287Sdim * 37234287Sdim * $Id: fastfind.c,v 1.1 1996/08/31 23:14:52 wosch Exp $ 38234287Sdim */ 39234287Sdim 40234287Sdim 41234287Sdim#ifndef _LOCATE_STATISTIC_ 42234287Sdim#define _LOCATE_STATISTIC_ 43234287Sdim 44234287Sdimvoid 45234287Sdimstatistic (fp, path_fcodes) 46234287Sdim FILE *fp; /* open database */ 47234287Sdim char *path_fcodes; /* for error message */ 48234287Sdim{ 49234287Sdim register int lines, chars, size, big; 50234287Sdim register u_char *p, *s; 51234287Sdim register int c; 52234287Sdim int count; 53234287Sdim u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 54234287Sdim 55234287Sdim for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 56234287Sdim p[c] = check_bigram_char(getc(fp)); 57234287Sdim s[c] = check_bigram_char(getc(fp)); 58234287Sdim } 59234287Sdim 60234287Sdim lines = chars = big = 0; 61234287Sdim size = NBG + NBG; 62234287Sdim 63234287Sdim for (c = getc(fp), count = 0; c != EOF; size++) { 64234287Sdim if (c == SWITCH) { 65234287Sdim count += getwf(fp) - OFFSET; 66234287Sdim size += sizeof(int); 67234287Sdim } else 68234287Sdim count += c - OFFSET; 69234287Sdim 70234287Sdim for (p = path + count; (c = getc(fp)) > SWITCH; size++) 71234287Sdim if (c < PARITY) 72234287Sdim p++; 73234287Sdim else { 74234287Sdim big++; 75276479Sdim p += 2; 76276479Sdim } 77276479Sdim 78234287Sdim p++; 79234287Sdim lines++; 80234287Sdim chars += (p - path); 81234287Sdim } 82234287Sdim 83234287Sdim (void)printf("\nDatabase: %s\n", path_fcodes); 84234287Sdim (void)printf("Compression: Front: %2.2f%%, ", 85234287Sdim (float)(100 * (size + big)) / chars); 86234287Sdim (void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); 87234287Sdim (void)printf("Total: %2.2f%%\n", (float)(100 * size) / chars); 88234287Sdim (void)printf("Filenames: %d, ", lines); 89234287Sdim (void)printf("Chars: %d\n", chars); 90234287Sdim (void)printf("Database size: %d, ", size); 91234287Sdim (void)printf("Bigram chars: %d\n", big); 92234287Sdim 93234287Sdim} 94234287Sdim#endif /* _LOCATE_STATISTIC_ */ 95234287Sdim 96234287Sdim 97234287Sdimvoid 98234287Sdim#ifdef FF_MMAP 99234287Sdim 100234287Sdim 101234287Sdim#ifdef FF_ICASE 102234287Sdimfastfind_mmap_icase 103276479Sdim#else 104243830Sdimfastfind_mmap 105234287Sdim#endif 106234287Sdim(pathpart, paddr, len, database) 107243830Sdim char *pathpart; /* search string */ 108243830Sdim caddr_t paddr; /* mmap pointer */ 109234287Sdim int len; /* length of database */ 110234287Sdim char *database; /* for error message */ 111234287Sdim 112234287Sdim 113243830Sdim#else /* MMAP */ 114243830Sdim 115243830Sdim 116234287Sdim#ifdef FF_ICASE 117234287Sdimfastfind_icase 118234287Sdim#else /* !FF_ICASE */ 119234287Sdimfastfind 120234287Sdim#endif /* FF_ICASE */ 121234287Sdim 122234287Sdim(fp, pathpart, database) 123234287Sdim FILE *fp; /* open database */ 124234287Sdim char *pathpart; /* search string */ 125234287Sdim char *database; /* for error message */ 126234287Sdim 127234287Sdim 128234287Sdim#endif /* MMAP */ 129243830Sdim 130234287Sdim{ 131234287Sdim register u_char *p, *s, *patend, *q, *foundchar; 132243830Sdim register int c, cc; 133234287Sdim int count, found, globflag; 134234287Sdim u_char *cutoff; 135234287Sdim u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 136234287Sdim 137234287Sdim#ifdef FF_ICASE 138243830Sdim /* use a lookup table for case insensitive search */ 139243830Sdim u_char table[UCHAR_MAX]; 140243830Sdim 141234287Sdim tolower_word(pathpart); 142234287Sdim#endif 143234287Sdim 144234287Sdim /* init bigram table */ 145234287Sdim#ifdef FF_MMAP 146276479Sdim if (len < (2*NBG)) { 147276479Sdim (void)fprintf(stderr, "database to small: %s\n", database); 148276479Sdim exit(1); 149234287Sdim } 150234287Sdim 151234287Sdim for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { 152234287Sdim p[c] = check_bigram_char(*paddr++); 153234287Sdim s[c] = check_bigram_char(*paddr++); 154234287Sdim } 155234287Sdim#else 156288943Sdim for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 157288943Sdim p[c] = check_bigram_char(getc(fp)); 158288943Sdim s[c] = check_bigram_char(getc(fp)); 159234287Sdim } 160234287Sdim#endif 161234287Sdim 162234287Sdim /* find optimal (last) char for searching */ 163234287Sdim for (p = pathpart; *p != '\0'; p++) 164234287Sdim if (index(LOCATE_REG, *p) != NULL) 165234287Sdim break; 166234287Sdim 167276479Sdim if (*p == '\0') 168234287Sdim globflag = 0; 169234287Sdim else 170234287Sdim globflag = 1; 171234287Sdim 172234287Sdim p = pathpart; 173234287Sdim patend = patprep(p); 174234287Sdim cc = *patend; 175234287Sdim 176#ifdef FF_ICASE 177 /* set patend char to true */ 178 table[TOLOWER(*patend)] = 1; 179 table[toupper(*patend)] = 1; 180#endif 181 182 183 /* main loop */ 184 found = count = 0; 185 foundchar = 0; 186 187#ifdef FF_MMAP 188 for (c = (u_char)*paddr++; len-- > 0; ) { 189#else 190 for (c = getc(fp); c != EOF; ) { 191#endif 192 193 /* go forward or backward */ 194 if (c == SWITCH) { /* big step, an integer */ 195#ifdef FF_MMAP 196 count += getwm(paddr) - OFFSET; 197 len -= INTSIZE; paddr += INTSIZE; 198#else 199 count += getwf(fp) - OFFSET; 200#endif 201 } else { /* slow step, =< 14 chars */ 202 count += c - OFFSET; 203 } 204 205 /* overlay old path */ 206 p = path + count; 207 foundchar = p - 1; 208#ifdef FF_MMAP 209 for (; (c = (u_char)*paddr++) > SWITCH; len--) 210#else 211 for (; (c = getc(fp)) > SWITCH; ) 212#endif 213 214 if (c < PARITY) { 215#ifdef FF_ICASE 216 if (table[c]) 217#else 218 if (c == cc) 219#endif 220 foundchar = p; 221 *p++ = c; 222 } 223 else { 224 /* bigrams are parity-marked */ 225 TO7BIT(c); 226 227#ifndef FF_ICASE 228 if (bigram1[c] == cc || 229 bigram2[c] == cc) 230#else 231 232 if (table[bigram1[c]] || 233 table[bigram2[c]]) 234#endif 235 foundchar = p + 1; 236 237 *p++ = bigram1[c]; 238 *p++ = bigram2[c]; 239 } 240 241 242 if (found) { /* previous line matched */ 243 cutoff = path; 244 *p-- = '\0'; 245 foundchar = p; 246 } else if (foundchar >= path + count) { /* a char matched */ 247 *p-- = '\0'; 248 cutoff = path + count; 249 } else /* nothing to do */ 250 continue; 251 252 found = 0; 253 for (s = foundchar; s >= cutoff; s--) { 254 if (*s == cc 255#ifdef FF_ICASE 256 || TOLOWER(*s) == cc 257#endif 258 ) { /* fast first char check */ 259 for (p = patend - 1, q = s - 1; *p != '\0'; 260 p--, q--) 261 if (*q != *p 262#ifdef FF_ICASE 263 && TOLOWER(*q) != *p 264#endif 265 ) 266 break; 267 if (*p == '\0') { /* fast match success */ 268 found = 1; 269 if (!globflag || !fnmatch(pathpart, path, 0)) { 270 if (f_silent) 271 counter++; 272 else if (f_limit) { 273 counter++; 274 if (f_limit >= counter) 275 (void)puts(path); 276 else { 277 (void)fprintf(stderr, "[show only %d lines]\n", counter - 1); 278 exit(0); 279 } 280 } else 281 (void)puts(path); 282 } 283 break; 284 } 285 } 286 } 287 } 288} 289