fastfind.c revision 27574
1116742Ssam/* 2116904Ssam * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 3186904Ssam * Copyright (c) 1989, 1993 4116742Ssam * The Regents of the University of California. All rights reserved. 5116742Ssam * 6116742Ssam * This code is derived from software contributed to Berkeley by 7116742Ssam * James A. Woods. 8116742Ssam * 9116742Ssam * Redistribution and use in source and binary forms, with or without 10116904Ssam * modification, are permitted provided that the following conditions 11116904Ssam * are met: 12116904Ssam * 1. Redistributions of source code must retain the above copyright 13116904Ssam * notice, this list of conditions and the following disclaimer. 14116742Ssam * 2. Redistributions in binary form must reproduce the above copyright 15116904Ssam * notice, this list of conditions and the following disclaimer in the 16116904Ssam * documentation and/or other materials provided with the distribution. 17116904Ssam * 3. All advertising materials mentioning features or use of this software 18116904Ssam * must display the following acknowledgement: 19116904Ssam * This product includes software developed by the University of 20116904Ssam * California, Berkeley and its contributors. 21116904Ssam * 4. Neither the name of the University nor the names of its contributors 22116904Ssam * may be used to endorse or promote products derived from this software 23116904Ssam * without specific prior written permission. 24116904Ssam * 25116742Ssam * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26116742Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27116742Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28116742Ssam * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29116742Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30116742Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31116742Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32116742Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33178354Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34116742Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35116742Ssam * SUCH DAMAGE. 36191746Sthompsa * 37116742Ssam * $Id: fastfind.c,v 1.9 1997/02/22 19:55:45 peter Exp $ 38295126Sglebius */ 39116742Ssam 40287197Sglebius 41116742Ssam#ifndef _LOCATE_STATISTIC_ 42283529Sglebius#define _LOCATE_STATISTIC_ 43283529Sglebius 44116742Ssamvoid 45257176Sglebiusstatistic (fp, path_fcodes) 46178354Ssam FILE *fp; /* open database */ 47116742Ssam char *path_fcodes; /* for error message */ 48178354Ssam{ 49116742Ssam register int lines, chars, size, big, zwerg; 50116742Ssam register u_char *p, *s; 51116742Ssam register int c; 52178354Ssam int count, umlaut; 53190391Ssam u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 54190391Ssam 55190391Ssam for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 56206358Srpaulo p[c] = check_bigram_char(getc(fp)); 57116742Ssam s[c] = check_bigram_char(getc(fp)); 58116742Ssam } 59116742Ssam 60178955Ssam lines = chars = big = zwerg = umlaut = 0; 61178955Ssam size = NBG + NBG; 62178955Ssam 63178955Ssam for (c = getc(fp), count = 0; c != EOF; size++) { 64178955Ssam if (c == SWITCH) { 65178955Ssam count += getwf(fp) - OFFSET; 66178955Ssam size += sizeof(int); 67178955Ssam zwerg++; 68178955Ssam } else 69188782Ssam count += c - OFFSET; 70188782Ssam 71178955Ssam for (p = path + count; (c = getc(fp)) > SWITCH; size++) 72178955Ssam if (c < PARITY) { 73116742Ssam if (c == UMLAUT) { 74178957Ssam c = getc(fp); 75178957Ssam size++; 76178957Ssam umlaut++; 77178957Ssam } 78178957Ssam p++; 79178957Ssam } else { 80178957Ssam /* bigram char */ 81178957Ssam big++; 82195618Srpaulo p += 2; 83195618Srpaulo } 84195618Srpaulo 85178957Ssam p++; 86178957Ssam lines++; 87283566Sglebius chars += (p - path); 88178354Ssam } 89116742Ssam 90178354Ssam (void)printf("\nDatabase: %s\n", path_fcodes); 91193655Ssam (void)printf("Compression: Front: %2.2f%%, ", 92178354Ssam (float)(100 * (size + big - (2 * NBG))) / chars); 93178354Ssam (void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); 94178354Ssam (void)printf("Total: %2.2f%%\n", 95178354Ssam (float)(100 * (size - (2 * NBG))) / chars); 96178354Ssam (void)printf("Filenames: %d, ", lines); 97178354Ssam (void)printf("Characters: %d, ", chars); 98283568Sglebius (void)printf("Database size: %d\n", size); 99178354Ssam (void)printf("Bigram characters: %d, ", big); 100178354Ssam (void)printf("Integers: %d, ", zwerg); 101178354Ssam (void)printf("8-Bit characters: %d\n", umlaut); 102164645Ssam 103164645Ssam} 104164645Ssam#endif /* _LOCATE_STATISTIC_ */ 105164645Ssam 106164645Ssam 107164645Ssamvoid 108165569Ssam#ifdef FF_MMAP 109165569Ssam 110165569Ssam 111165569Ssam#ifdef FF_ICASE 112164645Ssamfastfind_mmap_icase 113164645Ssam#else 114164645Ssamfastfind_mmap 115164645Ssam#endif /* FF_ICASE */ 116164645Ssam(pathpart, paddr, len, database) 117164645Ssam char *pathpart; /* search string */ 118164645Ssam caddr_t paddr; /* mmap pointer */ 119140915Ssam int len; /* length of database */ 120165569Ssam char *database; /* for error message */ 121165569Ssam 122165569Ssam 123165569Ssam#else /* MMAP */ 124287197Sglebius 125165569Ssam 126116742Ssam#ifdef FF_ICASE 127165569Ssamfastfind_icase 128188782Ssam#else 129165574Ssamfastfind 130165569Ssam#endif /* FF_ICASE */ 131116742Ssam 132116742Ssam(fp, pathpart, database) 133116742Ssam FILE *fp; /* open database */ 134186107Ssam char *pathpart; /* search string */ 135170530Ssam char *database; /* for error message */ 136116742Ssam 137178354Ssam 138167468Ssam#endif /* MMAP */ 139170530Ssam 140116742Ssam{ 141170530Ssam register u_char *p, *s, *patend, *q, *foundchar; 142187796Ssam register int c, cc; 143187796Ssam int count, found, globflag; 144187796Ssam u_char *cutoff; 145187796Ssam u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 146187796Ssam 147187796Ssam#ifdef FF_ICASE 148187796Ssam /* use a lookup table for case insensitive search */ 149187796Ssam u_char table[UCHAR_MAX + 1]; 150187796Ssam 151187796Ssam tolower_word(pathpart); 152187796Ssam#endif /* FF_ICASE*/ 153187796Ssam 154187796Ssam /* init bigram table */ 155187796Ssam#ifdef FF_MMAP 156187796Ssam if (len < (2*NBG)) 157170530Ssam errx(1, "database too small: %s", database); 158170530Ssam 159170530Ssam for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { 160170530Ssam p[c] = check_bigram_char(*paddr++); 161170530Ssam s[c] = check_bigram_char(*paddr++); 162170530Ssam } 163170530Ssam#else 164170530Ssam for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 165170530Ssam p[c] = check_bigram_char(getc(fp)); 166170530Ssam s[c] = check_bigram_char(getc(fp)); 167170530Ssam } 168170530Ssam#endif /* FF_MMAP */ 169170530Ssam 170170530Ssam /* find optimal (last) char for searching */ 171170530Ssam for (p = pathpart; *p != '\0'; p++) 172170530Ssam if (index(LOCATE_REG, *p) != NULL) 173170530Ssam break; 174170530Ssam 175188782Ssam if (*p == '\0') 176188782Ssam globflag = 0; 177188782Ssam else 178188782Ssam globflag = 1; 179170530Ssam 180170530Ssam p = pathpart; 181170530Ssam patend = patprep(p); 182170530Ssam cc = *patend; 183116742Ssam 184170530Ssam#ifdef FF_ICASE 185170530Ssam /* set patend char to true */ 186170530Ssam for (c = 0; c < UCHAR_MAX + 1; c++) 187164645Ssam table[c] = 0; 188178354Ssam 189178354Ssam table[TOLOWER(*patend)] = 1; 190178354Ssam table[toupper(*patend)] = 1; 191178354Ssam#endif /* FF_ICASE */ 192170530Ssam 193172233Ssam 194178354Ssam /* main loop */ 195170530Ssam found = count = 0; 196170530Ssam foundchar = 0; 197190532Ssam 198170530Ssam#ifdef FF_MMAP 199164645Ssam c = (u_char)*paddr++; len--; 200165569Ssam for (; len > 0; ) { 201165569Ssam#else 202165569Ssam c = getc(fp); 203165569Ssam for (; c != EOF; ) { 204165569Ssam#endif /* FF_MMAP */ 205187897Ssam 206188782Ssam /* go forward or backward */ 207188782Ssam if (c == SWITCH) { /* big step, an integer */ 208188774Ssam#ifdef FF_MMAP 209188774Ssam count += getwm(paddr) - OFFSET; 210165569Ssam len -= INTSIZE; paddr += INTSIZE; 211165569Ssam#else 212219596Sbschmidt count += getwf(fp) - OFFSET; 213219596Sbschmidt#endif /* FF_MMAP */ 214219596Sbschmidt } else { /* slow step, =< 14 chars */ 215219596Sbschmidt count += c - OFFSET; 216219596Sbschmidt } 217219596Sbschmidt 218219596Sbschmidt /* overlay old path */ 219219596Sbschmidt p = path + count; 220219596Sbschmidt foundchar = p - 1; 221165569Ssam 222165569Ssam for (;;) { 223165569Ssam#ifdef FF_MMAP 224165569Ssam c = (u_char)*paddr++; 225165569Ssam len--; 226165569Ssam#else 227178354Ssam c = getc(fp); 228283540Sglebius#endif /* FF_MMAP */ 229178354Ssam /* 230283540Sglebius * == UMLAUT: 8 bit char followed 231283540Sglebius * <= SWITCH: offset 232178354Ssam * >= PARITY: bigram 233178354Ssam * rest: single ascii char 234178354Ssam * 235283540Sglebius * offset < SWITCH < UMLAUT < ascii < PARITY < bigram 236178354Ssam */ 237283540Sglebius if (c < PARITY) { 238283540Sglebius if (c <= UMLAUT) { 239178354Ssam if (c == UMLAUT) { 240178354Ssam#ifdef FF_MMAP 241178521Ssam c = (u_char)*paddr++; 242233452Sadrian len--; 243233452Sadrian#else 244233452Sadrian c = getc(fp); 245283529Sglebius#endif /* FF_MMAP */ 246233452Sadrian 247233452Sadrian } else 248283529Sglebius break; /* SWITCH */ 249283529Sglebius } 250283529Sglebius#ifdef FF_ICASE 251283529Sglebius if (table[c]) 252283529Sglebius#else 253283529Sglebius if (c == cc) 254283529Sglebius#endif /* FF_ICASE */ 255283529Sglebius foundchar = p; 256283529Sglebius *p++ = c; 257283529Sglebius } 258283529Sglebius else { 259283529Sglebius /* bigrams are parity-marked */ 260283529Sglebius TO7BIT(c); 261287197Sglebius 262287197Sglebius#ifndef FF_ICASE 263287197Sglebius if (bigram1[c] == cc || 264287197Sglebius bigram2[c] == cc) 265287197Sglebius#else 266287197Sglebius 267287197Sglebius if (table[bigram1[c]] || 268287197Sglebius table[bigram2[c]]) 269296114Savos#endif /* FF_ICASE */ 270287197Sglebius foundchar = p + 1; 271287197Sglebius 272287197Sglebius *p++ = bigram1[c]; 273296114Savos *p++ = bigram2[c]; 274296114Savos } 275296114Savos } 276296114Savos 277296114Savos if (found) { /* previous line matched */ 278287197Sglebius cutoff = path; 279287197Sglebius *p-- = '\0'; 280287197Sglebius foundchar = p; 281296114Savos } else if (foundchar >= path + count) { /* a char matched */ 282287197Sglebius *p-- = '\0'; 283287197Sglebius cutoff = path + count; 284287197Sglebius } else /* nothing to do */ 285296114Savos continue; 286296114Savos 287287197Sglebius found = 0; 288287197Sglebius for (s = foundchar; s >= cutoff; s--) { 289287197Sglebius if (*s == cc 290287197Sglebius#ifdef FF_ICASE 291287197Sglebius || TOLOWER(*s) == cc 292287197Sglebius#endif /* FF_ICASE */ 293287197Sglebius ) { /* fast first char check */ 294178354Ssam for (p = patend - 1, q = s - 1; *p != '\0'; 295178354Ssam p--, q--) 296178354Ssam if (*q != *p 297178354Ssam#ifdef FF_ICASE 298165569Ssam && TOLOWER(*q) != *p 299287197Sglebius#endif /* FF_ICASE */ 300165569Ssam ) 301165569Ssam break; 302283529Sglebius if (*p == '\0') { /* fast match success */ 303283529Sglebius found = 1; 304178354Ssam if (!globflag || 305191746Sthompsa#ifndef FF_ICASE 306191746Sthompsa !fnmatch(pathpart, path, 0)) 307191746Sthompsa#else 308191746Sthompsa !fnmatch(pathpart, path, 309230447Sadrian FNM_CASEFOLD)) 310283565Sglebius#endif /* !FF_ICASE */ 311283568Sglebius { 312283568Sglebius if (f_silent) 313165569Ssam counter++; 314165569Ssam else if (f_limit) { 315165569Ssam counter++; 316165569Ssam if (f_limit >= counter) 317165569Ssam (void)puts(path); 318287197Sglebius else 319170530Ssam errx(0, "[show only %d lines]", counter - 1); 320178354Ssam } else 321178354Ssam (void)puts(path); 322233452Sadrian } 323116742Ssam break; 324195379Ssam } 325155688Ssam } 326155688Ssam } 327138568Ssam } 328138568Ssam} 329170530Ssam