1/* $OpenBSD: fastfind.c,v 1.17 2023/04/28 20:22:35 tb Exp $ */ 2 3/* 4 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#ifndef _LOCATE_STATISTIC_ 37#define _LOCATE_STATISTIC_ 38 39void 40statistic (FILE *fp, char *path_fcodes) 41{ 42 int lines, chars, size, big, zwerg; 43 u_char *p, *s; 44 int c; 45 int count, umlaut; 46 u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; 47 48 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 49 p[c] = check_bigram_char(getc(fp)); 50 s[c] = check_bigram_char(getc(fp)); 51 } 52 53 lines = chars = big = zwerg = umlaut = 0; 54 size = NBG + NBG; 55 56 for (c = getc(fp), count = 0; c != EOF; size++) { 57 if (c == SWITCH) { 58 count += getwf(fp) - OFFSET; 59 size += sizeof(int); 60 zwerg++; 61 } else 62 count += c - OFFSET; 63 64 sane_count(count); 65 for (p = path + count; (c = getc(fp)) > SWITCH; size++) 66 if (c < PARITY) { 67 if (c == UMLAUT) { 68 c = getc(fp); 69 size++; 70 umlaut++; 71 } 72 p++; 73 } else { 74 /* bigram char */ 75 big++; 76 p += 2; 77 } 78 79 p++; 80 lines++; 81 chars += (p - path); 82 } 83 84 (void)printf("\nDatabase: %s\n", path_fcodes); 85 (void)printf("Compression: Front: %2.2f%%, ", 86 (float)(100 * (size + big - (2 * NBG))) / chars); 87 (void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); 88 (void)printf("Total: %2.2f%%\n", 89 (float)(100 * (size - (2 * NBG))) / chars); 90 (void)printf("Filenames: %d, ", lines); 91 (void)printf("Characters: %d, ", chars); 92 (void)printf("Database size: %d\n", size); 93 (void)printf("Bigram characters: %d, ", big); 94 (void)printf("Integers: %d, ", zwerg); 95 (void)printf("8-Bit characters: %d\n", umlaut); 96 97} 98#endif /* _LOCATE_STATISTIC_ */ 99 100 101void 102 103 104#ifdef FF_ICASE 105fastfind_mmap_icase 106#else 107fastfind_mmap 108#endif /* FF_ICASE */ 109(char *pathpart, caddr_t paddr, int len, char *database) 110{ 111 u_char *p, *s, *patend, *q, *foundchar; 112 int c, cc; 113 int count, found, globflag; 114 u_char *cutoff; 115 u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; 116 117#ifdef FF_ICASE 118 for (p = pathpart; *p != '\0'; p++) 119 *p = tolower(*p); 120#endif /* FF_ICASE*/ 121 122 /* init bigram table */ 123 if (len < (2*NBG)) { 124 (void)fprintf(stderr, "database too small: %s\n", database); 125 exit(1); 126 } 127 128 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { 129 p[c] = check_bigram_char(*paddr++); 130 s[c] = check_bigram_char(*paddr++); 131 } 132 133 /* find optimal (last) char for searching */ 134 for (p = pathpart; *p != '\0'; p++) 135 if (strchr(LOCATE_REG, *p) != NULL) 136 break; 137 138 if (*p == '\0') 139 globflag = 0; 140 else 141 globflag = 1; 142 143 p = pathpart; 144 patend = patprep(p); 145 cc = *patend; 146#ifdef FF_ICASE 147 cc = tolower(cc); 148#endif /* FF_ICASE */ 149 150 151 /* main loop */ 152 found = count = 0; 153 foundchar = 0; 154 155 c = (u_char)*paddr++; len--; 156 for (; len > 0; ) { 157 158 /* go forward or backward */ 159 if (c == SWITCH) { /* big step, an integer */ 160 if (len < sizeof(int)) 161 break; 162 count += getwm(paddr) - OFFSET; 163 len -= sizeof(int); 164 paddr += sizeof(int); 165 } else { /* slow step, =< 14 chars */ 166 count += c - OFFSET; 167 } 168 169 sane_count(count); 170 /* overlay old path */ 171 p = path + count; 172 foundchar = p - 1; 173 174 for (; len > 0; ) { 175 c = (u_char)*paddr++; 176 len--; 177 /* 178 * == UMLAUT: 8 bit char followed 179 * <= SWITCH: offset 180 * >= PARITY: bigram 181 * rest: single ascii char 182 * 183 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram 184 */ 185 if (c < PARITY) { 186 if (c <= UMLAUT) { 187 if (c == UMLAUT && len > 0) { 188 c = (u_char)*paddr++; 189 len--; 190 191 } else 192 break; /* SWITCH */ 193 } 194#ifdef FF_ICASE 195 if (tolower(c) == cc) 196#else 197 if (c == cc) 198#endif /* FF_ICASE */ 199 foundchar = p; 200 *p++ = c; 201 } else { 202 /* bigrams are parity-marked */ 203 c &= ASCII_MAX; 204#ifdef FF_ICASE 205 if (tolower(bigram1[c]) == cc || 206 tolower(bigram2[c]) == cc) 207#else 208 if (bigram1[c] == cc || 209 bigram2[c] == cc) 210#endif /* FF_ICASE */ 211 foundchar = p + 1; 212 213 *p++ = bigram1[c]; 214 *p++ = bigram2[c]; 215 } 216 } 217 218 if (found) { /* previous line matched */ 219 cutoff = path; 220 *p-- = '\0'; 221 foundchar = p; 222 } else if (foundchar >= path + count) { /* a char matched */ 223 *p-- = '\0'; 224 cutoff = path + count; 225 } else /* nothing to do */ 226 continue; 227 228 found = 0; 229 for (s = foundchar; s >= cutoff; s--) { 230 if (*s == cc 231#ifdef FF_ICASE 232 || tolower(*s) == cc 233#endif /* FF_ICASE */ 234 ) { /* fast first char check */ 235 for (p = patend - 1, q = s - 1; *p != '\0'; 236 p--, q--) 237 if (*q != *p 238#ifdef FF_ICASE 239 && tolower(*q) != *p 240#endif /* FF_ICASE */ 241 ) 242 break; 243 if (*p == '\0') { /* fast match success */ 244 char *shortpath; 245 246 found = 1; 247 shortpath = path; 248 if (f_basename) 249 shortpath = basename(path); 250 251 if ((!f_basename && (!globflag || 252#ifdef FF_ICASE 253 !fnmatch(pathpart, shortpath, 254 FNM_CASEFOLD))) 255#else 256 !fnmatch(pathpart, shortpath, 0))) 257#endif /* FF_ICASE */ 258 || (strstr(shortpath, pathpart) != 259 NULL)) { 260 if (f_silent) 261 counter++; 262 else if (f_limit) { 263 counter++; 264 if (f_limit >= counter) 265 (void)puts(path); 266 else { 267 fprintf(stderr, "[show only %u lines]\n", counter - 1); 268 exit(0); 269 } 270 } else 271 (void)puts(path); 272 } 273 break; 274 } 275 } 276 } 277 } 278} 279