1/*	$OpenBSD: fastfind.c,v 1.17 2023/04/28 20:22:35 tb Exp $	*/
2
3/*
4 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * James A. Woods.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#ifndef _LOCATE_STATISTIC_
37#define _LOCATE_STATISTIC_
38
39void
40statistic (FILE *fp, char *path_fcodes)
41{
42	int lines, chars, size, big, zwerg;
43	u_char *p, *s;
44	int c;
45	int count, umlaut;
46	u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];
47
48	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
49		p[c] = check_bigram_char(getc(fp));
50		s[c] = check_bigram_char(getc(fp));
51	}
52
53	lines = chars = big = zwerg = umlaut = 0;
54	size = NBG + NBG;
55
56	for (c = getc(fp), count = 0; c != EOF; size++) {
57		if (c == SWITCH) {
58			count += getwf(fp) - OFFSET;
59			size += sizeof(int);
60			zwerg++;
61		} else
62			count += c - OFFSET;
63
64		sane_count(count);
65		for (p = path + count; (c = getc(fp)) > SWITCH; size++)
66			if (c < PARITY) {
67				if (c == UMLAUT) {
68					c = getc(fp);
69					size++;
70					umlaut++;
71				}
72				p++;
73			} else {
74				/* bigram char */
75				big++;
76				p += 2;
77			}
78
79		p++;
80		lines++;
81		chars += (p - path);
82	}
83
84	(void)printf("\nDatabase: %s\n", path_fcodes);
85	(void)printf("Compression: Front: %2.2f%%, ",
86	    (float)(100 * (size + big - (2 * NBG))) / chars);
87	(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size);
88	(void)printf("Total: %2.2f%%\n",
89	    (float)(100 * (size - (2 * NBG))) / chars);
90	(void)printf("Filenames: %d, ", lines);
91	(void)printf("Characters: %d, ", chars);
92	(void)printf("Database size: %d\n", size);
93	(void)printf("Bigram characters: %d, ", big);
94	(void)printf("Integers: %d, ", zwerg);
95	(void)printf("8-Bit characters: %d\n", umlaut);
96
97}
98#endif /* _LOCATE_STATISTIC_ */
99
100
101void
102
103
104#ifdef FF_ICASE
105fastfind_mmap_icase
106#else
107fastfind_mmap
108#endif /* FF_ICASE */
109(char *pathpart, caddr_t paddr, int len, char *database)
110{
111	u_char *p, *s, *patend, *q, *foundchar;
112	int c, cc;
113	int count, found, globflag;
114	u_char *cutoff;
115	u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];
116
117#ifdef FF_ICASE
118	for (p = pathpart; *p != '\0'; p++)
119		*p = tolower(*p);
120#endif /* FF_ICASE*/
121
122	/* init bigram table */
123	if (len < (2*NBG)) {
124		(void)fprintf(stderr, "database too small: %s\n", database);
125		exit(1);
126	}
127
128	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
129		p[c] = check_bigram_char(*paddr++);
130		s[c] = check_bigram_char(*paddr++);
131	}
132
133	/* find optimal (last) char for searching */
134	for (p = pathpart; *p != '\0'; p++)
135		if (strchr(LOCATE_REG, *p) != NULL)
136			break;
137
138	if (*p == '\0')
139		globflag = 0;
140	else
141		globflag = 1;
142
143	p = pathpart;
144	patend = patprep(p);
145	cc = *patend;
146#ifdef FF_ICASE
147	cc = tolower(cc);
148#endif /* FF_ICASE */
149
150
151	/* main loop */
152	found = count = 0;
153	foundchar = 0;
154
155	c = (u_char)*paddr++; len--;
156	for (; len > 0; ) {
157
158		/* go forward or backward */
159		if (c == SWITCH) { /* big step, an integer */
160			if (len < sizeof(int))
161				break;
162			count += getwm(paddr) - OFFSET;
163			len -= sizeof(int);
164			paddr += sizeof(int);
165		} else {	   /* slow step, =< 14 chars */
166			count += c - OFFSET;
167		}
168
169		sane_count(count);
170		/* overlay old path */
171		p = path + count;
172		foundchar = p - 1;
173
174		for (; len > 0; ) {
175			c = (u_char)*paddr++;
176			len--;
177			/*
178			 * == UMLAUT: 8 bit char followed
179			 * <= SWITCH: offset
180			 * >= PARITY: bigram
181			 * rest:      single ascii char
182			 *
183			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram
184			 */
185			if (c < PARITY) {
186				if (c <= UMLAUT) {
187					if (c == UMLAUT && len > 0) {
188						c = (u_char)*paddr++;
189						len--;
190
191					} else
192						break; /* SWITCH */
193				}
194#ifdef FF_ICASE
195				if (tolower(c) == cc)
196#else
197				if (c == cc)
198#endif /* FF_ICASE */
199					foundchar = p;
200				*p++ = c;
201			} else {
202				/* bigrams are parity-marked */
203				c &= ASCII_MAX;
204#ifdef FF_ICASE
205				if (tolower(bigram1[c]) == cc ||
206				    tolower(bigram2[c]) == cc)
207#else
208				if (bigram1[c] == cc ||
209				    bigram2[c] == cc)
210#endif /* FF_ICASE */
211					foundchar = p + 1;
212
213				*p++ = bigram1[c];
214				*p++ = bigram2[c];
215			}
216		}
217
218		if (found) {			/* previous line matched */
219			cutoff = path;
220			*p-- = '\0';
221			foundchar = p;
222		} else if (foundchar >= path + count) { /* a char matched */
223			*p-- = '\0';
224			cutoff = path + count;
225		} else				/* nothing to do */
226			continue;
227
228		found = 0;
229		for (s = foundchar; s >= cutoff; s--) {
230			if (*s == cc
231#ifdef FF_ICASE
232			    || tolower(*s) == cc
233#endif /* FF_ICASE */
234			    ) {	/* fast first char check */
235				for (p = patend - 1, q = s - 1; *p != '\0';
236				    p--, q--)
237					if (*q != *p
238#ifdef FF_ICASE
239					    && tolower(*q) != *p
240#endif /* FF_ICASE */
241					    )
242						break;
243				if (*p == '\0') {   /* fast match success */
244					char	*shortpath;
245
246					found = 1;
247					shortpath = path;
248					if (f_basename)
249						shortpath = basename(path);
250
251					if ((!f_basename && (!globflag ||
252#ifdef FF_ICASE
253					    !fnmatch(pathpart, shortpath,
254						FNM_CASEFOLD)))
255#else
256					    !fnmatch(pathpart, shortpath, 0)))
257#endif /* FF_ICASE */
258					    || (strstr(shortpath, pathpart) !=
259					    NULL)) {
260						if (f_silent)
261							counter++;
262						else if (f_limit) {
263							counter++;
264							if (f_limit >= counter)
265								(void)puts(path);
266							else  {
267								fprintf(stderr, "[show only %u lines]\n", counter - 1);
268								exit(0);
269							}
270						} else
271							(void)puts(path);
272					}
273					break;
274				}
275			}
276		}
277	}
278}
279